---
title: "Classification"
output:
    pdf_document:
        fig_height: 3
---

# Data Set -- Iris

```{r}
# Load the built-in iris data set
data(iris)

# Peek at the first few rows to see the structure
head(iris)

# Visualize the petal measurements, with color and shape per species
library(ggplot2)
iris.plot <- ggplot(iris, aes(Petal.Length, Petal.Width)) +
    geom_point(aes(color = Species, shape = Species))
iris.plot
```

# Logistic Regression

```{r}
library(mlr)

# Create task and learner.
# Logistic regression can only handle two classes, so keep only rows
# 51-150 (the two non-setosa species).
iris2 <- iris[51:150, ]
task <- makeClassifTask(data = iris2, target = "Species")
learner <- makeLearner("classif.logreg")

# Split the data into train and test set.
# floor() makes the sample size an explicit integer; sample()'s `size`
# argument is documented as a non-negative integer, and 2/3 * n is
# fractional here (100 rows -> 66.67), which previously relied on
# implicit truncation.
n <- nrow(iris2)
train.set <- sample(n, size = floor(2 / 3 * n))
test.set <- setdiff(seq_len(n), train.set)

# Train a model on the training subset only
model <- train(learner, task, subset = train.set)
model

# Now predict on the held-out test set
predictions <- predict(model, task = task, subset = test.set)
predictions

# How did we do? Accuracy plus the full confusion matrix
performance(predictions, measures = acc)
calculateConfusionMatrix(predictions)

# What does the learned model object look like?
getLearnerModel(model)

# Plot the learner's decision regions in the petal feature space
plotLearnerPrediction(learner, task, measures = acc,
    features = c("Petal.Length", "Petal.Width"))
```

## Predicting Probabilities

```{r}
# Same learner as before, but ask for class probabilities rather than
# hard labels
learner <- makeLearner("classif.logreg", predict.type = "prob")
model <- train(learner, task, subset = train.set)
predictions <- predict(model, task = task, subset = test.set)
predictions

# Plot how performance changes as the class threshold is moved
thresh.perf <- generateThreshVsPerfData(predictions, measures = acc)
plotThreshVsPerf(thresh.perf)
```

## Using Resampling

```{r}
# mlr can do the train/test partitioning for us: describe a single
# holdout split that keeps 2/3 of the data for training
rdesc <- makeResampleDesc(method = "Holdout", split = 2/3)
result <- resample(learner, task, rdesc, measures = acc, models = TRUE)

# Pull out the predictions made on the holdout set
getRRPredictions(result)

# Pull out the (single) fitted model
getLearnerModel(result$models[[1]])
```

# Linear Discriminant Analysis

```{r}
# LDA is not limited to two classes, so use the full three-species data
task <- makeClassifTask(data = iris, target = "Species")
learner <- makeLearner("classif.lda")
result <- resample(learner, task, rdesc, measures = acc, models = TRUE)

# Decision regions in the petal feature space
plotLearnerPrediction(learner, task, measures = acc,
    features = c("Petal.Length", "Petal.Width"))

# Inspect the fitted model object
getLearnerModel(result$models[[1]])
```

# Support Vector Machines

```{r}
# Kernel SVM with its default settings
learner <- makeLearner("classif.ksvm")
result <- resample(learner, task, rdesc, measures = acc, models = TRUE)

# Inspect the fitted model, then plot its decision regions
getLearnerModel(result$models[[1]])
plotLearnerPrediction(learner, task, measures = acc,
    features = c("Petal.Length", "Petal.Width"))
```

## Different Kernel

```{r}
# The same SVM learner, but with the "vanilladot" kernel instead of the
# default one
learner <- makeLearner("classif.ksvm", par.vals = list(kernel = "vanilladot"))
result <- resample(learner, task, rdesc, measures = acc, models = TRUE)

# Inspect the fitted model, then plot its decision regions
getLearnerModel(result$models[[1]])
plotLearnerPrediction(learner, task, measures = acc,
    features = c("Petal.Length", "Petal.Width"))
```

# Classification Trees

```{r}
# A single decision tree via rpart
learner <- makeLearner("classif.rpart")
result <- resample(learner, task, rdesc, measures = acc, models = TRUE)
plotLearnerPrediction(learner, task, measures = acc,
    features = c("Petal.Length", "Petal.Width"))

# Extract the fitted tree once and reuse it below
tree.model <- getLearnerModel(result$models[[1]])
tree.model

# rpart.plot is not part of mlr; it draws the tree structure directly
library(rpart.plot)
rpart.plot(tree.model)
```

# Random Forests

```{r}
# An ensemble of trees: random forest
learner <- makeLearner("classif.randomForest")
result <- resample(learner, task, rdesc, measures = acc, models = TRUE)

# Inspect the fitted forest, then plot its decision regions
getLearnerModel(result$models[[1]])
plotLearnerPrediction(learner, task, measures = acc,
    features = c("Petal.Length", "Petal.Width"))
```

# More

```{r}
# All learners that can be applied to this task...
available.learners <- listLearners(task)
available.learners$class

# ...and all performance measures that apply to it
listMeasures(task)
```