---
title: "Feature Engineering"
output:
  pdf_document:
    fig_height: 4
---

```{r setup}
# Packages used throughout the document.
library(mlr)
library(ggplot2)

# Cross-validation splits, random/GA feature search and random forests all
# draw random numbers; fix the seed so every knit yields the same results.
set.seed(1)
```

# Feature Selection

## Feature Filtering

### Classification

```{r}
# Score every feature of the classification task with the
# "information.gain" filter.
# NOTE(review): iris.task and bh.task are not defined in this file;
# presumably they are the example tasks that ship with mlr - confirm.
fv = generateFilterValuesData(iris.task, method = "information.gain")
print(fv)
```

```{r}
plotFilterValues(fv)
```

### Regression

```{r}
# Same procedure on the regression task, using the "chi.squared" filter.
fv = generateFilterValuesData(bh.task, method = "chi.squared")
print(fv)
```

```{r}
plotFilterValues(fv)
```

### Available Methods

```{r}
listFilterMethods()
```

## Embedded Feature Selection

### Classification

```{r}
# Fit a random forest and read the feature importance from the fitted model.
mod = train(makeLearner("classif.randomForest"), iris.task)
getFeatureImportance(mod)
```

### Regression

```{r}
mod = train(makeLearner("regr.randomForest"), bh.task)
getFeatureImportance(mod)
```

## Feature Selection Wrapper

### Random Subsets Search

```{r}
# 10-fold cross-validation; `rdesc` is reused by all later chunks.
rdesc = makeResampleDesc("CV", iters = 10)

# Random search control with maxit = 10.
ctrl = makeFeatSelControlRandom(maxit = 10)
selectFeatures(
  learner = "regr.rpart",
  task = bh.task,
  resampling = rdesc,
  control = ctrl
)
```

### Sequential Forward Search

```{r}
ctrl = makeFeatSelControlSequential(method = "sfs")
selectFeatures(
  learner = "regr.rpart",
  task = bh.task,
  resampling = rdesc,
  control = ctrl
)
```

### Sequential Backward Search

```{r}
ctrl = makeFeatSelControlSequential(method = "sbs")
selectFeatures(
  learner = "regr.rpart",
  task = bh.task,
  resampling = rdesc,
  control = ctrl
)
```

### Genetic Algorithm Search

```{r}
ctrl = makeFeatSelControlGA(maxit = 10)
selectFeatures(
  learner = "regr.rpart",
  task = bh.task,
  resampling = rdesc,
  control = ctrl
)
```

### Wrapper

```{r}
# Fuse the feature search with the learner. `rdesc` is passed both to the
# wrapper (resampling used by the search) and to resample() (resampling of
# the wrapped learner), so the selection is repeated for each outer split.
ctrl = makeFeatSelControlRandom(maxit = 10)
lrn = makeFeatSelWrapper("regr.rpart", resampling = rdesc, control = ctrl)
resample(lrn, bh.task, rdesc)
```

# Principal Component Analysis

```{r}
# PCA on the iris columns with the fifth column (Species) dropped; plot the
# first two principal components, coloured and shaped by species.
pca = prcomp(iris[, -5])
scores = cbind(as.data.frame(pca$x), species = iris$Species)
ggplot(scores, aes(PC1, PC2)) +
  geom_point(aes(colour = species, shape = species))
```

## As Preprocessing Step

```{r}
# Baseline: linear model on the original features.
lrn = makeLearner("regr.lm")
resample(lrn, bh.task, rdesc)
```

```{r}
# Same learner with PCA as a preprocessing step, keeping two components.
# The number of principal components is set through `ppc.pcaComp`
# (caret::preProcess's `pcaComp`); `ppc.n.comp` is the component count for
# ICA and does not limit the PCA.
lrn = makePreprocWrapperCaret(learner = lrn, ppc.pca = TRUE, ppc.pcaComp = 2L)
resample(lrn, bh.task, rdesc)
```

# Feature Expansion

```{r}
# Baseline: an rpart classifier restricted to maxdepth = 1.
lrn = makeLearner("classif.rpart", maxdepth = 1)
resample(lrn, iris.task, rdesc)
```

```{r}
head(iris)
```

```{r}
# Raw (non-orthogonal) polynomial expansion of degree 2 over the first four
# iris columns; each column is passed to poly() as a separate argument.
feature_cols = lapply(1:4, function(x) iris[, x])
d = as.data.frame(do.call(poly, c(feature_cols, degree = 2, raw = TRUE)))
head(d)
```

```{r}
# Prefix the generated column names with "X" before building the task.
# NOTE(review): presumably needed because poly() yields names that start
# with a digit - confirm.
colnames(d) = paste0("X", colnames(d))
task = makeClassifTask(
  data = cbind(d, Species = iris$Species),
  target = "Species"
)
resample(lrn, task, rdesc)
```