From 756ebf57f1d62ed6bbe73ef0da8823abff86a836 Mon Sep 17 00:00:00 2001
From: Matej Klemen <matej.klemen1337@gmail.com>
Date: Sat, 13 Jul 2019 12:31:42 +0200
Subject: [PATCH 01/15] Update docs for CsvLoader (loadCsvDataset)

---
 .../picnicml/doddlemodel/data/CsvLoader.scala | 24 ++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/main/scala/io/picnicml/doddlemodel/data/CsvLoader.scala b/src/main/scala/io/picnicml/doddlemodel/data/CsvLoader.scala
index 4931c036..24f1d840 100644
--- a/src/main/scala/io/picnicml/doddlemodel/data/CsvLoader.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/data/CsvLoader.scala
@@ -9,7 +9,29 @@ import scala.io.{BufferedSource, Source}
 
 object CsvLoader {
 
-  /** Loads a csv dataset with 2 header lines (1st line for feature names and 2nd for types). */
+  /** Loads a csv dataset with 2 header lines (1st line for feature names and 2nd for types).
+    * @param datasetFilePath csv file to load
+    * @param na value to interpret as N/A data in the given dataset
+    *
+    * @example Reading the iris dataset.
+    *   {{{
+    *     import io.picnicml.doddlemodel.data.CsvLoader.loadCsvDataset
+    *
+    *     val filePath = "/datasets/iris.csv"
+    *     val (data, featureInfo) = loadCsvDataset(filePath)
+    *     // separate features from the label
+    *     val (irisFeatures, irisLabels) = (data(::, 0 to 3), data(::, -1))
+    *   }}}
+    *
+    * @example Reading a dataset where N/A values are marked with `NA`.
+    *   {{{
+    *     import io.picnicml.doddlemodel.data.CsvLoader.loadCsvDataset
+    *
+    *     val filePath = "/datasets/dummy_csv_reading.csv"
+    *     // specify a value to interpret as N/A data
+    *     val (data, featureInfo) = loadCsvDataset(filePath, "NA")
+    *   }}}
+    * */
   def loadCsvDataset(datasetFilePath: String, na: String = "NA"): FeaturesWithIndex =
     loadCsvDataset(Source.fromFile(datasetFilePath), na)
 

From 14c588d3dafb50220515391eaa643a8af2bdabe0 Mon Sep 17 00:00:00 2001
From: Matej Klemen <matej.klemen1337@gmail.com>
Date: Sat, 13 Jul 2019 12:37:33 +0200
Subject: [PATCH 02/15] Update docs for DatasetUtils (shuffleDataset,
 splitDataset)

---
 .../doddlemodel/data/DatasetUtils.scala       | 38 ++++++++++++++++++-
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/src/main/scala/io/picnicml/doddlemodel/data/DatasetUtils.scala b/src/main/scala/io/picnicml/doddlemodel/data/DatasetUtils.scala
index 78e94c94..44e86b0c 100644
--- a/src/main/scala/io/picnicml/doddlemodel/data/DatasetUtils.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/data/DatasetUtils.scala
@@ -6,13 +6,47 @@ import scala.util.Random
 
 object DatasetUtils {
 
-  /** Shuffles rows of the dataset. */
+  /** Shuffles rows of the dataset.
+    * @param x features to be shuffled
+    * @param y labels corresponding to features
+    *
+    * @example Shuffle a dataset randomly.
+    *   {{{
+    *     import scala.util.Random
+    *
+    *     // we are assuming data was previously loaded
+    *     val (dataX, dataY) = ...
+    *
+    *     val (shuffledX, shuffledY) = shuffleDataset(dataX, dataY)
+    *
+    *     // seeded shuffle - seed passed to shuffler implicitly
+    *     implicit val rand: Random = new Random(42)
+    *     val (shuffledX, shuffledY) = shuffleDataset(dataX, dataY)
+    *   }}}
+    * */
   def shuffleDataset(x: Features, y: Target)(implicit rand: Random = new Random()): Dataset = {
     val shuffleIndices = rand.shuffle((0 until y.length).toIndexedSeq)
     (x(shuffleIndices, ::).toDenseMatrix, y(shuffleIndices).toDenseVector)
   }
 
-  /** Splits the dataset into two subsets for training and testing. */
+  /** Splits the dataset into two subsets for training and testing.
+    * @param x features to be split
+    * @param y labels corresponding to features
+    * @param proportionTrain proportion of dataset to be put into training set - between 0.0 and 1.0
+    *
+    * @example Split dataset into training and test set.
+    *   {{{
+    *     // we are assuming data was previously loaded
+    *     val (dataX, dataY) = ...
+    *
+    *     // by default, the split is 50%:50%
+    *     val trTeSplit = splitDataset(dataX, dataY)
+    *
+    *     // put 80% of data into training set and 20% into test set
+    *     val trTeSplit = splitDataset(dataX, dataY, 0.8f)
+    *     val (trainX, trainY, testX, testY) = (trTeSplit.xTr, trTeSplit.yTr, trTeSplit.xTe, trTeSplit.yTe)
+    *   }}}
+    * */
   def splitDataset(x: Features, y: Target, proportionTrain: Float = 0.5f): TrainTestSplit = {
     val numTrain = numberOfTrainExamplesBasedOnProportion(x.rows, proportionTrain)
     val trIndices = 0 until numTrain

From 7b3a30bb555e4ae32813d04de884ad14498d8c15 Mon Sep 17 00:00:00 2001
From: Matej Klemen <matej.klemen1337@gmail.com>
Date: Sat, 13 Jul 2019 12:39:13 +0200
Subject: [PATCH 03/15] Update docs for Feature (FeatureIndex's functions)

---
 .../picnicml/doddlemodel/data/Feature.scala   | 53 ++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

diff --git a/src/main/scala/io/picnicml/doddlemodel/data/Feature.scala b/src/main/scala/io/picnicml/doddlemodel/data/Feature.scala
index b5a5c929..09b52bea 100644
--- a/src/main/scala/io/picnicml/doddlemodel/data/Feature.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/data/Feature.scala
@@ -35,11 +35,33 @@ object Feature {
       subset(subsetIndices:_*)
     }
 
+    /** Create a feature index with subset of features, provided by feature names.
+      * @param names subset of features to be selected
+      *
+      * @example Create feature index based on features "f1" and "f3" from a constructed feature index.
+      *   {{{
+      *     val featureIndex = FeatureIndex(List("f1", "f2", "f3"), List(NumericalFeature, NumericalFeature,
+      *       NumericalFeature), List(0, 1, 2))
+      *     val subIndex = featureIndex.subset("f1", "f3")
+      *   }}}
+      *
+      * */
     def subset(names: String*): FeatureIndex = {
       val nameToIndex = this.names.zipWithIndex.toMap
       subset(names.map(n => nameToIndex(n)):_*)
     }
 
+    /** Create a feature index with subset of features, provided by feature indices.
+      * @param indices column indices for subset of features to be selected
+      *
+      * @example Create feature index based on second and third (i.e. indices 1, 2) features from a constructed
+      *          feature index.
+      * {{{
+      *   val featureIndex = FeatureIndex(List("f1", "f2", "f3"), List(NumericalFeature, NumericalFeature,
+      *     NumericalFeature), List(0, 1, 2))
+      *   val subIndex = featureIndex.subset(1 to 2)
+      * }}}
+      * */
     def subset(indices: IndexedSeq[Int]): FeatureIndex = subset(indices:_*)
 
     // DummyImplicit is needed to avoid the same type as String* after erasure
@@ -49,6 +71,7 @@ object Feature {
       indices.toIndexedSeq.map(i => this.columnIndices(i))
     )
 
+    /** Create a feature index by dropping a feature by column index. */
     def drop(index: Int): FeatureIndex = new FeatureIndex(
       this.names.zipWithIndex.flatMap { case (n, i) => if (i != index) n.some else none[String] },
       this.types.zipWithIndex.flatMap { case (t, i) => if (i != index) t.some else none[FeatureType] },
@@ -61,26 +84,54 @@ object Feature {
       this.names.zip(this.types).map { case (n, t) => s"$n (${t.headerLineString})" } mkString ", "
   }
 
+  /** A structure that keeps track of feature metadata (names, types and indices). This is needed
+    * because some methods are only applicable to a certain type of features, e.g. [0, 1] scaling
+    * only makes sense for numerical features. */
   object FeatureIndex {
-
+    /** Construct feature index with `n` categorical features. Feature names are generated automatically.
+      * @param n number of categorical features in feature index
+      */
     def categorical(n: Int): FeatureIndex =
       categorical((0 until n).toList)
 
     def categorical(columnIndices: List[Int]): FeatureIndex =
       apply(columnIndices.indices.map(i => s"f$i").toList, columnIndices.map(_ => CategoricalFeature), columnIndices)
 
+    /** Construct feature index with `n` numerical features. Feature names are generated automatically.
+      * @param n number of numerical features in feature index
+      */
     def numerical(n: Int): FeatureIndex =
       numerical((0 until n).toList)
 
     def numerical(columnIndices: List[Int]): FeatureIndex =
       apply(columnIndices.indices.map(i => s"f$i").toList, columnIndices.map(_ => NumericalFeature), columnIndices)
 
+    /** Construct feature index from feature types. Feature names are generated automatically.
+      * @param types list of feature types
+      *
+      * @example Construct a feature index with one numerical and two categorical features.
+      *   {{{
+      *     val featureIndex = FeatureIndex(List(CategoricalFeature, NumericalFeature, CategoricalFeature))
+      *   }}}
+      */
     def apply(types: List[FeatureType]): FeatureIndex =
       apply(types.indices.map(i => s"f$i").toList, types, types.indices.toList)
 
     def apply(types: List[FeatureType], columnIndices: List[Int]): FeatureIndex =
       apply(types.indices.map(i => s"f$i").toList, types, columnIndices)
 
+    /** Construct a feature index with custom feature names, types and column indices.
+      * @param names feature names
+      * @param types feature types
+      * @param columnIndices column index for each feature
+      *
+      * @example Construct a feature index with three features, named "age" (numerical), "height" (numerical)
+      *          and "group" (categorical).
+      *   {{{
+      *     val featureIndex = FeatureIndex(List("age", "height", "group"), List(NumericalFeature,
+      *       NumericalFeature, CategoricalFeature), List(0, 1, 2))
+      *   }}}
+      */
     def apply(names: List[String], types: List[FeatureType], columnIndices: List[Int]): FeatureIndex =
       new FeatureIndex(names.toIndexedSeq, types.toIndexedSeq, columnIndices.toIndexedSeq)
   }

From 491f5df123c9ccf127a5e68118c0b014dac580e6 Mon Sep 17 00:00:00 2001
From: Matej Klemen <matej.klemen1337@gmail.com>
Date: Sat, 13 Jul 2019 13:04:36 +0200
Subject: [PATCH 04/15] Update package documentation (basic dataset info)

---
 .../io/picnicml/doddlemodel/data/package.scala     | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/main/scala/io/picnicml/doddlemodel/data/package.scala b/src/main/scala/io/picnicml/doddlemodel/data/package.scala
index 4bcc8911..f2ea2ced 100644
--- a/src/main/scala/io/picnicml/doddlemodel/data/package.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/data/package.scala
@@ -4,6 +4,7 @@ import breeze.linalg.{DenseMatrix, DenseVector, unique}
 import io.picnicml.doddlemodel.CrossScalaCompat.floatOrdering
 import io.picnicml.doddlemodel.data.Feature.FeatureIndex
 
+/** Provides data management utilities and definitions of custom doddle-model data types. */
 package object data {
 
   type RealVector = DenseVector[Float]
@@ -17,9 +18,22 @@ package object data {
   type Dataset = (Features, Target)
   type DatasetWithIndex = (Features, Target, FeatureIndex)
 
+  /** Loads and returns the Boston Housing prices dataset. */
   def loadBostonDataset: DatasetWithIndex = ResourceDatasetLoaders.loadBostonDataset
+  /** Loads and returns the Breast cancer Wisconsin (diagnostic) dataset.
+    *
+    * @see <a href="https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Diagnostic%29" target="_blank">
+    *        Breast cancer dataset on UCI Machine Learning Repository </a>
+    * */
   def loadBreastCancerDataset: DatasetWithIndex = ResourceDatasetLoaders.loadBreastCancerDataset
+  /** Loads and returns the Iris dataset.
+    *
+    * @see <a href="https://archive.ics.uci.edu/ml/datasets/Iris" target="_blank">
+    *        Iris dataset on UCI Machine Learning Repository </a>
+    * */
   def loadIrisDataset: DatasetWithIndex = ResourceDatasetLoaders.loadIrisDataset
+
+  /** Loads and returns an artificial dataset with a Poisson target variable. */
   def loadHighSchoolTestDataset: DatasetWithIndex = ResourceDatasetLoaders.loadHighSchoolTestDataset
 
   def numberOfUniqueGroups(groups: IntVector): Int = {

From d8aebb46e1258d99988328b8c0b3e3d685a61803 Mon Sep 17 00:00:00 2001
From: Matej Klemen <matej.klemen1337@gmail.com>
Date: Tue, 16 Jul 2019 01:45:35 +0200
Subject: [PATCH 05/15] Format code examples in dummy package

---
 .../classification/MostFrequentClassifier.scala      | 10 ++++++----
 .../dummy/classification/StratifiedClassifier.scala  | 12 +++++++-----
 .../dummy/classification/UniformClassifier.scala     | 10 ++++++----
 .../doddlemodel/dummy/regression/MeanRegressor.scala |  6 ++++--
 .../dummy/regression/MedianRegressor.scala           |  6 ++++--
 5 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/src/main/scala/io/picnicml/doddlemodel/dummy/classification/MostFrequentClassifier.scala b/src/main/scala/io/picnicml/doddlemodel/dummy/classification/MostFrequentClassifier.scala
index 7ba48156..c00cb079 100644
--- a/src/main/scala/io/picnicml/doddlemodel/dummy/classification/MostFrequentClassifier.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/dummy/classification/MostFrequentClassifier.scala
@@ -6,13 +6,15 @@ import io.picnicml.doddlemodel.CrossScalaCompat.floatOrdering
 import io.picnicml.doddlemodel.data.{Features, Simplex, Target}
 import io.picnicml.doddlemodel.typeclasses.Classifier
 
+case class MostFrequentClassifier private (numClasses: Option[Int], mostFrequentClass: Option[Float])
+
 /** An immutable dummy classifier that always predicts the most frequent label.
   *
-  * Examples:
-  * val model = MostFrequentClassifier()
+  * @example
+  * {{{
+  *   val model = MostFrequentClassifier()
+  * }}}
   */
-case class MostFrequentClassifier private (numClasses: Option[Int], mostFrequentClass: Option[Float])
-
 object MostFrequentClassifier {
 
   def apply(): MostFrequentClassifier = MostFrequentClassifier(none, none)
diff --git a/src/main/scala/io/picnicml/doddlemodel/dummy/classification/StratifiedClassifier.scala b/src/main/scala/io/picnicml/doddlemodel/dummy/classification/StratifiedClassifier.scala
index 3ebd0bd9..0f79bfb6 100644
--- a/src/main/scala/io/picnicml/doddlemodel/dummy/classification/StratifiedClassifier.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/dummy/classification/StratifiedClassifier.scala
@@ -9,11 +9,6 @@ import io.picnicml.doddlemodel.dummy.classification.StratifiedClassifier.ev
 import io.picnicml.doddlemodel.syntax.OptionSyntax._
 import io.picnicml.doddlemodel.typeclasses.Classifier
 
-/** An immutable dummy classifier that samples predictions from a stratified categorical distribution.
-  *
-  * Examples:
-  * val model = StratifiedClassifier()
-  */
 case class StratifiedClassifier private (numClasses: Option[Int],
                                          targetDistr: Option[Multinomial[DenseVector[Double], Int]]) {
 
@@ -23,6 +18,13 @@ case class StratifiedClassifier private (numClasses: Option[Int],
   }
 }
 
+/** An immutable dummy classifier that samples predictions from a stratified categorical distribution.
+  *
+  * @example
+  *   {{{
+  *     val model = StratifiedClassifier()
+  *   }}}
+  */
 object StratifiedClassifier {
 
   def apply(): StratifiedClassifier = StratifiedClassifier(none, none)
diff --git a/src/main/scala/io/picnicml/doddlemodel/dummy/classification/UniformClassifier.scala b/src/main/scala/io/picnicml/doddlemodel/dummy/classification/UniformClassifier.scala
index 169c7cf0..379549a9 100644
--- a/src/main/scala/io/picnicml/doddlemodel/dummy/classification/UniformClassifier.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/dummy/classification/UniformClassifier.scala
@@ -6,13 +6,15 @@ import cats.syntax.option._
 import io.picnicml.doddlemodel.data.{Features, Simplex, Target}
 import io.picnicml.doddlemodel.typeclasses.Classifier
 
+case class UniformClassifier private (numClasses: Option[Int])
+
 /** An immutable dummy classifier that samples predictions from a uniform categorical distribution.
   *
-  * Examples:
-  * val model = UniformClassifier()
+  * @example
+  *   {{{
+  *     val model = UniformClassifier()
+  *   }}}
   */
-case class UniformClassifier private (numClasses: Option[Int])
-
 object UniformClassifier {
 
   def apply(): UniformClassifier = UniformClassifier(none)
diff --git a/src/main/scala/io/picnicml/doddlemodel/dummy/regression/MeanRegressor.scala b/src/main/scala/io/picnicml/doddlemodel/dummy/regression/MeanRegressor.scala
index f4ac5df7..83785559 100644
--- a/src/main/scala/io/picnicml/doddlemodel/dummy/regression/MeanRegressor.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/dummy/regression/MeanRegressor.scala
@@ -8,8 +8,10 @@ import io.picnicml.doddlemodel.typeclasses.Regressor
 
 /** An immutable dummy regressor that always predicts the sample mean.
   *
-  * Examples:
-  * val model = MeanRegressor()
+  * @example
+  *   {{{
+  *     val model = MeanRegressor()
+  *   }}}
   */
 case class MeanRegressor private (mean: Option[Float])
 
diff --git a/src/main/scala/io/picnicml/doddlemodel/dummy/regression/MedianRegressor.scala b/src/main/scala/io/picnicml/doddlemodel/dummy/regression/MedianRegressor.scala
index bf063e53..ea8356e7 100644
--- a/src/main/scala/io/picnicml/doddlemodel/dummy/regression/MedianRegressor.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/dummy/regression/MedianRegressor.scala
@@ -8,8 +8,10 @@ import io.picnicml.doddlemodel.typeclasses.Regressor
 
 /** An immutable dummy regressor that always predicts the sample median.
   *
-  * Examples:
-  * val model = MedianRegressor()
+  * @example
+  *   {{{
+  *     val model = MedianRegressor()
+  *   }}}
   */
 case class MedianRegressor private (median: Option[Float])
 

From 68112c42d09a806a6f31b5b9b078ba60514a441a Mon Sep 17 00:00:00 2001
From: Matej Klemen <matej.klemen1337@gmail.com>
Date: Tue, 16 Jul 2019 01:52:00 +0200
Subject: [PATCH 06/15] Add docs for impute package

---
 .../doddlemodel/impute/MeanValueImputer.scala | 29 ++++++++++++------
 .../impute/MostFrequentValueImputer.scala     | 30 ++++++++++++-------
 2 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/src/main/scala/io/picnicml/doddlemodel/impute/MeanValueImputer.scala b/src/main/scala/io/picnicml/doddlemodel/impute/MeanValueImputer.scala
index b3c61f92..3b414310 100644
--- a/src/main/scala/io/picnicml/doddlemodel/impute/MeanValueImputer.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/impute/MeanValueImputer.scala
@@ -8,20 +8,31 @@ import io.picnicml.doddlemodel.data.{Features, RealVector}
 import io.picnicml.doddlemodel.syntax.OptionSyntax._
 import io.picnicml.doddlemodel.typeclasses.Transformer
 
-/** An immutable simple imputer that replaces all NaN values with column means.
-  *
-  * @param featureIndex feature index associated with features, this is needed so that only numerical features
-  *                     are transformed by this preprocessor, could be a subset of columns to be transformed
-  *
-  * Examples:
-  * val imputer = MeanValueImputer(featureIndex)
-  * val imputerSubsetOfColumns = MeanValueImputer(featureIndex.subset("f0", "f2"))
-  */
 case class MeanValueImputer private (private[impute] val means: Option[RealVector],
                                      private val featureIndex: FeatureIndex)
 
+/** An immutable simple imputer that replaces all NaN values with column means. */
 object MeanValueImputer {
 
+  /** Create an imputer based on a feature index.
+    *
+    * @param featureIndex feature index associated with features, this is needed so that only numerical features
+    *                     are transformed by this preprocessor, could be a subset of columns to be transformed
+    *
+    * @example Impute values for all (numerical) features.
+    *   {{{
+    *     val featureIndex = FeatureIndex(List(NumericalFeature, CategoricalFeature, NumericalFeature,
+    *       NumericalFeature))
+    *     val imputer = MeanValueImputer(featureIndex)
+    *   }}}
+    *
+    * @example Impute values for a subset of features.
+    *   {{{
+    *     val featureIndex = FeatureIndex(List("f0", "f1", "f2"), List(NumericalFeature, NumericalFeature,
+    *       NumericalFeature), List(0, 1, 2))
+    *     val imputerSubsetOfColumns = MeanValueImputer(featureIndex.subset("f0", "f2"))
+    *   }}}
+    */
   def apply(featureIndex: FeatureIndex): MeanValueImputer = MeanValueImputer(none, featureIndex)
 
   @SerialVersionUID(0L)
diff --git a/src/main/scala/io/picnicml/doddlemodel/impute/MostFrequentValueImputer.scala b/src/main/scala/io/picnicml/doddlemodel/impute/MostFrequentValueImputer.scala
index ec9ab926..2d912cb2 100644
--- a/src/main/scala/io/picnicml/doddlemodel/impute/MostFrequentValueImputer.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/impute/MostFrequentValueImputer.scala
@@ -7,20 +7,30 @@ import io.picnicml.doddlemodel.data.{Features, RealVector}
 import io.picnicml.doddlemodel.syntax.OptionSyntax._
 import io.picnicml.doddlemodel.typeclasses.Transformer
 
-/** An immutable simple imputer that replaces all NaN values with most frequent value of a corresponding column.
-  *
-  * @param featureIndex feature index associated with features, this is needed so that only categorical features
-  *                     are transformed by this preprocessor, could be a subset of columns to be transformed
-  *
-  * Examples:
-  * val imputer = MostFrequentValueImputer(featureIndex)
-  * val imputerSubsetOfColumns = MostFrequentValueImputer(featureIndex.subset("f0", "f2"))
-  */
 case class MostFrequentValueImputer private (private[impute] val mostFrequent: Option[RealVector],
                                              private val featureIndex: FeatureIndex)
-
+/** An immutable simple imputer that replaces all NaN values with most frequent value of a corresponding column. */
 object MostFrequentValueImputer {
 
+  /** Create an imputer based on a feature index.
+    *
+    * @param featureIndex feature index associated with features, this is needed so that only categorical features
+    *                     are transformed by this preprocessor, could be a subset of columns to be transformed
+    *
+    * @example Impute values for all (categorical) features.
+    *   {{{
+    *     val featureIndex = FeatureIndex(List(NumericalFeature, CategoricalFeature, NumericalFeature,
+    *       NumericalFeature))
+    *     val imputer = MostFrequentValueImputer(featureIndex)
+    *   }}}
+    *
+    * @example Impute values for a subset of features.
+    *   {{{
+    *     val featureIndex = FeatureIndex(List("f0", "f1", "f2"), List(NumericalFeature, NumericalFeature,
+    *       NumericalFeature), List(0, 1, 2))
+    *     val imputerSubsetOfColumns = MostFrequentValueImputer(featureIndex.subset("f0", "f2"))
+    *   }}}
+    */
   def apply(featureIndex: FeatureIndex): MostFrequentValueImputer =
     MostFrequentValueImputer(None, featureIndex)
 

From fcfd2955e0833d5df3a195452a05e98ad2dd36b9 Mon Sep 17 00:00:00 2001
From: Matej Klemen <matej.klemen1337@gmail.com>
Date: Tue, 16 Jul 2019 23:35:34 +0200
Subject: [PATCH 07/15] Add docs for linear package

---
 .../doddlemodel/linear/LinearRegression.scala | 23 ++++++++++++-------
 .../linear/LogisticRegression.scala           | 23 ++++++++++++-------
 .../linear/PoissonRegression.scala            | 23 ++++++++++++-------
 .../linear/SoftmaxClassifier.scala            | 23 ++++++++++++-------
 4 files changed, 60 insertions(+), 32 deletions(-)

diff --git a/src/main/scala/io/picnicml/doddlemodel/linear/LinearRegression.scala b/src/main/scala/io/picnicml/doddlemodel/linear/LinearRegression.scala
index 3d6e4ae8..34ede7cf 100644
--- a/src/main/scala/io/picnicml/doddlemodel/linear/LinearRegression.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/linear/LinearRegression.scala
@@ -4,20 +4,27 @@ import cats.syntax.option._
 import io.picnicml.doddlemodel.data.{Features, RealVector, Target}
 import io.picnicml.doddlemodel.linear.typeclasses.LinearRegressor
 
-/** An immutable multiple linear regression model with ridge regularization.
-  *
-  * @param lambda L2 regularization strength, must be positive, 0 means no regularization
-  *
-  * Examples:
-  * val model = LinearRegression()
-  * val model = LinearRegression(lambda = 1.5f)
-  */
+/** An immutable multiple linear regression model with ridge regularization. */
 case class LinearRegression private (lambda: Float, private val w: Option[RealVector]) {
   private var yPredCache: Target = _
 }
 
 object LinearRegression {
 
+  /** Create a regularized linear regression model.
+    *
+    * @param lambda L2 regularization strength, must be non-negative, 0.0 means no regularization
+    *
+    * @example Create and fit a regularized linear regression model with lambda = 1.5.
+    *   {{{
+    *     import io.picnicml.doddlemodel.linear.LinearRegression.ev
+    *
+    *     val X: Features = DenseMatrix(List(1.0f, 2.0f), List(3.0f, 4.0f))
+    *     val y: Target = DenseVector(-3.0f, 2.0f)
+    *     val model = LinearRegression(lambda = 1.5f)
+    *     val fittedModel = ev.fit(model, X, y)
+    *   }}}
+    */
   def apply(lambda: Float = 0.0f): LinearRegression = {
     require(lambda >= 0.0f, "L2 regularization strength must be non-negative")
     LinearRegression(lambda, none)
diff --git a/src/main/scala/io/picnicml/doddlemodel/linear/LogisticRegression.scala b/src/main/scala/io/picnicml/doddlemodel/linear/LogisticRegression.scala
index a62d2da6..3a30860b 100644
--- a/src/main/scala/io/picnicml/doddlemodel/linear/LogisticRegression.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/linear/LogisticRegression.scala
@@ -6,20 +6,27 @@ import cats.syntax.option._
 import io.picnicml.doddlemodel.data.{Features, RealVector, Simplex, Target}
 import io.picnicml.doddlemodel.linear.typeclasses.LinearClassifier
 
-/** An immutable multiple logistic regression model with ridge regularization.
-  *
-  * @param lambda L2 regularization strength, must be positive, 0 means no regularization
-  *
-  * Examples:
-  * val model = LogisticRegression()
-  * val model = LogisticRegression(lambda = 1.5f)
-  */
+/** An immutable multiple logistic regression model with ridge regularization. */
 case class LogisticRegression private (lambda: Float, numClasses: Option[Int], private val w: Option[RealVector]) {
   private var yPredProbaCache: RealVector = _
 }
 
 object LogisticRegression {
 
+  /** Create a regularized logistic regression model.
+    *
+    * @param lambda L2 regularization strength, must be non-negative, 0.0 means no regularization
+    *
+    * @example Create and fit a logistic regression model with lambda = 1.5.
+    * {{{
+    *     import io.picnicml.doddlemodel.linear.LogisticRegression.ev
+    *
+    *     val X: Features = DenseMatrix(List(1.0f, 2.0f), List(3.0f, 4.0f))
+    *     val y: Target = DenseVector(0.0f, 1.0f)
+    *     val model = LogisticRegression(lambda = 1.5f)
+    *     val fittedModel = ev.fit(model, X, y)
+    * }}}
+    */
   def apply(lambda: Float = 0.0f): LogisticRegression = {
     require(lambda >= 0.0f, "L2 regularization strength must be non-negative")
     LogisticRegression(lambda, none, none)
diff --git a/src/main/scala/io/picnicml/doddlemodel/linear/PoissonRegression.scala b/src/main/scala/io/picnicml/doddlemodel/linear/PoissonRegression.scala
index fc0b80e6..9e8b2237 100644
--- a/src/main/scala/io/picnicml/doddlemodel/linear/PoissonRegression.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/linear/PoissonRegression.scala
@@ -6,20 +6,27 @@ import cats.syntax.option._
 import io.picnicml.doddlemodel.data.{Features, RealVector, Target}
 import io.picnicml.doddlemodel.linear.typeclasses.LinearRegressor
 
-/** An immutable multiple Poisson regression model with ridge regularization.
-  *
-  * @param lambda L2 regularization strength, must be positive, 0 means no regularization
-  *
-  * Examples:
-  * val model = PoissonRegression()
-  * val model = PoissonRegression(lambda = 1.5f)
-  */
+/** An immutable multiple Poisson regression model with ridge regularization. */
 case class PoissonRegression private (lambda: Float, private val w: Option[RealVector]) {
   private var yPredMeanCache: Target = _
 }
 
 object PoissonRegression {
 
+  /** Create a regularized Poisson regression model.
+    *
+    * @param lambda L2 regularization strength, must be non-negative, 0.0 means no regularization
+    *
+    * @example Create and fit a regularized Poisson regression model with lambda = 1.5.
+    *   {{{
+    *     import io.picnicml.doddlemodel.linear.PoissonRegression.ev
+    *
+    *     val X: Features = DenseMatrix(List(1.0f, 2.0f), List(3.0f, 4.0f))
+    *     val y: Target = DenseVector(3.0f, 2.0f)
+    *     val model = PoissonRegression(lambda = 1.5f)
+    *     val fittedModel = ev.fit(model, X, y)
+    *   }}}
+    */
   def apply(lambda: Float = 0.0f): PoissonRegression = {
     require(lambda >= 0.0f, "L2 regularization strength must be non-negative")
     PoissonRegression(lambda, none)
diff --git a/src/main/scala/io/picnicml/doddlemodel/linear/SoftmaxClassifier.scala b/src/main/scala/io/picnicml/doddlemodel/linear/SoftmaxClassifier.scala
index 30001ea3..e1e0bd43 100644
--- a/src/main/scala/io/picnicml/doddlemodel/linear/SoftmaxClassifier.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/linear/SoftmaxClassifier.scala
@@ -7,20 +7,27 @@ import io.picnicml.doddlemodel.data.{Features, RealVector, Simplex, Target}
 import io.picnicml.doddlemodel.linear.typeclasses.LinearClassifier
 import io.picnicml.doddlemodel.syntax.OptionSyntax._
 
-/** An immutable multiple multinomial regression model with ridge regularization.
-  *
-  * @param lambda L2 regularization strength, must be positive, 0 means no regularization
-  *
-  * Examples:
-  * val model = SoftmaxClassifier()
-  * val model = SoftmaxClassifier(lambda = 1.5f)
-  */
+/** An immutable multiple multinomial regression model with ridge regularization. */
 case class SoftmaxClassifier private (lambda: Float, numClasses: Option[Int], private val w: Option[RealVector]) {
   private var yPredProbaCache: Simplex = _
 }
 
 object SoftmaxClassifier {
 
+  /** Create a regularized softmax model.
+    *
+    * @param lambda L2 regularization strength, must be non-negative, 0.0 means no regularization
+    *
+    * @example Create and fit a regularized softmax classifier with lambda = 1.5.
+    *   {{{
+    *     import io.picnicml.doddlemodel.linear.SoftmaxClassifier.ev
+    *
+    *     val X: Features = DenseMatrix(List(1.0f, 2.0f), List(3.0f, 4.0f))
+    *     val y: Target = DenseVector(0.0f, 1.0f)
+    *     val model = SoftmaxClassifier(lambda = 1.5f)
+    *     val fittedModel = ev.fit(model, X, y)
+    *   }}}
+    */
   def apply(lambda: Float = 0.0f): SoftmaxClassifier = {
     require(lambda >= 0.0f, "L2 regularization strength must be non-negative")
     SoftmaxClassifier(lambda, none, none)

From bebf025a6cb11530fc7280735255993e7c3ef90f Mon Sep 17 00:00:00 2001
From: Matej Klemen <matej.klemen1337@gmail.com>
Date: Wed, 17 Jul 2019 23:11:03 +0200
Subject: [PATCH 08/15] Update docs for metrics package

---
 .../metrics/ClassificationMetrics.scala       | 24 +++++++++++++++----
 .../doddlemodel/metrics/RankingMetrics.scala  | 17 ++++++++++++-
 .../metrics/RegressionMetrics.scala           | 20 ++++++++++++----
 .../doddlemodel/metrics/package.scala         | 18 ++++++++++++++
 4 files changed, 69 insertions(+), 10 deletions(-)

diff --git a/src/main/scala/io/picnicml/doddlemodel/metrics/ClassificationMetrics.scala b/src/main/scala/io/picnicml/doddlemodel/metrics/ClassificationMetrics.scala
index 6091a8ae..57d88388 100644
--- a/src/main/scala/io/picnicml/doddlemodel/metrics/ClassificationMetrics.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/metrics/ClassificationMetrics.scala
@@ -4,7 +4,7 @@ import io.picnicml.doddlemodel.data.{Target, numberOfTargetClasses}
 
 object ClassificationMetrics {
 
-  /** Classification accuracy. */
+  /** Classification accuracy - measures the proportion of correctly classified examples among all examples. */
   object Accuracy extends Metric {
 
     override lazy val higherValueIsBetter: Boolean = true
@@ -15,7 +15,12 @@ object ClassificationMetrics {
     override def toString: String = "accuracy"
   }
 
-  /** Positive predictive value. */
+  /** Precision (positive predictive value) - measures the proportion of correctly classified positive examples
+    * (true positives) among all examples classified as positive.
+    *
+    * @note Only defined for a binary classification task.
+    * @see [[https://en.wikipedia.org/wiki/Precision_and_recall]]
+    * */
   object Precision extends Metric {
 
     override lazy val higherValueIsBetter: Boolean = true
@@ -36,7 +41,12 @@ object ClassificationMetrics {
     override def toString: String = "precision"
   }
 
-  /** Sensitivity. */
+  /** Recall (sensitivity) - measures the proportion of correctly classified positive examples (true positives)
+    * among all <b>actual</b> positive examples.
+    *
+    * @note Only defined for a binary classification task.
+    * @see [[https://en.wikipedia.org/wiki/Precision_and_recall]]
+    * */
   object Recall extends Metric {
 
     override lazy val higherValueIsBetter: Boolean = true
@@ -57,7 +67,11 @@ object ClassificationMetrics {
     override def toString: String = "recall"
   }
 
-  /** F1 score. */
+  /** F1 score - defined as the harmonic mean of precision and recall.
+    *
+    * @note Only defined for a binary classification task.
+    * @see [[https://en.wikipedia.org/wiki/F1_score]]
+    * */
   object F1Score extends Metric {
 
     override lazy val higherValueIsBetter: Boolean = true
@@ -77,7 +91,7 @@ object ClassificationMetrics {
     override def toString: String = "F1 score"
   }
 
-  /** Hamming loss. */
+  /** Hamming loss - measures the proportion of incorrectly classified examples. */
   object HammingLoss extends Metric {
 
     override lazy val higherValueIsBetter: Boolean = false
diff --git a/src/main/scala/io/picnicml/doddlemodel/metrics/RankingMetrics.scala b/src/main/scala/io/picnicml/doddlemodel/metrics/RankingMetrics.scala
index 690c24d9..f768843d 100644
--- a/src/main/scala/io/picnicml/doddlemodel/metrics/RankingMetrics.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/metrics/RankingMetrics.scala
@@ -7,7 +7,14 @@ import scala.collection.compat.immutable.ArraySeq
 
 object RankingMetrics {
 
-  /** Area under the ROC-curve. **/
+  /** Area under the ROC-curve.
+    *
+    * Can be interpreted as the probability that a classifier will rank a randomly chosen positive example higher
+    * than a randomly chosen negative example.
+    *
+    * @note Only defined for a binary classification task.
+    * @see [[https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve]]
+    * */
   object Auc extends Metric {
 
     override lazy val higherValueIsBetter: Boolean = true
@@ -27,7 +34,15 @@ object RankingMetrics {
 
   /** Receiver operating characteristic curve (ROC-curve).
     *
+    * Presents the ability of a binary classifier (in terms of true positive rate and false positive rate) as the
+    * discrimination threshold is varied.
+    *
+    * @param y ground truth labels
+    * @param yPredProba predicted probabilities
     * @param length the number of thresholds to take into account, i.e. the number of coordinates returned
+    * @note Only defined for a binary classification task.
+    * @note Currently, ROC-curve is only defined for probability scores (i.e. `yPredProba` needs to contain values
+    *       between 0.0 and 1.0)
     */
   def rocCurve(y: Target, yPredProba: RealVector, length: Int = 30): RocCurve = {
     require(length >= 5, "Number of points of the ROC-curve must be at least 3")
diff --git a/src/main/scala/io/picnicml/doddlemodel/metrics/RegressionMetrics.scala b/src/main/scala/io/picnicml/doddlemodel/metrics/RegressionMetrics.scala
index c02b4643..d37be2b7 100644
--- a/src/main/scala/io/picnicml/doddlemodel/metrics/RegressionMetrics.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/metrics/RegressionMetrics.scala
@@ -7,7 +7,10 @@ import io.picnicml.doddlemodel.data.Target
 
 object RegressionMetrics {
 
-  /** Root mean squared error. */
+  /** Root mean squared error - defined as the square root of mean squared error.
+    *
+    * @see [[https://en.wikipedia.org/wiki/Root-mean-square_deviation]]
+    * */
   object Rmse extends Metric {
 
     override lazy val higherValueIsBetter: Boolean = false
@@ -17,7 +20,10 @@ object RegressionMetrics {
     override def toString: String = "RMSE"
   }
 
-  /** Mean squared error. */
+  /** Mean squared error - defined as the average of the squared errors.
+    *
+    * @see [[https://en.wikipedia.org/wiki/Mean_squared_error]]
+    * */
   object Mse extends Metric {
 
     override lazy val higherValueIsBetter: Boolean = false
@@ -30,7 +36,10 @@ object RegressionMetrics {
     override def toString: String = "MSE"
   }
 
-  /** Mean absolute error. */
+  /** Mean absolute error - defined as the average of the absolute errors.
+    *
+    * @see [[https://en.wikipedia.org/wiki/Mean_absolute_error]]
+    * */
   object Mae extends Metric {
 
     override lazy val higherValueIsBetter: Boolean = false
@@ -41,7 +50,10 @@ object RegressionMetrics {
   }
 
 
-  /** Explained variance. */
+  /** Explained variance - measures the proportion of variance in the dataset that is captured by the model.
+    *
+    * @see [[https://en.wikipedia.org/wiki/Explained_variation]]
+    * */
   object ExplainedVariance extends Metric {
 
     override lazy val higherValueIsBetter: Boolean = true
diff --git a/src/main/scala/io/picnicml/doddlemodel/metrics/package.scala b/src/main/scala/io/picnicml/doddlemodel/metrics/package.scala
index c4a926bb..fb4cdf55 100644
--- a/src/main/scala/io/picnicml/doddlemodel/metrics/package.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/metrics/package.scala
@@ -4,21 +4,39 @@ import io.picnicml.doddlemodel.metrics.ClassificationMetrics._
 import io.picnicml.doddlemodel.metrics.RankingMetrics._
 import io.picnicml.doddlemodel.metrics.RegressionMetrics._
 
+/** Provides various evaluation metrics for prediction tasks. */
 package object metrics {
 
   // regression metrics
+  /** @see [[io.picnicml.doddlemodel.metrics.RegressionMetrics.Mse]] */
   lazy val mse: Metric = Mse
+
+  /** @see [[io.picnicml.doddlemodel.metrics.RegressionMetrics.Rmse]] */
   lazy val rmse: Metric = Rmse
+
+  /** @see [[io.picnicml.doddlemodel.metrics.RegressionMetrics.Mae]] */
   lazy val mae: Metric = Mae
+
+  /** @see [[io.picnicml.doddlemodel.metrics.RegressionMetrics.ExplainedVariance]] */
   lazy val explainedVariance: Metric = ExplainedVariance
 
   // classification metrics
+  /** @see [[io.picnicml.doddlemodel.metrics.ClassificationMetrics.Accuracy]] */
   lazy val accuracy: Metric = Accuracy
+
+  /** @see [[io.picnicml.doddlemodel.metrics.ClassificationMetrics.Precision]] */
   lazy val precision: Metric = Precision
+
+  /** @see [[io.picnicml.doddlemodel.metrics.ClassificationMetrics.Recall]] */
   lazy val recall: Metric = Recall
+
+  /** @see [[io.picnicml.doddlemodel.metrics.ClassificationMetrics.F1Score]] */
   lazy val f1Score: Metric = F1Score
+
+  /** @see [[io.picnicml.doddlemodel.metrics.ClassificationMetrics.HammingLoss]] */
   lazy val hammingLoss: Metric = HammingLoss
 
   // ranking metrics
+  /** @see [[io.picnicml.doddlemodel.metrics.RankingMetrics.Auc]] */
   lazy val auc: Metric = Auc
 }

From 47503a29477dd7736d47b8548a54ef7f4fa2f6f0 Mon Sep 17 00:00:00 2001
From: Matej Klemen <matej.klemen1337@gmail.com>
Date: Wed, 11 Sep 2019 12:09:12 +0200
Subject: [PATCH 09/15] Update docs for model selection package

---
 .../modelselection/CrossValidation.scala      | 35 +++++++++++++------
 .../modelselection/GroupKFoldSplitter.scala   | 28 +++++++++------
 .../modelselection/HyperparameterSearch.scala | 25 +++++++++++++
 .../modelselection/KFoldSplitter.scala        | 23 +++++++-----
 4 files changed, 80 insertions(+), 31 deletions(-)

diff --git a/src/main/scala/io/picnicml/doddlemodel/modelselection/CrossValidation.scala b/src/main/scala/io/picnicml/doddlemodel/modelselection/CrossValidation.scala
index e0d2a29e..c3798426 100644
--- a/src/main/scala/io/picnicml/doddlemodel/modelselection/CrossValidation.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/modelselection/CrossValidation.scala
@@ -9,21 +9,12 @@ import scala.concurrent.duration.Duration
 import scala.concurrent.{Await, Future}
 import scala.util.Random
 
-/** A parallel, n-fold cross validation technique.
-  *
-  * @param metric a function from io.picnicml.doddlemodel.metrics used to calculate each fold's score
-  * @param dataSplitter a strategy for splitting the dataset into multiple folds
-  *
-  * Examples:
-  * val splitter = KFoldSplitter(folds = 3)
-  * val cv = CrossValidation(metric = rmse, dataSplitter = splitter))
-  * cv.score(model, x, y)
-  */
 class CrossValidation private (val metric: Metric, val dataSplitter: DataSplitter) {
 
   private implicit val ec: CVExecutionContext = new CVExecutionContext()
 
-  /**
+  /** Obtain the average score of all folds.
+    *
     * @param reusable indicates whether to shutdown the thread pool after the cv score is computed
     *  and by default it is, if the same CrossValidation instance is needed after the first call
     *  to score(...), bring implicit CrossValReusable(true) to scope and call CrossValidation.shutdownNow()
@@ -57,8 +48,30 @@ class CrossValidation private (val metric: Metric, val dataSplitter: DataSplitte
   def shutdownNow(): Unit = this.ec.shutdownNow()
  }
 
+/** A parallel, k-fold cross validation technique. */
 object CrossValidation {
 
+  /** Create a k-fold cross validation instance.
+    * @param metric a function from [[io.picnicml.doddlemodel.metrics]] used to calculate each fold's score
+    * @param dataSplitter a strategy for splitting the dataset into multiple folds
+    *
+    * @example Perform 2-fold cross validation using logistic regression and evaluate its performance
+    *          using root mean squared error.
+    *   {{{
+    *   import io.picnicml.doddlemodel.metrics.rmse
+    *   import io.picnicml.doddlemodel.linear.LogisticRegression
+    *
+    *   val X: Features = DenseMatrix(List(1.0, 2.0), List(3.0, 4.0), List(5.0, 6.0), List(7.0, 8.0))
+    *   val y: Target = DenseVector(0.0, 1.0, 0.0, 1.0)
+    *   val model = LogisticRegression(1.0)
+    *
+    *   val splitter = KFoldSplitter(numFolds = 2)
+    *   val cv = CrossValidation(metric = rmse, dataSplitter = splitter)
+    *   cv.score(model, X, y)
+    *   }}}
+    *
+    * @see [[io.picnicml.doddlemodel.metrics Metrics in doddle-model]]
+    */
   def apply(metric: Metric, dataSplitter: DataSplitter): CrossValidation =
     new CrossValidation(metric, dataSplitter)
 }
diff --git a/src/main/scala/io/picnicml/doddlemodel/modelselection/GroupKFoldSplitter.scala b/src/main/scala/io/picnicml/doddlemodel/modelselection/GroupKFoldSplitter.scala
index 4405aff4..0af085fb 100644
--- a/src/main/scala/io/picnicml/doddlemodel/modelselection/GroupKFoldSplitter.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/modelselection/GroupKFoldSplitter.scala
@@ -8,16 +8,6 @@ import io.picnicml.doddlemodel.modelselection.GroupKFoldSplitter.{TestFolds, Tra
 
 import scala.util.Random
 
-/** K-Folds strategy for splitting data that makes sure groups in each fold are non-overlapping,
-  * i.e no group is present in both training and testing splits. Folds try to be as balanced
-  * as possible, i.e. the number of test examples in each fold is approximately the same.
-  *
-  * @param numFolds number of folds
-  *
-  * Examples:
-  * val dataSplitter = GroupKFoldSplitter(folds = 3)
-  * datasplitter.splitData(x, y, groups)
-  */
 class GroupKFoldSplitter private (val numFolds: Int) extends DataSplitter {
 
   override def splitData(x: Features, y: Target, groups: IntVector)
@@ -61,9 +51,25 @@ class GroupKFoldSplitter private (val numFolds: Int) extends DataSplitter {
     throw new NotImplementedError("GroupKFoldSplitter only splits data based on groups")
 }
 
-
+/** A strategy for splitting data into k folds that makes sure groups in each fold are non-overlapping,
+  * i.e. no group is present in both training and testing splits. */
 object GroupKFoldSplitter {
 
+  /** Create a group k-fold splitter.
+    * @param numFolds number of folds
+    *
+    * @example Split 10 examples, corresponding to data of 3 patients, into 3 folds, making sure that data of a
+    *          patient never appears in both the training and the test set of the same fold.
+    * {{{
+    *   val patientFeatures = DenseMatrix.rand(10, 3)
+    *   val isSick = DenseVector(0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0)
+    *   val idPatient = DenseVector(1, 2, 2, 0, 0, 0, 2, 1, 1, 2)
+    *
+    *   val splitter = GroupKFoldSplitter(numFolds = 3)
+    *   // stream, containing 3 TrainTestSplits
+    *   val splits = splitter.splitData(patientFeatures, isSick, idPatient)
+    * }}}
+    */
   def apply(numFolds: Int): GroupKFoldSplitter = {
     require(numFolds > 0, "Number of folds must be positive")
     new GroupKFoldSplitter(numFolds)
diff --git a/src/main/scala/io/picnicml/doddlemodel/modelselection/HyperparameterSearch.scala b/src/main/scala/io/picnicml/doddlemodel/modelselection/HyperparameterSearch.scala
index 9d28195c..49688bdb 100644
--- a/src/main/scala/io/picnicml/doddlemodel/modelselection/HyperparameterSearch.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/modelselection/HyperparameterSearch.scala
@@ -52,8 +52,33 @@ class HyperparameterSearch private (val numIterations: Int, val crossVal: CrossV
   }
 }
 
+/** A parallel hyperparameter search using k-fold cross validation. */
 object HyperparameterSearch {
 
+  /** Create a hyperparameter search instance.
+    * @param numIterations number of predictors for which the cross validation score is calculated
+    * @param crossValidation k-fold cross validation instance
+    * @param verbose flag that specifies whether validation score of the selected model is printed to standard output
+    *
+    * @example Search among 3 different regularization values (0.1, 0.2, 0.5) for logistic regression using
+    *          3-fold cross validation and store the (re-fitted on entire dataset) model that obtains highest accuracy.
+    * {{{
+    *   import io.picnicml.doddlemodel.metrics.accuracy
+    *   import io.picnicml.doddlemodel.linear.LogisticRegression
+    *
+    *   val x = DenseMatrix.rand(10, 3)
+    *   val y = DenseVector(0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0)
+    *
+    *   val splitter = KFoldSplitter(numFolds = 3)
+    *   val cv = CrossValidation(metric = accuracy, dataSplitter = splitter)
+    *   val search = HyperparameterSearch(numIterations = 3, crossValidation = cv)
+    *   val lambdas = List(0.1, 0.2, 0.5).iterator
+    *
+    *   val modelBestParams = search.bestOf(x, y) {
+    *     LogisticRegression(lambda = lambdas.next)
+    *   }
+    * }}}
+    */
   def apply(numIterations: Int, crossValidation: CrossValidation, verbose: Boolean = true): HyperparameterSearch = {
     require(numIterations > 0, "Number of iterations must be positive")
     new HyperparameterSearch(numIterations, crossValidation, verbose)
diff --git a/src/main/scala/io/picnicml/doddlemodel/modelselection/KFoldSplitter.scala b/src/main/scala/io/picnicml/doddlemodel/modelselection/KFoldSplitter.scala
index 2f462b7b..c2877269 100644
--- a/src/main/scala/io/picnicml/doddlemodel/modelselection/KFoldSplitter.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/modelselection/KFoldSplitter.scala
@@ -4,15 +4,6 @@ import io.picnicml.doddlemodel.data.{Features, IntVector, Target, TrainTestSplit
 
 import scala.util.Random
 
-/** K-Folds strategy for splitting data.
-  *
-  * @param numFolds number of folds
-  * @param shuffleRows indicates whether examples should be shuffled prior to calculating the score
-  *
-  * Examples:
-  * val dataSplitter = KFoldSplitter(folds = 3)
-  * datasplitter.splitData(x, y)
-  */
 class KFoldSplitter private (val numFolds: Int, val shuffleRows: Boolean) extends DataSplitter {
 
   override def splitData(x: Features, y: Target)
@@ -60,8 +51,22 @@ class KFoldSplitter private (val numFolds: Int, val shuffleRows: Boolean) extend
     throw new NotImplementedError("KFoldSplitter doesn't split data based on groups")
 }
 
+/** K-folds strategy for splitting data. */
 object KFoldSplitter {
 
+  /** Create a k-fold splitter instance.
+    * @param numFolds number of folds
+    * @param shuffleRows a flag indicating whether examples should be shuffled prior to calculating the splits
+    *
+    * @example Split data into 3 folds.
+    * {{{
+    *   val x = DenseMatrix.rand(7, 2)
+    *   val y = DenseVector(0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0)
+    *
+    *   val splitter = KFoldSplitter(numFolds = 3)
+    *   splitter.splitData(x, y)
+    * }}}
+    */
   def apply(numFolds: Int, shuffleRows: Boolean = true): KFoldSplitter = {
     require(numFolds > 0, "Number of folds must be positive")
     new KFoldSplitter(numFolds, shuffleRows)

From 64e5862d5a62c7adc6220f2a16b4aa7113690ab4 Mon Sep 17 00:00:00 2001
From: Matej Klemen <matej.klemen1337@gmail.com>
Date: Sun, 13 Oct 2019 23:44:53 +0200
Subject: [PATCH 10/15] Verify and improve docs for `data` package * make sure
 examples in docs are working after rebase * make examples complete by
 including imports

---
 .../picnicml/doddlemodel/data/CsvLoader.scala | 16 +++---
 .../doddlemodel/data/DatasetUtils.scala       | 40 +++++++++++---
 .../picnicml/doddlemodel/data/Feature.scala   | 52 +++++++++++++++----
 .../picnicml/doddlemodel/data/package.scala   |  6 ++-
 4 files changed, 86 insertions(+), 28 deletions(-)

diff --git a/src/main/scala/io/picnicml/doddlemodel/data/CsvLoader.scala b/src/main/scala/io/picnicml/doddlemodel/data/CsvLoader.scala
index 24f1d840..8437173a 100644
--- a/src/main/scala/io/picnicml/doddlemodel/data/CsvLoader.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/data/CsvLoader.scala
@@ -9,29 +9,27 @@ import scala.io.{BufferedSource, Source}
 
 object CsvLoader {
 
-  /** Loads a csv dataset with 2 header lines (1st line for feature names and 2nd for types).
+  /** Loads a csv dataset with 2 header lines (first line for feature names and second for types).
     * @param datasetFilePath csv file to load
     * @param na value to interpret as N/A data in the given dataset
     *
     * @example Reading the iris dataset.
     *   {{{
-    *     import java.io.File
+    *     import io.picnicml.doddlemodel.data.CsvLoader.loadCsvDataset
     *
-    *     val file = new File("/datasets/iris.csv")
-    *     val (data, featureInfo) = loadCsvDataset(file)
+    *     val (data, featureInfo) = loadCsvDataset("src/main/resources/datasets/iris.csv")
     *     // separate features from the label
-    *     val (irisFeatures, irisLabels) = data(::, 0 to 3), data(::, -1)
+    *     val (irisFeatures, irisLabels) = (data(::, 0 to 3), data(::, -1))
     *   }}}
     *
     * @example Reading a dataset where N/A values are marked with `NA`.
     *   {{{
-    *     import java.io.File
+    *     import io.picnicml.doddlemodel.data.CsvLoader.loadCsvDataset
     *
-    *     val file = new File("/datasets/dummy_csv_reading.csv")
     *     // specify a value to interpret as N/A data
-    *     val (data, featureInfo) = loadCsvDataset(file, "NA")
+    *     val (data, featureInfo) = loadCsvDataset("src/main/resources/datasets/dummy_csv_reading.csv", "NA")
     *   }}}
-    * */
+    */
   def loadCsvDataset(datasetFilePath: String, na: String = "NA"): FeaturesWithIndex =
     loadCsvDataset(Source.fromFile(datasetFilePath), na)
 
diff --git a/src/main/scala/io/picnicml/doddlemodel/data/DatasetUtils.scala b/src/main/scala/io/picnicml/doddlemodel/data/DatasetUtils.scala
index 44e86b0c..a79b5c96 100644
--- a/src/main/scala/io/picnicml/doddlemodel/data/DatasetUtils.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/data/DatasetUtils.scala
@@ -13,17 +13,19 @@ object DatasetUtils {
     * @example Shuffle a dataset randomly.
     *   {{{
     *     import scala.util.Random
+    *     import io.picnicml.doddlemodel.data.CsvLoader.loadCsvDataset
+    *     import io.picnicml.doddlemodel.data.DatasetUtils.shuffleDataset
     *
-    *     // we are assuming data was previously loaded
-    *     val (dataX, dataY) = ...
+    *     val (data, featureInfo) = loadCsvDataset("src/main/resources/datasets/iris.csv")
+    *     val (dataX, dataY) = (data(::, 0 to 3), data(::, -1))
     *
     *     val (shuffledX, shuffledY) = shuffleDataset(dataX, dataY)
     *
-    *     // seeded shuffle - seed passed to shuffler implicitly
+    *     // seeded shuffle - seed is passed to shuffler implicitly
     *     implicit val rand: Random = new Random(42)
     *     val (shuffledX, shuffledY) = shuffleDataset(dataX, dataY)
     *   }}}
-    * */
+    */
   def shuffleDataset(x: Features, y: Target)(implicit rand: Random = new Random()): Dataset = {
     val shuffleIndices = rand.shuffle((0 until y.length).toIndexedSeq)
     (x(shuffleIndices, ::).toDenseMatrix, y(shuffleIndices).toDenseVector)
@@ -36,17 +38,39 @@ object DatasetUtils {
     *
     * @example Split dataset into training and test set.
     *   {{{
-    *     // we are assuming data was previously loaded
-    *     val (dataX, dataY) = ...
+    *     import io.picnicml.doddlemodel.data.CsvLoader.loadCsvDataset
+    *     import io.picnicml.doddlemodel.data.DatasetUtils.splitDataset
+    *
+    *     val (data, featureInfo) = loadCsvDataset("src/main/resources/datasets/iris.csv")
+    *     val (dataX, dataY) = (data(::, 0 to 3), data(::, -1))
     *
     *     // by default, the split is 50%:50%
     *     val trTeSplit = splitDataset(dataX, dataY)
     *
     *     // put 80% of data into training set and 20% into test set
-    *     val trTeSplit = splitDataset(dataX, dataY, 0.8)
+    *     val trTeSplit = splitDataset(dataX, dataY, 0.8f)
     *     val (trainX, trainY, testX, testY) = (trTeSplit.xTr, trTeSplit.yTr, trTeSplit.xTe, trTeSplit.yTe)
     *   }}}
-    * */
+    *
+    *   @example Split dataset into training, validation and test set in the ratio 60%: 10%: 30%. This is done by
+    *            performing two train-test splits in a row. First we split the dataset in ratio (60% + 10%): 30%,
+    *            obtaining combined training and validation set and the test set. Then we split the first part using
+    *            the ratio (60% / 70%): (10% / 70%) to obtain the training and validation set.
+    *     {{{
+    *       import io.picnicml.doddlemodel.data.CsvLoader.loadCsvDataset
+    *       import io.picnicml.doddlemodel.data.DatasetUtils.splitDataset
+    *
+    *       val (data, featureInfo) = loadCsvDataset("src/main/resources/datasets/iris.csv")
+    *       val (dataX, dataY) = (data(::, 0 to 3), data(::, -1))
+    *
+    *       val teVsNonTeSplit = splitDataset(dataX, dataY, 0.7f)
+    *       val (trValX, trValY, testX, testY) = (teVsNonTeSplit.xTr, teVsNonTeSplit.yTr,
+    *                                             teVsNonTeSplit.xTe, teVsNonTeSplit.yTe)
+    *
+    *       val trValSplit = splitDataset(trValX, trValY, (0.6f / 0.7f))
+    *       val (trainX, trainY, valX, valY) = (trValSplit.xTr, trValSplit.yTr, trValSplit.xTe, trValSplit.yTe)
+    *     }}}
+    */
   def splitDataset(x: Features, y: Target, proportionTrain: Float = 0.5f): TrainTestSplit = {
     val numTrain = numberOfTrainExamplesBasedOnProportion(x.rows, proportionTrain)
     val trIndices = 0 until numTrain
diff --git a/src/main/scala/io/picnicml/doddlemodel/data/Feature.scala b/src/main/scala/io/picnicml/doddlemodel/data/Feature.scala
index 09b52bea..73d75da0 100644
--- a/src/main/scala/io/picnicml/doddlemodel/data/Feature.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/data/Feature.scala
@@ -40,30 +40,48 @@ object Feature {
       *
       * @example Create feature index based on features "f1" and "f3" from a constructed feature index.
       *   {{{
+      *     import io.picnicml.doddlemodel.data.Feature.{FeatureIndex, NumericalFeature}
+      *
       *     val featureIndex = FeatureIndex(List("f1", "f2", "f3"), List(NumericalFeature, NumericalFeature,
       *       NumericalFeature), List(0, 1, 2))
       *     val subIndex = featureIndex.subset("f1", "f3")
       *   }}}
       *
-      * */
+      */
     def subset(names: String*): FeatureIndex = {
       val nameToIndex = this.names.zipWithIndex.toMap
       subset(names.map(n => nameToIndex(n)):_*)
     }
 
     /** Create a feature index with subset of features, provided by feature indices.
+      * @param indices column indices for subset of features to be selected
+      *
+      * @example Create feature index based on second and third (i.e. indices 1, 2) features from a constructed
+      *          feature index.
+      *   {{{
+      *     import io.picnicml.doddlemodel.data.Feature.{FeatureIndex, NumericalFeature}
+      *
+      *     val featureIndex = FeatureIndex(List("f1", "f2", "f3"), List(NumericalFeature, NumericalFeature,
+      *       NumericalFeature), List(0, 1, 2))
+      *     val subIndex = featureIndex.subset(1 to 2)
+      *   }}}
+      */
+    def subset(indices: IndexedSeq[Int]): FeatureIndex = subset(indices:_*)
+
+    /** Create a feature index with subset of features, provided by feature indices. Alternative interface that does
+      * the same as `FeatureIndex.subset(indices: IndexedSeq[Int])`.
       * @param indices column indices for subset of features to be selected
       *
       * @example Create feature index based on second and third (i.e. indices 1, 2) features from a constructed
       *          feature index.
       * {{{
+      *   import io.picnicml.doddlemodel.data.Feature.{FeatureIndex, NumericalFeature}
+      *
       *   val featureIndex = FeatureIndex(List("f1", "f2", "f3"), List(NumericalFeature, NumericalFeature,
       *     NumericalFeature), List(0, 1, 2))
-      *   val subIndex = featureIndex.subset(1 to 2)
+      *   val subIndex = featureIndex.subset(1, 2)
       * }}}
-      * */
-    def subset(indices: IndexedSeq[Int]): FeatureIndex = subset(indices:_*)
-
+      */
     // DummyImplicit is needed to avoid the same type as String* after erasure
     def subset(indices: Int*)(implicit di: DummyImplicit): FeatureIndex = new FeatureIndex(
       indices.toIndexedSeq.map(i => this.names(i)),
@@ -71,7 +89,17 @@ object Feature {
       indices.toIndexedSeq.map(i => this.columnIndices(i))
     )
 
-    /** Create a feature index by dropping a feature by column index. */
+    /** Create a feature index by dropping a feature by column index.
+      * @param index index of column to be dropped
+      * @example Drop the third (index 2) feature from a feature index.
+      *   {{{
+      *     import io.picnicml.doddlemodel.data.Feature.{FeatureIndex, NumericalFeature}
+      *
+      *     val featureIndex = FeatureIndex(List("f1", "f2", "f3"), List(NumericalFeature, NumericalFeature,
+      *       NumericalFeature), List(0, 1, 2))
+      *     val subIndex = featureIndex.drop(2)
+      *   }}}
+      */
     def drop(index: Int): FeatureIndex = new FeatureIndex(
       this.names.zipWithIndex.flatMap { case (n, i) => if (i != index) n.some else none[String] },
       this.types.zipWithIndex.flatMap { case (t, i) => if (i != index) t.some else none[FeatureType] },
@@ -88,7 +116,9 @@ object Feature {
     * because some methods are only applicable to a certain type of features, e.g. [0, 1] scaling
     * only makes sense for numerical features. */
   object FeatureIndex {
-    /** Construct feature index with `n` categorical features. Feature names are generated automatically.
+
+    /** Construct feature index with `n` categorical features. Feature names are generated automatically - `i`th
+      * feature gets assigned the name "f`i`" (using 0-based counting).
       * @param n number of categorical features in feature index
       */
     def categorical(n: Int): FeatureIndex =
@@ -97,7 +127,8 @@ object Feature {
     def categorical(columnIndices: List[Int]): FeatureIndex =
       apply(columnIndices.indices.map(i => s"f$i").toList, columnIndices.map(_ => CategoricalFeature), columnIndices)
 
-    /** Construct feature index with `n` numerical features. Feature names are generated automatically.
+    /** Construct feature index with `n` numerical features. Feature names are generated automatically - `i`th
+      * feature gets assigned the name "f`i`" (using 0-based counting).
       * @param n number of numerical features in feature index
       */
     def numerical(n: Int): FeatureIndex =
@@ -106,11 +137,13 @@ object Feature {
     def numerical(columnIndices: List[Int]): FeatureIndex =
       apply(columnIndices.indices.map(i => s"f$i").toList, columnIndices.map(_ => NumericalFeature), columnIndices)
 
-    /** Construct feature index from feature types. Feature names are generated automatically.
+    /** Construct feature index from feature types. Feature names are generated automatically - `i`th
+      * feature gets assigned the name "f`i`" (using 0-based counting).
       * @param types list of feature types
       *
       * @example Construct a feature index with one numerical and two categorical features.
       *   {{{
+      *     import io.picnicml.doddlemodel.data.Feature.{FeatureIndex, NumericalFeature, CategoricalFeature}
       *     val featureIndex = FeatureIndex(List(CategoricalFeature, NumericalFeature, CategoricalFeature))
       *   }}}
       */
@@ -128,6 +161,7 @@ object Feature {
       * @example Construct a feature index with three features, named "age" (numerical), "height" (numerical)
       *          and "group" (categorical).
       *   {{{
+      *     import io.picnicml.doddlemodel.data.Feature.{FeatureIndex, NumericalFeature, CategoricalFeature}
       *     val featureIndex = FeatureIndex(List("age", "height", "group"), List(NumericalFeature,
       *       NumericalFeature, CategoricalFeature), List(0, 1, 2))
       *   }}}
diff --git a/src/main/scala/io/picnicml/doddlemodel/data/package.scala b/src/main/scala/io/picnicml/doddlemodel/data/package.scala
index f2ea2ced..96954fa8 100644
--- a/src/main/scala/io/picnicml/doddlemodel/data/package.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/data/package.scala
@@ -20,17 +20,19 @@ package object data {
 
   /** Loads and returns the Boston Housing prices dataset. */
   def loadBostonDataset: DatasetWithIndex = ResourceDatasetLoaders.loadBostonDataset
+
   /** Loads and returns the Breast cancer Wisconsin (diagnostic) dataset.
     *
     * @see <a href="https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Diagnostic%29" target="_blank">
     *        Breast cancer dataset on UCI Machine Learning Repository </a>
-    * */
+    */
   def loadBreastCancerDataset: DatasetWithIndex = ResourceDatasetLoaders.loadBreastCancerDataset
+
   /** Loads and returns the Iris dataset.
     *
     * @see <a href="https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Diagnostic%29" target="_blank">
     *        Iris dataset on UCI Machine Learning Repository </a>
-    * */
+    */
   def loadIrisDataset: DatasetWithIndex = ResourceDatasetLoaders.loadIrisDataset
 
   /** Loads and returns an artificial dataset with a Poisson target variable. */

From 1df2df1aa89c1ec7df382087beb1d18ce3370945 Mon Sep 17 00:00:00 2001
From: Matej Klemen <matej.klemen1337@gmail.com>
Date: Mon, 14 Oct 2019 17:18:21 +0200
Subject: [PATCH 11/15] Verify and improve docs for `dummy` package

---
 .../classification/MostFrequentClassifier.scala | 16 +++++++++-------
 .../classification/StratifiedClassifier.scala   | 16 +++++++++-------
 .../classification/UniformClassifier.scala      | 16 +++++++++-------
 .../dummy/regression/MeanRegressor.scala        | 17 ++++++++++-------
 .../dummy/regression/MedianRegressor.scala      | 16 +++++++++-------
 5 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/src/main/scala/io/picnicml/doddlemodel/dummy/classification/MostFrequentClassifier.scala b/src/main/scala/io/picnicml/doddlemodel/dummy/classification/MostFrequentClassifier.scala
index c00cb079..55489b48 100644
--- a/src/main/scala/io/picnicml/doddlemodel/dummy/classification/MostFrequentClassifier.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/dummy/classification/MostFrequentClassifier.scala
@@ -8,15 +8,17 @@ import io.picnicml.doddlemodel.typeclasses.Classifier
 
 case class MostFrequentClassifier private (numClasses: Option[Int], mostFrequentClass: Option[Float])
 
-/** An immutable dummy classifier that always predicts the most frequent label.
-  *
-  * @example
-  * {{{
-  *   val model = MostFrequentClassifier()
-  * }}}
-  */
+/** An immutable dummy classifier that always predicts the most frequent label. */
 object MostFrequentClassifier {
 
+  /** Create a majority classifier.
+    *
+    * @example
+    *   {{{
+    *     import io.picnicml.doddlemodel.dummy.classification.MostFrequentClassifier
+    *     val model = MostFrequentClassifier()
+    *   }}}
+    */
   def apply(): MostFrequentClassifier = MostFrequentClassifier(none, none)
 
   @SerialVersionUID(0L)
diff --git a/src/main/scala/io/picnicml/doddlemodel/dummy/classification/StratifiedClassifier.scala b/src/main/scala/io/picnicml/doddlemodel/dummy/classification/StratifiedClassifier.scala
index 0f79bfb6..31e05cee 100644
--- a/src/main/scala/io/picnicml/doddlemodel/dummy/classification/StratifiedClassifier.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/dummy/classification/StratifiedClassifier.scala
@@ -18,15 +18,17 @@ case class StratifiedClassifier private (numClasses: Option[Int],
   }
 }
 
-/** An immutable dummy classifier that samples predictions from a stratified categorical distribution.
-  *
-  * @example
-  *   {{{
-  *     val model = StratifiedClassifier()
-  *   }}}
-  */
+/** An immutable dummy classifier that samples predictions from a stratified categorical distribution. */
 object StratifiedClassifier {
 
+  /** Create a stratified classifier.
+    *
+    * @example
+    *   {{{
+    *     import io.picnicml.doddlemodel.dummy.classification.StratifiedClassifier
+    *     val model = StratifiedClassifier()
+    *   }}}
+    */
   def apply(): StratifiedClassifier = StratifiedClassifier(none, none)
 
   @SerialVersionUID(0L)
diff --git a/src/main/scala/io/picnicml/doddlemodel/dummy/classification/UniformClassifier.scala b/src/main/scala/io/picnicml/doddlemodel/dummy/classification/UniformClassifier.scala
index 379549a9..8e82d517 100644
--- a/src/main/scala/io/picnicml/doddlemodel/dummy/classification/UniformClassifier.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/dummy/classification/UniformClassifier.scala
@@ -8,15 +8,17 @@ import io.picnicml.doddlemodel.typeclasses.Classifier
 
 case class UniformClassifier private (numClasses: Option[Int])
 
-/** An immutable dummy classifier that samples predictions from a uniform categorical distribution.
-  *
-  * @example
-  *   {{{
-  *     val model = UniformClassifier()
-  *   }}}
-  */
+/** An immutable dummy classifier that samples predictions from a uniform categorical distribution. */
 object UniformClassifier {
 
+  /** Create a uniform classifier.
+    *
+    * @example
+    *   {{{
+    *     import io.picnicml.doddlemodel.dummy.classification.UniformClassifier
+    *     val model = UniformClassifier()
+    *   }}}
+    */
   def apply(): UniformClassifier = UniformClassifier(none)
 
   @SerialVersionUID(0L)
diff --git a/src/main/scala/io/picnicml/doddlemodel/dummy/regression/MeanRegressor.scala b/src/main/scala/io/picnicml/doddlemodel/dummy/regression/MeanRegressor.scala
index 83785559..581c02eb 100644
--- a/src/main/scala/io/picnicml/doddlemodel/dummy/regression/MeanRegressor.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/dummy/regression/MeanRegressor.scala
@@ -6,17 +6,20 @@ import cats.syntax.option._
 import io.picnicml.doddlemodel.data.{Features, Target}
 import io.picnicml.doddlemodel.typeclasses.Regressor
 
-/** An immutable dummy regressor that always predicts the sample mean.
-  *
-  * @example
-  *   {{{
-  *     val model = MeanRegressor()
-  *   }}}
-  */
+
 case class MeanRegressor private (mean: Option[Float])
 
+/** An immutable dummy regressor that always predicts the sample mean. */
 object MeanRegressor {
 
+  /** Create a mean regressor.
+    *
+    * @example
+    * {{{
+    *   import io.picnicml.doddlemodel.dummy.regression.MeanRegressor
+    *   val model = MeanRegressor()
+    * }}}
+    */
   def apply(): MeanRegressor = MeanRegressor(none)
 
   @SerialVersionUID(0L)
diff --git a/src/main/scala/io/picnicml/doddlemodel/dummy/regression/MedianRegressor.scala b/src/main/scala/io/picnicml/doddlemodel/dummy/regression/MedianRegressor.scala
index ea8356e7..04145d71 100644
--- a/src/main/scala/io/picnicml/doddlemodel/dummy/regression/MedianRegressor.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/dummy/regression/MedianRegressor.scala
@@ -6,17 +6,19 @@ import cats.syntax.option._
 import io.picnicml.doddlemodel.data.{Features, Target}
 import io.picnicml.doddlemodel.typeclasses.Regressor
 
-/** An immutable dummy regressor that always predicts the sample median.
-  *
-  * @example
-  *   {{{
-  *     val model = MedianRegressor()
-  *   }}}
-  */
 case class MedianRegressor private (median: Option[Float])
 
+/** An immutable dummy regressor that always predicts the sample median. */
 object MedianRegressor {
 
+  /** Create a median regressor.
+    *
+    * @example
+    * {{{
+    *   import io.picnicml.doddlemodel.dummy.regression.MedianRegressor
+    *   val model = MedianRegressor()
+    * }}}
+    */
   def apply(): MedianRegressor = MedianRegressor(none)
 
   @SerialVersionUID(0L)

From c4a8b57b9d398ceec592d7e401f7f557d2ca040d Mon Sep 17 00:00:00 2001
From: Matej Klemen <matej.klemen1337@gmail.com>
Date: Tue, 15 Oct 2019 10:09:05 +0200
Subject: [PATCH 12/15] Verify and improve docs for `impute`

---
 .../doddlemodel/impute/MeanValueImputer.scala | 28 ++++++++++++-----
 .../impute/MostFrequentValueImputer.scala     | 31 +++++++++++++------
 2 files changed, 42 insertions(+), 17 deletions(-)

diff --git a/src/main/scala/io/picnicml/doddlemodel/impute/MeanValueImputer.scala b/src/main/scala/io/picnicml/doddlemodel/impute/MeanValueImputer.scala
index 3b414310..2fcc5278 100644
--- a/src/main/scala/io/picnicml/doddlemodel/impute/MeanValueImputer.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/impute/MeanValueImputer.scala
@@ -11,26 +11,38 @@ import io.picnicml.doddlemodel.typeclasses.Transformer
 case class MeanValueImputer private (private[impute] val means: Option[RealVector],
                                      private val featureIndex: FeatureIndex)
 
-/** An immutable simple imputer that replaces all NaN values with column means. */
+/** An immutable simple imputer that replaces numerical NaN values with column means. Categorical values are left
+  * untouched. */
 object MeanValueImputer {
 
   /** Create an imputer based on a feature index.
     *
-    * @param featureIndex feature index associated with features, this is needed so that only numerical features
+    * @param featureIndex feature index associated with features - this is needed so that only numerical features
     *                     are transformed by this preprocessor, could be a subset of columns to be transformed
     *
     * @example Impute values for all (numerical) features.
     *   {{{
-    *     val featureIndex = FeatureIndex(List(NumericalFeature, CategoricalFeature, NumericalFeature,
-    *       NumericalFeature))
-    *     val imputer = MeanValueImputer(featureIndex)
+    *     import io.picnicml.doddlemodel.data.CsvLoader.loadCsvDataset
+    *     import io.picnicml.doddlemodel.impute.MeanValueImputer
+    *     import io.picnicml.doddlemodel.syntax.TransformerSyntax._
+    *
+    *     val (data, featureInfo) = loadCsvDataset("src/main/resources/datasets/dummy_csv_reading.csv", "NA")
+    *     val imputer = MeanValueImputer(featureInfo)
+    *     val fittedImputer = imputer.fit(data)
+    *     // Note: only the fourth (index 3) column gets imputed as it's the only numerical column with NAs
+    *     fittedImputer.transform(data)
     *   }}}
     *
     * @example Impute values for a subset of features.
     *   {{{
-    *     val featureIndex = FeatureIndex(List("f0", "f1", "f2"), List(NumericalFeature, NumericalFeature,
-    *       NumericalFeature), List(0, 1, 2))
-    *     val imputerSubsetOfColumns = MeanValueImputer(featureIndex.subset("f0", "f2"))
+    *     import io.picnicml.doddlemodel.data.CsvLoader.loadCsvDataset
+    *     import io.picnicml.doddlemodel.impute.MeanValueImputer
+    *     import io.picnicml.doddlemodel.syntax.TransformerSyntax._
+    *
+    *     val (data, featureInfo) = loadCsvDataset("src/main/resources/datasets/dummy_csv_reading.csv", "NA")
+    *     val imputerSubset = MeanValueImputer(featureInfo.subset("f3"))
+    *     val fittedImputer = imputerSubset.fit(data)
+    *     fittedImputer.transform(data)
     *   }}}
     */
   def apply(featureIndex: FeatureIndex): MeanValueImputer = MeanValueImputer(none, featureIndex)
diff --git a/src/main/scala/io/picnicml/doddlemodel/impute/MostFrequentValueImputer.scala b/src/main/scala/io/picnicml/doddlemodel/impute/MostFrequentValueImputer.scala
index 2d912cb2..05a71f8c 100644
--- a/src/main/scala/io/picnicml/doddlemodel/impute/MostFrequentValueImputer.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/impute/MostFrequentValueImputer.scala
@@ -9,26 +9,39 @@ import io.picnicml.doddlemodel.typeclasses.Transformer
 
 case class MostFrequentValueImputer private (private[impute] val mostFrequent: Option[RealVector],
                                              private val featureIndex: FeatureIndex)
-/** An immutable simple imputer that replaces all NaN values with most frequent value of a corresponding column. */
+
+/** An immutable simple imputer that replaces categorical NaN values with the most frequent value of the corresponding
+  * column. Numerical values are left untouched. */
 object MostFrequentValueImputer {
 
   /** Create an imputer based on a feature index.
     *
-    * @param featureIndex feature index associated with features, this is needed so that only categorical features
+    * @param featureIndex feature index associated with features - this is needed so that only categorical features
     *                     are transformed by this preprocessor, could be a subset of columns to be transformed
     *
-    * @example Impute values for all (numerical) features.
+    * @example Impute values for all (categorical) features.
     *   {{{
-    *     val featureIndex = FeatureIndex(List(NumericalFeature, CategoricalFeature, NumericalFeature,
-    *       NumericalFeature))
-    *     val imputer = MostFrequentValueImputer(featureIndex)
+    *     import io.picnicml.doddlemodel.data.CsvLoader.loadCsvDataset
+    *     import io.picnicml.doddlemodel.impute.MostFrequentValueImputer
+    *     import io.picnicml.doddlemodel.syntax.TransformerSyntax._
+    *
+    *     val (data, featureInfo) = loadCsvDataset("src/main/resources/datasets/dummy_csv_reading.csv", "NA")
+    *     val imputer = MostFrequentValueImputer(featureInfo)
+    *     val fittedImputer = imputer.fit(data)
+    *     // Note: only the second (index 1) column gets imputed as it's the only categorical column with NAs
+    *     fittedImputer.transform(data)
     *   }}}
     *
     * @example Impute values for a subset of features.
     *   {{{
-    *     val featureIndex = FeatureIndex(List("f0", "f1", "f2"), List(NumericalFeature, NumericalFeature,
-    *       NumericalFeature), List(0, 1, 2))
-    *     val imputerSubsetOfColumns = MostFrequentValueImputer(featureIndex.subset("f0", "f2"))
+    *     import io.picnicml.doddlemodel.data.CsvLoader.loadCsvDataset
+    *     import io.picnicml.doddlemodel.impute.MostFrequentValueImputer
+    *     import io.picnicml.doddlemodel.syntax.TransformerSyntax._
+    *
+    *     val (data, featureInfo) = loadCsvDataset("src/main/resources/datasets/dummy_csv_reading.csv", "NA")
+    *     val imputerSubset = MostFrequentValueImputer(featureInfo.subset("f1"))
+    *     val fittedImputer = imputerSubset.fit(data)
+    *     fittedImputer.transform(data)
     *   }}}
     */
   def apply(featureIndex: FeatureIndex): MostFrequentValueImputer =

From 8406cb933484c08c9be6fa64bb2c984f9663efb1 Mon Sep 17 00:00:00 2001
From: Matej Klemen <matej.klemen1337@gmail.com>
Date: Tue, 15 Oct 2019 11:33:25 +0200
Subject: [PATCH 13/15] Verify and improve docs for `linear`

---
 .../doddlemodel/linear/LinearRegression.scala  | 14 ++++++++------
 .../linear/LogisticRegression.scala            | 18 ++++++++++--------
 .../doddlemodel/linear/PoissonRegression.scala | 14 ++++++++------
 .../doddlemodel/linear/SoftmaxClassifier.scala | 14 ++++++++------
 4 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/src/main/scala/io/picnicml/doddlemodel/linear/LinearRegression.scala b/src/main/scala/io/picnicml/doddlemodel/linear/LinearRegression.scala
index 34ede7cf..e5c1b92f 100644
--- a/src/main/scala/io/picnicml/doddlemodel/linear/LinearRegression.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/linear/LinearRegression.scala
@@ -13,16 +13,18 @@ object LinearRegression {
 
   /** Create a regularized linear regression model.
     *
-    * @param lambda L2 regularization strength, must be non-negative, 0.0 means no regularization
+    * @param lambda L2 regularization strength - must be non-negative, 0.0 means no regularization
     *
-    * @example Create and fit a regularized linear regression model with lambda = 1.5.
+    * @example Create and fit a regularized linear regression model with lambda 1.5.
     *   {{{
-    *     import io.picnicml.doddlemodel.linear.LinearRegression.ev
+    *     import breeze.linalg.{DenseMatrix, DenseVector}
+    *     import io.picnicml.doddlemodel.linear.LinearRegression
+    *     import io.picnicml.doddlemodel.syntax.RegressorSyntax._
     *
-    *     val X: Features = DenseMatrix(List(1.0, 2.0), List(3.0, 4.0))
-    *     val y: Target = DenseVector(-3.0, 2.0)
+    *     val X = DenseMatrix(List(1.0f, 2.0f), List(3.0f, 4.0f))
+    *     val y = DenseVector(-3.0f, 2.0f)
     *     val model = LinearRegression(lambda = 1.5f)
-    *     val fittedModel = ev.fit(model, X, y)
+    *     val fittedModel = model.fit(X, y)
     *   }}}
     */
   def apply(lambda: Float = 0.0f): LinearRegression = {
diff --git a/src/main/scala/io/picnicml/doddlemodel/linear/LogisticRegression.scala b/src/main/scala/io/picnicml/doddlemodel/linear/LogisticRegression.scala
index 3a30860b..5d32f17b 100644
--- a/src/main/scala/io/picnicml/doddlemodel/linear/LogisticRegression.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/linear/LogisticRegression.scala
@@ -15,17 +15,19 @@ object LogisticRegression {
 
   /** Create a regularized logistic regression model.
     *
-    * @param lambda L2 regularization strength, must be non-negative, 0.0 means no regularization
+    * @param lambda L2 regularization strength - must be non-negative, 0.0 means no regularization
     *
-    * @example Create and fit a logistic regression model with lambda = 1.5.
-    * {{{
-    *     import io.picnicml.doddlemodel.linear.LogisticRegression.ev
+    * @example Create and fit a logistic regression model with lambda 1.5.
+    *   {{{
+    *     import breeze.linalg.{DenseMatrix, DenseVector}
+    *     import io.picnicml.doddlemodel.linear.LogisticRegression
+    *     import io.picnicml.doddlemodel.syntax.ClassifierSyntax._
     *
-    *     val X: Features = DenseMatrix(List(1.0, 2.0), List(3.0, 4.0))
-    *     val y: Target = DenseVector(0.0, 1.0)
+    *     val X = DenseMatrix(List(1.0f, 2.0f), List(3.0f, 4.0f))
+    *     val y = DenseVector(0.0f, 1.0f)
     *     val model = LogisticRegression(lambda = 1.5f)
-    *     val fittedModel = ev.fit(model, X, y)
-    * }}}
+    *     val fittedModel = model.fit(X, y)
+    *   }}}
     */
   def apply(lambda: Float = 0.0f): LogisticRegression = {
     require(lambda >= 0.0f, "L2 regularization strength must be non-negative")
diff --git a/src/main/scala/io/picnicml/doddlemodel/linear/PoissonRegression.scala b/src/main/scala/io/picnicml/doddlemodel/linear/PoissonRegression.scala
index 9e8b2237..8df43dbc 100644
--- a/src/main/scala/io/picnicml/doddlemodel/linear/PoissonRegression.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/linear/PoissonRegression.scala
@@ -15,16 +15,18 @@ object PoissonRegression {
 
   /** Create a regularized Poisson regression model.
     *
-    * @param lambda L2 regularization strength, must be non-negative, 0.0 means no regularization
+    * @param lambda L2 regularization strength - must be non-negative, 0.0 means no regularization
     *
-    * @example Create and fit a regularized Poisson regression model with lambda = 1.5.
+    * @example Create and fit a regularized Poisson regression model with lambda 1.5.
     *   {{{
-    *     import io.picnicml.doddlemodel.linear.PoissonRegression.ev
+    *     import breeze.linalg.{DenseMatrix, DenseVector}
+    *     import io.picnicml.doddlemodel.syntax.RegressorSyntax._
+    *     import io.picnicml.doddlemodel.linear.PoissonRegression
     *
-    *     val X: Features = DenseMatrix(List(1.0, 2.0), List(3.0, 4.0))
-    *     val y: Target = DenseVector(-3.0, 2.0)
+    *     val X = DenseMatrix(List(1.0f, 2.0f), List(3.0f, 4.0f))
+    *     val y = DenseVector(-3.0f, 2.0f)
     *     val model = PoissonRegression(lambda = 1.5f)
-    *     val fittedModel = ev.fit(model, X, y)
+    *     val fittedModel = model.fit(X, y)
     *   }}}
     */
   def apply(lambda: Float = 0.0f): PoissonRegression = {
diff --git a/src/main/scala/io/picnicml/doddlemodel/linear/SoftmaxClassifier.scala b/src/main/scala/io/picnicml/doddlemodel/linear/SoftmaxClassifier.scala
index e1e0bd43..5193ae36 100644
--- a/src/main/scala/io/picnicml/doddlemodel/linear/SoftmaxClassifier.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/linear/SoftmaxClassifier.scala
@@ -16,16 +16,18 @@ object SoftmaxClassifier {
 
   /** Create a regularized softmax model.
     *
-    * @param lambda L2 regularization strength, must be non-negative, 0.0 means no regularization
+    * @param lambda L2 regularization strength - must be non-negative, 0.0 means no regularization
     *
-    * @example Create and fit a regularized softmax classifier with lambda = 1.5.
+    * @example Create and fit a regularized softmax classifier with lambda 1.5.
     *   {{{
-    *     import io.picnicml.doddlemodel.linear.SoftmaxClassifier.ev
+    *     import breeze.linalg.{DenseMatrix, DenseVector}
+    *     import io.picnicml.doddlemodel.linear.SoftmaxClassifier
+    *     import io.picnicml.doddlemodel.syntax.ClassifierSyntax._
     *
-    *     val X: Features = DenseMatrix(List(1.0, 2.0), List(3.0, 4.0))
-    *     val y: Target = DenseVector(0.0, 1.0)
+    *     val X = DenseMatrix(List(1.0f, 2.0f), List(3.0f, 4.0f))
+    *     val y = DenseVector(0.0f, 1.0f)
     *     val model = SoftmaxClassifier(lambda = 1.5f)
-    *     val fittedModel = ev.fit(model, X, y)
+    *     val fittedModel = model.fit(X, y)
     *   }}}
     */
   def apply(lambda: Float = 0.0f): SoftmaxClassifier = {

From 9df160c920e097b30348d9d059ab9e2f64ffa11e Mon Sep 17 00:00:00 2001
From: Matej Klemen <matej.klemen1337@gmail.com>
Date: Tue, 15 Oct 2019 11:42:44 +0200
Subject: [PATCH 14/15] Consistency fix for linear docs (document object, not
 case class)

---
 .../scala/io/picnicml/doddlemodel/linear/LinearRegression.scala | 2 +-
 .../io/picnicml/doddlemodel/linear/LogisticRegression.scala     | 2 +-
 .../io/picnicml/doddlemodel/linear/PoissonRegression.scala      | 2 +-
 .../io/picnicml/doddlemodel/linear/SoftmaxClassifier.scala      | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/main/scala/io/picnicml/doddlemodel/linear/LinearRegression.scala b/src/main/scala/io/picnicml/doddlemodel/linear/LinearRegression.scala
index e5c1b92f..5739c4e8 100644
--- a/src/main/scala/io/picnicml/doddlemodel/linear/LinearRegression.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/linear/LinearRegression.scala
@@ -4,11 +4,11 @@ import cats.syntax.option._
 import io.picnicml.doddlemodel.data.{Features, RealVector, Target}
 import io.picnicml.doddlemodel.linear.typeclasses.LinearRegressor
 
-/** An immutable multiple linear regression model with ridge regularization. */
 case class LinearRegression private (lambda: Float, private val w: Option[RealVector]) {
   private var yPredCache: Target = _
 }
 
+/** An immutable multiple linear regression model with ridge regularization. */
 object LinearRegression {
 
   /** Create a regularized linear regression model.
diff --git a/src/main/scala/io/picnicml/doddlemodel/linear/LogisticRegression.scala b/src/main/scala/io/picnicml/doddlemodel/linear/LogisticRegression.scala
index 5d32f17b..efc3aa24 100644
--- a/src/main/scala/io/picnicml/doddlemodel/linear/LogisticRegression.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/linear/LogisticRegression.scala
@@ -6,11 +6,11 @@ import cats.syntax.option._
 import io.picnicml.doddlemodel.data.{Features, RealVector, Simplex, Target}
 import io.picnicml.doddlemodel.linear.typeclasses.LinearClassifier
 
-/** An immutable multiple logistic regression model with ridge regularization. */
 case class LogisticRegression private (lambda: Float, numClasses: Option[Int], private val w: Option[RealVector]) {
   private var yPredProbaCache: RealVector = _
 }
 
+/** An immutable multiple logistic regression model with ridge regularization. */
 object LogisticRegression {
 
   /** Create a regularized logistic regression model.
diff --git a/src/main/scala/io/picnicml/doddlemodel/linear/PoissonRegression.scala b/src/main/scala/io/picnicml/doddlemodel/linear/PoissonRegression.scala
index 8df43dbc..1cf50476 100644
--- a/src/main/scala/io/picnicml/doddlemodel/linear/PoissonRegression.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/linear/PoissonRegression.scala
@@ -6,11 +6,11 @@ import cats.syntax.option._
 import io.picnicml.doddlemodel.data.{Features, RealVector, Target}
 import io.picnicml.doddlemodel.linear.typeclasses.LinearRegressor
 
-/** An immutable multiple Poisson regression model with ridge regularization. */
 case class PoissonRegression private (lambda: Float, private val w: Option[RealVector]) {
   private var yPredMeanCache: Target = _
 }
 
+/** An immutable multiple Poisson regression model with ridge regularization. */
 object PoissonRegression {
 
   /** Create a regularized Poisson regression model.
diff --git a/src/main/scala/io/picnicml/doddlemodel/linear/SoftmaxClassifier.scala b/src/main/scala/io/picnicml/doddlemodel/linear/SoftmaxClassifier.scala
index 5193ae36..ae0871d4 100644
--- a/src/main/scala/io/picnicml/doddlemodel/linear/SoftmaxClassifier.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/linear/SoftmaxClassifier.scala
@@ -7,11 +7,11 @@ import io.picnicml.doddlemodel.data.{Features, RealVector, Simplex, Target}
 import io.picnicml.doddlemodel.linear.typeclasses.LinearClassifier
 import io.picnicml.doddlemodel.syntax.OptionSyntax._
 
-/** An immutable multiple multinomial regression model with ridge regularization. */
 case class SoftmaxClassifier private (lambda: Float, numClasses: Option[Int], private val w: Option[RealVector]) {
   private var yPredProbaCache: Simplex = _
 }
 
+/** An immutable multiple multinomial regression model with ridge regularization. */
 object SoftmaxClassifier {
 
   /** Create a regularized softmax model.

From ea6454913408ad83ddcabb1eebd433e9eeb9e625 Mon Sep 17 00:00:00 2001
From: Matej Klemen <matej.klemen1337@gmail.com>
Date: Tue, 15 Oct 2019 13:44:52 +0200
Subject: [PATCH 15/15] Verify and improve docs for `modelselection`

---
 .../modelselection/CrossValidation.scala      | 51 ++++++++++++++-----
 .../modelselection/GroupKFoldSplitter.scala   |  7 ++-
 .../modelselection/HyperparameterSearch.scala | 23 ++-------
 .../modelselection/KFoldSplitter.scala        |  7 ++-
 4 files changed, 53 insertions(+), 35 deletions(-)

diff --git a/src/main/scala/io/picnicml/doddlemodel/modelselection/CrossValidation.scala b/src/main/scala/io/picnicml/doddlemodel/modelselection/CrossValidation.scala
index c3798426..5d631f23 100644
--- a/src/main/scala/io/picnicml/doddlemodel/modelselection/CrossValidation.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/modelselection/CrossValidation.scala
@@ -15,10 +15,33 @@ class CrossValidation private (val metric: Metric, val dataSplitter: DataSplitte
 
   /** Obtain the average score of all folds.
     *
-    * @param reusable indicates whether to shutdown the thread pool after the cv score is computed
-    *  and by default it is, if the same CrossValidation instance is needed after the first call
-    *  to score(...), bring implicit CrossValReusable(true) to scope and call CrossValidation.shutdownNow()
-    *  after the instance is not needed anymore
+    * @param reusable indicates whether to shut down the thread pool after the cv score is computed.
+    *  By default it is shut down.
+    *
+    *  @note If the same `CrossValidation` instance is needed after the first call to `score(...)`, bring an implicit
+    *        `CrossValReusable(true)` to scope and call `CrossValidation.shutdownNow()` after the instance is not
+    *        needed anymore.
+    *
+    *  @example Reuse a `CrossValidation` instance.
+    *   {{{
+    *     import breeze.linalg.{DenseMatrix, DenseVector}
+    *     import io.picnicml.doddlemodel.metrics.rmse
+    *     import io.picnicml.doddlemodel.linear.LogisticRegression
+    *     import io.picnicml.doddlemodel.modelselection.{CrossValidation, KFoldSplitter}
+    *     import io.picnicml.doddlemodel.modelselection.CrossValReusable
+    *
+    *     implicit val cvReusable = CrossValReusable(true)
+    *     val X = DenseMatrix(List(1.0f, 2.0f), List(3.0f, 4.0f), List(5.0f, 6.0f), List(7.0f, 8.0f))
+    *     val y = DenseVector(0.0f, 1.0f, 0.0f, 1.0f)
+    *     val model = LogisticRegression(1.0f)
+    *
+    *     val splitter = KFoldSplitter(numFolds = 2)
+    *     val cv = CrossValidation(metric = rmse, dataSplitter = splitter)
+    *     cv.score(model, X, y)
+    *     // would throw a `RejectedExecutionException` if an implicit `CrossValReusable` instance was not defined
+    *     cv.score(model, X, y)
+    *     cv.shutdownNow()
+    *   }}}
     */
   def score[A](model: A, x: Features, y: Target, groups: Option[IntVector] = none)
               (implicit ev: Predictor[A],
@@ -43,7 +66,7 @@ class CrossValidation private (val metric: Metric, val dataSplitter: DataSplitte
 
   /**
     * Shuts down the current thread pool. Call this if the CrossValidation instance is not needed
-    * anymore and CrossValReusable(true) is in scope.
+    * anymore and `CrossValReusable(true)` is in scope.
     */
   def shutdownNow(): Unit = this.ec.shutdownNow()
  }
@@ -58,16 +81,18 @@ object CrossValidation {
     * @example Perform 2-fold cross validation using logistic regression and evaluate its performance
     *          using root mean squared error.
     *   {{{
-    *   import io.picnicml.doddlemodel.metrics.rmse
-    *   import io.picnicml.doddlemodel.linear.LogisticRegression
+    *     import breeze.linalg.{DenseMatrix, DenseVector}
+    *     import io.picnicml.doddlemodel.metrics.rmse
+    *     import io.picnicml.doddlemodel.linear.LogisticRegression
+    *     import io.picnicml.doddlemodel.modelselection.{CrossValidation, KFoldSplitter}
     *
-    *   val X: Features = DenseMatrix(List(1.0, 2.0), List(3.0, 4.0), List(5.0, 6.0), List(7.0, 8.0))
-    *   val y: Target = DenseVector(0.0, 1.0, 0.0, 1.0)
-    *   val model = LogisticRegression(1.0)
+    *     val X = DenseMatrix(List(1.0f, 2.0f), List(3.0f, 4.0f), List(5.0f, 6.0f), List(7.0f, 8.0f))
+    *     val y = DenseVector(0.0f, 1.0f, 0.0f, 1.0f)
+    *     val model = LogisticRegression(1.0f)
     *
-    *   val splitter = KFoldSplitter(numFolds = 2)
-    *   val cv = CrossValidation(metric = rmse, dataSplitter = splitter))
-    *   cv.score(model, X, y)
+    *     val splitter = KFoldSplitter(numFolds = 2)
+    *     val cv = CrossValidation(metric = rmse, dataSplitter = splitter)
+    *     cv.score(model, X, y)
     *   }}}
     *
     * @see [[io.picnicml.doddlemodel.metrics Metrics in doddle-model]]
diff --git a/src/main/scala/io/picnicml/doddlemodel/modelselection/GroupKFoldSplitter.scala b/src/main/scala/io/picnicml/doddlemodel/modelselection/GroupKFoldSplitter.scala
index 0af085fb..427f4451 100644
--- a/src/main/scala/io/picnicml/doddlemodel/modelselection/GroupKFoldSplitter.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/modelselection/GroupKFoldSplitter.scala
@@ -61,8 +61,11 @@ object GroupKFoldSplitter {
     * @example Split 10 examples, corresponding to data of 3 patients into 3 folds, making sure that data of a patient
     *          never appears in both training and test set in the same fold.
     * {{{
-    *   val patientFeatures = DenseMatrix.rand(10, 3)
-    *   val isSick = DenseVector(0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0)
+    *   import breeze.linalg.{DenseMatrix, DenseVector, convert}
+    *   import io.picnicml.doddlemodel.modelselection.GroupKFoldSplitter
+    *
+    *   val patientFeatures = convert(DenseMatrix.rand(10, 3), Float)
+    *   val isSick = DenseVector(0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f)
     *   val idPatient = DenseVector(1, 2, 2, 0, 0, 0, 2, 1, 1, 2)
     *
     *   val splitter = GroupKFoldSplitter(numFolds = 3)
diff --git a/src/main/scala/io/picnicml/doddlemodel/modelselection/HyperparameterSearch.scala b/src/main/scala/io/picnicml/doddlemodel/modelselection/HyperparameterSearch.scala
index 49688bdb..f1cf2e88 100644
--- a/src/main/scala/io/picnicml/doddlemodel/modelselection/HyperparameterSearch.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/modelselection/HyperparameterSearch.scala
@@ -7,21 +7,6 @@ import io.picnicml.doddlemodel.typeclasses.Predictor
 
 import scala.util.Random
 
-
-/** A parallel hyperparameter search using n-fold cross validation.
-  *
-  * @param numIterations number of predictors for which the cross validation score is calculated
-  * @param verbose flag that specifies whether validation score of the selected model is
-  *                printed to standard output
-  *
-  * Examples:
-  * val splitter = KFoldSplitter(numFolds = 3)
-  * val cv: CrossValidation = CrossValidation(metric = accuracy, dataSplitter = splitter)
-  * val search = HyperparameterSearch(numIterations = 3, crossValidation = cv)
-  * val bestModel = search.bestOf(x, y) {
-  *   LogisticRegression(lambda = gamma.draw())
-  * }
-  */
 class HyperparameterSearch private (val numIterations: Int, val crossVal: CrossValidation, verbose: Boolean) {
 
   implicit val cvReusable: CrossValReusable = CrossValReusable(true)
@@ -63,16 +48,18 @@ object HyperparameterSearch {
     * @example Search among 3 different regularization values (0.1, 0.2, 0.5) for logistic regression using
     *          3-fold cross validation and store the (re-fitted on entire dataset) model that obtains highest accuracy.
     * {{{
+    *   import breeze.linalg.{DenseMatrix, DenseVector, convert}
     *   import io.picnicml.doddlemodel.metrics.accuracy
     *   import io.picnicml.doddlemodel.linear.LogisticRegression
+    *   import io.picnicml.doddlemodel.modelselection.{CrossValidation, KFoldSplitter, HyperparameterSearch}
     *
-    *   val x = DenseMatrix.rand(10, 3)
-    *   val y = DenseVector(0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0)
+    *   val x = convert(DenseMatrix.rand(10, 3), Float)
+    *   val y = DenseVector(0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f)
     *
     *   val splitter = KFoldSplitter(numFolds = 3)
     *   val cv = CrossValidation(metric = accuracy, dataSplitter = splitter)
     *   val search = HyperparameterSearch(numIterations = 3, crossValidation = cv)
-    *   val lambdas = List(0.1, 0.2, 0.5).iterator
+    *   val lambdas = List(0.1f, 0.2f, 0.5f).iterator
     *
     *   val modelBestParams = search.bestOf(x, y) {
     *     LogisticRegression(lambda = lambdas.next)
diff --git a/src/main/scala/io/picnicml/doddlemodel/modelselection/KFoldSplitter.scala b/src/main/scala/io/picnicml/doddlemodel/modelselection/KFoldSplitter.scala
index c2877269..78770317 100644
--- a/src/main/scala/io/picnicml/doddlemodel/modelselection/KFoldSplitter.scala
+++ b/src/main/scala/io/picnicml/doddlemodel/modelselection/KFoldSplitter.scala
@@ -60,8 +60,11 @@ object KFoldSplitter {
     *
     * @example Split data into 3 folds.
     * {{{
-    *   val x = DenseMatrix.rand(7, 2)
-    *   val y = DenseVector(0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0)
+    *   import breeze.linalg.{DenseMatrix, DenseVector, convert}
+    *   import io.picnicml.doddlemodel.modelselection.KFoldSplitter
+    *
+    *   val x = convert(DenseMatrix.rand(7, 2), Float)
+    *   val y = DenseVector(0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f)
     *
     *   val splitter = KFoldSplitter(numFolds = 3)
     *   splitter.splitData(x, y)