diff --git a/src/main/kotlin/org/jetbrains/bio/genome/Genome.kt b/src/main/kotlin/org/jetbrains/bio/genome/Genome.kt
index c4610fd..f727277 100644
--- a/src/main/kotlin/org/jetbrains/bio/genome/Genome.kt
+++ b/src/main/kotlin/org/jetbrains/bio/genome/Genome.kt
@@ -243,6 +243,7 @@ class Genome private constructor(
                 genesGTFPath: Path? = null,
                 genesDescriptionsPath: Path? = null
         ) = getOrAdd(build, true) {
+            val chromSizesDir = chromSizesPath.parent
             Genome(
                 build,
                 annotationsConfig = annotationsConfig,
@@ -253,7 +254,7 @@ class Genome private constructor(
                 cytobandsPath = cytobandsPath,
                 repeatsPath = repeatsPath,
                 gapsPath = gapsPath,
-                twoBitPath = twoBitPath,
+                twoBitPath = twoBitPath ?: (chromSizesDir / "$build.2bit").let { if (it.exists) it else null },
                 genesGTFPath = genesGTFPath,
                 genesDescriptionsPath = genesDescriptionsPath
 
diff --git a/src/main/kotlin/org/jetbrains/bio/genome/PeaksInfo.kt b/src/main/kotlin/org/jetbrains/bio/genome/PeaksInfo.kt
index 139f2d6..a7a837c 100644
--- a/src/main/kotlin/org/jetbrains/bio/genome/PeaksInfo.kt
+++ b/src/main/kotlin/org/jetbrains/bio/genome/PeaksInfo.kt
@@ -26,11 +26,13 @@ object PeaksInfo {
 
     private fun Long.formatLongNumber() = String.format("%,d", this).replace(',', ' ')
 
-    fun compute(genomeQuery: GenomeQuery,
-                peaksStream: Stream<Location>,
-                src: URI?,
-                paths: List<Path>,
-                fragment: Fragment = AutoFragment): Map<String, String> {
+    fun compute(
+            genomeQuery: GenomeQuery,
+            peaksStream: Stream<Location>,
+            src: URI?,
+            paths: List<Path>,
+            fragment: Fragment = AutoFragment
+    ): Map<String, String> {
         val peaks = peaksStream.collect(Collectors.toList())
         val peaksLengths = peaks.map { it.length().toDouble() }.toDoubleArray()
         val peaksCount = peaksLengths.count()
diff --git a/src/main/kotlin/org/jetbrains/bio/genome/TestOrganismDataGenerator.kt b/src/main/kotlin/org/jetbrains/bio/genome/TestOrganismDataGenerator.kt
index 56dede5..68f258f 100644
--- a/src/main/kotlin/org/jetbrains/bio/genome/TestOrganismDataGenerator.kt
+++ b/src/main/kotlin/org/jetbrains/bio/genome/TestOrganismDataGenerator.kt
@@ -1,9 +1,12 @@
 package org.jetbrains.bio.genome
 
 import com.google.common.math.IntMath
+import gnu.trove.list.array.TFloatArrayList
 import org.apache.commons.csv.CSVFormat
 import org.apache.log4j.Logger
 import org.jetbrains.bio.Configuration
+import org.jetbrains.bio.big.BigWigFile
+import org.jetbrains.bio.big.FixedStepSection
 import org.jetbrains.bio.genome.sequence.Nucleotide
 import org.jetbrains.bio.genome.sequence.TwoBitWriter
 import org.jetbrains.bio.io.FastaRecord
@@ -26,7 +29,8 @@ object TestOrganismDataGenerator {
             "chr2" to IntMath.pow(10, 6),
             "chr3" to IntMath.pow(10, 6),
             "chrX" to IntMath.pow(10, 6),
-            "chrM" to IntMath.pow(10, 6))
+            "chrM" to IntMath.pow(10, 6)
+    )
 
     @JvmStatic
     fun main(args: Array<String>) {
@@ -58,6 +62,7 @@ object TestOrganismDataGenerator {
         generateCytobands(genome)
         generateRepeats(genome)
         generateCGI(genome)
+        generateMapability(genome)
         LOG.info("Done")
     }
 
@@ -278,4 +283,28 @@ object TestOrganismDataGenerator {
             """.trimMargin().trim())
         }
     }
+
+    /**
+     * Mapability is a wiggle track with 0 for non-mapable nucleotides and 1 for mapable ones.
+     *
+     * It's generated in the same folder as "chrom.sizes" with a filename "mapability.bigWig".
+     *
+     * We only generate mapability for chrX.
+     * This is done to test that the genome mean substitution for no-data chromosome works correctly.
+     */
+    private fun generateMapability(genome: Genome) {
+        LOG.info("Generating mapability bigWig")
+        val path = genome.chromSizesPath.parent / "mapability.bigWig"
+        val gq = genome.toQuery()
+        val chrX = gq["chrX"]!!
+        val random = ThreadLocalRandom.current()
+        val section = FixedStepSection(
+            chrX.name,
+            start = 0,
+            values = TFloatArrayList(
+                (0 until chrX.length).map { if (random.nextInt(5) == 4) 0.0f else 1.0f }.toFloatArray()
+            )
+        )
+        BigWigFile.write(listOf(section), gq.get().map { it.name to it.length }, path)
+    }
 }
\ No newline at end of file
diff --git a/src/main/kotlin/org/jetbrains/bio/genome/sequence/CpGContent.kt b/src/main/kotlin/org/jetbrains/bio/genome/sequence/CpGContent.kt
index 6f479d8..08391d2 100644
--- a/src/main/kotlin/org/jetbrains/bio/genome/sequence/CpGContent.kt
+++ b/src/main/kotlin/org/jetbrains/bio/genome/sequence/CpGContent.kt
@@ -1,6 +1,7 @@
 package org.jetbrains.bio.genome.sequence
 
 import com.google.common.annotations.VisibleForTesting
+import org.jetbrains.bio.genome.Chromosome
 import org.jetbrains.bio.genome.Location
 
 /**
@@ -130,6 +131,18 @@ enum class CpGContent {
             }
             return cg
         }
+
+        /**
+         * Slice the given chromosome into bins and return an array containing the mean GC content for each bin.
+         */
+        fun binnedMeanCG(chromosome: Chromosome, binSize: Int): DoubleArray {
+            val sequence = chromosome.sequence
+            return chromosome.range.slice(binSize).mapToDouble { bin ->
+                (bin.startOffset until bin.endOffset).count { pos ->
+                    sequence.charAt(pos).let { it == 'c' || it == 'g' }
+                }.toDouble() / bin.length()
+            }.toArray()
+        }
     }
 }
 
diff --git a/src/main/kotlin/org/jetbrains/bio/statistics/ClassificationModel.kt b/src/main/kotlin/org/jetbrains/bio/statistics/ClassificationModel.kt
index 244e54c..0ad9ecd 100644
--- a/src/main/kotlin/org/jetbrains/bio/statistics/ClassificationModel.kt
+++ b/src/main/kotlin/org/jetbrains/bio/statistics/ClassificationModel.kt
@@ -162,22 +162,29 @@ interface Fitter<out Model : ClassificationModel> {
      * @param maxIter an upper bound on fitting iterations (if applicable).
      * @return guessed classification model.
      */
-    fun guess(preprocessed: Preprocessed<DataFrame>,
-              title: String, threshold: Double, maxIter: Int, attempt: Int): Model
-
-    fun guess(preprocessed: List<Preprocessed<DataFrame>>,
-              title: String, threshold: Double, maxIter: Int, attempt: Int): Model =
-            guess(preprocessed.first(), title, threshold, maxIter, attempt)
-
-    fun fit(preprocessed: Preprocessed<DataFrame>,
+    fun guess(
+            preprocessed: Preprocessed<DataFrame>,
+              title: String, threshold: Double, maxIter: Int, attempt: Int
+    ): Model
+
+    fun guess(
+            preprocessed: List<Preprocessed<DataFrame>>,
+              title: String, threshold: Double, maxIter: Int, attempt: Int
+    ): Model = guess(preprocessed.first(), title, threshold, maxIter, attempt)
+
+    fun fit(
+            preprocessed: Preprocessed<DataFrame>,
             title: String = TITLE, threshold: Double = THRESHOLD,
             maxIter: Int = MAX_ITERATIONS,
-            attempt: Int = 0): Model = fit(listOf(preprocessed), title, threshold, maxIter, attempt)
+            attempt: Int = 0
+    ): Model = fit(listOf(preprocessed), title, threshold, maxIter, attempt)
 
-    fun fit(preprocessed: List<Preprocessed<DataFrame>>,
+    fun fit(
+            preprocessed: List<Preprocessed<DataFrame>>,
             title: String = TITLE, threshold: Double = THRESHOLD,
             maxIter: Int = MAX_ITERATIONS,
-            attempt: Int = 0): Model {
+            attempt: Int = 0
+    ): Model {
         require(threshold > 0) { "threshold $threshold must be >0" }
         require(maxIter > 0) { "maximum number of iterations $maxIter must be >0" }
 
@@ -199,8 +206,10 @@ interface Fitter<out Model : ClassificationModel> {
             require(multiStarts > 1) { "number of starts $multiStarts must be >1" }
         }
 
-        override fun fit(preprocessed: Preprocessed<DataFrame>, title: String,
-                         threshold: Double, maxIter: Int, attempt: Int): Model {
+        override fun fit(
+                preprocessed: Preprocessed<DataFrame>, title: String,
+                         threshold: Double, maxIter: Int, attempt: Int
+        ): Model {
             require(attempt == 0) {
                 "cyclic multistart is not allowed"
             }
@@ -218,8 +227,10 @@ interface Fitter<out Model : ClassificationModel> {
             return msModel
         }
 
-        override fun fit(preprocessed: List<Preprocessed<DataFrame>>, title: String,
-                         threshold: Double, maxIter: Int, attempt: Int): Model {
+        override fun fit(
+                preprocessed: List<Preprocessed<DataFrame>>, title: String,
+                         threshold: Double, maxIter: Int, attempt: Int
+        ): Model {
             require(attempt == 0) {
                 "cyclic multistart is not allowed"
             }
diff --git a/src/main/kotlin/org/jetbrains/bio/statistics/emission/IntegerRegressionEmissionScheme.kt b/src/main/kotlin/org/jetbrains/bio/statistics/emission/IntegerRegressionEmissionScheme.kt
new file mode 100644
index 0000000..71432c5
--- /dev/null
+++ b/src/main/kotlin/org/jetbrains/bio/statistics/emission/IntegerRegressionEmissionScheme.kt
@@ -0,0 +1,243 @@
+package org.jetbrains.bio.statistics.emission
+import org.apache.commons.math3.distribution.FDistribution
+import org.apache.commons.math3.linear.*
+import org.jetbrains.bio.dataframe.DataFrame
+import org.jetbrains.bio.viktor.F64Array
+import org.jetbrains.bio.viktor.asF64Array
+import java.util.function.IntPredicate
+import kotlin.math.abs
+
+/**
+ *
+ * Regression with integer support.
+ * Examples: Poisson, binomial.
+ *
+ * @param covariateLabels the labels that will be used to procure covariates from the supplied [DataFrame]
+ * @param regressionCoefficients the regression coefficients. Note that
+ *     regressionCoefficients.size == covariateLabels.size + 1
+ * because the 0th coefficient corresponds to the intercept, the 1st coefficient corresponds to 0th label etc.
+ *
+ * @author Elena Kartysheva
+ * @date 5/25/19
+ */
+abstract class IntegerRegressionEmissionScheme(
+        covariateLabels: List<String>, regressionCoefficients: DoubleArray
+) : EmissionScheme {
+    val covariateLabels = covariateLabels.map { it.intern() }
+    var regressionCoefficients: DoubleArray = regressionCoefficients
+        protected set
+    @Transient var W = DoubleArray(0)
+
+    /**
+     * Provides the mean function (also known as the inverted link function), h = g^{-1}.
+     *
+     * The generalized linear model can be defined through the link function.
+     *
+     * η = g(μ),
+     * where μ is the distribution mean, η = Xβ is the linear predictor, and g is the link function.
+     *
+     * Conversely,
+     * μ = h(η),
+     * where h = g^{-1} is called the inverse link function, or sometimes the mean function.
+     *
+     * @param eta η = Xβ, the linear predictor
+     * @return μ = h(η), the distribution mean
+     *
+     * Note that it's often more efficient to provide an implementation for the vector-based [meanInPlace].
+     * It's also possible to override [zW] and skip other link-related implementations altogether.
+     */
+    abstract fun mean(eta: Double): Double
+
+    /**
+     * The mean derivative function, h'(η).
+     *
+     * η = Xβ is the linear predictor. See [mean] for more information.
+     *
+     * @param eta η = Xβ, the linear predictor
+     * @return h'(η)
+     *
+     * Note that it's often more efficient to provide an implementation for the vector-based [meanDerivativeInPlace].
+     * It's also possible to override [zW] and skip other link-related implementations altogether.
+     */
+    abstract fun meanDerivative(eta: Double): Double
+
+    /**
+     * The mean variance function, var h(η)
+     *
+     * h(η) is the mean function, η = Xβ is the linear predictor. See [mean] for more information.
+     *
+     * @param mean μ = h(η)
+     * @return var(h(η))
+     *
+     * Note that it's often more efficient to provide an implementation for the vector-based [meanVarianceInPlace].
+     * It's also possible to override [zW] and skip other mean-related implementations altogether.
+     */
+    abstract fun meanVariance(mean: Double): Double
+
+    /**
+     * Samples from the regression given the distribution mean.
+     *
+     * @param mean the distribution mean μ = h(η)
+     */
+    abstract fun sampler(mean: Double): Int
+
+    override val degreesOfFreedom: Int = regressionCoefficients.size
+
+    /**
+     * Apply [mean] function h(η) in place.
+     *
+     * Can be overridden to use more efficient vector operations.
+     * It's also possible to override [zW] and skip other mean-related implementations altogether.
+     *
+     * @param eta η = Xβ, the linear predictor vector. Will be modified to contain μ = h(η).
+     * @return [eta] for easier call chaining.
+     */
+    open fun meanInPlace(eta: F64Array): F64Array {
+        for(i in 0 until eta.size) {
+            eta[i] = mean(eta[i])
+        }
+        return eta
+    }
+
+    /**
+     * Apply [meanDerivative] function h'(η) in place.
+     *
+     * Can be overridden to use more efficient vector operations.
+     * It's also possible to override [zW] and skip other mean-related implementations altogether.
+     *
+     * @param eta η = Xβ, the linear predictor vector. Will be modified to contain h'(η).
+     * @return [eta] for easier call chaining.
+     */
+    open fun meanDerivativeInPlace(eta: F64Array): F64Array {
+        for(i in 0 until eta.size) {
+            eta[i] = meanDerivative(eta[i])
+        }
+        return eta
+    }
+
+    /**
+     * Apply [meanVariance] function var(h(η)) in place.
+     *
+     * Can be overridden to use more efficient vector operations.
+     * It's also possible to override [zW] and skip other mean-related implementations altogether.
+     *
+     * @param mean μ = h(η), the distribution mean vector. Will be modified to contain var(h(η)).
+     * @return [mean] for easier call chaining.
+     */
+    open fun meanVarianceInPlace(mean: F64Array): F64Array {
+        for(i in 0 until mean.size) {
+            mean[i] = meanVariance(mean[i])
+        }
+        return mean
+    }
+
+    /**
+     * Calculates z and W for IRLS as defined in https://bwlewis.github.io/GLM/ .
+     *
+     * Note that the document linked above uses definitions and denotations that differ from
+     * the widely accepted ones.
+     * In particular, "GLMs, abridged" uses the term "link function" and the symbol "g" to describe
+     * what is normally called the mean function (or the inverse link function) and denoted by "h".
+     * It also uses "b" instead of "y" for the response vector and "A" instead of "X" for the model matrix.
+     *
+     * z = η + (y - h(η)) / h'(η)
+     * W = diag(h'(η)^2 / var(h(η)))
+     *
+     * This is the only function necessary for IRLS and [update] to function. You can override it directly
+     * instead of providing implementations for [mean] and friends if there's a more efficient way to compute
+     * z and W. For example, with a logarithmic link, h = h' = var h, which allows to simplify W.
+     *
+     * @param y the response vector
+     * @param eta η = Xβ, the linear predictor. WARNING: [eta] can be modified to reduce copying!
+     * @return z and W as a [Pair] (the diagonal matrix W is stored as a vector)
+     */
+    open fun zW(y: F64Array, eta: F64Array): Pair<F64Array, F64Array> {
+        val countedLink = meanInPlace(eta.copy())
+        val countedLinkDerivative = meanDerivativeInPlace(eta.copy())
+
+        eta += (y - countedLink).apply { divAssign(countedLinkDerivative)}
+
+        meanVarianceInPlace(countedLink)
+
+        countedLinkDerivative *= countedLinkDerivative
+        countedLinkDerivative /= countedLink
+        return eta to countedLinkDerivative
+    }
+
+    override fun update(df: DataFrame, d: Int, weights: F64Array) {
+        val X = generateDesignMatrix(df)
+        val yInt = df.sliceAsInt(df.labels[d])
+        val y = DoubleArray (yInt.size) {yInt[it].toDouble()}.asF64Array()
+        val iterMax = 5
+        val tol = 1e-8
+        var beta0 = regressionCoefficients
+        var beta1 = regressionCoefficients
+        for (i in 0 until iterMax) {
+            val eta = WLSRegression.calculateEta(X, beta0)
+            val (z, W) = zW(y, eta)
+            W *= weights
+            beta1 = WLSRegression.calculateBeta(X, z, W)
+            if ((beta1.zip(beta0) { a, b -> abs(a - b) }).sum() < tol) {
+                break
+            }
+            beta0 = beta1
+        }
+        regressionCoefficients = beta1
+    }
+
+    /**
+     * Generates the model matrix from the dataframe.
+     *
+     * Extracts the labelled covariates from the supplied dataframe and prepends the intercept column.
+     */
+    private fun generateDesignMatrix(df: DataFrame) =
+            WLSRegression.designMatrix(Array(covariateLabels.size) { df.sliceAsDouble(covariateLabels[it]) })
+
+    /**
+     * Calculates η = Xβ, the linear predictor.
+     *
+     * Uses a row from the provided dataframe to get the covariate values.
+     *
+     * @param df dataframe
+     * @param t row number
+     */
+    fun getPredictor(df: DataFrame, t: Int): Double {
+        var res = regressionCoefficients[0]
+        covariateLabels.forEachIndexed { index, label ->
+            res += df.getAsDouble(t, label) * regressionCoefficients[index + 1]
+        }
+        return res
+    }
+
+    override fun sample(df: DataFrame, d: Int, fill: IntPredicate) {
+        val observations = df.sliceAsInt(df.labels[d])
+        (0 until df.rowsNumber).forEach { row ->
+            if (fill.test(row)) {
+                observations[row] = sampler(mean(getPredictor(df, row)))
+            }
+        }
+    }
+
+    /**
+     * Calculates the f-test p-value.
+     *
+     * Performs the f-test for the null hypothesis that Rβ = r and returns the p-value.
+     */
+    fun Ftest(df: DataFrame, d: Int, R: RealMatrix, r: RealVector): Double {
+        val x = Array(covariateLabels.size) {df.sliceAsDouble(covariateLabels[it])}
+        val yInt = df.sliceAsInt(df.labels[d])
+        val y = DoubleArray (yInt.size) {yInt[it].toDouble()}
+        val X = WLSRegression.designMatrix(x)
+        val residuals = WLSRegression.calculateEta(X, regressionCoefficients).apply { minusAssign(y.asF64Array()) }
+        val sigma2 = residuals.dot(W.asF64Array() * residuals) / (X[0].size - X.size)
+        val XTWXI = WLSRegression.calculateBetaVariance(X, W.asF64Array())
+        val RBeta = R.transpose().operate(ArrayRealVector(regressionCoefficients))
+        val RBetaMinusr = RBeta.subtract(r)
+        val inverse = LUDecomposition(R.transpose().multiply(Array2DRowRealMatrix(XTWXI)).multiply(R)).solver.inverse
+        val Fstat = inverse.operate(RBetaMinusr).dotProduct(RBetaMinusr) / (r.dimension * sigma2)
+        val cumulativeProb = FDistribution(
+            r.dimension.toDouble(), (X[0].size - X.size).toDouble()
+        ).cumulativeProbability(Fstat)
+        return 1 - cumulativeProb
+    }
+}
diff --git a/src/main/kotlin/org/jetbrains/bio/statistics/emission/PoissonRegressionEmissionScheme.kt b/src/main/kotlin/org/jetbrains/bio/statistics/emission/PoissonRegressionEmissionScheme.kt
new file mode 100644
index 0000000..bf8b16f
--- /dev/null
+++ b/src/main/kotlin/org/jetbrains/bio/statistics/emission/PoissonRegressionEmissionScheme.kt
@@ -0,0 +1,48 @@
+package org.jetbrains.bio.statistics.emission
+
+import org.apache.commons.math3.util.FastMath
+import org.jetbrains.bio.dataframe.DataFrame
+import org.jetbrains.bio.statistics.MoreMath
+import org.jetbrains.bio.statistics.distribution.Sampling
+import org.jetbrains.bio.viktor.F64Array
+import kotlin.math.exp
+
+/**
+ *
+ * Poisson regression.
+ *
+ * @author Elena Kartysheva
+ * @date 5/25/19
+ */
+class PoissonRegressionEmissionScheme(
+        covariateLabels: List<String>,
+        regressionCoefficients: DoubleArray
+) : IntegerRegressionEmissionScheme(covariateLabels, regressionCoefficients) {
+
+    override fun mean(eta: Double) = exp(eta)
+    override fun meanDerivative(eta: Double) = exp(eta)
+    override fun meanVariance(mean: Double) = mean
+
+    override fun sampler(mean: Double) = Sampling.samplePoisson(mean)
+
+    override fun meanInPlace(eta: F64Array) = eta.apply { expInPlace() }
+    override fun meanDerivativeInPlace(eta: F64Array) = eta.apply { expInPlace() }
+    override fun meanVarianceInPlace(mean: F64Array) = mean
+
+    override fun zW(y: F64Array, eta: F64Array): Pair<F64Array, F64Array> {
+        // Since h(η) = h'(η) = var(h(η)), we can skip h'(η) and var(h(η)) calculations and simplify W:
+        // W = diag(h'(η)^2 / var(h(η))) = h(η)
+        val countedLink = meanInPlace(eta.copy())
+        eta += (y - countedLink).apply { divAssign(countedLink) }
+        return eta to countedLink
+    }
+
+    override fun logProbability(df: DataFrame, t: Int, d: Int): Double {
+        // We don't use the existing Poisson log probability because that saves us one logarithm.
+        // We would have to provide lambda = exp(logLambda), and the Poisson implementation would then have to
+        // calculate log(lambda) again.
+        val logLambda = getPredictor(df, t)
+        val y = df.getAsInt(t, df.labels[d])
+        return y * logLambda - MoreMath.factorialLog(y) - FastMath.exp(logLambda)
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/org/jetbrains/bio/statistics/emission/WLSRegression.kt b/src/main/kotlin/org/jetbrains/bio/statistics/emission/WLSRegression.kt
new file mode 100644
index 0000000..783e05f
--- /dev/null
+++ b/src/main/kotlin/org/jetbrains/bio/statistics/emission/WLSRegression.kt
@@ -0,0 +1,77 @@
+package org.jetbrains.bio.statistics.emission
+
+import org.apache.commons.math3.linear.Array2DRowRealMatrix
+import org.apache.commons.math3.linear.LUDecomposition
+import org.jetbrains.bio.viktor.F64Array
+import org.jetbrains.bio.viktor.asF64Array
+
+/**
+ * A utility object with various methods useful for WLS and IRLS.
+ */
+object WLSRegression {
+
+    /**
+     * Converts x to a design matrix. No copying of x is performed.
+     *
+     * Prepends an appropriate-sized array filled with 1.0.
+     */
+    fun designMatrix(x: Array<DoubleArray>): Array<DoubleArray> {
+        check(x.isNotEmpty()) { "empty matrix" }
+        check(x.drop(1).all { it.size == x[0].size }) { "different column sizes" }
+        return Array(x.size + 1) { if (it == 0) DoubleArray(x[0].size) { 1.0 } else x[it - 1] }
+    }
+
+    /**
+     * Calculates η = Xβ.
+     *
+     * @param x a matrix that is stored column-first
+     * @param beta a vector of appropriate size
+     */
+    fun calculateEta(x: Array<DoubleArray>, beta: DoubleArray): F64Array {
+        check(x.isNotEmpty()) { "empty matrix" }
+        check(x.size == beta.size) { "X and beta have different size" }
+        check(x.drop(1).all { it.size == x[0].size }) { "different column sizes" }
+        val xColumnsTimesBetaElements = Array(x.size) { i -> x[i].asF64Array() * beta[i] }
+        xColumnsTimesBetaElements.drop(1).forEach { xColumnsTimesBetaElements[0].plusAssign(it) }
+        return xColumnsTimesBetaElements[0]
+    }
+
+    /**
+     * Calculates WLS estimator β.
+     *
+     * β = (X^T W X)^{-1} X^T W y, where X is the design matrix, y is the response vector,
+     * and W = diag(w), where w is the weight vector.
+     *
+     * @param x the design matrix X that is stored column-first
+     * @param y the response vector y
+     * @param weights the weight vector w
+     */
+    fun calculateBeta(x: Array<DoubleArray>, y: F64Array, weights: F64Array): DoubleArray {
+        check(weights.size == y.size) { "weights and y have different size" }
+        check(x.all { it.size == weights.size }) { "X columns and weights vector have different size" }
+
+        val inverse = calculateBetaVariance(x, weights)
+        val Wy = weights * y
+        val XTWy = DoubleArray(x.size) { i -> x[i].asF64Array().dot(Wy) }
+        return DoubleArray(x.size) { inverse[it].asF64Array().dot(XTWy) }
+    }
+
+    /**
+     * Calculates (X^T W X)^{-1}, the variance multiplier of Var beta.
+     *
+     * Var beta = sigma^2 (X^T W X)^{-1}
+     *
+     * @return (X^T W X)^{-1}. Since the matrix is symmetric, the storage orientation is not important.
+     */
+    fun calculateBetaVariance(x: Array<DoubleArray>, weights: F64Array): Array<DoubleArray> {
+        // the resulting matrix is symmetric by construction, let's not waste time computing the upper half
+        val XTWX = Array(x.size) { i ->
+            DoubleArray(x.size) { j ->
+                if (i < j) 0.0 else x[i].asF64Array().times(weights).dot(x[j].asF64Array())
+            }
+        }
+        // fill the upper half
+        XTWX.indices.forEach { i -> XTWX.indices.forEach { j -> if (i < j) XTWX[i][j] = XTWX[j][i] }}
+        return (LUDecomposition(Array2DRowRealMatrix(XTWX)).solver.inverse as Array2DRowRealMatrix).dataRef
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/org/jetbrains/bio/statistics/gson/GSONUtil.kt b/src/main/kotlin/org/jetbrains/bio/statistics/gson/GSONUtil.kt
index 2427709..d6c7206 100644
--- a/src/main/kotlin/org/jetbrains/bio/statistics/gson/GSONUtil.kt
+++ b/src/main/kotlin/org/jetbrains/bio/statistics/gson/GSONUtil.kt
@@ -75,7 +75,7 @@ object GSONUtil {
                 val fqnElement = element.asJsonObject.remove(fqnField)
                 if (fqnElement == null) {
                     deserializationError("Class name (%s) field is missing." +
-                                         " Please, recalculate the model.", fqnField)
+                            " Please, recalculate the model.", fqnField)
                 }
                 val fqn = fqnElement!!.asString
 
@@ -99,17 +99,21 @@ object GSONUtil {
     }
 
     /**
-     * Same as [classAwareAdapter], but also saves the contents of a static field named "VERSION" and later
+     * Same as [classAwareAdapter], but also saves the contents of a static int field named "VERSION" and later
      * checks it against the actual contents of this field when loading.
      * Helps to work around the changes in the class field layout;
      * a version change is more self-explanatory than a random field reading failure.
      */
-    @JvmStatic fun <T : Any> classAndVersionAdapter(gson: Gson, factory: TypeAdapterFactory,
-                                                    fqnField: String, versionField: String): TypeAdapter<T> {
+    @JvmStatic fun <T : Any> classAndVersionAdapter(
+            gson: Gson,
+            factory: TypeAdapterFactory,
+            fqnField: String,
+            versionField: String
+    ): TypeAdapter<T> {
         return object : TypeAdapter<T>() {
             override fun write(out: JsonWriter, value: T) {
                 val modelFQN = value.javaClass.name
-                val modelVersion = getSerializationFormatVersion(value.javaClass)
+                val modelVersion = getSerializationFormatVersion(value.javaClass).toInt()
 
                 val delegate = gson.getDelegateAdapter(factory, TypeToken.get(value.javaClass))
 
@@ -127,9 +131,11 @@ object GSONUtil {
                 val element = Streams.parse(`in`)
                 val modelFQNElement = element.asJsonObject.remove(fqnField)
                 val modelFormatVersElement = element.asJsonObject.remove(versionField)
-                if (modelFQNElement == null || modelFormatVersElement == null) {
-                    deserializationError("Model class name (%s) or version field (%s) is missing."
-                                         + " Please, recalculate the model.", fqnField, versionField)
+                if (modelFQNElement == null) {
+                    deserializationError("Class name ($fqnField) is missing.")
+                }
+                if (modelFormatVersElement == null) {
+                    deserializationError("Version field ($versionField) is missing.")
                 }
                 val fqn = modelFQNElement!!.asString
                 val vers = modelFormatVersElement!!.asString
@@ -138,15 +144,15 @@ object GSONUtil {
                 try {
                     aClass = Class.forName(fqn) as Class<T>
                 } catch (e: ClassNotFoundException) {
-                    deserializationError("Cannot load model class %s", fqn, cause = e)
+                    deserializationError("Cannot load class %s", fqn, cause = e)
                     return null
                 }
 
 
                 val recentVersion = getSerializationFormatVersion(aClass)
                 if (recentVersion != vers) {
-                    deserializationError("Format has changed, '%s' expects '%s' version, but was '%s'",
-                                         fqn, recentVersion, vers)
+                    deserializationError("Format has changed, '%s' expects '%s' version, but got '%s'",
+                        fqn, recentVersion, vers)
                 }
 
                 val adapter = gson.getDelegateAdapter(factory, TypeToken.get(aClass))
@@ -161,8 +167,8 @@ object GSONUtil {
                     return (aClass.getDeclaredField("VERSION").get(null)).toString()
                 } catch (e: Exception) {
                     deserializationError("Cannot get serialization format version." +
-                                         " Probably VERSION field is missing in %s. Exception message: %s",
-                                         aClass.name, e.message, cause = e)
+                            " Probably VERSION field is missing in %s. Exception message: %s",
+                        aClass.name, e.message, cause = e)
                     return "" // this statement can never be reached
                 }
             }
diff --git a/src/main/kotlin/org/jetbrains/bio/statistics/mixture/Internals.kt b/src/main/kotlin/org/jetbrains/bio/statistics/mixture/Internals.kt
index 876e746..b31b7b0 100644
--- a/src/main/kotlin/org/jetbrains/bio/statistics/mixture/Internals.kt
+++ b/src/main/kotlin/org/jetbrains/bio/statistics/mixture/Internals.kt
@@ -14,7 +14,7 @@ import org.jetbrains.bio.viktor._I
 object MixtureInternals {
     @JvmStatic
     fun predict(logGammas: F64Array): IntArray {
-        return IntArray(logGammas.shape[0]) { logGammas.V[_I, it].argMax() }
+        return IntArray(logGammas.shape[1]) { logGammas.V[_I, it].argMax() }
     }
 
     @JvmStatic
diff --git a/src/main/kotlin/org/jetbrains/bio/statistics/mixture/MLAbstractMixture.kt b/src/main/kotlin/org/jetbrains/bio/statistics/mixture/MLAbstractMixture.kt
index 56b9d2d..e4acf36 100644
--- a/src/main/kotlin/org/jetbrains/bio/statistics/mixture/MLAbstractMixture.kt
+++ b/src/main/kotlin/org/jetbrains/bio/statistics/mixture/MLAbstractMixture.kt
@@ -14,7 +14,7 @@ import org.jetbrains.bio.viktor.F64Array
  * @since 06/09/13
  */
 abstract class MLAbstractMixture(protected val numComponents: Int, weights: F64Array) : ClassificationModel {
-    protected val logWeights: F64Array = weights.log()
+    val logWeights: F64Array = weights.log()
 
     val weights: F64Array get() = logWeights.exp()
 
diff --git a/src/main/kotlin/org/jetbrains/bio/statistics/mixture/MLFreeMixture.kt b/src/main/kotlin/org/jetbrains/bio/statistics/mixture/MLFreeMixture.kt
index cdcebc5..c3b4180 100644
--- a/src/main/kotlin/org/jetbrains/bio/statistics/mixture/MLFreeMixture.kt
+++ b/src/main/kotlin/org/jetbrains/bio/statistics/mixture/MLFreeMixture.kt
@@ -47,6 +47,16 @@ abstract class MLFreeMixture(numComponents: Int,
         return res.with("state", states)
     }
 
+    open fun sample(df:DataFrame, d: IntArray) {
+        val states = sampleStates(df.rowsNumber)
+        for (t in 0 until numDimensions) {
+            for (i in 0 until numComponents) {
+                getEmissionScheme(i, t).sample(df, d[t], IntPredicate { states[it] == i })
+            }
+
+        }
+    }
+
     override fun degreesOfFreedom(): Int {
         var res = super.degreesOfFreedom()
         for (i in 0 until numComponents) {
diff --git a/src/test/kotlin/org/jetbrains/bio/Tests.kt b/src/test/kotlin/org/jetbrains/bio/Tests.kt
index 568c02f..297e0c7 100644
--- a/src/test/kotlin/org/jetbrains/bio/Tests.kt
+++ b/src/test/kotlin/org/jetbrains/bio/Tests.kt
@@ -1,5 +1,7 @@
 package org.jetbrains.bio
 
+import kotlin.math.abs
+import kotlin.test.assertEquals
 import kotlin.test.assertFalse
 import kotlin.test.assertTrue
 
@@ -49,4 +51,25 @@ object Tests {
         "Regex ${regex.pattern} doesn't match content:\n<$output>"
     )
 
+    fun assertDeepEquals(expected: Array<*>, actual: Array<*>) {
+        assertTrue(expected.contentDeepEquals(actual), "Array contents differ")
+    }
+
+    fun assertEquals(expected: Double, actual: Double, precision: Double, message: String?) {
+        assertTrue(
+            abs(expected - actual) < precision,
+            message ?: "Expected $expected and actual $actual values differ by more than $precision."
+        )
+    }
+
+    fun assertEquals(expected: DoubleArray, actual: DoubleArray, precision: Double) {
+        assertEquals(expected.size, actual.size, "Array sizes differ")
+        expected.indices.forEach {
+            assertEquals(
+                expected[it], actual[it], precision,
+                "Arrays differ at position $it: expected ${expected[it]}, actual ${actual[it]}."
+            )
+        }
+    }
+
 }
\ No newline at end of file
diff --git a/src/test/kotlin/org/jetbrains/bio/genome/sequence/CPgContentTest.kt b/src/test/kotlin/org/jetbrains/bio/genome/sequence/CPgContentTest.kt
index e2ad05f..5a9e534 100644
--- a/src/test/kotlin/org/jetbrains/bio/genome/sequence/CPgContentTest.kt
+++ b/src/test/kotlin/org/jetbrains/bio/genome/sequence/CPgContentTest.kt
@@ -1,7 +1,11 @@
 package org.jetbrains.bio.genome.sequence
 
+import org.jetbrains.bio.Tests
+import org.jetbrains.bio.genome.Genome
+import org.jetbrains.bio.genome.toQuery
 import org.junit.Test
 import kotlin.test.assertEquals
+import kotlin.test.assertTrue
 
 class CpGContentTest {
     @Test fun testComputeCpG() {
@@ -22,4 +26,25 @@ class CpGContentTest {
         assertEquals(CpGContent.HCP, CpGContent.classify("CCCGCG".asNucleotideSequence(), 5))
         assertEquals(CpGContent.HCP, CpGContent.classify("CGCGCG".asNucleotideSequence(), 5))
     }
+
+    @Test fun testBinnedMeanCG() {
+        val chr = Genome["to1"].toQuery()["chr1"]!!
+        val binned100 = CpGContent.binnedMeanCG(chr, 100)
+        val binned200 = CpGContent.binnedMeanCG(chr, 200)
+        assertEquals((chr.length - 1) / 100 + 1, binned100.size)
+        assertEquals((chr.length - 1) / 200 + 1, binned200.size)
+        binned200.forEachIndexed { index, value ->
+            assertTrue(value >= 0, "Mean CG content was negative ($value at bin $index)")
+            assertTrue(value <= 1, "Mean CG content was greater than 1 ($value at bin $index)")
+            if (index < binned200.size -1) {
+                Tests.assertEquals(
+                    (binned100[2 * index] + binned100[2 * index + 1]) / 2,
+                    value,
+                    1E-10, // we want to ignore double arithmetic precision issues
+                    "Expected mean CG $value at 200 bps bin $index to be equal to mean of 100bps bins " +
+                            "${2 * index} (${binned100[2 * index]}) and " +
+                            "${2 * index + 1} (${binned100[2 * index + 1]}).")
+            }
+        }
+    }
 }
diff --git a/src/test/kotlin/org/jetbrains/bio/statistics/ClassificationModelTest.kt b/src/test/kotlin/org/jetbrains/bio/statistics/ClassificationModelTest.kt
index 5a738b5..2b8a863 100644
--- a/src/test/kotlin/org/jetbrains/bio/statistics/ClassificationModelTest.kt
+++ b/src/test/kotlin/org/jetbrains/bio/statistics/ClassificationModelTest.kt
@@ -27,7 +27,7 @@ class ClassificationModelTest {
                      "  \"value\": 1,\n" +
                      "  \"name\": \"bboo\",\n" +
                      "  \"model.class.fqn\": \"org.jetbrains.bio.statistics.Boo\",\n" +
-                     "  \"model.class.format\": \"222\"\n}",
+                     "  \"model.class.format\": 222\n}",
                 path.read())
     }
 
@@ -48,8 +48,7 @@ class ClassificationModelTest {
             path.bufferedWriter().use { gson.toJson(obj, it) }
 
             thrown.expect(JsonParseException::class.java)
-            thrown.expectMessage("Deserialization error: Model class name (model.class.fqn) or version" +
-                                 " field (model.class.format) is missing. Please, recalculate the model.")
+            thrown.expectMessage("Deserialization error: Class name (model.class.fqn) is missing.")
             ClassificationModel.load<Boo>(path)
         }
     }
@@ -60,7 +59,7 @@ class ClassificationModelTest {
         assertEquals("{\n  \"value\": NaN,\n  " +
                      "\"name\": \"NaN-boo\",\n  " +
                      "\"model.class.fqn\": \"org.jetbrains.bio.statistics.NanBoo\",\n  " +
-                     "\"model.class.format\": \"0\"\n}",
+                     "\"model.class.format\": 0\n}",
                 path.read())
 
         assertEquals(nanBoo, ClassificationModel.load<NanBoo>(path))
@@ -76,7 +75,7 @@ class ClassificationModelTest {
 
             thrown.expect(JsonParseException::class.java)
             thrown.expectMessage("Deserialization error: Format has changed, " +
-                                 "'org.jetbrains.bio.statistics.Boo' expects '123' version, but was '222'")
+                                 "'org.jetbrains.bio.statistics.Boo' expects '123' version, but got '222'")
             ClassificationModel.load<Boo>(path)
         }
     }
@@ -92,7 +91,7 @@ class ClassificationModelTest {
 
             thrown.expect(JsonParseException::class.java)
             thrown.expectMessage("Deserialization error: Format has changed, " +
-                                 "'org.jetbrains.bio.statistics.Boo' expects '666' version, but was '222'")
+                                 "'org.jetbrains.bio.statistics.Boo' expects '666' version, but got '222'")
 
             ClassificationModel.load<Boo>(path)
         }
diff --git a/src/test/kotlin/org/jetbrains/bio/statistics/emission/WLSRegressionTest.kt b/src/test/kotlin/org/jetbrains/bio/statistics/emission/WLSRegressionTest.kt
new file mode 100644
index 0000000..2e969cb
--- /dev/null
+++ b/src/test/kotlin/org/jetbrains/bio/statistics/emission/WLSRegressionTest.kt
@@ -0,0 +1,86 @@
+package org.jetbrains.bio.statistics.emission
+
+import org.jetbrains.bio.Tests
+import org.jetbrains.bio.viktor.asF64Array
+import org.junit.Assert.assertEquals
+import org.junit.Test
+import kotlin.test.assertFails
+
+class WLSRegressionTest {
+
+    private val matrix123456 = arrayOf(
+        doubleArrayOf(1.0, 2.0, 3.0),
+        doubleArrayOf(4.0, 5.0, 6.0)
+    )
+
+    @Test fun testDesignMatrix() {
+        // column size mismatch
+        assertFails {
+            WLSRegression.designMatrix(arrayOf(doubleArrayOf(1.0), doubleArrayOf(2.0, 3.0)))
+        }
+        // empty matrix
+        assertFails {
+            WLSRegression.designMatrix(arrayOf())
+        }
+        Tests.assertDeepEquals(
+            arrayOf(
+                doubleArrayOf(1.0, 1.0, 1.0),
+                doubleArrayOf(1.0, 2.0, 3.0),
+                doubleArrayOf(4.0, 5.0, 6.0)
+            ),
+            WLSRegression.designMatrix(matrix123456)
+        )
+    }
+
+    @Test fun testCalculateEta() {
+        assertFails {
+            WLSRegression.calculateEta(emptyArray(), doubleArrayOf(1.0))
+        }
+        assertFails {
+            WLSRegression.calculateEta(matrix123456, doubleArrayOf())
+        }
+        assertFails {
+            WLSRegression.calculateEta(matrix123456, doubleArrayOf(1.0, 2.0, 3.0))
+        }
+        assertEquals(
+            doubleArrayOf(39.0, 54.0, 69.0).asF64Array(),
+            WLSRegression.calculateEta(matrix123456, doubleArrayOf(7.0, 8.0))
+        )
+    }
+
+    @Test fun testCalculateBeta() {
+        assertFails {
+            WLSRegression.calculateBeta(emptyArray(), doubleArrayOf(1.0).asF64Array(), doubleArrayOf(2.0).asF64Array())
+        }
+        assertFails {
+            WLSRegression.calculateBeta(matrix123456, doubleArrayOf().asF64Array(), doubleArrayOf().asF64Array())
+        }
+        assertFails {
+            WLSRegression.calculateBeta(
+                matrix123456,
+                doubleArrayOf(1.0, 2.0).asF64Array(),
+                doubleArrayOf(3.0, 4.0, 5.0).asF64Array()
+            )
+        }
+        assertFails {
+            WLSRegression.calculateBeta(
+                matrix123456,
+                doubleArrayOf(1.0, 2.0, 3.0).asF64Array(),
+                doubleArrayOf(4.0, 5.0).asF64Array()
+            )
+        }
+        // data and reference values generated by R:
+        // > X = data.frame(x1=runif(4), x2=runif(4), y=runif(4))
+        // > w = runif(4)
+        // > lm(y ~ x1 + x2, data=X, weights=w)
+        val x = arrayOf(
+            doubleArrayOf(0.0130203, 0.5831732, 0.1911626, 0.7604341),
+            doubleArrayOf(0.76642767, 0.03048108, 0.52236819, 0.65585551)
+        )
+        val y = doubleArrayOf(0.5986931, 0.8171509, 0.5662360, 0.4927709).asF64Array()
+        val weights = doubleArrayOf(0.180464329, 0.825891734, 0.008628437, 0.696381745).asF64Array()
+        val expectedBeta = doubleArrayOf(0.95045, -0.20516, -0.45993)
+        val actualBeta = WLSRegression.calculateBeta(WLSRegression.designMatrix(x), y, weights)
+        Tests.assertEquals(expectedBeta, actualBeta, 1E-5)
+    }
+}