[MRG] Matern kernel by jmetzen · Pull Request #3885 · scikit-learn/scikit-learn · GitHub
[MRG] Matern kernel #3885


Status: Closed · wants to merge 4 commits
1 change: 1 addition & 0 deletions doc/modules/classes.rst
@@ -883,6 +883,7 @@ See the :ref:`metrics` section of the user guide for further details.
metrics.pairwise.pairwise_kernels
metrics.pairwise.polynomial_kernel
metrics.pairwise.rbf_kernel
metrics.pairwise.matern_kernel
metrics.pairwise_distances
metrics.pairwise_distances_argmin
metrics.pairwise_distances_argmin_min
61 changes: 61 additions & 0 deletions doc/modules/metrics.rst
@@ -123,6 +123,62 @@ between two vectors. This kernel is defined as:
where ``x`` and ``y`` are the input vectors. If :math:`\gamma = \sigma^{-2}`
the kernel is known as the Gaussian kernel of variance :math:`\sigma^2`.

Matérn kernel
-------------
The function :func:`matern_kernel` is a generalization of the RBF kernel. It has
an additional parameter :math:`\nu` (set via the keyword argument ``coef0``) which
controls the smoothness of the resulting function. The general functional form of
the Matérn kernel is given by

.. math::

k(d) = \sigma^2\frac{1}{\Gamma(\nu)2^{\nu-1}}\Bigg(\gamma\sqrt{2\nu} d\Bigg)^\nu K_\nu\Bigg(\gamma\sqrt{2\nu} d\Bigg),

where :math:`d=\| x-y \|` and ``x`` and ``y`` are the input vectors.
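
For reference, the general form above can be evaluated directly with SciPy.
Below is a minimal sketch (the helper name ``matern_general`` is hypothetical
and not part of this PR)::

    import numpy as np
    from scipy.special import gamma as gamma_fn, kv

    def matern_general(d, gamma=1.0, nu=1.5, sigma2=1.0):
        # k(d) = sigma^2 * 2^(1 - nu) / Gamma(nu) * (gamma sqrt(2 nu) d)^nu
        #        * K_nu(gamma sqrt(2 nu) d)
        d = np.asarray(d, dtype=float)
        scaled = gamma * np.sqrt(2 * nu) * d
        with np.errstate(invalid="ignore"):  # 0 * inf at d == 0 gives nan
            k = (sigma2 * 2.0 ** (1.0 - nu) / gamma_fn(nu)
                 * scaled ** nu * kv(nu, scaled))
        # The kernel has the well-defined limit k(0) = sigma^2.
        return np.where(d == 0, sigma2, k)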

As :math:`\nu\rightarrow\infty`, the Matérn kernel converges to the RBF kernel.
When :math:`\nu = 1/2`, the Matérn kernel becomes identical to the absolute
exponential kernel, i.e.,

.. math::
k(d) = \sigma^2 \exp \Bigg(-\gamma d \Bigg) \quad \quad \nu= \tfrac{1}{2}

In particular, the cases :math:`\nu = 3/2`:

.. math::
k(d) = \sigma^2 \Bigg(1 + \gamma \sqrt{3} d \Bigg) \exp \Bigg(-\gamma \sqrt{3}d \Bigg) \quad \quad \nu= \tfrac{3}{2}

and :math:`\nu = 5/2`:

.. math::
k(d) = \sigma^2 \Bigg(1 + \gamma \sqrt{5}d +\frac{5}{3} \gamma^2d^2 \Bigg) \exp \Bigg(-\gamma \sqrt{5}d \Bigg) \quad \quad \nu= \tfrac{5}{2}

are popular choices for learning functions that are not infinitely
differentiable (as the RBF kernel assumes) but are at least once
(:math:`\nu = 3/2`) or twice (:math:`\nu = 5/2`) differentiable.
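
These closed forms agree with the general Bessel-function expression. A quick
numerical check, reusing the hypothetical ``matern_general`` sketch from above::

    import numpy as np

    d = np.linspace(0.1, 4.0, 50)
    g = 0.7  # gamma, the inverse length-scale
    closed_32 = (1 + g * np.sqrt(3) * d) * np.exp(-g * np.sqrt(3) * d)
    closed_52 = ((1 + g * np.sqrt(5) * d + 5.0 / 3.0 * g ** 2 * d ** 2)
                 * np.exp(-g * np.sqrt(5) * d))
    assert np.allclose(closed_32, matern_general(d, gamma=g, nu=1.5))
    assert np.allclose(closed_52, matern_general(d, gamma=g, nu=2.5))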

The following example illustrates how the Matérn kernel's covariance decreases
with increasing dissimilarity of the two inputs for different values of ``coef0``
(the parameter :math:`\nu` of the Matérn kernel):

.. figure:: ../auto_examples/metrics/images/plot_matern_kernel_001.png
:target: ../auto_examples/metrics/plot_matern_kernel.html
:align: center

Controlling the smoothness of the learned function via ``coef0`` allows
adapting to the properties of the true underlying functional relation.
The following example shows that support vector regression with a Matérn
kernel and smaller values of ``coef0`` can better approximate a discontinuous
step function:

.. figure:: ../auto_examples/svm/images/plot_svm_matern_kernel_001.png
:target: ../auto_examples/svm/plot_svm_matern_kernel.html
:align: center

See Rasmussen and Williams (2006), p. 84, for further details on the
different variants of the Matérn kernel.
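
A minimal usage sketch of the function added in this PR (the call signature is
inferred from the examples below; treat the exact defaults as an assumption)::

    import numpy as np
    from sklearn.metrics.pairwise import matern_kernel  # added by this PR

    X = np.array([[0.0], [1.0], [3.0]])
    # gamma is the inverse length-scale; coef0 plays the role of nu.
    K = matern_kernel(X, X, gamma=1.0, coef0=1.5)  # (3, 3) kernel matrix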


Chi-squared kernel
------------------
The chi-squared kernel is a very popular choice for training non-linear SVMs in
@@ -172,3 +228,8 @@ The chi squared kernel is most commonly used on histograms (bags) of visual words
International Journal of Computer Vision 2007
http://eprints.pascal-network.org/archive/00002309/01/Zhang06-IJCV.pdf

* Rasmussen, C. E. and Williams, C. K. I.
Gaussian Processes for Machine Learning
The MIT Press, 2006
http://www.gaussianprocess.org/gpml/chapters/

4 changes: 4 additions & 0 deletions doc/whats_new.rst
@@ -75,6 +75,10 @@
for fixed user-provided cross-validation folds.
By `untom <https://github.com/untom>`_.

- Added :func:`metrics.pairwise.matern_kernel`, a kernel that allows
controlling the smoothness of the learned function.
By `Jan Hendrik Metzen`_.


Enhancements
............
6 changes: 6 additions & 0 deletions examples/metrics/README.txt
@@ -0,0 +1,6 @@
.. _metrics_examples:

Metrics
-------

Examples concerning the :mod:`sklearn.metrics` module.
40 changes: 40 additions & 0 deletions examples/metrics/plot_matern_kernel.py
@@ -0,0 +1,40 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

r"""
============================================================================
Matern kernel: influence of coef0 on kernel covariance
============================================================================

The example shows how the kernel covariance decreases with increasing
dissimilarity of the two inputs for different values of coef0 (the parameter
"nu" of the Matern kernel).

See Rasmussen and Williams (2006), p. 84, for details regarding the different
variants of the Matern kernel.

"""
print(__doc__)

# Author: Jan Hendrik Metzen <jhm@informatik.uni-bremen.de>
# Licence: BSD 3 clause


import numpy as np

import matplotlib.pyplot as plt
Member review comment: matplotlib import should be before sklearn

from sklearn.metrics.pairwise import matern_kernel

d = np.linspace(-4, 4, 500)[:, None]

for coef0 in [0.5, 1.5, 2.5, np.inf]:
K = matern_kernel(d, [[0.0]], gamma=1, coef0=coef0)
plt.plot(d[:, 0], K[:, 0], label=coef0)

plt.xlabel("distance")
plt.ylabel("covariance")
plt.yscale("log")
plt.ylim(1e-3, 1e0)
plt.legend(title="coef0")
plt.show()
75 changes: 75 additions & 0 deletions examples/svm/plot_svm_matern_kernel.py
@@ -0,0 +1,75 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

r"""
============================================================================
Support Vector Regression: comparing different variants of Matern kernel
============================================================================

Support Vector Regression with five different variants of the Matern kernel
is compared on a (discontinuous) step function:

* The Matern kernel for coef0==1.5, learning a once differentiable function
* The Matern kernel for coef0==2.5, learning a twice differentiable function
* The Matern kernel for coef0==3.5, learning a three-times differentiable
  function
* The absolute-exponential kernel, which corresponds to a Matern kernel
  with coef0==0.5
* The squared-exponential (RBF) kernel, which corresponds to a Matern kernel
  in the limit of coef0 becoming infinitely large

See Rasmussen and Williams (2006), p. 84, for details regarding the different
variants of the Matern kernel.

The example shows that smaller values of coef0 can better approximate the
discontinuous step function.
"""
print(__doc__)

# Author: Jan Hendrik Metzen <jhm@informatik.uni-bremen.de>
# Licence: BSD 3 clause

from functools import partial

import numpy as np

import matplotlib.pyplot as plt

from sklearn.svm import NuSVR
from sklearn.metrics.pairwise import matern_kernel


np.random.seed(0)

# Train SVR with RBF and Matern kernels and plot resulting
# predictions
x = np.random.uniform(0, 10, 50)
y = (x < 5)

svr_rbf = NuSVR(nu=0.25, C=1e2, kernel="rbf", gamma=0.25)
svr_matern0_5 = NuSVR(nu=0.25, C=1e2,
kernel=partial(matern_kernel, coef0=0.5, gamma=0.25))
svr_matern1_5 = NuSVR(nu=0.25, C=1e2,
kernel=partial(matern_kernel, coef0=1.5, gamma=0.25))
svr_matern2_5 = NuSVR(nu=0.25, C=1e2,
kernel=partial(matern_kernel, coef0=2.5, gamma=0.25))
svr_matern3_5 = NuSVR(nu=0.25, C=1e2,
kernel=partial(matern_kernel, coef0=3.5, gamma=0.25))

svr_rbf.fit(x[:, None], y)
svr_matern0_5.fit(x[:, None], y)
svr_matern1_5.fit(x[:, None], y)
svr_matern2_5.fit(x[:, None], y)
svr_matern3_5.fit(x[:, None], y)

xp = np.linspace(0, 10, 100)
plt.scatter(x, y, c='k', s=25, zorder=10)
plt.plot(xp, xp < 5, label="True", c='k')
plt.plot(xp, svr_rbf.predict(xp[:, None]), label="RBF", c='g')
plt.plot(xp, svr_matern0_5.predict(xp[:, None]), label="Matern(0.5)", c='m')
plt.plot(xp, svr_matern1_5.predict(xp[:, None]), label="Matern(1.5)", c='r')
plt.plot(xp, svr_matern2_5.predict(xp[:, None]), label="Matern(2.5)", c='c')
plt.plot(xp, svr_matern3_5.predict(xp[:, None]), label="Matern(3.5)", c='b')
plt.legend(loc='best', title="kernel")
plt.xlabel("input")
plt.ylabel("target")
plt.show()
3 changes: 2 additions & 1 deletion sklearn/decomposition/kernel_pca.py
@@ -24,7 +24,8 @@ class KernelPCA(BaseEstimator, TransformerMixin):
n_components: int or None
Number of components. If None, all non-zero components are kept.

kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "matern" |
"precomputed"
Kernel.
Default: "linear"
