From 641d0b1d220081857b1d1e9f895ac16d92b1020c Mon Sep 17 00:00:00 2001
From: Jan Hendrik Metzen
Date: Tue, 25 Nov 2014 14:10:06 +0100
Subject: [PATCH 1/4] ADD Matern kernel added to pairwise.py

REFACTOR Performance improvements in matern_kernel based on @afabisch's
suggestions

EXAMPLE Comparing Matern kernels of different smoothness on a step-function.

ENH Support for arbitrary values of coef0 ("nu") in the Matern kernel

EXAMPLE Added illustration of Matern kernel for different values of coef0
---
 examples/metrics/README.txt            |  6 +++
 examples/metrics/plot_matern_kernel.py | 40 ++++++++++++++
 examples/svm/plot_svm_matern_kernel.py | 73 ++++++++++++++++++++++++++
 sklearn/metrics/pairwise.py            | 60 ++++++++++++++++++++-
 4 files changed, 178 insertions(+), 1 deletion(-)
 create mode 100644 examples/metrics/README.txt
 create mode 100644 examples/metrics/plot_matern_kernel.py
 create mode 100644 examples/svm/plot_svm_matern_kernel.py

diff --git a/examples/metrics/README.txt b/examples/metrics/README.txt
new file mode 100644
index 0000000000000..f93905133e44b
--- /dev/null
+++ b/examples/metrics/README.txt
@@ -0,0 +1,6 @@
+.. _metrics_examples:
+
+Metrics
+-------
+
+Examples concerning the :mod:`sklearn.metrics` module.
diff --git a/examples/metrics/plot_matern_kernel.py b/examples/metrics/plot_matern_kernel.py
new file mode 100644
index 0000000000000..7b2d6bf2fefef
--- /dev/null
+++ b/examples/metrics/plot_matern_kernel.py
@@ -0,0 +1,40 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+r"""
+============================================================================
+Matern kernel: influence of coef0 on kernel covariance
+============================================================================
+
+The example shows how the kernel covariance decreases with increasing
+dissimilarity of the two inputs for different values of coef0 (the parameter
+"nu" of the Matern kernel).
+
+See Rasmussen and Williams 2006, pp84 for details regarding the different
+variants of the Matern kernel.
+ +""" +print(__doc__) + +# Author: Jan Hendrik Metzen +# Licence: BSD 3 clause + + +import numpy as np + +from sklearn.metrics.pairwise import matern_kernel + +import matplotlib.pyplot as plt + +d = np.linspace(-4, 4, 500)[:, None] + +for coef0 in [0.5, 1.5, 2.5, np.inf]: + K = matern_kernel(d, [[0.0]], gamma=1, coef0=coef0) + plt.plot(d[:, 0], K[:, 0], label=coef0) + +plt.xlabel("distance") +plt.ylabel("covariance") +plt.yscale("log") +plt.ylim(1e-3, 1e0) +plt.legend(title="coef0") +plt.show() \ No newline at end of file diff --git a/examples/svm/plot_svm_matern_kernel.py b/examples/svm/plot_svm_matern_kernel.py new file mode 100644 index 0000000000000..770e07d3650d3 --- /dev/null +++ b/examples/svm/plot_svm_matern_kernel.py @@ -0,0 +1,73 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +r""" +============================================================================ +Support Vector Regression: comparing different variants of Matern kernel +============================================================================ + +Support Vector Regression with four different variants of the Matern kernel +is compared on a (discontinuous) step-function: + * The Matern kernel for coef0==1.5, learning a once differentiable function + * The Matern kernel for coef0==2.5, learning a twice differentiable function + * The Matern kernel for coef0==3.5, learning a three-rimes differentiable + function + * The absolute-exponential kernel which corresponds to a Matern kernel + with coef0==0.5 + * The squared-exponential (RBF) kernel which corresponds to a Matern kernel + for the limit of coef0 becoming infinitely large + +See Rasmussen and Williams 2006, pp84 for details regarding the different +variants of the Matern kernel. + +The example shows that smaller values of coef0 can better approximate the +discontinuous step-function. 
+""" +print(__doc__) + +# Author: Jan Hendrik Metzen +# Licence: BSD 3 clause + +from functools import partial + +import numpy as np +from sklearn.svm import NuSVR +from sklearn.metrics.pairwise import matern_kernel + +import matplotlib.pyplot as plt + +np.random.seed(0) + +# Train SVR with RBF and Matern kernels and plot resulting +# predictions +x = np.random.uniform(0, 10, 50) +y = (x < 5) + +svr_rbf = NuSVR(nu=0.25, C=1e2, kernel="rbf", gamma=0.25) +svr_matern0_5 = NuSVR(nu=0.25, C=1e2, + kernel=partial(matern_kernel, coef0=0.5, gamma=0.25)) +svr_matern1_5 = NuSVR(nu=0.25, C=1e2, + kernel=partial(matern_kernel, coef0=1.5, gamma=0.25)) +svr_matern2_5 = NuSVR(nu=0.25, C=1e2, + kernel=partial(matern_kernel, coef0=2.5, gamma=0.25)) +svr_matern3_5 = NuSVR(nu=0.25, C=1e2, + kernel=partial(matern_kernel, coef0=3.5, gamma=0.25)) + +svr_rbf.fit(x[:, None], y) +svr_matern0_5.fit(x[:, None], y) +svr_matern1_5.fit(x[:, None], y) +svr_matern2_5.fit(x[:, None], y) +svr_matern3_5.fit(x[:, None], y) + +xp = np.linspace(0, 10, 100) +plt.scatter(x, y, c='k', s=25, zorder=10) +plt.plot(xp, xp < 5, label="True", c='k') +plt.plot(xp, svr_rbf.predict(xp[:, None]), label="RBF", c='g') +plt.plot(xp, svr_matern0_5.predict(xp[:, None]), label="Matern(0.5)", c='m') +plt.plot(xp, svr_matern1_5.predict(xp[:, None]), label="Matern(1.5)", c='r') +plt.plot(xp, svr_matern2_5.predict(xp[:, None]), label="Matern(2.5)", c='c') +plt.plot(xp, svr_matern3_5.predict(xp[:, None]), label="Matern(3.5)", c='b') +plt.legend(loc='best', title="kernel") +plt.xlabel("input") +plt.ylabel("target") +plt.show() diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index ed979183071a8..703556b74e815 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -7,6 +7,7 @@ # Philippe Gervais # Lars Buitinck # Joel Nothman +# Jan Hendrik Metzen # License: BSD 3 clause import itertools @@ -15,6 +16,7 @@ from scipy.spatial import distance from scipy.sparse import csr_matrix from scipy.sparse import issparse +import scipy.special from ..utils import check_array from ..utils import gen_even_slices @@ -767,6 +769,60 @@ def rbf_kernel(X, Y=None, gamma=None): return K +def matern_kernel(X, Y=None, gamma=None, coef0=1.5): + """ Compute the Matern kernel between X and Y. + + The class of Matern kernels is a generalization of the RBF and + absolute exponential kernel parameterized by an additional parameter + coef0 (commonly denoted as nu in the literature). The smaller coef0, + the less smooth the approximated function is. For nu->inf, the kernel + becomes equivalent to the RBF kernel and for nu=0.5 to the absolute + exponential kernel. Important intermediate values are nu=1.5 (once + differentiable functions) and nu=2.5 (twice differentiable functions). + + See Rasmussen and Williams 2006, pp84 for details regarding the + different variants of the Matern kernel. 
+
+    Parameters
+    ----------
+    X : array of shape (n_samples_X, n_features)
+
+    Y : array of shape (n_samples_Y, n_features)
+
+    gamma : float
+
+    coef0 : float in [0.5, 1.5, 2.5, inf]
+
+    Returns
+    -------
+    kernel_matrix : array of shape (n_samples_X, n_samples_Y)
+    """
+    if coef0 == np.inf:  # fall back to rbf-kernel
+        return rbf_kernel(X, Y, gamma)
+
+    X, Y = check_pairwise_arrays(X, Y)
+    if gamma is None:
+        gamma = 1.0 / X.shape[1]
+
+    K = euclidean_distances(X, Y, squared=False)
+    if coef0 == 0.5:
+        K *= -gamma
+        np.exp(K, K)  # exponentiate K in-place
+    elif coef0 == 1.5:
+        K *= np.sqrt(3) * gamma
+        K = (1 + K) * np.exp(-K)
+    elif coef0 == 2.5:
+        K *= np.sqrt(5) * gamma
+        K = (1 + K + K ** 2 / 3.0) * np.exp(-K)
+    else:  # general case; expensive to evaluate
+        K[K == 0.0] += 1e-10  # strict zeros would result in nan
+        tmp = (np.sqrt(2 * coef0) * gamma * K)
+        K[:] = (2 ** (1 - coef0)) / scipy.special.gamma(coef0)
+        K *= tmp ** coef0
+        K *= scipy.special.kv(coef0, tmp)
+    return K
+
+
 def cosine_similarity(X, Y=None):
     """Compute cosine similarity between samples in X and Y.

@@ -1121,7 +1177,8 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=1, **kwds):
     'poly': polynomial_kernel,
     'rbf': rbf_kernel,
     'sigmoid': sigmoid_kernel,
-    'cosine': cosine_similarity, }
+    'cosine': cosine_similarity,
+    'matern': matern_kernel}


 def kernel_metrics():
@@ -1158,6 +1215,7 @@
     "polynomial": frozenset(["gamma", "degree", "coef0"]),
     "rbf": frozenset(["gamma"]),
     "sigmoid": frozenset(["gamma", "coef0"]),
+    "matern": frozenset(["gamma", "coef0"]),
 }
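A quick smoke test of the limiting behaviour added above (not part of the
patch itself; it assumes a scikit-learn build with this series applied and
mirrors the assertions that patch 3 turns into proper tests):

import numpy as np
from sklearn.metrics.pairwise import (euclidean_distances, matern_kernel,
                                      rbf_kernel)

rng = np.random.RandomState(0)
X = rng.random_sample((5, 4))

# coef0=inf falls back to the RBF kernel (first branch of matern_kernel).
np.testing.assert_array_almost_equal(
    matern_kernel(X, X, coef0=np.inf), rbf_kernel(X, X))

# coef0=0.5 reduces to the absolute-exponential kernel exp(-gamma * d).
d = euclidean_distances(X, X, squared=False)
np.testing.assert_array_almost_equal(
    matern_kernel(X, X, gamma=1.0, coef0=0.5), np.exp(-d))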
From 4614e9c579ff510dc9b3e382baef241a64f2e1e1 Mon Sep 17 00:00:00 2001
From: Jan Hendrik Metzen
Date: Fri, 5 Dec 2014 21:46:23 +0100
Subject: [PATCH 2/4] DOC Adding documentation for Matern kernel

DOC Extended documentation of Matern kernel

DOC Documentation of parameter coef0 of matern_kernel
---
 doc/modules/classes.rst     |  1 +
 doc/modules/metrics.rst     | 61 +++++++++++++++++++++++++++++++++++++
 doc/whats_new.rst           |  4 +++
 sklearn/metrics/pairwise.py | 16 ++++++++--
 4 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
index 3df61164ab870..9ea86169cb569 100644
--- a/doc/modules/classes.rst
+++ b/doc/modules/classes.rst
@@ -883,6 +883,7 @@ See the :ref:`metrics` section of the user guide for further details.
    metrics.pairwise.pairwise_kernels
    metrics.pairwise.polynomial_kernel
    metrics.pairwise.rbf_kernel
+   metrics.pairwise.matern_kernel
    metrics.pairwise_distances
    metrics.pairwise_distances_argmin
    metrics.pairwise_distances_argmin_min
diff --git a/doc/modules/metrics.rst b/doc/modules/metrics.rst
index 9bba16c7d77ff..ec3168bd414b7 100644
--- a/doc/modules/metrics.rst
+++ b/doc/modules/metrics.rst
@@ -123,6 +123,62 @@ between two vectors. This kernel is defined as:
 where ``x`` and ``y`` are the input vectors. If :math:`\gamma = \sigma^{-2}`
 the kernel is known as the Gaussian kernel of variance :math:`\sigma^2`.

+Matérn kernel
+-------------
+The function :func:`matern_kernel` is a generalization of the RBF kernel. It has
+an additional parameter :math:`\nu` (set via the keyword coef0) which controls
+the smoothness of the resulting function. The general functional form of a
+Matérn kernel is given by
+
+.. math::
+
+    k(d) = \sigma^2\frac{1}{\Gamma(\nu)2^{\nu-1}}\Bigg(\gamma\sqrt{2\nu} d\Bigg)^\nu K_\nu\Bigg(\gamma\sqrt{2\nu} d\Bigg),
+
+where :math:`d=\| x-y \|` and ``x`` and ``y`` are the input vectors.
+
+As :math:`\nu\rightarrow\infty`, the Matérn kernel converges to the RBF kernel.
+When :math:`\nu = 1/2`, the Matérn kernel becomes identical to the absolute
+exponential kernel, i.e.,
+
+.. math::
+    k(d) = \sigma^2 \exp \Bigg(-\gamma d \Bigg) \quad \quad \nu= \tfrac{1}{2}
+
+In particular, :math:`\nu = 3/2`:
+
+.. math::
+    k(d) = \sigma^2 \Bigg(1 + \gamma \sqrt{3} d \Bigg) \exp \Bigg(-\gamma \sqrt{3}d \Bigg) \quad \quad \nu= \tfrac{3}{2}
+
+and :math:`\nu = 5/2`:
+
+.. math::
+    k(d) = \sigma^2 \Bigg(1 + \gamma \sqrt{5}d +\frac{5}{3} \gamma^2d^2 \Bigg) \exp \Bigg(-\gamma \sqrt{5}d \Bigg) \quad \quad \nu= \tfrac{5}{2}
+
+are popular choices for learning functions that are not infinitely
+differentiable (as assumed by the RBF kernel) but at least once (:math:`\nu =
+3/2`) or twice differentiable (:math:`\nu = 5/2`).
+
+The following example illustrates how the Matérn kernel's covariance decreases
+with increasing dissimilarity of the two inputs for different values of coef0
+(the parameter :math:`\nu` of the Matérn kernel):
+
+.. figure:: ../auto_examples/metrics/images/plot_matern_kernel_001.png
+   :target: ../auto_examples/metrics/plot_matern_kernel.html
+   :align: center
+
+The flexibility of controlling the smoothness of the learned function via coef0
+allows adapting to the properties of the true underlying functional relation.
+The following example shows that support vector regression with a Matérn kernel
+with smaller values of coef0 can better approximate a discontinuous
+step-function:
+
+.. figure:: ../auto_examples/svm/images/plot_svm_matern_kernel_001.png
+   :target: ../auto_examples/svm/plot_svm_matern_kernel.html
+   :align: center
+
+See Rasmussen and Williams 2006, pp84 for further details regarding the
+different variants of the Matérn kernel.
+
+
 Chi-squared kernel
 ------------------
 The chi-squared kernel is a very popular choice for training non-linear SVMs in
@@ -172,3 +228,8 @@ The chi squared kernel is most commonly used on histograms (bags) of visual word
     International Journal of Computer Vision 2007
     http://eprints.pascal-network.org/archive/00002309/01/Zhang06-IJCV.pdf

+  * Rasmussen, C. E. and Williams, C.
+    Gaussian Processes for Machine Learning
+    The MIT Press, 2006
+    http://www.gaussianprocess.org/gpml/chapters/
+
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index c49769567f960..c338febb82d8b 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -75,6 +75,10 @@ New features
      for fixed user-provided cross-validation folds.
      By `untom `_.

+   - Added :func:`metrics.pairwise.matern_kernel`, a kernel where the
+     smoothness of the learned function can be controlled.
+     By `Jan Hendrik Metzen`_.
+
 Enhancements
 ............

diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index 703556b74e815..3dfa96f96ee2f 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -775,7 +775,7 @@ def matern_kernel(X, Y=None, gamma=None, coef0=1.5):
     The class of Matern kernels is a generalization of the RBF and
     absolute exponential kernel parameterized by an additional parameter
     coef0 (commonly denoted as nu in the literature). The smaller coef0,
-    the less smooth the approximated function is. For nu->inf, the kernel
+    the less smooth the approximated function is. For nu=inf, the kernel
     becomes equivalent to the RBF kernel and for nu=0.5 to the absolute
     exponential kernel. Important intermediate values are nu=1.5 (once
     differentiable functions) and nu=2.5 (twice differentiable functions).
@@ -791,7 +791,17 @@ def matern_kernel(X, Y=None, gamma=None, coef0=1.5):

     gamma : float

-    coef0 : float in [0.5, 1.5, 2.5, inf]
+    coef0 : float>0.0 (the parameter nu)
+        The parameter nu controlling the smoothness of the learned function.
+        The smaller coef0, the less smooth the approximated function is.
+        For nu=inf, the kernel becomes equivalent to the RBF kernel and for
+        nu=0.5 to the absolute exponential kernel. Important intermediate
+        values are nu=1.5 (once differentiable functions) and nu=2.5
+        (twice differentiable functions). Note that values of nu not in
+        [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost
+        (appr. 10 times higher) since they require to evaluate the modified
+        Bessel function.
+

     Returns
     -------
     kernel_matrix : array of shape (n_samples_X, n_samples_Y)
@@ -799,6 +809,8 @@ def matern_kernel(X, Y=None, gamma=None, coef0=1.5):
     """
     if coef0 == np.inf:  # fall back to rbf-kernel
         return rbf_kernel(X, Y, gamma)
+    elif coef0 <= 0.0:
+        raise ValueError("coef0 of Matérn kernel must be strictly positive.")

     X, Y = check_pairwise_arrays(X, Y)
     if gamma is None:
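The closed forms that the documentation above lists for the special cases can
be checked against the general Bessel-function expression from the same
formula. A small self-contained sketch with NumPy/SciPy only, shown here for
nu = 3/2 (gamma, nu and d below are free illustration variables, not part of
the patch):

import math
import numpy as np
import scipy.special

gamma, nu = 0.8, 1.5
d = np.linspace(0.1, 3.0, 5)

# nu = 3/2 closed form: (1 + sqrt(3)*gamma*d) * exp(-sqrt(3)*gamma*d)
closed_form = (1.0 + math.sqrt(3) * gamma * d) * np.exp(-math.sqrt(3) * gamma * d)

# General form: 2^(1-nu)/Gamma(nu) * (sqrt(2*nu)*gamma*d)^nu * K_nu(...)
tmp = math.sqrt(2 * nu) * gamma * d
general = ((2 ** (1.0 - nu)) / scipy.special.gamma(nu)
           * tmp ** nu * scipy.special.kv(nu, tmp))

np.testing.assert_array_almost_equal(closed_form, general)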
From bb8bfbbd5469ca18d5d832775a392b4c1b4e6837 Mon Sep 17 00:00:00 2001
From: Jan Hendrik Metzen
Date: Sun, 14 Dec 2014 09:29:16 +0100
Subject: [PATCH 3/4] TEST Adding tests for Matern kernel

MISC Using np.finfo(float).eps instead of 1e-10 in Matern kernel

Furthermore, fixed typo and increased readability of test_matern_kernel.

TEST Extended tests of Matern kernel
---
 examples/svm/plot_svm_matern_kernel.py |  2 +-
 sklearn/metrics/pairwise.py            |  2 +-
 sklearn/metrics/tests/test_pairwise.py | 31 +++++++++++++++++++++++---
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/examples/svm/plot_svm_matern_kernel.py b/examples/svm/plot_svm_matern_kernel.py
index 770e07d3650d3..d35406a4e6cbb 100644
--- a/examples/svm/plot_svm_matern_kernel.py
+++ b/examples/svm/plot_svm_matern_kernel.py
@@ -10,7 +10,7 @@ is compared on a (discontinuous) step-function:
  * The Matern kernel for coef0==1.5, learning a once differentiable function
  * The Matern kernel for coef0==2.5, learning a twice differentiable function
- * The Matern kernel for coef0==3.5, learning a three-rimes differentiable
+ * The Matern kernel for coef0==3.5, learning a three-times differentiable
    function
  * The absolute-exponential kernel which corresponds to a Matern kernel
    with coef0==0.5
diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index 3dfa96f96ee2f..a215031932eba 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -827,7 +827,7 @@ def matern_kernel(X, Y=None, gamma=None, coef0=1.5):
         K *= np.sqrt(5) * gamma
         K = (1 + K + K ** 2 / 3.0) * np.exp(-K)
     else:  # general case; expensive to evaluate
-        K[K == 0.0] += 1e-10  # strict zeros would result in nan
+        K[K == 0.0] += np.finfo(float).eps  # strict zeros would result in nan
         tmp = (np.sqrt(2 * coef0) * gamma * K)
         K[:] = (2 ** (1 - coef0)) / scipy.special.gamma(coef0)
         K *= tmp ** coef0
diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py
index b5a375be361d2..b758a82148309 100644
--- a/sklearn/metrics/tests/test_pairwise.py
+++ b/sklearn/metrics/tests/test_pairwise.py
@@ -19,6 +19,7 @@
 from sklearn.metrics.pairwise import linear_kernel
 from sklearn.metrics.pairwise import chi2_kernel, additive_chi2_kernel
 from sklearn.metrics.pairwise import polynomial_kernel
+from sklearn.metrics.pairwise import matern_kernel
 from sklearn.metrics.pairwise import rbf_kernel
 from sklearn.metrics.pairwise import sigmoid_kernel
 from sklearn.metrics.pairwise import cosine_similarity
@@ -178,7 +179,7 @@ def test_pairwise_kernels():
     Y = rng.random_sample((2, 4))
     # Test with all metrics that should be in PAIRWISE_KERNEL_FUNCTIONS.
     test_metrics = ["rbf", "sigmoid", "polynomial", "linear", "chi2",
-                    "additive_chi2"]
+                    "additive_chi2", "matern"]
     for metric in test_metrics:
         function = PAIRWISE_KERNEL_FUNCTIONS[metric]
         # Test with Y=None
@@ -415,7 +416,7 @@ def test_kernel_symmetry():
     rng = np.random.RandomState(0)
     X = rng.random_sample((5, 4))
     for kernel in (linear_kernel, polynomial_kernel, rbf_kernel,
-                   sigmoid_kernel, cosine_similarity):
+                   matern_kernel, sigmoid_kernel, cosine_similarity):
         K = kernel(X, X)
         assert_array_almost_equal(K, K.T, 15)
@@ -425,7 +426,7 @@ def test_kernel_sparse():
     X = rng.random_sample((5, 4))
     X_sparse = csr_matrix(X)
     for kernel in (linear_kernel, polynomial_kernel, rbf_kernel,
-                   sigmoid_kernel, cosine_similarity):
+                   matern_kernel, sigmoid_kernel, cosine_similarity):
         K = kernel(X, X)
         K2 = kernel(X_sparse, X_sparse)
         assert_array_almost_equal(K, K2)
@@ -447,6 +448,30 @@ def test_rbf_kernel():
     assert_array_almost_equal(K.flat[::6], np.ones(5))


+def test_matern_kernel():
+    rng = np.random.RandomState(0)
+    X = rng.random_sample((5, 4))
+    K = matern_kernel(X, X)
+    # the diagonal elements of a matern kernel are 1
+    assert_array_almost_equal(np.diag(K), np.ones(5))
+    # matern kernel for coef0==inf is equal to rbf kernel
+    K_rbf = rbf_kernel(X, X)
+    K = matern_kernel(X, X, coef0=np.inf)
+    assert_array_almost_equal(K, K_rbf)
+    # matern kernel for coef0==0.5 is equal to absolute exponential kernel
+    K_absexp = np.exp(-euclidean_distances(X, X, squared=False))
+    K = matern_kernel(X, X, coef0=0.5, gamma=1.0)
+    assert_array_almost_equal(K, K_absexp)
+    # test that special cases of matern kernel (coef0 in [0.5, 1.5, 2.5])
+    # result in nearly identical results as the general case for coef0 in
+    # [0.5 + tiny, 1.5 + tiny, 2.5 + tiny]
+    tiny = 1e-10
+    for coef0 in [0.5, 1.5, 2.5]:
+        K1 = matern_kernel(X, X, coef0=coef0)
+        K2 = matern_kernel(X, X, coef0=coef0 + tiny)
+        assert_array_almost_equal(K1, K2)
+
+
 def test_cosine_similarity():
     """ Test the cosine_similarity.
     """
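Beyond the direct function calls exercised by the tests above, the kernel is
also reachable through the string metric registered in patch 1. A minimal
sketch of that route, again assuming the series is applied (gamma and coef0
are forwarded to matern_kernel as keyword arguments):

import numpy as np
from sklearn.metrics.pairwise import matern_kernel, pairwise_kernels

rng = np.random.RandomState(0)
X = rng.random_sample((5, 4))

# "matern" resolves via PAIRWISE_KERNEL_FUNCTIONS, so the string metric
# and the direct call should produce the same Gram matrix.
K1 = pairwise_kernels(X, metric="matern", gamma=0.5, coef0=2.5)
K2 = matern_kernel(X, gamma=0.5, coef0=2.5)
np.testing.assert_array_almost_equal(K1, K2)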
""" From 340b4021116173c42a0864eea17b037569d83a2c Mon Sep 17 00:00:00 2001 From: Jan Hendrik Metzen Date: Sat, 17 Jan 2015 19:36:54 +0100 Subject: [PATCH 4/4] PEP8 conformity improved DOC Add "matern" as valid kernel name in various places in the doc MISC Revising Matern kernel PR based on @agramfort's review REFACTOR Using K.fill(X) instead of K[:] = X --- examples/metrics/plot_matern_kernel.py | 10 +-- examples/svm/plot_svm_matern_kernel.py | 6 +- sklearn/decomposition/kernel_pca.py | 3 +- sklearn/metrics/pairwise.py | 105 ++++++++++++------------- 4 files changed, 62 insertions(+), 62 deletions(-) diff --git a/examples/metrics/plot_matern_kernel.py b/examples/metrics/plot_matern_kernel.py index 7b2d6bf2fefef..c50019f0a6244 100644 --- a/examples/metrics/plot_matern_kernel.py +++ b/examples/metrics/plot_matern_kernel.py @@ -22,19 +22,19 @@ import numpy as np -from sklearn.metrics.pairwise import matern_kernel - import matplotlib.pyplot as plt +from sklearn.metrics.pairwise import matern_kernel + d = np.linspace(-4, 4, 500)[:, None] for coef0 in [0.5, 1.5, 2.5, np.inf]: - K = matern_kernel(d, [[0.0]], gamma=1, coef0=coef0) - plt.plot(d[:, 0], K[:, 0], label=coef0) + K = matern_kernel(d, [[0.0]], gamma=1, coef0=coef0) + plt.plot(d[:, 0], K[:, 0], label=coef0) plt.xlabel("distance") plt.ylabel("covariance") plt.yscale("log") plt.ylim(1e-3, 1e0) plt.legend(title="coef0") -plt.show() \ No newline at end of file +plt.show() diff --git a/examples/svm/plot_svm_matern_kernel.py b/examples/svm/plot_svm_matern_kernel.py index d35406a4e6cbb..506cb420fa7fe 100644 --- a/examples/svm/plot_svm_matern_kernel.py +++ b/examples/svm/plot_svm_matern_kernel.py @@ -10,7 +10,7 @@ is compared on a (discontinuous) step-function: * The Matern kernel for coef0==1.5, learning a once differentiable function * The Matern kernel for coef0==2.5, learning a twice differentiable function - * The Matern kernel for coef0==3.5, learning a three-times differentiable + * The Matern kernel for coef0==3.5, learning a three-times differentiable function * The absolute-exponential kernel which corresponds to a Matern kernel with coef0==0.5 @@ -31,10 +31,12 @@ from functools import partial import numpy as np + +import matplotlib.pyplot as plt + from sklearn.svm import NuSVR from sklearn.metrics.pairwise import matern_kernel -import matplotlib.pyplot as plt np.random.seed(0) diff --git a/sklearn/decomposition/kernel_pca.py b/sklearn/decomposition/kernel_pca.py index f447e47934e17..0e820989d14c8 100644 --- a/sklearn/decomposition/kernel_pca.py +++ b/sklearn/decomposition/kernel_pca.py @@ -24,7 +24,8 @@ class KernelPCA(BaseEstimator, TransformerMixin): n_components: int or None Number of components. If None, all non-zero components are kept. - kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed" + kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "matern" | + "precomputed" Kernel. Default: "linear" diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index a215031932eba..bd527cbe84693 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -11,6 +11,7 @@ # License: BSD 3 clause import itertools +import math import numpy as np from scipy.spatial import distance @@ -508,11 +509,9 @@ def cosine_distances(X, Y=None): Parameters ---------- - X : array_like, sparse matrix - with shape (n_samples_X, n_features). + X : array_like, sparse matrix, shape (n_samples_X, n_features). - Y : array_like, sparse matrix (optional) - with shape (n_samples_Y, n_features). 
+    Y : array_like, sparse matrix (optional), shape (n_samples_Y, n_features).

     Returns
     -------
@@ -613,10 +612,10 @@ def paired_distances(X, Y, metric="euclidean", **kwds):

     Parameters
     ----------
-    X : ndarray (n_samples, n_features)
+    X : array, shape (n_samples, n_features)
         Array 1 for distance computation.

-    Y : ndarray (n_samples, n_features)
+    Y : array, shape (n_samples, n_features)
         Array 2 for distance computation.

     metric : string or callable
@@ -631,7 +630,7 @@ def paired_distances(X, Y, metric="euclidean", **kwds):

     Returns
     -------
-    distances : ndarray (n_samples, )
+    distances : array, shape (n_samples, )

     Examples
     --------
@@ -667,13 +666,13 @@ def linear_kernel(X, Y=None):

     Parameters
     ----------
-    X : array of shape (n_samples_1, n_features)
+    X : array, shape (n_samples_X, n_features)

-    Y : array of shape (n_samples_2, n_features)
+    Y : array, shape (n_samples_Y, n_features)

     Returns
     -------
-    Gram matrix : array of shape (n_samples_1, n_samples_2)
+    Gram matrix : array, shape (n_samples_X, n_samples_Y)
     """
     X, Y = check_pairwise_arrays(X, Y)
     return safe_sparse_dot(X, Y.T, dense_output=True)
@@ -687,9 +686,9 @@ def polynomial_kernel(X, Y=None, degree=3, gamma=None, coef0=1):

     Parameters
     ----------
-    X : ndarray of shape (n_samples_1, n_features)
+    X : array, shape (n_samples_X, n_features)

-    Y : ndarray of shape (n_samples_2, n_features)
+    Y : array, shape (n_samples_Y, n_features)

     coef0 : int, default 1

@@ -697,7 +696,7 @@ def polynomial_kernel(X, Y=None, degree=3, gamma=None, coef0=1):

     Returns
     -------
-    Gram matrix : array of shape (n_samples_1, n_samples_2)
+    Gram matrix : array, shape (n_samples_X, n_samples_Y)
     """
     X, Y = check_pairwise_arrays(X, Y)
     if gamma is None:
@@ -718,15 +717,15 @@ def sigmoid_kernel(X, Y=None, gamma=None, coef0=1):

     Parameters
     ----------
-    X : ndarray of shape (n_samples_1, n_features)
+    X : array, shape (n_samples_X, n_features)

-    Y : ndarray of shape (n_samples_2, n_features)
+    Y : array, shape (n_samples_Y, n_features)

     coef0 : int, default 1

     Returns
     -------
-    Gram matrix: array of shape (n_samples_1, n_samples_2)
+    Gram matrix: array, shape (n_samples_X, n_samples_Y)
     """
     X, Y = check_pairwise_arrays(X, Y)
     if gamma is None:
@@ -749,15 +748,15 @@ def rbf_kernel(X, Y=None, gamma=None):

     Parameters
     ----------
-    X : array of shape (n_samples_X, n_features)
+    X : array, shape (n_samples_X, n_features)

-    Y : array of shape (n_samples_Y, n_features)
+    Y : array, shape (n_samples_Y, n_features)

     gamma : float

     Returns
     -------
-    kernel_matrix : array of shape (n_samples_X, n_samples_Y)
+    kernel_matrix : array, shape (n_samples_X, n_samples_Y)
     """
     X, Y = check_pairwise_arrays(X, Y)
     if gamma is None:
@@ -785,19 +784,19 @@ def matern_kernel(X, Y=None, gamma=None, coef0=1.5):

     Parameters
     ----------
-    X : array of shape (n_samples_X, n_features)
+    X : array, shape (n_samples_X, n_features)

-    Y : array of shape (n_samples_Y, n_features)
+    Y : array, shape (n_samples_Y, n_features)

     gamma : float

     coef0 : float>0.0 (the parameter nu)
         The parameter nu controlling the smoothness of the learned function.
-        The smaller coef0, the less smooth the approximated function is.
+        The smaller coef0, the less smooth the approximated function is.
+        For nu=inf, the kernel becomes equivalent to the RBF kernel and for
+        nu=0.5 to the absolute exponential kernel. Important intermediate
+        values are nu=1.5 (once differentiable functions) and nu=2.5
+        (twice differentiable functions). Note that values of nu not in
         [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost
         (appr. 10 times higher) since they require to evaluate the modified
         Bessel function.

     Returns
     -------
-    kernel_matrix : array of shape (n_samples_X, n_samples_Y)
+    kernel_matrix : array, shape (n_samples_X, n_samples_Y)
     """
     if coef0 == np.inf:  # fall back to rbf-kernel
         return rbf_kernel(X, Y, gamma)
@@ -819,17 +818,17 @@ def matern_kernel(X, Y=None, gamma=None, coef0=1.5):
     K = euclidean_distances(X, Y, squared=False)
     if coef0 == 0.5:
         K *= -gamma
-        np.exp(K, K)    # exponentiate K in-place
+        np.exp(K, K)  # exponentiate K in-place
     elif coef0 == 1.5:
-        K *= np.sqrt(3) * gamma
-        K = (1 + K) * np.exp(-K)
+        K *= math.sqrt(3) * gamma
+        K = (1. + K) * np.exp(-K)
     elif coef0 == 2.5:
-        K *= np.sqrt(5) * gamma
-        K = (1 + K + K ** 2 / 3.0) * np.exp(-K)
+        K *= math.sqrt(5) * gamma
+        K = (1. + K + K ** 2 / 3.0) * np.exp(-K)
     else:  # general case; expensive to evaluate
         K[K == 0.0] += np.finfo(float).eps  # strict zeros would result in nan
-        tmp = (np.sqrt(2 * coef0) * gamma * K)
-        K[:] = (2 ** (1 - coef0)) / scipy.special.gamma(coef0)
+        tmp = (math.sqrt(2 * coef0) * gamma * K)
+        K.fill((2 ** (1. - coef0)) / scipy.special.gamma(coef0))
         K *= tmp ** coef0
         K *= scipy.special.kv(coef0, tmp)
     return K
@@ -847,16 +846,13 @@ def cosine_similarity(X, Y=None):

     Parameters
     ----------
-    X : array_like, sparse matrix
-        with shape (n_samples_X, n_features).
+    X : array_like, sparse matrix, shape (n_samples_X, n_features).

-    Y : array_like, sparse matrix (optional)
-        with shape (n_samples_Y, n_features).
+    Y : array_like, sparse matrix (optional), shape (n_samples_Y, n_features).

     Returns
     -------
-    kernel matrix : array
-        An array with shape (n_samples_X, n_samples_Y).
+    kernel matrix : array, shape (n_samples_X, n_samples_Y).
     """
     # to avoid recursive import

@@ -894,13 +890,13 @@ def additive_chi2_kernel(X, Y=None):

     Parameters
     ----------
-    X : array-like of shape (n_samples_X, n_features)
+    X : array, shape (n_samples_X, n_features)

-    Y : array of shape (n_samples_Y, n_features)
+    Y : array, shape (n_samples_Y, n_features)

     Returns
     -------
-    kernel_matrix : array of shape (n_samples_X, n_samples_Y)
+    kernel_matrix : array, shape (n_samples_X, n_samples_Y)

     References
     ----------
@@ -947,16 +943,16 @@ def chi2_kernel(X, Y=None, gamma=1.):

     Parameters
     ----------
-    X : array-like of shape (n_samples_X, n_features)
+    X : array, shape (n_samples_X, n_features)

-    Y : array of shape (n_samples_Y, n_features)
+    Y : array, shape (n_samples_Y, n_features)

     gamma : float, default=1.
         Scaling parameter of the chi2 kernel.

     Returns
     -------
-    kernel_matrix : array of shape (n_samples_X, n_samples_Y)
+    kernel_matrix : array, shape (n_samples_X, n_samples_Y)

     References
     ----------
@@ -1111,11 +1107,11 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=1, **kwds):

     Parameters
     ----------
-    X : array [n_samples_a, n_samples_a] if metric == "precomputed", or, \
-        [n_samples_a, n_features] otherwise
+    X : array, shape (n_samples_a, n_samples_a) if metric == "precomputed", \
+        (n_samples_a, n_features) otherwise
         Array of pairwise distances between samples, or a feature array.
-    Y : array [n_samples_b, n_features]
+    Y : array, shape (n_samples_b, n_features)
         A second feature array only if X has shape [n_samples_a, n_features].

     metric : string, or callable
@@ -1146,7 +1142,7 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=1, **kwds):

     Returns
     -------
-    D : array [n_samples_a, n_samples_a] or [n_samples_a, n_samples_b]
+    D : array, shape (n_samples_a, n_samples_a) or (n_samples_a, n_samples_b)
         A distance matrix D such that D_{i, j} is the distance between the
         ith and jth vectors of the given matrix X, if Y is None.
         If Y is not None, then D_{i, j} is the distance between the ith array
@@ -1212,6 +1208,7 @@ def kernel_metrics():
       'rbf'            sklearn.pairwise.rbf_kernel
       'sigmoid'        sklearn.pairwise.sigmoid_kernel
       'cosine'         sklearn.pairwise.cosine_similarity
+      'matern'         sklearn.pairwise.matern_kernel
      ===============   ========================================
     """
     return PAIRWISE_KERNEL_FUNCTIONS
@@ -1247,12 +1244,12 @@ def pairwise_kernels(X, Y=None, metric="linear", filter_params=False,
     kernel between the arrays from both X and Y.

     Valid values for metric are::
-        ['rbf', 'sigmoid', 'polynomial', 'poly', 'linear', 'cosine']
+        ['rbf', 'sigmoid', 'polynomial', 'poly', 'linear', 'cosine', 'matern']

     Parameters
     ----------
-    X : array [n_samples_a, n_samples_a] if metric == "precomputed", or, \
-        [n_samples_a, n_features] otherwise
+    X : array, shape (n_samples_a, n_samples_a) if metric == "precomputed", \
+        (n_samples_a, n_features) otherwise
         Array of pairwise kernels between samples, or a feature array.

     Y : array [n_samples_b, n_features]
         A second feature array only if X has shape [n_samples_a, n_features].
@@ -1286,7 +1283,7 @@ def pairwise_kernels(X, Y=None, metric="linear", filter_params=False,

     Returns
     -------
-    K : array [n_samples_a, n_samples_a] or [n_samples_a, n_samples_b]
+    K : array, shape (n_samples_a, n_samples_a) or (n_samples_a, n_samples_b)
         A kernel matrix K such that K_{i, j} is the kernel between the
         ith and jth vectors of the given matrix X, if Y is None.
         If Y is not None, then K_{i, j} is the kernel between the ith array
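Since patch 4 advertises "matern" as a valid KernelPCA kernel, end-to-end
usage would look roughly like the sketch below. This is a hypothetical smoke
test, assuming the series is applied; KernelPCA forwards gamma and coef0
through pairwise_kernels with filter_params=True, which drops the unused
degree parameter for this metric.

import numpy as np
from sklearn.decomposition import KernelPCA

rng = np.random.RandomState(0)
X = rng.random_sample((30, 5))

# Project onto the two leading components of a Matern kernel PCA;
# coef0 plays the role of nu here as well.
kpca = KernelPCA(n_components=2, kernel="matern", gamma=0.5, coef0=1.5)
X_2d = kpca.fit_transform(X)
print(X_2d.shape)  # (30, 2)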