From 641d0b1d220081857b1d1e9f895ac16d92b1020c Mon Sep 17 00:00:00 2001
From: Jan Hendrik Metzen
Date: Tue, 25 Nov 2014 14:10:06 +0100
Subject: [PATCH 1/4] ADD Matern kernel added to pairwise.py

REFACTOR Performance improvements in matern_kernel based on @afabisch's
suggestions

EXAMPLE Comparing Matern kernels of different smoothness on a step-function.

ENH Support for arbitrary values of coef0 ("nu") in the Matern kernel

EXAMPLE Added illustration of Matern kernel for different values of coef0
---
 examples/metrics/README.txt            |  6 +++
 examples/metrics/plot_matern_kernel.py | 40 ++++++++++++++
 examples/svm/plot_svm_matern_kernel.py | 73 ++++++++++++++++++++++++++
 sklearn/metrics/pairwise.py            | 60 ++++++++++++++++++++-
 4 files changed, 178 insertions(+), 1 deletion(-)
 create mode 100644 examples/metrics/README.txt
 create mode 100644 examples/metrics/plot_matern_kernel.py
 create mode 100644 examples/svm/plot_svm_matern_kernel.py

diff --git a/examples/metrics/README.txt b/examples/metrics/README.txt
new file mode 100644
index 0000000000000..f93905133e44b
--- /dev/null
+++ b/examples/metrics/README.txt
@@ -0,0 +1,6 @@
+.. _metrics_examples:
+
+Metrics
+-------
+
+Examples concerning the :mod:`sklearn.metrics` module.
diff --git a/examples/metrics/plot_matern_kernel.py b/examples/metrics/plot_matern_kernel.py
new file mode 100644
index 0000000000000..7b2d6bf2fefef
--- /dev/null
+++ b/examples/metrics/plot_matern_kernel.py
@@ -0,0 +1,40 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+r"""
+============================================================================
+Matern kernel: influence of coef0 on kernel covariance
+============================================================================
+
+The example shows how the kernel covariance decreases with increasing
+dissimilarity of the two inputs for different values of coef0 (the parameter
+"nu" of the Matern kernel).
+
+See Rasmussen and Williams 2006, pp84 for details regarding the different
+variants of the Matern kernel.
+ +""" +print(__doc__) + +# Author: Jan Hendrik Metzen +# Licence: BSD 3 clause + + +import numpy as np + +from sklearn.metrics.pairwise import matern_kernel + +import matplotlib.pyplot as plt + +d = np.linspace(-4, 4, 500)[:, None] + +for coef0 in [0.5, 1.5, 2.5, np.inf]: + K = matern_kernel(d, [[0.0]], gamma=1, coef0=coef0) + plt.plot(d[:, 0], K[:, 0], label=coef0) + +plt.xlabel("distance") +plt.ylabel("covariance") +plt.yscale("log") +plt.ylim(1e-3, 1e0) +plt.legend(title="coef0") +plt.show() \ No newline at end of file diff --git a/examples/svm/plot_svm_matern_kernel.py b/examples/svm/plot_svm_matern_kernel.py new file mode 100644 index 0000000000000..770e07d3650d3 --- /dev/null +++ b/examples/svm/plot_svm_matern_kernel.py @@ -0,0 +1,73 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +r""" +============================================================================ +Support Vector Regression: comparing different variants of Matern kernel +============================================================================ + +Support Vector Regression with four different variants of the Matern kernel +is compared on a (discontinuous) step-function: + * The Matern kernel for coef0==1.5, learning a once differentiable function + * The Matern kernel for coef0==2.5, learning a twice differentiable function + * The Matern kernel for coef0==3.5, learning a three-rimes differentiable + function + * The absolute-exponential kernel which corresponds to a Matern kernel + with coef0==0.5 + * The squared-exponential (RBF) kernel which corresponds to a Matern kernel + for the limit of coef0 becoming infinitely large + +See Rasmussen and Williams 2006, pp84 for details regarding the different +variants of the Matern kernel. + +The example shows that smaller values of coef0 can better approximate the +discontinuous step-function. 
+""" +print(__doc__) + +# Author: Jan Hendrik Metzen +# Licence: BSD 3 clause + +from functools import partial + +import numpy as np +from sklearn.svm import NuSVR +from sklearn.metrics.pairwise import matern_kernel + +import matplotlib.pyplot as plt + +np.random.seed(0) + +# Train SVR with RBF and Matern kernels and plot resulting +# predictions +x = np.random.uniform(0, 10, 50) +y = (x < 5) + +svr_rbf = NuSVR(nu=0.25, C=1e2, kernel="rbf", gamma=0.25) +svr_matern0_5 = NuSVR(nu=0.25, C=1e2, + kernel=partial(matern_kernel, coef0=0.5, gamma=0.25)) +svr_matern1_5 = NuSVR(nu=0.25, C=1e2, + kernel=partial(matern_kernel, coef0=1.5, gamma=0.25)) +svr_matern2_5 = NuSVR(nu=0.25, C=1e2, + kernel=partial(matern_kernel, coef0=2.5, gamma=0.25)) +svr_matern3_5 = NuSVR(nu=0.25, C=1e2, + kernel=partial(matern_kernel, coef0=3.5, gamma=0.25)) + +svr_rbf.fit(x[:, None], y) +svr_matern0_5.fit(x[:, None], y) +svr_matern1_5.fit(x[:, None], y) +svr_matern2_5.fit(x[:, None], y) +svr_matern3_5.fit(x[:, None], y) + +xp = np.linspace(0, 10, 100) +plt.scatter(x, y, c='k', s=25, zorder=10) +plt.plot(xp, xp < 5, label="True", c='k') +plt.plot(xp, svr_rbf.predict(xp[:, None]), label="RBF", c='g') +plt.plot(xp, svr_matern0_5.predict(xp[:, None]), label="Matern(0.5)", c='m') +plt.plot(xp, svr_matern1_5.predict(xp[:, None]), label="Matern(1.5)", c='r') +plt.plot(xp, svr_matern2_5.predict(xp[:, None]), label="Matern(2.5)", c='c') +plt.plot(xp, svr_matern3_5.predict(xp[:, None]), label="Matern(3.5)", c='b') +plt.legend(loc='best', title="kernel") +plt.xlabel("input") +plt.ylabel("target") +plt.show() diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index ed979183071a8..703556b74e815 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -7,6 +7,7 @@ # Philippe Gervais # Lars Buitinck # Joel Nothman +# Jan Hendrik Metzen # License: BSD 3 clause import itertools @@ -15,6 +16,7 @@ from scipy.spatial import distance from scipy.sparse import csr_matrix from scipy.sparse import issparse +import scipy.special from ..utils import check_array from ..utils import gen_even_slices @@ -767,6 +769,60 @@ def rbf_kernel(X, Y=None, gamma=None): return K +def matern_kernel(X, Y=None, gamma=None, coef0=1.5): + """ Compute the Matern kernel between X and Y. + + The class of Matern kernels is a generalization of the RBF and + absolute exponential kernel parameterized by an additional parameter + coef0 (commonly denoted as nu in the literature). The smaller coef0, + the less smooth the approximated function is. For nu->inf, the kernel + becomes equivalent to the RBF kernel and for nu=0.5 to the absolute + exponential kernel. Important intermediate values are nu=1.5 (once + differentiable functions) and nu=2.5 (twice differentiable functions). + + See Rasmussen and Williams 2006, pp84 for details regarding the + different variants of the Matern kernel. 
+
+    Parameters
+    ----------
+    X : array of shape (n_samples_X, n_features)
+
+    Y : array of shape (n_samples_Y, n_features)
+
+    gamma : float
+
+    coef0 : float in [0.5, 1.5, 2.5, inf]
+
+    Returns
+    -------
+    kernel_matrix : array of shape (n_samples_X, n_samples_Y)
+    """
+    if coef0 == np.inf:  # fall back to rbf-kernel
+        return rbf_kernel(X, Y, gamma)
+
+    X, Y = check_pairwise_arrays(X, Y)
+    if gamma is None:
+        gamma = 1.0 / X.shape[1]
+
+    K = euclidean_distances(X, Y, squared=False)
+    if coef0 == 0.5:
+        K *= -gamma
+        np.exp(K, K)  # exponentiate K in-place
+    elif coef0 == 1.5:
+        K *= np.sqrt(3) * gamma
+        K = (1 + K) * np.exp(-K)
+    elif coef0 == 2.5:
+        K *= np.sqrt(5) * gamma
+        K = (1 + K + K ** 2 / 3.0) * np.exp(-K)
+    else:  # general case; expensive to evaluate
+        K[K == 0.0] += 1e-10  # strict zeros would result in nan
+        tmp = (np.sqrt(2 * coef0) * gamma * K)
+        K[:] = (2 ** (1 - coef0)) / scipy.special.gamma(coef0)
+        K *= tmp ** coef0
+        K *= scipy.special.kv(coef0, tmp)
+    return K
+
+
 def cosine_similarity(X, Y=None):
     """Compute cosine similarity between samples in X and Y.

@@ -1121,7 +1177,8 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=1, **kwds):
     'poly': polynomial_kernel,
     'rbf': rbf_kernel,
     'sigmoid': sigmoid_kernel,
-    'cosine': cosine_similarity, }
+    'cosine': cosine_similarity,
+    'matern': matern_kernel}


 def kernel_metrics():
@@ -1158,6 +1215,7 @@
     "polynomial": frozenset(["gamma", "degree", "coef0"]),
     "rbf": frozenset(["gamma"]),
     "sigmoid": frozenset(["gamma", "coef0"]),
+    "matern": frozenset(["gamma", "coef0"]),
 }
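A quick smoke test of the limiting behaviour added above (not part of the
patch itself; it assumes a scikit-learn build with this series applied and
mirrors the assertions that patch 3 turns into proper tests):

import numpy as np
from sklearn.metrics.pairwise import (euclidean_distances, matern_kernel,
                                      rbf_kernel)

rng = np.random.RandomState(0)
X = rng.random_sample((5, 4))

# coef0=inf falls back to the RBF kernel (first branch of matern_kernel).
np.testing.assert_array_almost_equal(
    matern_kernel(X, X, coef0=np.inf), rbf_kernel(X, X))

# coef0=0.5 reduces to the absolute-exponential kernel exp(-gamma * d).
d = euclidean_distances(X, X, squared=False)
np.testing.assert_array_almost_equal(
    matern_kernel(X, X, gamma=1.0, coef0=0.5), np.exp(-d))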
From 4614e9c579ff510dc9b3e382baef241a64f2e1e1 Mon Sep 17 00:00:00 2001
From: Jan Hendrik Metzen
Date: Fri, 5 Dec 2014 21:46:23 +0100
Subject: [PATCH 2/4] DOC Adding documentation for Matern kernel

DOC Extended documentation of Matern kernel

DOC Documentation of parameter coef0 of matern_kernel
---
 doc/modules/classes.rst     |  1 +
 doc/modules/metrics.rst     | 61 +++++++++++++++++++++++++++++++++++++
 doc/whats_new.rst           |  4 +++
 sklearn/metrics/pairwise.py | 16 ++++++++--
 4 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
index 3df61164ab870..9ea86169cb569 100644
--- a/doc/modules/classes.rst
+++ b/doc/modules/classes.rst
@@ -883,6 +883,7 @@ See the :ref:`metrics` section of the user guide for further details.
    metrics.pairwise.pairwise_kernels
    metrics.pairwise.polynomial_kernel
    metrics.pairwise.rbf_kernel
+   metrics.pairwise.matern_kernel
    metrics.pairwise_distances
    metrics.pairwise_distances_argmin
    metrics.pairwise_distances_argmin_min
diff --git a/doc/modules/metrics.rst b/doc/modules/metrics.rst
index 9bba16c7d77ff..ec3168bd414b7 100644
--- a/doc/modules/metrics.rst
+++ b/doc/modules/metrics.rst
@@ -123,6 +123,62 @@ between two vectors. This kernel is defined as:
 where ``x`` and ``y`` are the input vectors. If :math:`\gamma = \sigma^{-2}`
 the kernel is known as the Gaussian kernel of variance :math:`\sigma^2`.

+Matérn kernel
+-------------
+The function :func:`matern_kernel` is a generalization of the RBF kernel. It has
+an additional parameter :math:`\nu` (set via the keyword coef0) which controls
+the smoothness of the resulting function. The general functional form of a
+Matérn kernel is given by
+
+.. math::
+
+    k(d) = \sigma^2\frac{1}{\Gamma(\nu)2^{\nu-1}}\Bigg(\gamma\sqrt{2\nu} d\Bigg)^\nu K_\nu\Bigg(\gamma\sqrt{2\nu} d\Bigg),
+
+where :math:`d=\| x-y \|` and ``x`` and ``y`` are the input vectors.
+
+As :math:`\nu\rightarrow\infty`, the Matérn kernel converges to the RBF kernel.
+When :math:`\nu = 1/2`, the Matérn kernel becomes identical to the absolute
+exponential kernel, i.e.,
+
+.. math::
+    k(d) = \sigma^2 \exp \Bigg(-\gamma d \Bigg) \quad \quad \nu= \tfrac{1}{2}
+
+In particular, :math:`\nu = 3/2`:
+
+.. math::
+    k(d) = \sigma^2 \Bigg(1 + \gamma \sqrt{3} d \Bigg) \exp \Bigg(-\gamma \sqrt{3}d \Bigg) \quad \quad \nu= \tfrac{3}{2}
+
+and :math:`\nu = 5/2`:
+
+.. math::
+    k(d) = \sigma^2 \Bigg(1 + \gamma \sqrt{5}d +\frac{5}{3} \gamma^2d^2 \Bigg) \exp \Bigg(-\gamma \sqrt{5}d \Bigg) \quad \quad \nu= \tfrac{5}{2}
+
+are popular choices for learning functions that are not infinitely
+differentiable (as assumed by the RBF kernel) but at least once (:math:`\nu =
+3/2`) or twice differentiable (:math:`\nu = 5/2`).
+
+The following example illustrates how the Matérn kernel's covariance decreases
+with increasing dissimilarity of the two inputs for different values of coef0
+(the parameter :math:`\nu` of the Matérn kernel):
+
+.. figure:: ../auto_examples/metrics/images/plot_matern_kernel_001.png
+   :target: ../auto_examples/metrics/plot_matern_kernel.html
+   :align: center
+
+The flexibility of controlling the smoothness of the learned function via coef0
+allows adapting to the properties of the true underlying functional relation.
+The following example shows that support vector regression with a Matérn kernel
+with smaller values of coef0 can better approximate a discontinuous
+step-function:
+
+.. figure:: ../auto_examples/svm/images/plot_svm_matern_kernel_001.png
+   :target: ../auto_examples/svm/plot_svm_matern_kernel.html
+   :align: center
+
+See Rasmussen and Williams 2006, pp84 for further details regarding the
+different variants of the Matérn kernel.
+
+
 Chi-squared kernel
 ------------------
 The chi-squared kernel is a very popular choice for training non-linear SVMs in
@@ -172,3 +228,8 @@ The chi squared kernel is most commonly used on histograms (bags) of visual word
     International Journal of Computer Vision 2007
     http://eprints.pascal-network.org/archive/00002309/01/Zhang06-IJCV.pdf

+  * Rasmussen, C. E. and Williams, C.
+    Gaussian Processes for Machine Learning
+    The MIT Press, 2006
+    http://www.gaussianprocess.org/gpml/chapters/
+
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index c49769567f960..c338febb82d8b 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -75,6 +75,10 @@ New features
      for fixed user-provided cross-validation folds.
      By `untom `_.

+   - Added :func:`metrics.pairwise.matern_kernel`, a kernel where the
+     smoothness of the learned function can be controlled.
+     By `Jan Hendrik Metzen`_.
+
 Enhancements
 ............

diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index 703556b74e815..3dfa96f96ee2f 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -775,7 +775,7 @@ def matern_kernel(X, Y=None, gamma=None, coef0=1.5):
     The class of Matern kernels is a generalization of the RBF and
     absolute exponential kernel parameterized by an additional parameter
     coef0 (commonly denoted as nu in the literature). The smaller coef0,
-    the less smooth the approximated function is. For nu->inf, the kernel
+    the less smooth the approximated function is. For nu=inf, the kernel
     becomes equivalent to the RBF kernel and for nu=0.5 to the absolute
     exponential kernel. Important intermediate values are nu=1.5 (once
     differentiable functions) and nu=2.5 (twice differentiable functions).
@@ -791,7 +791,17 @@ def matern_kernel(X, Y=None, gamma=None, coef0=1.5):

     gamma : float

-    coef0 : float in [0.5, 1.5, 2.5, inf]
+    coef0 : float>0.0 (the parameter nu)
+        The parameter nu controlling the smoothness of the learned function.
+        The smaller coef0, the less smooth the approximated function is.
+        For nu=inf, the kernel becomes equivalent to the RBF kernel and for
+        nu=0.5 to the absolute exponential kernel. Important intermediate
+        values are nu=1.5 (once differentiable functions) and nu=2.5
+        (twice differentiable functions). Note that values of nu not in
+        [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost
+        (appr. 10 times higher) since they require to evaluate the modified
+        Bessel function.
+

     Returns
     -------
     kernel_matrix : array of shape (n_samples_X, n_samples_Y)
@@ -799,6 +809,8 @@ def matern_kernel(X, Y=None, gamma=None, coef0=1.5):
     """
     if coef0 == np.inf:  # fall back to rbf-kernel
         return rbf_kernel(X, Y, gamma)
+    elif coef0 <= 0.0:
+        raise ValueError("coef0 of Matérn kernel must be strictly positive.")

     X, Y = check_pairwise_arrays(X, Y)
     if gamma is None:
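The closed forms that the documentation above lists for the special cases can
be checked against the general Bessel-function expression from the same
formula. A small self-contained sketch with NumPy/SciPy only, shown here for
nu = 3/2 (gamma, nu and d below are free illustration variables, not part of
the patch):

import math
import numpy as np
import scipy.special

gamma, nu = 0.8, 1.5
d = np.linspace(0.1, 3.0, 5)

# nu = 3/2 closed form: (1 + sqrt(3)*gamma*d) * exp(-sqrt(3)*gamma*d)
closed_form = (1.0 + math.sqrt(3) * gamma * d) * np.exp(-math.sqrt(3) * gamma * d)

# General form: 2^(1-nu)/Gamma(nu) * (sqrt(2*nu)*gamma*d)^nu * K_nu(...)
tmp = math.sqrt(2 * nu) * gamma * d
general = ((2 ** (1.0 - nu)) / scipy.special.gamma(nu)
           * tmp ** nu * scipy.special.kv(nu, tmp))

np.testing.assert_array_almost_equal(closed_form, general)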
From bb8bfbbd5469ca18d5d832775a392b4c1b4e6837 Mon Sep 17 00:00:00 2001
From: Jan Hendrik Metzen
Date: Sun, 14 Dec 2014 09:29:16 +0100
Subject: [PATCH 3/4] TEST Adding tests for Matern kernel

MISC Using np.finfo(float).eps instead of 1e-10 in Matern kernel

Furthermore, fixed typo and increased readability of test_matern_kernel.

TEST Extended tests of Matern kernel
---
 examples/svm/plot_svm_matern_kernel.py |  2 +-
 sklearn/metrics/pairwise.py            |  2 +-
 sklearn/metrics/tests/test_pairwise.py | 31 +++++++++++++++++++++++---
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/examples/svm/plot_svm_matern_kernel.py b/examples/svm/plot_svm_matern_kernel.py
index 770e07d3650d3..d35406a4e6cbb 100644
--- a/examples/svm/plot_svm_matern_kernel.py
+++ b/examples/svm/plot_svm_matern_kernel.py
@@ -10,7 +10,7 @@ is compared on a (discontinuous) step-function:
  * The Matern kernel for coef0==1.5, learning a once differentiable function
  * The Matern kernel for coef0==2.5, learning a twice differentiable function
- * The Matern kernel for coef0==3.5, learning a three-rimes differentiable
+ * The Matern kernel for coef0==3.5, learning a three-times differentiable
    function
  * The absolute-exponential kernel which corresponds to a Matern kernel
    with coef0==0.5
diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index 3dfa96f96ee2f..a215031932eba 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -827,7 +827,7 @@ def matern_kernel(X, Y=None, gamma=None, coef0=1.5):
         K *= np.sqrt(5) * gamma
         K = (1 + K + K ** 2 / 3.0) * np.exp(-K)
     else:  # general case; expensive to evaluate
-        K[K == 0.0] += 1e-10  # strict zeros would result in nan
+        K[K == 0.0] += np.finfo(float).eps  # strict zeros would result in nan
         tmp = (np.sqrt(2 * coef0) * gamma * K)
         K[:] = (2 ** (1 - coef0)) / scipy.special.gamma(coef0)
         K *= tmp ** coef0
diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py
index b5a375be361d2..b758a82148309 100644
--- a/sklearn/metrics/tests/test_pairwise.py
+++ b/sklearn/metrics/tests/test_pairwise.py
@@ -19,6 +19,7 @@
 from sklearn.metrics.pairwise import linear_kernel
 from sklearn.metrics.pairwise import chi2_kernel, additive_chi2_kernel
 from sklearn.metrics.pairwise import polynomial_kernel
+from sklearn.metrics.pairwise import matern_kernel
 from sklearn.metrics.pairwise import rbf_kernel
 from sklearn.metrics.pairwise import sigmoid_kernel
 from sklearn.metrics.pairwise import cosine_similarity
@@ -178,7 +179,7 @@ def test_pairwise_kernels():
     Y = rng.random_sample((2, 4))
     # Test with all metrics that should be in PAIRWISE_KERNEL_FUNCTIONS.
     test_metrics = ["rbf", "sigmoid", "polynomial", "linear", "chi2",
-                    "additive_chi2"]
+                    "additive_chi2", "matern"]
     for metric in test_metrics:
         function = PAIRWISE_KERNEL_FUNCTIONS[metric]
         # Test with Y=None
@@ -415,7 +416,7 @@ def test_kernel_symmetry():
     rng = np.random.RandomState(0)
     X = rng.random_sample((5, 4))
     for kernel in (linear_kernel, polynomial_kernel, rbf_kernel,
-                   sigmoid_kernel, cosine_similarity):
+                   matern_kernel, sigmoid_kernel, cosine_similarity):
         K = kernel(X, X)
         assert_array_almost_equal(K, K.T, 15)
@@ -425,7 +426,7 @@ def test_kernel_sparse():
     X = rng.random_sample((5, 4))
     X_sparse = csr_matrix(X)
     for kernel in (linear_kernel, polynomial_kernel, rbf_kernel,
-                   sigmoid_kernel, cosine_similarity):
+                   matern_kernel, sigmoid_kernel, cosine_similarity):
         K = kernel(X, X)
         K2 = kernel(X_sparse, X_sparse)
         assert_array_almost_equal(K, K2)
@@ -447,6 +448,30 @@ def test_rbf_kernel():
     assert_array_almost_equal(K.flat[::6], np.ones(5))


+def test_matern_kernel():
+    rng = np.random.RandomState(0)
+    X = rng.random_sample((5, 4))
+    K = matern_kernel(X, X)
+    # the diagonal elements of a matern kernel are 1
+    assert_array_almost_equal(np.diag(K), np.ones(5))
+    # matern kernel for coef0==inf is equal to rbf kernel
+    K_rbf = rbf_kernel(X, X)
+    K = matern_kernel(X, X, coef0=np.inf)
+    assert_array_almost_equal(K, K_rbf)
+    # matern kernel for coef0==0.5 is equal to absolute exponential kernel
+    K_absexp = np.exp(-euclidean_distances(X, X, squared=False))
+    K = matern_kernel(X, X, coef0=0.5, gamma=1.0)
+    assert_array_almost_equal(K, K_absexp)
+    # test that special cases of matern kernel (coef0 in [0.5, 1.5, 2.5])
+    # result in nearly identical results as the general case for coef0 in
+    # [0.5 + tiny, 1.5 + tiny, 2.5 + tiny]
+    tiny = 1e-10
+    for coef0 in [0.5, 1.5, 2.5]:
+        K1 = matern_kernel(X, X, coef0=coef0)
+        K2 = matern_kernel(X, X, coef0=coef0 + tiny)
+        assert_array_almost_equal(K1, K2)
+
+
 def test_cosine_similarity():
     """ Test the cosine_similarity.
     """
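Beyond the direct function calls exercised by the tests above, the kernel is
also reachable through the string metric registered in patch 1. A minimal
sketch of that route, again assuming the series is applied (gamma and coef0
are forwarded to matern_kernel as keyword arguments):

import numpy as np
from sklearn.metrics.pairwise import matern_kernel, pairwise_kernels

rng = np.random.RandomState(0)
X = rng.random_sample((5, 4))

# "matern" resolves via PAIRWISE_KERNEL_FUNCTIONS, so the string metric
# and the direct call should produce the same Gram matrix.
K1 = pairwise_kernels(X, metric="matern", gamma=0.5, coef0=2.5)
K2 = matern_kernel(X, gamma=0.5, coef0=2.5)
np.testing.assert_array_almost_equal(K1, K2)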
""" From 340b4021116173c42a0864eea17b037569d83a2c Mon Sep 17 00:00:00 2001 From: Jan Hendrik Metzen Date: Sat, 17 Jan 2015 19:36:54 +0100 Subject: [PATCH 4/4] PEP8 conformity improved DOC Add "matern" as valid kernel name in various places in the doc MISC Revising Matern kernel PR based on @agramfort's review REFACTOR Using K.fill(X) instead of K[:] = X --- examples/metrics/plot_matern_kernel.py | 10 +-- examples/svm/plot_svm_matern_kernel.py | 6 +- sklearn/decomposition/kernel_pca.py | 3 +- sklearn/metrics/pairwise.py | 105 ++++++++++++------------- 4 files changed, 62 insertions(+), 62 deletions(-) diff --git a/examples/metrics/plot_matern_kernel.py b/examples/metrics/plot_matern_kernel.py index 7b2d6bf2fefef..c50019f0a6244 100644 --- a/examples/metrics/plot_matern_kernel.py +++ b/examples/metrics/plot_matern_kernel.py @@ -22,19 +22,19 @@ import numpy as np -from sklearn.metrics.pairwise import matern_kernel - import matplotlib.pyplot as plt +from sklearn.metrics.pairwise import matern_kernel + d = np.linspace(-4, 4, 500)[:, None] for coef0 in [0.5, 1.5, 2.5, np.inf]: - K = matern_kernel(d, [[0.0]], gamma=1, coef0=coef0) - plt.plot(d[:, 0], K[:, 0], label=coef0) + K = matern_kernel(d, [[0.0]], gamma=1, coef0=coef0) + plt.plot(d[:, 0], K[:, 0], label=coef0) plt.xlabel("distance") plt.ylabel("covariance") plt.yscale("log") plt.ylim(1e-3, 1e0) plt.legend(title="coef0") -plt.show() \ No newline at end of file +plt.show() diff --git a/examples/svm/plot_svm_matern_kernel.py b/examples/svm/plot_svm_matern_kernel.py index d35406a4e6cbb..506cb420fa7fe 100644 --- a/examples/svm/plot_svm_matern_kernel.py +++ b/examples/svm/plot_svm_matern_kernel.py @@ -10,7 +10,7 @@ is compared on a (discontinuous) step-function: * The Matern kernel for coef0==1.5, learning a once differentiable function * The Matern kernel for coef0==2.5, learning a twice differentiable function - * The Matern kernel for coef0==3.5, learning a three-times differentiable + * The Matern kernel for coef0==3.5, learning a three-times differentiable function * The absolute-exponential kernel which corresponds to a Matern kernel with coef0==0.5 @@ -31,10 +31,12 @@ from functools import partial import numpy as np + +import matplotlib.pyplot as plt + from sklearn.svm import NuSVR from sklearn.metrics.pairwise import matern_kernel -import matplotlib.pyplot as plt np.random.seed(0) diff --git a/sklearn/decomposition/kernel_pca.py b/sklearn/decomposition/kernel_pca.py index f447e47934e17..0e820989d14c8 100644 --- a/sklearn/decomposition/kernel_pca.py +++ b/sklearn/decomposition/kernel_pca.py @@ -24,7 +24,8 @@ class KernelPCA(BaseEstimator, TransformerMixin): n_components: int or None Number of components. If None, all non-zero components are kept. - kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed" + kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "matern" | + "precomputed" Kernel. Default: "linear" diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index a215031932eba..bd527cbe84693 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -11,6 +11,7 @@ # License: BSD 3 clause import itertools +import math import numpy as np from scipy.spatial import distance @@ -508,11 +509,9 @@ def cosine_distances(X, Y=None): Parameters ---------- - X : array_like, sparse matrix - with shape (n_samples_X, n_features). + X : array_like, sparse matrix, shape (n_samples_X, n_features). - Y : array_like, sparse matrix (optional) - with shape (n_samples_Y, n_features). 
+    Y : array_like, sparse matrix (optional), shape (n_samples_Y, n_features).

     Returns
     -------
@@ -613,10 +612,10 @@ def paired_distances(X, Y, metric="euclidean", **kwds):

     Parameters
     ----------
-    X : ndarray (n_samples, n_features)
+    X : array, shape (n_samples, n_features)
         Array 1 for distance computation.

-    Y : ndarray (n_samples, n_features)
+    Y : array, shape (n_samples, n_features)
         Array 2 for distance computation.

     metric : string or callable
@@ -631,7 +630,7 @@ def paired_distances(X, Y, metric="euclidean", **kwds):

     Returns
     -------
-    distances : ndarray (n_samples, )
+    distances : array, shape (n_samples, )

     Examples
     --------
@@ -667,13 +666,13 @@ def linear_kernel(X, Y=None):

     Parameters
     ----------
-    X : array of shape (n_samples_1, n_features)
+    X : array, shape (n_samples_X, n_features)

-    Y : array of shape (n_samples_2, n_features)
+    Y : array, shape (n_samples_Y, n_features)

     Returns
     -------
-    Gram matrix : array of shape (n_samples_1, n_samples_2)
+    Gram matrix : array, shape (n_samples_X, n_samples_Y)
     """
     X, Y = check_pairwise_arrays(X, Y)
     return safe_sparse_dot(X, Y.T, dense_output=True)
@@ -687,9 +686,9 @@ def polynomial_kernel(X, Y=None, degree=3, gamma=None, coef0=1):

     Parameters
     ----------
-    X : ndarray of shape (n_samples_1, n_features)
+    X : array, shape (n_samples_X, n_features)

-    Y : ndarray of shape (n_samples_2, n_features)
+    Y : array, shape (n_samples_Y, n_features)

     coef0 : int, default 1

@@ -697,7 +696,7 @@ def polynomial_kernel(X, Y=None, degree=3, gamma=None, coef0=1):

     Returns
     -------
-    Gram matrix : array of shape (n_samples_1, n_samples_2)
+    Gram matrix : array, shape (n_samples_X, n_samples_Y)
     """
     X, Y = check_pairwise_arrays(X, Y)
     if gamma is None:
@@ -718,15 +717,15 @@ def sigmoid_kernel(X, Y=None, gamma=None, coef0=1):

     Parameters
     ----------
-    X : ndarray of shape (n_samples_1, n_features)
+    X : array, shape (n_samples_X, n_features)

-    Y : ndarray of shape (n_samples_2, n_features)
+    Y : array, shape (n_samples_Y, n_features)

     coef0 : int, default 1

     Returns
     -------
-    Gram matrix: array of shape (n_samples_1, n_samples_2)
+    Gram matrix: array, shape (n_samples_X, n_samples_Y)
     """
     X, Y = check_pairwise_arrays(X, Y)
     if gamma is None:
@@ -749,15 +748,15 @@ def rbf_kernel(X, Y=None, gamma=None):

     Parameters
     ----------
-    X : array of shape (n_samples_X, n_features)
+    X : array, shape (n_samples_X, n_features)

-    Y : array of shape (n_samples_Y, n_features)
+    Y : array, shape (n_samples_Y, n_features)

     gamma : float

     Returns
     -------
-    kernel_matrix : array of shape (n_samples_X, n_samples_Y)
+    kernel_matrix : array, shape (n_samples_X, n_samples_Y)
     """
     X, Y = check_pairwise_arrays(X, Y)
     if gamma is None:
@@ -785,19 +784,19 @@ def matern_kernel(X, Y=None, gamma=None, coef0=1.5):

     Parameters
     ----------
-    X : array of shape (n_samples_X, n_features)
+    X : array, shape (n_samples_X, n_features)

-    Y : array of shape (n_samples_Y, n_features)
+    Y : array, shape (n_samples_Y, n_features)

     gamma : float

     coef0 : float>0.0 (the parameter nu)
         The parameter nu controlling the smoothness of the learned function.
-        The smaller coef0, the less smooth the approximated function is.
+        The smaller coef0, the less smooth the approximated function is.
+        For nu=inf, the kernel becomes equivalent to the RBF kernel and for
+        nu=0.5 to the absolute exponential kernel. Important intermediate
+        values are nu=1.5 (once differentiable functions) and nu=2.5
+        (twice differentiable functions). Note that values of nu not in
         [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost
         (appr. 10 times higher) since they require to evaluate the modified
         Bessel function.

     Returns
     -------
-    kernel_matrix : array of shape (n_samples_X, n_samples_Y)
+    kernel_matrix : array, shape (n_samples_X, n_samples_Y)
     """
     if coef0 == np.inf:  # fall back to rbf-kernel
         return rbf_kernel(X, Y, gamma)
@@ -819,17 +818,17 @@ def matern_kernel(X, Y=None, gamma=None, coef0=1.5):
     K = euclidean_distances(X, Y, squared=False)
     if coef0 == 0.5:
         K *= -gamma
-        np.exp(K, K)    # exponentiate K in-place
+        np.exp(K, K)  # exponentiate K in-place
     elif coef0 == 1.5:
-        K *= np.sqrt(3) * gamma
-        K = (1 + K) * np.exp(-K)
+        K *= math.sqrt(3) * gamma
+        K = (1. + K) * np.exp(-K)
     elif coef0 == 2.5:
-        K *= np.sqrt(5) * gamma
-        K = (1 + K + K ** 2 / 3.0) * np.exp(-K)
+        K *= math.sqrt(5) * gamma
+        K = (1. + K + K ** 2 / 3.0) * np.exp(-K)
     else:  # general case; expensive to evaluate
         K[K == 0.0] += np.finfo(float).eps  # strict zeros would result in nan
-        tmp = (np.sqrt(2 * coef0) * gamma * K)
-        K[:] = (2 ** (1 - coef0)) / scipy.special.gamma(coef0)
+        tmp = (math.sqrt(2 * coef0) * gamma * K)
+        K.fill((2 ** (1. - coef0)) / scipy.special.gamma(coef0))
         K *= tmp ** coef0
         K *= scipy.special.kv(coef0, tmp)
     return K
@@ -847,16 +846,13 @@ def cosine_similarity(X, Y=None):

     Parameters
     ----------
-    X : array_like, sparse matrix
-        with shape (n_samples_X, n_features).
+    X : array_like, sparse matrix, shape (n_samples_X, n_features).

-    Y : array_like, sparse matrix (optional)
-        with shape (n_samples_Y, n_features).
+    Y : array_like, sparse matrix (optional), shape (n_samples_Y, n_features).

     Returns
     -------
-    kernel matrix : array
-        An array with shape (n_samples_X, n_samples_Y).
+    kernel matrix : array, shape (n_samples_X, n_samples_Y).
     """
     # to avoid recursive import

@@ -894,13 +890,13 @@ def additive_chi2_kernel(X, Y=None):

     Parameters
     ----------
-    X : array-like of shape (n_samples_X, n_features)
+    X : array, shape (n_samples_X, n_features)

-    Y : array of shape (n_samples_Y, n_features)
+    Y : array, shape (n_samples_Y, n_features)

     Returns
     -------
-    kernel_matrix : array of shape (n_samples_X, n_samples_Y)
+    kernel_matrix : array, shape (n_samples_X, n_samples_Y)

     References
     ----------
@@ -947,16 +943,16 @@ def chi2_kernel(X, Y=None, gamma=1.):

     Parameters
     ----------
-    X : array-like of shape (n_samples_X, n_features)
+    X : array, shape (n_samples_X, n_features)

-    Y : array of shape (n_samples_Y, n_features)
+    Y : array, shape (n_samples_Y, n_features)

     gamma : float, default=1.
         Scaling parameter of the chi2 kernel.

     Returns
     -------
-    kernel_matrix : array of shape (n_samples_X, n_samples_Y)
+    kernel_matrix : array, shape (n_samples_X, n_samples_Y)

     References
     ----------
@@ -1111,11 +1107,11 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=1, **kwds):

     Parameters
     ----------
-    X : array [n_samples_a, n_samples_a] if metric == "precomputed", or, \
-        [n_samples_a, n_features] otherwise
+    X : array, shape (n_samples_a, n_samples_a) if metric == "precomputed", \
+        (n_samples_a, n_features) otherwise
         Array of pairwise distances between samples, or a feature array.
-    Y : array [n_samples_b, n_features]
+    Y : array, shape (n_samples_b, n_features)
         A second feature array only if X has shape [n_samples_a, n_features].

     metric : string, or callable
@@ -1146,7 +1142,7 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=1, **kwds):

     Returns
     -------
-    D : array [n_samples_a, n_samples_a] or [n_samples_a, n_samples_b]
+    D : array, shape (n_samples_a, n_samples_a) or (n_samples_a, n_samples_b)
         A distance matrix D such that D_{i, j} is the distance between the
         ith and jth vectors of the given matrix X, if Y is None.
         If Y is not None, then D_{i, j} is the distance between the ith array
@@ -1212,6 +1208,7 @@ def kernel_metrics():
       'rbf'            sklearn.pairwise.rbf_kernel
       'sigmoid'        sklearn.pairwise.sigmoid_kernel
       'cosine'         sklearn.pairwise.cosine_similarity
+      'matern'         sklearn.pairwise.matern_kernel
      ===============   ========================================
     """
     return PAIRWISE_KERNEL_FUNCTIONS
@@ -1247,12 +1244,12 @@ def pairwise_kernels(X, Y=None, metric="linear", filter_params=False,
     kernel between the arrays from both X and Y.

     Valid values for metric are::
-        ['rbf', 'sigmoid', 'polynomial', 'poly', 'linear', 'cosine']
+        ['rbf', 'sigmoid', 'polynomial', 'poly', 'linear', 'cosine', 'matern']

     Parameters
     ----------
-    X : array [n_samples_a, n_samples_a] if metric == "precomputed", or, \
-        [n_samples_a, n_features] otherwise
+    X : array, shape (n_samples_a, n_samples_a) if metric == "precomputed", \
+        (n_samples_a, n_features) otherwise
         Array of pairwise kernels between samples, or a feature array.

     Y : array [n_samples_b, n_features]
         A second feature array only if X has shape [n_samples_a, n_features].
@@ -1286,7 +1283,7 @@ def pairwise_kernels(X, Y=None, metric="linear", filter_params=False,

     Returns
     -------
-    K : array [n_samples_a, n_samples_a] or [n_samples_a, n_samples_b]
+    K : array, shape (n_samples_a, n_samples_a) or (n_samples_a, n_samples_b)
         A kernel matrix K such that K_{i, j} is the kernel between the
         ith and jth vectors of the given matrix X, if Y is None.
         If Y is not None, then K_{i, j} is the kernel between the ith array
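Since patch 4 advertises "matern" as a valid KernelPCA kernel, end-to-end
usage would look roughly like the sketch below. This is a hypothetical smoke
test, assuming the series is applied; KernelPCA forwards gamma and coef0
through pairwise_kernels with filter_params=True, which drops the unused
degree parameter for this metric.

import numpy as np
from sklearn.decomposition import KernelPCA

rng = np.random.RandomState(0)
X = rng.random_sample((30, 5))

# Project onto the two leading components of a Matern kernel PCA;
# coef0 plays the role of nu here as well.
kpca = KernelPCA(n_components=2, kernel="matern", gamma=0.5, coef0=1.5)
X_2d = kpca.fit_transform(X)
print(X_2d.shape)  # (30, 2)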