diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
index 36582d834c708..fc79a4d859c18 100644
--- a/doc/whats_new/v0.21.rst
+++ b/doc/whats_new/v0.21.rst
@@ -76,9 +76,6 @@ Support for Python 3.4 and below has been officially dropped.
   the default value is used.
   :issue:`12988` by :user:`Zijie (ZJ) Poh `.
 
-:mod:`sklearn.decomposition`
-............................
-
 - |Fix| Fixed a bug in :class:`decomposition.NMF` where `init = 'nndsvd'`,
   `init = 'nndsvda'`, and `init = 'nndsvdar'` are allowed when
   `n_components < n_features` instead of
@@ -86,6 +83,10 @@ Support for Python 3.4 and below has been officially dropped.
   :issue:`11650` by :user:`Hossein Pourbozorg ` and
   :user:`Zijie (ZJ) Poh `.
 
+- |Enhancement| :class:`decomposition.KernelPCA` now has deterministic output
+  (resolved sign ambiguity in eigenvalue decomposition of the kernel matrix).
+  :issue:`13241` by :user:`Aurélien Bellet `.
+
 :mod:`sklearn.discriminant_analysis`
 ....................................
 
diff --git a/sklearn/decomposition/kernel_pca.py b/sklearn/decomposition/kernel_pca.py
index bb91d0cbbad6c..bff79be619be9 100644
--- a/sklearn/decomposition/kernel_pca.py
+++ b/sklearn/decomposition/kernel_pca.py
@@ -8,6 +8,7 @@
 from scipy.sparse.linalg import eigsh
 
 from ..utils import check_random_state
+from ..utils.extmath import svd_flip
 from ..utils.validation import check_is_fitted, check_array
 from ..exceptions import NotFittedError
 from ..base import BaseEstimator, TransformerMixin, _UnstableOn32BitMixin
@@ -210,6 +211,12 @@ def _fit_transform(self, K):
                                                 maxiter=self.max_iter,
                                                 v0=v0)
 
+        # flip eigenvectors' sign to enforce deterministic output; svd_flip
+        # needs a second matrix, whose flipped copy is discarded, so pass
+        # zeros rather than uninitialised memory
+        self.alphas_, _ = svd_flip(self.alphas_,
+                                   np.zeros_like(self.alphas_).T)
+
         # sort eigenvectors in descending order
         indices = self.lambdas_.argsort()[::-1]
         self.lambdas_ = self.lambdas_[indices]
diff --git a/sklearn/decomposition/tests/test_kernel_pca.py b/sklearn/decomposition/tests/test_kernel_pca.py
index 4e4c5cb2be4b5..0b78f74a58143 100644
--- a/sklearn/decomposition/tests/test_kernel_pca.py
+++ b/sklearn/decomposition/tests/test_kernel_pca.py
@@ -4,7 +4,7 @@
 
 from sklearn.utils.testing import (assert_array_almost_equal, assert_less,
                                    assert_equal, assert_not_equal,
-                                   assert_raises)
+                                   assert_raises, assert_allclose)
 
 from sklearn.decomposition import PCA, KernelPCA
 from sklearn.datasets import make_circles
@@ -71,6 +71,23 @@ def test_kernel_pca_consistent_transform():
     assert_array_almost_equal(transformed1, transformed2)
 
 
+def test_kernel_pca_deterministic_output():
+    # Repeated fits sharing one RandomState must give the same projection of
+    # the first sample, whatever eigen solver is used.
+    rng = np.random.RandomState(0)
+    X = rng.rand(10, 10)
+    eigen_solver = ('arpack', 'dense')
+
+    for solver in eigen_solver:
+        transformed_X = np.zeros((20, 2))
+        for i in range(20):
+            kpca = KernelPCA(n_components=2, eigen_solver=solver,
+                             random_state=rng)
+            transformed_X[i, :] = kpca.fit_transform(X)[0]
+        assert_allclose(
+            transformed_X, np.tile(transformed_X[0, :], 20).reshape(20, 2))
+
+
 def test_kernel_pca_sparse():
     rng = np.random.RandomState(0)
     X_fit = sp.csr_matrix(rng.random_sample((5, 4)))
diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index db562836cbab0..cbd9ca06143ba 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -6,6 +6,7 @@
 
 from sklearn.utils.testing import assert_almost_equal
 from sklearn.utils.testing import assert_array_almost_equal
+from sklearn.utils.testing import assert_allclose
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_greater
 from sklearn.utils.testing import assert_raise_message
@@ -703,6 +704,21 @@ def test_pca_dtype_preservation(svd_solver):
     check_pca_int_dtype_upcast_to_double(svd_solver)
 
 
+def test_pca_deterministic_output():
+    # Repeated fits sharing one RandomState must give the same projection of
+    # the first sample, whatever SVD solver is used.
+    rng = np.random.RandomState(0)
+    X = rng.rand(10, 10)
+
+    for solver in solver_list:
+        transformed_X = np.zeros((20, 2))
+        for i in range(20):
+            pca = PCA(n_components=2, svd_solver=solver, random_state=rng)
+            transformed_X[i, :] = pca.fit_transform(X)[0]
+        assert_allclose(
+            transformed_X, np.tile(transformed_X[0, :], 20).reshape(20, 2))
+
+
 def check_pca_float_dtype_preservation(svd_solver):
     # Ensure that PCA does not upscale the dtype when input is float32
     X_64 = np.random.RandomState(0).rand(1000, 4).astype(np.float64)