[MRG+1] Enforce deterministic output in kernel PCA (#13241) · xhluca/scikit-learn@11d5539 · GitHub
[go: up one dir, main page]

Skip to content

Commit 11d5539

Browse files
bellet authored and Xing committed
[MRG+1] Enforce deterministic output in kernel PCA (scikit-learn#13241)
* enforce deterministic output in kernel PCA
* add tests and update whats new
* replace state by rng
* simplified assert
* avoid copy
* clarify tests
* remove now useless comment
* use rng as seed everywhere
1 parent 1292815 commit 11d5539

File tree

4 files changed

+39
-4
lines changed

4 files changed

+39
-4
lines changed

doc/whats_new/v0.21.rst

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,16 +76,17 @@ Support for Python 3.4 and below has been officially dropped.
7676
the default value is used.
7777
:issue:`12988` by :user:`Zijie (ZJ) Poh <zjpoh>`.
7878

79-
:mod:`sklearn.decomposition`
80-
............................
81-
8279
- |Fix| Fixed a bug in :class:`decomposition.NMF` where `init = 'nndsvd'`,
8380
`init = 'nndsvda'`, and `init = 'nndsvdar'` are allowed when
8481
`n_components < n_features` instead of
8582
`n_components <= min(n_samples, n_features)`.
8683
:issue:`11650` by :user:`Hossein Pourbozorg <hossein-pourbozorg>` and
8784
:user:`Zijie (ZJ) Poh <zjpoh>`.
8885

86+
- |Enhancement| :class:`decomposition.KernelPCA` now has deterministic output
87+
(resolved sign ambiguity in eigenvalue decomposition of the kernel matrix).
88+
:issue:`13241` by :user:`Aurélien Bellet <bellet>`.
89+
8990
:mod:`sklearn.discriminant_analysis`
9091
....................................
9192

sklearn/decomposition/kernel_pca.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from scipy.sparse.linalg import eigsh
99

1010
from ..utils import check_random_state
11+
from ..utils.extmath import svd_flip
1112
from ..utils.validation import check_is_fitted, check_array
1213
from ..exceptions import NotFittedError
1314
from ..base import BaseEstimator, TransformerMixin, _UnstableOn32BitMixin
@@ -210,6 +211,10 @@ def _fit_transform(self, K):
210211
maxiter=self.max_iter,
211212
v0=v0)
212213

214+
# flip eigenvectors' sign to enforce deterministic output
215+
self.alphas_, _ = svd_flip(self.alphas_,
216+
np.empty_like(self.alphas_).T)
217+
213218
# sort eigenvectors in descending order
214219
indices = self.lambdas_.argsort()[::-1]
215220
self.lambdas_ = self.lambdas_[indices]

sklearn/decomposition/tests/test_kernel_pca.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from sklearn.utils.testing import (assert_array_almost_equal, assert_less,
66
assert_equal, assert_not_equal,
7-
assert_raises)
7+
assert_raises, assert_allclose)
88

99
from sklearn.decomposition import PCA, KernelPCA
1010
from sklearn.datasets import make_circles
@@ -71,6 +71,21 @@ def test_kernel_pca_consistent_transform():
7171
assert_array_almost_equal(transformed1, transformed2)
7272

7373

74+
def test_kernel_pca_deterministic_output():
75+
rng = np.random.RandomState(0)
76+
X = rng.rand(10, 10)
77+
eigen_solver = ('arpack', 'dense')
78+
79+
for solver in eigen_solver:
80+
transformed_X = np.zeros((20, 2))
81+
for i in range(20):
82+
kpca = KernelPCA(n_components=2, eigen_solver=solver,
83+
random_state=rng)
84+
transformed_X[i, :] = kpca.fit_transform(X)[0]
85+
assert_allclose(
86+
transformed_X, np.tile(transformed_X[0, :], 20).reshape(20, 2))
87+
88+
7489
def test_kernel_pca_sparse():
7590
rng = np.random.RandomState(0)
7691
X_fit = sp.csr_matrix(rng.random_sample((5, 4)))

sklearn/decomposition/tests/test_pca.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from sklearn.utils.testing import assert_almost_equal
88
from sklearn.utils.testing import assert_array_almost_equal
9+
from sklearn.utils.testing import assert_allclose
910
from sklearn.utils.testing import assert_equal
1011
from sklearn.utils.testing import assert_greater
1112
from sklearn.utils.testing import assert_raise_message
@@ -703,6 +704,19 @@ def test_pca_dtype_preservation(svd_solver):
703704
check_pca_int_dtype_upcast_to_double(svd_solver)
704705

705706

707+
def test_pca_deterministic_output():
708+
rng = np.random.RandomState(0)
709+
X = rng.rand(10, 10)
710+
711+
for solver in solver_list:
712+
transformed_X = np.zeros((20, 2))
713+
for i in range(20):
714+
pca = PCA(n_components=2, svd_solver=solver, random_state=rng)
715+
transformed_X[i, :] = pca.fit_transform(X)[0]
716+
assert_allclose(
717+
transformed_X, np.tile(transformed_X[0, :], 20).reshape(20, 2))
718+
719+
706720
def check_pca_float_dtype_preservation(svd_solver):
707721
# Ensure that PCA does not upscale the dtype when input is float32
708722
X_64 = np.random.RandomState(0).rand(1000, 4).astype(np.float64)

0 commit comments

Comments
 (0)
0