-
-
Notifications
You must be signed in to change notification settings - Fork 25.9k
[MRG] Creation of a v0 ARPACK initialization function #11524
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b3ae546
4099f91
0b8eec3
68ecfaf
67d5632
b81afe5
666c22b
d314984
128aecc
b450b03
a16e52f
b0281d3
c6ea0bd
054e9b7
3f8bfc2
bbc38f9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
|
||
from ..base import BaseEstimator, RegressorMixin, TransformerMixin | ||
from ..utils import check_array, check_consistent_length | ||
from ..utils.arpack import _init_arpack_v0 | ||
from ..utils.extmath import svd_flip | ||
from ..utils.validation import check_is_fitted, FLOAT_DTYPES | ||
from ..exceptions import ConvergenceWarning | ||
|
@@ -761,6 +762,15 @@ class PLSSVD(BaseEstimator, TransformerMixin): | |
copy : boolean, default True | ||
Whether to copy X and Y, or perform in-place computations. | ||
|
||
random_state : int, RandomState instance or None, optional (default=None) | ||
The seed of the pseudo random number generator used to initialize the | |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not used when shuffling the data. Please fix. |
||
ARPACK starting vector passed to `svds`. If int, random_state is the | ||
seed used by the random number generator; If RandomState instance, | ||
random_state is the random number generator; If None, the random number | ||
generator is the RandomState instance used by `np.random`. | ||
|
||
.. versionadded:: 0.21 | ||
|
||
Attributes | ||
---------- | ||
x_weights_ : array, [p, n_components] | ||
|
@@ -789,7 +799,7 @@ class PLSSVD(BaseEstimator, TransformerMixin): | |
... [11.9, 12.3]]) | ||
>>> plsca = PLSSVD(n_components=2) | ||
>>> plsca.fit(X, Y) | ||
PLSSVD(copy=True, n_components=2, scale=True) | ||
PLSSVD(copy=True, n_components=2, random_state=None, scale=True) | ||
>>> X_c, Y_c = plsca.transform(X, Y) | ||
>>> X_c.shape, Y_c.shape | ||
((4, 2), (4, 2)) | ||
|
@@ -800,10 +810,12 @@ class PLSSVD(BaseEstimator, TransformerMixin): | |
CCA | ||
""" | ||
|
||
def __init__(self, n_components=2, scale=True, copy=True): | ||
def __init__(self, n_components=2, scale=True, | ||
copy=True, random_state=None): | ||
self.n_components = n_components | ||
self.scale = scale | ||
self.copy = copy | ||
self.random_state = random_state | ||
|
||
def fit(self, X, Y): | ||
"""Fit model to data. | ||
|
@@ -844,7 +856,8 @@ def fit(self, X, Y): | |
if self.n_components >= np.min(C.shape): | ||
U, s, V = svd(C, full_matrices=False) | ||
else: | ||
U, s, V = svds(C, k=self.n_components) | ||
v0 = _init_arpack_v0(min(C.shape), self.random_state) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This may change the result (as per my comment in truncated svd below) |
||
U, s, V = svds(C, k=self.n_components, v0=v0) | ||
# Deterministic output | ||
U, V = svd_flip(U, V) | ||
V = V.T | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ | |
|
||
from ..base import BaseEstimator, TransformerMixin | ||
from ..utils import check_array, check_random_state | ||
from ..utils.arpack import _init_arpack_v0 | ||
from ..utils.extmath import randomized_svd, safe_sparse_dot, svd_flip | ||
from ..utils.sparsefuncs import mean_variance_axis | ||
|
||
|
@@ -160,7 +161,8 @@ def fit_transform(self, X, y=None): | |
random_state = check_random_state(self.random_state) | ||
|
||
if self.algorithm == "arpack": | ||
U, Sigma, VT = svds(X, k=self.n_components, tol=self.tol) | ||
v0 = _init_arpack_v0(min(X.shape), self.random_state) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is going to change the results of the function.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
U, Sigma, VT = svds(X, k=self.n_components, tol=self.tol, v0=v0) | ||
# svds doesn't abide by scipy.linalg.svd/randomized_svd | ||
# conventions, so reverse its outputs. | ||
Sigma = Sigma[::-1] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from .validation import check_random_state | ||
|
||
|
||
def _init_arpack_v0(size, random_state):
    """Draw the starting vector ``v0`` used by ARPACK-based solvers.

    The entries are sampled from the uniform distribution on [-1, 1]. This
    sampling scheme was chosen to be consistent with ARPACK's own
    initialization, as another initialization can lead to convergence
    issues.

    Parameters
    ----------
    size : int
        Number of entries of the starting vector.

    random_state : int, RandomState instance or None
        Source of randomness used to sample the starting vector. If int,
        random_state is the seed used by the random number generator; if
        RandomState instance, random_state is the random number generator;
        if None, the random number generator is the RandomState instance
        used by `np.random`.

    Returns
    -------
    v0 : array of shape (size,)
        The sampled starting vector.
    """
    # Normalize the seed / generator / None into a generator object.
    rng = check_random_state(random_state)
    return rng.uniform(-1, 1, size)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import numpy as np | ||
|
||
from sklearn.utils.arpack import _init_arpack_v0 | ||
|
||
|
||
def test_init_arpack_v0():
    """Check seed sensitivity and sample statistics of `_init_arpack_v0`."""
    n_seeds, size = 100, 1000

    # One starting vector per integer seed 0..99.
    vectors = [_init_arpack_v0(size, seed) for seed in range(n_seeds)]

    # Vectors drawn from consecutive seeds must not share any entry.
    for previous, current in zip(vectors, vectors[1:]):
        assert not np.any(np.equal(current, previous))

    # Pooled samples should look uniform on [-1, 1]:
    # mean 0 and standard deviation 1/sqrt(3).
    pooled = np.concatenate(vectors)
    assert np.allclose(np.mean(pooled), 0, atol=1e-2)
    assert np.allclose(np.std(pooled), 1/np.sqrt(3), atol=1e-3)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This creates a private method. We only need to document user-facing changes in what's new.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All right then I'll remove that