10000 Address NicolasHug's comments on docstrings and atol in tests · scikit-learn/scikit-learn@473b940 · GitHub
[go: up one dir, main page]

Skip to content

Commit 473b940

Browse files
Address NicolasHug's comments on docstrings and atol in tests
1 parent d18a339 commit 473b940

File tree

6 files changed

+25
-24
lines changed

6 files changed

+25
-24
lines changed

sklearn/decomposition/pca.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -159,24 +159,24 @@ class PCA(_BasePCA):
159159
160160
svd_solver : string {'auto', 'full', 'arpack', 'randomized'}
161161
auto :
162-
the solver is selected by a default policy based on `X.shape` and
162+
The solver is selected by a default policy based on `X.shape` and
163163
`n_components`: if the input data is larger than 500x500 and the
164164
number of components to extract is lower than 80% of the smallest
165165
dimension of the data, then the more efficient 'randomized'
166166
method is enabled. Otherwise the exact full SVD is computed and
167167
optionally truncated afterwards.
168168
169-
in case sparse data is used, 'randomized' is used as this is the
169+
In case sparse data is used, 'randomized' is used as this is the
170170
only method that supports sparse data.
171171
full :
172-
run exact full SVD calling the standard LAPACK solver via
172+
Run exact full SVD calling the standard LAPACK solver via
173173
`scipy.linalg.svd` and select the components by postprocessing
174174
arpack :
175-
run SVD truncated to n_components calling ARPACK solver via
175+
Run SVD truncated to n_components calling ARPACK solver via
176176
`scipy.sparse.linalg.svds`. It requires strictly
177177
0 < n_components < min(X.shape)
178178
randomized :
179-
run randomized SVD by the method of Halko et al. This is the only
179+
Run randomized SVD by the method of Halko et al. This is the only
180180
method that supports sparse data.
181181
182182
.. versionadded:: 0.18.0
@@ -403,7 +403,7 @@ def _fit(self, X):
403403
# Ensure we don't try call arpack or full on a sparse matrix
404404
if issparse(X) and self._fit_svd_solver != 'randomized':
405405
raise ValueError(
406-
'only the randomized solver supports sparse matrices'
406+
'Only the randomized solver supports sparse matrices'
407407
)
408408

409409
# Call different fits for either full or truncated SVD

sklearn/decomposition/tests/test_pca.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -264,9 +264,9 @@ def test_singular_values():
264264
# Increase the number of power iterations to get greater accuracy in tests
265265
rpca = PCA(n_components=2, svd_solver='randomized', iterated_power=40,
266266
random_state=rng).fit(X)
267-
assert_allclose(pca.singular_values_, apca.singular_values_, 12)
268-
assert_allclose(pca.singular_values_, rpca.singular_values_, 12)
269-
assert_allclose(apca.singular_values_, rpca.singular_values_, 12)
267+
assert_allclose(pca.singular_values_, apca.singular_values_, atol=12)
268+
assert_allclose(pca.singular_values_, rpca.singular_values_, atol=12)
269+
assert_allclose(apca.singular_values_, rpca.singular_values_, atol=12)
270270

271271
# Compare to the Frobenius norm
272272
X_pca = pca.transform(X)
@@ -717,7 +717,7 @@ def test_pca_sparse_input_bad_solvers(svd_solver):
717717

718718
pca = PCA(n_components=3, svd_solver=svd_solver)
719719

720-
with pytest.raises(ValueError, match='only the randomized solver supports '
720+
with pytest.raises(ValueError, match='Only the randomized solver supports '
721721
'sparse matrices'):
722722
pca.fit(X)
723723

sklearn/utils/extmath.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -386,14 +386,14 @@ def randomized_pca(A, n_components, n_oversamples=10, n_iter="auto",
386386
n_components : int
387387
Number of singular values and vectors to extract.
388388
389-
n_oversamples : int (default is 10)
389+
n_oversamples : int (default=10)
390390
Additional number of random vectors to sample the range of M so as
391391
to ensure proper conditioning. The total number of random vectors
392392
used to find the range of M is n_components + n_oversamples. Smaller
393393
number can improve speed but can negatively impact the quality of
394394
approximation of singular vectors and singular values.
395395
396-
n_iter : int or 'auto' (default is 'auto')
396+
n_iter : int or 'auto' (default='auto')
397397
Number of power iterations. It can be used to deal with very noisy
398398
problems. When 'auto', it is set to 4, unless `n_components` is small
399399
(< .1 * min(X.shape)) `n_iter` in which case is set to 7.
@@ -407,7 +407,7 @@ def randomized_pca(A, n_components, n_oversamples=10, n_iter="auto",
407407
but can lose slightly in accuracy). The 'auto' mode applies no
408408
normalization if `n_iter` <= 2 and switches to LU otherwise.
409409
410-
flip_sign : boolean, (True by default)
410+
flip_sign : boolean, (default=True)
411411
The output of a singular value decomposition is only unique up to a
412412
permutation of the signs of the singular vectors. If `flip_sign` is
413413
set to `True`, the sign ambiguity is resolved by making the largest

sklearn/utils/sparsefuncs.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,9 @@ def mean_variance_axis(X, axis, ddof=0):
7272
axis : int (either 0 or 1)
7373
Axis along which the axis should be computed.
7474
75-
ddof : int, optional
75+
ddof : int, optional (default=0)
7676
“Delta Degrees of Freedom”: the divisor used in the calculation is
77-
``N - ddof``, where ``N`` represents the number of elements. By default
78-
ddof is zero.
77+
``N - ddof``, where ``N`` represents the number of elements.
7978
8079
.. versionadded:: 0.21
8180
@@ -95,7 +94,10 @@ def mean_variance_axis(X, axis, ddof=0):
9594
raise ValueError('ddof cannot be <0')
9695

9796
if ddof >= X.shape[axis]:
98-
raise ValueError('ddof must be <N')
97+
raise ValueError(
98+
'ddof=%r must be smaller than the number of samples=%r' % (
99+
ddof, X.shape[axis])
100+
)
99101

100102
if isinstance(X, sp.csr_matrix):
101103
if axis == 0:

sklearn/utils/sparsefuncs_fast.pyx

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,9 @@ def csr_mean_variance_axis0(X, ddof=0):
6363
X : CSR sparse matrix, shape (n_samples, n_features)
6464
Input data.
6565
66-
ddof : int, optional
66+
ddof : int, optional (default=0)
6767
“Delta Degrees of Freedom”: the divisor used in the calculation is
68-
``N - ddof``, where ``N`` represents the number of elements. By default
69-
ddof is zero.
68+
``N - ddof``, where ``N`` represents the number of elements.
7069
7170
.. versionadded:: 0.21
7271
@@ -156,10 +155,9 @@ def csc_mean_variance_axis0(X, ddof=0):
156155
X : CSC sparse matrix, shape (n_samples, n_features)
157156
Input data.
158157
159-
ddof : int, optional
158+
ddof : int, optional (default=0)
160159
“Delta Degrees of Freedom”: the divisor used in the calculation is
161-
``N - ddof``, where ``N`` represents the number of elements. By default
162-
ddof is zero.
160+
``N - ddof``, where ``N`` represents the number of elements.
163161
164162
.. versionadded:: 0.21
165163

sklearn/utils/tests/test_sparsefuncs.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,8 @@ def test_mean_variance_too_large_ddof():
116116
X, _ = make_classification(5, 4, random_state=0)
117117
X = sp.csr_matrix(X)
118118

119-
with pytest.raises(ValueError, match='ddof must be <N'):
119+
with pytest.raises(ValueError, match='ddof=10 must be smaller than the '
120+
'number of samples=5'):
120121
mean_variance_axis(X, axis=0, ddof=10)
121122

122123

0 commit comments

Comments (0)