8000 [MRG] TST Ensure __dict__ is unmodified by predict, transform, etc (#… · Sundrique/scikit-learn@f425ef9 · GitHub
[go: up one dir, main page]

Skip to content

Commit f425ef9

Browse files
kioteSundrique
authored andcommitted
[MRG] TST Ensure __dict__ is unmodified by predict, transform, etc (scikit-learn#7553)
* Add test for __dict__ for estimator checks check that "predict", "transform", "decision_function" or "predict_proba" methods do not change the state of __dict__ of any estimator see scikit-learn#7297 * Add a test to test_check_estimator that shows that check_estimator fails on an estimator that violates this * Fixed bug where NMF's n_iter_ attribute was set by calls to transform
1 parent 179fd6d commit f425ef9

File tree

4 files changed

+72
-23
lines changed

4 files changed

+72
-23
lines changed

doc/whats_new.rst

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,14 @@ Enhancements
6565
- Added ``sample_weight`` parameter to :meth:`pipeline.Pipeline.score`.
6666
:issue:`7723` by `Mikhail Korobov`_.
6767

68+
- ``check_estimator`` now attempts to ensure that methods transform, predict, etc.
69+
do not set attributes on the estimator.
70+
:issue:`7533` by `Ekaterina Krivich`_.
71+
6872
Bug fixes
6973
.........
7074

71-
- Fix a bug where :class:`sklearn.feature_selection.SelectFdr` did not
75+
- Fix a bug where :class:`sklearn.feature_selection.SelectFdr` did not
7276
exactly implement Benjamini-Hochberg procedure. It formerly may have
7377
selected fewer features than it should.
7478
:issue:`7490` by `Peng Meng`_.
@@ -88,6 +92,9 @@ Bug fixes
8892
- Tree splitting criterion classes' cloning/pickling is now memory safe
8993
:issue:`7680` by `Ibraim Ganiev`_.
9094

95+
- Fixed a bug where :class:`decomposition.NMF` sets its ``n_iters_``
96+
attribute in `transform()`. :issue:`7553` by `Ekaterina Krivich`_.
97+
9198
.. _changes_0_18_1:
9299

93100
Version 0.18.1

sklearn/decomposition/nmf.py

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1016,14 +1016,6 @@ def fit_transform(self, X, y=None, W=None, H=None):
10161016
H : array-like, shape (n_components, n_features)
10171017
If init='custom', it is used as initial guess for the solution.
10181018
1019-
Attributes
1020-
----------
1021-
components_ : array-like, shape (n_components, n_features)
1022-
Factorization matrix, sometimes called 'dictionary'.
1023-
1024-
n_iter_ : int
1025-
Actual number of iterations for the transform.
1026-
10271019
Returns
10281020
-------
10291021
W: array, shape (n_samples, n_components)
@@ -1061,14 +1053,6 @@ def fit(self, X, y=None, **params):
10611053
X: {array-like, sparse matrix}, shape (n_samples, n_features)
10621054
Data matrix to be decomposed
10631055
1064-
Attributes
1065-
----------
1066-
components_ : array-like, shape (n_components, n_features)
1067-
Factorization matrix, sometimes called 'dictionary'.
1068-
1069-
n_iter_ : int
1070-
Actual number of iterations for the transform.
1071-
10721056
Returns
10731057
-------
10741058
self
@@ -1084,11 +1068,6 @@ def transform(self, X):
10841068
X: {array-like, sparse matrix}, shape (n_samples, n_features)
10851069
Data matrix to be transformed by the model
10861070
1087-
Attributes
1088-
----------
1089-
n_iter_ : int
1090-
Actual number of iterations for the transform.
1091-
10921071
Returns
10931072
-------
10941073
W: array, shape (n_samples, n_components)
@@ -1106,7 +1085,6 @@ def transform(self, X):
11061085
nls_max_iter=self.nls_max_iter, sparseness=self.sparseness,
11071086
beta=self.beta, eta=self.eta)
11081087

1109-
self.n_iter_ = n_iter_
11101088
return W
11111089

11121090
def inverse_transform(self, W):

sklearn/utils/estimator_checks.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from sklearn.utils.testing import SkipTest
3131
from sklearn.utils.testing import ignore_warnings
3232
from sklearn.utils.testing import assert_warns
33+
from sklearn.utils.testing import assert_dict_equal
3334

3435

3536
from sklearn.base import (clone, ClassifierMixin, RegressorMixin,
@@ -230,6 +231,7 @@ def _yield_all_checks(name, Estimator):
230231
yield check_fit1d_1feature
231232
yield check_fit1d_1sample
232233
yield check_get_params_invariance
234+
yield check_dict_unchanged
233235

234236

235237
def check_estimator(Estimator):
@@ -409,6 +411,49 @@ def check_dtype_object(name, Estimator):
409411

410412

411413
@ignore_warnings
414+
def check_dict_unchanged(name, Estimator):
415+
# this estimator raises
416+
# ValueError: Found array with 0 feature(s) (shape=(23, 0))
417+
# while a minimum of 1 is required.
418+
# error
419+
if name in ['SpectralCoclustering']:
420+
return
421+
rnd = np.random.RandomState(0)
422+
if name in ['RANSACRegressor']:
423+
X = 3 * rnd.uniform(size=(20, 3))
424+
else:
425+
X = 2 * rnd.uniform(size=(20, 3))
426+
427+
y = X[:, 0].astype(np.int)
428+
y = multioutput_estimator_convert_y_2d(name, y)
429+
estimator = Estimator()
430+
set_testing_parameters(estimator)
431+
if hasattr(estimator, "n_components"):
432+
estimator.n_components = 1
433+
434+
if hasattr(estimator, "n_clusters"):
435+
estimator.n_clusters = 1
436+
437+
if hasattr(estimator, "n_best"):
438+
estimator.n_best = 1
439+
440+
set_random_state(estimator, 1)
441+
442+
# should be just `estimator.fit(X, y)`
443+
# after merging #6141
444+
if name in ['SpectralBiclustering']:
445+
estimator.fit(X)
446+
else:
447+
estimator.fit(X, y)
448+
for method in ["predict", "transform", "decision_function",
449+
"predict_proba"]:
450+
if hasattr(estimator, method):
451+
dict_before = estimator.__dict__.copy()
452+
getattr(estimator, method)(X)
453+
assert_dict_equal(estimator.__dict__, dict_before,
454+
'Estimator changes __dict__ during %s' % method)
455+
456+
412457
def check_fit2d_predict1d(name, Estimator):
413458
# check by fitting a 2d array and prediting with a 1d array
414459
rnd = np.random.RandomState(0)

sklearn/utils/tests/test_estimator_checks.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,20 @@ def predict(self, X):
2828
return np.ones(X.shape[0])
2929

3030

31+
class ChangesDict(BaseEstimator):
32+
def __init__(self):
33+
self.key = 0
34+
35+
def fit(self, X, y=None):
36+
X, y = check_X_y(X, y)
37+
return self
38+
39+
def predict(self, X):
40+
X = check_array(X)
41+
self.key = 1000
42+
return np.ones(X.shape[0])
43+
44+
3145
class NoCheckinPredict(BaseBadClassifier):
3246
def fit(self, X, y):
3347
X, y = check_X_y(X, y)
@@ -75,6 +89,11 @@ def test_check_estimator():
7589
# check that predict does input validation (doesn't accept dicts in input)
7690
msg = "Estimator doesn't check for NaN and inf in predict"
7791
assert_raises_regex(AssertionError, msg, check_estimator, NoCheckinPredict)
92+
# check that estimator state does not change
93+
# at transform/predict/predict_proba time
94+
msg = 'Estimator changes __dict__ during predict'
95+
assert_raises_regex(AssertionError, msg, check_estimator, ChangesDict)
96+
7897
# check for sparse matrix input handling
7998
name = NoSparseClassifier.__name__
8099
msg = "Estimator " + name + " doesn't seem to fail gracefully on sparse data"

0 commit comments

Comments
 (0)
0