From fd07f3ce44d17c215e0f76a67a64e8461c4bc04c Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 6 Sep 2017 12:37:04 +0200
Subject: [PATCH 1/4] FIX avoid making deepcopy in clone

---
 sklearn/base.py | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index aa4f9f9ce17c1..e3670def51e5f 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -10,6 +10,7 @@
 from scipy import sparse
 from .externals import six
 from .utils.fixes import signature
+from .exceptions import ChangedBehaviorWarning
 from . import __version__
 
 
@@ -26,7 +27,7 @@ def _first_and_last_element(arr):
         return arr[0, 0], arr[-1, -1]
 
 
-def clone(estimator, safe=True):
+def clone(estimator, safe=True, deepcopy=None):
     """Constructs a new estimator with the same parameters.
 
     Clone does a deep copy of the model in an estimator
@@ -39,17 +40,39 @@ def clone(estimator, safe=True, deepcopy=None):
         The estimator or group of estimators to be cloned
 
     safe : boolean, optional
-        If safe is false, clone will fall back to a deep copy on objects
-        that are not estimators.
+
+        If safe is false, clone will fall back to a copy (deep copy or simple
+        copy depending on the ``deepcopy`` parameter) on objects that are not
+        estimators.
+
+        .. deprecated:: 0.20
+           From 0.22, only a simple copy will be done instead of a deep copy.
+           Use ``deepcopy=True`` to get the previous behavior.
+
+    deepcopy : boolean, optional
+        Whether to make a deep copy or a simple copy of the objects that ware
+        not estimators.
+
+        .. versionadded:: 0.20
 
     """
+    if deepcopy is None:
+        warnings.warn("A simple copy will be performed after 0.22 instead of a"
+                      " deep copy. Set 'deepcopy=True' if you wish to make a"
+                      " deep copy of the objects which are not estimators.",
+                      ChangedBehaviorWarning)
+        deepcopy = True
+
     estimator_type = type(estimator)
     # XXX: not handling dictionaries
     if estimator_type in (list, tuple, set, frozenset):
         return estimator_type([clone(e, safe=safe) for e in estimator])
     elif not hasattr(estimator, 'get_params'):
         if not safe:
-            return copy.deepcopy(estimator)
+            if deepcopy:
+                return copy.deepcopy(estimator)
+            else:
+                return copy.copy(estimator)
         else:
             raise TypeError("Cannot clone object '%s' (type %s): "
                             "it does not seem to be a scikit-learn estimator "
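Patch 1 makes the copy strategy for non-estimator parameters explicit. The difference between the two branches it introduces is plain stdlib copy semantics; a minimal sketch (not part of the patch, the dict and array are illustrative):

```python
import copy
import numpy as np

# What the patched clone() switches between for a non-estimator parameter p:
#   deepcopy=True  -> copy.deepcopy(p)  (pre-0.20 behavior, kept as the default
#                                        during the deprecation window)
#   deepcopy=False -> copy.copy(p)      (planned default from 0.22)
p = {"coef": np.arange(3)}

deep = copy.deepcopy(p)   # owns its own array
shallow = copy.copy(p)    # new dict, but shares the same array object

assert deep["coef"] is not p["coef"]
assert shallow["coef"] is p["coef"]
```

At this stage, calling `clone` without an explicit `deepcopy` still performs the deep copy but emits `ChangedBehaviorWarning` to announce the planned 0.22 default flip.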
From 47084ab1c6d1d2da3575439c5a71bb78b0117362 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 6 Sep 2017 12:51:31 +0200
Subject: [PATCH 2/4] propagate deepcopy

---
 sklearn/base.py            | 5 +++--
 sklearn/tests/test_base.py | 7 +++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index e3670def51e5f..9ce2919777661 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -66,7 +66,8 @@ def clone(estimator, safe=True, deepcopy=None):
     estimator_type = type(estimator)
     # XXX: not handling dictionaries
     if estimator_type in (list, tuple, set, frozenset):
-        return estimator_type([clone(e, safe=safe) for e in estimator])
+        return estimator_type([clone(e, safe=safe, deepcopy=deepcopy)
+                               for e in estimator])
     elif not hasattr(estimator, 'get_params'):
         if not safe:
             if deepcopy:
@@ -81,7 +82,7 @@ def clone(estimator, safe=True, deepcopy=None):
     klass = estimator.__class__
     new_object_params = estimator.get_params(deep=False)
     for name, param in six.iteritems(new_object_params):
-        new_object_params[name] = clone(param, safe=False)
+        new_object_params[name] = clone(param, safe=False, deepcopy=deepcopy)
     new_object = klass(**new_object_params)
     params_set = new_object.get_params(deep=False)

diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py
index 948d5818b9b0e..ce93d9f7d422f 100644
--- a/sklearn/tests/test_base.py
+++ b/sklearn/tests/test_base.py
@@ -25,6 +25,7 @@
 from sklearn.tree import DecisionTreeRegressor
 from sklearn import datasets
 from sklearn.utils import deprecated
+from sklearn.exceptions import ChangedBehaviorWarning
 from sklearn.base import TransformerMixin
 from sklearn.utils.mocking import MockDataFrame
 
@@ -188,6 +189,12 @@ def test_clone_sparse_matrices():
         assert_array_equal(clf.empty.toarray(), clf_cloned.empty.toarray())
 
 
+def test_clone_change_behavior_warning():
+    assert_warns_message(ChangedBehaviorWarning,
+                         "A simple copy will be performed after 0.22",
+                         clone, MyEstimator())
+
+
 def test_repr():
     # Smoke test the repr of the base estimator.
     my_estimator = MyEstimator()

From 8c4a28e765c26895ee95f7b8f192c9e92edc8ddd Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 6 Sep 2017 23:18:27 +0200
Subject: [PATCH 3/4] only raise the warning when safe is false

---
 sklearn/base.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 9ce2919777661..aeb44699b4197 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -50,16 +50,16 @@ def clone(estimator, safe=True, deepcopy=None):
           Use ``deepcopy=True`` to get the previous behavior.
 
     deepcopy : boolean, optional
-        Whether to make a deep copy or a simple copy of the objects that ware
+        Whether to make a deep copy or a simple copy of the objects that are
         not estimators.
 
         .. versionadded:: 0.20
 
     """
-    if deepcopy is None:
+    if not safe and deepcopy is None:
         warnings.warn("A simple copy will be performed after 0.22 instead of a"
                       " deep copy. Set 'deepcopy=True' if you wish to make a"
-                      " deep copy of the objects which are not estimators.",
+                      " deep copy of the parameters which are not estimators.",
                       ChangedBehaviorWarning)
         deepcopy = True

From f8dcdeb815a6e9f8db4371ea50eb0bb76618a01e Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 7 Sep 2017 16:16:24 +0200
Subject: [PATCH 4/4] change all cloning occurrences

---
 sklearn/calibration.py                         |   2 +-
 sklearn/cross_validation.py                    |  10 +-
 sklearn/decomposition/tests/test_nmf.py        |   2 +-
 sklearn/ensemble/base.py                       |   2 +-
 .../ensemble/tests/test_gradient_boosting.py   |  10 +-
 sklearn/ensemble/voting_classifier.py          |   3 +-
 sklearn/feature_extraction/tests/test_text.py  |   2 +-
 sklearn/feature_selection/from_model.py        |   4 +-
 sklearn/feature_selection/rfe.py               |   6 +-
 sklearn/gaussian_process/gpc.py                |   2 +-
 sklearn/gaussian_process/gpr.py                |   2 +-
 sklearn/gaussian_process/kernels.py            |   2 +-
 .../gaussian_process/tests/test_kernels.py     |   6 +-
 sklearn/grid_search.py                         |   7 +-
 sklearn/learning_curve.py                      |   8 +-
 sklearn/linear_model/ransac.py                 |   2 +-
 sklearn/linear_model/tests/test_sag.py         |  20 ++--
 sklearn/linear_model/tests/test_sgd.py         |   2 +-
 sklearn/model_selection/_search.py             |   8 +-
 sklearn/model_selection/_validation.py         |  13 ++-
 sklearn/model_selection/tests/test_search.py   |   4 +-
 .../model_selection/tests/test_validation.py   |   2 +-
 sklearn/multiclass.py                          |   7 +-
 sklearn/multioutput.py                         |   6 +-
 sklearn/pipeline.py                            |   2 +-
 sklearn/svm/tests/test_sparse.py               |   2 +-
 sklearn/svm/tests/test_svm.py                  |   2 +-
 sklearn/tests/test_base.py                     |  21 ++--
 sklearn/tests/test_dummy.py                    |   6 +-
 sklearn/tests/test_multioutput.py              |   8 +-
 sklearn/tests/test_pipeline.py                 |   5 +-
 sklearn/utils/estimator_checks.py              | 101 +++++++++---------
 32 files changed, 149 insertions(+), 130 deletions(-)
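Patch 4 is mechanical: every internal call site that clones a user-supplied estimator now opts out of the deep copy explicitly. A simplified sketch of the recurring pattern (hypothetical helper name, not an excerpt from the diffs below):

```python
from sklearn.base import clone

def _fit_one_fold(estimator, X, y, train):
    # Library code fits the clone immediately and never mutates it through
    # shared parameters, so the cheaper shallow copy is sufficient here.
    est = clone(estimator, deepcopy=False)  # was: clone(estimator)
    return est.fit(X[train], y[train])
```

User-facing behavior is unchanged: fitting still happens on a fresh, unfitted clone; only the copying of non-estimator constructor parameters becomes shallow.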
diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 0d2f76cd12239..6ccb40d4b400d 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -172,7 +172,7 @@ def fit(self, X, y, sample_weight=None):
                 check_consistent_length(y, sample_weight)
             base_estimator_sample_weight = sample_weight
         for train, test in cv.split(X, y):
-            this_estimator = clone(base_estimator)
+            this_estimator = clone(base_estimator, deepcopy=False)
             if base_estimator_sample_weight is not None:
                 this_estimator.fit(
                     X[train], y[train],
diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py
index 7646459da3936..bb067c496ddea 100644
--- a/sklearn/cross_validation.py
+++ b/sklearn/cross_validation.py
@@ -1378,7 +1378,9 @@ def cross_val_predict(estimator, X, y=None, cv=None, n_jobs=1,
     # independent, and that it is pickle-able.
     parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                         pre_dispatch=pre_dispatch)
-    preds_blocks = parallel(delayed(_fit_and_predict)(clone(estimator), X, y,
+    preds_blocks = parallel(delayed(_fit_and_predict)(clone(estimator,
+                                                            deepcopy=False),
+                                                      X, y,
                                                       train, test, verbose,
                                                       fit_params)
                             for train, test in cv)
@@ -1575,7 +1577,8 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1,
     # independent, and that it is pickle-able.
     parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                         pre_dispatch=pre_dispatch)
-    scores = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorer,
+    scores = parallel(delayed(_fit_and_score)(clone(estimator, deepcopy=False),
+                                              X, y, scorer,
                                               train, test, verbose, None,
                                               fit_params)
                       for train, test in cv)
@@ -1942,7 +1945,8 @@ def permutation_test_score(estimator, X, y, cv=None,
 
     # We clone the estimator to make sure that all the folds are
     # independent, and that it is pickle-able.
-    score = _permutation_test_score(clone(estimator), X, y, cv, scorer)
+    score = _permutation_test_score(clone(estimator, deepcopy=False),
+                                    X, y, cv, scorer)
     permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
         delayed(_permutation_test_score)(
             clone(estimator), X, _shuffle(y, labels, random_state), cv,
diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py
index 3ce53b550cb0e..6e4206f959509 100644
--- a/sklearn/decomposition/tests/test_nmf.py
+++ b/sklearn/decomposition/tests/test_nmf.py
@@ -163,7 +163,7 @@ def test_nmf_sparse_input():
     for solver in ('cd', 'mu'):
         est1 = NMF(solver=solver, n_components=5, init='random',
                    random_state=0, tol=1e-2)
-        est2 = clone(est1)
+        est2 = clone(est1, deepcopy=False)
 
         W1 = est1.fit_transform(A)
         W2 = est2.fit_transform(A_sparse)
diff --git a/sklearn/ensemble/base.py b/sklearn/ensemble/base.py
index 2477cc1c21c7d..31633bed4e4a0 100644
--- a/sklearn/ensemble/base.py
+++ b/sklearn/ensemble/base.py
@@ -122,7 +122,7 @@ def _make_estimator(self, append=True, random_state=None):
         Warning: This method should be used to properly instantiate new
         sub-estimators.
         """
-        estimator = clone(self.base_estimator_)
+        estimator = clone(self.base_estimator_, deepcopy=False)
         estimator.set_params(**dict((p, getattr(self, p))
                                     for p in self.estimator_params))
diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py
index 2042da3474ec9..c227f4191c33d 100644
--- a/sklearn/ensemble/tests/test_gradient_boosting.py
+++ b/sklearn/ensemble/tests/test_gradient_boosting.py
@@ -789,7 +789,7 @@ def test_warm_start_equal_n_estimators():
         est = Cls(n_estimators=100, max_depth=1)
         est.fit(X, y)
 
-        est2 = clone(est)
+        est2 = clone(est, deepcopy=False)
         est2.set_params(n_estimators=est.n_estimators, warm_start=True)
         est2.fit(X, y)
 
@@ -1160,15 +1160,15 @@ def test_gradient_boosting_validation_fraction():
                                      validation_fraction=0.1,
                                      learning_rate=0.1, max_depth=3,
                                      random_state=42)
-    gbc2 = clone(gbc).set_params(validation_fraction=0.3)
-    gbc3 = clone(gbc).set_params(n_iter_no_change=20)
+    gbc2 = clone(gbc, deepcopy=False).set_params(validation_fraction=0.3)
+    gbc3 = clone(gbc, deepcopy=False).set_params(n_iter_no_change=20)
 
     gbr = GradientBoostingRegressor(n_estimators=100, n_iter_no_change=10,
                                     learning_rate=0.1, max_depth=3,
                                     validation_fraction=0.1,
                                     random_state=42)
-    gbr2 = clone(gbr).set_params(validation_fraction=0.3)
-    gbr3 = clone(gbr).set_params(n_iter_no_change=20)
+    gbr2 = clone(gbr, deepcopy=False).set_params(validation_fraction=0.3)
+    gbr3 = clone(gbr, deepcopy=False).set_params(n_iter_no_change=20)
 
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
     # Check if validation_fraction has an effect
diff --git a/sklearn/ensemble/voting_classifier.py b/sklearn/ensemble/voting_classifier.py
index 26bc8e66df01a..6a3c081dd59c7 100644
--- a/sklearn/ensemble/voting_classifier.py
+++ b/sklearn/ensemble/voting_classifier.py
@@ -193,7 +193,8 @@ def fit(self, X, y, sample_weight=None):
         transformed_y = self.le_.transform(y)
 
         self.estimators_ = Parallel(n_jobs=self.n_jobs)(
-                delayed(_parallel_fit_estimator)(clone(clf), X, transformed_y,
+                delayed(_parallel_fit_estimator)(clone(clf, deepcopy=False),
+                                                 X, transformed_y,
                                                  sample_weight=sample_weight)
                     for clf in clfs if clf is not None)
diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py
index 9e613b1bca8c1..2d45250e99815 100644
--- a/sklearn/feature_extraction/tests/test_text.py
+++ b/sklearn/feature_extraction/tests/test_text.py
@@ -980,7 +980,7 @@ def test_tfidfvectorizer_export_idf():
 
 def test_vectorizer_vocab_clone():
     vect_vocab = TfidfVectorizer(vocabulary=["the"])
-    vect_vocab_clone = clone(vect_vocab)
+    vect_vocab_clone = clone(vect_vocab, deepcopy=False)
     vect_vocab.fit(ALL_FOOD_DOCS)
     vect_vocab_clone.fit(ALL_FOOD_DOCS)
     assert_equal(vect_vocab_clone.vocabulary_, vect_vocab.vocabulary_)
diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py
index 2502643453d79..58041791295c3 100644
--- a/sklearn/feature_selection/from_model.py
+++ b/sklearn/feature_selection/from_model.py
@@ -165,7 +165,7 @@ def fit(self, X, y=None, **fit_params):
         if self.prefit:
             raise NotFittedError(
                 "Since 'prefit=True', call transform directly")
-        self.estimator_ = clone(self.estimator)
+        self.estimator_ = clone(self.estimator, deepcopy=False)
         self.estimator_.fit(X, y, **fit_params)
         return self
 
@@ -198,6 +198,6 @@ def partial_fit(self, X, y=None, **fit_params):
             raise NotFittedError(
                 "Since 'prefit=True', call transform directly")
         if not hasattr(self, "estimator_"):
-            self.estimator_ = clone(self.estimator)
+            self.estimator_ = clone(self.estimator, deepcopy=False)
         self.estimator_.partial_fit(X, y, **fit_params)
         return self
diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py
index d505099cc6a88..4abe612dc5dce 100644
--- a/sklearn/feature_selection/rfe.py
+++ b/sklearn/feature_selection/rfe.py
@@ -166,7 +166,7 @@ def _fit(self, X, y, step_score=None):
             features = np.arange(n_features)[support_]
 
             # Rank the remaining features
-            estimator = clone(self.estimator)
+            estimator = clone(self.estimator, deepcopy=False)
             if self.verbose > 0:
                 print("Fitting estimator with %d features." % np.sum(support_))
 
@@ -204,7 +204,7 @@ def _fit(self, X, y, step_score=None):
         # Set final attributes
         features = np.arange(n_features)[support_]
-        self.estimator_ = clone(self.estimator)
+        self.estimator_ = clone(self.estimator, deepcopy=False)
         self.estimator_.fit(X[:, features], y)
 
         # Compute step score when only n_features_to_select features left
@@ -450,7 +450,7 @@ def fit(self, X, y):
         self.support_ = rfe.support_
         self.n_features_ = rfe.n_features_
         self.ranking_ = rfe.ranking_
-        self.estimator_ = clone(self.estimator)
+        self.estimator_ = clone(self.estimator, deepcopy=False)
         self.estimator_.fit(self.transform(X), y)
 
         # Fixing a normalization error, n is equal to get_n_splits(X, y) - 1
diff --git a/sklearn/gaussian_process/gpc.py b/sklearn/gaussian_process/gpc.py
index 31d15e533dc9e..00af4ed089941 100644
--- a/sklearn/gaussian_process/gpc.py
+++ b/sklearn/gaussian_process/gpc.py
@@ -173,7 +173,7 @@ def fit(self, X, y):
             self.kernel_ = C(1.0, constant_value_bounds="fixed") \
                 * RBF(1.0, length_scale_bounds="fixed")
         else:
-            self.kernel_ = clone(self.kernel)
+            self.kernel_ = clone(self.kernel, deepcopy=False)
 
         self.rng = check_random_state(self.random_state)
diff --git a/sklearn/gaussian_process/gpr.py b/sklearn/gaussian_process/gpr.py
index c92ca7f68f368..d781c86226c24 100644
--- a/sklearn/gaussian_process/gpr.py
+++ b/sklearn/gaussian_process/gpr.py
@@ -174,7 +174,7 @@ def fit(self, X, y):
             self.kernel_ = C(1.0, constant_value_bounds="fixed") \
                 * RBF(1.0, length_scale_bounds="fixed")
         else:
-            self.kernel_ = clone(self.kernel)
+            self.kernel_ = clone(self.kernel, deepcopy=False)
 
         self._rng = check_random_state(self.random_state)
diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py
index 50febc8542570..62eb961a76d47 100644
--- a/sklearn/gaussian_process/kernels.py
+++ b/sklearn/gaussian_process/kernels.py
@@ -200,7 +200,7 @@ def set_params(self, **params):
 
     def clone_with_theta(self, theta):
         """Returns a clone of self with given hyperparameters theta. """
-        cloned = clone(self)
+        cloned = clone(self, deepcopy=False)
         cloned.theta = theta
         return cloned
diff --git a/sklearn/gaussian_process/tests/test_kernels.py b/sklearn/gaussian_process/tests/test_kernels.py
index a07a406a415dd..96e708268fa84 100644
--- a/sklearn/gaussian_process/tests/test_kernels.py
+++ b/sklearn/gaussian_process/tests/test_kernels.py
@@ -195,7 +195,7 @@ def check_hyperparameters_equal(kernel1, kernel2):
 def test_kernel_clone():
     # Test that sklearn's clone works correctly on kernels.
     for kernel in kernels:
-        kernel_cloned = clone(kernel)
+        kernel_cloned = clone(kernel, deepcopy=False)
 
         # XXX: Should this be fixed?
         # This differs from the sklearn's estimators equality check.
@@ -218,7 +218,7 @@ def test_kernel_clone_after_set_params():
     # for more details.
     bounds = (1e-5, 1e5)
     for kernel in kernels:
-        kernel_cloned = clone(kernel)
+        kernel_cloned = clone(kernel, deepcopy=False)
         params = kernel.get_params()
         # RationalQuadratic kernel is isotropic.
         isotropic_kernels = (ExpSineSquared, RationalQuadratic)
@@ -232,7 +232,7 @@ def test_kernel_clone_after_set_params():
             params['length_scale'] = [length_scale] * 2
             params['length_scale_bounds'] = bounds * 2
             kernel_cloned.set_params(**params)
-            kernel_cloned_clone = clone(kernel_cloned)
+            kernel_cloned_clone = clone(kernel_cloned, deepcopy=False)
             assert_equal(kernel_cloned_clone.get_params(),
                          kernel_cloned.get_params())
             assert_not_equal(id(kernel_cloned_clone), id(kernel_cloned))
diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py
index 76cdaa7cb1de5..ab1207bd35e63 100644
--- a/sklearn/grid_search.py
+++ b/sklearn/grid_search.py
@@ -559,7 +559,7 @@ def _fit(self, X, y, parameter_iterable):
                       " {2} fits".format(len(cv), n_candidates,
                                          n_candidates * len(cv)))
 
-        base_estimator = clone(self.estimator)
+        base_estimator = clone(self.estimator, deepcopy=False)
 
         pre_dispatch = self.pre_dispatch
 
@@ -567,7 +567,8 @@ def _fit(self, X, y, parameter_iterable):
             n_jobs=self.n_jobs, verbose=self.verbose,
             pre_dispatch=pre_dispatch
         )(
-            delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
+            delayed(_fit_and_score)(clone(base_estimator, deepcopy=False),
+                                    X, y, self.scorer_,
                                     train, test, self.verbose, parameters,
                                     self.fit_params, return_parameters=True,
                                     error_score=self.error_score)
@@ -614,7 +615,7 @@ def _fit(self, X, y, parameter_iterable):
         if self.refit:
             # fit the best estimator using the entire dataset
             # clone first to work around broken estimators
-            best_estimator = clone(base_estimator).set_params(
+            best_estimator = clone(base_estimator, deepcopy=False).set_params(
                 **best.parameters)
             if y is not None:
                 best_estimator.fit(X, y, **self.fit_params)
diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py
index 5571138d68d83..f7528fcb38662 100644
--- a/sklearn/learning_curve.py
+++ b/sklearn/learning_curve.py
@@ -161,11 +161,13 @@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 5),
     if exploit_incremental_learning:
         classes = np.unique(y) if is_classifier(estimator) else None
         out = parallel(delayed(_incremental_fit_estimator)(
-            clone(estimator), X, y, classes, train, test, train_sizes_abs,
+            clone(estimator, deepcopy=False), X, y, classes,
+            train, test, train_sizes_abs,
             scorer, verbose) for train, test in cv)
     else:
         out = parallel(delayed(_fit_and_score)(
-            clone(estimator), X, y, scorer, train[:n_train_samples], test,
+            clone(estimator, deepcopy=False), X, y, scorer,
+            train[:n_train_samples], test,
             verbose, parameters=None, fit_params=None, return_train_score=True,
             error_score=error_score)
             for train, test in cv for n_train_samples in train_sizes_abs)
@@ -348,7 +350,7 @@ def validation_curve(estimator, X, y, param_name, param_range, cv=None,
     parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch,
                         verbose=verbose)
     out = parallel(delayed(_fit_and_score)(
-        clone(estimator), X, y, scorer, train, test, verbose,
+        clone(estimator, deepcopy=False), X, y, scorer, train, test, verbose,
         parameters={param_name: v}, fit_params=None, return_train_score=True)
         for train, test in cv for v in param_range)
diff --git a/sklearn/linear_model/ransac.py b/sklearn/linear_model/ransac.py
index ec43c3719b68a..aef23d42534e7 100644
--- a/sklearn/linear_model/ransac.py
+++ b/sklearn/linear_model/ransac.py
@@ -251,7 +251,7 @@ def fit(self, X, y, sample_weight=None):
         check_consistent_length(X, y)
 
         if self.base_estimator is not None:
-            base_estimator = clone(self.base_estimator)
+            base_estimator = clone(self.base_estimator, deepcopy=False)
         else:
             base_estimator = LinearRegression()
diff --git a/sklearn/linear_model/tests/test_sag.py b/sklearn/linear_model/tests/test_sag.py
index 02a557d56ef7f..eefb6c37039f9 100644
--- a/sklearn/linear_model/tests/test_sag.py
+++ b/sklearn/linear_model/tests/test_sag.py
@@ -313,7 +313,7 @@ def test_sag_pobj_matches_logistic_regression():
     clf1 = LogisticRegression(solver='sag', fit_intercept=False, tol=.0000001,
                               C=1. / alpha / n_samples, max_iter=max_iter,
                               random_state=10)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
     clf3 = LogisticRegression(fit_intercept=False, tol=.0000001,
                               C=1. / alpha / n_samples, max_iter=max_iter,
                               random_state=10)
@@ -346,7 +346,7 @@ def test_sag_pobj_matches_ridge_regression():
     clf1 = Ridge(fit_intercept=fit_intercept, tol=.00000000001, solver='sag',
                  alpha=alpha, max_iter=n_iter, random_state=42)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
     clf3 = Ridge(fit_intercept=fit_intercept, tol=.00001, solver='lsqr',
                  alpha=alpha, max_iter=n_iter, random_state=42)
@@ -380,7 +380,7 @@ def test_sag_regressor_computed_correctly():
 
     clf1 = Ridge(fit_intercept=fit_intercept, tol=tol, solver='sag',
                  alpha=alpha * n_samples, max_iter=max_iter)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
 
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
@@ -468,7 +468,7 @@ def test_sag_regressor():
     clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
                  alpha=alpha * n_samples)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
     score1 = clf1.score(X, y)
@@ -481,7 +481,7 @@ def test_sag_regressor():
     clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
                  alpha=alpha * n_samples)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
     score1 = clf1.score(X, y)
@@ -510,7 +510,7 @@ def test_sag_classifier_computed_correctly():
     clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                               max_iter=n_iter, tol=tol, random_state=77,
                               fit_intercept=fit_intercept)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
 
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
@@ -550,7 +550,7 @@ def test_sag_multiclass_computed_correctly():
     clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                               max_iter=max_iter, tol=tol, random_state=77,
                               fit_intercept=fit_intercept)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
 
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
@@ -608,7 +608,7 @@ def test_classifier_results():
     y = np.sign(y)
     clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                               max_iter=max_iter, tol=tol, random_state=77)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
 
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
@@ -639,7 +639,7 @@ def test_binary_classifier_class_weight():
                               max_iter=n_iter, tol=tol, random_state=77,
                               fit_intercept=fit_intercept,
                               class_weight=class_weight)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
 
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
@@ -686,7 +686,7 @@ def test_multiclass_classifier_class_weight():
                               max_iter=max_iter, tol=tol, random_state=77,
                               fit_intercept=fit_intercept,
                               class_weight=class_weight)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
 
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py
index d4552a9934cf1..f423ed73b83a3 100644
--- a/sklearn/linear_model/tests/test_sgd.py
+++ b/sklearn/linear_model/tests/test_sgd.py
@@ -198,7 +198,7 @@ def test_input_format(self):
     def test_clone(self):
         # Test whether clone works ok.
         clf = self.factory(alpha=0.01, penalty='l1')
-        clf = clone(clf)
+        clf = clone(clf, deepcopy=False)
         clf.set_params(penalty='l2')
         clf.fit(X, Y)
diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index ebfa1e9bd3e18..53973b3d9113e 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -621,13 +621,14 @@ def fit(self, X, y=None, groups=None, **fit_params):
                       " {2} fits".format(n_splits, n_candidates,
                                          n_candidates * n_splits))
 
-        base_estimator = clone(self.estimator)
+        base_estimator = clone(self.estimator, deepcopy=False)
         pre_dispatch = self.pre_dispatch
 
         out = Parallel(
             n_jobs=self.n_jobs, verbose=self.verbose,
             pre_dispatch=pre_dispatch
-        )(delayed(_fit_and_score)(clone(base_estimator), X, y, scorers, train,
+        )(delayed(_fit_and_score)(clone(base_estimator, deepcopy=False),
+                                  X, y, scorers, train,
                                   test, self.verbose, parameters,
                                   fit_params=fit_params,
                                   return_train_score=self.return_train_score,
@@ -719,7 +720,8 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
                                                  self.best_index_]
 
         if self.refit:
-            self.best_estimator_ = clone(base_estimator).set_params(
+            self.best_estimator_ = clone(base_estimator,
+                                         deepcopy=False).set_params(
                 **self.best_params_)
             if y is not None:
                 self.best_estimator_.fit(X, y, **fit_params)
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 798f771534571..646dd777e5c33 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -191,7 +191,8 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None,
                         pre_dispatch=pre_dispatch)
     scores = parallel(
         delayed(_fit_and_score)(
-            clone(estimator), X, y, scorers, train, test, verbose, None,
+            clone(estimator, deepcopy=False), X, y, scorers,
+            train, test, verbose, None,
             fit_params, return_train_score=return_train_score,
             return_times=True)
         for train, test in cv.split(X, y, groups))
@@ -648,7 +649,8 @@ def cross_val_predict(estimator, X, y=None, groups=None, cv=None, n_jobs=1,
     parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                         pre_dispatch=pre_dispatch)
     prediction_blocks = parallel(delayed(_fit_and_predict)(
-        clone(estimator), X, y, train, test, verbose, fit_params, method)
+        clone(estimator, deepcopy=False), X, y,
+        train, test, verbose, fit_params, method)
         for train, test in cv.split(X, y, groups))
 
     # Concatenate the predictions
@@ -875,7 +877,8 @@ def permutation_test_score(estimator, X, y, groups=None, cv=None,
     score = _permutation_test_score(clone(estimator), X, y, groups, cv, scorer)
     permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
         delayed(_permutation_test_score)(
-            clone(estimator), X, _shuffle(y, groups, random_state),
+            clone(estimator, deepcopy=False),
+            X, _shuffle(y, groups, random_state),
             groups, cv, scorer)
         for _ in range(n_permutations))
     permutation_scores = np.array(permutation_scores)
@@ -1058,7 +1061,7 @@ def learning_curve(estimator, X, y, groups=None,
             train_test_proportions.append((train[:n_train_samples], test))
 
     out = parallel(delayed(_fit_and_score)(
-        clone(estimator), X, y, scorer, train, test,
+        clone(estimator, deepcopy=False), X, y, scorer, train, test,
         verbose, parameters=None, fit_params=None, return_train_score=True)
         for train, test in train_test_proportions)
     out = np.array(out)
@@ -1240,7 +1243,7 @@ def validation_curve(estimator, X, y, param_name, param_range, groups=None,
     parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch,
                         verbose=verbose)
     out = parallel(delayed(_fit_and_score)(
-        clone(estimator), X, y, scorer, train, test, verbose,
+        clone(estimator, deepcopy=False), X, y, scorer, train, test, verbose,
         parameters={param_name: v}, fit_params=None, return_train_score=True)
         # NOTE do not change order of iteration to allow one time cv splitters
         for train, test in cv.split(X, y, groups) for v in param_range)
diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index ee3fe26eedd8c..21fd21f416492 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -1159,10 +1159,10 @@ def test_fit_grid_point():
         for params in ({'C': 0.1}, {'C': 0.01}, {'C': 0.001}):
             for train, test in cv.split(X, y):
                 this_scores, this_params, n_test_samples = fit_grid_point(
-                    X, y, clone(svc), params, train, test,
+                    X, y, clone(svc, deepcopy=False), params, train, test,
                     scorer, verbose=False)
 
-                est = clone(svc).set_params(**params)
+                est = clone(svc, deepcopy=False).set_params(**params)
                 est.fit(X[train], y[train])
                 expected_score = scorer(est, X[test], y[test])
diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index baff76257447d..3974a67e7977c 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -361,7 +361,7 @@ def test_cross_validate():
     train_r2_scores = []
     test_r2_scores = []
     for train, test in cv.split(X, y):
-        est = clone(reg).fit(X[train], y[train])
+        est = clone(reg, deepcopy=False).fit(X[train], y[train])
         train_mse_scores.append(mse_scorer(est, X[train], y[train]))
         train_r2_scores.append(r2_scorer(est, X[train], y[train]))
         test_mse_scores.append(mse_scorer(est, X[test], y[test]))
diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py
index a8510cf0a0a85..7b0df4f6de57c 100644
--- a/sklearn/multiclass.py
+++ b/sklearn/multiclass.py
@@ -76,7 +76,7 @@ def _fit_binary(estimator, X, y, classes=None):
                           str(classes[c]))
         estimator = _ConstantPredictor().fit(X, unique_y)
     else:
-        estimator = clone(estimator)
+        estimator = clone(estimator, deepcopy=False)
         estimator.fit(X, y)
     return estimator
 
@@ -247,7 +247,8 @@ def partial_fit(self, X, y, classes=None):
             if not hasattr(self.estimator, "partial_fit"):
                 raise ValueError(("Base estimator {0}, doesn't have "
                                   "partial_fit method").format(self.estimator))
-            self.estimators_ = [clone(self.estimator) for _ in range
+            self.estimators_ = [clone(self.estimator, deepcopy=False)
+                                for _ in range
                                 (self.n_classes_)]
 
             # A sparse LabelBinarizer, with sparse_output=True, has been
@@ -541,7 +542,7 @@ def partial_fit(self, X, y, classes=None):
         self
         """
         if _check_partial_fit_first_call(self, classes):
-            self.estimators_ = [clone(self.estimator) for i in
+            self.estimators_ = [clone(self.estimator, deepcopy=False) for i in
                                 range(self.n_classes_ *
                                       (self.n_classes_ - 1) // 2)]
diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py
index 6c9fbc55f7863..5f1044d1cda34 100644
--- a/sklearn/multioutput.py
+++ b/sklearn/multioutput.py
@@ -32,7 +32,7 @@
 
 
 def _fit_estimator(estimator, X, y, sample_weight=None):
-    estimator = clone(estimator)
+    estimator = clone(estimator, deepcopy=False)
     if sample_weight is not None:
         estimator.fit(X, y, sample_weight=sample_weight)
     else:
@@ -43,7 +43,7 @@ def _fit_estimator(estimator, X, y, sample_weight=None):
 def _partial_fit_estimator(estimator, X, y, classes=None, sample_weight=None,
                            first_time=True):
     if first_time:
-        estimator = clone(estimator)
+        estimator = clone(estimator, deepcopy=False)
 
     if sample_weight is not None:
         if classes is not None:
@@ -467,7 +467,7 @@ def fit(self, X, Y):
         elif sorted(self.order_) != list(range(Y.shape[1])):
             raise ValueError("invalid order")
 
-        self.estimators_ = [clone(self.base_estimator)
+        self.estimators_ = [clone(self.base_estimator, deepcopy=False)
                             for _ in range(Y.shape[1])]
 
         self.classes_ = []
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 66da9dffeb066..8a4843c5279dc 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -205,7 +205,7 @@ def _fit(self, X, y=None, **fit_params):
                     # backward compatibility
                     cloned_transformer = transformer
                 else:
-                    cloned_transformer = clone(transformer)
+                    cloned_transformer = clone(transformer, deepcopy=False)
                 # Fit or load from cache the current transfomer
                 Xt, fitted_transformer = fit_transform_one_cached(
                     cloned_transformer, None, Xt, y,
diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py
index f2c10ceddd0f1..c16bcfa8a168c 100644
--- a/sklearn/svm/tests/test_sparse.py
+++ b/sklearn/svm/tests/test_sparse.py
@@ -312,7 +312,7 @@ def test_sparse_svc_clone_with_callable_kernel():
     # meaning that everything works fine.
     a = svm.SVC(C=1, kernel=lambda x, y: x * y.T, probability=True,
                 random_state=0)
-    b = base.clone(a)
+    b = base.clone(a, deepcopy=False)
 
     b.fit(X_sp, Y)
     pred = b.predict(X_sp)
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index daf35f82a39e5..8916edb41b09e 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -810,7 +810,7 @@ def test_svc_clone_with_callable_kernel():
                        probability=True, random_state=0,
                        decision_function_shape='ovr')
     # clone for checking clonability with lambda functions..
-    svm_cloned = base.clone(svm_callable)
+    svm_cloned = base.clone(svm_callable, deepcopy=False)
     svm_cloned.fit(iris.data, iris.target)
 
     svm_builtin = svm.SVC(kernel='linear', probability=True, random_state=0,
diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py
index ce93d9f7d422f..83638a1c8168d 100644
--- a/sklearn/tests/test_base.py
+++ b/sklearn/tests/test_base.py
@@ -130,7 +130,7 @@ def test_clone_2():
     selector = SelectFpr(f_classif, alpha=0.1)
     selector.own_attribute = "test"
-    new_selector = clone(selector)
+    new_selector = clone(selector, deepcopy=False)
     assert_false(hasattr(new_selector, "own_attribute"))
 
 
@@ -138,30 +138,30 @@ def test_clone_buggy():
     # Check that clone raises an error on buggy estimators.
     buggy = Buggy()
     buggy.a = 2
-    assert_raises(RuntimeError, clone, buggy)
+    assert_raises(RuntimeError, clone, buggy, deepcopy=False)
 
     no_estimator = NoEstimator()
-    assert_raises(TypeError, clone, no_estimator)
+    assert_raises(TypeError, clone, no_estimator, deepcopy=False)
 
     varg_est = VargEstimator()
-    assert_raises(RuntimeError, clone, varg_est)
+    assert_raises(RuntimeError, clone, varg_est, deepcopy=False)
 
 
 def test_clone_empty_array():
     # Regression test for cloning estimators with empty arrays
     clf = MyEstimator(empty=np.array([]))
-    clf2 = clone(clf)
+    clf2 = clone(clf, deepcopy=False)
     assert_array_equal(clf.empty, clf2.empty)
 
     clf = MyEstimator(empty=sp.csr_matrix(np.array([[0]])))
-    clf2 = clone(clf)
+    clf2 = clone(clf, deepcopy=False)
     assert_array_equal(clf.empty.data, clf2.empty.data)
 
 
 def test_clone_nan():
     # Regression test for cloning estimators with default parameter as np.nan
     clf = MyEstimator(empty=np.nan)
-    clf2 = clone(clf)
+    clf2 = clone(clf, deepcopy=False)
 
     assert_true(clf.empty is clf2.empty)
 
@@ -173,7 +173,8 @@ def test_clone_copy_init_params():
                "This behavior is deprecated as of 0.18 and support "
                "for this behavior will be removed in 0.20.")
 
-    assert_warns_message(DeprecationWarning, message, clone, est)
+    assert_warns_message(DeprecationWarning, message, clone, est,
+                         deepcopy=False)
 
 
 def test_clone_sparse_matrices():
@@ -184,7 +185,7 @@ def test_clone_sparse_matrices():
     for cls in sparse_matrix_classes:
         sparse_matrix = cls(np.eye(5))
         clf = MyEstimator(empty=sparse_matrix)
-        clf_cloned = clone(clf)
+        clf_cloned = clone(clf, deepcopy=False)
         assert_true(clf.empty.__class__ is clf_cloned.empty.__class__)
         assert_array_equal(clf.empty.toarray(), clf_cloned.empty.toarray())
 
@@ -314,7 +315,7 @@ def transform(self, X):
     d = np.arange(10)
     df = MockDataFrame(d)
     e = DummyEstimator(df, scalar_param=1)
-    cloned_e = clone(e)
+    cloned_e = clone(e, deepcopy=False)
 
     # the test
     assert_true((e.df == cloned_e.df).values.all())
diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py
index 537a6184b944c..4f7123fe91e1d 100644
--- a/sklearn/tests/test_dummy.py
+++ b/sklearn/tests/test_dummy.py
@@ -46,7 +46,7 @@ def _check_behavior_2d(clf):
     # 1d case
     X = np.array([[0], [0], [0], [0]])  # ignored
     y = np.array([1, 2, 1, 1])
-    est = clone(clf)
+    est = clone(clf, deepcopy=False)
     est.fit(X, y)
     y_pred = est.predict(X)
     assert_equal(y.shape, y_pred.shape)
@@ -56,7 +56,7 @@ def _check_behavior_2d(clf):
                   [2, 0],
                   [1, 0],
                   [1, 3]])
-    est = clone(clf)
+    est = clone(clf, deepcopy=False)
     est.fit(X, y)
     y_pred = est.predict(X)
     assert_equal(y.shape, y_pred.shape)
@@ -69,7 +69,7 @@ def _check_behavior_2d_for_constant(clf):
                   [2, 0, 1, 2, 5],
                   [1, 0, 4, 5, 2],
                   [1, 3, 3, 2, 0]])
-    est = clone(clf)
+    est = clone(clf, deepcopy=False)
     est.fit(X, y)
     y_pred = est.predict(X)
     assert_equal(y.shape, y_pred.shape)
diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py
index 26981d20fc633..41590f71ffd46 100644
--- a/sklearn/tests/test_multioutput.py
+++ b/sklearn/tests/test_multioutput.py
@@ -195,7 +195,7 @@ def test_multi_output_classification_partial_fit():
     # predictions are equal after first partial_fit and second partial_fit
     for i in range(3):
         # create a clone with the same state
-        sgd_linear_clf = clone(sgd_linear_clf)
+        sgd_linear_clf = clone(sgd_linear_clf, deepcopy=False)
         sgd_linear_clf.partial_fit(
             X[:half_index], y[:half_index, i], classes=classes[i])
         assert_array_equal(sgd_linear_clf.predict(X), first_predictions[:, i])
@@ -235,7 +235,8 @@ def test_multi_output_classification():
 
     # train the forest with each column and assert that predictions are equal
     for i in range(3):
-        forest_ = clone(forest)  # create a clone with the same state
+        # create a clone with the same state
+        forest_ = clone(forest, deepcopy=False)
         forest_.fit(X, y[:, i])
         assert_equal(list(forest_.predict(X)), list(predictions[:, i]))
         assert_array_equal(list(forest_.predict_proba(X)),
@@ -255,7 +256,8 @@ def test_multiclass_multioutput_estimator():
 
     # train the forest with each column and assert that predictions are equal
     for i in range(3):
-        multi_class_svc_ = clone(multi_class_svc)  # create a clone
+        # create a clone
+        multi_class_svc_ = clone(multi_class_svc, deepcopy=False)
         multi_class_svc_.fit(X, y[:, i])
         assert_equal(list(multi_class_svc_.predict(X)),
                      list(predictions[:, i]))
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 1165370885d36..3311fe49747a2 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -187,7 +187,7 @@ def test_pipeline_init():
     assert_raises(ValueError, pipe.set_params, anova__C=0.1)
 
     # Test clone
-    pipe2 = clone(pipe)
+    pipe2 = clone(pipe, deepcopy=False)
     assert_false(pipe.named_steps['svc'] is pipe2.named_steps['svc'])
 
     # Check that apart from estimators, the parameters are the same
@@ -907,7 +907,8 @@ def test_pipeline_memory():
         # Test with Transformer + SVC
         clf = SVC(probability=True, random_state=0)
         transf = DummyTransf()
-        pipe = Pipeline([('transf', clone(transf)), ('svc', clf)])
+        pipe = Pipeline([('transf', clone(transf, deepcopy=False)),
+                         ('svc', clf)])
         cached_pipe = Pipeline([('transf', transf), ('svc', clf)],
                                memory=memory)
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 3e7cb198a9d12..1d3c6a881b74e 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -137,7 +137,7 @@ def _yield_classifier_checks(name, classifier):
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_supervised_y_no_nan(name, estimator_orig):
     # Checks that the Estimator targets are not NaN.
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     rng = np.random.RandomState(888)
     X = rng.randn(10, 5)
     y = np.ones(10) * np.inf
@@ -361,16 +361,17 @@ def check_estimator_sparse_data(name, estimator_orig):
     y = (4 * rng.rand(40)).astype(np.int)
     # catch deprecation warnings
     with ignore_warnings(category=DeprecationWarning):
-        estimator = clone(estimator_orig)
+        estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
     for sparse_format in ['csr', 'csc', 'dok', 'lil', 'coo', 'dia', 'bsr']:
         X = X_csr.asformat(sparse_format)
         # catch deprecation warnings
         with ignore_warnings(category=(DeprecationWarning, FutureWarning)):
             if name in ['Scaler', 'StandardScaler']:
-                estimator = clone(estimator).set_params(with_mean=False)
+                estimator = clone(estimator,
+                                  deepcopy=False).set_params(with_mean=False)
             else:
-                estimator = clone(estimator)
+                estimator = clone(estimator, deepcopy=False)
         # fit and predict
         try:
             with ignore_warnings(category=(DeprecationWarning, FutureWarning)):
@@ -399,7 +400,7 @@ def check_estimator_sparse_data(name, estimator_orig):
 def check_sample_weights_pandas_series(name, estimator_orig):
     # check that estimators will accept a 'sample_weight' parameter of
     # type pandas.Series in the 'fit' function.
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     if has_fit_parameter(estimator, "sample_weight"):
         try:
             import pandas as pd
@@ -422,7 +423,7 @@ def check_sample_weights_list(name, estimator_orig):
     # check that estimators will accept a 'sample_weight' parameter of
     # type list in the 'fit' function.
     if has_fit_parameter(estimator_orig, "sample_weight"):
-        estimator = clone(estimator_orig)
+        estimator = clone(estimator_orig, deepcopy=False)
         rnd = np.random.RandomState(0)
         X = rnd.uniform(size=(10, 3))
         y = np.arange(10) % 3
@@ -438,7 +439,7 @@ def check_dtype_object(name, estimator_orig):
     rng = np.random.RandomState(0)
     X = rng.rand(40, 10).astype(object)
     y = (X[:, 0] * 4).astype(np.int)
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
     estimator.fit(X, y)
@@ -464,7 +465,7 @@ def check_complex_data(name, estimator_orig):
     X = np.random.sample(10) + 1j * np.random.sample(10)
     X = X.reshape(-1, 1)
     y = np.random.sample(10) + 1j * np.random.sample(10)
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     assert_raises_regex(ValueError, "Complex data not supported",
                         estimator.fit, X, y)
 
@@ -484,7 +485,7 @@ def check_dict_unchanged(name, estimator_orig):
         X = 2 * rnd.uniform(size=(20, 3))
 
     y = X[:, 0].astype(np.int)
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
     if hasattr(estimator, "n_components"):
         estimator.n_components = 1
@@ -517,7 +518,7 @@ def check_dont_overwrite_parameters(name, estimator_orig):
     if hasattr(estimator_orig.__init__, "deprecated_original"):
         # to not check deprecated classes
         return
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     rnd = np.random.RandomState(0)
    X = 3 * rnd.uniform(size=(20, 3))
     y = X[:, 0].astype(np.int)
@@ -567,7 +568,7 @@ def check_fit2d_predict1d(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(20, 3))
     y = X[:, 0].astype(np.int)
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
     if hasattr(estimator, "n_components"):
@@ -591,7 +592,7 @@ def check_fit2d_1sample(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(1, 10))
     y = X[:, 0].astype(np.int)
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
     if hasattr(estimator, "n_components"):
@@ -612,7 +613,7 @@ def check_fit2d_1feature(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(10, 1))
     y = X[:, 0].astype(np.int)
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
     if hasattr(estimator, "n_components"):
@@ -633,7 +634,7 @@ def check_fit1d_1feature(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(20))
     y = X.astype(np.int)
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
     if hasattr(estimator, "n_components"):
@@ -655,7 +656,7 @@ def check_fit1d_1sample(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(20))
     y = np.array([1])
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
     if hasattr(estimator, "n_components"):
@@ -698,7 +699,7 @@ def check_transformer_data_not_an_array(name, transformer):
 
 def check_transformers_unfitted(name, transformer):
     X, y = _boston_subset()
 
-    transformer = clone(transformer)
+    transformer = clone(transformer, deepcopy=False)
     with assert_raises((AttributeError, ValueError), msg="The unfitted "
                        "transformer {} does not raise an error when "
                        "transform is called. Perhaps use "
@@ -716,7 +717,7 @@ def _check_transformer(name, transformer_orig, X, y):
         msg = name + ' is non deterministic on 32bit Python'
         raise SkipTest(msg)
     n_samples, n_features = np.asarray(X).shape
-    transformer = clone(transformer_orig)
+    transformer = clone(transformer_orig, deepcopy=False)
     set_random_state(transformer)
 
     # fit
@@ -729,7 +730,7 @@ def _check_transformer(name, transformer_orig, X, y):
     transformer.fit(X, y_)
 
     # fit_transform method should work on non fitted estimator
-    transformer_clone = clone(transformer)
+    transformer_clone = clone(transformer, deepcopy=False)
     X_pred = transformer_clone.fit_transform(X, y=y_)
 
     if isinstance(X_pred, tuple):
@@ -798,7 +799,7 @@ def check_pipeline_consistency(name, estimator_orig):
     X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                       random_state=0, n_features=2, cluster_std=0.1)
     X -= X.min()
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
     set_random_state(estimator)
     pipeline = make_pipeline(estimator)
@@ -823,7 +824,7 @@ def check_fit_score_takes_y(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X = rnd.uniform(size=(10, 3))
     y = np.arange(10) % 3
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
     set_random_state(estimator)
 
@@ -856,7 +857,7 @@ def check_estimators_dtypes(name, estimator_orig):
     methods = ["predict", "transform", "decision_function", "predict_proba"]
 
     for X_train in [X_train_32, X_train_64, X_train_int_64, X_train_int_32]:
-        estimator = clone(estimator_orig)
+        estimator = clone(estimator_orig, deepcopy=False)
         set_random_state(estimator, 1)
         estimator.fit(X_train, y)
 
@@ -867,7 +868,7 @@ def check_estimators_dtypes(name, estimator_orig):
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_estimators_empty_data_messages(name, estimator_orig):
-    e = clone(estimator_orig)
+    e = clone(estimator_orig, deepcopy=False)
     set_random_state(e, 1)
 
     X_zero_samples = np.empty(0).reshape(0, 3)
@@ -908,7 +909,7 @@ def check_estimators_nan_inf(name, estimator_orig):
     for X_train in [X_train_nan, X_train_inf]:
         # catch deprecation warnings
         with ignore_warnings(category=(DeprecationWarning, FutureWarning)):
-            estimator = clone(estimator_orig)
+            estimator = clone(estimator_orig, deepcopy=False)
             set_random_state(estimator, 1)
             # try to fit
             try:
@@ -970,7 +971,7 @@ def check_estimators_pickle(name, estimator_orig):
     # some estimators can't do features less than 0
     X -= X.min()
 
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
 
     # some estimators only take multioutputs
     y = multioutput_estimator_convert_y_2d(estimator, y)
@@ -999,7 +1000,7 @@ def check_estimators_partial_fit_n_features(name, estimator_orig):
     # check if number of features changes between calls to partial_fit.
     if not hasattr(estimator_orig, 'partial_fit'):
         return
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     X, y = make_blobs(n_samples=50, random_state=1)
     X -= X.min()
 
@@ -1022,7 +1023,7 @@ def check_estimators_partial_fit_n_features(name, estimator_orig):
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_clustering(name, clusterer_orig):
-    clusterer = clone(clusterer_orig)
+    clusterer = clone(clusterer_orig, deepcopy=False)
     X, y = make_blobs(n_samples=50, random_state=1)
     X, y = shuffle(X, y, random_state=7)
     X = StandardScaler().fit_transform(X)
@@ -1057,7 +1058,7 @@ def check_clustering(name, clusterer_orig):
 def check_clusterer_compute_labels_predict(name, clusterer_orig):
     """Check that predict is invariant of compute_labels"""
     X, y = make_blobs(n_samples=20, random_state=0)
-    clusterer = clone(clusterer_orig)
+    clusterer = clone(clusterer_orig, deepcopy=False)
 
     if hasattr(clusterer, "compute_labels"):
         # MiniBatchKMeans
@@ -1081,7 +1082,7 @@ def check_classifiers_one_label(name, classifier_orig):
     y = np.ones(10)
     # catch deprecation warnings
     with ignore_warnings(category=(DeprecationWarning, FutureWarning)):
-        classifier = clone(classifier_orig)
+        classifier = clone(classifier_orig, deepcopy=False)
         # try to fit
         try:
             classifier.fit(X_train, y)
@@ -1116,7 +1117,7 @@ def check_classifiers_train(name, classifier_orig):
         classes = np.unique(y)
         n_classes = len(classes)
         n_samples, n_features = X.shape
-        classifier = clone(classifier_orig)
+        classifier = clone(classifier_orig, deepcopy=False)
         if name in ['BernoulliNB', 'MultinomialNB', 'ComplementNB']:
             X -= X.min()
         set_random_state(classifier)
@@ -1195,7 +1196,7 @@ def check_estimators_fit_returns_self(name, estimator_orig):
     # some want non-negative input
     X -= X.min()
 
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
     set_random_state(estimator)
@@ -1215,7 +1216,7 @@ def check_estimators_unfitted(name, estimator_orig):
     # Common test for Regressors as well as Classifiers
     X, y = _boston_subset()
 
-    est = clone(estimator_orig)
+    est = clone(estimator_orig, deepcopy=False)
 
     msg = "fit"
     if hasattr(est, 'predict'):
@@ -1243,7 +1244,7 @@ def check_supervised_y_2d(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X = rnd.uniform(size=(10, 3))
     y = np.arange(10) % 3
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     set_random_state(estimator)
     # fit
     estimator.fit(X, y)
@@ -1285,7 +1286,7 @@ def check_classifiers_classes(name, classifier_orig):
         y_ = y_names
 
     classes = np.unique(y_)
-    classifier = clone(classifier_orig)
+    classifier = clone(classifier_orig, deepcopy=False)
     if name in ['BernoulliNB', 'ComplementNB']:
         X = X > X.mean()
     set_random_state(classifier)
@@ -1310,8 +1311,8 @@ def check_regressors_int(name, regressor_orig):
     y = multioutput_estimator_convert_y_2d(regressor_orig, y)
     rnd = np.random.RandomState(0)
     # separate estimators to control random seeds
-    regressor_1 = clone(regressor_orig)
-    regressor_2 = clone(regressor_orig)
+    regressor_1 = clone(regressor_orig, deepcopy=False)
+    regressor_2 = clone(regressor_orig, deepcopy=False)
     set_random_state(regressor_1)
     set_random_state(regressor_2)
 
@@ -1334,7 +1335,7 @@ def check_regressors_train(name, regressor_orig):
     X, y = _boston_subset()
     y = StandardScaler().fit_transform(y.reshape(-1, 1))  # X is already scaled
     y = y.ravel()
-    regressor = clone(regressor_orig)
+    regressor = clone(regressor_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(regressor, y)
     rnd = np.random.RandomState(0)
     if not hasattr(regressor, 'alphas') and hasattr(regressor, 'alpha'):
@@ -1374,7 +1375,7 @@ def check_regressors_no_decision_function(name, regressor_orig):
     # checks whether regressors have decision_function or predict_proba
     rng = np.random.RandomState(0)
     X = rng.normal(size=(10, 4))
-    regressor = clone(regressor_orig)
+    regressor = clone(regressor_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(regressor, X[:, 0])
 
     if hasattr(regressor, "n_components"):
@@ -1415,7 +1416,7 @@ def check_class_weight_classifiers(name, classifier_orig):
         else:
             class_weight = {0: 1000, 1: 0.0001, 2: 0.0001}
 
-        classifier = clone(classifier_orig).set_params(
+        classifier = clone(classifier_orig, deepcopy=False).set_params(
             class_weight=class_weight)
         if hasattr(classifier, "n_iter"):
             classifier.set_params(n_iter=100)
@@ -1435,7 +1436,7 @@ def check_class_weight_classifiers(name, classifier_orig):
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_class_weight_balanced_classifiers(name, classifier_orig, X_train,
                                             y_train, X_test, y_test, weights):
-    classifier = clone(classifier_orig)
+    classifier = clone(classifier_orig, deepcopy=False)
     if hasattr(classifier, "n_iter"):
         classifier.set_params(n_iter=100)
     if hasattr(classifier, "max_iter"):
@@ -1491,7 +1492,7 @@ def check_estimators_overwrite_params(name, estimator_orig):
     X, y = make_blobs(random_state=0, n_samples=9)
     # some want non-negative input
     X -= X.min()
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
     set_random_state(estimator)
@@ -1544,7 +1545,7 @@ def check_sparsify_coefficients(name, estimator_orig):
     X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1],
                   [-1, -2], [2, 2], [-2, -2]])
     y = [1, 1, 1, 2, 2, 2, 3, 3, 3]
-    est = clone(estimator_orig)
+    est = clone(estimator_orig, deepcopy=False)
 
     est.fit(X, y)
     pred_orig = est.predict(X)
@@ -1584,8 +1585,8 @@ def check_estimators_data_not_an_array(name, estimator_orig, X, y):
                        "for cross decomposition module as estimators "
                        "are not deterministic.")
     # separate estimators to control random seeds
-    estimator_1 = clone(estimator_orig)
-    estimator_2 = clone(estimator_orig)
+    estimator_1 = clone(estimator_orig, deepcopy=False)
+    estimator_2 = clone(estimator_orig, deepcopy=False)
     set_random_state(estimator_1)
     set_random_state(estimator_2)
 
@@ -1611,7 +1612,7 @@ def check_parameters_default_constructible(name, Estimator):
         else:
             estimator = Estimator()
         # test cloning
-        clone(estimator)
+        clone(estimator, deepcopy=False)
         # test __repr__
         repr(estimator)
         # test that set_params returns self
@@ -1695,9 +1696,9 @@ def check_non_transformer_estimators_n_iter(name, estimator_orig):
 
     # LassoLars stops early for the default alpha=1.0 the iris dataset.
     if name == 'LassoLars':
-        estimator = clone(estimator_orig).set_params(alpha=0.)
+        estimator = clone(estimator_orig, deepcopy=False).set_params(alpha=0.)
     else:
-        estimator = clone(estimator_orig)
+        estimator = clone(estimator_orig, deepcopy=False)
     if hasattr(estimator, 'max_iter'):
         iris = load_iris()
         X, y_ = iris.data, iris.target
@@ -1719,7 +1720,7 @@ def check_non_transformer_estimators_n_iter(name, estimator_orig):
 def check_transformer_n_iter(name, estimator_orig):
     # Test that transformers with a parameter max_iter, return the
     # attribute of n_iter_ at least 1.
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     if hasattr(estimator, "max_iter"):
         if name in CROSS_DECOMPOSITION:
             # Check using default data
@@ -1757,7 +1758,7 @@ def fit(self, X, y):
         def transform(self, X):
             return X
 
-    e = clone(estimator_orig)
+    e = clone(estimator_orig, deepcopy=False)
 
     shallow_params = e.get_params(deep=False)
     deep_params = e.get_params(deep=True)
@@ -1772,7 +1773,7 @@ def check_classifiers_regression_target(name, estimator_orig):
 
     boston = load_boston()
     X, y = boston.data, boston.target
-    e = clone(estimator_orig)
+    e = clone(estimator_orig, deepcopy=False)
     msg = 'Unknown label type: '
     assert_raises_regex(ValueError, msg, e.fit, X, y)
 
@@ -1786,7 +1787,7 @@ def check_decision_proba_consistency(name, estimator_orig):
     X, y = make_blobs(n_samples=100, random_state=0, n_features=4,
                       centers=centers, cluster_std=1.0, shuffle=True)
     X_test = np.random.randn(20, 2) + 4
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
 
     if (hasattr(estimator, "decision_function") and
             hasattr(estimator, "predict_proba")):
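With all four patches applied, the transition can also be driven from user code. A hedged usage sketch (the estimator choice is arbitrary, not taken from the diffs):

```python
import warnings
from sklearn.base import clone
from sklearn.exceptions import ChangedBehaviorWarning
from sklearn.tree import DecisionTreeClassifier

est = DecisionTreeClassifier(max_depth=3)

# Opt in to the future default now: non-estimator constructor parameters
# are shallow-copied instead of deep-copied.
fast = clone(est, deepcopy=False)

# Keep the historical deep-copy semantics explicitly; no warning is raised.
legacy = clone(est, deepcopy=True)

# Leaving deepcopy unset still deep-copies during the deprecation window,
# but raises ChangedBehaviorWarning once the recursion reaches a
# non-estimator parameter (the safe=False path refined by patch 3).
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    default = clone(est)
assert any(w.category is ChangedBehaviorWarning for w in caught)
```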