From fd07f3ce44d17c215e0f76a67a64e8461c4bc04c Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 6 Sep 2017 12:37:04 +0200
Subject: [PATCH 1/4] FIX avoid making deepcopy in clone

---
 sklearn/base.py | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index aa4f9f9ce17c1..e3670def51e5f 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -10,6 +10,7 @@
 from scipy import sparse
 from .externals import six
 from .utils.fixes import signature
+from .exceptions import ChangedBehaviorWarning
 from . import __version__
 
 
@@ -26,7 +27,7 @@ def _first_and_last_element(arr):
         return arr[0, 0], arr[-1, -1]
 
 
-def clone(estimator, safe=True):
+def clone(estimator, safe=True, deepcopy=None):
     """Constructs a new estimator with the same parameters.
 
     Clone does a deep copy of the model in an estimator
@@ -39,17 +40,39 @@ def clone(estimator, safe=True, deepcopy=None):
         The estimator or group of estimators to be cloned
 
     safe : boolean, optional
-        If safe is false, clone will fall back to a deep copy on objects
-        that are not estimators.
+
+        If safe is false, clone will fall back to a copy (deep copy or simple
+        copy depending on the ``deepcopy`` parameter) on objects that are not
+        estimators.
+
+        .. deprecated:: 0.20
+           From 0.22, only a simple copy will be done instead of a deep copy.
+           Use ``deepcopy=True`` to get the previous behavior.
+
+    deepcopy : boolean, optional
+        Whether to make a deep copy or a simple copy of the objects that ware
+        not estimators.
+
+        .. versionadded:: 0.20
 
     """
+    if deepcopy is None:
+        warnings.warn("A simple copy will be performed after 0.22 instead of a"
+                      " deep copy. Set 'deepcopy=True' if you wish to make a"
+                      " deep copy of the objects which are not estimators.",
+                      ChangedBehaviorWarning)
+        deepcopy = True
+
     estimator_type = type(estimator)
     # XXX: not handling dictionaries
     if estimator_type in (list, tuple, set, frozenset):
         return estimator_type([clone(e, safe=safe) for e in estimator])
     elif not hasattr(estimator, 'get_params'):
         if not safe:
-            return copy.deepcopy(estimator)
+            if deepcopy:
+                return copy.deepcopy(estimator)
+            else:
+                return copy.copy(estimator)
         else:
             raise TypeError("Cannot clone object '%s' (type %s): "
                             "it does not seem to be a scikit-learn estimator "
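Patch 1 makes the copy strategy for non-estimator parameters explicit. The difference between the two branches it introduces is plain stdlib copy semantics; a minimal sketch (not part of the patch, the dict and array are illustrative):

```python
import copy
import numpy as np

# What the patched clone() switches between for a non-estimator parameter p:
#   deepcopy=True  -> copy.deepcopy(p)  (pre-0.20 behavior, kept as the default
#                                        during the deprecation window)
#   deepcopy=False -> copy.copy(p)      (planned default from 0.22)
p = {"coef": np.arange(3)}

deep = copy.deepcopy(p)   # owns its own array
shallow = copy.copy(p)    # new dict, but shares the same array object

assert deep["coef"] is not p["coef"]
assert shallow["coef"] is p["coef"]
```

At this stage, calling `clone` without an explicit `deepcopy` still performs the deep copy but emits `ChangedBehaviorWarning` to announce the planned 0.22 default flip.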
From 47084ab1c6d1d2da3575439c5a71bb78b0117362 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 6 Sep 2017 12:51:31 +0200
Subject: [PATCH 2/4] propagate deepcopy

---
 sklearn/base.py            | 5 +++--
 sklearn/tests/test_base.py | 7 +++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index e3670def51e5f..9ce2919777661 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -66,7 +66,8 @@ def clone(estimator, safe=True, deepcopy=None):
     estimator_type = type(estimator)
     # XXX: not handling dictionaries
     if estimator_type in (list, tuple, set, frozenset):
-        return estimator_type([clone(e, safe=safe) for e in estimator])
+        return estimator_type([clone(e, safe=safe, deepcopy=deepcopy)
+                               for e in estimator])
     elif not hasattr(estimator, 'get_params'):
         if not safe:
             if deepcopy:
@@ -81,7 +82,7 @@ def clone(estimator, safe=True, deepcopy=None):
     klass = estimator.__class__
     new_object_params = estimator.get_params(deep=False)
     for name, param in six.iteritems(new_object_params):
-        new_object_params[name] = clone(param, safe=False)
+        new_object_params[name] = clone(param, safe=False, deepcopy=deepcopy)
     new_object = klass(**new_object_params)
     params_set = new_object.get_params(deep=False)

diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py
index 948d5818b9b0e..ce93d9f7d422f 100644
--- a/sklearn/tests/test_base.py
+++ b/sklearn/tests/test_base.py
@@ -25,6 +25,7 @@
 from sklearn.tree import DecisionTreeRegressor
 from sklearn import datasets
 from sklearn.utils import deprecated
+from sklearn.exceptions import ChangedBehaviorWarning
 from sklearn.base import TransformerMixin
 from sklearn.utils.mocking import MockDataFrame
 
@@ -188,6 +189,12 @@ def test_clone_sparse_matrices():
         assert_array_equal(clf.empty.toarray(), clf_cloned.empty.toarray())
 
 
+def test_clone_change_behavior_warning():
+    assert_warns_message(ChangedBehaviorWarning,
+                         "A simple copy will be performed after 0.22",
+                         clone, MyEstimator())
+
+
 def test_repr():
     # Smoke test the repr of the base estimator.
     my_estimator = MyEstimator()

From 8c4a28e765c26895ee95f7b8f192c9e92edc8ddd Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 6 Sep 2017 23:18:27 +0200
Subject: [PATCH 3/4] only raise the warning when safe is false

---
 sklearn/base.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 9ce2919777661..aeb44699b4197 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -50,16 +50,16 @@ def clone(estimator, safe=True, deepcopy=None):
           Use ``deepcopy=True`` to get the previous behavior.
 
     deepcopy : boolean, optional
-        Whether to make a deep copy or a simple copy of the objects that ware
+        Whether to make a deep copy or a simple copy of the objects that are
         not estimators.
 
         .. versionadded:: 0.20
 
     """
-    if deepcopy is None:
+    if not safe and deepcopy is None:
         warnings.warn("A simple copy will be performed after 0.22 instead of a"
                       " deep copy. Set 'deepcopy=True' if you wish to make a"
-                      " deep copy of the objects which are not estimators.",
+                      " deep copy of the parameters which are not estimators.",
                       ChangedBehaviorWarning)
         deepcopy = True

From f8dcdeb815a6e9f8db4371ea50eb0bb76618a01e Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Thu, 7 Sep 2017 16:16:24 +0200
Subject: [PATCH 4/4] change all cloning occurrences

---
 sklearn/calibration.py                         |   2 +-
 sklearn/cross_validation.py                    |  10 +-
 sklearn/decomposition/tests/test_nmf.py        |   2 +-
 sklearn/ensemble/base.py                       |   2 +-
 .../ensemble/tests/test_gradient_boosting.py   |  10 +-
 sklearn/ensemble/voting_classifier.py          |   3 +-
 sklearn/feature_extraction/tests/test_text.py  |   2 +-
 sklearn/feature_selection/from_model.py        |   4 +-
 sklearn/feature_selection/rfe.py               |   6 +-
 sklearn/gaussian_process/gpc.py                |   2 +-
 sklearn/gaussian_process/gpr.py                |   2 +-
 sklearn/gaussian_process/kernels.py            |   2 +-
 .../gaussian_process/tests/test_kernels.py     |   6 +-
 sklearn/grid_search.py                         |   7 +-
 sklearn/learning_curve.py                      |   8 +-
 sklearn/linear_model/ransac.py                 |   2 +-
 sklearn/linear_model/tests/test_sag.py         |  20 ++--
 sklearn/linear_model/tests/test_sgd.py         |   2 +-
 sklearn/model_selection/_search.py             |   8 +-
 sklearn/model_selection/_validation.py         |  13 ++-
 sklearn/model_selection/tests/test_search.py   |   4 +-
 .../model_selection/tests/test_validation.py   |   2 +-
 sklearn/multiclass.py                          |   7 +-
 sklearn/multioutput.py                         |   6 +-
 sklearn/pipeline.py                            |   2 +-
 sklearn/svm/tests/test_sparse.py               |   2 +-
 sklearn/svm/tests/test_svm.py                  |   2 +-
 sklearn/tests/test_base.py                     |  21 ++--
 sklearn/tests/test_dummy.py                    |   6 +-
 sklearn/tests/test_multioutput.py              |   8 +-
 sklearn/tests/test_pipeline.py                 |   5 +-
 sklearn/utils/estimator_checks.py              | 101 +++++++++---------
 32 files changed, 149 insertions(+), 130 deletions(-)
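Patch 4 is mechanical: every internal call site that clones a user-supplied estimator now opts out of the deep copy explicitly. A simplified sketch of the recurring pattern (hypothetical helper name, not an excerpt from the diffs below):

```python
from sklearn.base import clone

def _fit_one_fold(estimator, X, y, train):
    # Library code fits the clone immediately and never mutates it through
    # shared parameters, so the cheaper shallow copy is sufficient here.
    est = clone(estimator, deepcopy=False)  # was: clone(estimator)
    return est.fit(X[train], y[train])
```

User-facing behavior is unchanged: fitting still happens on a fresh, unfitted clone; only the copying of non-estimator constructor parameters becomes shallow.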
diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 0d2f76cd12239..6ccb40d4b400d 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -172,7 +172,7 @@ def fit(self, X, y, sample_weight=None):
                 check_consistent_length(y, sample_weight)
             base_estimator_sample_weight = sample_weight
         for train, test in cv.split(X, y):
-            this_estimator = clone(base_estimator)
+            this_estimator = clone(base_estimator, deepcopy=False)
             if base_estimator_sample_weight is not None:
                 this_estimator.fit(
                     X[train], y[train],
diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py
index 7646459da3936..bb067c496ddea 100644
--- a/sklearn/cross_validation.py
+++ b/sklearn/cross_validation.py
@@ -1378,7 +1378,9 @@ def cross_val_predict(estimator, X, y=None, cv=None, n_jobs=1,
     # independent, and that it is pickle-able.
     parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                         pre_dispatch=pre_dispatch)
-    preds_blocks = parallel(delayed(_fit_and_predict)(clone(estimator), X, y,
+    preds_blocks = parallel(delayed(_fit_and_predict)(clone(estimator,
+                                                            deepcopy=False),
+                                                      X, y,
                                                       train, test, verbose,
                                                       fit_params)
                             for train, test in cv)
@@ -1575,7 +1577,8 @@ def cross_val_score(estimator, X, y=None, scoring=None, cv=None, n_jobs=1,
     # independent, and that it is pickle-able.
     parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                         pre_dispatch=pre_dispatch)
-    scores = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorer,
+    scores = parallel(delayed(_fit_and_score)(clone(estimator, deepcopy=False),
+                                              X, y, scorer,
                                               train, test, verbose, None,
                                               fit_params)
                       for train, test in cv)
@@ -1942,7 +1945,8 @@ def permutation_test_score(estimator, X, y, cv=None,
 
     # We clone the estimator to make sure that all the folds are
     # independent, and that it is pickle-able.
-    score = _permutation_test_score(clone(estimator), X, y, cv, scorer)
+    score = _permutation_test_score(clone(estimator, deepcopy=False),
+                                    X, y, cv, scorer)
     permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
         delayed(_permutation_test_score)(
             clone(estimator), X, _shuffle(y, labels, random_state), cv,
diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py
index 3ce53b550cb0e..6e4206f959509 100644
--- a/sklearn/decomposition/tests/test_nmf.py
+++ b/sklearn/decomposition/tests/test_nmf.py
@@ -163,7 +163,7 @@ def test_nmf_sparse_input():
     for solver in ('cd', 'mu'):
         est1 = NMF(solver=solver, n_components=5, init='random',
                    random_state=0, tol=1e-2)
-        est2 = clone(est1)
+        est2 = clone(est1, deepcopy=False)
 
         W1 = est1.fit_transform(A)
         W2 = est2.fit_transform(A_sparse)
diff --git a/sklearn/ensemble/base.py b/sklearn/ensemble/base.py
index 2477cc1c21c7d..31633bed4e4a0 100644
--- a/sklearn/ensemble/base.py
+++ b/sklearn/ensemble/base.py
@@ -122,7 +122,7 @@ def _make_estimator(self, append=True, random_state=None):
         Warning: This method should be used to properly instantiate new
         sub-estimators.
         """
-        estimator = clone(self.base_estimator_)
+        estimator = clone(self.base_estimator_, deepcopy=False)
         estimator.set_params(**dict((p, getattr(self, p))
                                     for p in self.estimator_params))
diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py
index 2042da3474ec9..c227f4191c33d 100644
--- a/sklearn/ensemble/tests/test_gradient_boosting.py
+++ b/sklearn/ensemble/tests/test_gradient_boosting.py
@@ -789,7 +789,7 @@ def test_warm_start_equal_n_estimators():
         est = Cls(n_estimators=100, max_depth=1)
         est.fit(X, y)
 
-        est2 = clone(est)
+        est2 = clone(est, deepcopy=False)
         est2.set_params(n_estimators=est.n_estimators, warm_start=True)
         est2.fit(X, y)
 
@@ -1160,15 +1160,15 @@ def test_gradient_boosting_validation_fraction():
                                      validation_fraction=0.1,
                                      learning_rate=0.1, max_depth=3,
                                      random_state=42)
-    gbc2 = clone(gbc).set_params(validation_fraction=0.3)
-    gbc3 = clone(gbc).set_params(n_iter_no_change=20)
+    gbc2 = clone(gbc, deepcopy=False).set_params(validation_fraction=0.3)
+    gbc3 = clone(gbc, deepcopy=False).set_params(n_iter_no_change=20)
 
     gbr = GradientBoostingRegressor(n_estimators=100, n_iter_no_change=10,
                                     learning_rate=0.1, max_depth=3,
                                     validation_fraction=0.1,
                                     random_state=42)
-    gbr2 = clone(gbr).set_params(validation_fraction=0.3)
-    gbr3 = clone(gbr).set_params(n_iter_no_change=20)
+    gbr2 = clone(gbr, deepcopy=False).set_params(validation_fraction=0.3)
+    gbr3 = clone(gbr, deepcopy=False).set_params(n_iter_no_change=20)
 
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
     # Check if validation_fraction has an effect
diff --git a/sklearn/ensemble/voting_classifier.py b/sklearn/ensemble/voting_classifier.py
index 26bc8e66df01a..6a3c081dd59c7 100644
--- a/sklearn/ensemble/voting_classifier.py
+++ b/sklearn/ensemble/voting_classifier.py
@@ -193,7 +193,8 @@ def fit(self, X, y, sample_weight=None):
         transformed_y = self.le_.transform(y)
 
         self.estimators_ = Parallel(n_jobs=self.n_jobs)(
-                delayed(_parallel_fit_estimator)(clone(clf), X, transformed_y,
+                delayed(_parallel_fit_estimator)(clone(clf, deepcopy=False),
+                                                 X, transformed_y,
                                                  sample_weight=sample_weight)
                     for clf in clfs if clf is not None)
diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py
index 9e613b1bca8c1..2d45250e99815 100644
--- a/sklearn/feature_extraction/tests/test_text.py
+++ b/sklearn/feature_extraction/tests/test_text.py
@@ -980,7 +980,7 @@ def test_tfidfvectorizer_export_idf():
 
 def test_vectorizer_vocab_clone():
     vect_vocab = TfidfVectorizer(vocabulary=["the"])
-    vect_vocab_clone = clone(vect_vocab)
+    vect_vocab_clone = clone(vect_vocab, deepcopy=False)
     vect_vocab.fit(ALL_FOOD_DOCS)
     vect_vocab_clone.fit(ALL_FOOD_DOCS)
     assert_equal(vect_vocab_clone.vocabulary_, vect_vocab.vocabulary_)
diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py
index 2502643453d79..58041791295c3 100644
--- a/sklearn/feature_selection/from_model.py
+++ b/sklearn/feature_selection/from_model.py
@@ -165,7 +165,7 @@ def fit(self, X, y=None, **fit_params):
         if self.prefit:
             raise NotFittedError(
                 "Since 'prefit=True', call transform directly")
-        self.estimator_ = clone(self.estimator)
+        self.estimator_ = clone(self.estimator, deepcopy=False)
         self.estimator_.fit(X, y, **fit_params)
         return self
 
@@ -198,6 +198,6 @@ def partial_fit(self, X, y=None, **fit_params):
             raise NotFittedError(
                 "Since 'prefit=True', call transform directly")
         if not hasattr(self, "estimator_"):
-            self.estimator_ = clone(self.estimator)
+            self.estimator_ = clone(self.estimator, deepcopy=False)
         self.estimator_.partial_fit(X, y, **fit_params)
         return self
diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py
index d505099cc6a88..4abe612dc5dce 100644
--- a/sklearn/feature_selection/rfe.py
+++ b/sklearn/feature_selection/rfe.py
@@ -166,7 +166,7 @@ def _fit(self, X, y, step_score=None):
             features = np.arange(n_features)[support_]
 
             # Rank the remaining features
-            estimator = clone(self.estimator)
+            estimator = clone(self.estimator, deepcopy=False)
             if self.verbose > 0:
                 print("Fitting estimator with %d features." % np.sum(support_))
 
@@ -204,7 +204,7 @@ def _fit(self, X, y, step_score=None):
         # Set final attributes
         features = np.arange(n_features)[support_]
-        self.estimator_ = clone(self.estimator)
+        self.estimator_ = clone(self.estimator, deepcopy=False)
         self.estimator_.fit(X[:, features], y)
 
         # Compute step score when only n_features_to_select features left
@@ -450,7 +450,7 @@ def fit(self, X, y):
         self.support_ = rfe.support_
         self.n_features_ = rfe.n_features_
         self.ranking_ = rfe.ranking_
-        self.estimator_ = clone(self.estimator)
+        self.estimator_ = clone(self.estimator, deepcopy=False)
         self.estimator_.fit(self.transform(X), y)
 
         # Fixing a normalization error, n is equal to get_n_splits(X, y) - 1
diff --git a/sklearn/gaussian_process/gpc.py b/sklearn/gaussian_process/gpc.py
index 31d15e533dc9e..00af4ed089941 100644
--- a/sklearn/gaussian_process/gpc.py
+++ b/sklearn/gaussian_process/gpc.py
@@ -173,7 +173,7 @@ def fit(self, X, y):
             self.kernel_ = C(1.0, constant_value_bounds="fixed") \
                 * RBF(1.0, length_scale_bounds="fixed")
         else:
-            self.kernel_ = clone(self.kernel)
+            self.kernel_ = clone(self.kernel, deepcopy=False)
 
         self.rng = check_random_state(self.random_state)
diff --git a/sklearn/gaussian_process/gpr.py b/sklearn/gaussian_process/gpr.py
index c92ca7f68f368..d781c86226c24 100644
--- a/sklearn/gaussian_process/gpr.py
+++ b/sklearn/gaussian_process/gpr.py
@@ -174,7 +174,7 @@ def fit(self, X, y):
             self.kernel_ = C(1.0, constant_value_bounds="fixed") \
                 * RBF(1.0, length_scale_bounds="fixed")
         else:
-            self.kernel_ = clone(self.kernel)
+            self.kernel_ = clone(self.kernel, deepcopy=False)
 
         self._rng = check_random_state(self.random_state)
diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py
index 50febc8542570..62eb961a76d47 100644
--- a/sklearn/gaussian_process/kernels.py
+++ b/sklearn/gaussian_process/kernels.py
@@ -200,7 +200,7 @@ def set_params(self, **params):
 
     def clone_with_theta(self, theta):
         """Returns a clone of self with given hyperparameters theta. """
-        cloned = clone(self)
+        cloned = clone(self, deepcopy=False)
         cloned.theta = theta
         return cloned
diff --git a/sklearn/gaussian_process/tests/test_kernels.py b/sklearn/gaussian_process/tests/test_kernels.py
index a07a406a415dd..96e708268fa84 100644
--- a/sklearn/gaussian_process/tests/test_kernels.py
+++ b/sklearn/gaussian_process/tests/test_kernels.py
@@ -195,7 +195,7 @@ def check_hyperparameters_equal(kernel1, kernel2):
 def test_kernel_clone():
     # Test that sklearn's clone works correctly on kernels.
     for kernel in kernels:
-        kernel_cloned = clone(kernel)
+        kernel_cloned = clone(kernel, deepcopy=False)
 
         # XXX: Should this be fixed?
         # This differs from the sklearn's estimators equality check.
@@ -218,7 +218,7 @@ def test_kernel_clone_after_set_params():
     # for more details.
     bounds = (1e-5, 1e5)
     for kernel in kernels:
-        kernel_cloned = clone(kernel)
+        kernel_cloned = clone(kernel, deepcopy=False)
         params = kernel.get_params()
         # RationalQuadratic kernel is isotropic.
         isotropic_kernels = (ExpSineSquared, RationalQuadratic)
@@ -232,7 +232,7 @@ def test_kernel_clone_after_set_params():
             params['length_scale'] = [length_scale] * 2
             params['length_scale_bounds'] = bounds * 2
             kernel_cloned.set_params(**params)
-            kernel_cloned_clone = clone(kernel_cloned)
+            kernel_cloned_clone = clone(kernel_cloned, deepcopy=False)
             assert_equal(kernel_cloned_clone.get_params(),
                          kernel_cloned.get_params())
             assert_not_equal(id(kernel_cloned_clone), id(kernel_cloned))
diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py
index 76cdaa7cb1de5..ab1207bd35e63 100644
--- a/sklearn/grid_search.py
+++ b/sklearn/grid_search.py
@@ -559,7 +559,7 @@ def _fit(self, X, y, parameter_iterable):
                       " {2} fits".format(len(cv), n_candidates,
                                          n_candidates * len(cv)))
 
-        base_estimator = clone(self.estimator)
+        base_estimator = clone(self.estimator, deepcopy=False)
 
         pre_dispatch = self.pre_dispatch
 
@@ -567,7 +567,8 @@ def _fit(self, X, y, parameter_iterable):
             n_jobs=self.n_jobs, verbose=self.verbose,
             pre_dispatch=pre_dispatch
         )(
-            delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
+            delayed(_fit_and_score)(clone(base_estimator, deepcopy=False),
+                                    X, y, self.scorer_,
                                     train, test, self.verbose, parameters,
                                     self.fit_params, return_parameters=True,
                                     error_score=self.error_score)
@@ -614,7 +615,7 @@ def _fit(self, X, y, parameter_iterable):
         if self.refit:
             # fit the best estimator using the entire dataset
             # clone first to work around broken estimators
-            best_estimator = clone(base_estimator).set_params(
+            best_estimator = clone(base_estimator, deepcopy=False).set_params(
                 **best.parameters)
             if y is not None:
                 best_estimator.fit(X, y, **self.fit_params)
diff --git a/sklearn/learning_curve.py b/sklearn/learning_curve.py
index 5571138d68d83..f7528fcb38662 100644
--- a/sklearn/learning_curve.py
+++ b/sklearn/learning_curve.py
@@ -161,11 +161,13 @@ def learning_curve(estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 5),
     if exploit_incremental_learning:
         classes = np.unique(y) if is_classifier(estimator) else None
         out = parallel(delayed(_incremental_fit_estimator)(
-            clone(estimator), X, y, classes, train, test, train_sizes_abs,
+            clone(estimator, deepcopy=False), X, y, classes,
+            train, test, train_sizes_abs,
             scorer, verbose) for train, test in cv)
     else:
         out = parallel(delayed(_fit_and_score)(
-            clone(estimator), X, y, scorer, train[:n_train_samples], test,
+            clone(estimator, deepcopy=False), X, y, scorer,
+            train[:n_train_samples], test,
             verbose, parameters=None, fit_params=None, return_train_score=True,
             error_score=error_score)
             for train, test in cv for n_train_samples in train_sizes_abs)
@@ -348,7 +350,7 @@ def validation_curve(estimator, X, y, param_name, param_range, cv=None,
     parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch,
                         verbose=verbose)
     out = parallel(delayed(_fit_and_score)(
-        clone(estimator), X, y, scorer, train, test, verbose,
+        clone(estimator, deepcopy=False), X, y, scorer, train, test, verbose,
         parameters={param_name: v}, fit_params=None, return_train_score=True)
         for train, test in cv for v in param_range)
diff --git a/sklearn/linear_model/ransac.py b/sklearn/linear_model/ransac.py
index ec43c3719b68a..aef23d42534e7 100644
--- a/sklearn/linear_model/ransac.py
+++ b/sklearn/linear_model/ransac.py
@@ -251,7 +251,7 @@ def fit(self, X, y, sample_weight=None):
         check_consistent_length(X, y)
 
         if self.base_estimator is not None:
-            base_estimator = clone(self.base_estimator)
+            base_estimator = clone(self.base_estimator, deepcopy=False)
         else:
             base_estimator = LinearRegression()
diff --git a/sklearn/linear_model/tests/test_sag.py b/sklearn/linear_model/tests/test_sag.py
index 02a557d56ef7f..eefb6c37039f9 100644
--- a/sklearn/linear_model/tests/test_sag.py
+++ b/sklearn/linear_model/tests/test_sag.py
@@ -313,7 +313,7 @@ def test_sag_pobj_matches_logistic_regression():
     clf1 = LogisticRegression(solver='sag', fit_intercept=False, tol=.0000001,
                               C=1. / alpha / n_samples, max_iter=max_iter,
                               random_state=10)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
     clf3 = LogisticRegression(fit_intercept=False, tol=.0000001,
                               C=1. / alpha / n_samples, max_iter=max_iter,
                               random_state=10)
@@ -346,7 +346,7 @@ def test_sag_pobj_matches_ridge_regression():
     clf1 = Ridge(fit_intercept=fit_intercept, tol=.00000000001, solver='sag',
                  alpha=alpha, max_iter=n_iter, random_state=42)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
     clf3 = Ridge(fit_intercept=fit_intercept, tol=.00001, solver='lsqr',
                  alpha=alpha, max_iter=n_iter, random_state=42)
@@ -380,7 +380,7 @@ def test_sag_regressor_computed_correctly():
 
     clf1 = Ridge(fit_intercept=fit_intercept, tol=tol, solver='sag',
                  alpha=alpha * n_samples, max_iter=max_iter)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
 
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
@@ -468,7 +468,7 @@ def test_sag_regressor():
     clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
                  alpha=alpha * n_samples)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
     score1 = clf1.score(X, y)
@@ -481,7 +481,7 @@ def test_sag_regressor():
     clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
                  alpha=alpha * n_samples)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
     score1 = clf1.score(X, y)
@@ -510,7 +510,7 @@ def test_sag_classifier_computed_correctly():
     clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                               max_iter=n_iter, tol=tol, random_state=77,
                               fit_intercept=fit_intercept)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
 
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
@@ -550,7 +550,7 @@ def test_sag_multiclass_computed_correctly():
     clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                               max_iter=max_iter, tol=tol, random_state=77,
                               fit_intercept=fit_intercept)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
 
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
@@ -608,7 +608,7 @@ def test_classifier_results():
     y = np.sign(y)
     clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                               max_iter=max_iter, tol=tol, random_state=77)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
 
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
@@ -639,7 +639,7 @@ def test_binary_classifier_class_weight():
                               max_iter=n_iter, tol=tol, random_state=77,
                               fit_intercept=fit_intercept,
                               class_weight=class_weight)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
 
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
@@ -686,7 +686,7 @@ def test_multiclass_classifier_class_weight():
                               max_iter=max_iter, tol=tol, random_state=77,
                               fit_intercept=fit_intercept,
                               class_weight=class_weight)
-    clf2 = clone(clf1)
+    clf2 = clone(clf1, deepcopy=False)
 
     clf1.fit(X, y)
     clf2.fit(sp.csr_matrix(X), y)
diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py
index d4552a9934cf1..f423ed73b83a3 100644
--- a/sklearn/linear_model/tests/test_sgd.py
+++ b/sklearn/linear_model/tests/test_sgd.py
@@ -198,7 +198,7 @@ def test_input_format(self):
     def test_clone(self):
         # Test whether clone works ok.
         clf = self.factory(alpha=0.01, penalty='l1')
-        clf = clone(clf)
+        clf = clone(clf, deepcopy=False)
         clf.set_params(penalty='l2')
         clf.fit(X, Y)
diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index ebfa1e9bd3e18..53973b3d9113e 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -621,13 +621,14 @@ def fit(self, X, y=None, groups=None, **fit_params):
                       " {2} fits".format(n_splits, n_candidates,
                                          n_candidates * n_splits))
 
-        base_estimator = clone(self.estimator)
+        base_estimator = clone(self.estimator, deepcopy=False)
         pre_dispatch = self.pre_dispatch
 
         out = Parallel(
             n_jobs=self.n_jobs, verbose=self.verbose,
             pre_dispatch=pre_dispatch
-        )(delayed(_fit_and_score)(clone(base_estimator), X, y, scorers, train,
+        )(delayed(_fit_and_score)(clone(base_estimator, deepcopy=False),
+                                  X, y, scorers, train,
                                   test, self.verbose, parameters,
                                   fit_params=fit_params,
                                   return_train_score=self.return_train_score,
@@ -719,7 +720,8 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
                                                  self.best_index_]
 
         if self.refit:
-            self.best_estimator_ = clone(base_estimator).set_params(
+            self.best_estimator_ = clone(base_estimator,
+                                         deepcopy=False).set_params(
                 **self.best_params_)
             if y is not None:
                 self.best_estimator_.fit(X, y, **fit_params)
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 798f771534571..646dd777e5c33 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -191,7 +191,8 @@ def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None,
                         pre_dispatch=pre_dispatch)
     scores = parallel(
         delayed(_fit_and_score)(
-            clone(estimator), X, y, scorers, train, test, verbose, None,
+            clone(estimator, deepcopy=False), X, y, scorers,
+            train, test, verbose, None,
             fit_params, return_train_score=return_train_score,
             return_times=True)
         for train, test in cv.split(X, y, groups))
@@ -648,7 +649,8 @@ def cross_val_predict(estimator, X, y=None, groups=None, cv=None, n_jobs=1,
     parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                         pre_dispatch=pre_dispatch)
     prediction_blocks = parallel(delayed(_fit_and_predict)(
-        clone(estimator), X, y, train, test, verbose, fit_params, method)
+        clone(estimator, deepcopy=False), X, y,
+        train, test, verbose, fit_params, method)
         for train, test in cv.split(X, y, groups))
 
     # Concatenate the predictions
@@ -875,7 +877,8 @@ def permutation_test_score(estimator, X, y, groups=None, cv=None,
     score = _permutation_test_score(clone(estimator), X, y, groups, cv, scorer)
     permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
         delayed(_permutation_test_score)(
-            clone(estimator), X, _shuffle(y, groups, random_state),
+            clone(estimator, deepcopy=False),
+            X, _shuffle(y, groups, random_state),
             groups, cv, scorer)
         for _ in range(n_permutations))
     permutation_scores = np.array(permutation_scores)
@@ -1058,7 +1061,7 @@ def learning_curve(estimator, X, y, groups=None,
             train_test_proportions.append((train[:n_train_samples], test))
 
     out = parallel(delayed(_fit_and_score)(
-        clone(estimator), X, y, scorer, train, test,
+        clone(estimator, deepcopy=False), X, y, scorer, train, test,
         verbose, parameters=None, fit_params=None, return_train_score=True)
         for train, test in train_test_proportions)
     out = np.array(out)
@@ -1240,7 +1243,7 @@ def validation_curve(estimator, X, y, param_name, param_range, groups=None,
     parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch,
                         verbose=verbose)
     out = parallel(delayed(_fit_and_score)(
-        clone(estimator), X, y, scorer, train, test, verbose,
+        clone(estimator, deepcopy=False), X, y, scorer, train, test, verbose,
         parameters={param_name: v}, fit_params=None, return_train_score=True)
         # NOTE do not change order of iteration to allow one time cv splitters
         for train, test in cv.split(X, y, groups) for v in param_range)
diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index ee3fe26eedd8c..21fd21f416492 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -1159,10 +1159,10 @@ def test_fit_grid_point():
         for params in ({'C': 0.1}, {'C': 0.01}, {'C': 0.001}):
             for train, test in cv.split(X, y):
                 this_scores, this_params, n_test_samples = fit_grid_point(
-                    X, y, clone(svc), params, train, test,
+                    X, y, clone(svc, deepcopy=False), params, train, test,
                     scorer, verbose=False)
 
-                est = clone(svc).set_params(**params)
+                est = clone(svc, deepcopy=False).set_params(**params)
                 est.fit(X[train], y[train])
                 expected_score = scorer(est, X[test], y[test])
diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
index baff76257447d..3974a67e7977c 100644
--- a/sklearn/model_selection/tests/test_validation.py
+++ b/sklearn/model_selection/tests/test_validation.py
@@ -361,7 +361,7 @@ def test_cross_validate():
     train_r2_scores = []
     test_r2_scores = []
     for train, test in cv.split(X, y):
-        est = clone(reg).fit(X[train], y[train])
+        est = clone(reg, deepcopy=False).fit(X[train], y[train])
         train_mse_scores.append(mse_scorer(est, X[train], y[train]))
         train_r2_scores.append(r2_scorer(est, X[train], y[train]))
         test_mse_scores.append(mse_scorer(est, X[test], y[test]))
diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py
index a8510cf0a0a85..7b0df4f6de57c 100644
--- a/sklearn/multiclass.py
+++ b/sklearn/multiclass.py
@@ -76,7 +76,7 @@ def _fit_binary(estimator, X, y, classes=None):
                           str(classes[c]))
         estimator = _ConstantPredictor().fit(X, unique_y)
     else:
-        estimator = clone(estimator)
+        estimator = clone(estimator, deepcopy=False)
         estimator.fit(X, y)
     return estimator
 
@@ -247,7 +247,8 @@ def partial_fit(self, X, y, classes=None):
             if not hasattr(self.estimator, "partial_fit"):
                 raise ValueError(("Base estimator {0}, doesn't have "
                                   "partial_fit method").format(self.estimator))
-            self.estimators_ = [clone(self.estimator) for _ in range
+            self.estimators_ = [clone(self.estimator, deepcopy=False)
+                                for _ in range
                                 (self.n_classes_)]
 
             # A sparse LabelBinarizer, with sparse_output=True, has been
@@ -541,7 +542,7 @@ def partial_fit(self, X, y, classes=None):
         self
         """
         if _check_partial_fit_first_call(self, classes):
-            self.estimators_ = [clone(self.estimator) for i in
+            self.estimators_ = [clone(self.estimator, deepcopy=False) for i in
                                 range(self.n_classes_ *
                                       (self.n_classes_ - 1) // 2)]
diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py
index 6c9fbc55f7863..5f1044d1cda34 100644
--- a/sklearn/multioutput.py
+++ b/sklearn/multioutput.py
@@ -32,7 +32,7 @@
 
 
 def _fit_estimator(estimator, X, y, sample_weight=None):
-    estimator = clone(estimator)
+    estimator = clone(estimator, deepcopy=False)
     if sample_weight is not None:
         estimator.fit(X, y, sample_weight=sample_weight)
     else:
@@ -43,7 +43,7 @@ def _fit_estimator(estimator, X, y, sample_weight=None):
 def _partial_fit_estimator(estimator, X, y, classes=None, sample_weight=None,
                            first_time=True):
     if first_time:
-        estimator = clone(estimator)
+        estimator = clone(estimator, deepcopy=False)
 
     if sample_weight is not None:
         if classes is not None:
@@ -467,7 +467,7 @@ def fit(self, X, Y):
         elif sorted(self.order_) != list(range(Y.shape[1])):
             raise ValueError("invalid order")
 
-        self.estimators_ = [clone(self.base_estimator)
+        self.estimators_ = [clone(self.base_estimator, deepcopy=False)
                             for _ in range(Y.shape[1])]
 
         self.classes_ = []
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 66da9dffeb066..8a4843c5279dc 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -205,7 +205,7 @@ def _fit(self, X, y=None, **fit_params):
                     # backward compatibility
                     cloned_transformer = transformer
                 else:
-                    cloned_transformer = clone(transformer)
+                    cloned_transformer = clone(transformer, deepcopy=False)
                 # Fit or load from cache the current transfomer
                 Xt, fitted_transformer = fit_transform_one_cached(
                     cloned_transformer, None, Xt, y,
diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py
index f2c10ceddd0f1..c16bcfa8a168c 100644
--- a/sklearn/svm/tests/test_sparse.py
+++ b/sklearn/svm/tests/test_sparse.py
@@ -312,7 +312,7 @@ def test_sparse_svc_clone_with_callable_kernel():
     # meaning that everything works fine.
     a = svm.SVC(C=1, kernel=lambda x, y: x * y.T, probability=True,
                 random_state=0)
-    b = base.clone(a)
+    b = base.clone(a, deepcopy=False)
 
     b.fit(X_sp, Y)
     pred = b.predict(X_sp)
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index daf35f82a39e5..8916edb41b09e 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -810,7 +810,7 @@ def test_svc_clone_with_callable_kernel():
                        probability=True, random_state=0,
                        decision_function_shape='ovr')
     # clone for checking clonability with lambda functions..
-    svm_cloned = base.clone(svm_callable)
+    svm_cloned = base.clone(svm_callable, deepcopy=False)
     svm_cloned.fit(iris.data, iris.target)
 
     svm_builtin = svm.SVC(kernel='linear', probability=True, random_state=0,
diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py
index ce93d9f7d422f..83638a1c8168d 100644
--- a/sklearn/tests/test_base.py
+++ b/sklearn/tests/test_base.py
@@ -130,7 +130,7 @@ def test_clone_2():
     selector = SelectFpr(f_classif, alpha=0.1)
     selector.own_attribute = "test"
-    new_selector = clone(selector)
+    new_selector = clone(selector, deepcopy=False)
     assert_false(hasattr(new_selector, "own_attribute"))
 
 
@@ -138,30 +138,30 @@ def test_clone_buggy():
     # Check that clone raises an error on buggy estimators.
     buggy = Buggy()
     buggy.a = 2
-    assert_raises(RuntimeError, clone, buggy)
+    assert_raises(RuntimeError, clone, buggy, deepcopy=False)
 
     no_estimator = NoEstimator()
-    assert_raises(TypeError, clone, no_estimator)
+    assert_raises(TypeError, clone, no_estimator, deepcopy=False)
 
     varg_est = VargEstimator()
-    assert_raises(RuntimeError, clone, varg_est)
+    assert_raises(RuntimeError, clone, varg_est, deepcopy=False)
 
 
 def test_clone_empty_array():
     # Regression test for cloning estimators with empty arrays
     clf = MyEstimator(empty=np.array([]))
-    clf2 = clone(clf)
+    clf2 = clone(clf, deepcopy=False)
     assert_array_equal(clf.empty, clf2.empty)
 
     clf = MyEstimator(empty=sp.csr_matrix(np.array([[0]])))
-    clf2 = clone(clf)
+    clf2 = clone(clf, deepcopy=False)
     assert_array_equal(clf.empty.data, clf2.empty.data)
 
 
 def test_clone_nan():
     # Regression test for cloning estimators with default parameter as np.nan
     clf = MyEstimator(empty=np.nan)
-    clf2 = clone(clf)
+    clf2 = clone(clf, deepcopy=False)
 
     assert_true(clf.empty is clf2.empty)
 
@@ -173,7 +173,8 @@ def test_clone_copy_init_params():
                "This behavior is deprecated as of 0.18 and support "
                "for this behavior will be removed in 0.20.")
 
-    assert_warns_message(DeprecationWarning, message, clone, est)
+    assert_warns_message(DeprecationWarning, message, clone, est,
+                         deepcopy=False)
 
 
 def test_clone_sparse_matrices():
@@ -184,7 +185,7 @@ def test_clone_sparse_matrices():
     for cls in sparse_matrix_classes:
         sparse_matrix = cls(np.eye(5))
         clf = MyEstimator(empty=sparse_matrix)
-        clf_cloned = clone(clf)
+        clf_cloned = clone(clf, deepcopy=False)
         assert_true(clf.empty.__class__ is clf_cloned.empty.__class__)
         assert_array_equal(clf.empty.toarray(), clf_cloned.empty.toarray())
 
@@ -314,7 +315,7 @@ def transform(self, X):
     d = np.arange(10)
     df = MockDataFrame(d)
     e = DummyEstimator(df, scalar_param=1)
-    cloned_e = clone(e)
+    cloned_e = clone(e, deepcopy=False)
 
     # the test
     assert_true((e.df == cloned_e.df).values.all())
diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py
index 537a6184b944c..4f7123fe91e1d 100644
--- a/sklearn/tests/test_dummy.py
+++ b/sklearn/tests/test_dummy.py
@@ -46,7 +46,7 @@ def _check_behavior_2d(clf):
     # 1d case
     X = np.array([[0], [0], [0], [0]])  # ignored
     y = np.array([1, 2, 1, 1])
-    est = clone(clf)
+    est = clone(clf, deepcopy=False)
     est.fit(X, y)
     y_pred = est.predict(X)
     assert_equal(y.shape, y_pred.shape)
@@ -56,7 +56,7 @@ def _check_behavior_2d(clf):
                   [2, 0],
                   [1, 0],
                   [1, 3]])
-    est = clone(clf)
+    est = clone(clf, deepcopy=False)
     est.fit(X, y)
     y_pred = est.predict(X)
     assert_equal(y.shape, y_pred.shape)
@@ -69,7 +69,7 @@ def _check_behavior_2d_for_constant(clf):
                   [2, 0, 1, 2, 5],
                   [1, 0, 4, 5, 2],
                   [1, 3, 3, 2, 0]])
-    est = clone(clf)
+    est = clone(clf, deepcopy=False)
     est.fit(X, y)
     y_pred = est.predict(X)
     assert_equal(y.shape, y_pred.shape)
diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py
index 26981d20fc633..41590f71ffd46 100644
--- a/sklearn/tests/test_multioutput.py
+++ b/sklearn/tests/test_multioutput.py
@@ -195,7 +195,7 @@ def test_multi_output_classification_partial_fit():
     # predictions are equal after first partial_fit and second partial_fit
     for i in range(3):
         # create a clone with the same state
-        sgd_linear_clf = clone(sgd_linear_clf)
+        sgd_linear_clf = clone(sgd_linear_clf, deepcopy=False)
         sgd_linear_clf.partial_fit(
             X[:half_index], y[:half_index, i], classes=classes[i])
         assert_array_equal(sgd_linear_clf.predict(X), first_predictions[:, i])
@@ -235,7 +235,8 @@ def test_multi_output_classification():
 
     # train the forest with each column and assert that predictions are equal
     for i in range(3):
-        forest_ = clone(forest)  # create a clone with the same state
+        # create a clone with the same state
+        forest_ = clone(forest, deepcopy=False)
         forest_.fit(X, y[:, i])
         assert_equal(list(forest_.predict(X)), list(predictions[:, i]))
         assert_array_equal(list(forest_.predict_proba(X)),
@@ -255,7 +256,8 @@ def test_multiclass_multioutput_estimator():
 
     # train the forest with each column and assert that predictions are equal
     for i in range(3):
-        multi_class_svc_ = clone(multi_class_svc)  # create a clone
+        # create a clone
+        multi_class_svc_ = clone(multi_class_svc, deepcopy=False)
         multi_class_svc_.fit(X, y[:, i])
         assert_equal(list(multi_class_svc_.predict(X)),
                      list(predictions[:, i]))
diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 1165370885d36..3311fe49747a2 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -187,7 +187,7 @@ def test_pipeline_init():
     assert_raises(ValueError, pipe.set_params, anova__C=0.1)
 
     # Test clone
-    pipe2 = clone(pipe)
+    pipe2 = clone(pipe, deepcopy=False)
     assert_false(pipe.named_steps['svc'] is pipe2.named_steps['svc'])
 
     # Check that apart from estimators, the parameters are the same
@@ -907,7 +907,8 @@ def test_pipeline_memory():
         # Test with Transformer + SVC
         clf = SVC(probability=True, random_state=0)
         transf = DummyTransf()
-        pipe = Pipeline([('transf', clone(transf)), ('svc', clf)])
+        pipe = Pipeline([('transf', clone(transf, deepcopy=False)),
+                         ('svc', clf)])
         cached_pipe = Pipeline([('transf', transf), ('svc', clf)],
                                memory=memory)
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 3e7cb198a9d12..1d3c6a881b74e 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -137,7 +137,7 @@ def _yield_classifier_checks(name, classifier):
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_supervised_y_no_nan(name, estimator_orig):
     # Checks that the Estimator targets are not NaN.
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     rng = np.random.RandomState(888)
     X = rng.randn(10, 5)
     y = np.ones(10) * np.inf
@@ -361,16 +361,17 @@ def check_estimator_sparse_data(name, estimator_orig):
     y = (4 * rng.rand(40)).astype(np.int)
     # catch deprecation warnings
     with ignore_warnings(category=DeprecationWarning):
-        estimator = clone(estimator_orig)
+        estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
     for sparse_format in ['csr', 'csc', 'dok', 'lil', 'coo', 'dia', 'bsr']:
         X = X_csr.asformat(sparse_format)
         # catch deprecation warnings
         with ignore_warnings(category=(DeprecationWarning, FutureWarning)):
             if name in ['Scaler', 'StandardScaler']:
-                estimator = clone(estimator).set_params(with_mean=False)
+                estimator = clone(estimator,
+                                  deepcopy=False).set_params(with_mean=False)
             else:
-                estimator = clone(estimator)
+                estimator = clone(estimator, deepcopy=False)
         # fit and predict
         try:
             with ignore_warnings(category=(DeprecationWarning, FutureWarning)):
@@ -399,7 +400,7 @@ def check_estimator_sparse_data(name, estimator_orig):
 def check_sample_weights_pandas_series(name, estimator_orig):
     # check that estimators will accept a 'sample_weight' parameter of
     # type pandas.Series in the 'fit' function.
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     if has_fit_parameter(estimator, "sample_weight"):
         try:
             import pandas as pd
@@ -422,7 +423,7 @@ def check_sample_weights_list(name, estimator_orig):
     # check that estimators will accept a 'sample_weight' parameter of
     # type list in the 'fit' function.
     if has_fit_parameter(estimator_orig, "sample_weight"):
-        estimator = clone(estimator_orig)
+        estimator = clone(estimator_orig, deepcopy=False)
         rnd = np.random.RandomState(0)
         X = rnd.uniform(size=(10, 3))
         y = np.arange(10) % 3
@@ -438,7 +439,7 @@ def check_dtype_object(name, estimator_orig):
     rng = np.random.RandomState(0)
     X = rng.rand(40, 10).astype(object)
     y = (X[:, 0] * 4).astype(np.int)
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
     estimator.fit(X, y)
@@ -464,7 +465,7 @@ def check_complex_data(name, estimator_orig):
     X = np.random.sample(10) + 1j * np.random.sample(10)
     X = X.reshape(-1, 1)
     y = np.random.sample(10) + 1j * np.random.sample(10)
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     assert_raises_regex(ValueError, "Complex data not supported",
                         estimator.fit, X, y)
 
@@ -484,7 +485,7 @@ def check_dict_unchanged(name, estimator_orig):
         X = 2 * rnd.uniform(size=(20, 3))
 
     y = X[:, 0].astype(np.int)
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
     if hasattr(estimator, "n_components"):
         estimator.n_components = 1
@@ -517,7 +518,7 @@ def check_dont_overwrite_parameters(name, estimator_orig):
     if hasattr(estimator_orig.__init__, "deprecated_original"):
         # to not check deprecated classes
         return
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     rnd = np.random.RandomState(0)
    X = 3 * rnd.uniform(size=(20, 3))
     y = X[:, 0].astype(np.int)
@@ -567,7 +568,7 @@ def check_fit2d_predict1d(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(20, 3))
     y = X[:, 0].astype(np.int)
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
     if hasattr(estimator, "n_components"):
@@ -591,7 +592,7 @@ def check_fit2d_1sample(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(1, 10))
     y = X[:, 0].astype(np.int)
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
     if hasattr(estimator, "n_components"):
@@ -612,7 +613,7 @@ def check_fit2d_1feature(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(10, 1))
     y = X[:, 0].astype(np.int)
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
     if hasattr(estimator, "n_components"):
@@ -633,7 +634,7 @@ def check_fit1d_1feature(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(20))
     y = X.astype(np.int)
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
     if hasattr(estimator, "n_components"):
@@ -655,7 +656,7 @@ def check_fit1d_1sample(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(20))
     y = np.array([1])
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
     if hasattr(estimator, "n_components"):
@@ -698,7 +699,7 @@ def check_transformer_data_not_an_array(name, transformer):
 
 def check_transformers_unfitted(name, transformer):
     X, y = _boston_subset()
 
-    transformer = clone(transformer)
+    transformer = clone(transformer, deepcopy=False)
     with assert_raises((AttributeError, ValueError), msg="The unfitted "
                        "transformer {} does not raise an error when "
                        "transform is called. Perhaps use "
@@ -716,7 +717,7 @@ def _check_transformer(name, transformer_orig, X, y):
         msg = name + ' is non deterministic on 32bit Python'
         raise SkipTest(msg)
     n_samples, n_features = np.asarray(X).shape
-    transformer = clone(transformer_orig)
+    transformer = clone(transformer_orig, deepcopy=False)
     set_random_state(transformer)
 
     # fit
@@ -729,7 +730,7 @@ def _check_transformer(name, transformer_orig, X, y):
     transformer.fit(X, y_)
 
     # fit_transform method should work on non fitted estimator
-    transformer_clone = clone(transformer)
+    transformer_clone = clone(transformer, deepcopy=False)
     X_pred = transformer_clone.fit_transform(X, y=y_)
 
     if isinstance(X_pred, tuple):
@@ -798,7 +799,7 @@ def check_pipeline_consistency(name, estimator_orig):
     X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                       random_state=0, n_features=2, cluster_std=0.1)
     X -= X.min()
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
     set_random_state(estimator)
     pipeline = make_pipeline(estimator)
@@ -823,7 +824,7 @@ def check_fit_score_takes_y(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X = rnd.uniform(size=(10, 3))
     y = np.arange(10) % 3
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
     set_random_state(estimator)
 
@@ -856,7 +857,7 @@ def check_estimators_dtypes(name, estimator_orig):
     methods = ["predict", "transform", "decision_function", "predict_proba"]
 
     for X_train in [X_train_32, X_train_64, X_train_int_64, X_train_int_32]:
-        estimator = clone(estimator_orig)
+        estimator = clone(estimator_orig, deepcopy=False)
         set_random_state(estimator, 1)
         estimator.fit(X_train, y)
 
@@ -867,7 +868,7 @@ def check_estimators_dtypes(name, estimator_orig):
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_estimators_empty_data_messages(name, estimator_orig):
-    e = clone(estimator_orig)
+    e = clone(estimator_orig, deepcopy=False)
     set_random_state(e, 1)
 
     X_zero_samples = np.empty(0).reshape(0, 3)
@@ -908,7 +909,7 @@ def check_estimators_nan_inf(name, estimator_orig):
     for X_train in [X_train_nan, X_train_inf]:
         # catch deprecation warnings
         with ignore_warnings(category=(DeprecationWarning, FutureWarning)):
-            estimator = clone(estimator_orig)
+            estimator = clone(estimator_orig, deepcopy=False)
             set_random_state(estimator, 1)
             # try to fit
             try:
@@ -970,7 +971,7 @@ def check_estimators_pickle(name, estimator_orig):
     # some estimators can't do features less than 0
     X -= X.min()
 
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
 
     # some estimators only take multioutputs
     y = multioutput_estimator_convert_y_2d(estimator, y)
@@ -999,7 +1000,7 @@ def check_estimators_partial_fit_n_features(name, estimator_orig):
     # check if number of features changes between calls to partial_fit.
     if not hasattr(estimator_orig, 'partial_fit'):
         return
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     X, y = make_blobs(n_samples=50, random_state=1)
     X -= X.min()
 
@@ -1022,7 +1023,7 @@ def check_estimators_partial_fit_n_features(name, estimator_orig):
 
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_clustering(name, clusterer_orig):
-    clusterer = clone(clusterer_orig)
+    clusterer = clone(clusterer_orig, deepcopy=False)
     X, y = make_blobs(n_samples=50, random_state=1)
     X, y = shuffle(X, y, random_state=7)
     X = StandardScaler().fit_transform(X)
@@ -1057,7 +1058,7 @@ def check_clustering(name, clusterer_orig):
 def check_clusterer_compute_labels_predict(name, clusterer_orig):
     """Check that predict is invariant of compute_labels"""
     X, y = make_blobs(n_samples=20, random_state=0)
-    clusterer = clone(clusterer_orig)
+    clusterer = clone(clusterer_orig, deepcopy=False)
 
     if hasattr(clusterer, "compute_labels"):
         # MiniBatchKMeans
@@ -1081,7 +1082,7 @@ def check_classifiers_one_label(name, classifier_orig):
     y = np.ones(10)
     # catch deprecation warnings
     with ignore_warnings(category=(DeprecationWarning, FutureWarning)):
-        classifier = clone(classifier_orig)
+        classifier = clone(classifier_orig, deepcopy=False)
         # try to fit
         try:
             classifier.fit(X_train, y)
@@ -1116,7 +1117,7 @@ def check_classifiers_train(name, classifier_orig):
         classes = np.unique(y)
         n_classes = len(classes)
         n_samples, n_features = X.shape
-        classifier = clone(classifier_orig)
+        classifier = clone(classifier_orig, deepcopy=False)
         if name in ['BernoulliNB', 'MultinomialNB', 'ComplementNB']:
             X -= X.min()
         set_random_state(classifier)
@@ -1195,7 +1196,7 @@ def check_estimators_fit_returns_self(name, estimator_orig):
     # some want non-negative input
     X -= X.min()
 
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
     set_random_state(estimator)
@@ -1215,7 +1216,7 @@ def check_estimators_unfitted(name, estimator_orig):
     # Common test for Regressors as well as Classifiers
     X, y = _boston_subset()
 
-    est = clone(estimator_orig)
+    est = clone(estimator_orig, deepcopy=False)
 
     msg = "fit"
     if hasattr(est, 'predict'):
@@ -1243,7 +1244,7 @@ def check_supervised_y_2d(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X = rnd.uniform(size=(10, 3))
     y = np.arange(10) % 3
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     set_random_state(estimator)
     # fit
     estimator.fit(X, y)
@@ -1285,7 +1286,7 @@ def check_classifiers_classes(name, classifier_orig):
         y_ = y_names
 
     classes = np.unique(y_)
-    classifier = clone(classifier_orig)
+    classifier = clone(classifier_orig, deepcopy=False)
     if name in ['BernoulliNB', 'ComplementNB']:
         X = X > X.mean()
     set_random_state(classifier)
@@ -1310,8 +1311,8 @@ def check_regressors_int(name, regressor_orig):
     y = multioutput_estimator_convert_y_2d(regressor_orig, y)
     rnd = np.random.RandomState(0)
     # separate estimators to control random seeds
-    regressor_1 = clone(regressor_orig)
-    regressor_2 = clone(regressor_orig)
+    regressor_1 = clone(regressor_orig, deepcopy=False)
+    regressor_2 = clone(regressor_orig, deepcopy=False)
     set_random_state(regressor_1)
     set_random_state(regressor_2)
 
@@ -1334,7 +1335,7 @@ def check_regressors_train(name, regressor_orig):
     X, y = _boston_subset()
     y = StandardScaler().fit_transform(y.reshape(-1, 1))  # X is already scaled
     y = y.ravel()
-    regressor = clone(regressor_orig)
+    regressor = clone(regressor_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(regressor, y)
     rnd = np.random.RandomState(0)
     if not hasattr(regressor, 'alphas') and hasattr(regressor, 'alpha'):
@@ -1374,7 +1375,7 @@ def check_regressors_no_decision_function(name, regressor_orig):
     # checks whether regressors have decision_function or predict_proba
     rng = np.random.RandomState(0)
     X = rng.normal(size=(10, 4))
-    regressor = clone(regressor_orig)
+    regressor = clone(regressor_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(regressor, X[:, 0])
 
     if hasattr(regressor, "n_components"):
@@ -1415,7 +1416,7 @@ def check_class_weight_classifiers(name, classifier_orig):
         else:
             class_weight = {0: 1000, 1: 0.0001, 2: 0.0001}
 
-        classifier = clone(classifier_orig).set_params(
+        classifier = clone(classifier_orig, deepcopy=False).set_params(
             class_weight=class_weight)
         if hasattr(classifier, "n_iter"):
             classifier.set_params(n_iter=100)
@@ -1435,7 +1436,7 @@ def check_class_weight_classifiers(name, classifier_orig):
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_class_weight_balanced_classifiers(name, classifier_orig, X_train,
                                             y_train, X_test, y_test, weights):
-    classifier = clone(classifier_orig)
+    classifier = clone(classifier_orig, deepcopy=False)
     if hasattr(classifier, "n_iter"):
         classifier.set_params(n_iter=100)
     if hasattr(classifier, "max_iter"):
@@ -1491,7 +1492,7 @@ def check_estimators_overwrite_params(name, estimator_orig):
     X, y = make_blobs(random_state=0, n_samples=9)
     # some want non-negative input
     X -= X.min()
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
     set_random_state(estimator)
@@ -1544,7 +1545,7 @@ def check_sparsify_coefficients(name, estimator_orig):
     X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1],
                   [-1, -2], [2, 2], [-2, -2]])
     y = [1, 1, 1, 2, 2, 2, 3, 3, 3]
-    est = clone(estimator_orig)
+    est = clone(estimator_orig, deepcopy=False)
 
     est.fit(X, y)
     pred_orig = est.predict(X)
@@ -1584,8 +1585,8 @@ def check_estimators_data_not_an_array(name, estimator_orig, X, y):
                        "for cross decomposition module as estimators "
                        "are not deterministic.")
     # separate estimators to control random seeds
-    estimator_1 = clone(estimator_orig)
-    estimator_2 = clone(estimator_orig)
+    estimator_1 = clone(estimator_orig, deepcopy=False)
+    estimator_2 = clone(estimator_orig, deepcopy=False)
     set_random_state(estimator_1)
     set_random_state(estimator_2)
 
@@ -1611,7 +1612,7 @@ def check_parameters_default_constructible(name, Estimator):
         else:
             estimator = Estimator()
         # test cloning
-        clone(estimator)
+        clone(estimator, deepcopy=False)
         # test __repr__
         repr(estimator)
         # test that set_params returns self
@@ -1695,9 +1696,9 @@ def check_non_transformer_estimators_n_iter(name, estimator_orig):
 
     # LassoLars stops early for the default alpha=1.0 the iris dataset.
     if name == 'LassoLars':
-        estimator = clone(estimator_orig).set_params(alpha=0.)
+        estimator = clone(estimator_orig, deepcopy=False).set_params(alpha=0.)
     else:
-        estimator = clone(estimator_orig)
+        estimator = clone(estimator_orig, deepcopy=False)
     if hasattr(estimator, 'max_iter'):
         iris = load_iris()
         X, y_ = iris.data, iris.target
@@ -1719,7 +1720,7 @@ def check_non_transformer_estimators_n_iter(name, estimator_orig):
 def check_transformer_n_iter(name, estimator_orig):
     # Test that transformers with a parameter max_iter, return the
     # attribute of n_iter_ at least 1.
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
     if hasattr(estimator, "max_iter"):
         if name in CROSS_DECOMPOSITION:
             # Check using default data
@@ -1757,7 +1758,7 @@ def fit(self, X, y):
         def transform(self, X):
             return X
 
-    e = clone(estimator_orig)
+    e = clone(estimator_orig, deepcopy=False)
 
     shallow_params = e.get_params(deep=False)
     deep_params = e.get_params(deep=True)
@@ -1772,7 +1773,7 @@ def check_classifiers_regression_target(name, estimator_orig):
 
     boston = load_boston()
     X, y = boston.data, boston.target
-    e = clone(estimator_orig)
+    e = clone(estimator_orig, deepcopy=False)
     msg = 'Unknown label type: '
     assert_raises_regex(ValueError, msg, e.fit, X, y)
 
@@ -1786,7 +1787,7 @@ def check_decision_proba_consistency(name, estimator_orig):
     X, y = make_blobs(n_samples=100, random_state=0, n_features=4,
                       centers=centers, cluster_std=1.0, shuffle=True)
     X_test = np.random.randn(20, 2) + 4
-    estimator = clone(estimator_orig)
+    estimator = clone(estimator_orig, deepcopy=False)
 
     if (hasattr(estimator, "decision_function") and
             hasattr(estimator, "predict_proba")):
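With all four patches applied, the transition can also be driven from user code. A hedged usage sketch (the estimator choice is arbitrary, not taken from the diffs):

```python
import warnings
from sklearn.base import clone
from sklearn.exceptions import ChangedBehaviorWarning
from sklearn.tree import DecisionTreeClassifier

est = DecisionTreeClassifier(max_depth=3)

# Opt in to the future default now: non-estimator constructor parameters
# are shallow-copied instead of deep-copied.
fast = clone(est, deepcopy=False)

# Keep the historical deep-copy semantics explicitly; no warning is raised.
legacy = clone(est, deepcopy=True)

# Leaving deepcopy unset still deep-copies during the deprecation window,
# but raises ChangedBehaviorWarning once the recursion reaches a
# non-estimator parameter (the safe=False path refined by patch 3).
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    default = clone(est)
assert any(w.category is ChangedBehaviorWarning for w in caught)
```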