scikit-learn
diff --git a/‎doc/data_transforms.rst
Lines changed: 1 addition & 0 deletions b/‎doc/data_transforms.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/modules/freeze.rst
Lines changed: 49 additions & 0 deletions b/‎doc/modules/freeze.rst
Lines changed: 49 additions & 0 deletions
diff --git a/‎sklearn/calibration.py
Lines changed: 2 additions & 1 deletion b/‎sklearn/calibration.py
Lines changed: 2 additions & 1 deletion
diff --git a/‎sklearn/ensemble/voting_classifier.py
Lines changed: 3 additions & 1 deletion b/‎sklearn/ensemble/voting_classifier.py
Lines changed: 3 additions & 1 deletion
diff --git a/‎sklearn/feature_selection/from_model.py
Lines changed: 12 additions & 20 deletions b/‎sklearn/feature_selection/from_model.py
Lines changed: 12 additions & 20 deletions
diff --git a/‎sklearn/feature_selection/tests/test_from_model.py
Lines changed: 3 additions & 0 deletions b/‎sklearn/feature_selection/tests/test_from_model.py
Lines changed: 3 additions & 0 deletions
diff --git a/‎sklearn/freeze.py
Lines changed: 1 addition & 1 deletion b/‎sklearn/freeze.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎sklearn/tests/test_freeze.py
Lines changed: 3 additions & 8 deletions b/‎sklearn/tests/test_freeze.py
Lines changed: 3 additions & 8 deletions
@@ -33,3 +33,4 @@ scikit-learn.
     modules/kernel_approximation
     modules/metrics
     modules/preprocessing_targets
+    modules/freeze
@@ -0,0 +1,49 @@
+.. _frozen:
+
+Frozen estimators and transfer learning
+=======================================
+.. currentmodule:: sklearn
+
+It can be useful to pre-fit an estimator before including it in a Pipeline,
+FeatureUnion or other meta-estimators.  Example applications include:
+
+* transfer learning: incorporating a transformer trained on a large unlabelled
+  dataset in a prediction pipeline where the data to be modelled is much smaller
+* feature selection on the basis of an already fitted predictive model
+
+To enable this, your estimator can be wrapped in :class:`freeze.FreezeWrap`.
+For example::
+
+    Without transfer learning:
+
+    >>> from sklearn.datasets import load_...
+    >>> from sklearn.model_selection import cross_val_score
+    >>> cross_val_score(make_pipeline(TfidfVectorizer(), LogisticRegression()),
+    ...                 X, y)
+
+    With transfer learning:
+    >>> from sklearn.freeze import FreezeWrap
+    >>> tfidf = TfidfVectorizer().fit(large_X)
+    >>> cross_val_score(make_pipeline(FreezeWrap(tfidf), LogisticRegression()),
+    ...                 X, y)
+
+In particular, calling ``FreezeWrap(tfidf).fit(X, y)`` now does nothing,
+while calling ``FreezeWrap(tfidf).fit_transform(X, y)`` just returns the result of
+``tfidf.transform(X)``.
+
+.. note::
+    When an estimator is frozen, calling :func:`base.clone` on it will return
+    itself::
+
+        >>> from sklearn.base import clone
+        >>> frozen = FreezeWrap(tfidf)
+        >>> clone(frozen) is frozen
+        True
+
+    This allows the model to be left untouched in cross-validation and
+    meta-estimators which clear the estimator with ``clone``.
+
+.. warning:: Leakage:
+    Please take care to not introduce data leakage by this method: do not
+    incorporate your test set into the training of some frozen component,
+    unless it would be realistic to do so in the target application.
@@ -45,7 +45,8 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
     base_estimator : instance BaseEstimator
         The classifier whose output decision function needs to be calibrated
         to offer more accurate predict_proba outputs. If cv=prefit, the
-        classifier must have been fit already on data.
+        classifier must have been fit already on data, and it is recommended
+        that the classifier be frozen (see :ref:`frozen`) in this case.
 
     method : 'sigmoid' or 'isotonic'
         The method to use for calibration. Can be 'sigmoid' which
 
@@ -26,7 +26,7 @@
 def _parallel_fit_estimator(estimator, X, y, sample_weight):
     """Private function used to fit an estimator within a job."""
     if sample_weight is not None:
-        estimator.fit(X, y, sample_weight)
+        estimator.fit(X, y, sample_weight=sample_weight)
     else:
         estimator.fit(X, y)
     return estimator
@@ -47,6 +47,8 @@ class VotingClassifier(_BaseComposition, ClassifierMixin, TransformerMixin):
         ``self.estimators_``. An estimator can be set to `None` using
         ``set_params``.
 
+        Some of these estimators may be frozen (see :ref:`frozen`).
+
     voting : str, {'hard', 'soft'} (default='hard')
         If 'hard', uses predicted class labels for majority rule voting.
         Else if 'soft', predicts the class label based on the argmax of
@@ -1,6 +1,8 @@
 # Authors: Gilles Louppe, Mathieu Blondel, Maheshakya Wijewardena
 # License: BSD 3 clause
 
+import warnings
+
 import numpy as np
 
 from .base import SelectorMixin
@@ -86,9 +88,10 @@ class SelectFromModel(BaseEstimator, SelectorMixin, MetaEstimatorMixin):
     ----------
     estimator : object
         The base estimator from which the transformer is built.
-        This can be both a fitted (if ``prefit`` is set to True)
-        or a non-fitted estimator. The estimator must have either a
-        ``feature_importances_`` or ``coef_`` attribute after fitting.
+        The estimator must have either a ``feature_importances_``
+        or ``coef_`` attribute after fitting.
+
+        Use :class:`freeze.FreezeWrap` if your estimator is already fitted.
 
     threshold : string, float, optional default None
         The threshold value to use for feature selection. Features whose
@@ -100,14 +103,6 @@ class SelectFromModel(BaseEstimator, SelectorMixin, MetaEstimatorMixin):
         or implicitly (e.g, Lasso), the threshold used is 1e-5.
         Otherwise, "mean" is used by default.
 
-    prefit : bool, default False
-        Whether a prefit model is expected to be passed into the constructor
-        directly or not. If True, ``transform`` must be called directly
-        and SelectFromModel cannot be used with ``cross_val_score``,
-        ``GridSearchCV`` and similar utilities that clone the estimator.
-        Otherwise train the model using ``fit`` and then ``transform`` to do
-        feature selection.
-
     norm_order : non-zero int, inf, -inf, default 1
         Order of the norm used to filter the vectors of coefficients below
         ``threshold`` in the case where the ``coef_`` attribute of the
@@ -117,28 +112,24 @@ class SelectFromModel(BaseEstimator, SelectorMixin, MetaEstimatorMixin):
     ----------
     estimator_ : an estimator
         The base estimator from which the transformer is built.
-        This is stored only when a non-fitted estimator is passed to the
-        ``SelectFromModel``, i.e when prefit is False.
 
     threshold_ : float
         The threshold value used for feature selection.
     """
-    def __init__(self, estimator, threshold=None, prefit=False, norm_order=1):
+    def __init__(self, estimator, threshold=None, prefit=None, norm_order=1):
         self.estimator = estimator
         self.threshold = threshold
         self.prefit = prefit
         self.norm_order = norm_order
 
     def _get_support_mask(self):
-        # SelectFromModel can directly call on transform.
         if self.prefit:
             estimator = self.estimator
-        elif hasattr(self, 'estimator_'):
-            estimator = self.estimator_
         else:
-            raise ValueError(
-                'Either fit SelectFromModel before transform or set "prefit='
-                'True" and pass a fitted estimator to the constructor.')
+            from ..utils.validation import check_is_fitted
+            check_is_fitted(self, 'estimator_')
+            # check_is_fitted only validates; bind the fitted estimator so
+            # the importance/threshold computation below has it in scope.
+            estimator = self.estimator_
         scores = _get_feature_importances(estimator, self.norm_order)
         threshold = _calculate_threshold(estimator, scores, self.threshold)
         return scores >= threshold
@@ -162,6 +151,10 @@ def fit(self, X, y=None, **fit_params):
         self : object
             Returns self.
         """
+        if self.prefit is not None:
+            warnings.warn('Parameter prefit is deprecated and will be removed '
+                          'in version 0.22. Use FreezeWrap instead.',
+                          DeprecationWarning)
         if self.prefit:
             raise NotFittedError(
                 "Since 'prefit=True', call transform directly")
 
@@ -183,3 +183,6 @@ def test_threshold_without_refitting():
     # Set a higher threshold to filter out more features.
     model.threshold = "1.0 * mean"
     assert_greater(X_transform.shape[1], model.transform(data).shape[1])
+
+
+# TODO: test deprecation of prefit and that FreezeWrap behaves similarly
@@ -15,7 +15,7 @@ class FreezeWrap(BaseEstimator):
     """
 
     def __init__(self, estimator):
-        self.estimator
+        self.estimator = estimator
 
     def fit(self, X, y=None, **kwargs):
         """Return self
 
@@ -19,22 +19,17 @@ def test_freeze():
     dumped = pickle.dumps(frozen_est)
     frozen_est2 = pickle.loads(dumped)
     assert_false(frozen_est is frozen_est2)
-    assert_array_equal(est.scores_, frozen_est2.scores_)
-
-    # scores should be unaffected by new fit
-    assert_true(frozen_est2.fit() is frozen_est2)
-    assert_array_equal(est.scores_, frozen_est2.scores_)
 
     # Test fit_transform where expected
     assert_true(hasattr(est, 'fit_transform'))
     assert_true(hasattr(frozen_est, 'fit_transform'))
     assert_false(est.fit_transform is frozen_est.fit_transform)
     frozen_est.fit_transform([np.arange(X.shape[1])], [0])
-    # scores should be unaffected by new fit_transform
-    assert_array_equal(est.scores_, frozen_est.scores_)
 
-    # Test fit_transform not set when not needed
+    # Test fit_transform not available when not on base
     est = DecisionTreeClassifier().fit(X, y)
     frozen_est = FreezeWrap(est)
     assert_false(hasattr(est, 'fit_transform'))
     assert_false(hasattr(frozen_est, 'fit_transform'))
+
+    # TODO: much more