diff --git a/doc/data_transforms.rst b/doc/data_transforms.rst index e861762891ecc..744791396cc90 100644 --- a/doc/data_transforms.rst +++ b/doc/data_transforms.rst @@ -33,3 +33,4 @@ scikit-learn. modules/kernel_approximation modules/metrics modules/preprocessing_targets + modules/freeze diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 128f1c85f13e2..cf1fd48b2cb13 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -537,6 +537,25 @@ From text feature_selection.mutual_info_regression +:mod:`sklearn.freeze`: Estimator Freezing +========================================= + +.. automodule:: sklearn.freeze + :no-members: + :no-inherited-members: + +**User guide:** See the :ref:`freeze` section for further details. + +Classes ------- +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + freeze.FreezeWrap + + .. _gaussian_process_ref: :mod:`sklearn.gaussian_process`: Gaussian Processes diff --git a/doc/modules/freeze.rst b/doc/modules/freeze.rst new file mode 100644 index 0000000000000..4683e4b98e7de --- /dev/null +++ b/doc/modules/freeze.rst @@ -0,0 +1,50 @@ +.. _freeze: + +Frozen estimators and transfer learning +======================================= + +.. currentmodule:: sklearn + +It can be useful to pre-fit an estimator before including it in a Pipeline, +FeatureUnion or other meta-estimators. Example applications include: + +* transfer learning: incorporating a transformer trained on a large unlabelled + dataset in a prediction pipeline where the data to be modelled is much smaller +* feature selection on the basis of an already fitted predictive model + +To enable this, your estimator can be wrapped in :class:`freeze.FreezeWrap`. +For example:: + + Without transfer learning: + + >>> from sklearn.datasets import load_... + >>> from sklearn.model_selection import cross_val_score + >>> cross_val_score(make_pipeline(TfidfVectorizer(), LogisticRegression()), + ... X, y) + + With transfer learning: + >>> from sklearn.freeze import FreezeWrap + >>> tfidf = TfidfVectorizer().fit(large_X) + >>> cross_val_score(make_pipeline(FreezeWrap(tfidf), LogisticRegression()), + ... X, y) + +In particular, calling ``FreezeWrap(tfidf).fit(X, y)`` now does nothing, +while calling ``FreezeWrap(tfidf).fit_transform(X, y)`` just returns the result of +``tfidf.transform(X)``. + +.. note:: When an estimator is frozen, calling :func:`clone` on it will return + the estimator itself:: + + >>> from sklearn.base import clone + >>> frozen = FreezeWrap(tfidf) + >>> clone(frozen) is frozen + True + + This allows the model to be left untouched by cross-validation and by + meta-estimators that reset their estimators with ``clone``. + +.. warning:: Leakage: + Take care not to introduce data leakage with this approach: do not + incorporate your test set into the training of a frozen component, + unless it would be realistic to do so in the target application. 
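The doctests above elide the dataset loader and several imports. For reference, here is a self-contained sketch of the same transfer-learning pattern; it assumes the ``FreezeWrap`` API introduced in this diff, and the tiny corpora and pipeline below are illustrative stand-ins for ``large_X``, ``X`` and ``y``, not part of the patch::

    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.freeze import FreezeWrap
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_val_score
    from sklearn.pipeline import make_pipeline

    # A "large" unlabelled corpus (stand-in for large_X) and a small labelled set.
    large_X = ["free money now", "meeting at noon", "cheap pills online",
               "project update attached", "win a prize today", "lunch tomorrow?"]
    X = ["free prize money", "see you at the meeting",
         "cheap prize pills", "update on the project"]
    y = [1, 0, 1, 0]

    # Fit the vectorizer once on the unlabelled corpus, then freeze it:
    # clone() inside cross_val_score hands back the same fitted object, and the
    # pipeline's first step only calls tfidf.transform on each training fold.
    tfidf = TfidfVectorizer().fit(large_X)
    scores = cross_val_score(
        make_pipeline(FreezeWrap(tfidf), LogisticRegression()), X, y, cv=2)
    print(scores)

Without the wrapper, each fold would refit the vectorizer on only the handful of labelled training documents, discarding the vocabulary learned from the large unlabelled corpus.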
diff --git a/sklearn/__init__.py b/sklearn/__init__.py index 8a25715498fcd..bdb77609da280 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -132,13 +132,13 @@ def config_context(**new_config): __all__ = ['calibration', 'cluster', 'covariance', 'cross_decomposition', 'cross_validation', 'datasets', 'decomposition', 'dummy', 'ensemble', 'exceptions', 'externals', 'feature_extraction', - 'feature_selection', 'gaussian_process', 'grid_search', - 'isotonic', 'kernel_approximation', 'kernel_ridge', - 'learning_curve', 'linear_model', 'manifold', 'metrics', - 'mixture', 'model_selection', 'multiclass', 'multioutput', - 'naive_bayes', 'neighbors', 'neural_network', 'pipeline', - 'preprocessing', 'random_projection', 'semi_supervised', - 'svm', 'tree', 'discriminant_analysis', + 'feature_selection', 'freeze', 'gaussian_process', + 'grid_search', 'isotonic', 'kernel_approximation', + 'kernel_ridge', 'learning_curve', 'linear_model', 'manifold', + 'metrics', 'mixture', 'model_selection', 'multiclass', + 'multioutput', 'naive_bayes', 'neighbors', 'neural_network', + 'pipeline', 'preprocessing', 'random_projection', + 'semi_supervised', 'svm', 'tree', 'discriminant_analysis', # Non-modules: 'clone'] diff --git a/sklearn/base.py b/sklearn/base.py index aa4f9f9ce17c1..f9559ce7ded66 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -3,7 +3,7 @@ # Author: Gael Varoquaux # License: BSD 3 clause -import copy +from copy import deepcopy import warnings import numpy as np @@ -45,11 +45,14 @@ def clone(estimator, safe=True): """ estimator_type = type(estimator) # XXX: not handling dictionaries + from .freeze import FreezeWrap + if isinstance(estimator, FreezeWrap): + return estimator if estimator_type in (list, tuple, set, frozenset): return estimator_type([clone(e, safe=safe) for e in estimator]) elif not hasattr(estimator, 'get_params'): if not safe: - return copy.deepcopy(estimator) + return deepcopy(estimator) else: raise TypeError("Cannot clone object '%s' (type %s): " "it does not seem to be a scikit-learn estimator " diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 0d2f76cd12239..d66a3e60f453e 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -45,7 +45,8 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin): base_estimator : instance BaseEstimator The classifier whose output decision function needs to be calibrated to offer more accurate predict_proba outputs. If cv=prefit, the - classifier must have been fit already on data. + classifier must have been fit already on data, and it is recommended + that the classifier be frozen (see :ref:`freeze`) in this case. method : 'sigmoid' or 'isotonic' The method to use for calibration. Can be 'sigmoid' which diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py index 2502643453d79..f20af8be297a0 100644 --- a/sklearn/feature_selection/from_model.py +++ b/sklearn/feature_selection/from_model.py @@ -1,6 +1,8 @@ # Authors: Gilles Louppe, Mathieu Blondel, Maheshakya Wijewardena # License: BSD 3 clause +import warnings + import numpy as np from .base import SelectorMixin @@ -9,6 +11,7 @@ from ..exceptions import NotFittedError from ..utils.metaestimators import if_delegate_has_method +from ..utils.validation import check_is_fitted def _get_feature_importances(estimator, norm_order=1): @@ -86,9 +89,10 @@ class SelectFromModel(BaseEstimator, SelectorMixin, MetaEstimatorMixin): ---------- estimator : object The base estimator from which the transformer is built. 
- This can be both a fitted (if ``prefit`` is set to True) - or a non-fitted estimator. The estimator must have either a - ``feature_importances_`` or ``coef_`` attribute after fitting. + The estimator must have either a ``feature_importances_`` + or ``coef_`` attribute after fitting. + + Use :class:`freeze.FreezeWrap` if your estimator is already fitted. threshold : string, float, optional default None The threshold value to use for feature selection. Features whose @@ -100,14 +104,6 @@ class SelectFromModel(BaseEstimator, SelectorMixin, MetaEstimatorMixin): or implicitly (e.g, Lasso), the threshold used is 1e-5. Otherwise, "mean" is used by default. - prefit : bool, default False - Whether a prefit model is expected to be passed into the constructor - directly or not. If True, ``transform`` must be called directly - and SelectFromModel cannot be used with ``cross_val_score``, - ``GridSearchCV`` and similar utilities that clone the estimator. - Otherwise train the model using ``fit`` and then ``transform`` to do - feature selection. - norm_order : non-zero int, inf, -inf, default 1 Order of the norm used to filter the vectors of coefficients below ``threshold`` in the case where the ``coef_`` attribute of the @@ -117,28 +113,22 @@ class SelectFromModel(BaseEstimator, SelectorMixin, MetaEstimatorMixin): ---------- estimator_ : an estimator The base estimator from which the transformer is built. - This is stored only when a non-fitted estimator is passed to the - ``SelectFromModel``, i.e when prefit is False. threshold_ : float The threshold value used for feature selection. """ - def __init__(self, estimator, threshold=None, prefit=False, norm_order=1): + def __init__(self, estimator, threshold=None, prefit=None, norm_order=1): self.estimator = estimator self.threshold = threshold self.prefit = prefit self.norm_order = norm_order def _get_support_mask(self): - # SelectFromModel can directly call on transform. if self.prefit: estimator = self.estimator - elif hasattr(self, 'estimator_'): - estimator = self.estimator_ else: - raise ValueError( - 'Either fit SelectFromModel before transform or set "prefit=' - 'True" and pass a fitted estimator to the constructor.') + check_is_fitted(self, 'estimator_') + estimator = self.estimator_ scores = _get_feature_importances(estimator, self.norm_order) threshold = _calculate_threshold(estimator, scores, self.threshold) return scores >= threshold @@ -162,6 +152,9 @@ def fit(self, X, y=None, **fit_params): self : object Returns self. """ + if self.prefit is not None: + warnings.warn('Parameter prefit is deprecated and will be removed ' + 'in version 0.22. Use FreezeWrap instead.') if self.prefit: raise NotFittedError( "Since 'prefit=True', call transform directly") diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index ae4d1ba4331a6..2392117fe16c4 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -183,3 +183,6 @@ def test_threshold_without_refitting(): # Set a higher threshold to filter out more features. model.threshold = "1.0 * mean" assert_greater(X_transform.shape[1], model.transform(data).shape[1]) + + +# TODO: test deprecation of prefit and that FreezeWrap behaves similarly diff --git a/sklearn/freeze.py b/sklearn/freeze.py new file mode 100644 index 0000000000000..999384e823b05 --- /dev/null +++ b/sklearn/freeze.py @@ -0,0 +1,124 @@ +""" +Utility for making estimators frozen / un-trainable. 
+""" +# Author: Joel Nothman +# License: BSD + +from .base import BaseEstimator, MetaEstimatorMixin +from .utils.metaestimators import if_delegate_has_method + +__all__ = ['FreezeWrap'] + + +class FreezeWrap(BaseEstimator, MetaEstimatorMixin): + """Disable fitting and cloning for the wrapped estimator + + Wrapping an estimator in ``FreezeWrap`` freezes it, such that: + + * ``clone(FreezeWrap(estimator))`` will return the same model without + clearing it + * ``FreezeWrap(estimator).fit(...)`` will not call ``estimator.fit()`` + * ``FreezeWrap(estimator).fit_transform(X, y)`` will just return + ``estimator.transform(X)`` + + Read more in the :ref:`User Guide <freeze>`. + + Parameters + ---------- + estimator : estimator + + Notes + ----- + Any keyword arguments passed to ``fit_transform`` will *not* + be passed on to ``transform`` (and similar for ``fit_predict``). + """ + + def __init__(self, estimator): + self.estimator = estimator + + def fit(self, X, y=None, **kwargs): + """Return self + + Parameters + ---------- + X + ignored + y : optional + ignored + kwargs : optional + ignored + """ + return self + + @if_delegate_has_method(delegate='estimator') + def fit_transform(self, X, y=None, **kwargs): + """Execute transform on estimator + + Parameters + ---------- + X + data to transform + y : optional + ignored + kwargs : ignored + ignored + """ + return self.estimator.transform(X) + + @if_delegate_has_method(delegate='estimator') + def fit_predict(self, X, y=None, **kwargs): + """Execute predict on estimator + + Parameters + ---------- + X + data to predict + y : optional + ignored + kwargs : ignored + ignored + """ + return self.estimator.predict(X) + + @if_delegate_has_method(delegate='estimator') + def transform(self, *args, **kwargs): + """Execute estimator's equivalent method + """ + return self.estimator.transform(*args, **kwargs) + + @if_delegate_has_method(delegate='estimator') + def decision_function(self, *args, **kwargs): + """Execute estimator's equivalent method + """ + return self.estimator.decision_function(*args, **kwargs) + + @if_delegate_has_method(delegate='estimator') + def predict(self, *args, **kwargs): + """Execute estimator's equivalent method + """ + return self.estimator.predict(*args, **kwargs) + + @if_delegate_has_method(delegate='estimator') + def predict_log_proba(self, *args, **kwargs): + """Execute estimator's equivalent method + """ + return self.estimator.predict_log_proba(*args, **kwargs) + + @if_delegate_has_method(delegate='estimator') + def predict_proba(self, *args, **kwargs): + """Execute estimator's equivalent method + """ + return self.estimator.predict_proba(*args, **kwargs) + + @property + def _estimator_type(self): + return self.estimator._estimator_type + + @property + def classes_(self): + return self.estimator.classes_ + + @property + def _pairwise(self): + # check whether the wrapped estimator expects pairwise input + return getattr(self.estimator, '_pairwise', False) diff --git a/sklearn/tests/test_freeze.py b/sklearn/tests/test_freeze.py new file mode 100644 index 0000000000000..a15329ffb7f65 --- /dev/null +++ b/sklearn/tests/test_freeze.py @@ -0,0 +1,35 @@ +import pickle +import numpy as np +from sklearn import datasets +from sklearn.freeze import FreezeWrap +from sklearn.feature_selection import SelectKBest +from sklearn.tree import DecisionTreeClassifier +from sklearn.utils.testing import assert_array_equal +from sklearn.utils.testing import assert_true +from sklearn.utils.testing import assert_false + + +def test_freeze(): + X, y = 
datasets.load_iris(return_X_y=True) + + est = SelectKBest(k=1).fit(X, y) + + frozen_est = FreezeWrap(est) + + dumped = pickle.dumps(frozen_est) + frozen_est2 = pickle.loads(dumped) + assert_false(frozen_est is frozen_est2) + + # Test fit_transform where expected + assert_true(hasattr(est, 'fit_transform')) + assert_true(hasattr(frozen_est, 'fit_transform')) + assert_false(est.fit_transform is frozen_est.fit_transform) + frozen_est.fit_transform([np.arange(X.shape[1])], [0]) + + # Test fit_transform not available when not on base + est = DecisionTreeClassifier().fit(X, y) + frozen_est = FreezeWrap(est) + assert_false(hasattr(est, 'fit_transform')) + assert_false(hasattr(frozen_est, 'fit_transform')) + + # TODO: much more diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index 4a33d64d69bee..276acea654efa 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -508,7 +508,8 @@ def uninstall_mldata_mock(): META_ESTIMATORS = ["OneVsOneClassifier", "MultiOutputEstimator", "MultiOutputRegressor", "MultiOutputClassifier", "OutputCodeClassifier", "OneVsRestClassifier", - "RFE", "RFECV", "BaseEnsemble", "ClassifierChain"] + "RFE", "RFECV", "BaseEnsemble", "ClassifierChain", + "FreezeWrap"] # estimators that there is no way to default-construct sensibly OTHER = ["Pipeline", "FeatureUnion", "GridSearchCV", "RandomizedSearchCV", "SelectFromModel"]
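The ``# TODO: much more`` note in ``test_freeze.py`` above could, for instance, grow into checks of the two core guarantees documented in ``freeze.py``: that ``clone`` returns the frozen object itself and that ``fit`` is a no-op. The sketch below is illustrative only; the test name and the choice of ``LogisticRegression`` are assumptions, not part of this patch::

    import numpy as np
    from sklearn import datasets
    from sklearn.base import clone
    from sklearn.freeze import FreezeWrap
    from sklearn.linear_model import LogisticRegression
    from sklearn.utils.testing import assert_true


    def test_freeze_clone_and_no_refit():
        X, y = datasets.load_iris(return_X_y=True)
        est = LogisticRegression().fit(X, y)
        frozen = FreezeWrap(est)

        # clone() must hand back the very same frozen object, not a cleared copy.
        assert_true(clone(frozen) is frozen)

        # fit() on the wrapper must be a no-op: the wrapped coefficients stay
        # unchanged even when "refitting" on a different slice of the data.
        coef_before = est.coef_.copy()
        frozen.fit(X[:10], y[:10])
        np.testing.assert_array_equal(coef_before, est.coef_)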