scikit-learn
diff --git a/‎examples/plot_compare_reduction.py
100644100755
Lines changed: 50 additions & 10 deletions b/‎examples/plot_compare_reduction.py
100644100755
Lines changed: 50 additions & 10 deletions
diff --git a/‎examples/plot_compare_reduction_cached.py
Lines changed: 0 additions & 77 deletions b/‎examples/plot_compare_reduction_cached.py
Lines changed: 0 additions & 77 deletions
@@ -1,32 +1,46 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
-=================================================================
-Selecting dimensionality reduction with Pipeline and GridSearchCV
-=================================================================
+==================================================================================
+Selecting dimensionality reduction with Pipeline, CachedPipeline, and \
+GridSearchCV
+==================================================================================
 
 This example constructs a pipeline that does dimensionality
 reduction followed by prediction with a support vector
-classifier. It demonstrates the use of GridSearchCV and
-Pipeline to optimize over different classes of estimators in a
-single CV run -- unsupervised PCA and NMF dimensionality
+classifier. It demonstrates the use of ``GridSearchCV`` and
+``Pipeline`` to optimize over different classes of estimators in a
+single CV run -- unsupervised ``PCA`` and ``NMF`` dimensionality
 reductions are compared to univariate feature selection during
 the grid search.
+
+Additionally, ``Pipeline`` can be exchanged with ``CachedPipeline``
+to memoize the transformers within the pipeline, which avoids fitting
+the same transformers over and over.
+
+Note that the use of ``CachedPipeline`` becomes worthwhile when
+fitting a transformer is costly.
 """
 # Authors: Robert McGibbon, Joel Nothman
 
+###############################################################################
+# Illustration of ``Pipeline`` and ``GridSearchCV``
+###############################################################################
+# This section illustrates the use of a ``Pipeline`` with
+# ``GridSearchCV``
+
 from __future__ import print_function, division
 
+from tempfile import mkdtemp
 import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.datasets import load_digits
 from sklearn.model_selection import GridSearchCV
-from sklearn.pipeline import Pipeline
+from sklearn.pipeline import Pipeline, CachedPipeline
 from sklearn.svm import LinearSVC
 from sklearn.decomposition import PCA, NMF
 from sklearn.feature_selection import SelectKBest, chi2
-
-print(__doc__)
+from sklearn.externals.joblib import Memory
 
 pipe = Pipeline([
     ('reduce_dim', PCA()),
@@ -73,3 +87,29 @@
 plt.ylim((0, 1))
 plt.legend(loc='upper left')
 plt.show()
+
+###############################################################################
+# Illustration of ``CachedPipeline`` instead of ``Pipeline``
+###############################################################################
+# It is sometimes interesting to store the state of a specific transformer
+# since it could be used again. Using a pipeline in ``GridSearchCV`` triggers
+# such situations. Therefore, we replace ``Pipeline`` with ``CachedPipeline``
+# to memoize the transformers within the pipeline.
+#
+# .. warning::
+#     Note that this example is, however, only an illustration since for this
+#     specific case fitting PCA is not necessarily slower than loading the
+#     cache. Hence, use ``CachedPipeline`` when the fitting of a transformer
+#     is costly.
+
+# Create a temporary folder to store the transformers of the pipeline
+cachedir = mkdtemp()
+memory = Memory(cachedir=cachedir, verbose=10)
+cached_pipe = CachedPipeline([('reduce_dim', PCA()),
+                              ('classify', LinearSVC())],
+                             memory=memory)
+
+# This time, a cached pipeline will be used within the grid search
+grid = GridSearchCV(cached_pipe, cv=3, n_jobs=2, param_grid=param_grid)
+digits = load_digits()
+grid.fit(digits.data, digits.target)