[MRG] Add plotting module with heatmaps for confusion matrix and grid search results #9173
Diff 1: examples/model_selection/plot_confusion_matrix.py

@@ -24,15 +24,12 @@
 """

 print(__doc__)

-import itertools
 import numpy as np
 import matplotlib.pyplot as plt

 from sklearn import svm, datasets
 from sklearn.model_selection import train_test_split
-from sklearn.metrics import confusion_matrix
+from sklearn.plot import plot_confusion_matrix

 # import some data to play with
 iris = datasets.load_iris()
@@ -48,53 +45,20 @@
 classifier = svm.SVC(kernel='linear', C=0.01)
 y_pred = classifier.fit(X_train, y_train).predict(X_test)

-
-def plot_confusion_matrix(cm, classes,
-                          normalize=False,
-                          title='Confusion matrix',
-                          cmap=plt.cm.Blues):
-    """
-    This function prints and plots the confusion matrix.
-    Normalization can be applied by setting `normalize=True`.
-    """
-    if normalize:
-        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
-        print("Normalized confusion matrix")
-    else:
-        print('Confusion matrix, without normalization')
-
-    print(cm)
-
-    plt.imshow(cm, interpolation='nearest', cmap=cmap)
-    plt.title(title)
-    plt.colorbar()
-    tick_marks = np.arange(len(classes))
-    plt.xticks(tick_marks, classes, rotation=45)
-    plt.yticks(tick_marks, classes)
-
-    fmt = '.2f' if normalize else 'd'
-    thresh = cm.max() / 2.
-    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
-        plt.text(j, i, format(cm[i, j], fmt),
-                 horizontalalignment="center",
-                 color="white" if cm[i, j] > thresh else "black")
-
-    plt.tight_layout()
-    plt.ylabel('True label')
-    plt.xlabel('Predicted label')
-
-# Compute confusion matrix
-cnf_matrix = confusion_matrix(y_test, y_pred)
-np.set_printoptions(precision=2)
-
 # Plot non-normalized confusion matrix
 plt.figure()
-plot_confusion_matrix(cnf_matrix, classes=class_names,
-                      title='Confusion matrix, without normalization')
+plot_confusion_matrix(y_test, y_pred, classes=class_names,
+                      title='Confusion matrix, without normalization',
+                      cmap=plt.cm.Blues)
+plt.tight_layout()

 # Plot normalized confusion matrix
 plt.figure()
-plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
-                      title='Normalized confusion matrix')
+plot_confusion_matrix(y_test, y_pred, classes=class_names, normalize=True,
+                      title='Confusion matrix, with normalization',
+                      cmap=plt.cm.Blues)
+plt.tight_layout()

 plt.show()

Review comment (on the normalized plot call): You could use plt.tight_layout() here to prevent the x-labels from being cropped out of the figure.
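For context, a self-contained version of what the reworked example does with the new helper. `sklearn.plot` is the module added by this PR (it does not exist in released scikit-learn), so this is a sketch against the signature shown in this diff:

```python
import matplotlib.pyplot as plt
from sklearn import datasets, svm
from sklearn.model_selection import train_test_split
from sklearn.plot import plot_confusion_matrix  # module added by this PR

# Train a deliberately under-regularized classifier on iris, as in the example.
iris = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
                                                    random_state=0)
y_pred = svm.SVC(kernel='linear', C=0.01).fit(X_train, y_train).predict(X_test)

# The new helper computes the confusion matrix internally from y_true/y_pred,
# so the example no longer needs sklearn.metrics.confusion_matrix directly.
plt.figure()
plot_confusion_matrix(y_test, y_pred, classes=iris.target_names,
                      normalize=True, cmap=plt.cm.Blues)
plt.tight_layout()
plt.show()
```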
Diff 2: examples/svm/plot_rbf_parameters.py

@@ -51,11 +51,11 @@
 Finally one can also observe that for some intermediate values of ``gamma`` we
 get equally performing models when ``C`` becomes very large: it is not
-necessary to regularize by limiting the number of support vectors. The radius of
-the RBF kernel alone acts as a good structural regularizer. In practice though
-it might still be interesting to limit the number of support vectors with a
-lower value of ``C`` so as to favor models that use less memory and that are
-faster to predict.
+necessary to regularize by limiting the number of support vectors. The radius
+of the RBF kernel alone acts as a good structural regularizer. In practice
+though it might still be interesting to limit the number of support vectors
+with a lower value of ``C`` so as to favor models that use less memory and that
+are faster to predict.

 We should also note that small differences in scores results from the random
 splits of the cross-validation procedure. Those spurious variations can be
@@ -65,7 +65,6 @@
 map.

 '''
-print(__doc__)

 import numpy as np
 import matplotlib.pyplot as plt
@@ -76,6 +75,9 @@
 from sklearn.datasets import load_iris
 from sklearn.model_selection import StratifiedShuffleSplit
 from sklearn.model_selection import GridSearchCV
+from sklearn.plot import plot_gridsearch_results
+
+print(__doc__)


 # Utility function to move the midpoint of a colormap to be around
@@ -172,9 +174,6 @@ def __call__(self, value, clip=None):
 plt.yticks(())
 plt.axis('tight')

-scores = grid.cv_results_['mean_test_score'].reshape(len(C_range),
-                                                     len(gamma_range))
-
 # Draw heatmap of the validation accuracy as a function of gamma and C
 #
 # The score are encoded as colors with the hot colormap which varies from dark
@@ -184,14 +183,10 @@ def __call__(self, value, clip=None):
 # interesting range while not brutally collapsing all the low score values to
 # the same color.

-plt.figure(figsize=(8, 6))
+plt.figure(figsize=(10, 10))
 plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95)
-plt.imshow(scores, interpolation='nearest', cmap=plt.cm.hot,
-           norm=MidpointNormalize(vmin=0.2, midpoint=0.92))
-plt.xlabel('gamma')
-plt.ylabel('C')
-plt.colorbar()
-plt.xticks(np.arange(len(gamma_range)), gamma_range, rotation=45)
-plt.yticks(np.arange(len(C_range)), C_range)
-plt.title('Validation accuracy')
+plot_gridsearch_results(grid.cv_results_, title="Validation accuracy",
+                        cmap=plt.cm.hot,
+                        norm=MidpointNormalize(vmin=0.2, midpoint=0.92))
+plt.tight_layout()
 plt.show()

Review thread (on the plot_gridsearch_results call):
- Better to pass in an ...
- would it be right to just pass ...
- I don't have a strong opinion. We just created the figure here; I think it won't kill us to be lazy, though explicit is better than implicit ;)
- That would work.

Review comment: You could use plt.tight_layout() here to prevent the x-labels from being cropped out of the figure.
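For reference, a runnable sketch of what the reworked example now does. `MidpointNormalize` is reconstructed here from the original example's definition (its body is elided in this diff), `sklearn.plot.plot_gridsearch_results` is the helper added by this PR, and the small parameter grid below is illustrative rather than the grid the example actually searches:

```python
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.plot import plot_gridsearch_results  # module added by this PR


class MidpointNormalize(Normalize):
    """Re-center the colormap so mid-range scores stay distinguishable."""

    def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False):
        self.midpoint = midpoint
        Normalize.__init__(self, vmin, vmax, clip)

    def __call__(self, value, clip=None):
        # Piecewise-linear map: vmin -> 0, midpoint -> 0.5, vmax -> 1.
        x, y = [self.vmin, self.midpoint, self.vmax], [0, 0.5, 1]
        return np.ma.masked_array(np.interp(value, x, y))


X, y = load_iris(return_X_y=True)
param_grid = {'C': np.logspace(-2, 2, 5), 'gamma': np.logspace(-4, 0, 5)}
grid = GridSearchCV(SVC(), param_grid=param_grid)
grid.fit(X, y)

plt.figure(figsize=(10, 10))
plot_gridsearch_results(grid.cv_results_, title="Validation accuracy",
                        cmap=plt.cm.hot,
                        norm=MidpointNormalize(vmin=0.2, midpoint=0.92))
plt.tight_layout()
plt.show()
```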
Diff 3: sklearn/plot/__init__.py (new file)

@@ -0,0 +1,5 @@
+from ._heatmap import plot_heatmap
+from ._confusion_matrix import plot_confusion_matrix
+from ._gridsearch_results import plot_gridsearch_results
+
+__all__ = ["plot_heatmap", "plot_confusion_matrix", "plot_gridsearch_results"]
Diff 4: sklearn/plot/_confusion_matrix.py (new file)

@@ -0,0 +1,102 @@
+import numpy as np
+from sklearn.metrics import confusion_matrix
+from sklearn.plot import plot_heatmap
+from sklearn.utils.multiclass import unique_labels
+
+
+def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None,
+                          normalize=False,
+                          xlabel="Predicted Label", ylabel="True Label",
+                          title='Confusion matrix', cmap=None, vmin=None,
+                          vmax=None, ax=None, fmt="{:.2f}",
+                          xtickrotation=45, norm=None):
+    """Plot a confusion matrix as a heatmap.
+
+    A confusion matrix is computed from the `y_true`, `y_pred` and
+    `sample_weight` arguments. Normalization can be applied by setting
+    `normalize=True`.
+
+    Parameters
+    ----------
+    y_true : array, shape = [n_samples]
+        Ground truth (correct) target values.
+
+    y_pred : array, shape = [n_samples]
+        Estimated targets as returned by a classifier.
+
+    classes : list of strings, optional (default=None)
+        The list of names of classes represented in the two-dimensional input
+        array. If not passed in the function call, the classes will be
+        inferred from `y_true` and `y_pred`.
+
+    sample_weight : array-like of shape = [n_samples], optional (default=None)
+        Sample weights used to calculate the confusion matrix.
+
+    normalize : boolean, optional (default=False)
+        If True, the confusion matrix will be normalized by row.
+
+    xlabel : string, optional (default="Predicted Label")
+        Label for the x-axis.
+
+    ylabel : string, optional (default="True Label")
+        Label for the y-axis.
+
+    title : string, optional (default="Confusion matrix")
+        Title for the heatmap.
+
+    cmap : string or colormap, optional (default=None)
+        Matplotlib colormap to use. If None, plt.cm.hot will be used.
+
+    vmin : int, float or None, optional (default=None)
+        Minimum clipping value. This argument will be passed on to the
+        pcolormesh function from matplotlib used to generate the heatmap.
+
+    vmax : int, float or None, optional (default=None)
+        Maximum clipping value. This argument will be passed on to the
+        pcolormesh function from matplotlib used to generate the heatmap.
+
+    ax : axes object or None, optional (default=None)
+        Matplotlib axes object to plot into. If None, the current axes are
+        used.
+
+    fmt : string, optional (default="{:.2f}")
+        Format string to convert value to text. This will be ignored if the
+        normalize argument is False.
+
+    xtickrotation : float, optional (default=45)
+        Rotation of the xticklabels.
+
+    norm : matplotlib normalizer, optional (default=None)
+        Normalizer passed to the pcolormesh function from matplotlib used to
+        generate the heatmap.
+    """
+    import matplotlib.pyplot as plt
+
+    unique_y = unique_labels(y_true, y_pred)
+
+    if classes is None:
+        classes = unique_y
+    else:
+        if len(classes) != len(unique_y):
+            raise ValueError("y_true and y_pred contain %d unique classes, "
+                             "which is not the same as %d "
+                             "classes found in `classes=%s` parameter" %
+                             (len(unique_y), len(classes), classes))
+
+    values = confusion_matrix(y_true, y_pred, sample_weight=sample_weight)
+
+    if normalize:
+        values = values.astype('float') / values.sum(axis=1)[:, np.newaxis]
+
+    fmt = fmt if normalize else '{:d}'
+
+    if ax is None:
+        fig = plt.figure()
+        ax = fig.add_subplot(111)
+
+    img = plot_heatmap(values, xticklabels=classes, yticklabels=classes,
+                       cmap=cmap, xlabel=xlabel, ylabel=ylabel, title=title,
+                       vmin=vmin, vmax=vmax, ax=ax, fmt=fmt,
+                       xtickrotation=xtickrotation, norm=norm)
+
+    return img

Review thread (on the deferred matplotlib import):
- not sure if we want actual tests of the functionality (I'm leaning no), but I think we want at least smoke-tests.
- added smoke-tests which just run the code without asserts.
- hm, looks like the config we use for coverage doesn't have matplotlib. We should change that...
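A smoke test of the kind described in that thread might look like the sketch below. The test body just exercises the function end to end without asserting anything about the output; the test name and the use of pytest here are assumptions, not code from this PR:

```python
import pytest


def test_plot_confusion_matrix_smoke():
    # Skip (rather than fail) on machines without matplotlib installed.
    matplotlib = pytest.importorskip("matplotlib")
    matplotlib.use("Agg")  # headless backend so no display is needed

    from sklearn.plot import plot_confusion_matrix

    y_true = [0, 1, 2, 2, 1, 0]
    y_pred = [0, 2, 2, 2, 1, 1]

    # No asserts: the test passes as long as nothing raises.
    plot_confusion_matrix(y_true, y_pred)
    plot_confusion_matrix(y_true, y_pred, normalize=True,
                          classes=["a", "b", "c"])
```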
Review thread (on the sklearn/plot module docs):
- add functions here
- added. this was WIP in the previous PR.

Review thread (on marking the module as experimental):
- I wouldn't count on users to read this message. My two cents is that if you really want users to notice that the plotting module is experimental, you'd have to put it in a submodule "experimental" or "future", and only move it to the main namespace when the API is stable.
- It's a bit unclear to me what it would mean for the API to be stable, and I really don't like forcing people to change their code later. I would probably just remove the warning here and then do standard deprecation cycles.
- Doing deprecation cycles is forcing people to change their code eventually, with the additional risk that they won't know that this code is experimental :)
- Deprecation cycles only sometimes require users to change their code: only if they are actually using the feature being deprecated, which is not very common for most deprecations in scikit-learn, and only if there is actually a change. And I'm not sure what's experimental about this code. The experiment is more about having plotting inside scikit-learn at all. Since it's plotting and therefore user-facing, I'd rather have a warning on every call than put it in a different module. I guess the thing we are trying to communicate is "don't build long-term projects relying on the presence of plotting in scikit-learn, because we might remove it again".
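As a concrete sketch of the "warning on every call" option discussed above (hypothetical; the decorator, message text, and warning class are illustrative, not part of this PR):

```python
import warnings


def _experimental(func):
    # Hypothetical decorator: warn on every call into the plotting module so
    # users learn the API may change or be removed without a deprecation cycle.
    def wrapper(*args, **kwargs):
        warnings.warn("sklearn.plot is experimental: it may be changed or "
                      "removed outside the usual deprecation cycle.",
                      FutureWarning)
        return func(*args, **kwargs)
    return wrapper


@_experimental
def plot_heatmap(values):
    print("plotting", values)  # stand-in for the real drawing code


plot_heatmap([[5, 1], [2, 7]])  # emits the FutureWarning, then "plots"
```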