From fe41a8bb0204c901cff776386de2ae1dc17a9d2e Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 19 Dec 2016 11:41:27 -0500 Subject: [PATCH 01/97] add plotting module and "plot_heatmap" function --- sklearn/plot/__init__.py | 3 ++ sklearn/plot/_heatmap.py | 73 ++++++++++++++++++++++++++++++ sklearn/plot/tests/__init__.py | 0 sklearn/plot/tests/test_heatmap.py | 24 ++++++++++ sklearn/setup.py | 2 + 5 files changed, 102 insertions(+) create mode 100644 sklearn/plot/__init__.py create mode 100644 sklearn/plot/_heatmap.py create mode 100644 sklearn/plot/tests/__init__.py create mode 100644 sklearn/plot/tests/test_heatmap.py diff --git a/sklearn/plot/__init__.py b/sklearn/plot/__init__.py new file mode 100644 index 0000000000000..7cff0ad0ee943 --- /dev/null +++ b/sklearn/plot/__init__.py @@ -0,0 +1,3 @@ +from ._heatmap import plot_heatmap + +__all__ = ["plot_heatmap"] diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py new file mode 100644 index 0000000000000..92a38d3bc8104 --- /dev/null +++ b/sklearn/plot/_heatmap.py @@ -0,0 +1,73 @@ +import numpy as np + + +def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, + yticklabels=None, cmap=None, vmin=None, vmax=None, ax=None, + fmt="{:.2f}"): + """Plot a matrix as heatmap with explicit numbers. + + Parameters + ---------- + values : ndarray + Two-dimensional array to visualize. + + xlabel : string, default="" + Label for the x-axis. + + ylabel : string, default="" + Label for the y-axis. + + xticklabels : list of string or None, default=None + Tick labels for the x-axis. + + yticklabels : list of string or None, default=None + Tick labels for the y-axis + + cmap : string or colormap + Matpotlib colormap to use. + + vmin : int, float or None + Minimum clipping value. + + vmax : int, float or None + Maximum clipping value. + + ax : axes object or None + Matplotlib axes object to plot into. If None, the current axes are + used. 
+ + fmt : string, default="{:.2f}" + Format string to convert value to text. + """ + import matplotlib.pyplot as plt + if ax is None: + ax = plt.gca() + img = ax.pcolor(values, cmap=cmap, vmin=None, vmax=None) + # this will allow us to access the pixel values: + img.update_scalarmappable() + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + if xticklabels is None: + xticklabels = [""] * values.shape[1] + if yticklabels is None: + yticklabels = [""] * values.shape[0] + + ax.xaxis.set_ticks_position('bottom') + + # +.5 makes the ticks centered on the pixels + ax.set_xticks(np.arange(values.shape[1]) + .5) + ax.set_xticklabels(xticklabels, ha="center") + ax.set_yticks(np.arange(values.shape[0]) + .5) + ax.set_yticklabels(yticklabels, va="center") + ax.set_aspect(1) + + for p, color, value in zip(img.get_paths(), img.get_facecolors(), + img.get_array()): + x, y = p.vertices[:-2, :].mean(0) + if np.mean(color[:3]) > 0.5: + # pixel bright: use black for number + c = 'k' + else: + c = 'w' + ax.text(x, y, fmt.format(value), color=c, ha="center", va="center") + return ax diff --git a/sklearn/plot/tests/__init__.py b/sklearn/plot/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py new file mode 100644 index 0000000000000..fb27bdf0ae9be --- /dev/null +++ b/sklearn/plot/tests/test_heatmap.py @@ -0,0 +1,24 @@ +from sklearn.plot import plot_heatmap +from sklearn.utils.testing import SkipTest +import numpy as np + + +def test_heatmap(): + try: + import matplotlib + except ImportError: + raise SkipTest("Not testing plot_heatmap, matplotlib not installed.") + + import matplotlib.pyplot as plt + + with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): + plt.figure() + rng = np.random.RandomState(0) + X = rng.normal(size=(10, 5)) + # use mixture of default values and keyword args + plot_heatmap(X, ylabel="y-axis", + xticklabels=["a", "b", "c", "d", "efgh"], + 
cmap="Paired", ax=plt.gca()) + + plt.draw() + plt.close() diff --git a/sklearn/setup.py b/sklearn/setup.py index 8adbbd9d49132..5bae68530cedc 100644 --- a/sklearn/setup.py +++ b/sklearn/setup.py @@ -36,6 +36,8 @@ def configuration(parent_package='', top_path=None): config.add_subpackage('model_selection/tests') config.add_subpackage('neural_network') config.add_subpackage('neural_network/tests') + config.add_subpackage('plot') + config.add_subpackage('plot/tests') config.add_subpackage('preprocessing') config.add_subpackage('preprocessing/tests') config.add_subpackage('semi_supervised') From 62da4fb5a465a3be276e2e2b92b75291163b5d9a Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 19 Dec 2016 11:43:56 -0500 Subject: [PATCH 02/97] add plotting module to the API docs --- doc/modules/classes.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 3aee8f258b9d1..a23a3de01ee42 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -1357,6 +1357,25 @@ Low-level methods utils.shuffle +:mod:`sklearn.plot`: Plotting functions +======================================= + +.. automodule:: sklearn.plot + :no-members: + :no-inherited-members: + +This module is experimental. Use at your own risk. +Use of this module requires the matplotlib library. + +.. currentmodule:: sklearn.plot + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + plot_heatmap + + Recently deprecated =================== From 23d86710ecd7ed1846e14c684352291958160d37 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 19 Dec 2016 11:50:21 -0500 Subject: [PATCH 03/97] simplify plot_confusion_matrix example --- .../model_selection/plot_confusion_matrix.py | 28 ++++--------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/examples/model_selection/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py index 4b7c360988071..5625fa6d4b59f 100644 --- a/examples/model_selection/plot_confusion_matrix.py +++ b/examples/model_selection/plot_confusion_matrix.py @@ -24,15 +24,13 @@ """ -print(__doc__) - -import itertools import numpy as np import matplotlib.pyplot as plt from sklearn import svm, datasets from sklearn.model_selection import train_test_split from sklearn.metrics import confusion_matrix +from sklearn.plot import plot_heatmap # import some data to play with iris = datasets.load_iris() @@ -59,29 +57,15 @@ def plot_confusion_matrix(cm, classes, """ if normalize: cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] - print("Normalized confusion matrix") - else: - print('Confusion matrix, without normalization') + print(title) print(cm) - plt.imshow(cm, interpolation='nearest', cmap=cmap) + fmt = '{:.2f}' if normalize else '{:d}' + plot_heatmap(cm, xticklabels=classes, yticklabels=classes, cmap=cmap, + xlabel="Predicted label", ylabel="True label", fmt=fmt) + plt.title(title) - plt.colorbar() - tick_marks = np.arange(len(classes)) - plt.xticks(tick_marks, classes, rotation=45) - plt.yticks(tick_marks, classes) - - fmt = '.2f' if normalize else 'd' - thresh = cm.max() / 2. 
- for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): - plt.text(j, i, format(cm[i, j], fmt), - horizontalalignment="center", - color="white" if cm[i, j] > thresh else "black") - - plt.tight_layout() - plt.ylabel('True label') - plt.xlabel('Predicted label') # Compute confusion matrix cnf_matrix = confusion_matrix(y_test, y_pred) From 67308d4c05641df118abdc31df17640b5d9ae370 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 19 Dec 2016 12:18:08 -0500 Subject: [PATCH 04/97] add normalizer support to heatmap, use heatmap when plotting gridsearch results --- examples/svm/plot_rbf_parameters.py | 26 ++++++++++++-------------- sklearn/plot/_heatmap.py | 18 +++++++++++++----- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py index acec9896169b8..89eefc2e1d33e 100644 --- a/examples/svm/plot_rbf_parameters.py +++ b/examples/svm/plot_rbf_parameters.py @@ -51,11 +51,11 @@ Finally one can also observe that for some intermediate values of ``gamma`` we get equally performing models when ``C`` becomes very large: it is not -necessary to regularize by limiting the number of support vectors. The radius of -the RBF kernel alone acts as a good structural regularizer. In practice though -it might still be interesting to limit the number of support vectors with a -lower value of ``C`` so as to favor models that use less memory and that are -faster to predict. +necessary to regularize by limiting the number of support vectors. The radius +of the RBF kernel alone acts as a good structural regularizer. In practice +though it might still be interesting to limit the number of support vectors +with a lower value of ``C`` so as to favor models that use less memory and that +are faster to predict. We should also note that small differences in scores results from the random splits of the cross-validation procedure. Those spurious variations can be @@ -65,7 +65,6 @@ map. 
''' -print(__doc__) import numpy as np import matplotlib.pyplot as plt @@ -76,6 +75,9 @@ from sklearn.datasets import load_iris from sklearn.model_selection import StratifiedShuffleSplit from sklearn.model_selection import GridSearchCV +from sklearn.plot import plot_heatmap + +print(__doc__) # Utility function to move the midpoint of a colormap to be around @@ -183,14 +185,10 @@ def __call__(self, value, clip=None): # interesting range while not brutally collapsing all the low score values to # the same color. -plt.figure(figsize=(8, 6)) +plt.figure(figsize=(10, 10)) plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95) -plt.imshow(scores, interpolation='nearest', cmap=plt.cm.hot, - norm=MidpointNormalize(vmin=0.2, midpoint=0.92)) -plt.xlabel('gamma') -plt.ylabel('C') -plt.colorbar() -plt.xticks(np.arange(len(gamma_range)), gamma_range, rotation=45) -plt.yticks(np.arange(len(C_range)), C_range) +plot_heatmap(scores, cmap=plt.cm.hot, xlabel="gamma", ylabel="C", + xticklabels=gamma_range, yticklabels=C_range, + norm=MidpointNormalize(vmin=0.2, midpoint=0.92)) plt.title('Validation accuracy') plt.show() diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py index 92a38d3bc8104..74aabfd82bb97 100644 --- a/sklearn/plot/_heatmap.py +++ b/sklearn/plot/_heatmap.py @@ -3,7 +3,7 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, yticklabels=None, cmap=None, vmin=None, vmax=None, ax=None, - fmt="{:.2f}"): + fmt="{:.2f}", xtickrotation=45, norm=None): """Plot a matrix as heatmap with explicit numbers. Parameters @@ -38,25 +38,33 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, fmt : string, default="{:.2f}" Format string to convert value to text. + + xtickrotation : float, default=45 + Rotation of the xticklabels. 
+ + norm : matplotlib normalizer + Normalizer passed to pcolor """ import matplotlib.pyplot as plt if ax is None: ax = plt.gca() - img = ax.pcolor(values, cmap=cmap, vmin=None, vmax=None) + img = ax.pcolor(values, cmap=cmap, vmin=None, vmax=None, norm=norm) # this will allow us to access the pixel values: img.update_scalarmappable() ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) + + ax.set_xlim(0, values.shape[1]) + ax.set_ylim(0, values.shape[0]) + if xticklabels is None: xticklabels = [""] * values.shape[1] if yticklabels is None: yticklabels = [""] * values.shape[0] - ax.xaxis.set_ticks_position('bottom') - # +.5 makes the ticks centered on the pixels ax.set_xticks(np.arange(values.shape[1]) + .5) - ax.set_xticklabels(xticklabels, ha="center") + ax.set_xticklabels(xticklabels, ha="center", rotation=xtickrotation) ax.set_yticks(np.arange(values.shape[0]) + .5) ax.set_yticklabels(yticklabels, va="center") ax.set_aspect(1) From 40078c676538273183087843d7f827e913cbe6eb Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 19 Dec 2016 14:37:42 -0500 Subject: [PATCH 05/97] add plot to __all__ --- sklearn/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/__init__.py b/sklearn/__init__.py index 26c4fff59dcf3..5847865b1a0c7 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -66,7 +66,7 @@ 'mixture', 'model_selection', 'multiclass', 'multioutput', 'naive_bayes', 'neighbors', 'neural_network', 'pipeline', 'preprocessing', 'random_projection', 'semi_supervised', - 'svm', 'tree', 'discriminant_analysis', + 'svm', 'tree', 'discriminant_analysis', 'plot', # Non-modules: 'clone'] From 5d4384380a924258d6fe3334aa351e3e82ca9a08 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Sun, 11 Jun 2017 16:34:13 -0400 Subject: [PATCH 06/97] using pcolormesh + alignment fix --- sklearn/plot/_heatmap.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py index 
74aabfd82bb97..c6e11b92060e7 100644 --- a/sklearn/plot/_heatmap.py +++ b/sklearn/plot/_heatmap.py @@ -45,10 +45,11 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, norm : matplotlib normalizer Normalizer passed to pcolor """ + import matplotlib.pyplot as plt if ax is None: ax = plt.gca() - img = ax.pcolor(values, cmap=cmap, vmin=None, vmax=None, norm=norm) + img = ax.pcolormesh(values, cmap=cmap, vmin=None, vmax=None, norm=norm) # this will allow us to access the pixel values: img.update_scalarmappable() ax.set_xlabel(xlabel) @@ -72,6 +73,11 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, for p, color, value in zip(img.get_paths(), img.get_facecolors(), img.get_array()): x, y = p.vertices[:-2, :].mean(0) + + # adjusting x and y for alignment: + x = x - 1./6 + y = y + 1./6 + if np.mean(color[:3]) > 0.5: # pixel bright: use black for number c = 'k' From 221d7483871edd8c4003d462d3e73def8003fda7 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 00:19:16 -0400 Subject: [PATCH 07/97] added confusion_matrix plot file --- sklearn/plot/_confusion_matrix.py | 68 +++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 sklearn/plot/_confusion_matrix.py diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py new file mode 100644 index 0000000000000..1e77bbdc883fc --- /dev/null +++ b/sklearn/plot/_confusion_matrix.py @@ -0,0 +1,68 @@ +import numpy as np +from sklearn.plot import plot_heatmap + + +def plot_confusion_matrix(values, classes, normalize=True, + xlabel="Predicted Label", ylabel="True Label", + title='Confusion matrix', cmap=None, vmin=None, + vmax=None, ax=None, fmt="{:.2f}", + xtickrotation=45, norm=None): + """Print and plot the confusion matrix. Normalization can be applied by + setting `normalize=True`. + + Parameters + ---------- + values : ndarray + Two-dimensional array to visualize. 
+ + classes : list of strings + The list of classes represented in the two-dimensional input array. + + normalize : boolean, default=True + If True, the confusion matrix will be normalized by row. + + xlabel : string, default="" + Label for the x-axis. + + ylabel : string, default="" + Label for the y-axis. + + cmap : string or colormap + Matpotlib colormap to use. + + vmin : int, float or None + Minimum clipping value. + + vmax : int, float or None + Maximum clipping value. + + ax : axes object or None + Matplotlib axes object to plot into. If None, the current axes are + used. + + fmt : string, default="{:.2f}" + Format string to convert value to text. This will be ignored if + normalize argument is False. + + xtickrotation : float, default=45 + Rotation of the xticklabels. + + norm : matplotlib normalizer + Normalizer passed to pcolor + """ + + import matplotlib.pyplot as plt + + if normalize: + values = values.astype('float') / values.sum(axis=1)[:, np.newaxis] + + print(title) + print(values) + + fmt = fmt if normalize else '{:d}' + + plot_heatmap(values, xticklabels=classes, yticklabels=classes, cmap=cmap, + xlabel=xlabel, ylabel=ylabel, vmin=vmin, vmax=vmax, ax=ax, + fmt=fmt, xtickrotation=xtickrotation, norm=norm) + + plt.title(title) From 74143d5aa154c02ede8e69bb5b4c66dd9505f428 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 00:19:48 -0400 Subject: [PATCH 08/97] made vmin and vmax pass through in heatplot plot function --- sklearn/plot/_heatmap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py index c6e11b92060e7..fb92a981ba14c 100644 --- a/sklearn/plot/_heatmap.py +++ b/sklearn/plot/_heatmap.py @@ -49,7 +49,7 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, import matplotlib.pyplot as plt if ax is None: ax = plt.gca() - img = ax.pcolormesh(values, cmap=cmap, vmin=None, vmax=None, norm=norm) + img = ax.pcolormesh(values, cmap=cmap, vmin=vmin, 
vmax=vmax, norm=norm) # this will allow us to access the pixel values: img.update_scalarmappable() ax.set_xlabel(xlabel) From 0b377e9780d1c0c8c8366e3d394404a11e952669 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 00:40:59 -0400 Subject: [PATCH 09/97] modified documentation plot_confusion_matrix --- sklearn/plot/_confusion_matrix.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 1e77bbdc883fc..a2ae4b7a630e1 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -2,13 +2,13 @@ from sklearn.plot import plot_heatmap -def plot_confusion_matrix(values, classes, normalize=True, +def plot_confusion_matrix(values, classes, normalize=False, xlabel="Predicted Label", ylabel="True Label", title='Confusion matrix', cmap=None, vmin=None, vmax=None, ax=None, fmt="{:.2f}", xtickrotation=45, norm=None): - """Print and plot the confusion matrix. Normalization can be applied by - setting `normalize=True`. + """Print and plot the confusion matrix as a heatmap. Normalization can be + applied by setting `normalize=True`. Parameters ---------- @@ -18,23 +18,25 @@ def plot_confusion_matrix(values, classes, normalize=True, classes : list of strings The list of classes represented in the two-dimensional input array. - normalize : boolean, default=True + normalize : boolean, default=False If True, the confusion matrix will be normalized by row. - xlabel : string, default="" + xlabel : string, default="Predicted Label" Label for the x-axis. - ylabel : string, default="" + ylabel : string, default="True Label" Label for the y-axis. cmap : string or colormap Matpotlib colormap to use. vmin : int, float or None - Minimum clipping value. + Minimum clipping value. This argument will be passed on to the + pcolormesh function from matplotlib used to generate the heatmap. vmax : int, float or None - Maximum clipping value. 
+ Maximum clipping value. This argument will be passed on to the + pcolormesh function from matplotlib used to generate the heatmap. ax : axes object or None Matplotlib axes object to plot into. If None, the current axes are @@ -48,7 +50,8 @@ def plot_confusion_matrix(values, classes, normalize=True, Rotation of the xticklabels. norm : matplotlib normalizer - Normalizer passed to pcolor + Normalizer passed to pcolormesh function from matplotlib used to + generate the heatmap. """ import matplotlib.pyplot as plt From e875d0d755e744d95accbda927ef2a2784ea53b5 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 00:42:13 -0400 Subject: [PATCH 10/97] updated __init__.py file to include confusion matrix plot --- sklearn/plot/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/plot/__init__.py b/sklearn/plot/__init__.py index 7cff0ad0ee943..fec45fff52116 100644 --- a/sklearn/plot/__init__.py +++ b/sklearn/plot/__init__.py @@ -1,3 +1,4 @@ from ._heatmap import plot_heatmap +from ._confusion_matrix import plot_confusion_matrix -__all__ = ["plot_heatmap"] +__all__ = ["plot_heatmap", "plot_confusion_matrix"] From 74bf78636808ba6250a934a5a1818f79b36588a6 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 00:43:08 -0400 Subject: [PATCH 11/97] plot confusion matrix example updated to use new function --- .../model_selection/plot_confusion_matrix.py | 29 ++++--------------- 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/examples/model_selection/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py index 5625fa6d4b59f..95df220d242a3 100644 --- a/examples/model_selection/plot_confusion_matrix.py +++ b/examples/model_selection/plot_confusion_matrix.py @@ -30,7 +30,7 @@ from sklearn import svm, datasets from sklearn.model_selection import train_test_split from sklearn.metrics import confusion_matrix -from sklearn.plot import plot_heatmap +from sklearn.plot import plot_confusion_matrix # 
import some data to play with iris = datasets.load_iris() @@ -46,27 +46,6 @@ classifier = svm.SVC(kernel='linear', C=0.01) y_pred = classifier.fit(X_train, y_train).predict(X_test) - -def plot_confusion_matrix(cm, classes, - normalize=False, - title='Confusion matrix', - cmap=plt.cm.Blues): - """ - This function prints and plots the confusion matrix. - Normalization can be applied by setting `normalize=True`. - """ - if normalize: - cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] - - print(title) - print(cm) - - fmt = '{:.2f}' if normalize else '{:d}' - plot_heatmap(cm, xticklabels=classes, yticklabels=classes, cmap=cmap, - xlabel="Predicted label", ylabel="True label", fmt=fmt) - - plt.title(title) - # Compute confusion matrix cnf_matrix = confusion_matrix(y_test, y_pred) np.set_printoptions(precision=2) @@ -74,11 +53,13 @@ def plot_confusion_matrix(cm, classes, # Plot non-normalized confusion matrix plt.figure() plot_confusion_matrix(cnf_matrix, classes=class_names, - title='Confusion matrix, without normalization') + title='Confusion matrix, without normalization', + cmap=plt.cm.Blues) # Plot normalized confusion matrix plt.figure() plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True, - title='Normalized confusion matrix') + title='Normalized confusion matrix', + cmap=plt.cm.Blues) plt.show() From 8a1b8616470aaeed669eb423dff2fdbfdd5b59af Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 01:49:42 -0400 Subject: [PATCH 12/97] make matrix diagonal --- sklearn/plot/_heatmap.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py index fb92a981ba14c..cfa0645c86e18 100644 --- a/sklearn/plot/_heatmap.py +++ b/sklearn/plot/_heatmap.py @@ -50,6 +50,7 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, if ax is None: ax = plt.gca() img = ax.pcolormesh(values, cmap=cmap, vmin=vmin, vmax=vmax, norm=norm) + # this will allow us to access the pixel values: 
img.update_scalarmappable() ax.set_xlabel(xlabel) @@ -84,4 +85,9 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, else: c = 'w' ax.text(x, y, fmt.format(value), color=c, ha="center", va="center") + + # Invert the y-axis so that the matrix looks like a diagonal matrix and + # not anti-diagonal matrix + ax.invert_yaxis() + return ax From abaaccbb5249c0094b4a6ec844a0396d72246472 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 02:42:55 -0400 Subject: [PATCH 13/97] modify documentation --- sklearn/plot/_confusion_matrix.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index a2ae4b7a630e1..36579f68a1ab5 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -27,6 +27,9 @@ def plot_confusion_matrix(values, classes, normalize=False, ylabel : string, default="True Label" Label for the y-axis. + title : string, default="Confusion matrix" + Title for the heatmap. + cmap : string or colormap Matpotlib colormap to use. From badf3877f5114255777fe0a20d5dc481fb1584e3 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 02:43:45 -0400 Subject: [PATCH 14/97] adding grid search results plotting function. 
--- sklearn/plot/__init__.py | 3 +- sklearn/plot/_gridsearch_results.py | 79 +++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 sklearn/plot/_gridsearch_results.py diff --git a/sklearn/plot/__init__.py b/sklearn/plot/__init__.py index fec45fff52116..c63653af62156 100644 --- a/sklearn/plot/__init__.py +++ b/sklearn/plot/__init__.py @@ -1,4 +1,5 @@ from ._heatmap import plot_heatmap from ._confusion_matrix import plot_confusion_matrix +from ._gridsearch_results import plot_gridsearch_results -__all__ = ["plot_heatmap", "plot_confusion_matrix"] +__all__ = ["plot_heatmap", "plot_confusion_matrix", "plot_gridsearch_results"] diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py new file mode 100644 index 0000000000000..7f671e8932977 --- /dev/null +++ b/sklearn/plot/_gridsearch_results.py @@ -0,0 +1,79 @@ +import numpy as np +from sklearn.plot import plot_heatmap + + +def plot_gridsearch_results(cv_results, param_grid, metric='mean_test_score', + xlabel=None, ylabel=None, + title='Grid Search Results', cmap=None, + vmin=None, vmax=None, ax=None, fmt="{:.2f}", + xtickrotation=45, norm=None): + """Print and plot the confusion matrix as a heatmap. Normalization can be + applied by setting `normalize=True`. + + Parameters + ---------- + cv_results : dict of numpy (masked) ndarrays + The cv_results_ attribute of the GridSearchCV object. + + param_grid : dict + A dictionary with keys as the two parameters searched and values + as a list of their respective values searched. + + xlabel : string, default=None + Label for the x-axis. If None, the first key of the param_grid will + be used as the xlabel. + + ylabel : string, default=None + Label for the y-axis. If None, the second key of the param_grid will + be used as the ylabel. + + metric : string, default="mean_test_score" + The metric from the GridSearchCV results to display. 
+ + title : string, default="Grid Search Results" + Title for the heatmap. + + cmap : string or colormap + Matpotlib colormap to use. + + vmin : int, float or None + Minimum clipping value. This argument will be passed on to the + pcolormesh function from matplotlib used to generate the heatmap. + + vmax : int, float or None + Maximum clipping value. This argument will be passed on to the + pcolormesh function from matplotlib used to generate the heatmap. + + ax : axes object or None + Matplotlib axes object to plot into. If None, the current axes are + used. + + fmt : string, default="{:.2f}" + Format string to convert value to text. + + xtickrotation : float, default=45 + Rotation of the xticklabels. + + norm : matplotlib normalizer + Normalizer passed to pcolormesh function from matplotlib used to + generate the heatmap. + """ + + import matplotlib.pyplot as plt + + parameter1, parameter2 = param_grid.keys() + parameter1_values = param_grid[parameter1] + parameter2_values = param_grid[parameter2] + + scores = cv_results[metric].reshape(len(parameter1_values), + len(parameter2_values)) + + xlabel = parameter1 if xlabel is None else xlabel + ylabel = parameter2 if ylabel is None else ylabel + + plot_heatmap(scores, cmap=plt.cm.hot, xlabel=xlabel, ylabel=ylabel, + xticklabels=parameter1_values, yticklabels=parameter2_values, + ax=ax, norm=norm) + + plt.title(title) + plt.show() From 46c42532e48360fc6639b9f1ff0c12fdf3f51d47 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 02:44:33 -0400 Subject: [PATCH 15/97] modified examples/svm/plot_rbf_parameters.py with new function --- examples/svm/plot_rbf_parameters.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py index 89eefc2e1d33e..e2768af7a036a 100644 --- a/examples/svm/plot_rbf_parameters.py +++ b/examples/svm/plot_rbf_parameters.py @@ -75,7 +75,7 @@ from sklearn.datasets import load_iris from 
sklearn.model_selection import StratifiedShuffleSplit from sklearn.model_selection import GridSearchCV -from sklearn.plot import plot_heatmap +from sklearn.plot import plot_gridsearch_results print(__doc__) @@ -173,9 +173,6 @@ def __call__(self, value, clip=None): plt.yticks(()) plt.axis('tight') -scores = grid.cv_results_['mean_test_score'].reshape(len(C_range), - len(gamma_range)) - # Draw heatmap of the validation accuracy as a function of gamma and C # # The score are encoded as colors with the hot colormap which varies from dark @@ -187,8 +184,7 @@ def __call__(self, value, clip=None): plt.figure(figsize=(10, 10)) plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95) -plot_heatmap(scores, cmap=plt.cm.hot, xlabel="gamma", ylabel="C", - xticklabels=gamma_range, yticklabels=C_range, - norm=MidpointNormalize(vmin=0.2, midpoint=0.92)) -plt.title('Validation accuracy') +plot_gridsearch_results(grid.cv_results_, param_grid=param_grid, + title="Validation accuracy", cmap=plt.cm.hot, + norm=MidpointNormalize(vmin=0.2, midpoint=0.92)) plt.show() From 8f4e5f144460172cd500fbc4088801dfb4b95a79 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 17:50:21 -0400 Subject: [PATCH 16/97] removed printing confusion matrix --- sklearn/plot/_confusion_matrix.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 36579f68a1ab5..183fe0a45b033 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -7,7 +7,7 @@ def plot_confusion_matrix(values, classes, normalize=False, title='Confusion matrix', cmap=None, vmin=None, vmax=None, ax=None, fmt="{:.2f}", xtickrotation=45, norm=None): - """Print and plot the confusion matrix as a heatmap. Normalization can be + """Plot the confusion matrix as a heatmap. Normalization can be applied by setting `normalize=True`. 
Parameters @@ -62,9 +62,6 @@ def plot_confusion_matrix(values, classes, normalize=False, if normalize: values = values.astype('float') / values.sum(axis=1)[:, np.newaxis] - print(title) - print(values) - fmt = fmt if normalize else '{:d}' plot_heatmap(values, xticklabels=classes, yticklabels=classes, cmap=cmap, From d6940f51aa67c46963b28ce94668c81d5c92991e Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 18:01:50 -0400 Subject: [PATCH 17/97] remove param_grid argument --- sklearn/plot/_gridsearch_results.py | 39 ++++++++++++++++------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 7f671e8932977..fd3919436eaa4 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -2,7 +2,7 @@ from sklearn.plot import plot_heatmap -def plot_gridsearch_results(cv_results, param_grid, metric='mean_test_score', +def plot_gridsearch_results(cv_results, metric='mean_test_score', xlabel=None, ylabel=None, title='Grid Search Results', cmap=None, vmin=None, vmax=None, ax=None, fmt="{:.2f}", @@ -15,10 +15,6 @@ def plot_gridsearch_results(cv_results, param_grid, metric='mean_test_score', cv_results : dict of numpy (masked) ndarrays The cv_results_ attribute of the GridSearchCV object. - param_grid : dict - A dictionary with keys as the two parameters searched and values - as a list of their respective values searched. - xlabel : string, default=None Label for the x-axis. If None, the first key of the param_grid will be used as the xlabel. 
@@ -61,19 +57,28 @@ def plot_gridsearch_results(cv_results, param_grid, metric='mean_test_score', import matplotlib.pyplot as plt - parameter1, parameter2 = param_grid.keys() - parameter1_values = param_grid[parameter1] - parameter2_values = param_grid[parameter2] + params = sorted(cv_results['params'][0].keys()) + + if len(params) == 1: + # plot a line chart + pass + elif len(params) == 2: + parameter1_values = np.unique(cv_results['param_%s' % params[0]]) + parameter2_values = np.unique(cv_results['param_%s' % params[1]]) - scores = cv_results[metric].reshape(len(parameter1_values), - len(parameter2_values)) + scores = cv_results[metric].reshape(len(parameter1_values), + len(parameter2_values)) - xlabel = parameter1 if xlabel is None else xlabel - ylabel = parameter2 if ylabel is None else ylabel + xlabel = params[0] if xlabel is None else xlabel + ylabel = params[1] if ylabel is None else ylabel - plot_heatmap(scores, cmap=plt.cm.hot, xlabel=xlabel, ylabel=ylabel, - xticklabels=parameter1_values, yticklabels=parameter2_values, - ax=ax, norm=norm) + plot_heatmap(scores, cmap=plt.cm.hot, xlabel=xlabel, ylabel=ylabel, + xticklabels=parameter1_values, + yticklabels=parameter2_values, + ax=ax, norm=norm) - plt.title(title) - plt.show() + plt.title(title) + plt.show() + else: + # print error statement + pass From 18ac9c71ee9c6fc454f0bb3de0d795ed89cf4948 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Wed, 21 Jun 2017 11:12:36 -0400 Subject: [PATCH 18/97] adding cases for nparams 1,2, more --- sklearn/plot/_gridsearch_results.py | 42 ++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index fd3919436eaa4..68ca542a67728 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -24,7 +24,8 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', be used as the ylabel. 
metric : string, default="mean_test_score" - The metric from the GridSearchCV results to display. + The metric from the GridSearchCV results to display. This is ignored + if only 1 parameter is used in grid search. title : string, default="Grid Search Results" Title for the heatmap. @@ -58,11 +59,38 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', import matplotlib.pyplot as plt params = sorted(cv_results['params'][0].keys()) + nparams = len(params) + + if nparams == 1: + param = params[0] + param_range = sorted(cv_results['param_%s' % param]) + train_scores_mean = cv_results['mean_train_score'] + train_scores_std = cv_results['mean_train_std'] + test_scores_mean = cv_results['mean_test_score'] + test_scores_std = cv_results['mean_test_std'] + + lw = 2 + plt.semilogx(param_range, train_scores_mean, + label="Training score", + color="darkorange", lw=lw) + plt.fill_between(param_range, train_scores_mean - train_scores_std, + train_scores_mean + train_scores_std, alpha=0.2, + color="darkorange", lw=lw) + + plt.semilogx(param_range, test_scores_mean, + label="Cross-validation score", + color="navy", lw=lw) + plt.fill_between(param_range, test_scores_mean - test_scores_std, + test_scores_mean + test_scores_std, alpha=0.2, + color="navy", lw=lw) - if len(params) == 1: - # plot a line chart - pass - elif len(params) == 2: + plt.title(title) + plt.xlabel(param) + ylabel = "Score" if ylabel is None else ylabel + plt.ylabel(ylabel) + plt.show() + + elif nparams == 2: parameter1_values = np.unique(cv_results['param_%s' % params[0]]) parameter2_values = np.unique(cv_results['param_%s' % params[1]]) @@ -79,6 +107,8 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', plt.title(title) plt.show() + else: - # print error statement + raise ValueError('Plot functions supports upto 2 parameters in grid' + 'search, got {0}.'.format(nparams)) pass From 343db9d0815a6175f15670ff450ea00fba48976a Mon Sep 17 00:00:00 2001 From: aarshayj Date: Wed, 21 Jun 
2017 12:00:56 -0400 Subject: [PATCH 19/97] minor fixes --- sklearn/plot/_gridsearch_results.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 68ca542a67728..2719e38c3a52b 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -65,9 +65,9 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', param = params[0] param_range = sorted(cv_results['param_%s' % param]) train_scores_mean = cv_results['mean_train_score'] - train_scores_std = cv_results['mean_train_std'] + train_scores_std = cv_results['std_train_score'] test_scores_mean = cv_results['mean_test_score'] - test_scores_std = cv_results['mean_test_std'] + test_scores_std = cv_results['std_test_score'] lw = 2 plt.semilogx(param_range, train_scores_mean, @@ -86,6 +86,7 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', plt.title(title) plt.xlabel(param) + plt.legend() ylabel = "Score" if ylabel is None else ylabel plt.ylabel(ylabel) plt.show() From d82c4c26f5b2a1e7bfcaf00c7133d94dc0579954 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Wed, 21 Jun 2017 15:38:04 -0400 Subject: [PATCH 20/97] fixed typo --- sklearn/plot/_gridsearch_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 2719e38c3a52b..55ce25bd2f425 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -110,6 +110,6 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', plt.show() else: - raise ValueError('Plot functions supports upto 2 parameters in grid' + raise ValueError('Plot function supports upto 2 parameters in grid' 'search, got {0}.'.format(nparams)) pass From a3ccae9e0fac5e9599715fe186e69b7804f7bff5 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Wed, 21 Jun 2017 15:38:38 -0400 Subject: [PATCH 21/97] adding tests for 
confusion matrix and grid search plots --- sklearn/plot/tests/test_heatmap.py | 85 ++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index fb27bdf0ae9be..668f82b2b138f 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -1,5 +1,11 @@ from sklearn.plot import plot_heatmap +from sklearn.plot import plot_confusion_matrix +from sklearn.plot import plot_gridsearch_results +from sklearn.datasets import load_iris +from sklearn.model_selection import GridSearchCV +from sklearn.svm import SVC from sklearn.utils.testing import SkipTest +from sklearn.utils.testing import assert_raises import numpy as np @@ -22,3 +28,82 @@ def test_heatmap(): plt.draw() plt.close() + + +def test_confusion_matrix(): + try: + import matplotlib + except ImportError: + raise SkipTest("Not testing plot_heatmap, matplotlib not installed.") + + import matplotlib.pyplot as plt + + with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): + plt.figure() + cnf_matrix = np.random.randomint(1, 10, size=(2, 2)) + + # use mixture of default values and keyword args + plot_confusion_matrix(cnf_matrix, classes=["dummay1", "dummy2"], + cmap="Paired", ax=plt.gca()) + + plt.draw() + plt.close() + + +def test_gridsearch_results(): + try: + import matplotlib + except ImportError: + raise SkipTest("Not testing plot_heatmap, matplotlib not installed.") + + import matplotlib.pyplot as plt + + iris = load_iris() + X = iris.data + y = iris.target + + # We only keep the first two features in X and sub-sample the dataset to + # keep only 2 classes and make it a binary classification problem. 
+ + X_2d = X[:, :2] + X_2d = X_2d[y > 0] + y_2d = y[y > 0] + y_2d -= 1 + + # Define parameters: + C_range = np.logspace(-2, 10, 2) + gamma_range = np.logspace(-9, 3, 2) + tol_range = [1e-3, 1e-4] + + # Test 1D case: + param_grid = dict(gamma=gamma_range) + grid = GridSearchCV(SVC(), param_grid=param_grid, cv=3) + grid.fit(X, y) + + with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): + plt.figure() + plot_gridsearch_results(grid.cv_results_) + plt.draw() + plt.close() + + # Test 2D case: + param_grid = dict(gamma=gamma_range, C=C_range) + grid = GridSearchCV(SVC(), param_grid=param_grid, cv=3) + grid.fit(X, y) + + with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): + plt.figure() + plot_gridsearch_results(grid.cv_results_) + plt.draw() + plt.close() + + # Test 3D case: + param_grid = dict(gamma=gamma_range, C=C_range, tol=tol_range) + grid = GridSearchCV(SVC(), param_grid=param_grid, cv=3) + grid.fit(X, y) + + with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): + plt.figure() + assert_raises(ValueError, plot_gridsearch_results, grid.cv_results_) + plt.draw() + plt.close() From d27c8592975c8b3fd443a651642b416b394b0989 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Wed, 21 Jun 2017 17:26:01 -0400 Subject: [PATCH 22/97] adding test case for normalized --- sklearn/plot/tests/test_heatmap.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index 668f82b2b138f..5a199e23e24d7 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -42,10 +42,15 @@ def test_confusion_matrix(): plt.figure() cnf_matrix = np.random.randomint(1, 10, size=(2, 2)) - # use mixture of default values and keyword args + # plot un-normalized matrix plot_confusion_matrix(cnf_matrix, classes=["dummay1", "dummy2"], cmap="Paired", ax=plt.gca()) + # plot normalized matrix + plot_confusion_matrix(cnf_matrix, 
normalize=True, + classes=["dummay1", "dummy2"], + cmap="Paired", ax=plt.gca()) + plt.draw() plt.close() From b5e582315dde2c51dbb9d5e31dff0a72f8ce7b8b Mon Sep 17 00:00:00 2001 From: aarshayj Date: Wed, 21 Jun 2017 17:28:41 -0400 Subject: [PATCH 23/97] updated doc files --- doc/modules/classes.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index a23a3de01ee42..cc05404a1e280 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -1365,7 +1365,8 @@ Low-level methods :no-inherited-members: This module is experimental. Use at your own risk. -Use of this module requires the matplotlib library. +Use of this module requires the matplotlib library, +version 1.5 or later (preferably 2.0). .. currentmodule:: sklearn.plot @@ -1374,6 +1375,8 @@ Use of this module requires the matplotlib library. :template: function.rst plot_heatmap + plot_confusion_matrix + plot_gridsearch_results Recently deprecated From d5c64c5376d1b2efb66d4ff78679a420c6db8877 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Thu, 29 Jun 2017 14:30:46 -0400 Subject: [PATCH 24/97] modifying doc --- doc/modules/classes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 9aab9300a78fe..72a4f9d01d2a5 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -1396,7 +1396,7 @@ Low-level methods This module is experimental. Use at your own risk. Use of this module requires the matplotlib library, -version 1.5 or later (preferably 2.0). +version 1.5 or later. .. 
currentmodule:: sklearn.plot From 35d60fada9546517c1e35434b307bb9c89bcfc00 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 4 Jul 2017 11:36:32 -0400 Subject: [PATCH 25/97] doc fix --- sklearn/plot/_gridsearch_results.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 55ce25bd2f425..587ff2ae81106 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -7,8 +7,9 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', title='Grid Search Results', cmap=None, vmin=None, vmax=None, ax=None, fmt="{:.2f}", xtickrotation=45, norm=None): - """Print and plot the confusion matrix as a heatmap. Normalization can be - applied by setting `normalize=True`. + """Plot the grid search results as a line chart for 1D search and heatmap + for a 2D search. This function will not work if grid-search has more than + 2 parameters in the search space. Parameters ---------- From 808c9e2af561eef6b10b91c9282a153dc3b3d51b Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 4 Jul 2017 11:37:35 -0400 Subject: [PATCH 26/97] spell fix, make_blobs instead of iris, split checks into 3 functions --- sklearn/plot/tests/test_heatmap.py | 60 ++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index 5a199e23e24d7..7ff7b85ca827c 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -1,7 +1,7 @@ from sklearn.plot import plot_heatmap from sklearn.plot import plot_confusion_matrix from sklearn.plot import plot_gridsearch_results -from sklearn.datasets import load_iris +from sklearn.datasets import make_blobs from sklearn.model_selection import GridSearchCV from sklearn.svm import SVC from sklearn.utils.testing import SkipTest @@ -43,7 +43,7 @@ def test_confusion_matrix(): cnf_matrix = np.random.randomint(1, 
10, size=(2, 2)) # plot un-normalized matrix - plot_confusion_matrix(cnf_matrix, classes=["dummay1", "dummy2"], + plot_confusion_matrix(cnf_matrix, classes=["dummy1", "dummy2"], cmap="Paired", ax=plt.gca()) # plot normalized matrix @@ -55,7 +55,7 @@ def test_confusion_matrix(): plt.close() -def test_gridsearch_results(): +def test_gridsearch_results_1d(): try: import matplotlib except ImportError: @@ -63,25 +63,14 @@ def test_gridsearch_results(): import matplotlib.pyplot as plt - iris = load_iris() - X = iris.data - y = iris.target - - # We only keep the first two features in X and sub-sample the dataset to - # keep only 2 classes and make it a binary classification problem. - - X_2d = X[:, :2] - X_2d = X_2d[y > 0] - y_2d = y[y > 0] - y_2d -= 1 + X, y = make_blobs(n_samples=20, centers=2, n_features=3, + random_state=0) # Define parameters: C_range = np.logspace(-2, 10, 2) - gamma_range = np.logspace(-9, 3, 2) - tol_range = [1e-3, 1e-4] # Test 1D case: - param_grid = dict(gamma=gamma_range) + param_grid = dict(C=C_range) grid = GridSearchCV(SVC(), param_grid=param_grid, cv=3) grid.fit(X, y) @@ -91,7 +80,23 @@ def test_gridsearch_results(): plt.draw() plt.close() - # Test 2D case: + + +def test_gridsearch_results_2d(): + try: + import matplotlib + except ImportError: + raise SkipTest("Not testing plot_heatmap, matplotlib not installed.") + + import matplotlib.pyplot as plt + + X, y = make_blobs(n_samples=20, centers=2, n_features=3, + random_state=0) + + # Define parameters: + C_range = np.logspace(-2, 10, 2) + gamma_range = np.logspace(-9, 3, 2) + + # Test 2D case: param_grid = dict(gamma=gamma_range, C=C_range) grid = GridSearchCV(SVC(), param_grid=param_grid, cv=3) grid.fit(X, y) @@ -102,7 +107,24 @@ def test_gridsearch_results(): plt.draw() plt.close() - # Test 3D case: + + +def test_gridsearch_results_3d(): + try: + import matplotlib + except ImportError: + raise SkipTest("Not testing plot_heatmap, matplotlib not installed.") + + import matplotlib.pyplot as plt +
+ X, y = make_blobs(n_samples=20, centers=2, n_features=3, + random_state=0) + + # Define parameters: + C_range = np.logspace(-2, 10, 2) + gamma_range = np.logspace(-9, 3, 2) + tol_range = [1e-3, 1e-4] + + # Test 3D case: param_grid = dict(gamma=gamma_range, C=C_range, tol=tol_range) grid = GridSearchCV(SVC(), param_grid=param_grid, cv=3) grid.fit(X, y) From 346cb2444147d15ad9a64b85100b3ad32b2b2019 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 4 Jul 2017 13:43:08 -0400 Subject: [PATCH 27/97] fixed parameter in examples --- examples/svm/plot_rbf_parameters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py index 2760c2259a112..b5e923b7ee937 100644 --- a/examples/svm/plot_rbf_parameters.py +++ b/examples/svm/plot_rbf_parameters.py @@ -185,7 +185,7 @@ def __call__(self, value, clip=None): plt.figure(figsize=(10, 10)) plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95) -plot_gridsearch_results(grid.cv_results_, param_grid=param_grid, - title="Validation accuracy", cmap=plt.cm.hot, +plot_gridsearch_results(grid.cv_results_, title="Validation accuracy", + cmap=plt.cm.hot, norm=MidpointNormalize(vmin=0.2, midpoint=0.92)) plt.show() From 3bda05ae8fceb5308de6a288ab7e22bde1f04ece Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 4 Jul 2017 16:36:36 -0400 Subject: [PATCH 28/97] modified travis files --- .travis.yml | 4 ++-- build_tools/travis/install.sh | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6892cdbd53e51..71b414bd31b64 100644 --- a/.travis.yml +++ b/.travis.yml @@ -39,12 +39,12 @@ matrix: # It also runs tests requiring Pandas.
- env: DISTRIB="conda" PYTHON_VERSION="3.6.1" INSTALL_MKL="true" NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" PANDAS_VERSION="0.20.1" - CYTHON_VERSION="0.25.2" COVERAGE=true + CYTHON_VERSION="0.25.2" MATPLOTLIB_VERSION="2.0.2" COVERAGE=true # This environment use pytest to run the tests. It uses the newest # supported Anaconda release (4.4.0). It also runs tests requiring Pandas. - env: USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6.1" INSTALL_MKL="true" NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" - PANDAS_VERSION="0.20.1" CYTHON_VERSION="0.25.2" + PANDAS_VERSION="0.20.1" CYTHON_VERSION="0.25.2" MATPLOTLIB_VERSION="2.0.2" # flake8 linting on diff wrt common ancestor with upstream/master - env: RUN_FLAKE8="true" SKIP_TESTS="true" DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true" diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index 257cfb17f3938..ab0bd356be3b1 100755 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -42,12 +42,14 @@ if [[ "$DISTRIB" == "conda" ]]; then if [[ "$INSTALL_MKL" == "true" ]]; then conda create -n testenv --yes python=$PYTHON_VERSION pip nose pytest \ numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \ + matplotlib=$MATPLOTLIB_VERSION \ mkl cython=$CYTHON_VERSION \ ${PANDAS_VERSION+pandas=$PANDAS_VERSION} else conda create -n testenv --yes python=$PYTHON_VERSION pip nose pytest \ numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \ + matplotlib=$MATPLOTLIB_VERSION \ nomkl cython=$CYTHON_VERSION \ ${PANDAS_VERSION+pandas=$PANDAS_VERSION} fi From b9e5800f4b264e867ba32a87dec02db5194234e6 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 4 Jul 2017 18:02:38 -0400 Subject: [PATCH 29/97] explicitly imported random module functions --- sklearn/plot/tests/test_heatmap.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index 7ff7b85ca827c..b110375adfc1b 100644 --- a/sklearn/plot/tests/test_heatmap.py 
+++ b/sklearn/plot/tests/test_heatmap.py @@ -7,6 +7,8 @@ from sklearn.utils.testing import SkipTest from sklearn.utils.testing import assert_raises import numpy as np +from numpy.random import (RandomState, + randomint) def test_heatmap(): @@ -19,7 +21,7 @@ def test_heatmap(): with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): plt.figure() - rng = np.random.RandomState(0) + rng = RandomState(0) X = rng.normal(size=(10, 5)) # use mixture of default values and keyword args plot_heatmap(X, ylabel="y-axis", @@ -40,7 +42,7 @@ def test_confusion_matrix(): with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): plt.figure() - cnf_matrix = np.random.randomint(1, 10, size=(2, 2)) + cnf_matrix = randomint(1, 10, size=(2, 2)) # plot un-normalized matrix plot_confusion_matrix(cnf_matrix, classes=["dummy1", "dummy2"], From 7e1475f60b66d645549f0390504cb45503b57dd5 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 4 Jul 2017 18:03:35 -0400 Subject: [PATCH 30/97] install matplotlib only if secret variable specified in build matrix --- build_tools/travis/install.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index ab0bd356be3b1..e7ec4dc42d8d8 100755 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -42,14 +42,13 @@ if [[ "$DISTRIB" == "conda" ]]; then if [[ "$INSTALL_MKL" == "true" ]]; then conda create -n testenv --yes python=$PYTHON_VERSION pip nose pytest \ numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \ - matplotlib=$MATPLOTLIB_VERSION \ mkl cython=$CYTHON_VERSION \ - ${PANDAS_VERSION+pandas=$PANDAS_VERSION} + ${PANDAS_VERSION+pandas=$PANDAS_VERSION} \ + ${MATPLOTLIB_VERSION+matplotlib=$MATPLOTLIB_VERSION} else conda create -n testenv --yes python=$PYTHON_VERSION pip nose pytest \ numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \ - matplotlib=$MATPLOTLIB_VERSION \ nomkl cython=$CYTHON_VERSION \ ${PANDAS_VERSION+pandas=$PANDAS_VERSION} 
fi From 343cbc72240864a33ff88d6dda749524046d62f1 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Wed, 5 Jul 2017 09:20:15 -0400 Subject: [PATCH 31/97] randint correction --- sklearn/plot/tests/test_heatmap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index b110375adfc1b..8ea3d97ef99aa 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -8,7 +8,7 @@ from sklearn.utils.testing import assert_raises import numpy as np from numpy.random import (RandomState, - randomint) + randint) def test_heatmap(): @@ -42,7 +42,7 @@ def test_confusion_matrix(): with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): plt.figure() - cnf_matrix = randomint(1, 10, size=(2, 2)) + cnf_matrix = randint(1, 10, size=(2, 2)) # plot un-normalized matrix plot_confusion_matrix(cnf_matrix, classes=["dummy1", "dummy2"], From 2d1261d9f5da85001f3e891a871de8aa046fe5c1 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Wed, 5 Jul 2017 19:56:44 -0400 Subject: [PATCH 32/97] y_pred and y_true as input in place of matrix --- sklearn/plot/_confusion_matrix.py | 21 ++++++++++++++++----- sklearn/plot/tests/test_heatmap.py | 7 ++++--- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 183fe0a45b033..648fad9ce82e2 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -1,23 +1,32 @@ import numpy as np +from sklearn.metrics import confusion_matrix from sklearn.plot import plot_heatmap -def plot_confusion_matrix(values, classes, normalize=False, +def plot_confusion_matrix(y_true, y_pred, classes, sample_weights=None, + normalize=False, xlabel="Predicted Label", ylabel="True Label", title='Confusion matrix', cmap=None, vmin=None, vmax=None, ax=None, fmt="{:.2f}", xtickrotation=45, norm=None): - """Plot the confusion matrix as a heatmap. 
Normalization can be - applied by setting `normalize=True`. + """Plot the confusion matrix as a heatmap. A confusion matrix is computed + using `y_true`, `y_pred` and `sample_weights` arguments. Normalization + can be applied by setting `normalize=True`. Parameters ---------- - values : ndarray - Two-dimensional array to visualize. + y_true : array, shape = [n_samples] + Ground truth (correct) target values. + + y_pred : array, shape = [n_samples] + Estimated targets as returned by a classifier. classes : list of strings The list of classes represented in the two-dimensional input array. + sample_weight : array-like of shape = [n_samples], optional + Sample weights used to calculate the confusion matrix + normalize : boolean, default=False If True, the confusion matrix will be normalized by row. @@ -59,6 +68,8 @@ def plot_confusion_matrix(values, classes, normalize=False, import matplotlib.pyplot as plt + values = confusion_matrix(y_true, y_pred, sample_weights=sample_weights) + if normalize: values = values.astype('float') / values.sum(axis=1)[:, np.newaxis] diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index 8ea3d97ef99aa..51bf5a4873353 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -42,14 +42,15 @@ def test_confusion_matrix(): with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): plt.figure() - cnf_matrix = randint(1, 10, size=(2, 2)) + array1 = randint(1, 3, size=20) + array2 = randint(1, 3, size=20) # plot un-normalized matrix - plot_confusion_matrix(cnf_matrix, classes=["dummy1", "dummy2"], + plot_confusion_matrix(array1, array2, classes=["dummy1", "dummy2"], cmap="Paired", ax=plt.gca()) # plot normalized matrix - plot_confusion_matrix(cnf_matrix, normalize=True, + plot_confusion_matrix(array1, array2, normalize=True, classes=["dummay1", "dummy2"], cmap="Paired", ax=plt.gca()) From 473f131ec282c7b173d7c73c2e67a2cf638222a7 Mon Sep 17 00:00:00 2001 From: 
aarshayj Date: Thu, 6 Jul 2017 12:08:19 -0400 Subject: [PATCH 33/97] fixed typo --- sklearn/plot/_confusion_matrix.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 648fad9ce82e2..803ebbdfdc1d5 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -3,7 +3,7 @@ from sklearn.plot import plot_heatmap -def plot_confusion_matrix(y_true, y_pred, classes, sample_weights=None, +def plot_confusion_matrix(y_true, y_pred, classes, sample_weight=None, normalize=False, xlabel="Predicted Label", ylabel="True Label", title='Confusion matrix', cmap=None, vmin=None, @@ -68,7 +68,7 @@ def plot_confusion_matrix(y_true, y_pred, classes, sample_weights=None, import matplotlib.pyplot as plt - values = confusion_matrix(y_true, y_pred, sample_weights=sample_weights) + values = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) if normalize: values = values.astype('float') / values.sum(axis=1)[:, np.newaxis] From c86580a34432afa17913e266110ffa69aa035469 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Fri, 7 Jul 2017 13:13:55 -0400 Subject: [PATCH 34/97] fixed example --- examples/model_selection/plot_confusion_matrix.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/examples/model_selection/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py index 95df220d242a3..29853b9412681 100644 --- a/examples/model_selection/plot_confusion_matrix.py +++ b/examples/model_selection/plot_confusion_matrix.py @@ -29,7 +29,6 @@ from sklearn import svm, datasets from sklearn.model_selection import train_test_split -from sklearn.metrics import confusion_matrix from sklearn.plot import plot_confusion_matrix # import some data to play with @@ -46,19 +45,17 @@ classifier = svm.SVC(kernel='linear', C=0.01) y_pred = classifier.fit(X_train, y_train).predict(X_test) -# Compute confusion matrix -cnf_matrix = 
confusion_matrix(y_test, y_pred) np.set_printoptions(precision=2) # Plot non-normalized confusion matrix plt.figure() -plot_confusion_matrix(cnf_matrix, classes=class_names, +plot_confusion_matrix(y_test, y_pred, classes=class_names, title='Confusion matrix, without normalization', cmap=plt.cm.Blues) # Plot normalized confusion matrix plt.figure() -plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True, +plot_confusion_matrix(y_test, y_pred, classes=class_names, normalize=True, title='Normalized confusion matrix', cmap=plt.cm.Blues) From 2c10e41197afd0b7473147f40b0ec8e6bb87b385 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 25 Jul 2017 19:20:48 -0400 Subject: [PATCH 35/97] making classes optional --- sklearn/plot/_confusion_matrix.py | 17 +++++++++++++++-- sklearn/plot/tests/test_heatmap.py | 12 ++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 803ebbdfdc1d5..e06af3c9c0d43 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -1,9 +1,10 @@ import numpy as np from sklearn.metrics import confusion_matrix from sklearn.plot import plot_heatmap +from sklearn.utils.multiclass import unique_labels -def plot_confusion_matrix(y_true, y_pred, classes, sample_weight=None, +def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, normalize=False, xlabel="Predicted Label", ylabel="True Label", title='Confusion matrix', cmap=None, vmin=None, @@ -21,8 +22,10 @@ def plot_confusion_matrix(y_true, y_pred, classes, sample_weight=None, y_pred : array, shape = [n_samples] Estimated targets as returned by a classifier. - classes : list of strings + classes : list of strings, optional (default=None) The list of classes represented in the two-dimensional input array. 
+ If not passed in function call, the classes will be infered from + y_true and y_pred sample_weight : array-like of shape = [n_samples], optional Sample weights used to calculate the confusion matrix @@ -68,6 +71,16 @@ def plot_confusion_matrix(y_true, y_pred, classes, sample_weight=None, import matplotlib.pyplot as plt + unique_y = unique_labels(y_true, y_pred) + + if classes is None: + classes = unique_y + else: + if not set(classes).issuperset(set(unique_y)): + raise ValueError("`classes=%s` are not a superset of the unique", + "values of y_true and y_pred which are %s" % + (classes, unique_y)) + values = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) if normalize: diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index 51bf5a4873353..870263f421085 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -6,6 +6,7 @@ from sklearn.svm import SVC from sklearn.utils.testing import SkipTest from sklearn.utils.testing import assert_raises +from sklearn.utils.testing import assert_raise_message import numpy as np from numpy.random import (RandomState, randint) @@ -54,6 +55,17 @@ def test_confusion_matrix(): classes=["dummay1", "dummy2"], cmap="Paired", ax=plt.gca()) + # plot without passing classes explicitly + plot_confusion_matrix(array1, array2, + cmap="Paired", ax=plt.gca()) + + # y having different value than classes should raise error + expected_msg = ("`classes=[1,2]` are not a superset of the unique", + "values of y_true and y_pred which are [1,2,3]") + assert_raise_message(ValueError, expected_msg, + plot_confusion_matrix, array1, array2, + classes=[1, 2], ax=plt.gca()) + plt.draw() plt.close() From f26a1a519f5880527174f7a4e7fae24cc7868847 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Thu, 27 Jul 2017 12:38:42 -0400 Subject: [PATCH 36/97] some fixes --- sklearn/plot/_confusion_matrix.py | 24 +++--- sklearn/plot/_gridsearch_results.py | 109 ++++++++++++++++------------ 
sklearn/plot/_heatmap.py | 2 +- sklearn/plot/tests/test_heatmap.py | 5 +- 4 files changed, 79 insertions(+), 61 deletions(-) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index e06af3c9c0d43..42fcf7f789172 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -23,9 +23,9 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, Estimated targets as returned by a classifier. classes : list of strings, optional (default=None) - The list of classes represented in the two-dimensional input array. - If not passed in function call, the classes will be infered from - y_true and y_pred + The list of names of classes represented in the two-dimensional input + array. If not passed in function call, the classes will be inferred + from y_true and y_pred sample_weight : array-like of shape = [n_samples], optional Sample weights used to calculate the confusion matrix @@ -76,10 +76,11 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, if classes is None: classes = unique_y else: - if not set(classes).issuperset(set(unique_y)): - raise ValueError("`classes=%s` are not a superset of the unique", - "values of y_true and y_pred which are %s" % - (classes, unique_y)) + if len(classes) != len(unique_y): + raise ValueError("y_true and y_pred contain %d unique classes," + "which is not the same as %d" + "classes found in `classes=%s` paramter" % + (len(classes), len(unique_y), unique_y)) values = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) @@ -88,8 +89,11 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, fmt = fmt if normalize else '{:d}' - plot_heatmap(values, xticklabels=classes, yticklabels=classes, cmap=cmap, - xlabel=xlabel, ylabel=ylabel, vmin=vmin, vmax=vmax, ax=ax, - fmt=fmt, xtickrotation=xtickrotation, norm=norm) + img = plot_heatmap(values, xticklabels=classes, yticklabels=classes, + cmap=cmap, xlabel=xlabel,
ylabel=ylabel, vmin=vmin, + vmax=vmax, ax=ax, fmt=fmt, xtickrotation=xtickrotation, + norm=norm) plt.title(title) + + return img diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 587ff2ae81106..56cf657f0f1cf 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -2,6 +2,58 @@ from sklearn.plot import plot_heatmap +def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title): + import matplotlib.pyplot as plt + + param = params[0] + param_range = sorted(cv_results['param_%s' % param]) + train_scores_mean = cv_results['mean_train_score'] + train_scores_std = cv_results['std_train_score'] + test_scores_mean = cv_results['mean_test_score'] + test_scores_std = cv_results['std_test_score'] + + lw = 2 + plt.semilogx(param_range, train_scores_mean, + label="Training score", + color="darkorange", lw=lw) + plt.fill_between(param_range, train_scores_mean - train_scores_std, + train_scores_mean + train_scores_std, alpha=0.2, + color="darkorange", lw=lw) + + img = plt.semilogx(param_range, test_scores_mean, + label="Cross-validation score", + color="navy", lw=lw) + plt.fill_between(param_range, test_scores_mean - test_scores_std, + test_scores_mean + test_scores_std, alpha=0.2, + color="navy", lw=lw) + + plt.xlabel(param) + plt.legend() + ylabel = "Score" if ylabel is None else ylabel + plt.ylabel(ylabel) + plt.title(title) + plt.show() + return img + + +def _plot_2D_results(cv_results, params, metric, ax, xlabel, ylabel, + title, norm): + import matplotlib.pyplot as plt + + parameter1_values = np.unique(cv_results['param_%s' % params[0]]) + parameter2_values = np.unique(cv_results['param_%s' % params[1]]) + + scores = cv_results[metric].reshape(len(parameter1_values), + len(parameter2_values)) + + img = plot_heatmap(scores, cmap=plt.cm.hot, xlabel=xlabel, + ylabel=ylabel, xticklabels=parameter1_values, + yticklabels=parameter2_values, ax=ax, norm=norm) + plt.title(title) + plt.show() + 
return img + + def plot_gridsearch_results(cv_results, metric='mean_test_score', xlabel=None, ylabel=None, title='Grid Search Results', cmap=None, @@ -54,63 +106,24 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', norm : matplotlib normalizer Normalizer passed to pcolormesh function from matplotlib used to - generate the heatmap. + generate the heatmap. This is ignored if only 1 parameter is used in + grid search. """ - import matplotlib.pyplot as plt - params = sorted(cv_results['params'][0].keys()) nparams = len(params) + xlabel = params[0] if xlabel is None else xlabel + ylabel = params[1] if ylabel is None else ylabel if nparams == 1: - param = params[0] - param_range = sorted(cv_results['param_%s' % param]) - train_scores_mean = cv_results['mean_train_score'] - train_scores_std = cv_results['std_train_score'] - test_scores_mean = cv_results['mean_test_score'] - test_scores_std = cv_results['std_test_score'] - - lw = 2 - plt.semilogx(param_range, train_scores_mean, - label="Training score", - color="darkorange", lw=lw) - plt.fill_between(param_range, train_scores_mean - train_scores_std, - train_scores_mean + train_scores_std, alpha=0.2, - color="darkorange", lw=lw) - - plt.semilogx(param_range, test_scores_mean, - label="Cross-validation score", - color="navy", lw=lw) - plt.fill_between(param_range, test_scores_mean - test_scores_std, - test_scores_mean + test_scores_std, alpha=0.2, - color="navy", lw=lw) - - plt.title(title) - plt.xlabel(param) - plt.legend() - ylabel = "Score" if ylabel is None else ylabel - plt.ylabel(ylabel) - plt.show() + img = _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title) elif nparams == 2: - parameter1_values = np.unique(cv_results['param_%s' % params[0]]) - parameter2_values = np.unique(cv_results['param_%s' % params[1]]) - - scores = cv_results[metric].reshape(len(parameter1_values), - len(parameter2_values)) - - xlabel = params[0] if xlabel is None else xlabel - ylabel = params[1] if ylabel is 
None else ylabel - - plot_heatmap(scores, cmap=plt.cm.hot, xlabel=xlabel, ylabel=ylabel, - xticklabels=parameter1_values, - yticklabels=parameter2_values, - ax=ax, norm=norm) - - plt.title(title) - plt.show() + img = _plot_2D_results(cv_results, params, metric, ax, xlabel, + ylabel, title, norm) else: raise ValueError('Plot function supports upto 2 parameters in grid' 'search, got {0}.'.format(nparams)) - pass + + return img diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py index cfa0645c86e18..8b0c51d2cf5b9 100644 --- a/sklearn/plot/_heatmap.py +++ b/sklearn/plot/_heatmap.py @@ -90,4 +90,4 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, # not anti-diagonal matrix ax.invert_yaxis() - return ax + return img diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index 870263f421085..d135591f285d6 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -60,8 +60,9 @@ def test_confusion_matrix(): cmap="Paired", ax=plt.gca()) # y having different value than classes should raise error - expected_msg = ("`classes=[1,2]` are not a superset of the unique", - "values of y_true and y_pred which are [1,2,3]") + expected_msg = ("y_true and y_pred contain 3 unique classes, which is" + "not the same as 2 classes found in `classes=[1,2]`" + "paramter") assert_raise_message(ValueError, expected_msg, plot_confusion_matrix, array1, array2, classes=[1, 2], ax=plt.gca()) From 7d202ad578df56c20df33e3e08b6f18e25b47338 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Thu, 27 Jul 2017 13:31:50 -0400 Subject: [PATCH 37/97] fixed 1d case of grid_search_results --- sklearn/plot/_confusion_matrix.py | 26 +++++----- sklearn/plot/_gridsearch_results.py | 74 +++++++++++++++++------------ sklearn/plot/tests/test_heatmap.py | 6 ++- 3 files changed, 60 insertions(+), 46 deletions(-) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 42fcf7f789172..60df2fec50dc0 
100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -27,44 +27,44 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, array. If not passed in function call, the classes will be infered from y_true and y_pred - sample_weight : array-like of shape = [n_samples], optional + sample_weight : array-like of shape = [n_samples], optional (default=None) Sample weights used to calculate the confusion matrix - normalize : boolean, default=False + normalize : boolean, optional (default=False) If True, the confusion matrix will be normalized by row. - xlabel : string, default="Predicted Label" + xlabel : string, optional (default="Predicted Label") Label for the x-axis. - ylabel : string, default="True Label" + ylabel : string, optional (default="True Label") Label for the y-axis. - title : string, default="Confusion matrix" + title : string, optional (default="Confusion matrix") Title for the heatmap. - cmap : string or colormap - Matpotlib colormap to use. + cmap : string or colormap, optional (default=None) + Matpotlib colormap to use. If None, plt.cm.hot will be used. - vmin : int, float or None + vmin : int, float or None, optional (default=None) Minimum clipping value. This argument will be passed on to the pcolormesh function from matplotlib used to generate the heatmap. - vmax : int, float or None + vmax : int, float or None, optional (default=None) Maximum clipping value. This argument will be passed on to the pcolormesh function from matplotlib used to generate the heatmap. - ax : axes object or None + ax : axes object or None, optional (default=None) Matplotlib axes object to plot into. If None, the current axes are used. - fmt : string, default="{:.2f}" + fmt : string, optional (default="{:.2f}") Format string to convert value to text. This will be ignored if normalize argument is False. - xtickrotation : float, default=45 + xtickrotation : float, optional (default=45) Rotation of the xticklabels. 
- norm : matplotlib normalizer + norm : matplotlib normalizer, optional (default=None) Normalizer passed to pcolormesh function from matplotlib used to generate the heatmap. """ diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 56cf657f0f1cf..f27c0d53d1925 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -2,7 +2,8 @@ from sklearn.plot import plot_heatmap -def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title): +def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, + fmt, xtickrotation): import matplotlib.pyplot as plt param = params[0] @@ -13,20 +14,23 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title): test_scores_std = cv_results['std_test_score'] lw = 2 - plt.semilogx(param_range, train_scores_mean, - label="Training score", - color="darkorange", lw=lw) - plt.fill_between(param_range, train_scores_mean - train_scores_std, + x_vales = range(len(param_range)) + plt.plot(x_vales, train_scores_mean, + label="Training score", + color="darkorange", lw=lw) + plt.fill_between(x_vales, train_scores_mean - train_scores_std, train_scores_mean + train_scores_std, alpha=0.2, color="darkorange", lw=lw) - img = plt.semilogx(param_range, test_scores_mean, - label="Cross-validation score", - color="navy", lw=lw) - plt.fill_between(param_range, test_scores_mean - test_scores_std, + img = plt.plot(x_vales, test_scores_mean, + label="Cross-validation score", + color="navy", lw=lw) + plt.fill_between(x_vales, test_scores_mean - test_scores_std, test_scores_mean + test_scores_std, alpha=0.2, color="navy", lw=lw) - + plt.xticks(x_vales, [fmt.format(x) for x in param_range], + rotation=xtickrotation) + xlabel = params[0] if xlabel is None else xlabel plt.xlabel(param) plt.legend() ylabel = "Score" if ylabel is None else ylabel @@ -36,8 +40,9 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title): return img -def 
_plot_2D_results(cv_results, params, metric, ax, xlabel, ylabel, - title, norm): +def _plot_2D_results(cv_results, params, metric, ax, xlabel, + ylabel, title, cmap, vmin, vmax, fmt, + xtickrotation, norm): import matplotlib.pyplot as plt parameter1_values = np.unique(cv_results['param_%s' % params[0]]) @@ -46,9 +51,16 @@ def _plot_2D_results(cv_results, params, metric, ax, xlabel, ylabel, scores = cv_results[metric].reshape(len(parameter1_values), len(parameter2_values)) - img = plot_heatmap(scores, cmap=plt.cm.hot, xlabel=xlabel, - ylabel=ylabel, xticklabels=parameter1_values, - yticklabels=parameter2_values, ax=ax, norm=norm) + xlabel = params[0] if xlabel is None else xlabel + ylabel = params[1] if ylabel is None else ylabel + + cmap = cmap if cmap is not None else plt.cm.hot + + img = plot_heatmap(scores, xlabel=xlabel, ylabel=ylabel, + xticklabels=parameter1_values, + yticklabels=parameter2_values, cmap=cmap, + vmin=vmin, vmax=vmax, fmt=fmt, ax=ax, + xtickrotation=xtickrotation, norm=norm) plt.title(title) plt.show() return img @@ -68,43 +80,43 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', cv_results : dict of numpy (masked) ndarrays The cv_results_ attribute of the GridSearchCV object. - xlabel : string, default=None + xlabel : string, optional (default=None) Label for the x-axis. If None, the first key of the param_grid will be used as the xlabel. - ylabel : string, default=None + ylabel : string, optional (default=None) Label for the y-axis. If None, the second key of the param_grid will be used as the ylabel. - metric : string, default="mean_test_score" + metric : string, optional (default="mean_test_score") The metric from the GridSearchCV results to display. This is ignored if only 1 parameter is used in grid search. - title : string, default="Grid Search Results" + title : string, optional (default="Grid Search Results") Title for the heatmap. - cmap : string or colormap - Matpotlib colormap to use. 
+ cmap : string or colormap, optional (default=None) + Matpotlib colormap to use. If None, plt.cm.hot will be used. - vmin : int, float or None + vmin : int, float or None, optional (default=None) Minimum clipping value. This argument will be passed on to the pcolormesh function from matplotlib used to generate the heatmap. - vmax : int, float or None + vmax : int, float or None, optional (default=None) Maximum clipping value. This argument will be passed on to the pcolormesh function from matplotlib used to generate the heatmap. - ax : axes object or None + ax : axes object or None, optional (default=None) Matplotlib axes object to plot into. If None, the current axes are used. - fmt : string, default="{:.2f}" + fmt : string, optional (default="{:.2f}") Format string to convert value to text. - xtickrotation : float, default=45 + xtickrotation : float, optional (default=45) Rotation of the xticklabels. - norm : matplotlib normalizer + norm : matplotlib normalizer, optional (default=None) Normalizer passed to pcolormesh function from matplotlib used to generate the heatmap. This is ignored if only 1 parameter is used in grid search. 
@@ -112,15 +124,15 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', params = sorted(cv_results['params'][0].keys()) nparams = len(params) - xlabel = params[0] if xlabel is None else xlabel - ylabel = params[1] if ylabel is None else ylabel if nparams == 1: - img = _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title) + img = _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, + fmt, xtickrotation) elif nparams == 2: img = _plot_2D_results(cv_results, params, metric, ax, xlabel, - ylabel, title, norm) + ylabel, title, cmap, vmin, vmax, fmt, + xtickrotation, norm) else: raise ValueError('Plot function supports upto 2 parameters in grid' diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index d135591f285d6..1a89536c17202 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -5,7 +5,6 @@ from sklearn.model_selection import GridSearchCV from sklearn.svm import SVC from sklearn.utils.testing import SkipTest -from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message import numpy as np from numpy.random import (RandomState, @@ -147,6 +146,9 @@ def test_gridsearch_results_3d(): with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): plt.figure() - assert_raises(ValueError, plot_gridsearch_results, grid.cv_results_) + expected_msg = ('Plot function supports upto 2 parameters in grid' + 'search, got 3.') + assert_raise_message(ValueError, expected_msg, + plot_gridsearch_results, grid.cv_results_) plt.draw() plt.close() From a5602d470623ae0a84516bd18748fe3365fb84b9 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Thu, 27 Jul 2017 14:27:21 -0400 Subject: [PATCH 38/97] fixed confusion matrix test --- sklearn/plot/_confusion_matrix.py | 8 ++++---- sklearn/plot/_gridsearch_results.py | 2 +- sklearn/plot/tests/test_heatmap.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git 
a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 60df2fec50dc0..af0ffeda548b8 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -77,10 +77,10 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, classes = unique_y else: if len(classes) != len(unique_y): - raise ValueError("y_true and y_pred contain %d unique classes," - "which is not the same as %d" - "classes found in `classes=%s` paramter" % - (len(classes), len(unique_y), unique_y)) + raise ValueError("y_true and y_pred contain %d unique classes, " + "which is not the same as %d " + "classes found in `classes=%s` parameter" % + (len(unique_y), len(classes), classes)) values = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index f27c0d53d1925..1c65ae49d8404 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -136,6 +136,6 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', else: raise ValueError('Plot function supports upto 2 parameters in grid' - 'search, got {0}.'.format(nparams)) + 'search, got %d.' 
% nparams) return img diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index 1a89536c17202..a50c20f384113 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -59,12 +59,12 @@ def test_confusion_matrix(): cmap="Paired", ax=plt.gca()) # y having different value than classes should raise error - expected_msg = ("y_true and y_pred contain 3 unique classes, which is" - "not the same as 2 classes found in `classes=[1,2]`" - "paramter") + expected_msg = ("y_true and y_pred contain 2 unique classes, which is" + " not the same as 3 classes found in " + "`classes=[1, 2, 3]` parameter") assert_raise_message(ValueError, expected_msg, plot_confusion_matrix, array1, array2, - classes=[1, 2], ax=plt.gca()) + classes=[1, 2, 3], ax=plt.gca()) plt.draw() plt.close() From 3192dfdde185f71e09a49b33298ecd1d296e874d Mon Sep 17 00:00:00 2001 From: aarshayj Date: Thu, 27 Jul 2017 14:54:53 -0400 Subject: [PATCH 39/97] working on axes not plt --- sklearn/plot/_gridsearch_results.py | 42 ++++++++++++++++------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 1c65ae49d8404..e98bed94e2e23 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -5,6 +5,8 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, fmt, xtickrotation): import matplotlib.pyplot as plt + if ax is None: + ax = plt.gca() param = params[0] param_range = sorted(cv_results['param_%s' % param]) @@ -15,27 +17,29 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, lw = 2 x_vales = range(len(param_range)) - plt.plot(x_vales, train_scores_mean, - label="Training score", - color="darkorange", lw=lw) - plt.fill_between(x_vales, train_scores_mean - train_scores_std, - train_scores_mean + train_scores_std, alpha=0.2, - color="darkorange", lw=lw) - - img = plt.plot(x_vales, 
test_scores_mean, - label="Cross-validation score", - color="navy", lw=lw) - plt.fill_between(x_vales, test_scores_mean - test_scores_std, - test_scores_mean + test_scores_std, alpha=0.2, - color="navy", lw=lw) - plt.xticks(x_vales, [fmt.format(x) for x in param_range], - rotation=xtickrotation) + ax.plot(x_vales, train_scores_mean, + label="Training score", + color="darkorange", lw=lw) + ax.fill_between(x_vales, train_scores_mean - train_scores_std, + train_scores_mean + train_scores_std, alpha=0.2, + color="darkorange", lw=lw) + + img = ax.plot(x_vales, test_scores_mean, + label="Cross-validation score", + color="navy", lw=lw) + ax.fill_between(x_vales, test_scores_mean - test_scores_std, + test_scores_mean + test_scores_std, alpha=0.2, + color="navy", lw=lw) + ax.set_xticks(x_vales) + ax.set_xticklabels([fmt.format(x) for x in param_range], + rotation=xtickrotation) + xlabel = params[0] if xlabel is None else xlabel - plt.xlabel(param) - plt.legend() + ax.set_xlabel(param) + ax.legend() ylabel = "Score" if ylabel is None else ylabel - plt.ylabel(ylabel) - plt.title(title) + ax.set_ylabel(ylabel) + ax.set_title(title) plt.show() return img From 37c96306eb632674082f2ea1108904f7a2eebbcf Mon Sep 17 00:00:00 2001 From: aarshayj Date: Fri, 28 Jul 2017 15:15:29 -0400 Subject: [PATCH 40/97] adding title to plot_heatmap, removing plt.show from within API --- examples/model_selection/plot_confusion_matrix.py | 2 +- sklearn/plot/_confusion_matrix.py | 10 +++------- sklearn/plot/_gridsearch_results.py | 6 ++---- sklearn/plot/_heatmap.py | 12 ++++++++++-- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/examples/model_selection/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py index 29853b9412681..804d4ba6d9354 100644 --- a/examples/model_selection/plot_confusion_matrix.py +++ b/examples/model_selection/plot_confusion_matrix.py @@ -56,7 +56,7 @@ # Plot normalized confusion matrix plt.figure() plot_confusion_matrix(y_test, y_pred, 
classes=class_names, normalize=True, - title='Normalized confusion matrix', + title='Confusion matrix, with normalization', cmap=plt.cm.Blues) plt.show() diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index af0ffeda548b8..1c2c8f9a22bd6 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -69,8 +69,6 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, generate the heatmap. """ - import matplotlib.pyplot as plt - unique_y = unique_labels(y_true, y_pred) if classes is None: @@ -90,10 +88,8 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, fmt = fmt if normalize else '{:d}' img = plot_heatmap(values, xticklabels=classes, yticklabels=classes, - cmap=cmap, xlabel=xlabel, ylabel=ylabel, vmin=vmin, - vmax=vmax, ax=ax, fmt=fmt, xtickrotation=xtickrotation, - norm=norm) - - plt.title(title) + cmap=cmap, xlabel=xlabel, ylabel=ylabel, title=title, + vmin=vmin, vmax=vmax, ax=ax, fmt=fmt, + xtickrotation=xtickrotation, norm=norm) return img diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index e98bed94e2e23..0d3952cac1999 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -40,7 +40,6 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, ylabel = "Score" if ylabel is None else ylabel ax.set_ylabel(ylabel) ax.set_title(title) - plt.show() return img @@ -62,11 +61,10 @@ def _plot_2D_results(cv_results, params, metric, ax, xlabel, img = plot_heatmap(scores, xlabel=xlabel, ylabel=ylabel, xticklabels=parameter1_values, - yticklabels=parameter2_values, cmap=cmap, + yticklabels=parameter2_values, + title=title, cmap=cmap, vmin=vmin, vmax=vmax, fmt=fmt, ax=ax, xtickrotation=xtickrotation, norm=norm) - plt.title(title) - plt.show() return img diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py index 8b0c51d2cf5b9..989c170cacd53 100644 --- 
a/sklearn/plot/_heatmap.py +++ b/sklearn/plot/_heatmap.py @@ -2,8 +2,9 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, - yticklabels=None, cmap=None, vmin=None, vmax=None, ax=None, - fmt="{:.2f}", xtickrotation=45, norm=None): + yticklabels=None, title=None, cmap=None, vmin=None, + vmax=None, ax=None, fmt="{:.2f}", xtickrotation=45, + norm=None): """Plot a matrix as heatmap with explicit numbers. Parameters @@ -23,6 +24,9 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, yticklabels : list of string or None, default=None Tick labels for the y-axis + title : string or None, default=None + Title of the chart + cmap : string or colormap Matpotlib colormap to use. @@ -90,4 +94,8 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, # not anti-diagonal matrix ax.invert_yaxis() + # set title if not none: + if title is not None: + ax.set_title(title) + return img From d2a91fc50518e7a0c3fc9c49bf70101ea4e371b2 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Fri, 28 Jul 2017 15:25:21 -0400 Subject: [PATCH 41/97] added section to validation curve example --- .../model_selection/plot_validation_curve.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/examples/model_selection/plot_validation_curve.py b/examples/model_selection/plot_validation_curve.py index ed74a41ff100b..ae335b3cf89a9 100644 --- a/examples/model_selection/plot_validation_curve.py +++ b/examples/model_selection/plot_validation_curve.py @@ -49,3 +49,31 @@ color="navy", lw=lw) plt.legend(loc="best") plt.show() + +##################################################################### +""" +The same plot can also be generated using a combination of GridSearchCV and + plotting module of scikit-learn. 
+""" + +print(__doc__) + +import matplotlib.pyplot as plt +import numpy as np + +from sklearn.datasets import load_digits +from sklearn.svm import SVC +from sklearn.model_selection import GridSearchCV +from sklearn.plot import plot_gridsearch_results + +digits = load_digits() +X, y = digits.data, digits.target + +param_grid = {'gamma': np.logspace(-6, -1, 5)} +gs = GridSearchCV(SVC(), + param_grid=param_grid, + cv=10, scoring="accuracy") + +gs.fit(X, y) +plot_gridsearch_results(gs.cv_results_) +plt.show() From f87f6430970a03f1026fa56640d7e048b07b7c24 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Fri, 28 Jul 2017 17:08:24 -0400 Subject: [PATCH 42/97] sphinx syntax fix --- examples/model_selection/plot_validation_curve.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/model_selection/plot_validation_curve.py b/examples/model_selection/plot_validation_curve.py index ae335b3cf89a9..c345b77cd3176 100644 --- a/examples/model_selection/plot_validation_curve.py +++ b/examples/model_selection/plot_validation_curve.py @@ -51,10 +51,8 @@ plt.show() ##################################################################### -""" -The same plot can also be generated using a combination of GridSearchCV and - plotting module of scikit-learn. -""" +# The same plot can also be generated using a combination of GridSearchCV and +# plotting module of scikit-learn. 
print(__doc__) From ba49b5c857f2a91aa4ea4b14f7f70a141e6b39f0 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 1 Aug 2017 11:02:24 -0400 Subject: [PATCH 43/97] matplotlib new figure creation modified --- sklearn/plot/_gridsearch_results.py | 4 +++- sklearn/plot/_heatmap.py | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 0d3952cac1999..6d51402455bc2 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -6,7 +6,8 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, fmt, xtickrotation): import matplotlib.pyplot as plt if ax is None: - ax = plt.gca() + fig = plt.figure() + ax = fig.add_subplot(111) param = params[0] param_range = sorted(cv_results['param_%s' % param]) @@ -40,6 +41,7 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, ylabel = "Score" if ylabel is None else ylabel ax.set_ylabel(ylabel) ax.set_title(title) + plt.draw() return img diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py index 989c170cacd53..020e1394c1f6f 100644 --- a/sklearn/plot/_heatmap.py +++ b/sklearn/plot/_heatmap.py @@ -52,7 +52,8 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, import matplotlib.pyplot as plt if ax is None: - ax = plt.gca() + fig = plt.figure() + ax = fig.add_subplot(111) img = ax.pcolormesh(values, cmap=cmap, vmin=vmin, vmax=vmax, norm=norm) # this will allow us to access the pixel values: @@ -98,4 +99,6 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, if title is not None: ax.set_title(title) + plt.draw() + return img From 3c74c7fbc338c0f91fe8d33116c7ea821e3e992d Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 1 Aug 2017 12:51:23 -0400 Subject: [PATCH 44/97] define axis closer to public layer --- sklearn/plot/_confusion_matrix.py | 5 +++++ sklearn/plot/_gridsearch_results.py | 8 +++++--- 2 files changed, 10 insertions(+), 3 deletions(-) 
diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 1c2c8f9a22bd6..2ca9e10ac44b4 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -68,6 +68,7 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, Normalizer passed to pcolormesh function from matplotlib used to generate the heatmap. """ + import matplotlib.pyplot as plt unique_y = unique_labels(y_true, y_pred) @@ -87,6 +88,10 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, fmt = fmt if normalize else '{:d}' + if ax is None: + fig = plt.figure() + ax = fig.add_subplot(111) + img = plot_heatmap(values, xticklabels=classes, yticklabels=classes, cmap=cmap, xlabel=xlabel, ylabel=ylabel, title=title, vmin=vmin, vmax=vmax, ax=ax, fmt=fmt, diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 6d51402455bc2..c1d1f01031b53 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -5,9 +5,6 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, fmt, xtickrotation): import matplotlib.pyplot as plt - if ax is None: - fig = plt.figure() - ax = fig.add_subplot(111) param = params[0] param_range = sorted(cv_results['param_%s' % param]) @@ -125,10 +122,15 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', generate the heatmap. This is ignored if only 1 parameter is used in grid search. 
""" + import matplotlib.pyplot as plt params = sorted(cv_results['params'][0].keys()) nparams = len(params) + if ax is None: + fig = plt.figure() + ax = fig.add_subplot(111) + if nparams == 1: img = _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, fmt, xtickrotation) From d7c0a305ef298dacc6cf289862c9eea6b7e64115 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Thu, 3 Aug 2017 11:19:00 -0400 Subject: [PATCH 45/97] removed plt.draw() --- sklearn/plot/_gridsearch_results.py | 3 --- sklearn/plot/_heatmap.py | 2 -- 2 files changed, 5 deletions(-) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index c1d1f01031b53..f5d74c834143f 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -4,8 +4,6 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, fmt, xtickrotation): - import matplotlib.pyplot as plt - param = params[0] param_range = sorted(cv_results['param_%s' % param]) train_scores_mean = cv_results['mean_train_score'] @@ -38,7 +36,6 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, ylabel = "Score" if ylabel is None else ylabel ax.set_ylabel(ylabel) ax.set_title(title) - plt.draw() return img diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py index 020e1394c1f6f..d098bd82cc927 100644 --- a/sklearn/plot/_heatmap.py +++ b/sklearn/plot/_heatmap.py @@ -99,6 +99,4 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, if title is not None: ax.set_title(title) - plt.draw() - return img From 432a524bab90f28d677b8a94ae522c788b660948 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Sat, 12 Aug 2017 19:03:04 -0400 Subject: [PATCH 46/97] docstring split lines --- sklearn/plot/_confusion_matrix.py | 8 +++++--- sklearn/plot/_gridsearch_results.py | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 2ca9e10ac44b4..aa205db564706 100644 --- 
a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -10,9 +10,11 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, title='Confusion matrix', cmap=None, vmin=None, vmax=None, ax=None, fmt="{:.2f}", xtickrotation=45, norm=None): - """Plot the confusion matrix as a heatmap. A confusion matrix is computed - using `y_true`, `y_pred` and `sample_weights` arguments. Normalization - can be applied by setting `normalize=True`. + """Plot confusion matrix as a heatmap. + + A confusion matrix is computed using `y_true`, `y_pred` and + `sample_weights` arguments. Normalization can be applied by setting + `normalize=True`. Parameters ---------- diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index f5d74c834143f..7a902903a6ec0 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -69,7 +69,9 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', title='Grid Search Results', cmap=None, vmin=None, vmax=None, ax=None, fmt="{:.2f}", xtickrotation=45, norm=None): - """Plot the grid search results as a line chart for 1D search and heatmap + """Plot grid search results. + + The results are plotted as a line chart for 1D search and as a heatmap for a 2D search. This function will not work if grid-search has more than 2 parameters in the search space. 
From 226690d4dbbf8c477c6a7b5cd2c12aa5ec74d3fb Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 19 Dec 2016 11:41:27 -0500 Subject: [PATCH 47/97] add plotting module and "plot_heatmap" function --- sklearn/plot/__init__.py | 3 ++ sklearn/plot/_heatmap.py | 73 ++++++++++++++++++++++++++++++ sklearn/plot/tests/__init__.py | 0 sklearn/plot/tests/test_heatmap.py | 24 ++++++++++ sklearn/setup.py | 2 + 5 files changed, 102 insertions(+) create mode 100644 sklearn/plot/__init__.py create mode 100644 sklearn/plot/_heatmap.py create mode 100644 sklearn/plot/tests/__init__.py create mode 100644 sklearn/plot/tests/test_heatmap.py diff --git a/sklearn/plot/__init__.py b/sklearn/plot/__init__.py new file mode 100644 index 0000000000000..7cff0ad0ee943 --- /dev/null +++ b/sklearn/plot/__init__.py @@ -0,0 +1,3 @@ +from ._heatmap import plot_heatmap + +__all__ = ["plot_heatmap"] diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py new file mode 100644 index 0000000000000..92a38d3bc8104 --- /dev/null +++ b/sklearn/plot/_heatmap.py @@ -0,0 +1,73 @@ +import numpy as np + + +def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, + yticklabels=None, cmap=None, vmin=None, vmax=None, ax=None, + fmt="{:.2f}"): + """Plot a matrix as heatmap with explicit numbers. + + Parameters + ---------- + values : ndarray + Two-dimensional array to visualize. + + xlabel : string, default="" + Label for the x-axis. + + ylabel : string, default="" + Label for the y-axis. + + xticklabels : list of string or None, default=None + Tick labels for the x-axis. + + yticklabels : list of string or None, default=None + Tick labels for the y-axis + + cmap : string or colormap + Matpotlib colormap to use. + + vmin : int, float or None + Minimum clipping value. + + vmax : int, float or None + Maximum clipping value. + + ax : axes object or None + Matplotlib axes object to plot into. If None, the current axes are + used. 
+ + fmt : string, default="{:.2f}" + Format string to convert value to text. + """ + import matplotlib.pyplot as plt + if ax is None: + ax = plt.gca() + img = ax.pcolor(values, cmap=cmap, vmin=None, vmax=None) + # this will allow us to access the pixel values: + img.update_scalarmappable() + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + if xticklabels is None: + xticklabels = [""] * values.shape[1] + if yticklabels is None: + yticklabels = [""] * values.shape[0] + + ax.xaxis.set_ticks_position('bottom') + + # +.5 makes the ticks centered on the pixels + ax.set_xticks(np.arange(values.shape[1]) + .5) + ax.set_xticklabels(xticklabels, ha="center") + ax.set_yticks(np.arange(values.shape[0]) + .5) + ax.set_yticklabels(yticklabels, va="center") + ax.set_aspect(1) + + for p, color, value in zip(img.get_paths(), img.get_facecolors(), + img.get_array()): + x, y = p.vertices[:-2, :].mean(0) + if np.mean(color[:3]) > 0.5: + # pixel bright: use black for number + c = 'k' + else: + c = 'w' + ax.text(x, y, fmt.format(value), color=c, ha="center", va="center") + return ax diff --git a/sklearn/plot/tests/__init__.py b/sklearn/plot/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py new file mode 100644 index 0000000000000..fb27bdf0ae9be --- /dev/null +++ b/sklearn/plot/tests/test_heatmap.py @@ -0,0 +1,24 @@ +from sklearn.plot import plot_heatmap +from sklearn.utils.testing import SkipTest +import numpy as np + + +def test_heatmap(): + try: + import matplotlib + except ImportError: + raise SkipTest("Not testing plot_heatmap, matplotlib not installed.") + + import matplotlib.pyplot as plt + + with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): + plt.figure() + rng = np.random.RandomState(0) + X = rng.normal(size=(10, 5)) + # use mixture of default values and keyword args + plot_heatmap(X, ylabel="y-axis", + xticklabels=["a", "b", "c", "d", "efgh"], + 
cmap="Paired", ax=plt.gca()) + + plt.draw() + plt.close() diff --git a/sklearn/setup.py b/sklearn/setup.py index 8adbbd9d49132..5bae68530cedc 100644 --- a/sklearn/setup.py +++ b/sklearn/setup.py @@ -36,6 +36,8 @@ def configuration(parent_package='', top_path=None): config.add_subpackage('model_selection/tests') config.add_subpackage('neural_network') config.add_subpackage('neural_network/tests') + config.add_subpackage('plot') + config.add_subpackage('plot/tests') config.add_subpackage('preprocessing') config.add_subpackage('preprocessing/tests') config.add_subpackage('semi_supervised') From b03874ca44255a98d118bb029ba421c6db35b9d6 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 19 Dec 2016 11:43:56 -0500 Subject: [PATCH 48/97] add plotting module to the API docs --- doc/modules/classes.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 5977e2f01a9b3..583c63a37343b 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -1395,6 +1395,25 @@ Low-level methods utils.validation.column_or_1d utils.validation.has_fit_parameter +:mod:`sklearn.plot`: Plotting functions +======================================= + +.. automodule:: sklearn.plot + :no-members: + :no-inherited-members: + +This module is experimental. Use at your own risk. +Use of this module requires the matplotlib library. + +.. currentmodule:: sklearn.plot + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + + plot_heatmap + + Recently deprecated =================== From c6919612bc667f1820200f2fae365638391817c0 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 19 Dec 2016 11:50:21 -0500 Subject: [PATCH 49/97] simplify plot_confusion_matrix example --- .../model_selection/plot_confusion_matrix.py | 28 ++++--------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/examples/model_selection/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py index 4b7c360988071..5625fa6d4b59f 100644 --- a/examples/model_selection/plot_confusion_matrix.py +++ b/examples/model_selection/plot_confusion_matrix.py @@ -24,15 +24,13 @@ """ -print(__doc__) - -import itertools import numpy as np import matplotlib.pyplot as plt from sklearn import svm, datasets from sklearn.model_selection import train_test_split from sklearn.metrics import confusion_matrix +from sklearn.plot import plot_heatmap # import some data to play with iris = datasets.load_iris() @@ -59,29 +57,15 @@ def plot_confusion_matrix(cm, classes, """ if normalize: cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] - print("Normalized confusion matrix") - else: - print('Confusion matrix, without normalization') + print(title) print(cm) - plt.imshow(cm, interpolation='nearest', cmap=cmap) + fmt = '{:.2f}' if normalize else '{:d}' + plot_heatmap(cm, xticklabels=classes, yticklabels=classes, cmap=cmap, + xlabel="Predicted label", ylabel="True label", fmt=fmt) + plt.title(title) - plt.colorbar() - tick_marks = np.arange(len(classes)) - plt.xticks(tick_marks, classes, rotation=45) - plt.yticks(tick_marks, classes) - - fmt = '.2f' if normalize else 'd' - thresh = cm.max() / 2. 
- for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): - plt.text(j, i, format(cm[i, j], fmt), - horizontalalignment="center", - color="white" if cm[i, j] > thresh else "black") - - plt.tight_layout() - plt.ylabel('True label') - plt.xlabel('Predicted label') # Compute confusion matrix cnf_matrix = confusion_matrix(y_test, y_pred) From f760d6689e662c77b69d3ef1baaecaad7cf87f6f Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 19 Dec 2016 12:18:08 -0500 Subject: [PATCH 50/97] add normalizer support to heatmap, use heatmap when plotting gridsearch results --- examples/svm/plot_rbf_parameters.py | 26 ++++++++++++-------------- sklearn/plot/_heatmap.py | 18 +++++++++++++----- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py index 3a909b2b422bf..789589d54bcec 100644 --- a/examples/svm/plot_rbf_parameters.py +++ b/examples/svm/plot_rbf_parameters.py @@ -51,11 +51,11 @@ Finally one can also observe that for some intermediate values of ``gamma`` we get equally performing models when ``C`` becomes very large: it is not -necessary to regularize by limiting the number of support vectors. The radius of -the RBF kernel alone acts as a good structural regularizer. In practice though -it might still be interesting to limit the number of support vectors with a -lower value of ``C`` so as to favor models that use less memory and that are -faster to predict. +necessary to regularize by limiting the number of support vectors. The radius +of the RBF kernel alone acts as a good structural regularizer. In practice +though it might still be interesting to limit the number of support vectors +with a lower value of ``C`` so as to favor models that use less memory and that +are faster to predict. We should also note that small differences in scores results from the random splits of the cross-validation procedure. Those spurious variations can be @@ -65,7 +65,6 @@ map. 
''' -print(__doc__) import numpy as np import matplotlib.pyplot as plt @@ -76,6 +75,9 @@ from sklearn.datasets import load_iris from sklearn.model_selection import StratifiedShuffleSplit from sklearn.model_selection import GridSearchCV +from sklearn.plot import plot_heatmap + +print(__doc__) # Utility function to move the midpoint of a colormap to be around @@ -184,14 +186,10 @@ def __call__(self, value, clip=None): # interesting range while not brutally collapsing all the low score values to # the same color. -plt.figure(figsize=(8, 6)) +plt.figure(figsize=(10, 10)) plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95) -plt.imshow(scores, interpolation='nearest', cmap=plt.cm.hot, - norm=MidpointNormalize(vmin=0.2, midpoint=0.92)) -plt.xlabel('gamma') -plt.ylabel('C') -plt.colorbar() -plt.xticks(np.arange(len(gamma_range)), gamma_range, rotation=45) -plt.yticks(np.arange(len(C_range)), C_range) +plot_heatmap(scores, cmap=plt.cm.hot, xlabel="gamma", ylabel="C", + xticklabels=gamma_range, yticklabels=C_range, + norm=MidpointNormalize(vmin=0.2, midpoint=0.92)) plt.title('Validation accuracy') plt.show() diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py index 92a38d3bc8104..74aabfd82bb97 100644 --- a/sklearn/plot/_heatmap.py +++ b/sklearn/plot/_heatmap.py @@ -3,7 +3,7 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, yticklabels=None, cmap=None, vmin=None, vmax=None, ax=None, - fmt="{:.2f}"): + fmt="{:.2f}", xtickrotation=45, norm=None): """Plot a matrix as heatmap with explicit numbers. Parameters @@ -38,25 +38,33 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, fmt : string, default="{:.2f}" Format string to convert value to text. + + xtickrotation : float, default=45 + Rotation of the xticklabels. 
+ + norm : matplotlib normalizer + Normalizer passed to pcolor """ import matplotlib.pyplot as plt if ax is None: ax = plt.gca() - img = ax.pcolor(values, cmap=cmap, vmin=None, vmax=None) + img = ax.pcolor(values, cmap=cmap, vmin=None, vmax=None, norm=norm) # this will allow us to access the pixel values: img.update_scalarmappable() ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) + + ax.set_xlim(0, values.shape[1]) + ax.set_ylim(0, values.shape[0]) + if xticklabels is None: xticklabels = [""] * values.shape[1] if yticklabels is None: yticklabels = [""] * values.shape[0] - ax.xaxis.set_ticks_position('bottom') - # +.5 makes the ticks centered on the pixels ax.set_xticks(np.arange(values.shape[1]) + .5) - ax.set_xticklabels(xticklabels, ha="center") + ax.set_xticklabels(xticklabels, ha="center", rotation=xtickrotation) ax.set_yticks(np.arange(values.shape[0]) + .5) ax.set_yticklabels(yticklabels, va="center") ax.set_aspect(1) From f04c07992eca066899f7adce5efc00224e9d20a8 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 19 Dec 2016 14:37:42 -0500 Subject: [PATCH 51/97] add plot to __all__ --- sklearn/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/__init__.py b/sklearn/__init__.py index 5f2278d1c8c37..b5e24514248a1 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -143,7 +143,7 @@ def config_context(**new_config): 'mixture', 'model_selection', 'multiclass', 'multioutput', 'naive_bayes', 'neighbors', 'neural_network', 'pipeline', 'preprocessing', 'random_projection', 'semi_supervised', - 'svm', 'tree', 'discriminant_analysis', + 'svm', 'tree', 'discriminant_analysis', 'plot', # Non-modules: 'clone'] From 661a1ff33eccdce0d2b8b2adc485857e010e8c45 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Sun, 11 Jun 2017 16:34:13 -0400 Subject: [PATCH 52/97] using pcolormesh + alignment fix --- sklearn/plot/_heatmap.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sklearn/plot/_heatmap.py 
b/sklearn/plot/_heatmap.py index 74aabfd82bb97..c6e11b92060e7 100644 --- a/sklearn/plot/_heatmap.py +++ b/sklearn/plot/_heatmap.py @@ -45,10 +45,11 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, norm : matplotlib normalizer Normalizer passed to pcolor """ + import matplotlib.pyplot as plt if ax is None: ax = plt.gca() - img = ax.pcolor(values, cmap=cmap, vmin=None, vmax=None, norm=norm) + img = ax.pcolormesh(values, cmap=cmap, vmin=None, vmax=None, norm=norm) # this will allow us to access the pixel values: img.update_scalarmappable() ax.set_xlabel(xlabel) @@ -72,6 +73,11 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, for p, color, value in zip(img.get_paths(), img.get_facecolors(), img.get_array()): x, y = p.vertices[:-2, :].mean(0) + + # adjusting x and y for alignment: + x = x - 1./6 + y = y + 1./6 + if np.mean(color[:3]) > 0.5: # pixel bright: use black for number c = 'k' From d28400b67f5b33b78f2505368b4525165f73ee7b Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 00:19:16 -0400 Subject: [PATCH 53/97] added confusion_matrix plot file --- sklearn/plot/_confusion_matrix.py | 68 +++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 sklearn/plot/_confusion_matrix.py diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py new file mode 100644 index 0000000000000..1e77bbdc883fc --- /dev/null +++ b/sklearn/plot/_confusion_matrix.py @@ -0,0 +1,68 @@ +import numpy as np +from sklearn.plot import plot_heatmap + + +def plot_confusion_matrix(values, classes, normalize=True, + xlabel="Predicted Label", ylabel="True Label", + title='Confusion matrix', cmap=None, vmin=None, + vmax=None, ax=None, fmt="{:.2f}", + xtickrotation=45, norm=None): + """Print and plot the confusion matrix. Normalization can be applied by + setting `normalize=True`. + + Parameters + ---------- + values : ndarray + Two-dimensional array to visualize. 
+ + classes : list of strings + The list of classes represented in the two-dimensional input array. + + normalize : boolean, default=True + If True, the confusion matrix will be normalized by row. + + xlabel : string, default="" + Label for the x-axis. + + ylabel : string, default="" + Label for the y-axis. + + cmap : string or colormap + Matpotlib colormap to use. + + vmin : int, float or None + Minimum clipping value. + + vmax : int, float or None + Maximum clipping value. + + ax : axes object or None + Matplotlib axes object to plot into. If None, the current axes are + used. + + fmt : string, default="{:.2f}" + Format string to convert value to text. This will be ignored if + normalize argument is False. + + xtickrotation : float, default=45 + Rotation of the xticklabels. + + norm : matplotlib normalizer + Normalizer passed to pcolor + """ + + import matplotlib.pyplot as plt + + if normalize: + values = values.astype('float') / values.sum(axis=1)[:, np.newaxis] + + print(title) + print(values) + + fmt = fmt if normalize else '{:d}' + + plot_heatmap(values, xticklabels=classes, yticklabels=classes, cmap=cmap, + xlabel=xlabel, ylabel=ylabel, vmin=vmin, vmax=vmax, ax=ax, + fmt=fmt, xtickrotation=xtickrotation, norm=norm) + + plt.title(title) From 434e9ec423ec8cebaaee4f9f8d13d75519dac2a8 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 00:19:48 -0400 Subject: [PATCH 54/97] made vmin and vmax pass through in heatplot plot function --- sklearn/plot/_heatmap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py index c6e11b92060e7..fb92a981ba14c 100644 --- a/sklearn/plot/_heatmap.py +++ b/sklearn/plot/_heatmap.py @@ -49,7 +49,7 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, import matplotlib.pyplot as plt if ax is None: ax = plt.gca() - img = ax.pcolormesh(values, cmap=cmap, vmin=None, vmax=None, norm=norm) + img = ax.pcolormesh(values, cmap=cmap, vmin=vmin, 
vmax=vmax, norm=norm) # this will allow us to access the pixel values: img.update_scalarmappable() ax.set_xlabel(xlabel) From 46c58b20992a863e28f98ec899c23f86cba21759 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 00:40:59 -0400 Subject: [PATCH 55/97] modified documentation plot_confusion_matrix --- sklearn/plot/_confusion_matrix.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 1e77bbdc883fc..a2ae4b7a630e1 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -2,13 +2,13 @@ from sklearn.plot import plot_heatmap -def plot_confusion_matrix(values, classes, normalize=True, +def plot_confusion_matrix(values, classes, normalize=False, xlabel="Predicted Label", ylabel="True Label", title='Confusion matrix', cmap=None, vmin=None, vmax=None, ax=None, fmt="{:.2f}", xtickrotation=45, norm=None): - """Print and plot the confusion matrix. Normalization can be applied by - setting `normalize=True`. + """Print and plot the confusion matrix as a heatmap. Normalization can be + applied by setting `normalize=True`. Parameters ---------- @@ -18,23 +18,25 @@ def plot_confusion_matrix(values, classes, normalize=True, classes : list of strings The list of classes represented in the two-dimensional input array. - normalize : boolean, default=True + normalize : boolean, default=False If True, the confusion matrix will be normalized by row. - xlabel : string, default="" + xlabel : string, default="Predicted Label" Label for the x-axis. - ylabel : string, default="" + ylabel : string, default="True Label" Label for the y-axis. cmap : string or colormap Matpotlib colormap to use. vmin : int, float or None - Minimum clipping value. + Minimum clipping value. This argument will be passed on to the + pcolormesh function from matplotlib used to generate the heatmap. vmax : int, float or None - Maximum clipping value. 
+ Maximum clipping value. This argument will be passed on to the + pcolormesh function from matplotlib used to generate the heatmap. ax : axes object or None Matplotlib axes object to plot into. If None, the current axes are @@ -48,7 +50,8 @@ def plot_confusion_matrix(values, classes, normalize=True, Rotation of the xticklabels. norm : matplotlib normalizer - Normalizer passed to pcolor + Normalizer passed to pcolormesh function from matplotlib used to + generate the heatmap. """ import matplotlib.pyplot as plt From d815fd33ad4fe7b4fbeac4f6f33972ff206a3010 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 00:42:13 -0400 Subject: [PATCH 56/97] updated __init__.py file to include confusion matrix plot --- sklearn/plot/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/plot/__init__.py b/sklearn/plot/__init__.py index 7cff0ad0ee943..fec45fff52116 100644 --- a/sklearn/plot/__init__.py +++ b/sklearn/plot/__init__.py @@ -1,3 +1,4 @@ from ._heatmap import plot_heatmap +from ._confusion_matrix import plot_confusion_matrix -__all__ = ["plot_heatmap"] +__all__ = ["plot_heatmap", "plot_confusion_matrix"] From c4828e245987f25d77fdf5e789dc116ddea909d2 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 00:43:08 -0400 Subject: [PATCH 57/97] plot confusion matrix example updated to use new function --- .../model_selection/plot_confusion_matrix.py | 29 ++++--------------- 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/examples/model_selection/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py index 5625fa6d4b59f..95df220d242a3 100644 --- a/examples/model_selection/plot_confusion_matrix.py +++ b/examples/model_selection/plot_confusion_matrix.py @@ -30,7 +30,7 @@ from sklearn import svm, datasets from sklearn.model_selection import train_test_split from sklearn.metrics import confusion_matrix -from sklearn.plot import plot_heatmap +from sklearn.plot import plot_confusion_matrix # 
import some data to play with iris = datasets.load_iris() @@ -46,27 +46,6 @@ classifier = svm.SVC(kernel='linear', C=0.01) y_pred = classifier.fit(X_train, y_train).predict(X_test) - -def plot_confusion_matrix(cm, classes, - normalize=False, - title='Confusion matrix', - cmap=plt.cm.Blues): - """ - This function prints and plots the confusion matrix. - Normalization can be applied by setting `normalize=True`. - """ - if normalize: - cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] - - print(title) - print(cm) - - fmt = '{:.2f}' if normalize else '{:d}' - plot_heatmap(cm, xticklabels=classes, yticklabels=classes, cmap=cmap, - xlabel="Predicted label", ylabel="True label", fmt=fmt) - - plt.title(title) - # Compute confusion matrix cnf_matrix = confusion_matrix(y_test, y_pred) np.set_printoptions(precision=2) @@ -74,11 +53,13 @@ def plot_confusion_matrix(cm, classes, # Plot non-normalized confusion matrix plt.figure() plot_confusion_matrix(cnf_matrix, classes=class_names, - title='Confusion matrix, without normalization') + title='Confusion matrix, without normalization', + cmap=plt.cm.Blues) # Plot normalized confusion matrix plt.figure() plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True, - title='Normalized confusion matrix') + title='Normalized confusion matrix', + cmap=plt.cm.Blues) plt.show() From aa28778c0b053143bbdf5daee0c548d332c002f2 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 01:49:42 -0400 Subject: [PATCH 58/97] make matrix diagonal --- sklearn/plot/_heatmap.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py index fb92a981ba14c..cfa0645c86e18 100644 --- a/sklearn/plot/_heatmap.py +++ b/sklearn/plot/_heatmap.py @@ -50,6 +50,7 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, if ax is None: ax = plt.gca() img = ax.pcolormesh(values, cmap=cmap, vmin=vmin, vmax=vmax, norm=norm) + # this will allow us to access the pixel values: 
img.update_scalarmappable() ax.set_xlabel(xlabel) @@ -84,4 +85,9 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, else: c = 'w' ax.text(x, y, fmt.format(value), color=c, ha="center", va="center") + + # Invert the y-axis so that the matrix looks like a diagonal matrix and + # not anti-diagonal matrix + ax.invert_yaxis() + return ax From 1f36c0f372e068b15aa812edf2f677ccc63509d3 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 02:42:55 -0400 Subject: [PATCH 59/97] modify documentation --- sklearn/plot/_confusion_matrix.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index a2ae4b7a630e1..36579f68a1ab5 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -27,6 +27,9 @@ def plot_confusion_matrix(values, classes, normalize=False, ylabel : string, default="True Label" Label for the y-axis. + title : string, default="Confusion matrix" + Title for the heatmap. + cmap : string or colormap Matpotlib colormap to use. From 2e5f141e7ea041e4fa03b4959e78b8df48495b57 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 02:43:45 -0400 Subject: [PATCH 60/97] adding grid search results plotting function. 
--- sklearn/plot/__init__.py | 3 +- sklearn/plot/_gridsearch_results.py | 79 +++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 sklearn/plot/_gridsearch_results.py diff --git a/sklearn/plot/__init__.py b/sklearn/plot/__init__.py index fec45fff52116..c63653af62156 100644 --- a/sklearn/plot/__init__.py +++ b/sklearn/plot/__init__.py @@ -1,4 +1,5 @@ from ._heatmap import plot_heatmap from ._confusion_matrix import plot_confusion_matrix +from ._gridsearch_results import plot_gridsearch_results -__all__ = ["plot_heatmap", "plot_confusion_matrix"] +__all__ = ["plot_heatmap", "plot_confusion_matrix", "plot_gridsearch_results"] diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py new file mode 100644 index 0000000000000..7f671e8932977 --- /dev/null +++ b/sklearn/plot/_gridsearch_results.py @@ -0,0 +1,79 @@ +import numpy as np +from sklearn.plot import plot_heatmap + + +def plot_gridsearch_results(cv_results, param_grid, metric='mean_test_score', + xlabel=None, ylabel=None, + title='Grid Search Results', cmap=None, + vmin=None, vmax=None, ax=None, fmt="{:.2f}", + xtickrotation=45, norm=None): + """Print and plot the confusion matrix as a heatmap. Normalization can be + applied by setting `normalize=True`. + + Parameters + ---------- + cv_results : dict of numpy (masked) ndarrays + The cv_results_ attribute of the GridSearchCV object. + + param_grid : dict + A dictionary with keys as the two parameters searched and values + as a list of their respective values searched. + + xlabel : string, default=None + Label for the x-axis. If None, the first key of the param_grid will + be used as the xlabel. + + ylabel : string, default=None + Label for the y-axis. If None, the second key of the param_grid will + be used as the ylabel. + + metric : string, default="mean_test_score" + The metric from the GridSearchCV results to display. 
+ + title : string, default="Grid Search Results" + Title for the heatmap. + + cmap : string or colormap + Matpotlib colormap to use. + + vmin : int, float or None + Minimum clipping value. This argument will be passed on to the + pcolormesh function from matplotlib used to generate the heatmap. + + vmax : int, float or None + Maximum clipping value. This argument will be passed on to the + pcolormesh function from matplotlib used to generate the heatmap. + + ax : axes object or None + Matplotlib axes object to plot into. If None, the current axes are + used. + + fmt : string, default="{:.2f}" + Format string to convert value to text. + + xtickrotation : float, default=45 + Rotation of the xticklabels. + + norm : matplotlib normalizer + Normalizer passed to pcolormesh function from matplotlib used to + generate the heatmap. + """ + + import matplotlib.pyplot as plt + + parameter1, parameter2 = param_grid.keys() + parameter1_values = param_grid[parameter1] + parameter2_values = param_grid[parameter2] + + scores = cv_results[metric].reshape(len(parameter1_values), + len(parameter2_values)) + + xlabel = parameter1 if xlabel is None else xlabel + ylabel = parameter2 if ylabel is None else ylabel + + plot_heatmap(scores, cmap=plt.cm.hot, xlabel=xlabel, ylabel=ylabel, + xticklabels=parameter1_values, yticklabels=parameter2_values, + ax=ax, norm=norm) + + plt.title(title) + plt.show() From 1cf525d880888e45186be073f9e3a1303367b222 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 02:44:33 -0400 Subject: [PATCH 61/97] modified examples/svm/plot_rbf_parameters.py with new function --- examples/svm/plot_rbf_parameters.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py index 789589d54bcec..2760c2259a112 100644 --- a/examples/svm/plot_rbf_parameters.py +++ b/examples/svm/plot_rbf_parameters.py @@ -75,7 +75,7 @@ from sklearn.datasets import load_iris from 
sklearn.model_selection import StratifiedShuffleSplit from sklearn.model_selection import GridSearchCV -from sklearn.plot import plot_heatmap +from sklearn.plot import plot_gridsearch_results print(__doc__) @@ -174,9 +174,6 @@ def __call__(self, value, clip=None): plt.yticks(()) plt.axis('tight') -scores = grid.cv_results_['mean_test_score'].reshape(len(C_range), - len(gamma_range)) - # Draw heatmap of the validation accuracy as a function of gamma and C # # The score are encoded as colors with the hot colormap which varies from dark @@ -188,8 +185,7 @@ def __call__(self, value, clip=None): plt.figure(figsize=(10, 10)) plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95) -plot_heatmap(scores, cmap=plt.cm.hot, xlabel="gamma", ylabel="C", - xticklabels=gamma_range, yticklabels=C_range, - norm=MidpointNormalize(vmin=0.2, midpoint=0.92)) -plt.title('Validation accuracy') +plot_gridsearch_results(grid.cv_results_, param_grid=param_grid, + title="Validation accuracy", cmap=plt.cm.hot, + norm=MidpointNormalize(vmin=0.2, midpoint=0.92)) plt.show() From 6279ff904112027a7e52ce94e52335976b0f410a Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 17:50:21 -0400 Subject: [PATCH 62/97] removed printing confusion matrix --- sklearn/plot/_confusion_matrix.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 36579f68a1ab5..183fe0a45b033 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -7,7 +7,7 @@ def plot_confusion_matrix(values, classes, normalize=False, title='Confusion matrix', cmap=None, vmin=None, vmax=None, ax=None, fmt="{:.2f}", xtickrotation=45, norm=None): - """Print and plot the confusion matrix as a heatmap. Normalization can be + """Plot the confusion matrix as a heatmap. Normalization can be applied by setting `normalize=True`. 
Parameters @@ -62,9 +62,6 @@ def plot_confusion_matrix(values, classes, normalize=False, if normalize: values = values.astype('float') / values.sum(axis=1)[:, np.newaxis] - print(title) - print(values) - fmt = fmt if normalize else '{:d}' plot_heatmap(values, xticklabels=classes, yticklabels=classes, cmap=cmap, From e450fe52897df3c931ee5b862f309f2df8c10ec7 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 20 Jun 2017 18:01:50 -0400 Subject: [PATCH 63/97] remove param_grid argument --- sklearn/plot/_gridsearch_results.py | 39 ++++++++++++++++------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 7f671e8932977..fd3919436eaa4 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -2,7 +2,7 @@ from sklearn.plot import plot_heatmap -def plot_gridsearch_results(cv_results, param_grid, metric='mean_test_score', +def plot_gridsearch_results(cv_results, metric='mean_test_score', xlabel=None, ylabel=None, title='Grid Search Results', cmap=None, vmin=None, vmax=None, ax=None, fmt="{:.2f}", @@ -15,10 +15,6 @@ def plot_gridsearch_results(cv_results, param_grid, metric='mean_test_score', cv_results : dict of numpy (masked) ndarrays The cv_results_ attribute of the GridSearchCV object. - param_grid : dict - A dictionary with keys as the two parameters searched and values - as a list of their respective values searched. - xlabel : string, default=None Label for the x-axis. If None, the first key of the param_grid will be used as the xlabel. 
@@ -61,19 +57,28 @@ def plot_gridsearch_results(cv_results, param_grid, metric='mean_test_score', import matplotlib.pyplot as plt - parameter1, parameter2 = param_grid.keys() - parameter1_values = param_grid[parameter1] - parameter2_values = param_grid[parameter2] + params = sorted(cv_results['params'][0].keys()) + + if len(params) == 1: + # plot a line chart + pass + elif len(params) == 2: + parameter1_values = np.unique(cv_results['param_%s' % params[0]]) + parameter2_values = np.unique(cv_results['param_%s' % params[1]]) - scores = cv_results[metric].reshape(len(parameter1_values), - len(parameter2_values)) + scores = cv_results[metric].reshape(len(parameter1_values), + len(parameter2_values)) - xlabel = parameter1 if xlabel is None else xlabel - ylabel = parameter2 if ylabel is None else ylabel + xlabel = params[0] if xlabel is None else xlabel + ylabel = params[1] if ylabel is None else ylabel - plot_heatmap(scores, cmap=plt.cm.hot, xlabel=xlabel, ylabel=ylabel, - xticklabels=parameter1_values, yticklabels=parameter2_values, - ax=ax, norm=norm) + plot_heatmap(scores, cmap=plt.cm.hot, xlabel=xlabel, ylabel=ylabel, + xticklabels=parameter1_values, + yticklabels=parameter2_values, + ax=ax, norm=norm) - plt.title(title) - plt.show() + plt.title(title) + plt.show() + else: + # print error statement + pass From cf789febe13debf66e7c8bd37df1ae09d46117ef Mon Sep 17 00:00:00 2001 From: aarshayj Date: Wed, 21 Jun 2017 11:12:36 -0400 Subject: [PATCH 64/97] adding cases for nparams 1,2, more --- sklearn/plot/_gridsearch_results.py | 42 ++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index fd3919436eaa4..68ca542a67728 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -24,7 +24,8 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', be used as the ylabel. 
metric : string, default="mean_test_score" - The metric from the GridSearchCV results to display. + The metric from the GridSearchCV results to display. This is ignored + if only 1 parameter is used in grid search. title : string, default="Grid Search Results" Title for the heatmap. @@ -58,11 +59,38 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', import matplotlib.pyplot as plt params = sorted(cv_results['params'][0].keys()) + nparams = len(params) + + if nparams == 1: + param = params[0] + param_range = sorted(cv_results['param_%s' % param]) + train_scores_mean = cv_results['mean_train_score'] + train_scores_std = cv_results['mean_train_std'] + test_scores_mean = cv_results['mean_test_score'] + test_scores_std = cv_results['mean_test_std'] + + lw = 2 + plt.semilogx(param_range, train_scores_mean, + label="Training score", + color="darkorange", lw=lw) + plt.fill_between(param_range, train_scores_mean - train_scores_std, + train_scores_mean + train_scores_std, alpha=0.2, + color="darkorange", lw=lw) + + plt.semilogx(param_range, test_scores_mean, + label="Cross-validation score", + color="navy", lw=lw) + plt.fill_between(param_range, test_scores_mean - test_scores_std, + test_scores_mean + test_scores_std, alpha=0.2, + color="navy", lw=lw) - if len(params) == 1: - # plot a line chart - pass - elif len(params) == 2: + plt.title(title) + plt.xlabel(param) + ylabel = "Score" if ylabel is None else ylabel + plt.ylabel(ylabel) + plt.show() + + elif nparams == 2: parameter1_values = np.unique(cv_results['param_%s' % params[0]]) parameter2_values = np.unique(cv_results['param_%s' % params[1]]) @@ -79,6 +107,8 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', plt.title(title) plt.show() + else: - # print error statement + raise ValueError('Plot functions supports upto 2 parameters in grid' + 'search, got {0}.'.format(nparams)) pass From f6bdc2b9919ad15669a4e714a6b80eabef2c0a02 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Wed, 21 Jun 
2017 12:00:56 -0400 Subject: [PATCH 65/97] minor fixes --- sklearn/plot/_gridsearch_results.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 68ca542a67728..2719e38c3a52b 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -65,9 +65,9 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', param = params[0] param_range = sorted(cv_results['param_%s' % param]) train_scores_mean = cv_results['mean_train_score'] - train_scores_std = cv_results['mean_train_std'] + train_scores_std = cv_results['std_train_score'] test_scores_mean = cv_results['mean_test_score'] - test_scores_std = cv_results['mean_test_std'] + test_scores_std = cv_results['std_test_score'] lw = 2 plt.semilogx(param_range, train_scores_mean, @@ -86,6 +86,7 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', plt.title(title) plt.xlabel(param) + plt.legend() ylabel = "Score" if ylabel is None else ylabel plt.ylabel(ylabel) plt.show() From 96622fafc0ddc10647a485102cc8eaefcb1636e3 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Wed, 21 Jun 2017 15:38:04 -0400 Subject: [PATCH 66/97] fixed typo --- sklearn/plot/_gridsearch_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 2719e38c3a52b..55ce25bd2f425 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -110,6 +110,6 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', plt.show() else: - raise ValueError('Plot functions supports upto 2 parameters in grid' + raise ValueError('Plot function supports upto 2 parameters in grid' 'search, got {0}.'.format(nparams)) pass From 5adcafe22fd768f66013452bb0aafe575daf8c3f Mon Sep 17 00:00:00 2001 From: aarshayj Date: Wed, 21 Jun 2017 15:38:38 -0400 Subject: [PATCH 67/97] adding tests for 
confusion matrix and grid search plots --- sklearn/plot/tests/test_heatmap.py | 85 ++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index fb27bdf0ae9be..668f82b2b138f 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -1,5 +1,11 @@ from sklearn.plot import plot_heatmap +from sklearn.plot import plot_confusion_matrix +from sklearn.plot import plot_gridsearch_results +from sklearn.datasets import load_iris +from sklearn.model_selection import GridSearchCV +from sklearn.svm import SVC from sklearn.utils.testing import SkipTest +from sklearn.utils.testing import assert_raises import numpy as np @@ -22,3 +28,82 @@ def test_heatmap(): plt.draw() plt.close() + + +def test_confusion_matrix(): + try: + import matplotlib + except ImportError: + raise SkipTest("Not testing plot_heatmap, matplotlib not installed.") + + import matplotlib.pyplot as plt + + with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): + plt.figure() + cnf_matrix = np.random.randomint(1, 10, size=(2, 2)) + + # use mixture of default values and keyword args + plot_confusion_matrix(cnf_matrix, classes=["dummay1", "dummy2"], + cmap="Paired", ax=plt.gca()) + + plt.draw() + plt.close() + + +def test_gridsearch_results(): + try: + import matplotlib + except ImportError: + raise SkipTest("Not testing plot_heatmap, matplotlib not installed.") + + import matplotlib.pyplot as plt + + iris = load_iris() + X = iris.data + y = iris.target + + # We only keep the first two features in X and sub-sample the dataset to + # keep only 2 classes and make it a binary classification problem. 
+ + X_2d = X[:, :2] + X_2d = X_2d[y > 0] + y_2d = y[y > 0] + y_2d -= 1 + + # Define parameters: + C_range = np.logspace(-2, 10, 2) + gamma_range = np.logspace(-9, 3, 2) + tol_range = [1e-3, 1e-4] + + # Test 1D case: + param_grid = dict(gamma=gamma_range) + grid = GridSearchCV(SVC(), param_grid=param_grid, cv=3) + grid.fit(X, y) + + with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): + plt.figure() + plot_gridsearch_results(grid.cv_results_) + plt.draw() + plt.close() + + # Test 2D case: + param_grid = dict(gamma=gamma_range, C=C_range) + grid = GridSearchCV(SVC(), param_grid=param_grid, cv=3) + grid.fit(X, y) + + with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): + plt.figure() + plot_gridsearch_results(grid.cv_results_) + plt.draw() + plt.close() + + # Test 3D case: + param_grid = dict(gamma=gamma_range, C=C_range, tol=tol_range) + grid = GridSearchCV(SVC(), param_grid=param_grid, cv=3) + grid.fit(X, y) + + with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): + plt.figure() + assert_raises(ValueError, plot_gridsearch_results, grid.cv_results_) + plt.draw() + plt.close() From f4675d725c65dd502b8816bd0d505142c37852eb Mon Sep 17 00:00:00 2001 From: aarshayj Date: Wed, 21 Jun 2017 17:26:01 -0400 Subject: [PATCH 68/97] adding test case for normalized --- sklearn/plot/tests/test_heatmap.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index 668f82b2b138f..5a199e23e24d7 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -42,10 +42,15 @@ def test_confusion_matrix(): plt.figure() cnf_matrix = np.random.randomint(1, 10, size=(2, 2)) - # use mixture of default values and keyword args + # plot un-normalized matrix plot_confusion_matrix(cnf_matrix, classes=["dummay1", "dummy2"], cmap="Paired", ax=plt.gca()) + # plot normalized matrix + plot_confusion_matrix(cnf_matrix, 
normalize=True, + classes=["dummay1", "dummy2"], + cmap="Paired", ax=plt.gca()) + plt.draw() plt.close() From bf76105182afb0bd84a1324d44c345bcce8b7bb5 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Wed, 21 Jun 2017 17:28:41 -0400 Subject: [PATCH 69/97] updated doc files --- doc/modules/classes.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 583c63a37343b..967f6d0b4cf02 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -1403,7 +1403,8 @@ Low-level methods :no-inherited-members: This module is experimental. Use at your own risk. -Use of this module requires the matplotlib library. +Use of this module requires the matplotlib library, +version 1.5 or later (preferably 2.0). .. currentmodule:: sklearn.plot @@ -1412,6 +1413,8 @@ Use of this module requires the matplotlib library. :template: function.rst plot_heatmap + plot_confusion_matrix + plot_gridsearch_results Recently deprecated From 11c4006abcac76b4796d35a0edc1a272cb999fac Mon Sep 17 00:00:00 2001 From: aarshayj Date: Thu, 29 Jun 2017 14:30:46 -0400 Subject: [PATCH 70/97] modifying doc --- doc/modules/classes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 967f6d0b4cf02..d359249ec38e9 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -1404,7 +1404,7 @@ Low-level methods This module is experimental. Use at your own risk. Use of this module requires the matplotlib library, -version 1.5 or later (preferably 2.0). +version 1.5 or later. .. 
currentmodule:: sklearn.plot From b48617ccb70d83928d513653375d5872f09dad14 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 4 Jul 2017 11:36:32 -0400 Subject: [PATCH 71/97] doc fix --- sklearn/plot/_gridsearch_results.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 55ce25bd2f425..587ff2ae81106 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -7,8 +7,9 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', title='Grid Search Results', cmap=None, vmin=None, vmax=None, ax=None, fmt="{:.2f}", xtickrotation=45, norm=None): - """Print and plot the confusion matrix as a heatmap. Normalization can be - applied by setting `normalize=True`. + """Plot the grid search results as a line chart for 1D search and heatmap + for a 2D search. This function will not work if grid-search has more than + 2 parameters in the search space. Parameters ---------- From 05e86a8102eacf9fa8f632ee433672c942d37bfd Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 4 Jul 2017 11:37:35 -0400 Subject: [PATCH 72/97] spell fix, make_blobs instead of iris, split checks into 3 functions --- sklearn/plot/tests/test_heatmap.py | 60 ++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index 5a199e23e24d7..7ff7b85ca827c 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -1,7 +1,7 @@ from sklearn.plot import plot_heatmap from sklearn.plot import plot_confusion_matrix from sklearn.plot import plot_gridsearch_results -from sklearn.datasets import load_iris +from sklearn.datasets import make_blobs from sklearn.model_selection import GridSearchCV from sklearn.svm import SVC from sklearn.utils.testing import SkipTest @@ -43,7 +43,7 @@ def test_confusion_matrix(): cnf_matrix = np.random.randomint(1, 
10, size=(2, 2)) # plot un-normalized matrix - plot_confusion_matrix(cnf_matrix, classes=["dummay1", "dummy2"], + plot_confusion_matrix(cnf_matrix, classes=["dummy1", "dummy2"], cmap="Paired", ax=plt.gca()) # plot normalized matrix @@ -55,7 +55,7 @@ def test_confusion_matrix(): plt.close() -def test_gridsearch_results(): +def test_gridsearch_results_1d(): try: import matplotlib except ImportError: @@ -63,25 +63,14 @@ def test_gridsearch_results(): import matplotlib.pyplot as plt - iris = load_iris() - X = iris.data - y = iris.target - - # We only keep the first two features in X and sub-sample the dataset to - # keep only 2 classes and make it a binary classification problem. - - X_2d = X[:, :2] - X_2d = X_2d[y > 0] - y_2d = y[y > 0] - y_2d -= 1 + X, y = make_blobs(n_samples=20, centers=2, n_features=3, + random_state=0) # Define parameters: C_range = np.logspace(-2, 10, 2) - gamma_range = np.logspace(-9, 3, 2) - tol_range = [1e-3, 1e-4] # Test 1D case: - param_grid = dict(gamma=gamma_range) + param_grid = dict(C=C_range) grid = GridSearchCV(SVC(), param_grid=param_grid, cv=3) grid.fit(X, y) @@ -91,7 +80,23 @@ def test_gridsearch_results(): plt.draw() plt.close() - # Test 2D case: + +def test_gridsearch_results_2d(): + try: + import matplotlib + except ImportError: + raise SkipTest("Not testing plot_heatmap, matplotlib not installed.") + + import matplotlib.pyplot as plt + + X, y = make_blobs(n_samples=20, centers=2, n_features=3, + random_state=0) + + # Define parameters: + C_range = np.logspace(-2, 10, 2) + gamma_range = np.logspace(-9, 3, 2) + + # Test 1D case: param_grid = dict(gamma=gamma_range, C=C_range) grid = GridSearchCV(SVC(), param_grid=param_grid, cv=3) grid.fit(X, y) @@ -102,7 +107,24 @@ def test_gridsearch_results(): plt.draw() plt.close() - # Test 3D case: + +def test_gridsearch_results_3d(): + try: + import matplotlib + except ImportError: + raise SkipTest("Not testing plot_heatmap, matplotlib not installed.") + + import matplotlib.pyplot as plt + 
+ X, y = make_blobs(n_samples=20, centers=2, n_features=3, + random_state=0) + + # Define parameters: + C_range = np.logspace(-2, 10, 2) + gamma_range = np.logspace(-9, 3, 2) + tol_range = [1e-3, 1e-4] + + # Test 1D case: param_grid = dict(gamma=gamma_range, C=C_range, tol=tol_range) grid = GridSearchCV(SVC(), param_grid=param_grid, cv=3) grid.fit(X, y) From 1f17578b46b354f57ea1b85e98433ee6d70a1039 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 4 Jul 2017 13:43:08 -0400 Subject: [PATCH 73/97] fixed parameter in examples --- examples/svm/plot_rbf_parameters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py index 2760c2259a112..b5e923b7ee937 100644 --- a/examples/svm/plot_rbf_parameters.py +++ b/examples/svm/plot_rbf_parameters.py @@ -185,7 +185,7 @@ def __call__(self, value, clip=None): plt.figure(figsize=(10, 10)) plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95) -plot_gridsearch_results(grid.cv_results_, param_grid=param_grid, - title="Validation accuracy", cmap=plt.cm.hot, +plot_gridsearch_results(grid.cv_results_, title="Validation accuracy", + cmap=plt.cm.hot, norm=MidpointNormalize(vmin=0.2, midpoint=0.92)) plt.show() From 4aa485b5cbbc9fe738b168c084d480623c6fb3fa Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 4 Jul 2017 16:36:36 -0400 Subject: [PATCH 74/97] modified travis files --- .travis.yml | 14 ++++++++++++-- build_tools/travis/install.sh | 12 ++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4022f78aa0928..413c78f250fbb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -43,8 +43,18 @@ matrix: - env: DISTRIB="conda" PYTHON_VERSION="3.6.2" INSTALL_MKL="true" NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" PANDAS_VERSION="0.20.3" CYTHON_VERSION="0.26.1" PYAMG_VERSION="3.3.2" COVERAGE=true - CHECK_PYTEST_SOFT_DEPENDENCY="true" - if: type != cron + CHECK_PYTEST_SOFT_DEPENDENCY="true" 
MATPLOTLIB_VERSION="2.0.2" + # This environment use pytest to run the tests. It uses the newest + # supported Anaconda release (4.4.0). It also runs tests requiring Pandas. + - env: USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6.1" + INSTALL_MKL="true" NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" + PANDAS_VERSION="0.20.1" CYTHON_VERSION="0.25.2" + MATPLOTLIB_VERSION="2.0.2" + # This environment tests the newest supported Anaconda release (4.4.0) + # It also runs tests requiring Pandas. + - env: DISTRIB="conda" PYTHON_VERSION="3.6.1" INSTALL_MKL="true" + NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" PANDAS_VERSION="0.20.1" + CYTHON_VERSION="0.25.2" MATPLOTLIB_VERSION="2.0.2" COVERAGE=true # flake8 linting on diff wrt common ancestor with upstream/master - env: RUN_FLAKE8="true" SKIP_TESTS="true" DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true" diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index 76cd3221cb009..d2a6aa9da4fbe 100755 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -43,8 +43,20 @@ if [[ "$DISTRIB" == "conda" ]]; then if [[ "$INSTALL_MKL" == "true" ]]; then TO_INSTALL="$TO_INSTALL mkl" + conda create -n testenv --yes python=$PYTHON_VERSION pip nose pytest \ + numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \ + matplotlib=$MATPLOTLIB_VERSION \ + mkl cython=$CYTHON_VERSION \ + ${PANDAS_VERSION+pandas=$PANDAS_VERSION} + else TO_INSTALL="$TO_INSTALL nomkl" + conda create -n testenv --yes python=$PYTHON_VERSION pip nose pytest \ + numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \ + matplotlib=$MATPLOTLIB_VERSION \ + nomkl cython=$CYTHON_VERSION \ + ${PANDAS_VERSION+pandas=$PANDAS_VERSION} + fi if [[ -n "$PANDAS_VERSION" ]]; then From 9b9bca6141dc4fa765ba2bba271026dab52c788f Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 4 Jul 2017 18:02:38 -0400 Subject: [PATCH 75/97] explicitly imported random module functions --- sklearn/plot/tests/test_heatmap.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 
deletions(-) diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index 7ff7b85ca827c..b110375adfc1b 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -7,6 +7,8 @@ from sklearn.utils.testing import SkipTest from sklearn.utils.testing import assert_raises import numpy as np +from numpy.random import (RandomState, + randomint) def test_heatmap(): @@ -19,7 +21,7 @@ def test_heatmap(): with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): plt.figure() - rng = np.random.RandomState(0) + rng = RandomState(0) X = rng.normal(size=(10, 5)) # use mixture of default values and keyword args plot_heatmap(X, ylabel="y-axis", @@ -40,7 +42,7 @@ def test_confusion_matrix(): with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): plt.figure() - cnf_matrix = np.random.randomint(1, 10, size=(2, 2)) + cnf_matrix = randomint(1, 10, size=(2, 2)) # plot un-normalized matrix plot_confusion_matrix(cnf_matrix, classes=["dummy1", "dummy2"], From 129ff24570b439e7e3f8b8b8271d45f5bd1303c5 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 4 Jul 2017 18:03:35 -0400 Subject: [PATCH 76/97] install matplotlib only if secret variable specified in build matrix --- build_tools/travis/install.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index d2a6aa9da4fbe..64fc28740c5fc 100755 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -45,15 +45,14 @@ if [[ "$DISTRIB" == "conda" ]]; then TO_INSTALL="$TO_INSTALL mkl" conda create -n testenv --yes python=$PYTHON_VERSION pip nose pytest \ numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \ - matplotlib=$MATPLOTLIB_VERSION \ mkl cython=$CYTHON_VERSION \ - ${PANDAS_VERSION+pandas=$PANDAS_VERSION} + ${PANDAS_VERSION+pandas=$PANDAS_VERSION} \ + ${MATPLOTLIB_VERSION+matplotlib=$MATPLOTLIB_VERSION} else TO_INSTALL="$TO_INSTALL nomkl" conda create -n 
testenv --yes python=$PYTHON_VERSION pip nose pytest \ numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \ - matplotlib=$MATPLOTLIB_VERSION \ nomkl cython=$CYTHON_VERSION \ ${PANDAS_VERSION+pandas=$PANDAS_VERSION} From bc736433cabf410f8f7d8a88462d1ee9e9e3f3e8 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Wed, 5 Jul 2017 09:20:15 -0400 Subject: [PATCH 77/97] randint correction --- sklearn/plot/tests/test_heatmap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index b110375adfc1b..8ea3d97ef99aa 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -8,7 +8,7 @@ from sklearn.utils.testing import assert_raises import numpy as np from numpy.random import (RandomState, - randomint) + randint) def test_heatmap(): @@ -42,7 +42,7 @@ def test_confusion_matrix(): with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): plt.figure() - cnf_matrix = randomint(1, 10, size=(2, 2)) + cnf_matrix = randint(1, 10, size=(2, 2)) # plot un-normalized matrix plot_confusion_matrix(cnf_matrix, classes=["dummy1", "dummy2"], From caebd38453fa6d45457d5c367d6c0b9f6f9f1dfa Mon Sep 17 00:00:00 2001 From: aarshayj Date: Wed, 5 Jul 2017 19:56:44 -0400 Subject: [PATCH 78/97] y_pred and y_true as input in place of matrix --- sklearn/plot/_confusion_matrix.py | 21 ++++++++++++++++----- sklearn/plot/tests/test_heatmap.py | 7 ++++--- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 183fe0a45b033..648fad9ce82e2 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -1,23 +1,32 @@ import numpy as np +from sklearn.metrics import confusion_matrix from sklearn.plot import plot_heatmap -def plot_confusion_matrix(values, classes, normalize=False, +def plot_confusion_matrix(y_true, y_pred, classes, sample_weights=None, + normalize=False, 
xlabel="Predicted Label", ylabel="True Label", title='Confusion matrix', cmap=None, vmin=None, vmax=None, ax=None, fmt="{:.2f}", xtickrotation=45, norm=None): - """Plot the confusion matrix as a heatmap. Normalization can be - applied by setting `normalize=True`. + """Plot the confusion matrix as a heatmap. A confusion matrix is computed + using `y_true`, `y_pred` and `sample_weights` arguments. Normalization + can be applied by setting `normalize=True`. Parameters ---------- - values : ndarray - Two-dimensional array to visualize. + y_true : array, shape = [n_samples] + Ground truth (correct) target values. + + y_pred : array, shape = [n_samples] + Estimated targets as returned by a classifier. classes : list of strings The list of classes represented in the two-dimensional input array. + sample_weight : array-like of shape = [n_samples], optional + Sample weights used to calculate the confusion matrix + normalize : boolean, default=False If True, the confusion matrix will be normalized by row. 
@@ -59,6 +68,8 @@ def plot_confusion_matrix(values, classes, normalize=False, import matplotlib.pyplot as plt + values = confusion_matrix(y_true, y_pred, sample_weights=sample_weights) + if normalize: values = values.astype('float') / values.sum(axis=1)[:, np.newaxis] diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index 8ea3d97ef99aa..51bf5a4873353 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -42,14 +42,15 @@ def test_confusion_matrix(): with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): plt.figure() - cnf_matrix = randint(1, 10, size=(2, 2)) + array1 = randint(1, 3, size=20) + array2 = randint(1, 3, size=20) # plot un-normalized matrix - plot_confusion_matrix(cnf_matrix, classes=["dummy1", "dummy2"], + plot_confusion_matrix(array1, array2, classes=["dummy1", "dummy2"], cmap="Paired", ax=plt.gca()) # plot normalized matrix - plot_confusion_matrix(cnf_matrix, normalize=True, + plot_confusion_matrix(array1, array2, normalize=True, classes=["dummay1", "dummy2"], cmap="Paired", ax=plt.gca()) From 0f124f155452f1e4abd5d39dddf579dd50df8b33 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Thu, 6 Jul 2017 12:08:19 -0400 Subject: [PATCH 79/97] fixed typo --- sklearn/plot/_confusion_matrix.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 648fad9ce82e2..803ebbdfdc1d5 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -3,7 +3,7 @@ from sklearn.plot import plot_heatmap -def plot_confusion_matrix(y_true, y_pred, classes, sample_weights=None, +def plot_confusion_matrix(y_true, y_pred, classes, sample_weight=None, normalize=False, xlabel="Predicted Label", ylabel="True Label", title='Confusion matrix', cmap=None, vmin=None, @@ -68,7 +68,7 @@ def plot_confusion_matrix(y_true, y_pred, classes, sample_weights=None, import matplotlib.pyplot as plt 
- values = confusion_matrix(y_true, y_pred, sample_weights=sample_weights) + values = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) if normalize: values = values.astype('float') / values.sum(axis=1)[:, np.newaxis] From 5e5f741e45f65b61e6e0859bdf1d767faa108229 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Fri, 7 Jul 2017 13:13:55 -0400 Subject: [PATCH 80/97] fixed example --- examples/model_selection/plot_confusion_matrix.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/examples/model_selection/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py index 95df220d242a3..29853b9412681 100644 --- a/examples/model_selection/plot_confusion_matrix.py +++ b/examples/model_selection/plot_confusion_matrix.py @@ -29,7 +29,6 @@ from sklearn import svm, datasets from sklearn.model_selection import train_test_split -from sklearn.metrics import confusion_matrix from sklearn.plot import plot_confusion_matrix # import some data to play with @@ -46,19 +45,17 @@ classifier = svm.SVC(kernel='linear', C=0.01) y_pred = classifier.fit(X_train, y_train).predict(X_test) -# Compute confusion matrix -cnf_matrix = confusion_matrix(y_test, y_pred) np.set_printoptions(precision=2) # Plot non-normalized confusion matrix plt.figure() -plot_confusion_matrix(cnf_matrix, classes=class_names, +plot_confusion_matrix(y_test, y_pred, classes=class_names, title='Confusion matrix, without normalization', cmap=plt.cm.Blues) # Plot normalized confusion matrix plt.figure() -plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True, +plot_confusion_matrix(y_test, y_pred, classes=class_names, normalize=True, title='Normalized confusion matrix', cmap=plt.cm.Blues) From 2e3eec98203d6ded04e13ce341062d949c459e3a Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 25 Jul 2017 19:20:48 -0400 Subject: [PATCH 81/97] making classes optional --- sklearn/plot/_confusion_matrix.py | 17 +++++++++++++++-- sklearn/plot/tests/test_heatmap.py | 12 
++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 803ebbdfdc1d5..e06af3c9c0d43 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -1,9 +1,10 @@ import numpy as np from sklearn.metrics import confusion_matrix from sklearn.plot import plot_heatmap +from sklearn.utils.multiclass import unique_labels -def plot_confusion_matrix(y_true, y_pred, classes, sample_weight=None, +def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, normalize=False, xlabel="Predicted Label", ylabel="True Label", title='Confusion matrix', cmap=None, vmin=None, @@ -21,8 +22,10 @@ def plot_confusion_matrix(y_true, y_pred, classes, sample_weight=None, y_pred : array, shape = [n_samples] Estimated targets as returned by a classifier. - classes : list of strings + classes : list of strings, optional (default=None) The list of classes represented in the two-dimensional input array. 
+ If not passed in function call, the classes will be infered from + y_true and y_pred sample_weight : array-like of shape = [n_samples], optional Sample weights used to calculate the confusion matrix @@ -68,6 +71,16 @@ def plot_confusion_matrix(y_true, y_pred, classes, sample_weight=None, import matplotlib.pyplot as plt + unique_y = unique_labels(y_true, y_pred) + + if classes is None: + classes = unique_y + else: + if not set(classes).issuperset(set(unique_y)): + raise ValueError("`classes=%s` are not a superset of the unique", + "values of y_true and y_pred which are %s" % + (classes, unique_y)) + values = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) if normalize: diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index 51bf5a4873353..870263f421085 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -6,6 +6,7 @@ from sklearn.svm import SVC from sklearn.utils.testing import SkipTest from sklearn.utils.testing import assert_raises +from sklearn.utils.testing import assert_raise_message import numpy as np from numpy.random import (RandomState, randint) @@ -54,6 +55,17 @@ def test_confusion_matrix(): classes=["dummay1", "dummy2"], cmap="Paired", ax=plt.gca()) + # plot without passing classes explicitly + plot_confusion_matrix(array1, array2, + cmap="Paired", ax=plt.gca()) + + # y having different value than classes should raise error + expected_msg = ("`classes=[1,2]` are not a superset of the unique", + "values of y_true and y_pred which are [1,2,3]") + assert_raise_message(ValueError, expected_msg, + plot_confusion_matrix, array1, array2, + classes=[1, 2], ax=plt.gca()) + plt.draw() plt.close() From 7f3336fd4be0bebd49e7727c53b8aa7b09cd554d Mon Sep 17 00:00:00 2001 From: aarshayj Date: Thu, 27 Jul 2017 12:38:42 -0400 Subject: [PATCH 82/97] some fixes --- sklearn/plot/_confusion_matrix.py | 24 +++--- sklearn/plot/_gridsearch_results.py | 109 ++++++++++++++++------------ 
sklearn/plot/_heatmap.py | 2 +- sklearn/plot/tests/test_heatmap.py | 5 +- 4 files changed, 79 insertions(+), 61 deletions(-) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index e06af3c9c0d43..42fcf7f789172 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -23,9 +23,9 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, Estimated targets as returned by a classifier. classes : list of strings, optional (default=None) - The list of classes represented in the two-dimensional input array. - If not passed in function call, the classes will be infered from - y_true and y_pred + The list of names of classes represented in the two-dimensional input + array. If not passed in function call, the classes will be infered + from y_true and y_pred sample_weight : array-like of shape = [n_samples], optional Sample weights used to calculate the confusion matrix @@ -76,10 +76,11 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, if classes is None: classes = unique_y else: - if not set(classes).issuperset(set(unique_y)): - raise ValueError("`classes=%s` are not a superset of the unique", - "values of y_true and y_pred which are %s" % - (classes, unique_y)) + if len(classes) != len(unique_y): + raise ValueError("y_true and y_pred contain %d unique classes," + "which is not the same as %d" + "classes found in `classes=%s` paramter" % + (len(classes), len(unique_y), unique_y)) values = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) @@ -88,8 +89,11 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, fmt = fmt if normalize else '{:d}' - plot_heatmap(values, xticklabels=classes, yticklabels=classes, cmap=cmap, - xlabel=xlabel, ylabel=ylabel, vmin=vmin, vmax=vmax, ax=ax, - fmt=fmt, xtickrotation=xtickrotation, norm=norm) + img = plot_heatmap(values, xticklabels=classes, yticklabels=classes, + cmap=cmap, xlabel=xlabel, 
ylabel=ylabel, vmin=vmin, + vmax=vmax, ax=ax, fmt=fmt, xtickrotation=xtickrotation, + norm=norm) plt.title(title) + + return img diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 587ff2ae81106..56cf657f0f1cf 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -2,6 +2,58 @@ from sklearn.plot import plot_heatmap +def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title): + import matplotlib.pyplot as plt + + param = params[0] + param_range = sorted(cv_results['param_%s' % param]) + train_scores_mean = cv_results['mean_train_score'] + train_scores_std = cv_results['std_train_score'] + test_scores_mean = cv_results['mean_test_score'] + test_scores_std = cv_results['std_test_score'] + + lw = 2 + plt.semilogx(param_range, train_scores_mean, + label="Training score", + color="darkorange", lw=lw) + plt.fill_between(param_range, train_scores_mean - train_scores_std, + train_scores_mean + train_scores_std, alpha=0.2, + color="darkorange", lw=lw) + + img = plt.semilogx(param_range, test_scores_mean, + label="Cross-validation score", + color="navy", lw=lw) + plt.fill_between(param_range, test_scores_mean - test_scores_std, + test_scores_mean + test_scores_std, alpha=0.2, + color="navy", lw=lw) + + plt.xlabel(param) + plt.legend() + ylabel = "Score" if ylabel is None else ylabel + plt.ylabel(ylabel) + plt.title(title) + plt.show() + return img + + +def _plot_2D_results(cv_results, params, metric, ax, xlabel, ylabel, + title, norm): + import matplotlib.pyplot as plt + + parameter1_values = np.unique(cv_results['param_%s' % params[0]]) + parameter2_values = np.unique(cv_results['param_%s' % params[1]]) + + scores = cv_results[metric].reshape(len(parameter1_values), + len(parameter2_values)) + + img = plot_heatmap(scores, cmap=plt.cm.hot, xlabel=xlabel, + ylabel=ylabel, xticklabels=parameter1_values, + yticklabels=parameter2_values, ax=ax, norm=norm) + plt.title(title) + plt.show() + 
return img + + def plot_gridsearch_results(cv_results, metric='mean_test_score', xlabel=None, ylabel=None, title='Grid Search Results', cmap=None, @@ -54,63 +106,24 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', norm : matplotlib normalizer Normalizer passed to pcolormesh function from matplotlib used to - generate the heatmap. + generate the heatmap. This is ignored if only 1 parameter is used in + grid search. """ - import matplotlib.pyplot as plt - params = sorted(cv_results['params'][0].keys()) nparams = len(params) + xlabel = params[0] if xlabel is None else xlabel + ylabel = params[1] if ylabel is None else ylabel if nparams == 1: - param = params[0] - param_range = sorted(cv_results['param_%s' % param]) - train_scores_mean = cv_results['mean_train_score'] - train_scores_std = cv_results['std_train_score'] - test_scores_mean = cv_results['mean_test_score'] - test_scores_std = cv_results['std_test_score'] - - lw = 2 - plt.semilogx(param_range, train_scores_mean, - label="Training score", - color="darkorange", lw=lw) - plt.fill_between(param_range, train_scores_mean - train_scores_std, - train_scores_mean + train_scores_std, alpha=0.2, - color="darkorange", lw=lw) - - plt.semilogx(param_range, test_scores_mean, - label="Cross-validation score", - color="navy", lw=lw) - plt.fill_between(param_range, test_scores_mean - test_scores_std, - test_scores_mean + test_scores_std, alpha=0.2, - color="navy", lw=lw) - - plt.title(title) - plt.xlabel(param) - plt.legend() - ylabel = "Score" if ylabel is None else ylabel - plt.ylabel(ylabel) - plt.show() + img = _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title) elif nparams == 2: - parameter1_values = np.unique(cv_results['param_%s' % params[0]]) - parameter2_values = np.unique(cv_results['param_%s' % params[1]]) - - scores = cv_results[metric].reshape(len(parameter1_values), - len(parameter2_values)) - - xlabel = params[0] if xlabel is None else xlabel - ylabel = params[1] if ylabel is 
None else ylabel - - plot_heatmap(scores, cmap=plt.cm.hot, xlabel=xlabel, ylabel=ylabel, - xticklabels=parameter1_values, - yticklabels=parameter2_values, - ax=ax, norm=norm) - - plt.title(title) - plt.show() + img = _plot_2D_results(cv_results, params, metric, ax, xlabel, + ylabel, title, norm) else: raise ValueError('Plot function supports upto 2 parameters in grid' 'search, got {0}.'.format(nparams)) - pass + + return img diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py index cfa0645c86e18..8b0c51d2cf5b9 100644 --- a/sklearn/plot/_heatmap.py +++ b/sklearn/plot/_heatmap.py @@ -90,4 +90,4 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, # not anti-diagonal matrix ax.invert_yaxis() - return ax + return img diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index 870263f421085..d135591f285d6 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -60,8 +60,9 @@ def test_confusion_matrix(): cmap="Paired", ax=plt.gca()) # y having different value than classes should raise error - expected_msg = ("`classes=[1,2]` are not a superset of the unique", - "values of y_true and y_pred which are [1,2,3]") + expected_msg = ("y_true and y_pred contain 3 unique classes, which is" + "not the same as 2 classes found in `classes=[1,2]`" + "paramter") assert_raise_message(ValueError, expected_msg, plot_confusion_matrix, array1, array2, classes=[1, 2], ax=plt.gca()) From 6be2a92895ac53c02f3f45794d9e22a67064b896 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Thu, 27 Jul 2017 13:31:50 -0400 Subject: [PATCH 83/97] fixed 1d case of grid_search_results --- sklearn/plot/_confusion_matrix.py | 26 +++++----- sklearn/plot/_gridsearch_results.py | 74 +++++++++++++++++------------ sklearn/plot/tests/test_heatmap.py | 6 ++- 3 files changed, 60 insertions(+), 46 deletions(-) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 42fcf7f789172..60df2fec50dc0 
100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -27,44 +27,44 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, array. If not passed in function call, the classes will be infered from y_true and y_pred - sample_weight : array-like of shape = [n_samples], optional + sample_weight : array-like of shape = [n_samples], optional (default=None) Sample weights used to calculate the confusion matrix - normalize : boolean, default=False + normalize : boolean, optional (default=False) If True, the confusion matrix will be normalized by row. - xlabel : string, default="Predicted Label" + xlabel : string, optional (default="Predicted Label") Label for the x-axis. - ylabel : string, default="True Label" + ylabel : string, optional (default="True Label") Label for the y-axis. - title : string, default="Confusion matrix" + title : string, optional (default="Confusion matrix") Title for the heatmap. - cmap : string or colormap - Matpotlib colormap to use. + cmap : string or colormap, optional (default=None) + Matpotlib colormap to use. If None, plt.cm.hot will be used. - vmin : int, float or None + vmin : int, float or None, optional (default=None) Minimum clipping value. This argument will be passed on to the pcolormesh function from matplotlib used to generate the heatmap. - vmax : int, float or None + vmax : int, float or None, optional (default=None) Maximum clipping value. This argument will be passed on to the pcolormesh function from matplotlib used to generate the heatmap. - ax : axes object or None + ax : axes object or None, optional (default=None) Matplotlib axes object to plot into. If None, the current axes are used. - fmt : string, default="{:.2f}" + fmt : string, optional (default="{:.2f}") Format string to convert value to text. This will be ignored if normalize argument is False. - xtickrotation : float, default=45 + xtickrotation : float, optional (default=45) Rotation of the xticklabels. 
- norm : matplotlib normalizer + norm : matplotlib normalizer, optional (default=None) Normalizer passed to pcolormesh function from matplotlib used to generate the heatmap. """ diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 56cf657f0f1cf..f27c0d53d1925 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -2,7 +2,8 @@ from sklearn.plot import plot_heatmap -def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title): +def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, + fmt, xtickrotation): import matplotlib.pyplot as plt param = params[0] @@ -13,20 +14,23 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title): test_scores_std = cv_results['std_test_score'] lw = 2 - plt.semilogx(param_range, train_scores_mean, - label="Training score", - color="darkorange", lw=lw) - plt.fill_between(param_range, train_scores_mean - train_scores_std, + x_vales = range(len(param_range)) + plt.plot(x_vales, train_scores_mean, + label="Training score", + color="darkorange", lw=lw) + plt.fill_between(x_vales, train_scores_mean - train_scores_std, train_scores_mean + train_scores_std, alpha=0.2, color="darkorange", lw=lw) - img = plt.semilogx(param_range, test_scores_mean, - label="Cross-validation score", - color="navy", lw=lw) - plt.fill_between(param_range, test_scores_mean - test_scores_std, + img = plt.plot(x_vales, test_scores_mean, + label="Cross-validation score", + color="navy", lw=lw) + plt.fill_between(x_vales, test_scores_mean - test_scores_std, test_scores_mean + test_scores_std, alpha=0.2, color="navy", lw=lw) - + plt.xticks(x_vales, [fmt.format(x) for x in param_range], + rotation=xtickrotation) + xlabel = params[0] if xlabel is None else xlabel plt.xlabel(param) plt.legend() ylabel = "Score" if ylabel is None else ylabel @@ -36,8 +40,9 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title): return img -def 
_plot_2D_results(cv_results, params, metric, ax, xlabel, ylabel, - title, norm): +def _plot_2D_results(cv_results, params, metric, ax, xlabel, + ylabel, title, cmap, vmin, vmax, fmt, + xtickrotation, norm): import matplotlib.pyplot as plt parameter1_values = np.unique(cv_results['param_%s' % params[0]]) @@ -46,9 +51,16 @@ def _plot_2D_results(cv_results, params, metric, ax, xlabel, ylabel, scores = cv_results[metric].reshape(len(parameter1_values), len(parameter2_values)) - img = plot_heatmap(scores, cmap=plt.cm.hot, xlabel=xlabel, - ylabel=ylabel, xticklabels=parameter1_values, - yticklabels=parameter2_values, ax=ax, norm=norm) + xlabel = params[0] if xlabel is None else xlabel + ylabel = params[1] if ylabel is None else ylabel + + cmap = cmap if cmap is not None else plt.cm.hot + + img = plot_heatmap(scores, xlabel=xlabel, ylabel=ylabel, + xticklabels=parameter1_values, + yticklabels=parameter2_values, cmap=cmap, + vmin=vmin, vmax=vmax, fmt=fmt, ax=ax, + xtickrotation=xtickrotation, norm=norm) plt.title(title) plt.show() return img @@ -68,43 +80,43 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', cv_results : dict of numpy (masked) ndarrays The cv_results_ attribute of the GridSearchCV object. - xlabel : string, default=None + xlabel : string, optional (default=None) Label for the x-axis. If None, the first key of the param_grid will be used as the xlabel. - ylabel : string, default=None + ylabel : string, optional (default=None) Label for the y-axis. If None, the second key of the param_grid will be used as the ylabel. - metric : string, default="mean_test_score" + metric : string, optional (default="mean_test_score") The metric from the GridSearchCV results to display. This is ignored if only 1 parameter is used in grid search. - title : string, default="Grid Search Results" + title : string, optional (default="Grid Search Results") Title for the heatmap. - cmap : string or colormap - Matpotlib colormap to use. 
+ cmap : string or colormap, optional (default=None) + Matpotlib colormap to use. If None, plt.cm.hot will be used. - vmin : int, float or None + vmin : int, float or None, optional (default=None) Minimum clipping value. This argument will be passed on to the pcolormesh function from matplotlib used to generate the heatmap. - vmax : int, float or None + vmax : int, float or None, optional (default=None) Maximum clipping value. This argument will be passed on to the pcolormesh function from matplotlib used to generate the heatmap. - ax : axes object or None + ax : axes object or None, optional (default=None) Matplotlib axes object to plot into. If None, the current axes are used. - fmt : string, default="{:.2f}" + fmt : string, optional (default="{:.2f}") Format string to convert value to text. - xtickrotation : float, default=45 + xtickrotation : float, optional (default=45) Rotation of the xticklabels. - norm : matplotlib normalizer + norm : matplotlib normalizer, optional (default=None) Normalizer passed to pcolormesh function from matplotlib used to generate the heatmap. This is ignored if only 1 parameter is used in grid search. 
@@ -112,15 +124,15 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', params = sorted(cv_results['params'][0].keys()) nparams = len(params) - xlabel = params[0] if xlabel is None else xlabel - ylabel = params[1] if ylabel is None else ylabel if nparams == 1: - img = _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title) + img = _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, + fmt, xtickrotation) elif nparams == 2: img = _plot_2D_results(cv_results, params, metric, ax, xlabel, - ylabel, title, norm) + ylabel, title, cmap, vmin, vmax, fmt, + xtickrotation, norm) else: raise ValueError('Plot function supports upto 2 parameters in grid' diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index d135591f285d6..1a89536c17202 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -5,7 +5,6 @@ from sklearn.model_selection import GridSearchCV from sklearn.svm import SVC from sklearn.utils.testing import SkipTest -from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message import numpy as np from numpy.random import (RandomState, @@ -147,6 +146,9 @@ def test_gridsearch_results_3d(): with matplotlib.rc_context(rc={'backend': 'Agg', 'interactive': False}): plt.figure() - assert_raises(ValueError, plot_gridsearch_results, grid.cv_results_) + expected_msg = ('Plot function supports upto 2 parameters in grid' + 'search, got 3.') + assert_raise_message(ValueError, expected_msg, + plot_gridsearch_results, grid.cv_results_) plt.draw() plt.close() From 1c8db681b4a043d8fd6e23c9de8f20957482249a Mon Sep 17 00:00:00 2001 From: aarshayj Date: Thu, 27 Jul 2017 14:27:21 -0400 Subject: [PATCH 84/97] fixed confusion matrix test --- sklearn/plot/_confusion_matrix.py | 8 ++++---- sklearn/plot/_gridsearch_results.py | 2 +- sklearn/plot/tests/test_heatmap.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git 
a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 60df2fec50dc0..af0ffeda548b8 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -77,10 +77,10 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, classes = unique_y else: if len(classes) != len(unique_y): - raise ValueError("y_true and y_pred contain %d unique classes," - "which is not the same as %d" - "classes found in `classes=%s` paramter" % - (len(classes), len(unique_y), unique_y)) + raise ValueError("y_true and y_pred contain %d unique classes, " + "which is not the same as %d " + "classes found in `classes=%s` parameter" % + (len(unique_y), len(classes), classes)) values = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index f27c0d53d1925..1c65ae49d8404 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -136,6 +136,6 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', else: raise ValueError('Plot function supports upto 2 parameters in grid' - 'search, got {0}.'.format(nparams)) + 'search, got %d.' 
% nparams) return img diff --git a/sklearn/plot/tests/test_heatmap.py b/sklearn/plot/tests/test_heatmap.py index 1a89536c17202..a50c20f384113 100644 --- a/sklearn/plot/tests/test_heatmap.py +++ b/sklearn/plot/tests/test_heatmap.py @@ -59,12 +59,12 @@ def test_confusion_matrix(): cmap="Paired", ax=plt.gca()) # y having different value than classes should raise error - expected_msg = ("y_true and y_pred contain 3 unique classes, which is" - "not the same as 2 classes found in `classes=[1,2]`" - "paramter") + expected_msg = ("y_true and y_pred contain 2 unique classes, which is" + " not the same as 3 classes found in " + "`classes=[1, 2, 3]` parameter") assert_raise_message(ValueError, expected_msg, plot_confusion_matrix, array1, array2, - classes=[1, 2], ax=plt.gca()) + classes=[1, 2, 3], ax=plt.gca()) plt.draw() plt.close() From f737c01164fec77e91f3b43ae19378c25d3c0295 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Thu, 27 Jul 2017 14:54:53 -0400 Subject: [PATCH 85/97] working on axes not plt --- sklearn/plot/_gridsearch_results.py | 42 ++++++++++++++++------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 1c65ae49d8404..e98bed94e2e23 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -5,6 +5,8 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, fmt, xtickrotation): import matplotlib.pyplot as plt + if ax is None: + ax = plt.gca() param = params[0] param_range = sorted(cv_results['param_%s' % param]) @@ -15,27 +17,29 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, lw = 2 x_vales = range(len(param_range)) - plt.plot(x_vales, train_scores_mean, - label="Training score", - color="darkorange", lw=lw) - plt.fill_between(x_vales, train_scores_mean - train_scores_std, - train_scores_mean + train_scores_std, alpha=0.2, - color="darkorange", lw=lw) - - img = plt.plot(x_vales, 
test_scores_mean, - label="Cross-validation score", - color="navy", lw=lw) - plt.fill_between(x_vales, test_scores_mean - test_scores_std, - test_scores_mean + test_scores_std, alpha=0.2, - color="navy", lw=lw) - plt.xticks(x_vales, [fmt.format(x) for x in param_range], - rotation=xtickrotation) + ax.plot(x_vales, train_scores_mean, + label="Training score", + color="darkorange", lw=lw) + ax.fill_between(x_vales, train_scores_mean - train_scores_std, + train_scores_mean + train_scores_std, alpha=0.2, + color="darkorange", lw=lw) + + img = ax.plot(x_vales, test_scores_mean, + label="Cross-validation score", + color="navy", lw=lw) + ax.fill_between(x_vales, test_scores_mean - test_scores_std, + test_scores_mean + test_scores_std, alpha=0.2, + color="navy", lw=lw) + ax.set_xticks(x_vales) + ax.set_xticklabels([fmt.format(x) for x in param_range], + rotation=xtickrotation) + xlabel = params[0] if xlabel is None else xlabel - plt.xlabel(param) - plt.legend() + ax.set_xlabel(param) + ax.legend() ylabel = "Score" if ylabel is None else ylabel - plt.ylabel(ylabel) - plt.title(title) + ax.set_ylabel(ylabel) + ax.set_title(title) plt.show() return img From 170471d1b755fb745748cbf3dbd81be76615ee54 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Fri, 28 Jul 2017 15:15:29 -0400 Subject: [PATCH 86/97] adding title to plot_heatmap, removing plt.show from within API --- examples/model_selection/plot_confusion_matrix.py | 2 +- sklearn/plot/_confusion_matrix.py | 10 +++------- sklearn/plot/_gridsearch_results.py | 6 ++---- sklearn/plot/_heatmap.py | 12 ++++++++++-- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/examples/model_selection/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py index 29853b9412681..804d4ba6d9354 100644 --- a/examples/model_selection/plot_confusion_matrix.py +++ b/examples/model_selection/plot_confusion_matrix.py @@ -56,7 +56,7 @@ # Plot normalized confusion matrix plt.figure() plot_confusion_matrix(y_test, y_pred, 
classes=class_names, normalize=True, - title='Normalized confusion matrix', + title='Confusion matrix, with normalization', cmap=plt.cm.Blues) plt.show() diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index af0ffeda548b8..1c2c8f9a22bd6 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -69,8 +69,6 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, generate the heatmap. """ - import matplotlib.pyplot as plt - unique_y = unique_labels(y_true, y_pred) if classes is None: @@ -90,10 +88,8 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, fmt = fmt if normalize else '{:d}' img = plot_heatmap(values, xticklabels=classes, yticklabels=classes, - cmap=cmap, xlabel=xlabel, ylabel=ylabel, vmin=vmin, - vmax=vmax, ax=ax, fmt=fmt, xtickrotation=xtickrotation, - norm=norm) - - plt.title(title) + cmap=cmap, xlabel=xlabel, ylabel=ylabel, title=title, + vmin=vmin, vmax=vmax, ax=ax, fmt=fmt, + xtickrotation=xtickrotation, norm=norm) return img diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index e98bed94e2e23..0d3952cac1999 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -40,7 +40,6 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, ylabel = "Score" if ylabel is None else ylabel ax.set_ylabel(ylabel) ax.set_title(title) - plt.show() return img @@ -62,11 +61,10 @@ def _plot_2D_results(cv_results, params, metric, ax, xlabel, img = plot_heatmap(scores, xlabel=xlabel, ylabel=ylabel, xticklabels=parameter1_values, - yticklabels=parameter2_values, cmap=cmap, + yticklabels=parameter2_values, + title=title, cmap=cmap, vmin=vmin, vmax=vmax, fmt=fmt, ax=ax, xtickrotation=xtickrotation, norm=norm) - plt.title(title) - plt.show() return img diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py index 8b0c51d2cf5b9..989c170cacd53 100644 --- 
a/sklearn/plot/_heatmap.py +++ b/sklearn/plot/_heatmap.py @@ -2,8 +2,9 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, - yticklabels=None, cmap=None, vmin=None, vmax=None, ax=None, - fmt="{:.2f}", xtickrotation=45, norm=None): + yticklabels=None, title=None, cmap=None, vmin=None, + vmax=None, ax=None, fmt="{:.2f}", xtickrotation=45, + norm=None): """Plot a matrix as heatmap with explicit numbers. Parameters @@ -23,6 +24,9 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, yticklabels : list of string or None, default=None Tick labels for the y-axis + title : string or None, default=None + Title of the chart + cmap : string or colormap Matpotlib colormap to use. @@ -90,4 +94,8 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, # not anti-diagonal matrix ax.invert_yaxis() + # set title if not none: + if title is not None: + ax.set_title(title) + return img From df031bf3038bceb4d9da70c78efcbe0c8deef278 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Fri, 28 Jul 2017 15:25:21 -0400 Subject: [PATCH 87/97] added section to validation curve example --- .../model_selection/plot_validation_curve.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/examples/model_selection/plot_validation_curve.py b/examples/model_selection/plot_validation_curve.py index ed74a41ff100b..ae335b3cf89a9 100644 --- a/examples/model_selection/plot_validation_curve.py +++ b/examples/model_selection/plot_validation_curve.py @@ -49,3 +49,31 @@ color="navy", lw=lw) plt.legend(loc="best") plt.show() + +##################################################################### +""" +The same plot can also be generated using a combination of GridSearchCV and + plotting module of scikit-learn. 
+""" + +print(__doc__) + +import matplotlib.pyplot as plt +import numpy as np + +from sklearn.datasets import load_digits +from sklearn.svm import SVC +from sklearn.model_selection import GridSearchCV +from sklearn.plot import plot_gridsearch_results + +digits = load_digits() +X, y = digits.data, digits.target + +param_grid = {'gamma': np.logspace(-6, -1, 5)} +gs = GridSearchCV(SVC(), + param_grid=param_grid, + cv=10, scoring="accuracy") + +gs.fit(X, y) +plot_gridsearch_results(gs.cv_results_) +plt.show() From d676be39d59d6fd553783ec5cc88bd04a4a08e83 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Fri, 28 Jul 2017 17:08:24 -0400 Subject: [PATCH 88/97] sphinx syntax fix --- examples/model_selection/plot_validation_curve.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/model_selection/plot_validation_curve.py b/examples/model_selection/plot_validation_curve.py index ae335b3cf89a9..c345b77cd3176 100644 --- a/examples/model_selection/plot_validation_curve.py +++ b/examples/model_selection/plot_validation_curve.py @@ -51,10 +51,8 @@ plt.show() ##################################################################### -""" -The same plot can also be generated using a combination of GridSearchCV and - plotting module of scikit-learn. -""" +# The same plot can also be generated using a combination of GridSearchCV and +# plotting module of scikit-learn. 
print(__doc__) From f4d8a64abbf121d5ae90e11378bc870e5e9a12e3 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 1 Aug 2017 11:02:24 -0400 Subject: [PATCH 89/97] matplotlib new figure creation modified --- sklearn/plot/_gridsearch_results.py | 4 +++- sklearn/plot/_heatmap.py | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 0d3952cac1999..6d51402455bc2 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -6,7 +6,8 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, fmt, xtickrotation): import matplotlib.pyplot as plt if ax is None: - ax = plt.gca() + fig = plt.figure() + ax = fig.add_subplot(111) param = params[0] param_range = sorted(cv_results['param_%s' % param]) @@ -40,6 +41,7 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, ylabel = "Score" if ylabel is None else ylabel ax.set_ylabel(ylabel) ax.set_title(title) + plt.draw() return img diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py index 989c170cacd53..020e1394c1f6f 100644 --- a/sklearn/plot/_heatmap.py +++ b/sklearn/plot/_heatmap.py @@ -52,7 +52,8 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, import matplotlib.pyplot as plt if ax is None: - ax = plt.gca() + fig = plt.figure() + ax = fig.add_subplot(111) img = ax.pcolormesh(values, cmap=cmap, vmin=vmin, vmax=vmax, norm=norm) # this will allow us to access the pixel values: @@ -98,4 +99,6 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, if title is not None: ax.set_title(title) + plt.draw() + return img From a90c0d2ac424eb826eab7901dc969bcbc32fafd9 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Tue, 1 Aug 2017 12:51:23 -0400 Subject: [PATCH 90/97] define axis closer to public layer --- sklearn/plot/_confusion_matrix.py | 5 +++++ sklearn/plot/_gridsearch_results.py | 8 +++++--- 2 files changed, 10 insertions(+), 3 deletions(-) 
diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 1c2c8f9a22bd6..2ca9e10ac44b4 100644 --- a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -68,6 +68,7 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, Normalizer passed to pcolormesh function from matplotlib used to generate the heatmap. """ + import matplotlib.pyplot as plt unique_y = unique_labels(y_true, y_pred) @@ -87,6 +88,10 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, fmt = fmt if normalize else '{:d}' + if ax is None: + fig = plt.figure() + ax = fig.add_subplot(111) + img = plot_heatmap(values, xticklabels=classes, yticklabels=classes, cmap=cmap, xlabel=xlabel, ylabel=ylabel, title=title, vmin=vmin, vmax=vmax, ax=ax, fmt=fmt, diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index 6d51402455bc2..c1d1f01031b53 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -5,9 +5,6 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, fmt, xtickrotation): import matplotlib.pyplot as plt - if ax is None: - fig = plt.figure() - ax = fig.add_subplot(111) param = params[0] param_range = sorted(cv_results['param_%s' % param]) @@ -125,10 +122,15 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', generate the heatmap. This is ignored if only 1 parameter is used in grid search. 
""" + import matplotlib.pyplot as plt params = sorted(cv_results['params'][0].keys()) nparams = len(params) + if ax is None: + fig = plt.figure() + ax = fig.add_subplot(111) + if nparams == 1: img = _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, fmt, xtickrotation) From fa2dec7bd475354224fd801e1413d467d3c32d85 Mon Sep 17 00:00:00 2001 From: aarshayj Date: Thu, 3 Aug 2017 11:19:00 -0400 Subject: [PATCH 91/97] removed plt.draw() --- sklearn/plot/_gridsearch_results.py | 3 --- sklearn/plot/_heatmap.py | 2 -- 2 files changed, 5 deletions(-) diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index c1d1f01031b53..f5d74c834143f 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -4,8 +4,6 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, fmt, xtickrotation): - import matplotlib.pyplot as plt - param = params[0] param_range = sorted(cv_results['param_%s' % param]) train_scores_mean = cv_results['mean_train_score'] @@ -38,7 +36,6 @@ def _plot_1D_results(cv_results, params, ax, xlabel, ylabel, title, ylabel = "Score" if ylabel is None else ylabel ax.set_ylabel(ylabel) ax.set_title(title) - plt.draw() return img diff --git a/sklearn/plot/_heatmap.py b/sklearn/plot/_heatmap.py index 020e1394c1f6f..d098bd82cc927 100644 --- a/sklearn/plot/_heatmap.py +++ b/sklearn/plot/_heatmap.py @@ -99,6 +99,4 @@ def plot_heatmap(values, xlabel="", ylabel="", xticklabels=None, if title is not None: ax.set_title(title) - plt.draw() - return img From 876d854720d5d543361c3b5b8a8480e59572a8de Mon Sep 17 00:00:00 2001 From: aarshayj Date: Sat, 12 Aug 2017 19:03:04 -0400 Subject: [PATCH 92/97] docstring split lines --- sklearn/plot/_confusion_matrix.py | 8 +++++--- sklearn/plot/_gridsearch_results.py | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/sklearn/plot/_confusion_matrix.py b/sklearn/plot/_confusion_matrix.py index 2ca9e10ac44b4..aa205db564706 100644 --- 
a/sklearn/plot/_confusion_matrix.py +++ b/sklearn/plot/_confusion_matrix.py @@ -10,9 +10,11 @@ def plot_confusion_matrix(y_true, y_pred, classes=None, sample_weight=None, title='Confusion matrix', cmap=None, vmin=None, vmax=None, ax=None, fmt="{:.2f}", xtickrotation=45, norm=None): - """Plot the confusion matrix as a heatmap. A confusion matrix is computed - using `y_true`, `y_pred` and `sample_weights` arguments. Normalization - can be applied by setting `normalize=True`. + """Plot confusion matrix as a heatmap. + + A confusion matrix is computed using `y_true`, `y_pred` and + `sample_weights` arguments. Normalization can be applied by setting + `normalize=True`. Parameters ---------- diff --git a/sklearn/plot/_gridsearch_results.py b/sklearn/plot/_gridsearch_results.py index f5d74c834143f..7a902903a6ec0 100644 --- a/sklearn/plot/_gridsearch_results.py +++ b/sklearn/plot/_gridsearch_results.py @@ -69,7 +69,9 @@ def plot_gridsearch_results(cv_results, metric='mean_test_score', title='Grid Search Results', cmap=None, vmin=None, vmax=None, ax=None, fmt="{:.2f}", xtickrotation=45, norm=None): - """Plot the grid search results as a line chart for 1D search and heatmap + """Plot grid search results. + + The results are plotted as a line chart for 1D search and as a heatmap for a 2D search. This function will not work if grid-search has more than 2 parameters in the search space. 
From fb16ab0ba482c87cfddafc406f04632331099275 Mon Sep 17 00:00:00 2001 From: aj2713 Date: Fri, 12 Jan 2018 18:03:24 +0530 Subject: [PATCH 93/97] add tight_layout to plot confusion matrix examples --- examples/model_selection/plot_confusion_matrix.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/model_selection/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py index 804d4ba6d9354..b2d9f1306fb52 100644 --- a/examples/model_selection/plot_confusion_matrix.py +++ b/examples/model_selection/plot_confusion_matrix.py @@ -52,11 +52,13 @@ plot_confusion_matrix(y_test, y_pred, classes=class_names, title='Confusion matrix, without normalization', cmap=plt.cm.Blues) +plt.tight_layout() # Plot normalized confusion matrix plt.figure() plot_confusion_matrix(y_test, y_pred, classes=class_names, normalize=True, title='Confusion matrix, with normalization', cmap=plt.cm.Blues) +plt.tight_layout() plt.show() From 8cc94f17d9cd475412114176d2227e6881599955 Mon Sep 17 00:00:00 2001 From: aj2713 Date: Fri, 12 Jan 2018 18:04:44 +0530 Subject: [PATCH 94/97] remove second print doc statement --- examples/model_selection/plot_validation_curve.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/model_selection/plot_validation_curve.py b/examples/model_selection/plot_validation_curve.py index c345b77cd3176..f2fb8dff64fe7 100644 --- a/examples/model_selection/plot_validation_curve.py +++ b/examples/model_selection/plot_validation_curve.py @@ -54,8 +54,6 @@ # The same plot can also be generated using a combination of GridSearchCV and # plotting module of scikit-learn. 
-print(__doc__) - import matplotlib.pyplot as plt import numpy as np From c48b61a2e2bbbd4bbcf47a6ed621748970af2b11 Mon Sep 17 00:00:00 2001 From: aj2713 Date: Sat, 13 Jan 2018 05:17:18 +0530 Subject: [PATCH 95/97] adding axis format and tight layout --- examples/model_selection/plot_validation_curve.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/model_selection/plot_validation_curve.py b/examples/model_selection/plot_validation_curve.py index f2fb8dff64fe7..a5ec866a6f76a 100644 --- a/examples/model_selection/plot_validation_curve.py +++ b/examples/model_selection/plot_validation_curve.py @@ -71,5 +71,6 @@ cv=10, scoring="accuracy") gs.fit(X, y) -plot_gridsearch_results(gs.cv_results_) +plot_gridsearch_results(gs.cv_results_, fmt='{:.1e}') +plt.tight_layout() plt.show() From 2d1e6cfd6f3931c26aa90d60f4c6a181d0499e88 Mon Sep 17 00:00:00 2001 From: aj2713 Date: Sat, 13 Jan 2018 05:20:51 +0530 Subject: [PATCH 96/97] adding tight_layout --- examples/svm/plot_rbf_parameters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py index b5e923b7ee937..f1b803d42a0c2 100644 --- a/examples/svm/plot_rbf_parameters.py +++ b/examples/svm/plot_rbf_parameters.py @@ -188,4 +188,5 @@ def __call__(self, value, clip=None): plot_gridsearch_results(grid.cv_results_, title="Validation accuracy", cmap=plt.cm.hot, norm=MidpointNormalize(vmin=0.2, midpoint=0.92)) +plt.tight_layout() plt.show() From 01a63a7a5ad870a7693491d71377550575890893 Mon Sep 17 00:00:00 2001 From: aj2713 Date: Sat, 13 Jan 2018 05:24:42 +0530 Subject: [PATCH 97/97] taking .travis.yml from master and adding matplotlib --- .travis.yml | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1b0a7f5d50fa1..784339699f3e2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,7 +36,7 @@ matrix: # Python 3.4 build - env: DISTRIB="conda" PYTHON_VERSION="3.4" 
INSTALL_MKL="false" NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.16.1" CYTHON_VERSION="0.25.2" - COVERAGE=true + COVERAGE=true MATPLOTLIB_VERSION="2.0.2" if: type != cron # This environment tests the newest supported Anaconda release (5.0.0) # It also runs tests requiring Pandas and PyAMG @@ -44,17 +44,6 @@ matrix: NUMPY_VERSION="1.13.1" SCIPY_VERSION="0.19.1" PANDAS_VERSION="0.20.3" CYTHON_VERSION="0.26.1" PYAMG_VERSION="3.3.2" COVERAGE=true CHECK_PYTEST_SOFT_DEPENDENCY="true" MATPLOTLIB_VERSION="2.0.2" - # This environment use pytest to run the tests. It uses the newest - # supported Anaconda release (4.4.0). It also runs tests requiring Pandas. - - env: USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6.1" - INSTALL_MKL="true" NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" - PANDAS_VERSION="0.20.1" CYTHON_VERSION="0.25.2" - TEST_DOCSTRINGS="true" MATPLOTLIB_VERSION="2.0.2" - # This environment tests the newest supported Anaconda release (4.4.0) - # It also runs tests requiring Pandas. - - env: DISTRIB="conda" PYTHON_VERSION="3.6.1" INSTALL_MKL="true" - NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" PANDAS_VERSION="0.20.1" - CYTHON_VERSION="0.25.2" MATPLOTLIB_VERSION="2.0.2" COVERAGE=true if: type != cron # flake8 linting on diff wrt common ancestor with upstream/master - env: RUN_FLAKE8="true" SKIP_TESTS="true"