From 7a7d41eea3821068697e0ec497098e5bc75b9d50 Mon Sep 17 00:00:00 2001 From: Isaac Laughlin Date: Thu, 2 Feb 2017 15:51:44 -0800 Subject: [PATCH 1/7] Refactoring plot_iris example. --- examples/svm/plot_iris.py | 88 +++++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 37 deletions(-) diff --git a/examples/svm/plot_iris.py b/examples/svm/plot_iris.py index 08f0d57b036f6..4ea97c12c652f 100644 --- a/examples/svm/plot_iris.py +++ b/examples/svm/plot_iris.py @@ -39,55 +39,69 @@ import matplotlib.pyplot as plt from sklearn import svm, datasets + +def make_meshgrid(x, y, h=.02): + """Create a mesh of points to plot in + """ + x_min, x_max = x.min() - 1, x.max() + 1 + y_min, y_max = y.min() - 1, y.max() + 1 + xx, yy = np.meshgrid(np.arange(x_min, x_max, h), + np.arange(y_min, y_max, h)) + return xx, yy + +def plot_contours(ax, clf, xx, yy, **params): + """Plot the decision boundaries for a classifier. + """ + Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) + Z = Z.reshape(xx.shape) + out = ax.contourf(xx, yy, Z, **params) + ax.set_xlim(xx.min(), xx.max()) + ax.set_ylim(yy.min(), yy.max()) + return out + +def plot_points_boundary(ax, clf, x, y, xx, yy, + points_params, contour_params): + """Plot the decision boundaries and points. + """ + contours = plot_contours(ax, clf, xx, yy, **contour_params) + points = ax.scatter(x, y, **points_params) + return [contours, points] + # import some data to play with iris = datasets.load_iris() X = iris.data[:, :2] # we only take the first two features. We could # avoid this ugly slicing by using a two-dim dataset y = iris.target -h = .02 # step size in the mesh - # we create an instance of SVM and fit out data. We do not scale our # data since we want to plot the support vectors C = 1.0 # SVM regularization parameter -svc = svm.SVC(kernel='linear', C=C).fit(X, y) -rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C).fit(X, y) -poly_svc = svm.SVC(kernel='poly', degree=3, C=C).fit(X, y) -lin_svc = svm.LinearSVC(C=C).fit(X, y) - -# create a mesh to plot in -x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1 -y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 -xx, yy = np.meshgrid(np.arange(x_min, x_max, h), - np.arange(y_min, y_max, h)) +models = (svm.SVC(kernel='linear', C=C), + svm.LinearSVC(C=C), + svm.SVC(kernel='rbf', gamma=0.7, C=C), + svm.SVC(kernel='poly', degree=3, C=C)) +models = (clf.fit(X, y) for clf in models) # title for the plots -titles = ['SVC with linear kernel', +titles = ('SVC with linear kernel', 'LinearSVC (linear kernel)', 'SVC with RBF kernel', - 'SVC with polynomial (degree 3) kernel'] - - -for i, clf in enumerate((svc, lin_svc, rbf_svc, poly_svc)): - # Plot the decision boundary. For that, we will assign a color to each - # point in the mesh [x_min, x_max]x[y_min, y_max]. - plt.subplot(2, 2, i + 1) - plt.subplots_adjust(wspace=0.4, hspace=0.4) - - Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) - - # Put the result into a color plot - Z = Z.reshape(xx.shape) - plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8) - - # Plot also the training points - plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm) - plt.xlabel('Sepal length') - plt.ylabel('Sepal width') - plt.xlim(xx.min(), xx.max()) - plt.ylim(yy.min(), yy.max()) - plt.xticks(()) - plt.yticks(()) - plt.title(titles[i]) + 'SVC with polynomial (degree 3) kernel') + +#Set-up 2x2 grid for plotting. +fig, sub = plt.subplots(2,2) +plt.subplots_adjust(wspace=0.4, hspace=0.4) + +xx, yy = make_meshgrid(X[:, 0], X[:, 1]) + +for clf, title, ax in zip(models, titles, sub.flatten()): + plot_points_boundary(ax, clf, X[:, 0], X[:, 1], xx, yy, + dict(c=y, cmap=plt.cm.coolwarm), + dict(cmap=plt.cm.coolwarm, alpha=0.8)) + ax.set_xlabel('Sepal length') + ax.set_ylabel('Sepal width') + ax.set_xticks(()) + ax.set_yticks(()) + ax.set_title(title) plt.show() From 3c95b551dfbd7604b5c5e03876a34711297aebe1 Mon Sep 17 00:00:00 2001 From: Isaac Laughlin Date: Thu, 2 Feb 2017 16:06:23 -0800 Subject: [PATCH 2/7] pep8 fixes --- examples/svm/plot_iris.py | 45 +++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/examples/svm/plot_iris.py b/examples/svm/plot_iris.py index 4ea97c12c652f..83d6dfa558e1a 100644 --- a/examples/svm/plot_iris.py +++ b/examples/svm/plot_iris.py @@ -42,6 +42,16 @@ def make_meshgrid(x, y, h=.02): """Create a mesh of points to plot in + + Parameters + ---------- + x: data to base x-axis meshgrid on + y: data to base y-axis meshgrid on + h: stepsize for meshgrid, optional + + Returns + ------- + xx, yy : ndarray """ x_min, x_max = x.min() - 1, x.max() + 1 y_min, y_max = y.min() - 1, y.max() + 1 @@ -49,8 +59,17 @@ def make_meshgrid(x, y, h=.02): np.arange(y_min, y_max, h)) return xx, yy + def plot_contours(ax, clf, xx, yy, **params): """Plot the decision boundaries for a classifier. + + Parameters + ---------- + ax: matplotlib axes object + clf: a classifier + xx: meshgrid ndarray + yy: meshgrid ndarray + params: dictionary of params to pass to contourf, optional """ Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) Z = Z.reshape(xx.shape) @@ -59,9 +78,21 @@ def plot_contours(ax, clf, xx, yy, **params): ax.set_ylim(yy.min(), yy.max()) return out + def plot_points_boundary(ax, clf, x, y, xx, yy, - points_params, contour_params): + points_params, contour_params): """Plot the decision boundaries and points. + + Parameters + ---------- + ax: matplotlib axes object + clf: a classifier + x: x-axis of points to plot + y: y-axis of points to plot + xx: meshgrid ndarray + yy: meshgrid ndarray + point_params: dictionary of params to pass to `ax.scatter`, optional + countour_params: dictionary of params to pass to `plot_contours`, optional """ contours = plot_contours(ax, clf, xx, yy, **contour_params) points = ax.scatter(x, y, **points_params) @@ -69,8 +100,8 @@ def plot_points_boundary(ax, clf, x, y, xx, yy, # import some data to play with iris = datasets.load_iris() -X = iris.data[:, :2] # we only take the first two features. We could - # avoid this ugly slicing by using a two-dim dataset +# Take the first two features. We could avoid this by using a two-dim dataset +X = iris.data[:, :2] y = iris.target # we create an instance of SVM and fit out data. We do not scale our @@ -88,16 +119,16 @@ def plot_points_boundary(ax, clf, x, y, xx, yy, 'SVC with RBF kernel', 'SVC with polynomial (degree 3) kernel') -#Set-up 2x2 grid for plotting. -fig, sub = plt.subplots(2,2) +# Set-up 2x2 grid for plotting. +fig, sub = plt.subplots(2, 2) plt.subplots_adjust(wspace=0.4, hspace=0.4) xx, yy = make_meshgrid(X[:, 0], X[:, 1]) for clf, title, ax in zip(models, titles, sub.flatten()): plot_points_boundary(ax, clf, X[:, 0], X[:, 1], xx, yy, - dict(c=y, cmap=plt.cm.coolwarm), - dict(cmap=plt.cm.coolwarm, alpha=0.8)) + dict(c=y, cmap=plt.cm.coolwarm), + dict(cmap=plt.cm.coolwarm, alpha=0.8)) ax.set_xlabel('Sepal length') ax.set_ylabel('Sepal width') ax.set_xticks(()) From 8cde6c39781385ac59fe0bbaa35cfae708552395 Mon Sep 17 00:00:00 2001 From: Isaac Laughlin Date: Mon, 13 Feb 2017 10:44:14 -0800 Subject: [PATCH 3/7] Specify plotting defaults for consistency between matplotlib 1.5 & 2.0 --- examples/svm/plot_iris.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/svm/plot_iris.py b/examples/svm/plot_iris.py index 83d6dfa558e1a..69f3c171b90e1 100644 --- a/examples/svm/plot_iris.py +++ b/examples/svm/plot_iris.py @@ -127,7 +127,7 @@ def plot_points_boundary(ax, clf, x, y, xx, yy, for clf, title, ax in zip(models, titles, sub.flatten()): plot_points_boundary(ax, clf, X[:, 0], X[:, 1], xx, yy, - dict(c=y, cmap=plt.cm.coolwarm), + dict(c=y, cmap=plt.cm.coolwarm, s=20, edgecolors='k'), dict(cmap=plt.cm.coolwarm, alpha=0.8)) ax.set_xlabel('Sepal length') ax.set_ylabel('Sepal width') From df8ca27e0044af9f42ea24fd86350be9bada1704 Mon Sep 17 00:00:00 2001 From: Isaac Laughlin Date: Mon, 13 Feb 2017 12:58:36 -0800 Subject: [PATCH 4/7] Adding fitted qualifier to function docstring. --- examples/svm/plot_iris.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/svm/plot_iris.py b/examples/svm/plot_iris.py index 69f3c171b90e1..6282dc265c8e3 100644 --- a/examples/svm/plot_iris.py +++ b/examples/svm/plot_iris.py @@ -86,7 +86,7 @@ def plot_points_boundary(ax, clf, x, y, xx, yy, Parameters ---------- ax: matplotlib axes object - clf: a classifier + clf: a fitted classifier x: x-axis of points to plot y: y-axis of points to plot xx: meshgrid ndarray From 984cd6dfec73b4aa62f524710b6e7d0f8fa1dcff Mon Sep 17 00:00:00 2001 From: Isaac Laughlin Date: Tue, 14 Feb 2017 09:26:03 -0800 Subject: [PATCH 5/7] Removing unnecessary function in favor of direct call to ax.scatter, moving set_*lim calls outside decision boundary function. --- examples/svm/plot_iris.py | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/examples/svm/plot_iris.py b/examples/svm/plot_iris.py index 6282dc265c8e3..321ca1136dd2e 100644 --- a/examples/svm/plot_iris.py +++ b/examples/svm/plot_iris.py @@ -74,30 +74,9 @@ def plot_contours(ax, clf, xx, yy, **params): Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) Z = Z.reshape(xx.shape) out = ax.contourf(xx, yy, Z, **params) - ax.set_xlim(xx.min(), xx.max()) - ax.set_ylim(yy.min(), yy.max()) return out -def plot_points_boundary(ax, clf, x, y, xx, yy, - points_params, contour_params): - """Plot the decision boundaries and points. - - Parameters - ---------- - ax: matplotlib axes object - clf: a fitted classifier - x: x-axis of points to plot - y: y-axis of points to plot - xx: meshgrid ndarray - yy: meshgrid ndarray - point_params: dictionary of params to pass to `ax.scatter`, optional - countour_params: dictionary of params to pass to `plot_contours`, optional - """ - contours = plot_contours(ax, clf, xx, yy, **contour_params) - points = ax.scatter(x, y, **points_params) - return [contours, points] - # import some data to play with iris = datasets.load_iris() # Take the first two features. We could avoid this by using a two-dim dataset @@ -126,9 +105,11 @@ def plot_points_boundary(ax, clf, x, y, xx, yy, xx, yy = make_meshgrid(X[:, 0], X[:, 1]) for clf, title, ax in zip(models, titles, sub.flatten()): - plot_points_boundary(ax, clf, X[:, 0], X[:, 1], xx, yy, - dict(c=y, cmap=plt.cm.coolwarm, s=20, edgecolors='k'), - dict(cmap=plt.cm.coolwarm, alpha=0.8)) + plot_contours(ax, clf, xx, yy, + cmap=plt.cm.coolwarm, alpha=0.8) + ax.scatter(x, y, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors='k') + ax.set_xlim(xx.min(), xx.max()) + ax.set_ylim(yy.min(), yy.max()) ax.set_xlabel('Sepal length') ax.set_ylabel('Sepal width') ax.set_xticks(()) From e6eb7784e03522bfbfdbc2167ebee6f69afa12c1 Mon Sep 17 00:00:00 2001 From: Isaac Laughlin Date: Tue, 14 Feb 2017 09:28:54 -0800 Subject: [PATCH 6/7] Fixes bug in plotting points. --- examples/svm/plot_iris.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/svm/plot_iris.py b/examples/svm/plot_iris.py index 321ca1136dd2e..d568fdbeac9e6 100644 --- a/examples/svm/plot_iris.py +++ b/examples/svm/plot_iris.py @@ -102,12 +102,13 @@ def plot_contours(ax, clf, xx, yy, **params): fig, sub = plt.subplots(2, 2) plt.subplots_adjust(wspace=0.4, hspace=0.4) -xx, yy = make_meshgrid(X[:, 0], X[:, 1]) +X0, X1 = X[:,0], X[:,1] +xx, yy = make_meshgrid(X0, X1) for clf, title, ax in zip(models, titles, sub.flatten()): plot_contours(ax, clf, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8) - ax.scatter(x, y, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors='k') + ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors='k') ax.set_xlim(xx.min(), xx.max()) ax.set_ylim(yy.min(), yy.max()) ax.set_xlabel('Sepal length') From fe363067f7ee7ade9b05208de962d2b01ea15be0 Mon Sep 17 00:00:00 2001 From: Isaac Laughlin Date: Tue, 14 Feb 2017 13:34:41 -0800 Subject: [PATCH 7/7] Correcting whitespace pep8 issue. --- examples/svm/plot_iris.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/svm/plot_iris.py b/examples/svm/plot_iris.py index d568fdbeac9e6..f278b1e3354f1 100644 --- a/examples/svm/plot_iris.py +++ b/examples/svm/plot_iris.py @@ -102,7 +102,7 @@ def plot_contours(ax, clf, xx, yy, **params): fig, sub = plt.subplots(2, 2) plt.subplots_adjust(wspace=0.4, hspace=0.4) -X0, X1 = X[:,0], X[:,1] +X0, X1 = X[:, 0], X[:, 1] xx, yy = make_meshgrid(X0, X1) for clf, title, ax in zip(models, titles, sub.flatten()):