[MRG + 1] 10 more examples fixed with matplotlib 2.0 updates (#9030) · raghavrv/scikit-learn@1d4e18a · GitHub

Commit 1d4e18a

Aarshay Jain authored and raghavrv committed
[MRG + 1] 10 more examples fixed with matplotlib 2.0 updates (scikit-learn#9030)
* examples/covariance/plot_outlier_detection.py - matplotlib2.0 update
* examples/cluster/plot_kmeans_silhouette_analysis.py - matplotlib2.0 update
* examples/cluster/plot_birch_vs_minibatchkmeans.py - matplotlib2.0 + pep8 fix
* examples/cluster/plot_cluster_iris.py - matplotlib2.0 update
* examples/cluster/plot_agglomerative_clustering.py - matplotlib2.0 update
* examples/cluster/plot_ward_structured_vs_unstructured.py - matplotlib2.0 update
* examples/cluster/plot_kmeans_assumptions.py - matplotlib2.0 update
* examples/classification/plot_lda_qda.py - matplotlib2.0 + pep8 fix
* examples/calibration/plot_calibration.py - matplotlib2.0 update
* examples/plot_johnson_lindenstrauss_bound.py - matplotlib2.0 update
* flake8 changes
* reversing changes for plot_kmeans_assumptions & plot_agglomerative_clustering
* cluster/plot_cluster_iris.py - major fixes
* examples/cluster/plot_cluster_iris.py - flake8 fix
* examples/cluster/plot_cluster_iris.py - 3d projection error fix
* cluster/plot_cluster_iris.py - elevation and azimuth setting
1 parent 2a36ff1 commit 1d4e18a

Showing 9 changed files with 67 additions and 55 deletions.
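Most of the diffs below apply the same small fix: matplotlib 2.0 dropped the dark edge that scatter markers used to get by default, so light-coloured points tend to wash out against a white background unless an edge colour is requested explicitly. A minimal illustrative sketch of the recurring change (the data and figure here are made up, not taken from any of the examples below):

import numpy as np
import matplotlib.pyplot as plt

# Arbitrary data, just to show the marker style
rng = np.random.RandomState(0)
x, y = rng.randn(2, 100)

# Under the matplotlib 2.0 defaults the markers have no edge;
# passing edgecolor='k' restores the pre-2.0 look.
plt.scatter(x, y, c='white', s=20, edgecolor='k')
plt.show()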

examples/calibration/plot_calibration.py

Lines changed: 7 additions & 5 deletions
@@ -15,10 +15,11 @@
 
 Compared are the estimated probability using a Gaussian naive Bayes classifier
 without calibration, with a sigmoid calibration, and with a non-parametric
-isotonic calibration. One can observe that only the non-parametric model is able
-to provide a probability calibration that returns probabilities close to the
-expected 0.5 for most of the samples belonging to the middle cluster with
-heterogeneous labels. This results in a significantly improved Brier score.
+isotonic calibration. One can observe that only the non-parametric model is
+able to provide a probability calibration that returns probabilities close
+to the expected 0.5 for most of the samples belonging to the middle
+cluster with heterogeneous labels. This results in a significantly improved
+Brier score.
 """
 print(__doc__)
 
@@ -91,7 +92,8 @@
 for this_y, color in zip(y_unique, colors):
     this_X = X_train[y_train == this_y]
     this_sw = sw_train[y_train == this_y]
-    plt.scatter(this_X[:, 0], this_X[:, 1], s=this_sw * 50, c=color, alpha=0.5,
+    plt.scatter(this_X[:, 0], this_X[:, 1], s=this_sw * 50, c=color,
+                alpha=0.5, edgecolor='k',
                 label="Class %s" % this_y)
 plt.legend(loc="best")
 plt.title("Data")

examples/classification/plot_lda_qda.py

Lines changed: 12 additions & 10 deletions
@@ -60,11 +60,11 @@ def plot_data(lda, X, y, y_pred, fig_index):
     splot = plt.subplot(2, 2, fig_index)
     if fig_index == 1:
         plt.title('Linear Discriminant Analysis')
-        plt.ylabel('Data with fixed covariance')
+        plt.ylabel('Data with\n fixed covariance')
     elif fig_index == 2:
         plt.title('Quadratic Discriminant Analysis')
     elif fig_index == 3:
-        plt.ylabel('Data with varying covariances')
+        plt.ylabel('Data with\n varying covariances')
 
     tp = (y == y_pred)  # True Positive
     tp0, tp1 = tp[y == 0], tp[y == 1]
@@ -76,15 +76,15 @@ def plot_data(lda, X, y, y_pred, fig_index):
 
     # class 0: dots
     plt.plot(X0_tp[:, 0], X0_tp[:, 1], 'o', alpha=alpha,
-             color='red')
+             color='red', markeredgecolor='k')
     plt.plot(X0_fp[:, 0], X0_fp[:, 1], '*', alpha=alpha,
-             color='#990000')  # dark red
+             color='#990000', markeredgecolor='k')  # dark red
 
     # class 1: dots
     plt.plot(X1_tp[:, 0], X1_tp[:, 1], 'o', alpha=alpha,
-             color='blue')
+             color='blue', markeredgecolor='k')
     plt.plot(X1_fp[:, 0], X1_fp[:, 1], '*', alpha=alpha,
-             color='#000099')  # dark blue
+             color='#000099', markeredgecolor='k')  # dark blue
 
     # class 0 and 1 : areas
     nx, ny = 200, 100
@@ -100,9 +100,9 @@ def plot_data(lda, X, y, y_pred, fig_index):
 
     # means
     plt.plot(lda.means_[0][0], lda.means_[0][1],
-             'o', color='black', markersize=10)
+             'o', color='black', markersize=10, markeredgecolor='k')
     plt.plot(lda.means_[1][0], lda.means_[1][1],
-             'o', color='black', markersize=10)
+             'o', color='black', markersize=10, markeredgecolor='k')
 
     return splot
 
@@ -114,7 +114,8 @@ def plot_ellipse(splot, mean, cov, color):
     angle = 180 * angle / np.pi  # convert to degrees
     # filled Gaussian at 2 standard deviation
     ell = mpl.patches.Ellipse(mean, 2 * v[0] ** 0.5, 2 * v[1] ** 0.5,
-                              180 + angle, facecolor=color, edgecolor='yellow',
+                              180 + angle, facecolor=color,
+                              edgecolor='yellow',
                               linewidth=2, zorder=2)
     ell.set_clip_box(splot.bbox)
     ell.set_alpha(0.5)
@@ -146,5 +147,6 @@ def plot_qda_cov(qda, splot):
     splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)
     plot_qda_cov(qda, splot)
     plt.axis('tight')
-plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant Analysis')
+plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant'
+             'Analysis')
 plt.show()

examples/cluster/plot_birch_vs_minibatchkmeans.py

Lines changed: 8 additions & 8 deletions
@@ -39,7 +39,6 @@
 
 # Generate blobs to do a comparison between MiniBatchKMeans and Birch.
 X, y = make_blobs(n_samples=100000, centers=n_centres, random_state=0)
-
 
 # Use all colors that matplotlib provides by default.
 colors_ = cycle(colors.cnames.keys())
@@ -69,11 +68,11 @@
     ax = fig.add_subplot(1, 3, ind + 1)
     for this_centroid, k, col in zip(centroids, range(n_clusters), colors_):
         mask = labels == k
-        ax.plot(X[mask, 0], X[mask, 1], 'w',
-                markerfacecolor=col, marker='.')
+        ax.scatter(X[mask, 0], X[mask, 1],
+                   c='w', edgecolor=col, marker='.', alpha=0.5)
         if birch_model.n_clusters is None:
-            ax.plot(this_centroid[0], this_centroid[1], '+', markerfacecolor=col,
-                    markeredgecolor='k', markersize=5)
+            ax.scatter(this_centroid[0], this_centroid[1], marker='+',
+                       c='k', s=25)
     ax.set_ylim([-25, 25])
     ax.set_xlim([-25, 25])
     ax.set_autoscaley_on(False)
@@ -93,9 +92,10 @@
 for this_centroid, k, col in zip(mbk.cluster_centers_,
                                  range(n_clusters), colors_):
     mask = mbk.labels_ == k
-    ax.plot(X[mask, 0], X[mask, 1], 'w', markerfacecolor=col, marker='.')
-    ax.plot(this_centroid[0], this_centroid[1], '+', markeredgecolor='k',
-            markersize=5)
+    ax.scatter(X[mask, 0], X[mask, 1], marker='.',
+               c='w', edgecolor=col, alpha=0.5)
+    ax.scatter(this_centroid[0], this_centroid[1], marker='+',
+               c='k', s=25)
 ax.set_xlim([-25, 25])
 ax.set_ylim([-25, 25])
 ax.set_title("MiniBatchKMeans")

examples/cluster/plot_cluster_iris.py

Lines changed: 21 additions & 19 deletions
@@ -25,9 +25,10 @@
 
 import numpy as np
 import matplotlib.pyplot as plt
+# Though the following import is not directly being used, it is required
+# for 3D projection to work
 from mpl_toolkits.mplot3d import Axes3D
 
-
 from sklearn.cluster import KMeans
 from sklearn import datasets
 
@@ -43,50 +44,51 @@
               'k_means_iris_bad_init': KMeans(n_clusters=3, n_init=1,
                                               init='random')}
 
-
+fig = plt.figure(figsize=(8, 6))
 fignum = 1
+titles = ['3 clusters', '8 clusters', '3 clusters, bad initialization']
 for name, est in estimators.items():
-    fig = plt.figure(fignum, figsize=(4, 3))
-    plt.clf()
-    ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
-
-    plt.cla()
+    ax = plt.subplot(2, 2, fignum, projection='3d',
+                     elev=48, azim=134)
     est.fit(X)
     labels = est.labels_
 
-    ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=labels.astype(np.float))
+    ax.scatter(X[:, 3], X[:, 0], X[:, 2],
+               c=labels.astype(np.float), edgecolor='k')
 
     ax.w_xaxis.set_ticklabels([])
     ax.w_yaxis.set_ticklabels([])
     ax.w_zaxis.set_ticklabels([])
     ax.set_xlabel('Petal width')
     ax.set_ylabel('Sepal length')
     ax.set_zlabel('Petal length')
+    ax.set_title(titles[fignum - 1])
+    ax.dist = 12
     fignum = fignum + 1
 
 # Plot the ground truth
-fig = plt.figure(fignum, figsize=(4, 3))
-plt.clf()
-ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
-
-plt.cla()
-
+ax = plt.subplot(2, 2, 4, projection='3d',
+                 elev=48, azim=134)
 for name, label in [('Setosa', 0),
                     ('Versicolour', 1),
                     ('Virginica', 2)]:
     ax.text3D(X[y == label, 3].mean(),
-              X[y == label, 0].mean() + 1.5,
-              X[y == label, 2].mean(), name,
+              X[y == label, 0].mean(),
+              X[y == label, 2].mean() + 2, name,
               horizontalalignment='center',
-              bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))
+              bbox=dict(alpha=.2, edgecolor='w', facecolor='w'))
 # Reorder the labels to have colors matching the cluster results
 y = np.choose(y, [1, 2, 0]).astype(np.float)
-ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y)
+ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y, edgecolor='k')
 
 ax.w_xaxis.set_ticklabels([])
 ax.w_yaxis.set_ticklabels([])
 ax.w_zaxis.set_ticklabels([])
 ax.set_xlabel('Petal width')
 ax.set_ylabel('Sepal length')
 ax.set_zlabel('Petal length')
-plt.show()
+ax.set_title('Ground Truth')
+ax.dist = 12
+
+fig.tight_layout()
+fig.show()
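For context, the layout this file now uses puts four 3D panels into one figure via plt.subplot(..., projection='3d'), keeping the Axes3D import only for its side effect of registering the '3d' projection. A stand-alone sketch of that layout follows (random data and hypothetical panel titles, not the iris example itself):

import numpy as np
import matplotlib.pyplot as plt
# Not used directly; importing it registers the '3d' projection
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

rng = np.random.RandomState(0)
pts = rng.rand(50, 3)

fig = plt.figure(figsize=(8, 6))
for i, title in enumerate(['panel 1', 'panel 2', 'panel 3', 'panel 4']):
    # Each call returns a new 3D axes placed on the 2x2 grid
    ax = plt.subplot(2, 2, i + 1, projection='3d', elev=48, azim=134)
    ax.scatter(pts[:, 0], pts[:, 1], pts[:, 2], edgecolor='k')
    ax.set_title(title)
fig.tight_layout()
plt.show()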

examples/cluster/plot_kmeans_assumptions.py

Lines changed: 3 additions & 2 deletions
@@ -34,7 +34,7 @@
 plt.title("Incorrect Number of Blobs")
 
 # Anisotropicly distributed data
-transformation = [[ 0.60834549, -0.63667341], [-0.40887718, 0.85253229]]
+transformation = [[0.60834549, -0.63667341], [-0.40887718, 0.85253229]]
 X_aniso = np.dot(X, transformation)
 y_pred = KMeans(n_clusters=3, random_state=random_state).fit_predict(X_aniso)
 
@@ -54,7 +54,8 @@
 
 # Unevenly sized blobs
 X_filtered = np.vstack((X[y == 0][:500], X[y == 1][:100], X[y == 2][:10]))
-y_pred = KMeans(n_clusters=3, random_state=random_state).fit_predict(X_filtered)
+y_pred = KMeans(n_clusters=3,
+                random_state=random_state).fit_predict(X_filtered)
 
 plt.subplot(224)
 plt.scatter(X_filtered[:, 0], X_filtered[:, 1], c=y_pred)

examples/cluster/plot_kmeans_silhouette_analysis.py

Lines changed: 5 additions & 4 deletions
@@ -119,16 +119,17 @@
     # 2nd Plot showing the actual clusters formed
     colors = cm.spectral(cluster_labels.astype(float) / n_clusters)
     ax2.scatter(X[:, 0], X[:, 1], marker='.', s=30, lw=0, alpha=0.7,
-                c=colors)
+                c=colors, edgecolor='k')
 
     # Labeling the clusters
     centers = clusterer.cluster_centers_
     # Draw white circles at cluster centers
-    ax2.scatter(centers[:, 0], centers[:, 1],
-                marker='o', c="white", alpha=1, s=200)
+    ax2.scatter(centers[:, 0], centers[:, 1], marker='o',
+                c="white", alpha=1, s=200, edgecolor='k')
 
     for i, c in enumerate(centers):
-        ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1, s=50)
+        ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1,
+                    s=50, edgecolor='k')
 
     ax2.set_title("The visualization of the clustered data.")
     ax2.set_xlabel("Feature space for the 1st feature")

examples/cluster/plot_ward_structured_vs_unstructured.py

Lines changed: 6 additions & 4 deletions
@@ -57,8 +57,9 @@
 ax = p3.Axes3D(fig)
 ax.view_init(7, -80)
 for l in np.unique(label):
-    ax.plot3D(X[label == l, 0], X[label == l, 1], X[label == l, 2],
-              'o', color=plt.cm.jet(np.float(l) / np.max(label + 1)))
+    ax.scatter(X[label == l, 0], X[label == l, 1], X[label == l, 2],
+               color=plt.cm.jet(np.float(l) / np.max(label + 1)),
+               s=20, edgecolor='k')
 plt.title('Without connectivity constraints (time %.2fs)' % elapsed_time)
 
 
@@ -84,8 +85,9 @@
 ax = p3.Axes3D(fig)
 ax.view_init(7, -80)
 for l in np.unique(label):
-    ax.plot3D(X[label == l, 0], X[label == l, 1], X[label == l, 2],
-              'o', color=plt.cm.jet(float(l) / np.max(label + 1)))
+    ax.scatter(X[label == l, 0], X[label == l, 1], X[label == l, 2],
+               color=plt.cm.jet(float(l) / np.max(label + 1)),
+               s=20, edgecolor='k')
 plt.title('With connectivity constraints (time %.2fs)' % elapsed_time)
 
 plt.show()
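The switch from ax.plot3D(..., 'o', ...) to ax.scatter(...) in both hunks above lets the commit set the marker size and edge colour through scatter's s and edgecolor arguments. A self-contained sketch of the resulting pattern, with made-up points and labels:

import numpy as np
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d.axes3d as p3

# Synthetic data, purely for illustration
rng = np.random.RandomState(0)
X = rng.rand(100, 3)
label = rng.randint(0, 3, size=100)

fig = plt.figure()
ax = p3.Axes3D(fig)
for l in np.unique(label):
    # One scatter call per label, coloured from the jet colormap and
    # outlined in black so the points stay visible under the 2.0 defaults
    ax.scatter(X[label == l, 0], X[label == l, 1], X[label == l, 2],
               color=plt.cm.jet(float(l) / np.max(label + 1)),
               s=20, edgecolor='k')
plt.show()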

examples/covariance/plot_outlier_detection.py

Lines changed: 4 additions & 2 deletions
@@ -107,8 +107,10 @@
                         linewidths=2, colors='red')
         subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],
                          colors='orange')
-        b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white')
-        c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black')
+        b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',
+                            s=20, edgecolor='k')
+        c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black',
+                            s=20, edgecolor='k')
         subplot.axis('tight')
         subplot.legend(
             [a.collections[0], b, c],

examples/plot_johnson_lindenstrauss_bound.py

Lines changed: 1 addition & 1 deletion
@@ -187,7 +187,7 @@
       % (np.mean(rates), np.std(rates)))
 
 plt.figure()
-plt.hist(rates, bins=50, normed=True, range=(0., 2.))
+plt.hist(rates, bins=50, normed=True, range=(0., 2.), edgecolor='k')
 plt.xlabel("Squared distances rate: projected / original")
 plt.ylabel("Distribution of samples pairs")
 plt.title("Histogram of pairwise distance rates for n_components=%d" %
