FEA Add DecisionBoundaryDisplay (#16061) · scikit-learn/scikit-learn@d400723 · GitHub
Commit d400723

thomasjpfan, glemaitre, ogrisel, and lesteve authored

FEA Add DecisionBoundaryDisplay (#16061)

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
Co-authored-by: Loïc Estève <loic.esteve@ymail.com>

1 parent d4aad64 · commit d400723

22 files changed: +829 −254 lines changed

doc/modules/classes.rst

Lines changed: 1 addition & 0 deletions
@@ -657,6 +657,7 @@ Plotting
    :toctree: generated/
    :template: class.rst
 
+   inspection.DecisionBoundaryDisplay
    inspection.PartialDependenceDisplay
 
 .. autosummary::

doc/visualizations.rst

Lines changed: 1 addition & 0 deletions
@@ -96,6 +96,7 @@ Display Objects
 
    calibration.CalibrationDisplay
    inspection.PartialDependenceDisplay
+   inspection.DecisionBoundaryDisplay
    metrics.ConfusionMatrixDisplay
    metrics.DetCurveDisplay
    metrics.PrecisionRecallDisplay

doc/whats_new/v1.1.rst

Lines changed: 4 additions & 0 deletions
@@ -548,6 +548,10 @@ Changelog
 :mod:`sklearn.inspection`
 .........................
 
+- |Feature| Add a display to plot the decision boundary of a classifier by
+  using the method :func:`inspection.DecisionBoundaryDisplay.from_estimator`.
+  :pr:`16061` by `Thomas Fan`_.
+
 - |Enhancement| In
   :meth:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` and
   :meth:`~sklearn.inspection.PartialDependenceDisplay.from_predictions`, allow
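
A minimal usage sketch of the new entry point documented above (the API is the one this commit adds; the iris data and LogisticRegression are illustrative choices, not taken from the diff):

# Minimal sketch, assuming scikit-learn with this commit applied (>= 1.1).
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.linear_model import LogisticRegression

iris = load_iris()
X, y = iris.data[:, :2], iris.target  # the display plots two features at a time
clf = LogisticRegression(max_iter=1000).fit(X, y)

# One call builds the grid, queries the estimator, and draws the surface.
disp = DecisionBoundaryDisplay.from_estimator(clf, X, response_method="predict")
disp.ax_.scatter(X[:, 0], X[:, 1], c=y, edgecolor="k")
plt.show()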

examples/classification/plot_classifier_comparison.py

Lines changed: 10 additions & 20 deletions
@@ -40,8 +40,7 @@
 from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
 from sklearn.naive_bayes import GaussianNB
 from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
-
-h = 0.02  # step size in the mesh
+from sklearn.inspection import DecisionBoundaryDisplay
 
 names = [
     "Nearest Neighbors",
@@ -95,7 +94,6 @@
 
     x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
     y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
-    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
 
     # just plot the dataset first
     cm = plt.cm.RdBu
@@ -109,8 +107,8 @@
     ax.scatter(
         X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6, edgecolors="k"
     )
-    ax.set_xlim(xx.min(), xx.max())
-    ax.set_ylim(yy.min(), yy.max())
+    ax.set_xlim(x_min, x_max)
+    ax.set_ylim(y_min, y_max)
     ax.set_xticks(())
     ax.set_yticks(())
     i += 1
@@ -120,17 +118,9 @@
         ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
         clf.fit(X_train, y_train)
         score = clf.score(X_test, y_test)
-
-        # Plot the decision boundary. For that, we will assign a color to each
-        # point in the mesh [x_min, x_max]x[y_min, y_max].
-        if hasattr(clf, "decision_function"):
-            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
-        else:
-            Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
-
-        # Put the result into a color plot
-        Z = Z.reshape(xx.shape)
-        ax.contourf(xx, yy, Z, cmap=cm, alpha=0.8)
+        DecisionBoundaryDisplay.from_estimator(
+            clf, X, cmap=cm, alpha=0.8, ax=ax, eps=0.5
+        )
 
         # Plot the training points
         ax.scatter(
@@ -146,15 +136,15 @@
             alpha=0.6,
         )
 
-        ax.set_xlim(xx.min(), xx.max())
-        ax.set_ylim(yy.min(), yy.max())
+        ax.set_xlim(x_min, x_max)
+        ax.set_ylim(y_min, y_max)
         ax.set_xticks(())
         ax.set_yticks(())
         if ds_cnt == 0:
             ax.set_title(name)
         ax.text(
-            xx.max() - 0.3,
-            yy.min() + 0.3,
+            x_max - 0.3,
+            y_min + 0.3,
             ("%.2f" % score).lstrip("0"),
            size=15,
            horizontalalignment="right",
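
The if/else removed above is what the display now handles internally: with the default response_method="auto" it prefers decision_function, then predict_proba, then predict, and for binary predict_proba it plots the positive-class column, matching the removed [:, 1] indexing. A rough sketch of that selection order (illustrative only; the real helper is private and may differ in detail):

# Sketch of the response_method="auto" fallback order (assumption: mirrors
# the removed if/else above; not the actual private helper from this commit).
def pick_response_method(clf):
    for name in ("decision_function", "predict_proba", "predict"):
        if hasattr(clf, name):
            return getattr(clf, name)
    raise ValueError("estimator exposes no supported response method")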

examples/cluster/plot_inductive_clustering.py

Lines changed: 5 additions & 10 deletions
@@ -23,12 +23,12 @@
 # Authors: Chirag Nagpal
 #          Christos Aridas
 
-import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.base import BaseEstimator, clone
 from sklearn.cluster import AgglomerativeClustering
 from sklearn.datasets import make_blobs
 from sklearn.ensemble import RandomForestClassifier
+from sklearn.inspection import DecisionBoundaryDisplay
 from sklearn.utils.metaestimators import available_if
 from sklearn.utils.validation import check_is_fitted
 
@@ -116,19 +116,14 @@ def plot_scatter(X, color, alpha=0.5):
 probable_clusters = inductive_learner.predict(X_new)
 
 
-plt.subplot(133)
+ax = plt.subplot(133)
 plot_scatter(X, cluster_labels)
 plot_scatter(X_new, probable_clusters)
 
 # Plotting decision regions
-x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
-y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
-xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
-
-Z = inductive_learner.predict(np.c_[xx.ravel(), yy.ravel()])
-Z = Z.reshape(xx.shape)
-
-plt.contourf(xx, yy, Z, alpha=0.4)
+DecisionBoundaryDisplay.from_estimator(
+    inductive_learner, X, response_method="predict", alpha=0.4, ax=ax
+)
 plt.title("Classify unknown instances")
 
 plt.show()
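
Note the explicit response_method="predict": it guarantees the plotted response is the hard cluster assignment rather than any probabilistic output the wrapped classifier might expose. The same pattern works for any fitted object with a predict method; a sketch using KMeans as a stand-in (KMeans is an illustrative assumption, not part of this example):

# Sketch: any fitted estimator with predict can be drawn once
# response_method="predict" is forced. KMeans is an illustrative stand-in.
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.inspection import DecisionBoundaryDisplay

X, _ = make_blobs(n_samples=300, centers=3, random_state=0)
km = KMeans(n_clusters=3, random_state=0).fit(X)

DecisionBoundaryDisplay.from_estimator(km, X, response_method="predict", alpha=0.4)
plt.scatter(X[:, 0], X[:, 1], c=km.labels_, s=10)
plt.show()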

examples/ensemble/plot_adaboost_twoclass.py

Lines changed: 13 additions & 11 deletions
@@ -27,6 +27,7 @@
 from sklearn.ensemble import AdaBoostClassifier
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.datasets import make_gaussian_quantiles
+from sklearn.inspection import DecisionBoundaryDisplay
 
 
 # Construct dataset
@@ -53,16 +54,18 @@
 plt.figure(figsize=(10, 5))
 
 # Plot the decision boundaries
-plt.subplot(121)
-x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
-y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
-xx, yy = np.meshgrid(
-    np.arange(x_min, x_max, plot_step), np.arange(y_min, y_max, plot_step)
+ax = plt.subplot(121)
+disp = DecisionBoundaryDisplay.from_estimator(
+    bdt,
+    X,
+    cmap=plt.cm.Paired,
+    response_method="predict",
+    ax=ax,
+    xlabel="x",
+    ylabel="y",
 )
-
-Z = bdt.predict(np.c_[xx.ravel(), yy.ravel()])
-Z = Z.reshape(xx.shape)
-cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
+x_min, x_max = disp.xx0.min(), disp.xx0.max()
+y_min, y_max = disp.xx1.min(), disp.xx1.max()
 plt.axis("tight")
 
 # Plot the training points
@@ -80,8 +83,7 @@
 plt.xlim(x_min, x_max)
 plt.ylim(y_min, y_max)
 plt.legend(loc="upper right")
-plt.xlabel("x")
-plt.ylabel("y")
+
 plt.title("Decision Boundary")
 
 # Plot the two-class decision scores
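
The display object returned by from_estimator keeps its evaluation mesh as the xx0 and xx1 attributes, which is how the hunk above recovers the plot limits without rebuilding a meshgrid. A small sketch (the dataset and tree are illustrative choices):

# Sketch: read the plot bounds back from the display's stored meshgrid.
import matplotlib.pyplot as plt
from sklearn.datasets import make_gaussian_quantiles
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.tree import DecisionTreeClassifier

X, y = make_gaussian_quantiles(n_samples=200, n_features=2, n_classes=2)
clf = DecisionTreeClassifier(max_depth=3).fit(X, y)

disp = DecisionBoundaryDisplay.from_estimator(clf, X, response_method="predict")
# xx0 / xx1 are the grid coordinate arrays; their extrema give the limits.
disp.ax_.set_xlim(disp.xx0.min(), disp.xx0.max())
disp.ax_.set_ylim(disp.xx1.min(), disp.xx1.max())
plt.show()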

examples/ensemble/plot_voting_decision_regions.py

Lines changed: 4 additions & 11 deletions
@@ -25,14 +25,14 @@
 
 from itertools import product
 
-import numpy as np
 import matplotlib.pyplot as plt
 
 from sklearn import datasets
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.svm import SVC
 from sklearn.ensemble import VotingClassifier
+from sklearn.inspection import DecisionBoundaryDisplay
 
 # Loading some example data
 iris = datasets.load_iris()
@@ -55,22 +55,15 @@
 eclf.fit(X, y)
 
 # Plotting decision regions
-x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
-y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
-xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
-
 f, axarr = plt.subplots(2, 2, sharex="col", sharey="row", figsize=(10, 8))
-
 for idx, clf, tt in zip(
     product([0, 1], [0, 1]),
     [clf1, clf2, clf3, eclf],
     ["Decision Tree (depth=4)", "KNN (k=7)", "Kernel SVM", "Soft Voting"],
 ):
-
-    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
-    Z = Z.reshape(xx.shape)
-
-    axarr[idx[0], idx[1]].contourf(xx, yy, Z, alpha=0.4)
+    DecisionBoundaryDisplay.from_estimator(
+        clf, X, alpha=0.4, ax=axarr[idx[0], idx[1]], response_method="predict"
+    )
     axarr[idx[0], idx[1]].scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
     axarr[idx[0], idx[1]].set_title(tt)
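
Two things carry the conversion here: extra keyword arguments such as alpha fall through to the underlying matplotlib plot_method, and ax= targets one cell of an existing subplot grid per call. A condensed sketch of the same pattern (two classifiers instead of four, for brevity):

# Sketch: one from_estimator call per subplot cell; alpha is forwarded to
# the default contourf plot method.
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

iris = datasets.load_iris()
X, y = iris.data[:, [0, 2]], iris.target

fig, axes = plt.subplots(1, 2, figsize=(8, 4))
for ax, clf in zip(axes, [KNeighborsClassifier(n_neighbors=7), SVC(gamma=0.1)]):
    clf.fit(X, y)
    DecisionBoundaryDisplay.from_estimator(
        clf, X, response_method="predict", alpha=0.4, ax=ax
    )
    ax.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
plt.show()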

examples/linear_model/plot_iris_logistic.py

Lines changed: 15 additions & 17 deletions
@@ -15,10 +15,10 @@
 # Modified for documentation by Jaques Grobler
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.linear_model import LogisticRegression
 from sklearn import datasets
+from sklearn.inspection import DecisionBoundaryDisplay
 
 # import some data to play with
 iris = datasets.load_iris()
@@ -29,26 +29,24 @@
 logreg = LogisticRegression(C=1e5)
 logreg.fit(X, Y)
 
-# Plot the decision boundary. For that, we will assign a color to each
-# point in the mesh [x_min, x_max]x[y_min, y_max].
-x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
-y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
-h = 0.02  # step size in the mesh
-xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
-Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])
-
-# Put the result into a color plot
-Z = Z.reshape(xx.shape)
-plt.figure(1, figsize=(4, 3))
-plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)
+_, ax = plt.subplots(figsize=(4, 3))
+DecisionBoundaryDisplay.from_estimator(
+    logreg,
+    X,
+    cmap=plt.cm.Paired,
+    ax=ax,
+    response_method="predict",
+    plot_method="pcolormesh",
+    shading="auto",
+    xlabel="Sepal length",
+    ylabel="Sepal width",
+    eps=0.5,
+)
 
 # Plot also the training points
 plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors="k", cmap=plt.cm.Paired)
-plt.xlabel("Sepal length")
-plt.ylabel("Sepal width")
 
-plt.xlim(xx.min(), xx.max())
-plt.ylim(yy.min(), yy.max())
+
 plt.xticks(())
 plt.yticks(())
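
plot_method selects the matplotlib primitive used to draw the surface (contourf by default; contour and pcolormesh are also accepted), and unknown keywords such as shading="auto" are forwarded to it. A stripped-down sketch of the call above:

# Sketch: pcolormesh rendering; shading="auto" is passed on to matplotlib.
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.linear_model import LogisticRegression

iris = datasets.load_iris()
X, Y = iris.data[:, :2], iris.target
logreg = LogisticRegression(C=1e5).fit(X, Y)

DecisionBoundaryDisplay.from_estimator(
    logreg,
    X,
    response_method="predict",
    plot_method="pcolormesh",
    shading="auto",
    cmap=plt.cm.Paired,
    eps=0.5,
)
plt.show()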

examples/linear_model/plot_logistic_multinomial.py

Lines changed: 5 additions & 13 deletions
@@ -16,6 +16,7 @@
 import matplotlib.pyplot as plt
 from sklearn.datasets import make_blobs
 from sklearn.linear_model import LogisticRegression
+from sklearn.inspection import DecisionBoundaryDisplay
 
 # make 3-class dataset for classification
 centers = [[-5, 0], [0, 1.5], [5, -1]]
@@ -31,19 +32,10 @@
     # print the training scores
     print("training score : %.3f (%s)" % (clf.score(X, y), multi_class))
 
-    # create a mesh to plot in
-    h = 0.02  # step size in the mesh
-    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
-    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
-    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
-
-    # Plot the decision boundary. For that, we will assign a color to each
-    # point in the mesh [x_min, x_max]x[y_min, y_max].
-    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
-    # Put the result into a color plot
-    Z = Z.reshape(xx.shape)
-    plt.figure()
-    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
+    _, ax = plt.subplots()
+    DecisionBoundaryDisplay.from_estimator(
+        clf, X, response_method="predict", cmap=plt.cm.Paired, ax=ax
+    )
     plt.title("Decision surface of LogisticRegression (%s)" % multi_class)
     plt.axis("tight")
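
Like the other Display objects, the class can also be constructed by hand from a precomputed grid and then plotted, which mirrors the meshgrid code removed above. A sketch; the constructor parameters xx0/xx1/response are assumed to match the attributes used elsewhere in this commit, and plot() follows the usual Display convention:

# Sketch: build the display directly from a grid (assumed constructor API).
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.linear_model import LogisticRegression

X, y = make_blobs(centers=[[-5, 0], [0, 1.5], [5, -1]], random_state=40)
clf = LogisticRegression().fit(X, y)

xx0, xx1 = np.meshgrid(
    np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 100),
    np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 100),
)
response = clf.predict(np.c_[xx0.ravel(), xx1.ravel()]).reshape(xx0.shape)

disp = DecisionBoundaryDisplay(xx0=xx0, xx1=xx1, response=response)
disp.plot(cmap=plt.cm.Paired)
plt.show()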

examples/linear_model/plot_sgd_iris.py

Lines changed: 11 additions & 14 deletions
@@ -13,6 +13,7 @@
 import matplotlib.pyplot as plt
 from sklearn import datasets
 from sklearn.linear_model import SGDClassifier
+from sklearn.inspection import DecisionBoundaryDisplay
 
 # import some data to play with
 iris = datasets.load_iris()
@@ -35,21 +36,17 @@
 std = X.std(axis=0)
 X = (X - mean) / std
 
-h = 0.02  # step size in the mesh
-
 clf = SGDClassifier(alpha=0.001, max_iter=100).fit(X, y)
-
-# create a mesh to plot in
-x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
-y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
-xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
-
-# Plot the decision boundary. For that, we will assign a color to each
-# point in the mesh [x_min, x_max]x[y_min, y_max].
-Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
-# Put the result into a color plot
-Z = Z.reshape(xx.shape)
-cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
+ax = plt.gca()
+DecisionBoundaryDisplay.from_estimator(
+    clf,
+    X,
+    cmap=plt.cm.Paired,
+    ax=ax,
+    response_method="predict",
+    xlabel=iris.feature_names[0],
+    ylabel=iris.feature_names[1],
+)
 plt.axis("tight")
 
 # Plot also the training points
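
Where the old examples tuned mesh fineness through a step size h = 0.02, from_estimator instead exposes a grid_resolution parameter (number of grid points per axis; the default is believed to be 100). A sketch:

# Sketch: grid_resolution replaces the old h step size (assumption: the
# parameter name and default come from this commit's from_estimator).
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.linear_model import SGDClassifier

iris = datasets.load_iris()
X, y = iris.data[:, :2], iris.target
X = (X - X.mean(axis=0)) / X.std(axis=0)
clf = SGDClassifier(alpha=0.001, max_iter=100).fit(X, y)

DecisionBoundaryDisplay.from_estimator(
    clf,
    X,
    response_method="predict",
    grid_resolution=200,  # finer grid, analogous to a smaller h
    xlabel=iris.feature_names[0],
    ylabel=iris.feature_names[1],
)
plt.scatter(X[:, 0], X[:, 1], c=y, edgecolor="k")
plt.show()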
