scikit-learn
diff --git a/‎doc/developers/plotting.rst
Lines changed: 21 additions & 14 deletions b/‎doc/developers/plotting.rst
Lines changed: 21 additions & 14 deletions
diff --git a/‎doc/visualizations.rst
Lines changed: 15 additions & 8 deletions b/‎doc/visualizations.rst
Lines changed: 15 additions & 8 deletions
diff --git a/‎doc/whats_new/v1.0.rst
Lines changed: 8 additions & 0 deletions b/‎doc/whats_new/v1.0.rst
Lines changed: 8 additions & 0 deletions
diff --git a/‎examples/ensemble/plot_feature_transformation.py
Lines changed: 31 additions & 22 deletions b/‎examples/ensemble/plot_feature_transformation.py
Lines changed: 31 additions & 22 deletions
diff --git a/‎examples/miscellaneous/plot_roc_curve_visualization_api.py
Lines changed: 7 additions & 7 deletions b/‎examples/miscellaneous/plot_roc_curve_visualization_api.py
Lines changed: 7 additions & 7 deletions
diff --git a/‎examples/model_selection/plot_det.py
Lines changed: 14 additions & 11 deletions b/‎examples/model_selection/plot_det.py
Lines changed: 14 additions & 11 deletions
@@ -18,12 +18,14 @@ stored and the plotting is done in a `plot` method. The display object's
 `__init__` method contains only the data needed to create the visualization.
 The `plot` method takes in parameters that only have to do with visualization,
 such as a matplotlib axes. The `plot` method will store the matplotlib artists
-as attributes allowing for style adjustments through the display object. A
-`plot_*` helper function accepts parameters to do the computation and the
-parameters used for plotting. After the helper function creates the display
-object with the computed values, it calls the display's plot method. Note that
-the `plot` method defines attributes related to matplotlib, such as the line
-artist. This allows for customizations after calling the `plot` method.
+as attributes allowing for style adjustments through the display object. The
+`Display` class should define one or both class methods: `from_estimator` and
+`from_predictions`. These methods allow creating the `Display` object from
+the estimator and some data or from the true and predicted values. After these
+class methods create the display object with the computed values, they call the
+display's plot method. Note that the `plot` method defines attributes related
+to matplotlib, such as the line artist. This allows for customizations after
+calling the `plot` method.
 
 For example, the `RocCurveDisplay` defines the following methods and
 attributes::
@@ -36,20 +38,25 @@ attributes::
            self.roc_auc = roc_auc
            self.estimator_name = estimator_name
 
+       @classmethod
+       def from_estimator(cls, estimator, X, y):
+           # get the predictions
+           y_pred = estimator.predict_proba(X)[:, 1]
+           return cls.from_predictions(y, y_pred, estimator.__class__.__name__)
+
+       @classmethod
+       def from_predictions(cls, y, y_pred, estimator_name):
+           # do ROC computation from y and y_pred
+           fpr, tpr, roc_auc = ...
+           viz = RocCurveDisplay(fpr, tpr, roc_auc, estimator_name)
+           return viz.plot()
+
        def plot(self, ax=None, name=None, **kwargs):
            ...
            self.line_ = ...
            self.ax_ = ax
            self.figure_ = ax.figure_
 
-   def plot_roc_curve(estimator, X, y, pos_label=None, sample_weight=None,
-                      drop_intermediate=True, response_method="auto",
-                      name=None, ax=None, **kwargs):
-       # do computation
-       viz = RocCurveDisplay(fpr, tpr, roc_auc, 
-                                estimator.__class__.__name__)
-       return viz.plot(ax=ax, name=name, **kwargs)
-
 Read more in :ref:`sphx_glr_auto_examples_miscellaneous_plot_roc_curve_visualization_api.py`
 and the :ref:`User Guide <visualizations>`.
 
 
@@ -12,21 +12,26 @@ Visualizations
 
 Scikit-learn defines a simple API for creating visualizations for machine
 learning. The key feature of this API is to allow for quick plotting and
-visual adjustments without recalculation. In the following example, we plot a
-ROC curve for a fitted support vector machine:
+visual adjustments without recalculation. We provide `Display` classes that
+expose two methods for creating plots: `from_estimator` and
+`from_predictions`. The `from_estimator` method will take a fitted estimator
+and some data (`X` and `y`) and create a `Display` object. Sometimes, we would
+like to compute the predictions only once; in that case, one should use
+`from_predictions` instead. In the following example, we plot a ROC curve for
+a fitted support vector machine:
 
 .. code-block:: python
 
     from sklearn.model_selection import train_test_split
     from sklearn.svm import SVC
-    from sklearn.metrics import plot_roc_curve
+    from sklearn.metrics import RocCurveDisplay
     from sklearn.datasets import load_wine
 
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
     svc = SVC(random_state=42)
     svc.fit(X_train, y_train)
 
-    svc_disp = plot_roc_curve(svc, X_test, y_test)
+    svc_disp = RocCurveDisplay.from_estimator(svc, X_test, y_test)
 
 .. figure:: auto_examples/miscellaneous/images/sphx_glr_plot_roc_curve_visualization_api_001.png
     :target: auto_examples/miscellaneous/plot_roc_curve_visualization_api.html
@@ -36,9 +41,11 @@ ROC curve for a fitted support vector machine:
 The returned `svc_disp` object allows us to continue using the already computed
 ROC curve for SVC in future plots. In this case, the `svc_disp` is a
 :class:`~sklearn.metrics.RocCurveDisplay` that stores the computed values as
-attributes called `roc_auc`, `fpr`, and `tpr`. Next, we train a random forest
-classifier and plot the previously computed roc curve again by using the `plot`
-method of the `Display` object.
+attributes called `roc_auc`, `fpr`, and `tpr`. Be aware that we could get
+the predictions from the support vector machine and then use `from_predictions`
+instead of `from_estimator`. Next, we train a random forest classifier and plot
+the previously computed roc curve again by using the `plot` method of the
+`Display` object.
 
 .. code-block:: python
 
@@ -49,7 +56,7 @@ method of the `Display` object.
     rfc.fit(X_train, y_train)
 
     ax = plt.gca()
-    rfc_disp = plot_roc_curve(rfc, X_test, y_test, ax=ax, alpha=0.8)
+    rfc_disp = RocCurveDisplay.from_estimator(rfc, X_test, y_test, ax=ax, alpha=0.8)
     svc_disp.plot(ax=ax, alpha=0.8)
 
 .. figure:: auto_examples/miscellaneous/images/sphx_glr_plot_roc_curve_visualization_api_002.png
 
@@ -605,6 +605,14 @@ Changelog
   class methods and will be removed in 1.2.
   :pr:`18543` by `Guillaume Lemaitre`_.
 
+- |API| :class:`metrics.RocCurveDisplay` exposes two class methods
+  :func:`~metrics.RocCurveDisplay.from_estimator` and
+  :func:`~metrics.RocCurveDisplay.from_predictions` allowing to create
+  a ROC curve plot using an estimator or the predictions.
+  :func:`metrics.plot_roc_curve` is deprecated in favor of these two
+  class methods and will be removed in 1.2.
+  :pr:`20569` by `Guillaume Lemaitre`_.
+
 - |API| :class:`metrics.PrecisionRecallDisplay` exposes two class methods
   :func:`~metrics.PrecisionRecallDisplay.from_estimator` and
   :func:`~metrics.PrecisionRecallDisplay.from_predictions` allowing to create
 
@@ -26,7 +26,8 @@
 print(__doc__)
 
 from sklearn import set_config
-set_config(display='diagram')
+
+set_config(display="diagram")
 
 # %%
 # First, we will create a large dataset and split it into three sets:
@@ -45,10 +46,11 @@
 X, y = make_classification(n_samples=80000, random_state=10)
 
 X_full_train, X_test, y_full_train, y_test = train_test_split(
-    X, y, test_size=0.5, random_state=10)
-X_train_ensemble, X_train_linear, y_train_ensemble, y_train_linear = \
-    train_test_split(X_full_train, y_full_train, test_size=0.5,
-                     random_state=10)
+    X, y, test_size=0.5, random_state=10
+)
+X_train_ensemble, X_train_linear, y_train_ensemble, y_train_linear = train_test_split(
+    X_full_train, y_full_train, test_size=0.5, random_state=10
+)
 
 # %%
 # For each of the ensemble methods, we will use 10 estimators and a maximum
@@ -64,11 +66,13 @@
 from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
 
 random_forest = RandomForestClassifier(
-    n_estimators=n_estimators, max_depth=max_depth, random_state=10)
+    n_estimators=n_estimators, max_depth=max_depth, random_state=10
+)
 random_forest.fit(X_train_ensemble, y_train_ensemble)
 
 gradient_boosting = GradientBoostingClassifier(
-    n_estimators=n_estimators, max_depth=max_depth, random_state=10)
+    n_estimators=n_estimators, max_depth=max_depth, random_state=10
+)
 _ = gradient_boosting.fit(X_train_ensemble, y_train_ensemble)
 
 # %%
@@ -78,7 +82,8 @@
 from sklearn.ensemble import RandomTreesEmbedding
 
 random_tree_embedding = RandomTreesEmbedding(
-    n_estimators=n_estimators, max_depth=max_depth, random_state=0)
+    n_estimators=n_estimators, max_depth=max_depth, random_state=0
+)
 
 # %%
 # Now, we will create three pipelines that will use the above embedding as
@@ -90,8 +95,7 @@
 from sklearn.linear_model import LogisticRegression
 from sklearn.pipeline import make_pipeline
 
-rt_model = make_pipeline(
-    random_tree_embedding, LogisticRegression(max_iter=1000))
+rt_model = make_pipeline(random_tree_embedding, LogisticRegression(max_iter=1000))
 rt_model.fit(X_train_linear, y_train_linear)
 
 # %%
@@ -108,12 +112,13 @@ def rf_apply(X, model):
     return model.apply(X)
 
 
-rf_leaves_yielder = FunctionTransformer(
-    rf_apply, kw_args={"model": random_forest})
+rf_leaves_yielder = FunctionTransformer(rf_apply, kw_args={"model": random_forest})
 
 rf_model = make_pipeline(
-    rf_leaves_yielder, OneHotEncoder(handle_unknown="ignore"),
-    LogisticRegression(max_iter=1000))
+    rf_leaves_yielder,
+    OneHotEncoder(handle_unknown="ignore"),
+    LogisticRegression(max_iter=1000),
+)
 rf_model.fit(X_train_linear, y_train_linear)
 
 
@@ -123,18 +128,21 @@ def gbdt_apply(X, model):
 
 
 gbdt_leaves_yielder = FunctionTransformer(
-    gbdt_apply, kw_args={"model": gradient_boosting})
+    gbdt_apply, kw_args={"model": gradient_boosting}
+)
 
 gbdt_model = make_pipeline(
-    gbdt_leaves_yielder, OneHotEncoder(handle_unknown="ignore"),
-    LogisticRegression(max_iter=1000))
+    gbdt_leaves_yielder,
+    OneHotEncoder(handle_unknown="ignore"),
+    LogisticRegression(max_iter=1000),
+)
 gbdt_model.fit(X_train_linear, y_train_linear)
 
 # %%
 # We can finally show the different ROC curves for all the models.
 
 import matplotlib.pyplot as plt
-from sklearn.metrics import plot_roc_curve
+from sklearn.metrics import RocCurveDisplay
 
 fig, ax = plt.subplots()
 
@@ -148,9 +156,10 @@ def gbdt_apply(X, model):
 
 model_displays = {}
 for name, pipeline in models:
-    model_displays[name] = plot_roc_curve(
-        pipeline, X_test, y_test, ax=ax, name=name)
-_ = ax.set_title('ROC curve')
+    model_displays[name] = RocCurveDisplay.from_estimator(
+        pipeline, X_test, y_test, ax=ax, name=name
+    )
+_ = ax.set_title("ROC curve")
 
 # %%
 fig, ax = plt.subplots()
@@ -159,4 +168,4 @@ def gbdt_apply(X, model):
 
 ax.set_xlim(0, 0.2)
 ax.set_ylim(0.8, 1)
-_ = ax.set_title('ROC curve (zoomed in at top left)')
+_ = ax.set_title("ROC curve (zoomed in at top left)")
@@ -17,7 +17,7 @@
 import matplotlib.pyplot as plt
 from sklearn.svm import SVC
 from sklearn.ensemble import RandomForestClassifier
-from sklearn.metrics import plot_roc_curve
+from sklearn.metrics import RocCurveDisplay
 from sklearn.datasets import load_wine
 from sklearn.model_selection import train_test_split
 
@@ -32,15 +32,15 @@
 # Plotting the ROC Curve
 # ----------------------
 # Next, we plot the ROC curve with a single call to
-# :func:`sklearn.metrics.plot_roc_curve`. The returned `svc_disp` object allows
-# us to continue using the already computed ROC curve for the SVC in future
-# plots.
-svc_disp = plot_roc_curve(svc, X_test, y_test)
+# :func:`sklearn.metrics.RocCurveDisplay.from_estimator`. The returned
+# `svc_disp` object allows us to continue using the already computed ROC curve
+# for the SVC in future plots.
+svc_disp = RocCurveDisplay.from_estimator(svc, X_test, y_test)
 plt.show()
 
 # %%
 # Training a Random Forest and Plotting the ROC Curve
-# --------------------------------------------------------
+# ---------------------------------------------------
 # We train a random forest classifier and create a plot comparing it to the SVC
 # ROC curve. Notice how `svc_disp` uses
 # :func:`~sklearn.metrics.RocCurveDisplay.plot` to plot the SVC ROC curve
@@ -50,6 +50,6 @@
 rfc = RandomForestClassifier(n_estimators=10, random_state=42)
 rfc.fit(X_train, y_train)
 ax = plt.gca()
-rfc_disp = plot_roc_curve(rfc, X_test, y_test, ax=ax, alpha=0.8)
+rfc_disp = RocCurveDisplay.from_estimator(rfc, X_test, y_test, ax=ax, alpha=0.8)
 svc_disp.plot(ax=ax, alpha=0.8)
 plt.show()
@@ -51,8 +51,7 @@
 
 from sklearn.datasets import make_classification
 from sklearn.ensemble import RandomForestClassifier
-from sklearn.metrics import DetCurveDisplay
-from sklearn.metrics import plot_roc_curve
+from sklearn.metrics import DetCurveDisplay, RocCurveDisplay
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
@@ -68,26 +67,30 @@
 }
 
 X, y = make_classification(
-    n_samples=N_SAMPLES, n_features=2, n_redundant=0, n_informative=2,
-    random_state=1, n_clusters_per_class=1)
+    n_samples=N_SAMPLES,
+    n_features=2,
+    n_redundant=0,
+    n_informative=2,
+    random_state=1,
+    n_clusters_per_class=1,
+)
 
-X_train, X_test, y_train, y_test = train_test_split(
-    X, y, test_size=.4, random_state=0)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
 
 # prepare plots
 fig, [ax_roc, ax_det] = plt.subplots(1, 2, figsize=(11, 5))
 
 for name, clf in classifiers.items():
     clf.fit(X_train, y_train)
 
-    plot_roc_curve(clf, X_test, y_test, ax=ax_roc, name=name)
+    RocCurveDisplay.from_estimator(clf, X_test, y_test, ax=ax_roc, name=name)
     DetCurveDisplay.from_estimator(clf, X_test, y_test, ax=ax_det, name=name)
 
-ax_roc.set_title('Receiver Operating Characteristic (ROC) curves')
-ax_det.set_title('Detection Error Tradeoff (DET) curves')
+ax_roc.set_title("Receiver Operating Characteristic (ROC) curves")
+ax_det.set_title("Detection Error Tradeoff (DET) curves")
 
-ax_roc.grid(linestyle='--')
-ax_det.grid(linestyle='--')
+ax_roc.grid(linestyle="--")
+ax_det.grid(linestyle="--")
 
 plt.legend()
 plt.show()