scikit-learn · jeremiedbb · Mar 30, 2023 · Mar 24, 2023 · Mar 24, 2023 · Mar 25, 2023
diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst
@@ -318,6 +318,12 @@ Changelog
   curves.
   :pr:`24668` by :user:`dberenbaum`.
 
+- |Enhancement| :meth:`metrics.RocCurveDisplay.from_estimator` and
+  :meth:`metrics.RocCurveDisplay.from_predictions` now accept two new keywords,
+  `plot_chance_level` and `chance_level_kw` to plot the baseline chance
+  level. This line is exposed in the `chance_level_` attribute.
+  :pr:`25987` by :user:`Yao Xiao <Charlie-XIAO>`.
+
 - |Fix| :func:`log_loss` raises a warning if the values of the parameter `y_pred` are
   not normalized, instead of actually normalizing them in the metric. Starting from
   1.5 this will raise an error. :pr:`25299` by :user:`Omar Salman <OmarManzoor`.

diff --git a/examples/miscellaneous/plot_outlier_detection_bench.py b/examples/miscellaneous/plot_outlier_detection_bench.py
@@ -172,12 +172,12 @@ def compute_prediction(X, model_name):
 pos_label = 0  # mean 0 belongs to positive class
 rows = math.ceil(len(datasets_name) / cols)
 
-fig, axs = plt.subplots(rows, cols, figsize=(10, rows * 3))
+fig, axs = plt.subplots(rows, cols, figsize=(10, rows * 3), sharex=True, sharey=True)
 
 for i, dataset_name in enumerate(datasets_name):
     (X, y) = preprocess_dataset(dataset_name=dataset_name)
 
-    for model_name in models_name:
+    for model_idx, model_name in enumerate(models_name):
         y_pred = compute_prediction(X, model_name=model_name)
         display = RocCurveDisplay.from_predictions(
             y,
@@ -186,10 +186,12 @@ def compute_prediction(X, model_name):
             name=model_name,
             linewidth=linewidth,
             ax=axs[i // cols, i % cols],
+            plot_chance_level=(model_idx == len(models_name) - 1),
+            chance_level_kw={
+                "linewidth": linewidth,
+                "linestyle": ":",
+            },
         )
-    axs[i // cols, i % cols].plot([0, 1], [0, 1], linewidth=linewidth, linestyle=":")
     axs[i // cols, i % cols].set_title(dataset_name)
-    axs[i // cols, i % cols].set_xlabel("False Positive Rate")
-    axs[i // cols, i % cols].set_ylabel("True Positive Rate")
 plt.tight_layout(pad=2.0)  # spacing between subplots
 plt.show()
diff --git a/examples/model_selection/plot_roc.py b/examples/model_selection/plot_roc.py
@@ -125,8 +125,8 @@
     y_score[:, class_id],
     name=f"{class_of_interest} vs the rest",
     color="darkorange",
+    plot_chance_level=True,
 )
-plt.plot([0, 1], [0, 1], "k--", label="chance level (AUC = 0.5)")
 plt.axis("square")
 plt.xlabel("False Positive Rate")
 plt.ylabel("True Positive Rate")
@@ -161,8 +161,8 @@
     y_score.ravel(),
     name="micro-average OvR",
     color="darkorange",
+    plot_chance_level=True,
 )
-plt.plot([0, 1], [0, 1], "k--", label="chance level (AUC = 0.5)")
 plt.axis("square")
 plt.xlabel("False Positive Rate")
 plt.ylabel("True Positive Rate")
@@ -281,9 +281,9 @@
         name=f"ROC curve for {target_names[class_id]}",
         color=color,
         ax=ax,
+        plot_chance_level=(class_id == 2),
     )
 
-plt.plot([0, 1], [0, 1], "k--", label="ROC curve for chance level (AUC = 0.5)")
 plt.axis("square")
 plt.xlabel("False Positive Rate")
 plt.ylabel("True Positive Rate")
@@ -364,8 +364,8 @@
         y_score[ab_mask, idx_b],
         ax=ax,
         name=f"{label_b} as positive class",
+        plot_chance_level=True,
     )
-    plt.plot([0, 1], [0, 1], "k--", label="chance level (AUC = 0.5)")
     plt.axis("square")
     plt.xlabel("False Positive Rate")
     plt.ylabel("True Positive Rate")
@@ -413,7 +413,7 @@
     linestyle=":",
     linewidth=4,
 )
-plt.plot([0, 1], [0, 1], "k--", label="chance level (AUC = 0.5)")
+plt.plot([0, 1], [0, 1], "k--", label="Chance level (AUC = 0.5)")
 plt.axis("square")
 plt.xlabel("False Positive Rate")
 plt.ylabel("True Positive Rate")

diff --git a/examples/model_selection/plot_roc_crossval.py b/examples/model_selection/plot_roc_crossval.py
@@ -70,7 +70,8 @@
 from sklearn.metrics import RocCurveDisplay
 from sklearn.model_selection import StratifiedKFold
 
-cv = StratifiedKFold(n_splits=6)
+n_splits = 6
+cv = StratifiedKFold(n_splits=n_splits)
 classifier = svm.SVC(kernel="linear", probability=True, random_state=random_state)
 
 tprs = []
@@ -88,12 +89,12 @@
         alpha=0.3,
         lw=1,
         ax=ax,
+        plot_chance_level=(fold == n_splits - 1),
     )
     interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
     interp_tpr[0] = 0.0
     tprs.append(interp_tpr)
     aucs.append(viz.roc_auc)
-ax.plot([0, 1], [0, 1], "k--", label="chance level (AUC = 0.5)")
 
 mean_tpr = np.mean(tprs, axis=0)
 mean_tpr[-1] = 1.0

diff --git a/sklearn/metrics/_plot/roc_curve.py b/sklearn/metrics/_plot/roc_curve.py
@@ -43,6 +43,11 @@ class RocCurveDisplay:
     line_ : matplotlib Artist
         ROC Curve.
 
+    chance_level_ : matplotlib Artist or None
+        The chance level line. It is `None` if the chance level is not plotted.
+
+        .. versionadded:: 1.3
+
     ax_ : matplotlib Axes
         Axes with ROC Curve.
 
@@ -81,7 +86,15 @@ def __init__(self, *, fpr, tpr, roc_auc=None, estimator_name=None, pos_label=Non
         self.roc_auc = roc_auc
         self.pos_label = pos_label
 
-    def plot(self, ax=None, *, name=None, **kwargs):
+    def plot(
+        self,
+        ax=None,
+        *,
+        name=None,
+        plot_chance_level=False,
+        chance_level_kw=None,
+        **kwargs,
+    ):
         """Plot visualization.
 
         Extra keyword arguments will be passed to matplotlib's ``plot``.
@@ -96,6 +109,17 @@ def plot(self, ax=None, *, name=None, **kwargs):
             Name of ROC Curve for labeling. If `None`, use `estimator_name` if
             not `None`, otherwise no labeling is shown.
 
+        plot_chance_level : bool, default=False
+            Whether to plot the chance level.
+
+            .. versionadded:: 1.3
+
+        chance_level_kw : dict, default=None
+            Keyword arguments to be passed to matplotlib's `plot` for rendering
+            the chance level line.
+
+            .. versionadded:: 1.3
+
         **kwargs : dict
             Keyword arguments to be passed to matplotlib's `plot`.
 
@@ -118,6 +142,15 @@ def plot(self, ax=None, *, name=None, **kwargs):
 
         line_kwargs.update(**kwargs)
 
+        chance_level_line_kw = {
+            "label": "Chance level (AUC = 0.5)",
+            "color": "k",
+            "linestyle": "--",
+        }
+
+        if chance_level_kw is not None:
+            chance_level_line_kw.update(**chance_level_kw)
+
         import matplotlib.pyplot as plt
 
         if ax is None:
@@ -132,6 +165,11 @@ def plot(self, ax=None, *, name=None, **kwargs):
         ylabel = "True Positive Rate" + info_pos_label
         ax.set(xlabel=xlabel, ylabel=ylabel)
 
+        if plot_chance_level:
+            (self.chance_level_,) = ax.plot((0, 1), (0, 1), **chance_level_line_kw)
+        else:
+            self.chance_level_ = None
+
         if "label" in line_kwargs:
             ax.legend(loc="lower right")
 
@@ -152,6 +190,8 @@ def from_estimator(
         pos_label=None,
         name=None,
         ax=None,
+        plot_chance_level=False,
+        chance_level_kw=None,
         **kwargs,
     ):
         """Create a ROC Curve display from an estimator.
@@ -195,6 +235,17 @@ def from_estimator(
         ax : matplotlib axes, default=None
             Axes object to plot on. If `None`, a new figure and axes is created.
 
+        plot_chance_level : bool, default=False
+            Whether to plot the chance level.
+
+            .. versionadded:: 1.3
+
+        chance_level_kw : dict, default=None
+            Keyword arguments to be passed to matplotlib's `plot` for rendering
+            the chance level line.
+
+            .. versionadded:: 1.3
+
         **kwargs : dict
             Keyword arguments to be passed to matplotlib's `plot`.
 
@@ -245,6 +296,8 @@ def from_estimator(
             name=name,
             ax=ax,
             pos_label=pos_label,
+            plot_chance_level=plot_chance_level,
+            chance_level_kw=chance_level_kw,
             **kwargs,
         )
 
@@ -259,6 +312,8 @@ def from_predictions(
         pos_label=None,
         name=None,
         ax=None,
+        plot_chance_level=False,
+        chance_level_kw=None,
         **kwargs,
     ):
         """Plot ROC curve given the true and predicted values.
@@ -298,6 +353,17 @@ def from_predictions(
             Axes object to plot on. If `None`, a new figure and axes is
             created.
 
+        plot_chance_level : bool, default=False
+            Whether to plot the chance level.
+
+            .. versionadded:: 1.3
+
+        chance_level_kw : dict, default=None
+            Keyword arguments to be passed to matplotlib's `plot` for rendering
+            the chance level line.
+
+            .. versionadded:: 1.3
+
         **kwargs : dict
             Additional keywords arguments passed to matplotlib `plot` function.
 
@@ -348,4 +414,10 @@ def from_predictions(
             fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name=name, pos_label=pos_label
         )
 
-        return viz.plot(ax=ax, name=name, **kwargs)
+        return viz.plot(
+            ax=ax,
+            name=name,
+            plot_chance_level=plot_chance_level,
+            chance_level_kw=chance_level_kw,
+            **kwargs,
+        )
diff --git a/sklearn/metrics/_plot/tests/test_roc_curve_display.py b/sklearn/metrics/_plot/tests/test_roc_curve_display.py
@@ -125,6 +125,74 @@ def test_roc_curve_display_plotting(
     assert display.ax_.get_xlabel() == expected_xlabel
 
 
+@pytest.mark.parametrize("plot_chance_level", [True, False])
+@pytest.mark.parametrize(
+    "chance_level_kw",
+    [None, {"linewidth": 1, "color": "red", "label": "DummyEstimator"}],
+)
+@pytest.mark.parametrize(
+    "constructor_name",
+    ["from_estimator", "from_predictions"],
+)
+def test_roc_curve_chance_level_line(
+    pyplot,
+    data_binary,
+    plot_chance_level,
+    chance_level_kw,
+    constructor_name,
+):
+    """Check the chance leve line plotting behaviour."""
+    X, y = data_binary
+
+    lr = LogisticRegression()
+    lr.fit(X, y)
+
+    y_pred = getattr(lr, "predict_proba")(X)
+    y_pred = y_pred if y_pred.ndim == 1 else y_pred[:, 1]
+
+    if constructor_name == "from_estimator":
+        display = RocCurveDisplay.from_estimator(
+            lr,
+            X,
+            y,
+            alpha=0.8,
+            plot_chance_level=plot_chance_level,
+            chance_level_kw=chance_level_kw,
+        )
+    else:
+        display = RocCurveDisplay.from_predictions(
+            y,
+            y_pred,
+            alpha=0.8,
+            plot_chance_level=plot_chance_level,
+            chance_level_kw=chance_level_kw,
+        )
+
+    import matplotlib as mpl  # noqa
+
+    assert isinstance(display.line_, mpl.lines.Line2D)
+    assert display.line_.get_alpha() == 0.8
+    assert isinstance(display.ax_, mpl.axes.Axes)
+    assert isinstance(display.figure_, mpl.figure.Figure)
+
+    if plot_chance_level:
+        assert isinstance(display.chance_level_, mpl.lines.Line2D)
+        assert tuple(display.chance_level_.get_xdata()) == (0, 1)
+        assert tuple(display.chance_level_.get_ydata()) == (0, 1)
+    else:
+        assert display.chance_level_ is None
+
+    # Checking for chance level line styles
+    if plot_chance_level and chance_level_kw is None:
+        assert display.chance_level_.get_color() == "k"
+        assert display.chance_level_.get_linestyle() == "--"
+        assert display.chance_level_.get_label() == "Chance level (AUC = 0.5)"
+    elif plot_chance_level:
+        assert display.chance_level_.get_label() == chance_level_kw["label"]
+        assert display.chance_level_.get_color() == chance_level_kw["color"]
+        assert display.chance_level_.get_linewidth() == chance_level_kw["linewidth"]
+
+
 @pytest.mark.parametrize(
     "clf",
     [