FEA add binary_classification_curve by SuccessMoses · Pull Request #30134 · scikit-learn/scikit-learn · GitHub

FEA add binary_classification_curve #30134


Open

SuccessMoses wants to merge 27 commits into main from SuccessMoses:feature

Changes from all commits · 27 commits
75a8512
Changed _binary_clf_curve to binary_clf_curve
SuccessMoses Oct 22, 2024
8b26c82
Changed binary_clf_curve to binary_classification_curve
SuccessMoses Oct 22, 2024
4e2b276
DOC Added examples for binary_classification_curve
SuccessMoses Oct 22, 2024
ad7ff13
Merge branch 'main' into feature
SuccessMoses Oct 22, 2024
97d3a92
Reformatted with black
SuccessMoses Oct 22, 2024
8f8c41c
Merge branch 'scikit-learn:main' into feature
SuccessMoses Oct 23, 2024
48c80cc
Merge branch 'feature' of https://github.com/SuccessMoses/scikit-lear…
SuccessMoses Oct 23, 2024
c6079b7
update documentation
SuccessMoses Nov 6, 2024
c37f479
update documentation
SuccessMoses Nov 6, 2024
bba7958
update documentation
SuccessMoses Nov 6, 2024
761221f
Merge branch 'main' into feature
SuccessMoses Nov 6, 2024
8c89cbe
add new api to api_reference
SuccessMoses Nov 6, 2024
f4be0b0
add new api to __init__.py
SuccessMoses Nov 6, 2024
50f1a01
add validate_parameters
SuccessMoses Nov 6, 2024
fbf0172
add changelog
SuccessMoses Nov 6, 2024
4477d6d
update changelog
SuccessMoses Nov 7, 2024
0d7ff48
fix doctest error
SuccessMoses Nov 7, 2024
ac58b10
Merge branch 'feature' of https://github.com/SuccessMoses/scikit-lear…
SuccessMoses Nov 7, 2024
26b5ab9
add -
SuccessMoses Nov 8, 2024
47baa3f
Merge branch 'main' into feature
SuccessMoses Nov 8, 2024
5b40023
fix docstring
SuccessMoses Nov 8, 2024
2bb2d4b
Merge branch 'feature' of https://github.com/SuccessMoses/scikit-lear…
SuccessMoses Nov 8, 2024
f9105e2
fix docstring
SuccessMoses Nov 8, 2024
06228bf
update changelog message
SuccessMoses Nov 16, 2024
3fd686d
Improve documentation for binary_classification_curve
SuccessMoses Nov 19, 2024
3b864be
fix doc
SuccessMoses Nov 19, 2024
3094eca
fix CI
SuccessMoses Nov 19, 2024
1 change: 1 addition & 0 deletions doc/api_reference.py
@@ -727,6 +727,7 @@ def _get_submodule(module_name, submodule_name):
"auc",
"average_precision_score",
"balanced_accuracy_score",
"binary_classification_curve",
"brier_score_loss",
"class_likelihood_ratios",
"classification_report",
23 changes: 23 additions & 0 deletions doc/modules/model_evaluation.rst
@@ -339,6 +339,7 @@ Some of these are restricted to the binary classification case:
roc_curve
class_likelihood_ratios
det_curve
binary_classification_curve


Others also work in the multiclass case:
@@ -674,6 +675,28 @@ false negatives and true positives as follows::
>>> tn, fp, fn, tp
(2, 1, 2, 3)

With :func:`binary_classification_curve` we can obtain the counts of true positives
and false positives for every decision threshold, and derive the counts of true
negatives and false negatives from them::

>>> import numpy as np
>>> from sklearn.metrics import binary_classification_curve
>>> y_true = np.array([0., 0., 1., 1.])
>>> y_score = np.array([0.1, 0.4, 0.35, 0.8])
>>> fps, tps, thresholds = binary_classification_curve(y_true, y_score)
>>> fps
array([0., 1., 1., 2.])
>>> tps
array([1., 1., 2., 2.])
>>> thresholds
array([0.8 , 0.4 , 0.35, 0.1 ])
>>> # True negatives can be calculated using:
>>> fps[-1] - fps
array([2., 1., 1., 0.])
>>> # False negatives can be calculated using:
>>> tps[-1] - tps
array([1., 1., 0., 0.])
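
From these counts, the rates used by :func:`roc_curve` follow directly, for
instance (a small sketch; ``tpr`` and ``fpr`` are just illustrative local names)::

>>> tpr = tps / tps[-1]
>>> fpr = fps / fps[-1]
>>> tpr
array([0.5, 0.5, 1. , 1. ])
>>> fpr
array([0. , 0.5, 0.5, 1. ])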


.. rubric:: Examples

* See :ref:`sphx_glr_auto_examples_model_selection_plot_confusion_matrix.py`
3 changes: 3 additions & 0 deletions doc/whats_new/upcoming_changes/sklearn.metrics/30134.api.rst
@@ -0,0 +1,3 @@
- Add :func:`metrics.binary_classification_curve`, which returns the number of
true and false positives per threshold.
By :user:`Success Moses <SuccessMoses>`
55 changes: 52 additions & 3 deletions examples/model_selection/plot_confusion_matrix.py
@@ -1,7 +1,7 @@
"""
================
Confusion matrix
================
==============================================================
Evaluate the performance of a classifier with Confusion Matrix
==============================================================

Example of confusion matrix usage to evaluate the quality
of the output of a classifier on the iris data set. The
@@ -69,3 +69,52 @@
print(disp.confusion_matrix)

plt.show()

# %%
# Binary Classification
# =====================
#
# For binary problems, the confusion matrix returned by
# :func:`sklearn.metrics.confusion_matrix` can be raveled to obtain the counts of
# true negatives, false positives, false negatives and true positives.
#
# :func:`sklearn.metrics.binary_classification_curve` counts true positives and
# false positives for every decision threshold (true negatives and false negatives
# follow from these counts). It is the building block of binary classification
# metrics such as :func:`sklearn.metrics.roc_curve` and
# :func:`sklearn.metrics.det_curve`; the sketch after the plot below shows the
# connection.

from sklearn.datasets import make_classification
from sklearn.metrics import binary_classification_curve

X, y = make_classification(
n_samples=100,
n_features=20,
n_informative=20,
n_redundant=0,
n_classes=2,
random_state=42,
)

X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.3, random_state=42
)

classifier = svm.SVC(kernel="linear", C=0.01, probability=True)
classifier.fit(X_train, y_train)

y_score = classifier.predict_proba(X_test)[:, 1]

fps, tps, thresholds = binary_classification_curve(y_test, y_score)

# Plot the counts of true and false positives against the decision thresholds
plt.figure(figsize=(10, 6))

plt.plot(thresholds, tps, label="True Positives (TPs)", color="blue")
plt.plot(thresholds, fps, label="False Positives (FPs)", color="red")
plt.xlabel("Thresholds")
plt.ylabel("Count")
plt.title("TPs and FPs vs Thresholds")
plt.legend()
plt.grid()

plt.show()
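
# %%
# A minimal sketch of the connection to the DET curve mentioned above: dividing the
# counts by the total number of negatives and positives gives the false positive
# and false negative rates returned by :func:`sklearn.metrics.det_curve` (assuming
# ``y_test`` contains both classes; ``fpr`` and ``fnr`` are illustrative names).

fpr = fps / fps[-1]
fnr = (tps[-1] - tps) / tps[-1]

plt.figure(figsize=(6, 6))
plt.plot(fpr, fnr, color="purple")
plt.xlabel("False Positive Rate")
plt.ylabel("False Negative Rate")
plt.title("DET-style curve derived from binary_classification_curve")
plt.show()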
2 changes: 2 additions & 0 deletions sklearn/metrics/__init__.py
@@ -35,6 +35,7 @@
from ._ranking import (
auc,
average_precision_score,
binary_classification_curve,
coverage_error,
dcg_score,
det_curve,
@@ -101,6 +102,7 @@
"auc",
"average_precision_score",
"balanced_accuracy_score",
"binary_classification_curve",
"calinski_harabasz_score",
"check_scoring",
"class_likelihood_ratios",
2 changes: 2 additions & 0 deletions sklearn/metrics/_classification.py
@@ -321,6 +321,8 @@ def confusion_matrix(
ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix
given the true and predicted labels.
ConfusionMatrixDisplay : Confusion Matrix visualization.
binary_classification_curve : Compute true and false positives per binary
classification threshold.

References
----------
53 changes: 48 additions & 5 deletions sklearn/metrics/_ranking.py
@@ -326,6 +326,8 @@ def det_curve(y_true, y_score, pos_label=None, sample_weight=None):
DetCurveDisplay : DET curve visualization.
roc_curve : Compute Receiver operating characteristic (ROC) curve.
precision_recall_curve : Compute precision-recall curve.
binary_classification_curve : Compute true and false positives per binary
classification threshold.

Examples
--------
@@ -341,7 +343,7 @@
>>> thresholds
array([0.35, 0.4 , 0.8 ])
"""
fps, tps, thresholds = _binary_clf_curve(
fps, tps, thresholds = binary_classification_curve(
y_true, y_score, pos_label=pos_label, sample_weight=sample_weight
)

@@ -774,9 +776,22 @@ def _multiclass_roc_auc_score(
)


def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None):
@validate_params(
{
"y_true": ["array-like"],
"y_score": ["array-like"],
"pos_label": [Real, str, "boolean", None],
"sample_weight": ["array-like", None],
},
prefer_skip_nested_validation=True,
)
def binary_classification_curve(y_true, y_score, pos_label=None, sample_weight=None):
"""Calculate true and false positives per binary classification threshold.

Read more in the :ref:`User Guide <confusion_matrix>`.

.. versionadded:: 1.6

Parameters
----------
y_true : ndarray of shape (n_samples,)
@@ -807,6 +822,30 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None):

thresholds : ndarray of shape (n_thresholds,)
Decreasing score values.

See Also
--------
confusion_matrix : Compute confusion matrix to evaluate the accuracy of a
classification.
roc_curve : Compute Receiver operating characteristic (ROC) curve.
precision_recall_curve : Compute precision-recall curve.
det_curve : Compute Detection error tradeoff (DET) curve.

Examples
--------
>>> import numpy as np
>>> from sklearn.metrics import binary_classification_curve
>>> y_true = np.array([0., 0., 1., 1.])
>>> y_score = np.array([0.1, 0.4, 0.35, 0.8])
>>> fps, tps, thresholds = binary_classification_curve(y_true, y_score)
>>> fps
array([0., 1., 1., 2.])
>>> tps
array([1., 1., 2., 2.])
>>> thresholds
array([0.8 , 0.4 , 0.35, 0.1 ])
"""
# Check to make sure y_true is valid
y_type = type_of_target(y_true, input_name="y_true")
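# Illustrative sketch (not part of this module): the counting logic that
# binary_classification_curve is based on, assuming binary y_true encoded as 0/1
# and ignoring pos_label, sample_weight and input validation.
import numpy as np

def binary_counts_sketch(y_true, y_score):
    # Sort scores in decreasing order and reorder the labels accordingly.
    order = np.argsort(y_score, kind="stable")[::-1]
    y_score = np.asarray(y_score)[order]
    y_true = np.asarray(y_true)[order]

    # Keep the last index of every run of identical scores so that each distinct
    # score value yields exactly one threshold.
    distinct = np.where(np.diff(y_score))[0]
    threshold_idxs = np.r_[distinct, y_true.size - 1]

    # Cumulative number of positives (tps) and negatives (fps) scored at or
    # above each threshold.
    tps = np.cumsum(y_true)[threshold_idxs]
    fps = 1 + threshold_idxs - tps
    return fps, tps, y_score[threshold_idxs]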
@@ -962,6 +1001,8 @@ def precision_recall_curve(
average_precision_score : Compute average precision from prediction scores.
det_curve: Compute error rates for different probability thresholds.
roc_curve : Compute Receiver operating characteristic (ROC) curve.
binary_classification_curve : Compute true and false positives per binary
classification threshold.

Examples
--------
@@ -996,7 +1037,7 @@
)
y_score = probas_pred

fps, tps, thresholds = _binary_clf_curve(
fps, tps, thresholds = binary_classification_curve(
y_true, y_score, pos_label=pos_label, sample_weight=sample_weight
)

@@ -1106,6 +1147,8 @@ def roc_curve(
(ROC) curve given the true and predicted values.
det_curve: Compute error rates for different probability thresholds.
roc_auc_score : Compute the area under the ROC curve.
binary_classification_curve : Compute true and false positives per binary
classification threshold.

Notes
-----
@@ -1139,7 +1182,7 @@
>>> thresholds
array([ inf, 0.8 , 0.4 , 0.35, 0.1 ])
"""
fps, tps, thresholds = _binary_clf_curve(
fps, tps, thresholds = binary_classification_curve(
y_true, y_score, pos_label=pos_label, sample_weight=sample_weight
)

@@ -1149,7 +1192,7 @@
# Here np.diff(_, 2) is used as a "second derivative" to tell if there
# is a corner at the point. Both fps and tps must be tested to handle
# thresholds with multiple data points (which are combined in
# _binary_clf_curve). This keeps all cases where the point should be kept,
# binary_classification_curve). This keeps all cases where the point should be kept,
# but does not drop more complicated cases like fps = [1, 3, 7],
# tps = [1, 2, 4]; there is no harm in keeping too many thresholds.
if drop_intermediate and len(fps) > 2:
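# Illustrative sketch of the corner test described in the comment above (the mask
# mirrors the one computed inside roc_curve; the fps/tps values are made up for
# the example).
import numpy as np

fps = np.array([0, 1, 1, 2, 3])
tps = np.array([1, 1, 2, 2, 2])
# A point is dropped when both cumulative counts are locally linear there, i.e.
# both second differences are zero; the two endpoints are always kept.
optimal_idxs = np.where(
    np.r_[True, np.logical_or(np.diff(fps, 2), np.diff(tps, 2)), True]
)[0]
# optimal_idxs is [0, 1, 2, 4]: the point (fps=2, tps=2) lies on the segment
# between (1, 2) and (3, 2) and is dropped.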
8 changes: 4 additions & 4 deletions sklearn/metrics/tests/test_ranking.py
@@ -839,7 +839,7 @@ def test_auc_score_non_binary_class():


@pytest.mark.parametrize("curve_func", CURVE_FUNCS)
def test_binary_clf_curve_multiclass_error(curve_func):
def test_binary_classification_curve_multiclass_error(curve_func):
rng = check_random_state(404)
y_true = rng.randint(0, 3, size=10)
y_pred = rng.rand(10)
@@ -849,7 +849,7 @@ def test_binary_clf_curve_multiclass_error(curve_func):


@pytest.mark.parametrize("curve_func", CURVE_FUNCS)
def test_binary_clf_curve_implicit_pos_label(curve_func):
def test_binary_classification_curve_implicit_pos_label(curve_func):
# Check that using string class labels raises an informative
# error for any supported string dtype:
msg = (
@@ -877,7 +877,7 @@ def test_binary_clf_curve_implicit_pos_label(curve_func):
@pytest.mark.filterwarnings("ignore:Support for labels represented as bytes")
@pytest.mark.parametrize("curve_func", [precision_recall_curve, roc_curve])
@pytest.mark.parametrize("labels_type", ["list", "array"])
def test_binary_clf_curve_implicit_bytes_pos_label(curve_func, labels_type):
def test_binary_classification_curve_implicit_bytes_pos_label(curve_func, labels_type):
# Check that using bytes class labels raises an informative
# error for any supported string dtype:
labels = _convert_container([b"a", b"b"], labels_type)
@@ -891,7 +891,7 @@ def test_binary_clf_curve_implicit_bytes_pos_label(curve_func, labels_type):


@pytest.mark.parametrize("curve_func", CURVE_FUNCS)
def test_binary_clf_curve_zero_sample_weight(curve_func):
def test_binary_classification_curve_zero_sample_weight(curve_func):
y_true = [0, 0, 1, 1, 1]
y_score = [0.1, 0.2, 0.3, 0.4, 0.5]
sample_weight = [1, 1, 1, 0.5, 0]