betatim/scikit-learn · Commit cd25abe
MAINT ensure that pos_label support all possible types (scikit-learn#25317)
1 parent 1714eed commit cd25abe

4 files changed: +70 / -14 lines changed

sklearn/metrics/_classification.py

Lines changed: 7 additions & 7 deletions
@@ -732,7 +732,7 @@ def jaccard_score(
         labels are column indices. By default, all labels in ``y_true`` and
         ``y_pred`` are used in sorted order.
 
-    pos_label : str or int, default=1
+    pos_label : int, float, bool or str, default=1
         The class to report if ``average='binary'`` and the data is binary.
         If the data are multiclass or multilabel, this will be ignored;
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
@@ -1083,7 +1083,7 @@ def f1_score(
         .. versionchanged:: 0.17
            Parameter `labels` improved for multiclass problem.
 
-    pos_label : str or int, default=1
+    pos_label : int, float, bool or str, default=1
         The class to report if ``average='binary'`` and the data is binary.
         If the data are multiclass or multilabel, this will be ignored;
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
@@ -1231,7 +1231,7 @@ def fbeta_score(
         .. versionchanged:: 0.17
            Parameter `labels` improved for multiclass problem.
 
-    pos_label : str or int, default=1
+    pos_label : int, float, bool or str, default=1
         The class to report if ``average='binary'`` and the data is binary.
         If the data are multiclass or multilabel, this will be ignored;
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
@@ -1491,7 +1491,7 @@ def precision_recall_fscore_support(
         labels are column indices. By default, all labels in ``y_true`` and
         ``y_pred`` are used in sorted order.
 
-    pos_label : str or int, default=1
+    pos_label : int, float, bool or str, default=1
         The class to report if ``average='binary'`` and the data is binary.
         If the data are multiclass or multilabel, this will be ignored;
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
@@ -1893,7 +1893,7 @@ def precision_score(
         .. versionchanged:: 0.17
            Parameter `labels` improved for multiclass problem.
 
-    pos_label : str or int, default=1
+    pos_label : int, float, bool or str, default=1
         The class to report if ``average='binary'`` and the data is binary.
         If the data are multiclass or multilabel, this will be ignored;
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
@@ -2034,7 +2034,7 @@ def recall_score(
         .. versionchanged:: 0.17
            Parameter `labels` improved for multiclass problem.
 
-    pos_label : str or int, default=1
+    pos_label : int, float, bool or str, default=1
         The class to report if ``average='binary'`` and the data is binary.
         If the data are multiclass or multilabel, this will be ignored;
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
@@ -2878,7 +2878,7 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None):
     sample_weight : array-like of shape (n_samples,), default=None
         Sample weights.
 
-    pos_label : int or str, default=None
+    pos_label : int, float, bool or str, default=None
         Label of the positive class. `pos_label` will be inferred in the
         following manner:
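
The docstring updates above only widen the documented type of `pos_label`; the accepted call patterns look roughly like the sketch below (illustrative data, not part of the diff, assuming a scikit-learn build that includes this commit):

    import numpy as np
    from sklearn.metrics import f1_score, brier_score_loss

    # str labels: name the positive class explicitly
    y_true = np.array(["spam", "ham", "spam", "ham"])
    y_pred = np.array(["spam", "spam", "spam", "ham"])
    f1_score(y_true, y_pred, pos_label="spam")

    # bool labels: pos_label=True selects the positive class
    f1_score([True, False, True], [True, True, True], pos_label=True)

    # float labels with a probabilistic metric (second argument is predicted probabilities)
    brier_score_loss([1.0, 0.0, 1.0, 0.0], [0.9, 0.2, 0.8, 0.3], pos_label=1.0)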

sklearn/metrics/_ranking.py

Lines changed: 4 additions & 4 deletions
@@ -21,7 +21,7 @@
 
 import warnings
 from functools import partial
-from numbers import Real, Integral
+from numbers import Real
 
 import numpy as np
 from scipy.sparse import csr_matrix, issparse
@@ -252,7 +252,7 @@ def _binary_uninterpolated_average_precision(
     {
         "y_true": ["array-like"],
         "y_score": ["array-like"],
-        "pos_label": [Integral, str, None],
+        "pos_label": [Real, str, "boolean", None],
         "sample_weight": ["array-like", None],
     }
 )
@@ -278,7 +278,7 @@ def det_curve(y_true, y_score, pos_label=None, sample_weight=None):
         class, confidence values, or non-thresholded measure of decisions
         (as returned by "decision_function" on some classifiers).
 
-    pos_label : int or str, default=None
+    pos_label : int, float, bool or str, default=None
         The label of the positive class.
         When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1},
         ``pos_label`` is set to 1, otherwise an error will be raised.
@@ -848,7 +848,7 @@ def precision_recall_curve(y_true, probas_pred, *, pos_label=None, sample_weight
         class, or non-thresholded measure of decisions (as returned by
         `decision_function` on some classifiers).
 
-    pos_label : int or str, default=None
+    pos_label : int, float, bool or str, default=None
         The label of the positive class.
         When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1},
         ``pos_label`` is set to 1, otherwise an error will be raised.
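
For the ranking metrics, the parameter-validation constraint switches from `Integral` to `Real` plus `"boolean"`, so float and (numpy) boolean labels pass validation alongside ints and strings; a plain float is not an `Integral`, and `numpy.bool_` is not registered as one either. A minimal usage sketch under the same assumption as above (illustrative data only):

    import numpy as np
    from sklearn.metrics import det_curve, precision_recall_curve

    y_score = np.array([0.8, 0.3, 0.6, 0.9, 0.4, 0.2])

    # boolean class labels
    y_true_bool = np.array([True, False, True, True, False, False])
    fpr, fnr, thresholds = det_curve(y_true_bool, y_score, pos_label=True)

    # float class labels
    y_true_float = np.array([1.0, 0.0, 1.0, 1.0, 0.0, 0.0])
    precision, recall, thresholds = precision_recall_curve(y_true_float, y_score, pos_label=1.0)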

sklearn/metrics/tests/test_classification.py

Lines changed: 33 additions & 3 deletions
@@ -103,7 +103,6 @@ def make_prediction(dataset=None, binary=False):
 
 
 def test_classification_report_dictionary_output():
-
     # Test performance report with dictionary output
     iris = datasets.load_iris()
     y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)
@@ -1874,7 +1873,6 @@ def test_prf_warnings():
     # average of per-label scores
     f, w = precision_recall_fscore_support, UndefinedMetricWarning
     for average in [None, "weighted", "macro"]:
-
         msg = (
             "Precision and F-score are ill-defined and "
             "being set to 0.0 in labels with no predicted samples."
@@ -1974,7 +1972,6 @@ def test_prf_no_warnings_if_zero_division_set(zero_division):
     # average of per-label scores
     f = precision_recall_fscore_support
     for average in [None, "weighted", "macro"]:
-
         assert_no_warnings(
             f, [0, 1, 2], [1, 1, 2], average=average, zero_division=zero_division
         )
@@ -2635,3 +2632,36 @@ def test_balanced_accuracy_score(y_true, y_pred):
     adjusted = balanced_accuracy_score(y_true, y_pred, adjusted=True)
     chance = balanced_accuracy_score(y_true, np.full_like(y_true, y_true[0]))
     assert adjusted == (balanced - chance) / (1 - chance)
+
+
+@pytest.mark.parametrize(
+    "metric",
+    [
+        jaccard_score,
+        f1_score,
+        partial(fbeta_score, beta=0.5),
+        precision_recall_fscore_support,
+        precision_score,
+        recall_score,
+        brier_score_loss,
+    ],
+)
+@pytest.mark.parametrize(
+    "classes", [(False, True), (0, 1), (0.0, 1.0), ("zero", "one")]
+)
+def test_classification_metric_pos_label_types(metric, classes):
+    """Check that the metric works with different types of `pos_label`.
+
+    We can expect `pos_label` to be a bool, an integer, a float, a string.
+    No error should be raised for those types.
+    """
+    rng = np.random.RandomState(42)
+    n_samples, pos_label = 10, classes[-1]
+    y_true = rng.choice(classes, size=n_samples, replace=True)
+    if metric is brier_score_loss:
+        # brier score loss requires probabilities
+        y_pred = rng.uniform(size=n_samples)
+    else:
+        y_pred = y_true.copy()
+    result = metric(y_true, y_pred, pos_label=pos_label)
+    assert not np.any(np.isnan(result))

sklearn/metrics/tests/test_ranking.py

Lines changed: 26 additions & 0 deletions
@@ -2115,3 +2115,29 @@ def test_label_ranking_avg_precision_score_should_allow_csr_matrix_for_y_true_in
     y_score = np.array([[0.5, 0.9, 0.6], [0, 0, 1]])
     result = label_ranking_average_precision_score(y_true, y_score)
     assert result == pytest.approx(2 / 3)
+
+
+@pytest.mark.parametrize(
+    "metric", [average_precision_score, det_curve, precision_recall_curve, roc_curve]
+)
+@pytest.mark.parametrize(
+    "classes", [(False, True), (0, 1), (0.0, 1.0), ("zero", "one")]
+)
+def test_ranking_metric_pos_label_types(metric, classes):
+    """Check that the metric works with different types of `pos_label`.
+
+    We can expect `pos_label` to be a bool, an integer, a float, a string.
+    No error should be raised for those types.
+    """
+    rng = np.random.RandomState(42)
+    n_samples, pos_label = 10, classes[-1]
+    y_true = rng.choice(classes, size=n_samples, replace=True)
+    y_proba = rng.rand(n_samples)
+    result = metric(y_true, y_proba, pos_label=pos_label)
+    if isinstance(result, float):
+        assert not np.isnan(result)
+    else:
+        metric_1, metric_2, thresholds = result
+        assert not np.isnan(metric_1).any()
+        assert not np.isnan(metric_2).any()
+        assert not np.isnan(thresholds).any()
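
The branch on `isinstance(result, float)` in this test reflects the two return shapes involved: `average_precision_score` returns a single float, while the curve functions return a tuple of arrays. A tiny sketch of that distinction (illustrative values, same assumptions as the earlier sketches):

    import numpy as np
    from sklearn.metrics import average_precision_score, roc_curve

    y_true = np.array(["zero", "one", "one", "zero"])
    y_score = np.array([0.1, 0.8, 0.6, 0.4])

    ap = average_precision_score(y_true, y_score, pos_label="one")      # single float
    fpr, tpr, thresholds = roc_curve(y_true, y_score, pos_label="one")  # three arrays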
