Revert "ENH add a parameter pos_label in roc_auc_score (#17594)" (#17703) · jayzed82/scikit-learn@2c87874 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2c87874

Browse files
ogrisel authored and jayzed82 committed
Revert "ENH add a parameter pos_label in roc_auc_score (scikit-learn#17594)" (scikit-learn#17703)
This reverts commit fde9212.
1 parent 1506cf6 commit 2c87874

File tree

4 files changed

+8
-72
lines changed

4 files changed

+8
-72
lines changed

doc/whats_new/v0.24.rst

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,6 @@ Changelog
122122
class to be used when computing the precision and recall statistics.
123123
:pr:`17569` by :user:`Guillaume Lemaitre <glemaitre>`.
124124

125-
- |Enhancement| Add `pos_label` parameter to :func:`roc_auc_score`.
126-
:pr:`17594` by :user:`Guillaume Lemaitre <glemaitre>`.
127-
128125
:mod:`sklearn.model_selection`
129126
..............................
130127

sklearn/metrics/_ranking.py

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -218,16 +218,14 @@ def _binary_uninterpolated_average_precision(
218218
average, sample_weight=sample_weight)
219219

220220

221-
def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None,
222-
pos_label=None):
221+
def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None):
223222
"""Binary roc auc score"""
224223
if len(np.unique(y_true)) != 2:
225224
raise ValueError("Only one class present in y_true. ROC AUC score "
226225
"is not defined in that case.")
227226

228-
fpr, tpr, _ = roc_curve(
229-
y_true, y_score, sample_weight=sample_weight, pos_label=pos_label,
230-
)
227+
fpr, tpr, _ = roc_curve(y_true, y_score,
228+
sample_weight=sample_weight)
231229
if max_fpr is None or max_fpr == 1:
232230
return auc(fpr, tpr)
233231
if max_fpr <= 0 or max_fpr > 1:
@@ -250,8 +248,7 @@ def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None,
250248

251249
@_deprecate_positional_args
252250
def roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
253-
max_fpr=None, multi_class="raise", labels=None,
254-
pos_label=None):
251+
max_fpr=None, multi_class="raise", labels=None):
255252
"""Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)
256253
from prediction scores.
257254
@@ -330,13 +327,6 @@ def roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
330327
If ``None``, the numerical or lexicographical order of the labels in
331328
``y_true`` is used.
332329
333-
pos_label : int or str, default=None
334-
The label of the positive class in the binary case. When
335-
`pos_label=None`, if `y_true` is in {-1, 1} or {0, 1}, `pos_label` is
336-
set to 1, otherwise an error will be raised.
337-
338-
.. versionadded:: 0.24
339-
340330
Returns
341331
-------
342332
auc : float
@@ -398,9 +388,10 @@ def roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
398388
return _multiclass_roc_auc_score(y_true, y_score, labels,
399389
multi_class, average, sample_weight)
400390
elif y_type == "binary":
391+
labels = np.unique(y_true)
392+
y_true = label_binarize(y_true, classes=labels)[:, 0]
401393
return _average_binary_score(partial(_binary_roc_auc_score,
402-
max_fpr=max_fpr,
403-
pos_label=pos_label),
394+
max_fpr=max_fpr),
404395
y_true, y_score, average,
405396
sample_weight=sample_weight)
406397
else: # multilabel-indicator

sklearn/metrics/tests/test_common.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -319,17 +319,6 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs):
319319
# Metrics with a "pos_label" argument
320320
METRICS_WITH_POS_LABEL = {
321321
"roc_curve",
322-
323-
"roc_auc_score",
324-
"weighted_roc_auc",
325-
"samples_roc_auc",
326-
"micro_roc_auc",
327-
"ovr_roc_auc",
328-
"weighted_ovr_roc_auc",
329-
"ovo_roc_auc",
330-
"weighted_ovo_roc_auc",
331-
"partial_roc_auc",
332-
333322
"precision_recall_curve",
334323

335324
"brier_score_loss",

sklearn/metrics/tests/test_ranking.py

Lines changed: 1 addition & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,9 @@
77
from sklearn import datasets
88
from sklearn import svm
99

10+
from sklearn.utils.extmath import softmax
1011
from sklearn.datasets import make_multilabel_classification
11-
from sklearn.datasets import load_breast_cancer
12-
from sklearn.linear_model import LogisticRegression
13-
from sklearn.model_selection import train_test_split
1412
from sklearn.random_projection import _sparse_random_matrix
15-
from sklearn.utils import shuffle
16-
from sklearn.utils.extmath import softmax
1713
from sklearn.utils.validation import check_array, check_consistent_length
1814
from sklearn.utils.validation import check_random_state
1915

@@ -1473,40 +1469,3 @@ def test_partial_roc_auc_score():
14731469
assert_almost_equal(
14741470
roc_auc_score(y_true, y_pred, max_fpr=max_fpr),
14751471
_partial_roc_auc_score(y_true, y_pred, max_fpr))
1476-
1477-
1478-
@pytest.mark.parametrize(
1479-
"decision_method", ["predict_proba", "decision_function"]
1480-
)
1481-
def test_roc_auc_score_pos_label(decision_method):
1482-
X, y = load_breast_cancer(return_X_y=True)
1483-
# create a highly imbalanced dataset
1484-
idx_positive = np.flatnonzero(y == 1)
1485-
idx_negative = np.flatnonzero(y == 0)
1486-
idx_selected = np.hstack([idx_negative, idx_positive[:25]])
1487-
X, y = X[idx_selected], y[idx_selected]
1488-
X, y = shuffle(X, y, random_state=42)
1489-
# only use 2 features to make the problem even harder
1490-
X = X[:, :2]
1491-
y = np.array(
1492-
["cancer" if c == 1 else "not cancer" for c in y], dtype=object
1493-
)
1494-
X_train, X_test, y_train, y_test = train_test_split(
1495-
X, y, stratify=y, random_state=0,
1496-
)
1497-
1498-
classifier = LogisticRegression()
1499-
classifier.fit(X_train, y_train)
1500-
1501-
# sanity check to be sure the positive class is classes_[0] and that we
1502-
# are betrayed by the class imbalance
1503-
assert classifier.classes_.tolist() == ["cancer", "not cancer"]
1504-
pos_label = "cancer"
1505-
1506-
y_pred = getattr(classifier, decision_method)(X_test)
1507-
y_pred = y_pred[:, 0] if y_pred.ndim == 2 else -y_pred
1508-
1509-
fpr, tpr, _ = roc_curve(y_test, y_pred, pos_label=pos_label)
1510-
roc_auc = roc_auc_score(y_test, y_pred, pos_label=pos_label)
1511-
1512-
assert roc_auc == pytest.approx(np.trapz(tpr, fpr))

0 commit comments

Comments
 (0)
0