@@ -248,27 +248,32 @@ def roc_auc_score(y_true, y_score, average="macro", sample_weight=None,
248
248
"""Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)
249
249
from prediction scores.
250
250
251
- Note: this implementation is restricted to the binary classification task
252
- or multilabel classification task in label indicator format .
251
+ Note: this implementation can be used with binary, multiclass and
252
+ multilabel classification, but some restrictions apply (see Parameters).
253
253
254
254
Read more in the :ref:`User Guide <roc_metrics>`.
255
255
256
256
Parameters
257
257
----------
258
- y_true : array, shape = [n_samples] or [n_samples, n_classes]
259
- True binary labels or binary label indicators.
260
- The multiclass case expects shape = [n_samples] and labels
261
- with values in ``range(n_classes)``.
262
-
263
- y_score : array, shape = [n_samples] or [n_samples, n_classes]
264
- Target scores, can either be probability estimates of the positive
265
- class, confidence values, or non-thresholded measure of decisions
266
- (as returned by "decision_function" on some classifiers). For binary
267
- y_true, y_score is supposed to be the score of the class with greater
268
- label. The multiclass case expects shape = [n_samples, n_classes]
269
- where the scores correspond to probability estimates.
270
-
271
- average : string, [None, 'micro', 'macro' (default), 'samples', 'weighted']
258
+ y_true : array-like of shape (n_samples,) or (n_samples, n_classes)
259
+ True labels or binary label indicators. The binary and multiclass cases
260
+ expect labels with shape (n_samples,) while the multilabel case expects
261
+ binary label indicators with shape (n_samples, n_classes).
262
+
263
+ y_score : array-like of shape (n_samples,) or (n_samples, n_classes)
264
+ Target scores. In the binary and multilabel cases, these can be either
265
+ probability estimates or non-thresholded decision values (as returned
266
+ by `decision_function` on some classifiers). In the multiclass case,
267
+ these must be probability estimates which sum to 1. The binary
268
+ case expects a shape (n_samples,), and the scores must be the scores of
269
+ the class with the greater label. The multiclass and multilabel
270
+ cases expect a shape (n_samples, n_classes). In the multiclass case,
271
+ the order of the class scores must correspond to the order of
272
+ ``labels``, if provided, or else to the numerical or lexicographical
273
+ order of the labels in ``y_true``.
274
+
275
+ average : {'micro', 'macro', 'samples', 'weighted'} or None, \
276
+ default='macro'
272
277
If ``None``, the scores for each class are returned. Otherwise,
273
278
this determines the type of averaging performed on the data:
274
279
Note: multiclass ROC AUC currently only handles the 'macro' and
@@ -291,26 +296,32 @@ def roc_auc_score(y_true, y_score, average="macro", sample_weight=None,
291
296
sample_weight : array-like of shape (n_samples,), default=None
292
297
Sample weights.
293
298
294
- max_fpr : float > 0 and <= 1, optional
295
- If not ``None``, the standardized partial AUC [3 ]_ over the range
299
+ max_fpr : float > 0 and <= 1, default=None
300
+ If not ``None``, the standardized partial AUC [2]_ over the range
296
301
[0, max_fpr] is returned. For the multiclass case, ``max_fpr``
297
302
should be either equal to ``None`` or ``1.0`` as AUC ROC partial
298
303
computation currently is not supported for multiclass.
299
304
300
- multi_class : string, 'ovr' or 'ovo', optional(default='raise')
301
- Determines the type of multiclass configuration to use.
302
- ``multi_class`` must be provided when ``y_true`` is multiclass.
305
+ multi_class : {'raise', 'ovr', 'ovo'}, default='raise'
306
+ Multiclass only. Determines the type of configuration to use. The
307
+ default value raises an error, so either ``'ovr'`` or ``'ovo'`` must be
308
+ passed explicitly.
303
309
304
310
``'ovr'``:
305
- Calculate metrics for the multiclass case using the one-vs-rest
306
- approach.
311
+ Computes the AUC of each class against the rest [3]_ [4]_. This
312
+ treats the multiclass case in the same way as the multilabel case.
313
+ Sensitive to class imbalance even when ``average == 'macro'``,
314
+ because class imbalance affects the composition of each of the
315
+ 'rest' groupings.
307
316
``'ovo'``:
308
- Calculate metrics for the multiclass case using the one-vs-one
309
- approach.
317
+ Computes the average AUC of all possible pairwise combinations of
318
+ classes [5]_. Insensitive to class imbalance when
319
+ ``average == 'macro'``.
310
320
311
- labels : array, shape = [n_classes] or None, optional (default=None)
312
- List of labels to index ``y_score`` used for multiclass. If ``None``,
313
- the lexicon order of ``y_true`` is used to index ``y_score``.
321
+ labels : array-like of shape (n_classes,), default=None
322
+ Multiclass only. List of labels that index the classes in ``y_score``.
323
+ If ``None``, the numerical or lexicographical order of the labels in
324
+ ``y_true`` is used.
314
325
315
326
Returns
316
327
-------
@@ -321,12 +332,22 @@ def roc_auc_score(y_true, y_score, average="macro", sample_weight=None,
321
332
.. [1] `Wikipedia entry for the Receiver operating characteristic
322
333
<https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_
323
334
324
- .. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition
325
- Letters, 2006, 27(8):861-874.
326
-
327
- .. [3] `Analyzing a portion of the ROC curve. McClish, 1989
335
+ .. [2] `Analyzing a portion of the ROC curve. McClish, 1989
328
336
<https://www.ncbi.nlm.nih.gov/pubmed/2668680>`_
329
337
338
+ .. [3] Provost, F., Domingos, P. (2000). Well-trained PETs: Improving
339
+ probability estimation trees (Section 6.2), CeDER Working Paper
340
+ #IS-00-04, Stern School of Business, New York University.
341
+
342
+ .. [4] `Fawcett, T. (2006). An introduction to ROC analysis. Pattern
343
+ Recognition Letters, 27(8), 861-874.
344
+ <https://www.sciencedirect.com/science/article/pii/S016786550500303X>`_
345
+
346
+ .. [5] `Hand, D.J., Till, R.J. (2001). A Simple Generalisation of the Area
347
+ Under the ROC Curve for Multiple Class Classification Problems.
348
+ Machine Learning, 45(2), 171-186.
349
+ <http://link.springer.com/article/10.1023/A:1010920819831>`_
350
+
330
351
See also
331
352
--------
332
353
average_precision_score : Area under the precision-recall curve
@@ -341,7 +362,6 @@ def roc_auc_score(y_true, y_score, average="macro", sample_weight=None,
341
362
>>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
342
363
>>> roc_auc_score(y_true, y_scores)
343
364
0.75
344
-
345
365
"""
346
366
347
367
y_type = type_of_target(y_true)