scikit-learn
diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
Lines changed: 5 additions & 3 deletions
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
Lines changed: 5 additions & 5 deletions
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
Lines changed: 50 additions & 1 deletion
@@ -470,9 +470,11 @@ Changelog
   Gain and Normalized Discounted Cumulative Gain. :pr:`9951` by :user:`Jérôme
   Dockès <jeromedockes>`.
 
-- |Feature| Added multiclass support to :func:`metrics.roc_auc_score`.
-  :issue:`12789` by :user:`Kathy Chen <kathyxchen>`,
-  :user:`Mohamed Maskani <maskani-moh>`, and :user:`Thomas Fan <thomasjpfan>`.
+- |Feature| Added multiclass support to :func:`metrics.roc_auc_score` with
+  corresponding scorers ``'roc_auc_ovr'``, ``'roc_auc_ovo'``,
+  ``'roc_auc_ovr_weighted'``, and ``'roc_auc_ovo_weighted'``. :pr:`12789`
+  and :pr:`15274` by :user:`Kathy Chen <kathyxchen>`,
+  :user:`Mohamed Maskani <maskani-moh>`, and `Thomas Fan`_.
 
 - |Feature| Add :class:`metrics.mean_tweedie_deviance` measuring the
   Tweedie deviance for a given ``power`` parameter. Also add mean Poisson
 
@@ -247,7 +247,7 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
         if y_type == "binary":
             if y_pred.shape[1] == 2:
                 y_pred = y_pred[:, 1]
-            else:
+            elif y_pred.shape[1] == 1:  # not multiclass
                 raise ValueError('got predict_proba of shape {},'
                                  ' but need classifier with two'
                                  ' classes for {} scoring'.format(
@@ -645,14 +645,14 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False,
                              needs_threshold=True)
 average_precision_scorer = make_scorer(average_precision_score,
                                        needs_threshold=True)
-roc_auc_ovo_scorer = make_scorer(roc_auc_score, needs_threshold=True,
+roc_auc_ovo_scorer = make_scorer(roc_auc_score, needs_proba=True,
                                  multi_class='ovo')
-roc_auc_ovo_weighted_scorer = make_scorer(roc_auc_score, needs_threshold=True,
+roc_auc_ovo_weighted_scorer = make_scorer(roc_auc_score, needs_proba=True,
                                           multi_class='ovo',
                                           average='weighted')
-roc_auc_ovr_scorer = make_scorer(roc_auc_score, needs_threshold=True,
+roc_auc_ovr_scorer = make_scorer(roc_auc_score, needs_proba=True,
                                  multi_class='ovr')
-roc_auc_ovr_weighted_scorer = make_scorer(roc_auc_score, needs_threshold=True,
+roc_auc_ovr_weighted_scorer = make_scorer(roc_auc_score, needs_proba=True,
                                           multi_class='ovr',
                                           average='weighted')
 
 
@@ -4,6 +4,7 @@
 import os
 import numbers
 from unittest.mock import Mock
+from functools import partial
 
 import numpy as np
 import pytest
@@ -29,7 +30,7 @@
 from sklearn.svm import LinearSVC
 from sklearn.pipeline import make_pipeline
 from sklearn.cluster import KMeans
-from sklearn.linear_model import Ridge, LogisticRegression
+from sklearn.linear_model import Ridge, LogisticRegression, Perceptron
 from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
 from sklearn.datasets import make_blobs
 from sklearn.datasets import make_classification
@@ -670,3 +671,51 @@ def test_multimetric_scorer_sanity_check():
     for key, value in result.items():
         score_name = scorers[key]
         assert_allclose(value, seperate_scores[score_name])
+
+
+@pytest.mark.parametrize('scorer_name, metric', [  # name -> equivalent call
+    ('roc_auc_ovr', partial(roc_auc_score, multi_class='ovr')),
+    ('roc_auc_ovo', partial(roc_auc_score, multi_class='ovo')),
+    ('roc_auc_ovr_weighted', partial(roc_auc_score, multi_class='ovr',
+                                     average='weighted')),
+    ('roc_auc_ovo_weighted', partial(roc_auc_score, multi_class='ovo',
+                                     average='weighted'))])
+def test_multiclass_roc_proba_scorer(scorer_name, metric):
+    scorer = get_scorer(scorer_name)  # scorer under test, fetched by name
+    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
+                               random_state=0)  # 3-class toy problem
+    lr = LogisticRegression(multi_class="multinomial").fit(X, y)
+    y_proba = lr.predict_proba(X)  # same probabilities the scorer should use
+    expected_score = metric(y, y_proba)  # reference: metric called directly
+
+    assert scorer(lr, X, y) == pytest.approx(expected_score)
+
+
+def test_multiclass_roc_proba_scorer_label():  # explicit `labels` case
+    scorer = make_scorer(roc_auc_score, multi_class='ovo',
+                         labels=[0, 1, 2], needs_proba=True)
+    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
+                               random_state=0)
+    lr = LogisticRegression(multi_class="multinomial").fit(X, y)
+    y_proba = lr.predict_proba(X)  # from the model fitted on 3-class y
+
+    y_binary = y == 0  # boolean target: only two distinct values
+    expected_score = roc_auc_score(y_binary, y_proba,
+                                   multi_class='ovo',
+                                   labels=[0, 1, 2])  # same kwargs as scorer
+
+    assert scorer(lr, X, y_binary) == pytest.approx(expected_score)
+
+
+@pytest.mark.parametrize('scorer_name', [
+    'roc_auc_ovr', 'roc_auc_ovo',
+    'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted'])
+def test_multiclass_roc_no_proba_scorer_errors(scorer_name):
+    # Perceptron has no predict_proba, so the proba-based scorers must raise
+    scorer = get_scorer(scorer_name)
+    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
+                               random_state=0)
+    lr = Perceptron().fit(X, y)  # despite the name, a Perceptron
+    msg = "'Perceptron' object has no attribute 'predict_proba'"
+    with pytest.raises(AttributeError, match=msg):
+        scorer(lr, X, y)  # expected to raise AttributeError