ENH Improved error message for bad predict_proba shape in ThresholdScorer · jnothman/scikit-learn@1f4451e · GitHub

Commit 1f4451e

Reshama Shaikh authored and jnothman committed
ENH Improved error message for bad predict_proba shape in ThresholdScorer (scikit-learn#12486)
Continues and resolves scikit-learn#12221, fixes scikit-learn#7598
1 parent 0ec901f commit 1f4451e

File tree

2 files changed: +35, -7 lines changed

sklearn/metrics/scorer.py

Lines changed: 15 additions & 2 deletions
@@ -126,7 +126,13 @@ def __call__(self, clf, X, y, sample_weight=None):
         y_type = type_of_target(y)
         y_pred = clf.predict_proba(X)
         if y_type == "binary":
-            y_pred = y_pred[:, 1]
+            if y_pred.shape[1] == 2:
+                y_pred = y_pred[:, 1]
+            else:
+                raise ValueError('got predict_proba of shape {},'
+                                 ' but need classifier with two'
+                                 ' classes for {} scoring'.format(
+                                     y_pred.shape, self._score_func.__name__))
         if sample_weight is not None:
             return self._sign * self._score_func(y, y_pred,
                                                  sample_weight=sample_weight,
@@ -183,7 +189,14 @@ def __call__(self, clf, X, y, sample_weight=None):
                 y_pred = clf.predict_proba(X)
 
                 if y_type == "binary":
-                    y_pred = y_pred[:, 1]
+                    if y_pred.shape[1] == 2:
+                        y_pred = y_pred[:, 1]
+                    else:
+                        raise ValueError('got predict_proba of shape {},'
+                                         ' but need classifier with two'
+                                         ' classes for {} scoring'.format(
+                                             y_pred.shape,
+                                             self._score_func.__name__))
                 elif isinstance(y_pred, list):
                     y_pred = np.vstack([p[:, -1] for p in y_pred]).T
 
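The guard above is added to both the probability scorer and the thresholded scorer: a classifier fitted on a single class returns predict_proba with one column, so there is no positive-class column to slice. A minimal sketch of how the new message surfaces, mirroring the test added in this commit (an illustration, not part of the diff; it assumes a scikit-learn build that includes this change):

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.metrics import get_scorer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

X, y = make_blobs(random_state=0, centers=2)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Train on a single class so predict_proba has shape (n_samples, 1).
clf = DecisionTreeClassifier()
clf.fit(X_train, np.zeros_like(y_train))
print(clf.predict_proba(X_test).shape)  # one probability column, not two

try:
    # 'roc_auc' is a thresholded scorer; the tree has no decision_function,
    # so the scorer falls back to predict_proba and hits the new shape check.
    get_scorer('roc_auc')(clf, X_test, y_test)
except ValueError as exc:
    print(exc)  # got predict_proba of shape (..., 1), but need classifier
                # with two classes for roc_auc_score scoring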

sklearn/metrics/tests/test_score_objects.py

Lines changed: 20 additions & 5 deletions
@@ -186,10 +186,11 @@ def check_scoring_validator_for_single_metric_usecases(scoring_validator):
 
 
 def check_multimetric_scoring_single_metric_wrapper(*args, **kwargs):
-    # This wraps the _check_multimetric_scoring to take in single metric
-    # scoring parameter so we can run the tests that we will run for
-    # check_scoring, for check_multimetric_scoring too for single-metric
-    # usecases
+    # This wraps the _check_multimetric_scoring to take in
+    # single metric scoring parameter so we can run the tests
+    # that we will run for check_scoring, for check_multimetric_scoring
+    # too for single-metric usecases
+
     scorers, is_multi = _check_multimetric_scoring(*args, **kwargs)
     # For all single metric use cases, it should register as not multimetric
     assert_false(is_multi)
@@ -370,7 +371,21 @@ def test_thresholded_scorers():
     X, y = make_blobs(random_state=0, centers=3)
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
     clf.fit(X_train, y_train)
-    assert_raises(ValueError, get_scorer('roc_auc'), clf, X_test, y_test)
+    with pytest.raises(ValueError, match="multiclass format is not supported"):
+        get_scorer('roc_auc')(clf, X_test, y_test)
+
+    # test error is raised with a single class present in model
+    # (predict_proba shape is not suitable for binary auc)
+    X, y = make_blobs(random_state=0, centers=2)
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+    clf = DecisionTreeClassifier()
+    clf.fit(X_train, np.zeros_like(y_train))
+    with pytest.raises(ValueError, match="need classifier with two classes"):
+        get_scorer('roc_auc')(clf, X_test, y_test)
+
+    # for proba scorers
+    with pytest.raises(ValueError, match="need classifier with two classes"):
+        get_scorer('neg_log_loss')(clf, X_test, y_test)
 
 
 def test_thresholded_scorers_multilabel_indicator_data():
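The final "for proba scorers" assertion above exercises the same guard through the probability scorer. A standalone sketch of that path (again an illustration, not part of the diff), where 'neg_log_loss' calls predict_proba directly:

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.metrics import get_scorer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

X, y = make_blobs(random_state=0, centers=2)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = DecisionTreeClassifier().fit(X_train, np.zeros_like(y_train))

try:
    # The probability scorer uses predict_proba unconditionally; its single
    # (n_samples, 1) column triggers the same ValueError as for 'roc_auc'.
    get_scorer('neg_log_loss')(clf, X_test, y_test)
except ValueError as exc:
    print(exc)  # reports the offending shape and the log_loss score function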

0 commit comments
