jeremiedbb
diff --git a/‎doc/whats_new/v0.21.rst
Lines changed: 8 additions & 0 deletions b/‎doc/whats_new/v0.21.rst
Lines changed: 8 additions & 0 deletions
diff --git a/‎sklearn/multioutput.py
Lines changed: 10 additions & 5 deletions b/‎sklearn/multioutput.py
Lines changed: 10 additions & 5 deletions
diff --git a/‎sklearn/tests/test_multioutput.py
Lines changed: 29 additions & 0 deletions b/‎sklearn/tests/test_multioutput.py
Lines changed: 29 additions & 0 deletions
@@ -511,6 +511,14 @@ Support for Python 3.4 and below has been officially dropped.
   containing this same sample due to the scaling used in decision_function.
   :issue:`10440` by :user:`Jonathan Ohayon <Johayon>`.
 
+:mod:`sklearn.multioutput`
+..........................
+
+- |Fix| Fixed a bug in :class:`multioutput.MultiOutputClassifier` where the
+  `predict_proba` method incorrectly checked for the `predict_proba`
+  attribute on the unfitted base estimator instead of the fitted estimators.
+  :issue:`12222` by :user:`Rebekah Kim <rebekahkim>`.
+  
 :mod:`sklearn.neighbors`
 ........................
 
 
@@ -145,7 +145,7 @@ def fit(self, X, y, sample_weight=None):
 
         if not hasattr(self.estimator, "fit"):
             raise ValueError("The base estimator should implement"
-                             "  a fit method")
+                             " a fit method")
 
         X, y = check_X_y(X, y,
                          multi_output=True,
@@ -186,7 +186,8 @@ def predict(self, X):
         """
         check_is_fitted(self, 'estimators_')
         if not hasattr(self.estimator, "predict"):
-            raise ValueError("The base estimator should implement a predict method")
+            raise ValueError("The base estimator should implement"
+                             " a predict method")
 
         X = check_array(X, accept_sparse=True)
 
@@ -327,6 +328,9 @@ def predict_proba(self, X):
         """Probability estimates.
         Returns prediction probabilities for each class of each output.
 
+        This method will raise a ``ValueError`` if any of the
+        estimators do not have ``predict_proba``.
+
         Parameters
         ----------
         X : array-like, shape (n_samples, n_features)
@@ -340,16 +344,17 @@ def predict_proba(self, X):
             classes corresponds to that in the attribute `classes_`.
         """
         check_is_fitted(self, 'estimators_')
-        if not hasattr(self.estimator, "predict_proba"):
-            raise ValueError("The base estimator should implement"
+        if not all([hasattr(estimator, "predict_proba")
+                    for estimator in self.estimators_]):
+            raise ValueError("The base estimator should implement "
                              "predict_proba method")
 
         results = [estimator.predict_proba(X) for estimator in
                    self.estimators_]
         return results
     def score(self, X, y):
-        """"Returns the mean accuracy on the given test data and labels.
+        """Returns the mean accuracy on the given test data and labels.
 
         Parameters
         ----------
 
@@ -31,6 +31,7 @@
 from sklearn.svm import LinearSVC
 from sklearn.base import ClassifierMixin
 from sklearn.utils import shuffle
+from sklearn.model_selection import GridSearchCV
 
 
 def test_multi_target_regression():
@@ -176,6 +177,34 @@ def test_multi_output_classification_partial_fit_parallelism():
         assert est1 is not est2
 
 
+# check predict_proba works with fitted estimators and raises otherwise
+def test_multi_output_predict_proba():
+    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5, tol=1e-3)
+    param = {'loss': ('hinge', 'log', 'modified_huber')}
+
+    # inner function for custom scoring
+    def custom_scorer(estimator, X, y):
+        if hasattr(estimator, "predict_proba"):
+            return 1.0
+        else:
+            return 0.0
+    grid_clf = GridSearchCV(sgd_linear_clf, param_grid=param,
+                            scoring=custom_scorer, cv=3, error_score=np.nan)
+    multi_target_linear = MultiOutputClassifier(grid_clf)
+    multi_target_linear.fit(X, y)
+
+    multi_target_linear.predict_proba(X)
+
+    # SGDClassifier defaults to loss='hinge' which is not a probabilistic
+    # loss function; therefore it does not expose a predict_proba method
+    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5, tol=1e-3)
+    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)
+    multi_target_linear.fit(X, y)
+    err_msg = "The base estimator should implement predict_proba method"
+    with pytest.raises(ValueError, match=err_msg):
+        multi_target_linear.predict_proba(X)
+
+
 # 0.23. warning about tol not having its correct default value.
 @pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been')
 def test_multi_output_classification_partial_fit():