scikit-learn
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
Lines changed: 5 additions & 1 deletion
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
Lines changed: 56 additions & 10 deletions
diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py
Lines changed: 65 additions & 17 deletions
@@ -171,6 +171,10 @@ Enhancements
      removed by setting it to `None`.
      :issue:`7674` by :user:`Yichuan Liu <yl565>`.
 
+   - Added ability for :func:`model_selection.cross_val_predict` to handle multi-label
+     (and multi-class multi-label) targets with `predict_proba`-type methods.
+     :issue:`8773` by :user:`Stephen Hoover <stephen-hoover>`.
+
 Bug fixes
 .........
    - Fixed a bug where :class:`sklearn.ensemble.IsolationForest` uses an
@@ -5066,4 +5070,4 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.
 .. _Anish Shah: https://github.com/AnishShah
 
 .. _Neeraj Gangwar: http://neerajgangwar.in
-.. _Arthur Mensch: https://amensch.fr
+.. _Arthur Mensch: https://amensch.fr
@@ -393,9 +393,18 @@ def cross_val_predict(estimator, X, y=None, groups=None, cv=None, n_jobs=1,
         raise AttributeError('{} not implemented in estimator'
                              .format(method))
 
-    if method in ['decision_function', 'predict_proba', 'predict_log_proba']:
-        le = LabelEncoder()
-        y = le.fit_transform(y)
+    do_manual_encoding = method in ['decision_function', 'predict_proba',
+                                    'predict_log_proba']
+    if do_manual_encoding:
+        # These methods return one column per class, so the labels are
+        # re-encoded as integer codes before the folds are fitted.
+        y = np.asarray(y)
+        if y.ndim == 1:
+            le = LabelEncoder()
+            y = le.fit_transform(y)
+        elif y.ndim == 2:
+            # Encode every output column independently. The builtin ``int``
+            # is the dtype that the ``np.int`` alias stood for; the alias
+            # itself is no longer available in current NumPy releases.
+            y_enc = np.zeros_like(y, dtype=int)
+            for i_label in range(y.shape[1]):
+                y_enc[:, i_label] = LabelEncoder().fit_transform(y[:, i_label])
+            y = y_enc
 
     # We clone the estimator to make sure that all the folds are
     # independent, and that it is pickle-able.
@@ -419,9 +428,20 @@ def cross_val_predict(estimator, X, y=None, groups=None, cv=None, n_jobs=1,
     # Check for sparse predictions
     if sp.issparse(predictions[0]):
         predictions = sp.vstack(predictions, format=predictions[0].format)
+    elif do_manual_encoding and isinstance(predictions[0], list):
+        n_labels = y.shape[1]
+        concat_pred = []
+        for i_label in range(n_labels):
+            label_preds = np.concatenate([p[i_label] for p in predictions])
+            concat_pred.append(label_preds)
+        predictions = concat_pred
     else:
         predictions = np.concatenate(predictions)
-    return predictions[inv_test_indices]
+
+    if do_manual_encoding and isinstance(predictions, list):
+        return [p[inv_test_indices] for p in predictions]
+    else:
+        return predictions[inv_test_indices]
 
 
 def _fit_and_predict(estimator, X, y, train, test, verbose, fit_params,
@@ -480,16 +500,42 @@ def _fit_and_predict(estimator, X, y, train, test, verbose, fit_params,
     func = getattr(estimator, method)
     predictions = func(X_test)
     if method in ['decision_function', 'predict_proba', 'predict_log_proba']:
-        n_classes = len(set(y))
-        predictions_ = np.zeros((X_test.shape[0], n_classes))
-        if method == 'decision_function' and len(estimator.classes_) == 2:
-            predictions_[:, estimator.classes_[-1]] = predictions
+        is_dec_func = (method == 'decision_function')
+        if isinstance(predictions, list):
+            predictions = [_enforce_prediction_order(
+                estimator.classes_[i_label], predictions[i_label],
+                n_classes=len(set(y[:, i_label])),
+                one_col_if_binary=is_dec_func)
+                for i_label in range(len(predictions))]
         else:
-            predictions_[:, estimator.classes_] = predictions
-        predictions = predictions_
+            # A 2D y array should be a binary label indicator matrix
+            n_classes = len(set(y)) if y.ndim == 1 else y.shape[1]
+            predictions = _enforce_prediction_order(
+                estimator.classes_, predictions, n_classes, is_dec_func)
+
     return predictions, test
 
 
+def _enforce_prediction_order(classes, predictions, n_classes,
+                              one_col_if_binary=False):
+    """Scatter fold predictions into a zero-filled, fixed-width array.
+
+    A training fold may lack some classes, in which case the estimator
+    output has fewer columns than the output of other folds. ``classes``
+    (assumed to hold integer codes) names the output column of each
+    prediction column; columns of classes missing from ``classes`` stay 0.
+
+    When ``one_col_if_binary`` is true and ``classes`` has exactly two
+    entries, ``predictions`` is written to the column of the last class.
+    """
+    ordered = np.zeros((predictions.shape[0], n_classes),
+                       dtype=predictions.dtype)
+    single_column = one_col_if_binary and len(classes) == 2
+    columns = classes[-1] if single_column else classes
+    ordered[:, columns] = predictions
+    return ordered
+
+
 def _check_is_permutation(indices, n_samples):
     """Check whether indices is a reordering of the array np.arange(n_samples)
 
 
@@ -46,6 +46,7 @@
 
 from sklearn.linear_model import Ridge, LogisticRegression
 from sklearn.linear_model import PassiveAggressiveClassifier
+from sklearn.ensemble import RandomForestClassifier
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.svm import SVC
 from sklearn.cluster import KMeans
@@ -915,54 +916,101 @@ def test_cross_val_predict_sparse_prediction():
     assert_array_almost_equal(preds_sparse, preds)
 
 
-def check_cross_val_predict_with_method(est):
-    iris = load_iris()
-    X, y = iris.data, iris.target
-    X, y = shuffle(X, y, random_state=0)
-    classes = len(set(y))
+def check_cross_val_predict_with_method(est, X, y, methods):
+    """Check cross_val_predict against a naive per-fold loop.
+
+    For every name in ``methods``, the output of ``cross_val_predict`` is
+    compared with predictions collected by fitting ``est`` on each training
+    split by hand. The output must then stay the same when ``y`` is
+    shifted by a constant or cast to strings.
+    """
+    kfold = KFold(X.shape[0])
 
-    kfold = KFold(len(iris.target))
-
-    methods = ['decision_function', 'predict_proba', 'predict_log_proba']
     for method in methods:
         predictions = cross_val_predict(est, X, y, method=method)
-        assert_equal(len(predictions), len(y))
 
-        expected_predictions = np.zeros([len(y), classes])
+        if isinstance(predictions, list):
+            # One array per output column of y, each covering every sample.
+            assert_equal(len(predictions), y.shape[1])
+            for i in range(y.shape[1]):
+                assert_equal(len(predictions[i]), len(y))
+            expected_predictions = [np.zeros([len(y), len(set(y[:, i]))])
+                                    for i in range(y.shape[1])]
+        else:
+            assert_equal(len(predictions), len(y))
+            expected_predictions = np.zeros_like(predictions)
         func = getattr(est, method)
 
         # Naive loop (should be same as cross_val_predict):
         for train, test in kfold.split(X, y):
             est.fit(X[train], y[train])
-            expected_predictions[test] = func(X[test])
+            preds = func(X[test])
+            if isinstance(predictions, list):
+                for i_label in range(y.shape[1]):
+                    expected_predictions[i_label][test] = preds[i_label]
+            else:
+                # Reuse the fold output instead of predicting a second time.
+                expected_predictions[test] = preds
 
         predictions = cross_val_predict(est, X, y, method=method,
                                         cv=kfold)
-        assert_array_almost_equal(expected_predictions, predictions)
+        assert_array_equal_maybe_list(expected_predictions, predictions)
 
         # Test alternative representations of y
         predictions_y1 = cross_val_predict(est, X, y + 1, method=method,
                                            cv=kfold)
-        assert_array_equal(predictions, predictions_y1)
+        assert_array_equal_maybe_list(predictions, predictions_y1)
 
         predictions_y2 = cross_val_predict(est, X, y - 2, method=method,
                                            cv=kfold)
-        assert_array_equal(predictions, predictions_y2)
+        assert_array_equal_maybe_list(predictions, predictions_y2)
 
         predictions_ystr = cross_val_predict(est, X, y.astype('str'),
                                              method=method, cv=kfold)
-        assert_array_equal(predictions, predictions_ystr)
+        assert_array_equal_maybe_list(predictions, predictions_ystr)
+
+
+def assert_array_equal_maybe_list(x, y):
+    """Assert that x and y are equal arrays, or equal lists of arrays.
+
+    When ``x`` is a list, ``y`` must have the same number of entries and
+    each pair of entries is compared with ``assert_array_equal``;
+    otherwise ``x`` and ``y`` are compared directly.
+    """
+    if isinstance(x, list):
+        # zip() alone would silently drop surplus entries of the longer
+        # sequence, so the lengths are compared first.
+        assert len(x) == len(y)
+        for x_i, y_i in zip(x, y):
+            assert_array_equal(x_i, y_i)
+    else:
+        assert_array_equal(x, y)
 
 
 def test_cross_val_predict_with_method():
-    check_cross_val_predict_with_method(LogisticRegression())
+    # Run the shared checks on the shuffled iris data.
+    iris = load_iris()
+    X, y = shuffle(iris.data, iris.target, random_state=0)
+    check_cross_val_predict_with_method(
+        LogisticRegression(), X, y,
+        methods=['decision_function', 'predict_proba', 'predict_log_proba'])
 
 
 def test_gridsearchcv_cross_val_predict_with_method():
+    # Run the shared checks with a grid search wrapped around the
+    # estimator, on the shuffled iris data.
+    iris = load_iris()
+    X, y = shuffle(iris.data, iris.target, random_state=0)
     est = GridSearchCV(LogisticRegression(random_state=42),
                        {'C': [0.1, 1]},
                        cv=2)
-    check_cross_val_predict_with_method(est)
+    check_cross_val_predict_with_method(
+        est, X, y,
+        methods=['decision_function', 'predict_proba', 'predict_log_proba'])
+
+
+def test_cross_val_predict_with_method_multilabel_ovr():
+    # One-vs-rest handles multilabel targets only as arrays of binary
+    # indicator columns, and its predict_proba returns a single 2D
+    # array of shape (n_samples, n_labels).
+    data_params = dict(n_samples=100, n_labels=3, n_classes=4,
+                       n_features=5, random_state=42)
+    X, y = make_multilabel_classification(**data_params)
+    ovr = OneVsRestClassifier(LogisticRegression(random_state=0))
+    methods = ['predict_proba', 'decision_function']
+    check_cross_val_predict_with_method(ovr, X, y, methods=methods)
+
+
+def test_cross_val_predict_with_method_multilabel_rf():
+    # A random forest accepts arbitrary label values in each output
+    # column; its predict_proba returns a list holding one
+    # predict_proba array per output column.
+    data_params = dict(n_samples=100, n_labels=3, n_classes=4,
+                       n_features=5, random_state=42)
+    X, y = make_multilabel_classification(**data_params)
+    y[:, 0] = y[:, 0] + y[:, 1]  # Put three classes in the first column
+    forest = RandomForestClassifier(n_estimators=5, random_state=0)
+    check_cross_val_predict_with_method(forest, X, y, methods=['predict_proba'])
 
 
 def get_expected_predictions(X, y, cv, classes, est, method):