Revert "ENH cross_val_predict now handles multi-output predict_proba … · xhluca/scikit-learn@c6aa3ea · GitHub

Commit c6aa3ea

Authored by Xing
Revert "ENH cross_val_predict now handles multi-output predict_proba (scikit-learn#8773)"
This reverts commit 0eaceb6.
1 parent 40bed9f commit c6aa3ea

File tree: 3 files changed, +86 -292 lines changed

doc/whats_new/v0.21.rst

Lines changed: 0 additions & 4 deletions
@@ -430,10 +430,6 @@ Support for Python 3.4 and below has been officially dropped.
   making ``shuffle=True`` ineffective.
   :issue:`13124` by :user:`Hanmin Qin <qinhanmin2014>`.

-- |Fix| Added ability for :func:`model_selection.cross_val_predict` to handle
-  multi-label (and multioutput-multiclass) targets with ``predict_proba``-type
-  methods. :issue:`8773` by :user:`Stephen Hoover <stephen-hoover>`.
-
 - |Fix| Fixed an issue in :func:`~model_selection.cross_val_predict` where
   `method="predict_proba"` returned always `0.0` when one of the classes was
   excluded in a cross-validation fold.
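
For context (not part of this commit), below is a minimal sketch of the usage described by the removed changelog entry; the MultiOutputClassifier/RandomForestClassifier setup and the toy data are illustrative assumptions. Against the parent commit, where scikit-learn#8773 was still in place, such a call was expected to return a list with one probability array per output column of y; after this revert, predict_proba-type methods no longer handle 2D targets.

from sklearn.datasets import make_multilabel_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.model_selection import cross_val_predict

# Illustrative multi-label problem: y is a 2D indicator matrix of shape
# (n_samples, n_outputs).
X, y = make_multilabel_classification(n_samples=100, n_classes=3,
                                      random_state=0)
clf = MultiOutputClassifier(RandomForestClassifier(n_estimators=10,
                                                   random_state=0))
# With scikit-learn#8773 in place this returned a list of per-output
# probability arrays; after this revert, only 1D targets are supported
# for predict_proba-type methods.
proba = cross_val_predict(clf, X, y, cv=3, method='predict_proba')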

sklearn/model_selection/_validation.py

Lines changed: 50 additions & 97 deletions
@@ -757,20 +757,9 @@ def cross_val_predict(estimator, X, y=None, groups=None, cv='warn',
 
     cv = check_cv(cv, y, classifier=is_classifier(estimator))
 
-    # If classification methods produce multiple columns of output,
-    # we need to manually encode classes to ensure consistent column ordering.
-    encode = method in ['decision_function', 'predict_proba',
-                        'predict_log_proba']
-    if encode:
-        y = np.asarray(y)
-        if y.ndim == 1:
-            le = LabelEncoder()
-            y = le.fit_transform(y)
-        elif y.ndim == 2:
-            y_enc = np.zeros_like(y, dtype=np.int)
-            for i_label in range(y.shape[1]):
-                y_enc[:, i_label] = LabelEncoder().fit_transform(y[:, i_label])
-            y = y_enc
+    if method in ['decision_function', 'predict_proba', 'predict_log_proba']:
+        le = LabelEncoder()
+        y = le.fit_transform(y)
 
     # We clone the estimator to make sure that all the folds are
     # independent, and that it is pickle-able.
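
As an aside (not part of the diff), a small sketch of why the targets are label-encoded for these methods: the removed comment notes that methods producing multiple output columns need a consistent column ordering, and the padding logic further down indexes columns by estimator.classes_, which only works if the classes are the integers 0..n_classes-1. LabelEncoder guarantees exactly that.

import numpy as np
from sklearn.preprocessing import LabelEncoder

y = np.array(['cat', 'dog', 'cat', 'bird'])
le = LabelEncoder()
y_enc = le.fit_transform(y)
print(y_enc)        # [1 2 1 0] -- integer codes usable as column indices
print(le.classes_)  # ['bird' 'cat' 'dog']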
@@ -791,26 +780,12 @@ def cross_val_predict(estimator, X, y=None, groups=None, cv='warn',
     inv_test_indices = np.empty(len(test_indices), dtype=int)
     inv_test_indices[test_indices] = np.arange(len(test_indices))
 
+    # Check for sparse predictions
     if sp.issparse(predictions[0]):
         predictions = sp.vstack(predictions, format=predictions[0].format)
-    elif encode and isinstance(predictions[0], list):
-        # `predictions` is a list of method outputs from each fold.
-        # If each of those is also a list, then treat this as a
-        # multioutput-multiclass task. We need to separately concatenate
-        # the method outputs for each label into an `n_labels` long list.
-        n_labels = y.shape[1]
-        concat_pred = []
-        for i_label in range(n_labels):
-            label_preds = np.concatenate([p[i_label] for p in predictions])
-            concat_pred.append(label_preds)
-        predictions = concat_pred
     else:
         predictions = np.concatenate(predictions)
-
-    if isinstance(predictions, list):
-        return [p[inv_test_indices] for p in predictions]
-    else:
-        return predictions[inv_test_indices]
+    return predictions[inv_test_indices]
 
 
 def _fit_and_predict(estimator, X, y, train, test, verbose, fit_params,
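
To make the reordering above concrete, here is a tiny standalone sketch (with made-up indices, not part of the commit) of the inverse-permutation trick used by cross_val_predict: predictions come back concatenated in fold order, and inv_test_indices maps them back to the original sample order.

import numpy as np

test_indices = np.array([2, 3, 0, 1, 4, 5])      # hypothetical fold order
predictions = np.array([20, 30, 0, 10, 40, 50])  # one prediction per test sample
inv_test_indices = np.empty(len(test_indices), dtype=int)
inv_test_indices[test_indices] = np.arange(len(test_indices))
print(predictions[inv_test_indices])  # [ 0 10 20 30 40 50]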
@@ -869,76 +844,54 @@ def _fit_and_predict(estimator, X, y, train, test, verbose, fit_params,
     func = getattr(estimator, method)
     predictions = func(X_test)
     if method in ['decision_function', 'predict_proba', 'predict_log_proba']:
-        if isinstance(predictions, list):
-            predictions = [_enforce_prediction_order(
-                estimator.classes_[i_label], predictions[i_label],
-                n_classes=len(set(y[:, i_label])), method=method)
-                for i_label in range(len(predictions))]
-        else:
-            # A 2D y array should be a binary label indicator matrix
-            n_classes = len(set(y)) if y.ndim == 1 else y.shape[1]
-            predictions = _enforce_prediction_order(
-                estimator.classes_, predictions, n_classes, method)
+        n_classes = len(set(y))
+        if n_classes != len(estimator.classes_):
+            recommendation = (
+                'To fix this, use a cross-validation '
+                'technique resulting in properly '
+                'stratified folds')
+            warnings.warn('Number of classes in training fold ({}) does '
+                          'not match total number of classes ({}). '
+                          'Results may not be appropriate for your use case. '
+                          '{}'.format(len(estimator.classes_),
+                                      n_classes, recommendation),
+                          RuntimeWarning)
+            if method == 'decision_function':
+                if (predictions.ndim == 2 and
+                        predictions.shape[1] != len(estimator.classes_)):
+                    # This handles the case when the shape of predictions
+                    # does not match the number of classes used to train
+                    # it with. This case is found when sklearn.svm.SVC is
+                    # set to `decision_function_shape='ovo'`.
+                    raise ValueError('Output shape {} of {} does not match '
+                                     'number of classes ({}) in fold. '
+                                     'Irregular decision_function outputs '
+                                     'are not currently supported by '
+                                     'cross_val_predict'.format(
+                                         predictions.shape, method,
+                                         len(estimator.classes_),
+                                         recommendation))
+                if len(estimator.classes_) <= 2:
+                    # In this special case, `predictions` contains a 1D array.
+                    raise ValueError('Only {} class/es in training fold, this '
+                                     'is not supported for decision_function '
+                                     'with imbalanced folds. {}'.format(
+                                         len(estimator.classes_),
+                                         recommendation))
+
+            float_min = np.finfo(predictions.dtype).min
+            default_values = {'decision_function': float_min,
+                              'predict_log_proba': float_min,
+                              'predict_proba': 0.0}
+            predictions_for_all_classes = np.full((_num_samples(predictions),
+                                                   n_classes),
+                                                  default_values[method],
+                                                  predictions.dtype)
+            predictions_for_all_classes[:, estimator.classes_] = predictions
+            predictions = predictions_for_all_classes
     return predictions, test
 
 
-def _enforce_prediction_order(classes, predictions, n_classes, method):
-    """Ensure that prediction arrays have correct column order
-
-    When doing cross-validation, if one or more classes are
-    not present in the subset of data used for training,
-    then the output prediction array might not have the same
-    columns as other folds. Use the list of class names
-    (assumed to be integers) to enforce the correct column order.
-
-    Note that `classes` is the list of classes in this fold
-    (a subset of the classes in the full training set)
-    and `n_classes` is the number of classes in the full training set.
-    """
-    if n_classes != len(classes):
-        recommendation = (
-            'To fix this, use a cross-validation '
-            'technique resulting in properly '
-            'stratified folds')
-        warnings.warn('Number of classes in training fold ({}) does '
-                      'not match total number of classes ({}). '
-                      'Results may not be appropriate for your use case. '
-                      '{}'.format(len(classes), n_classes, recommendation),
-                      RuntimeWarning)
-        if method == 'decision_function':
-            if (predictions.ndim == 2 and
-                    predictions.shape[1] != len(classes)):
-                # This handles the case when the shape of predictions
-                # does not match the number of classes used to train
-                # it with. This case is found when sklearn.svm.SVC is
-                # set to `decision_function_shape='ovo'`.
-                raise ValueError('Output shape {} of {} does not match '
-                                 'number of classes ({}) in fold. '
-                                 'Irregular decision_function outputs '
-                                 'are not currently supported by '
-                                 'cross_val_predict'.format(
-                                     predictions.shape, method, len(classes)))
-            if len(classes) <= 2:
-                # In this special case, `predictions` contains a 1D array.
-                raise ValueError('Only {} class/es in training fold, but {} '
-                                 'in overall dataset. This '
-                                 'is not supported for decision_function '
-                                 'with imbalanced folds. {}'.format(
-                                     len(classes), n_classes, recommendation))
-
-        float_min = np.finfo(predictions.dtype).min
-        default_values = {'decision_function': float_min,
-                          'predict_log_proba': float_min,
-                          'predict_proba': 0}
-        predictions_for_all_classes = np.full((_num_samples(predictions),
-                                               n_classes),
-                                              default_values[method],
-                                              dtype=predictions.dtype)
-        predictions_for_all_classes[:, classes] = predictions
-        predictions = predictions_for_all_classes
-    return predictions
-
-
 def _check_is_permutation(indices, n_samples):
     """Check whether indices is a reordering of the array np.arange(n_samples)
 
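
Finally, again not part of the commit, a hedged sketch of the fold-imbalance handling that the reverted-to code above implements: with a plain KFold split on sorted labels, every training fold misses one class, so cross_val_predict emits the RuntimeWarning shown in the diff and pads the missing class's probability column with the method's default value (0.0 for predict_proba). The toy data and the LogisticRegression choice are illustrative assumptions.

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_predict

# 12 samples, 3 classes, deliberately ordered so an unshuffled KFold(3) leaves
# one class out of every training fold.
X = np.random.RandomState(0).randn(12, 3)
y = np.array([0] * 4 + [1] * 4 + [2] * 4)

proba = cross_val_predict(LogisticRegression(solver='lbfgs'),
                          X, y, cv=KFold(n_splits=3),
                          method='predict_proba')
print(proba.shape)  # (12, 3): one column per class in the full dataset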

Comments (0)