scikit-learn
diff --git a/‎doc/whats_new/v0.22.rst
Lines changed: 6 additions & 0 deletions b/‎doc/whats_new/v0.22.rst
Lines changed: 6 additions & 0 deletions
diff --git a/‎examples/model_selection/plot_roc.py
Lines changed: 1 addition & 1 deletion b/‎examples/model_selection/plot_roc.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎sklearn/ensemble/tests/test_forest.py
Lines changed: 0 additions & 21 deletions b/‎sklearn/ensemble/tests/test_forest.py
Lines changed: 0 additions & 21 deletions
diff --git a/‎sklearn/linear_model/coordinate_descent.py
Lines changed: 4 additions & 0 deletions b/‎sklearn/linear_model/coordinate_descent.py
Lines changed: 4 additions & 0 deletions
diff --git a/‎sklearn/linear_model/least_angle.py
Lines changed: 6 additions & 0 deletions b/‎sklearn/linear_model/least_angle.py
Lines changed: 6 additions & 0 deletions
diff --git a/‎sklearn/linear_model/ridge.py
Lines changed: 4 additions & 4 deletions b/‎sklearn/linear_model/ridge.py
Lines changed: 4 additions & 4 deletions
diff --git a/‎sklearn/neighbors/regression.py
Lines changed: 5 additions & 0 deletions b/‎sklearn/neighbors/regression.py
Lines changed: 5 additions & 0 deletions
diff --git a/‎sklearn/tree/tests/test_tree.py
Lines changed: 0 additions & 20 deletions b/‎sklearn/tree/tests/test_tree.py
Lines changed: 0 additions & 20 deletions
diff --git a/‎sklearn/utils/estimator_checks.py
Lines changed: 87 additions & 3 deletions b/‎sklearn/utils/estimator_checks.py
Lines changed: 87 additions & 3 deletions
diff --git a/‎sklearn/utils/tests/test_estimator_checks.py
Lines changed: 6 additions & 4 deletions b/‎sklearn/utils/tests/test_estimator_checks.py
Lines changed: 6 additions & 4 deletions
@@ -631,4 +631,10 @@ These changes mostly affect library developers.
 - Added check that pairwise estimators raise error on non-square data
   :pr:`14336` by :user:`Gregory Dexter <gdex1>`.
 
+- Added two common multioutput estimator tests
+  :func:`~utils.estimator_checks.check_classifier_multioutput` and
+  :func:`~utils.estimator_checks.check_regressor_multioutput`.
+  :pr:`13392` by :user:`Rok Mihevc <rok>`.
+
 - |Fix| Added ``check_transformer_data_not_an_array`` to checks where missing
+
@@ -150,7 +150,7 @@
 # Area under ROC for the multiclass problem
 # .........................................
 # The :func:`sklearn.metrics.roc_auc_score` function can be used for
-# multi-class classification. The mutliclass One-vs-One scheme compares every
+# multi-class classification. The multi-class One-vs-One scheme compares every
 # unique pairwise combination of classes. In this section, we calculate the AUC
 # using the OvR and OvO schemes. We report a macro average, and a
 # prevalence-weighted average.
 
@@ -1294,27 +1294,6 @@ def test_backend_respected():
     assert ba.count == 0
 
 
-@pytest.mark.parametrize('name', FOREST_CLASSIFIERS)
-@pytest.mark.parametrize('oob_score', (True, False))
-def test_multi_target(name, oob_score):
-    ForestClassifier = FOREST_CLASSIFIERS[name]
-
-    clf = ForestClassifier(bootstrap=True, oob_score=oob_score)
-
-    X = iris.data
-
-    # Make multi column mixed type target.
-    y = np.vstack([
-        iris.target.astype(float),
-        iris.target.astype(int),
-        iris.target.astype(str),
-    ]).T
-
-    # Try to fit and predict.
-    clf.fit(X, y)
-    clf.predict(X)
-
-
 def test_forest_feature_importances_sum():
     X, y = make_classification(n_samples=15, n_informative=3, random_state=1,
                                n_classes=3)
 
@@ -1389,6 +1389,8 @@ def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True,
             cv=cv, verbose=verbose, n_jobs=n_jobs, positive=positive,
             random_state=random_state, selection=selection)
 
+    def _more_tags(self):
+        # Report the 'multioutput' estimator tag as False for this estimator.
+        return {'multioutput': False}
+
 
 class ElasticNetCV(RegressorMixin, LinearModelCV):
     """Elastic Net model with iterative fitting along a regularization path.
@@ -1594,6 +1596,8 @@ def __init__(self, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
         self.random_state = random_state
         self.selection = selection
 
+    def _more_tags(self):
+        # Report the 'multioutput' estimator tag as False for this estimator.
+        return {'multioutput': False}
+
 
 ###############################################################################
 # Multi Task ElasticNet and Lasso models (with joint feature selection)
 
@@ -1358,6 +1358,9 @@ def __init__(self, fit_intercept=True, verbose=False, max_iter=500,
                          n_nonzero_coefs=500,
                          eps=eps, copy_X=copy_X, fit_path=True)
 
+    def _more_tags(self):
+        # Report the 'multioutput' estimator tag as False for this estimator.
+        return {'multioutput': False}
+
     def fit(self, X, y):
         """Fit the model using X, y as training data.
 
@@ -1729,6 +1732,9 @@ def __init__(self, criterion='aic', fit_intercept=True, verbose=False,
         self.eps = eps
         self.fit_path = True
 
+    def _more_tags(self):
+        # Report the 'multioutput' estimator tag as False for this estimator.
+        return {'multioutput': False}
+
     def fit(self, X, y, copy_X=None):
         """Fit the model using X, y as training data.
 
 
@@ -521,7 +521,7 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
         return coef
 
 
-class _BaseRidge(MultiOutputMixin, LinearModel, metaclass=ABCMeta):
+class _BaseRidge(LinearModel, metaclass=ABCMeta):
     @abstractmethod
     def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
                  copy_X=True, max_iter=None, tol=1e-3, solver="auto",
@@ -602,7 +602,7 @@ def fit(self, X, y, sample_weight=None):
         return self
 
 
-class Ridge(RegressorMixin, _BaseRidge):
+class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge):
     """Linear least squares with l2 regularization.
 
     Minimizes the objective function::
@@ -1506,7 +1506,7 @@ def identity_estimator():
         return self
 
 
-class _BaseRidgeCV(MultiOutputMixin, LinearModel):
+class _BaseRidgeCV(LinearModel):
     def __init__(self, alphas=(0.1, 1.0, 10.0),
                  fit_intercept=True, normalize=False, scoring=None,
                  cv=None, gcv_mode=None,
@@ -1578,7 +1578,7 @@ def fit(self, X, y, sample_weight=None):
         return self
 
 
-class RidgeCV(RegressorMixin, _BaseRidgeCV):
+class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV):
     """Ridge regression with built-in cross-validation.
 
     See glossary entry for :term:`cross-validation estimator`.
 
@@ -148,6 +148,11 @@ def __init__(self, n_neighbors=5, weights='uniform',
               metric_params=metric_params, n_jobs=n_jobs, **kwargs)
         self.weights = _check_weights(weights)
 
+    @property
+    def _pairwise(self):
+        """Whether ``self.metric`` is set to ``'precomputed'``."""
+        # For cross-validation routines to split data correctly
+        return self.metric == 'precomputed'
+
     def predict(self, X):
         """Predict the target for the provided data
 
 
@@ -1823,26 +1823,6 @@ def test_empty_leaf_infinite_threshold():
         assert len(empty_leaf) == 0
 
 
-@pytest.mark.parametrize('name', CLF_TREES)
-def test_multi_target(name):
-    Tree = CLF_TREES[name]
-
-    clf = Tree()
-
-    X = iris.data
-
-    # Make multi column mixed type target.
-    y = np.vstack([
-        iris.target.astype(float),
-        iris.target.astype(int),
-        iris.target.astype(str),
-    ]).T
-
-    # Try to fit and predict.
-    clf.fit(X, y)
-    clf.predict(X)
-
-
 def test_decision_tree_memmap():
     # check that decision trees supports read-only buffer (#13626)
     X = np.random.RandomState(0).random_sample((10, 2)).astype(np.float32)
 
@@ -38,7 +38,6 @@
                     BaseEstimator)
 
 from ..metrics import accuracy_score, adjusted_rand_score, f1_score
-
 from ..random_projection import BaseRandomProjection
 from ..feature_selection import SelectKBest
 from ..pipeline import make_pipeline
@@ -54,13 +53,13 @@
 from .import deprecated
 from .validation import has_fit_parameter, _num_samples
 from ..preprocessing import StandardScaler
-from ..datasets import load_iris, load_boston, make_blobs
+from ..datasets import (load_iris, load_boston, make_blobs,
+                        make_multilabel_classification, make_regression)
 
 
 BOSTON = None
 CROSS_DECOMPOSITION = ['PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD']
 
-
 def _safe_tags(estimator, key=None):
     # if estimator doesn't have _get_tags, use _DEFAULT_TAGS
     # if estimator has tags but not key, use _DEFAULT_TAGS[key]
@@ -125,6 +124,8 @@ def _yield_classifier_checks(name, classifier):
     yield check_classifiers_one_label
     yield check_classifiers_classes
     yield check_estimators_partial_fit_n_features
+    if tags["multioutput"]:
+        yield check_classifier_multioutput
     # basic consistency testing
     yield check_classifiers_train
     yield partial(check_classifiers_train, readonly_memmap=True)
@@ -174,6 +175,8 @@ def _yield_regressor_checks(name, regressor):
     yield partial(check_regressors_train, readonly_memmap=True)
     yield check_regressor_data_not_an_array
     yield check_estimators_partial_fit_n_features
+    if tags["multioutput"]:
+        yield check_regressor_multioutput
     yield check_regressors_no_decision_function
     if not tags["no_validation"]:
         yield check_supervised_y_2d
@@ -1495,6 +1498,87 @@ def check_estimators_partial_fit_n_features(name, estimator_orig):
         estimator.partial_fit(X[:, :-1], y)
 
 
+@ignore_warnings(category=(DeprecationWarning, FutureWarning))
+def check_classifier_multioutput(name, estimator):
+    """Check a classifier's outputs on multilabel targets.
+
+    A clone of ``estimator`` is fitted on data generated by
+    ``make_multilabel_classification``. The check then asserts the shape
+    and integer dtype of ``predict`` and, when the estimator provides
+    them, the shape of ``decision_function`` / ``predict_proba`` and their
+    agreement with ``predict``. The ``predict_proba`` shape and agreement
+    checks are skipped when the estimator's ``poor_score`` tag is set.
+
+    Parameters
+    ----------
+    name
+        Estimator name (presumably a str); not used in the body.
+    estimator : estimator instance
+        Classifier to check. Only a clone of it is fitted.
+
+    Raises
+    ------
+    AssertionError
+        If any of the shape, dtype or consistency checks fails.
+    """
+    n_samples, n_labels, n_classes = 42, 5, 3
+    tags = _safe_tags(estimator)
+    estimator = clone(estimator)
+    X, y = make_multilabel_classification(random_state=42,
+                                          n_samples=n_samples,
+                                          n_labels=n_labels,
+                                          n_classes=n_classes)
+    estimator.fit(X, y)
+    y_pred = estimator.predict(X)
+
+    # The message reports the same expected shape the assertion compares
+    # against: one column per class, not per ``n_labels``.
+    assert y_pred.shape == (n_samples, n_classes), (
+        "The shape of the prediction for multioutput data is "
+        "incorrect. Expected {}, got {}."
+        .format((n_samples, n_classes), y_pred.shape))
+    assert y_pred.dtype.kind == 'i'
+
+    if hasattr(estimator, "decision_function"):
+        decision = estimator.decision_function(X)
+        assert isinstance(decision, np.ndarray)
+        assert decision.shape == (n_samples, n_classes), (
+            "The shape of the decision function output for "
+            "multioutput data is incorrect. Expected {}, got {}."
+            .format((n_samples, n_classes), decision.shape))
+
+        # Thresholding the decision values at 0 must reproduce ``predict``.
+        dec_pred = (decision > 0).astype(int)
+        dec_exp = estimator.classes_[dec_pred]
+        assert_array_equal(dec_exp, y_pred)
+
+    if hasattr(estimator, "predict_proba"):
+        y_prob = estimator.predict_proba(X)
+
+        if isinstance(y_prob, list) and not tags['poor_score']:
+            # List output: one (n_samples, 2) array per output column.
+            for i in range(n_classes):
+                assert y_prob[i].shape == (n_samples, 2), (
+                    "The shape of the probability for multioutput data is"
+                    " incorrect. Expected {}, got {}."
+                    .format((n_samples, 2), y_prob[i].shape))
+                assert_array_equal(
+                    np.argmax(y_prob[i], axis=1).astype(int),
+                    y_pred[:, i]
+                )
+        elif not tags['poor_score']:
+            assert y_prob.shape == (n_samples, n_classes), (
+                "The shape of the probability for multioutput data is"
+                " incorrect. Expected {}, got {}."
+                .format((n_samples, n_classes), y_prob.shape))
+            assert_array_equal(y_prob.round().astype(int), y_pred)
+
+    if (hasattr(estimator, "decision_function") and
+            hasattr(estimator, "predict_proba")):
+        # Neither output depends on the column index: compute them once.
+        y_proba = estimator.predict_proba(X)
+        y_decision = estimator.decision_function(X)
+        for i in range(n_classes):
+            # Per-column rankings of the two outputs must be identical.
+            assert_array_equal(rankdata(y_proba[:, i]),
+                               rankdata(y_decision[:, i]))
+
+
+@ignore_warnings(category=(DeprecationWarning, FutureWarning))
+def check_regressor_multioutput(name, estimator):
+    """Check a regressor's predictions on multi-target data.
+
+    A clone of ``estimator`` is fitted on ``make_regression`` data with
+    five targets; the prediction must be ``float64`` and have the same
+    shape as ``y``.
+
+    Parameters
+    ----------
+    name
+        Estimator name (presumably a str); not used in the body.
+    estimator : estimator instance
+        Regressor to check. Only a clone of it is fitted.
+
+    Raises
+    ------
+    AssertionError
+        If the prediction dtype or shape is not the expected one.
+    """
+    estimator = clone(estimator)
+    n_samples = n_features = 10
+
+    # NOTE(review): presumably the data is kept square (10 x 10) only for
+    # pairwise-metric estimators; every other estimator gets one extra
+    # sample. Confirm against _is_pairwise_metric.
+    if not _is_pairwise_metric(estimator):
+        n_samples = n_samples + 1
+
+    X, y = make_regression(random_state=42, n_targets=5,
+                           n_samples=n_samples, n_features=n_features)
+    X = pairwise_estimator_convert_X(X, estimator)
+
+    estimator.fit(X, y)
+    y_pred = estimator.predict(X)
+
+    assert y_pred.dtype == np.dtype('float64'), (
+        "Multioutput predictions by a regressor are expected to be"
+        " floating-point precision. Got {} instead".format(y_pred.dtype))
+    # Fill in the placeholders so a failure shows both shapes.
+    assert y_pred.shape == y.shape, (
+        "The shape of the prediction for multioutput data is incorrect."
+        " Expected {}, got {}.".format(y.shape, y_pred.shape))
+
+
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_clustering(name, clusterer_orig, readonly_memmap=False):
     clusterer = clone(clusterer_orig)
 
@@ -282,7 +282,7 @@ class UntaggedBinaryClassifier(DecisionTreeClassifier):
     # Toy classifier that only supports binary classification, will fail tests.
     def fit(self, X, y, sample_weight=None):
         super().fit(X, y, sample_weight)
-        if self.n_classes_ > 2:
+        if np.all(self.n_classes_ > 2):
             raise ValueError('Only 2 classes are supported')
         return self
 
@@ -296,7 +296,7 @@ def _more_tags(self):
 class RequiresPositiveYRegressor(LinearRegression):
 
     def fit(self, X, y):
-        X, y = check_X_y(X, y)
+        X, y = check_X_y(X, y, multi_output=True)
         if (y <= 0).any():
             raise ValueError('negative y values not supported!')
         return super().fit(X, y)
@@ -423,7 +423,9 @@ def test_check_estimator():
     check_estimator(TaggedBinaryClassifier)
 
     # Check regressor with requires_positive_y estimator tag
-    check_estimator(RequiresPositiveYRegressor)
+    msg = 'negative y values not supported!'
+    assert_raises_regex(ValueError, msg, check_estimator,
+                        RequiresPositiveYRegressor)
 
 
 def test_check_outlier_corruption():
@@ -511,7 +513,7 @@ def __init__(self, you_should_set_this_=None):
 
 def test_check_estimator_pairwise():
     # check that check_estimator() works on estimator with _pairwise
-    # kernel or  metric
+    # kernel or metric
 
     # test precomputed kernel
     est = SVC(kernel='precomputed')