diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 93635d88069d5..450ec8aab0dad 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -115,6 +115,15 @@ Changelog preserve the class balance of the original training set. :pr:`14194` by :user:`Johann Faouzi `. +- |Fix| Run + :func:`utils.estimator_checks.check_estimator` by default on both + :class:`ensemble.VotingClassifier` and :class:`ensemble.VotingRegressor`. This + fixes shape-consistency issues in `predict`, which was + failing when the underlying estimators did not output arrays with consistent + dimensions. Note that this check should be replaced by refactoring the common tests + in the future. + :pr:`14305` by :user:`Guillaume Lemaitre `. + - |Efficiency| :func:`ensemble.MissingIndicator.fit_transform` the _get_missing_features_info function is now called once when calling fit_transform for MissingIndicator class. :pr:`14356` by :user: diff --git a/sklearn/ensemble/tests/test_voting.py b/sklearn/ensemble/tests/test_voting.py index 5cd971934abf2..52c47129572e2 100644 --- a/sklearn/ensemble/tests/test_voting.py +++ b/sklearn/ensemble/tests/test_voting.py @@ -6,6 +6,8 @@ from sklearn.utils.testing import assert_almost_equal, assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_raise_message +from sklearn.utils.estimator_checks import check_estimator +from sklearn.utils.estimator_checks import check_no_attributes_set_in_init from sklearn.exceptions import NotFittedError from sklearn.linear_model import LinearRegression from sklearn.linear_model import LogisticRegression @@ -13,6 +15,8 @@ from sklearn.ensemble import RandomForestClassifier from sklearn.ensemble import RandomForestRegressor from sklearn.ensemble import VotingClassifier, VotingRegressor +from sklearn.tree import DecisionTreeClassifier +from sklearn.tree import DecisionTreeRegressor from sklearn.model_selection import GridSearchCV from 
sklearn import datasets from sklearn.model_selection import cross_val_score, train_test_split @@ -508,3 +512,20 @@ def test_none_estimator_with_weights(X, y, voter, drop): voter.fit(X, y, sample_weight=np.ones(y.shape)) y_pred = voter.predict(X) assert y_pred.shape == y.shape + + +@pytest.mark.parametrize( + "estimator", + [VotingRegressor( + estimators=[('lr', LinearRegression()), + ('tree', DecisionTreeRegressor(random_state=0))]), + VotingClassifier( + estimators=[('lr', LogisticRegression(random_state=0)), + ('tree', DecisionTreeClassifier(random_state=0))])], + ids=['VotingRegressor', 'VotingClassifier'] +) +def test_check_estimators_voting_estimator(estimator): + # FIXME: to be removed when meta-estimators can specify + # their own testing parameters (for required parameters). + check_estimator(estimator) + check_no_attributes_set_in_init(estimator.__class__.__name__, estimator) diff --git a/sklearn/ensemble/voting.py b/sklearn/ensemble/voting.py index b29ad46c238f5..7900d28c1f782 100644 --- a/sklearn/ensemble/voting.py +++ b/sklearn/ensemble/voting.py @@ -13,9 +13,10 @@ # # License: BSD 3 clause -import numpy as np from abc import abstractmethod +import numpy as np + from joblib import Parallel, delayed from ..base import ClassifierMixin @@ -23,9 +24,11 @@ from ..base import TransformerMixin from ..base import clone from ..preprocessing import LabelEncoder +from ..utils import Bunch from ..utils.validation import check_is_fitted from ..utils.metaestimators import _BaseComposition -from ..utils import Bunch +from ..utils.multiclass import check_classification_targets +from ..utils.validation import column_or_1d def _parallel_fit_estimator(estimator, X, y, sample_weight=None): @@ -67,7 +70,7 @@ def _weights_not_none(self): def _predict(self, X): """Collect results from clf.predict calls. 
""" - return np.asarray([clf.predict(X) for clf in self.estimators_]).T + return np.asarray([est.predict(X) for est in self.estimators_]).T @abstractmethod def fit(self, X, y, sample_weight=None): @@ -264,6 +267,7 @@ def fit(self, X, y, sample_weight=None): ------- self : object """ + check_classification_targets(y) if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1: raise NotImplementedError('Multilabel and multi-output' ' classification is not supported.') @@ -454,6 +458,7 @@ def fit(self, X, y, sample_weight=None): ------- self : object """ + y = column_or_1d(y, warn=True) return super().fit(X, y, sample_weight) def predict(self, X): diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 51f71f2f7919b..abfc84b00f2fd 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -23,9 +23,9 @@ from sklearn.base import RegressorMixin from sklearn.cluster.bicluster import BiclusterMixin +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.linear_model.base import LinearClassifierMixin from sklearn.linear_model import Ridge -from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.utils import IS_PYPY from sklearn.utils.estimator_checks import ( _yield_all_checks, diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index e922a7c0b4d48..c8a82bc8e623f 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -30,7 +30,6 @@ from ..discriminant_analysis import LinearDiscriminantAnalysis from ..linear_model import Ridge - from ..base import (clone, ClusterMixin, is_classifier, is_regressor, _DEFAULT_TAGS, RegressorMixin, is_outlier_detector)