diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py
index 99591cedead3d..0bfc458f3eaff 100644
--- a/sklearn/ensemble/_voting.py
+++ b/sklearn/ensemble/_voting.py
@@ -15,6 +15,7 @@
 
 from abc import abstractmethod
 
+import numbers
 import numpy as np
 
 from joblib import Parallel
@@ -27,6 +28,7 @@
 from ._base import _BaseHeterogeneousEnsemble
 from ..preprocessing import LabelEncoder
 from ..utils import Bunch
+from ..utils import check_scalar
 from ..utils.metaestimators import available_if
 from ..utils.validation import check_is_fitted
 from ..utils.multiclass import check_classification_targets
@@ -46,7 +48,7 @@ class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble):
     def _log_message(self, name, idx, total):
         if not self.verbose:
             return None
-        return "(%d of %d) Processing %s" % (idx, total, name)
+        return f"({idx} of {total}) Processing {name}"
 
     @property
     def _weights_not_none(self):
@@ -64,11 +66,17 @@ def fit(self, X, y, sample_weight=None):
         """Get common fit operations."""
         names, clfs = self._validate_estimators()
 
+        check_scalar(
+            self.verbose,
+            name="verbose",
+            target_type=(numbers.Integral, np.bool_),
+            min_val=0,
+        )
+
         if self.weights is not None and len(self.weights) != len(self.estimators):
             raise ValueError(
-                "Number of `estimators` and weights must be equal"
-                "; got %d weights, %d estimators"
-                % (len(self.weights), len(self.estimators))
+                "Number of `estimators` and weights must be equal; got"
+                f" {len(self.weights)} weights, {len(self.estimators)} estimators"
             )
 
         self.estimators_ = Parallel(n_jobs=self.n_jobs)(
@@ -312,9 +320,15 @@ def fit(self, X, y, sample_weight=None):
                 "Multilabel and multi-output classification is not supported."
             )
 
+        check_scalar(
+            self.flatten_transform,
+            name="flatten_transform",
+            target_type=(numbers.Integral, np.bool_),
+        )
+
         if self.voting not in ("soft", "hard"):
             raise ValueError(
-                "Voting must be 'soft' or 'hard'; got (voting=%r)" % self.voting
+                f"Voting must be 'soft' or 'hard'; got (voting={self.voting!r})"
             )
 
         self.le_ = LabelEncoder().fit(y)
diff --git a/sklearn/ensemble/tests/test_voting.py b/sklearn/ensemble/tests/test_voting.py
index 4bebfaca53709..ab11cff022fd2 100644
--- a/sklearn/ensemble/tests/test_voting.py
+++ b/sklearn/ensemble/tests/test_voting.py
@@ -34,6 +34,40 @@
 X_r, y_r = datasets.load_diabetes(return_X_y=True)
 
 
+def test_invalid_type_for_flatten_transform():
+    # Test that invalid input raises the proper exception
+    ensemble = VotingClassifier(
+        estimators=[("lr", LogisticRegression())], flatten_transform="foo"
+    )
+    err_msg = "flatten_transform must be an instance of"
+    with pytest.raises(TypeError, match=err_msg):
+        ensemble.fit(X, y)
+
+
+@pytest.mark.parametrize(
+    "X, y, voter, learner",
+    [
+        (X, y, VotingClassifier, {"estimators": [("lr", LogisticRegression())]}),
+        (X_r, y_r, VotingRegressor, {"estimators": [("lr", LinearRegression())]}),
+    ],
+)
+@pytest.mark.parametrize(
+    "params, err_type, err_msg",
+    [
+        ({"verbose": -1}, ValueError, "verbose == -1, must be >= 0"),
+        ({"verbose": "foo"}, TypeError, "verbose must be an instance of"),
+    ],
+)
+def test_voting_estimators_param_validation(
+    X, y, voter, learner, params, err_type, err_msg
+):
+    # Test that invalid input raises the proper exception
+    params.update(learner)
+    ensemble = voter(**params)
+    with pytest.raises(err_type, match=err_msg):
+        ensemble.fit(X, y)
+
+
 @pytest.mark.parametrize(
     "params, err_msg",
     [