From 3a07e33743cde1f56bc1dd124eaf0b2bd5f9f47d Mon Sep 17 00:00:00 2001 From: Stefanie Molin <24376333+stefmolin@users.noreply.github.com> Date: Sat, 20 Aug 2022 13:16:06 -0400 Subject: [PATCH 1/2] MAINT Add parameter validation to PolynomialFeatures. --- sklearn/preprocessing/_polynomial.py | 29 +++++++++++-------- .../preprocessing/tests/test_polynomial.py | 6 +--- sklearn/tests/test_common.py | 1 - 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py index d90411a0c8bfa..d7f2a3a3bf653 100644 --- a/sklearn/preprocessing/_polynomial.py +++ b/sklearn/preprocessing/_polynomial.py @@ -2,7 +2,7 @@ This file contains preprocessing tools based on polynomials. """ import collections -import numbers +from numbers import Integral from itertools import chain, combinations from itertools import combinations_with_replacement as combinations_w_r @@ -16,6 +16,7 @@ from ..utils.deprecation import deprecated from ..utils.validation import check_is_fitted, FLOAT_DTYPES, _check_sample_weight from ..utils.validation import _check_feature_names_in +from ..utils._param_validation import Interval, StrOptions from ..utils.stats import _weighted_percentile from ._csr_polynomial_expansion import _csr_polynomial_expansion @@ -129,6 +130,13 @@ class PolynomialFeatures(TransformerMixin, BaseEstimator): [ 1., 4., 5., 20.]]) """ + _parameter_constraints = { + "degree": [Interval(Integral, 0, None, closed="left"), "array-like"], + "interaction_only": ["boolean"], + "include_bias": ["boolean"], + "order": [StrOptions({"C", "F"})], + } + def __init__( self, degree=2, *, interaction_only=False, include_bias=True, order="C" ): @@ -284,14 +292,11 @@ def fit(self, X, y=None): self : object Fitted transformer. """ + self._validate_params() _, n_features = self._validate_data(X, accept_sparse=True).shape - if isinstance(self.degree, numbers.Integral): - if self.degree < 0: - raise ValueError( - f"degree must be a non-negative integer, got {self.degree}." - ) - elif self.degree == 0 and not self.include_bias: + if isinstance(self.degree, Integral): + if self.degree == 0 and not self.include_bias: raise ValueError( "Setting degree to zero and include_bias to False would result in" " an empty output array." @@ -304,8 +309,8 @@ def fit(self, X, y=None): ): self._min_degree, self._max_degree = self.degree if not ( - isinstance(self._min_degree, numbers.Integral) - and isinstance(self._max_degree, numbers.Integral) + isinstance(self._min_degree, Integral) + and isinstance(self._max_degree, Integral) and self._min_degree >= 0 and self._min_degree <= self._max_degree ): @@ -317,7 +322,7 @@ def fit(self, X, y=None): ) elif self._max_degree == 0 and not self.include_bias: raise ValueError( - "Setting both min_deree and max_degree to zero and include_bias to" + "Setting both min_degree and max_degree to zero and include_bias to" " False would result in an empty output array." ) else: @@ -779,7 +784,7 @@ def fit(self, X, y=None, sample_weight=None): _, n_features = X.shape - if not (isinstance(self.degree, numbers.Integral) and self.degree >= 0): + if not (isinstance(self.degree, Integral) and self.degree >= 0): raise ValueError( f"degree must be a non-negative integer, got {self.degree}." ) @@ -788,7 +793,7 @@ def fit(self, X, y=None, sample_weight=None): "uniform", "quantile", ]: - if not (isinstance(self.n_knots, numbers.Integral) and self.n_knots >= 2): + if not (isinstance(self.n_knots, Integral) and self.n_knots >= 2): raise ValueError( f"n_knots must be a positive integer >= 2, got: {self.n_knots}" ) diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py index 0ab8f0f335f43..792e49a9b169d 100644 --- a/sklearn/preprocessing/tests/test_polynomial.py +++ b/sklearn/preprocessing/tests/test_polynomial.py @@ -465,10 +465,6 @@ def test_spline_transformer_n_features_out(n_knots, include_bias, degree): @pytest.mark.parametrize( "params, err_msg", [ - ({"degree": -1}, "degree must be a non-negative integer"), - ({"degree": 2.5}, "degree must be a non-negative int or tuple"), - ({"degree": "12"}, r"degree=\(min_degree, max_degree\) must"), - ({"degree": "string"}, "degree must be a non-negative int or tuple"), ({"degree": (-1, 2)}, r"degree=\(min_degree, max_degree\) must"), ({"degree": (0, 1.5)}, r"degree=\(min_degree, max_degree\) must"), ({"degree": (3, 2)}, r"degree=\(min_degree, max_degree\) must"), @@ -915,7 +911,7 @@ def test_polynomial_features_behaviour_on_zero_degree(): poly = PolynomialFeatures(degree=(0, 0), include_bias=False) err_msg = ( - "Setting both min_deree and max_degree to zero and include_bias to" + "Setting both min_degree and max_degree to zero and include_bias to" " False would result in an empty output array." ) with pytest.raises(ValueError, match=err_msg): diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 9d7c53113bcf6..b74d9b7791372 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -495,7 +495,6 @@ def test_estimators_do_not_raise_errors_in_init_or_set_params(Estimator): "OneVsRestClassifier", "PatchExtractor", "PolynomialCountSketch", - "PolynomialFeatures", "QuadraticDiscriminantAnalysis", "RANSACRegressor", "RBFSampler", From 00a9273d0375e4412257c25ba93030ca78a774fb Mon Sep 17 00:00:00 2001 From: Stefanie Molin <24376333+stefmolin@users.noreply.github.com> Date: Mon, 22 Aug 2022 12:51:33 -0400 Subject: [PATCH 2/2] Add additional test case per comment. --- sklearn/preprocessing/tests/test_polynomial.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py index 792e49a9b169d..b1d3ccd847855 100644 --- a/sklearn/preprocessing/tests/test_polynomial.py +++ b/sklearn/preprocessing/tests/test_polynomial.py @@ -468,6 +468,7 @@ def test_spline_transformer_n_features_out(n_knots, include_bias, degree): ({"degree": (-1, 2)}, r"degree=\(min_degree, max_degree\) must"), ({"degree": (0, 1.5)}, r"degree=\(min_degree, max_degree\) must"), ({"degree": (3, 2)}, r"degree=\(min_degree, max_degree\) must"), + ({"degree": (1, 2, 3)}, r"int or tuple \(min_degree, max_degree\)"), ], ) def test_polynomial_features_input_validation(params, err_msg):