diff --git a/sklearn/base.py b/sklearn/base.py
index d9fc4b7092971..bb2e3c67d7bbc 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -359,8 +359,9 @@ def _check_n_features(self, X, reset):
             The input samples.
         reset : bool
             If True, the `n_features_in_` attribute is set to `X.shape[1]`.
-            Else, the attribute must already exist and the function checks
-            that it is equal to `X.shape[1]`.
+            If False and the attribute exists, then check that it is equal to
+            `X.shape[1]`. If False and the attribute does *not* exist, then
+            the check is skipped.
             .. note::
                It is recommended to call reset=True in `fit` and in the first
                call to `partial_fit`. All other methods that validate `X`
@@ -370,18 +371,18 @@ def _check_n_features(self, X, reset):
 
         if reset:
             self.n_features_in_ = n_features
-        else:
-            if not hasattr(self, 'n_features_in_'):
-                raise RuntimeError(
-                    "The reset parameter is False but there is no "
-                    "n_features_in_ attribute. Is this estimator fitted?"
-                )
-            if n_features != self.n_features_in_:
-                raise ValueError(
-                    'X has {} features, but {} is expecting {} features '
-                    'as input.'.format(n_features, self.__class__.__name__,
-                                       self.n_features_in_)
-                )
+            return
+
+        if not hasattr(self, "n_features_in_"):
+            # Skip this check if the expected number of input features
+            # was not recorded by calling fit first. This is typically the case
+            # for stateless transformers.
+            return
+
+        if n_features != self.n_features_in_:
+            raise ValueError(
+                f"X has {n_features} features, but {self.__class__.__name__} "
+                f"is expecting {self.n_features_in_} features as input.")
 
     def _validate_data(self, X, y='no_validation', reset=True,
                        validate_separately=False, **check_params):