CLN Check n_features_in_ only when it exists (#18011)

thomasjpfan · ogrisel · web-flow · commit e8ffa3179aff · 2020-10-13T11:07:26.000+02:00
* CLN Check n_features_in_ only if it exists

* Update sklearn/base.py

Co-authored-by: Olivier Grisel <olivier.grisel@gmail.com>

* DOC Update docstring

* DOC Grammer

* Grammar [ci skip]

Co-authored-by: Olivier Grisel <olivier.grisel@gmail.com>
diff --git a/sklearn/base.py b/sklearn/base.py
@@ -359,8 +359,9 @@ def _check_n_features(self, X, reset):
             The input samples.
         reset : bool
             If True, the `n_features_in_` attribute is set to `X.shape[1]`.
-            Else, the attribute must already exist and the function checks
-            that it is equal to `X.shape[1]`.
+            If False and the attribute exists, then check that it is equal to
+            `X.shape[1]`. If False and the attribute does *not* exist, then
+            the check is skipped.
             .. note::
                It is recommended to call reset=True in `fit` and in the first
                call to `partial_fit`. All other methods that validate `X`
@@ -370,18 +371,18 @@ def _check_n_features(self, X, reset):
 
         if reset:
             self.n_features_in_ = n_features
-        else:
-            if not hasattr(self, 'n_features_in_'):
-                raise RuntimeError(
-                    "The reset parameter is False but there is no "
-                    "n_features_in_ attribute. Is this estimator fitted?"
-                )
-            if n_features != self.n_features_in_:
-                raise ValueError(
-                    'X has {} features, but {} is expecting {} features '
-                    'as input.'.format(n_features, self.__class__.__name__,
-                                       self.n_features_in_)
-                )
+            return
+
+        if not hasattr(self, "n_features_in_"):
+            # Skip this check if the expected number of input features
+            # was not recorded by calling fit first. This is typically the case
+            # for stateless transformers.
+            return
+
+        if n_features != self.n_features_in_:
+            raise ValueError(
+                f"X has {n_features} features, but {self.__class__.__name__} "
+                f"is expecting {self.n_features_in_} features as input.")
 
     def _validate_data(self, X, y='no_validation', reset=True,
                        validate_separately=False, **check_params):