Check feature_names_in_ for sklearn.model_selection (#20850) · scikit-learn/scikit-learn@2a6426f · GitHub
[go: up one dir, main page]

Skip to content

Commit 2a6426f

Browse files
ogrisel and thomasjpfan authored
Check feature_names_in_ for sklearn.model_selection (#20850)
Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
1 parent 1398738 commit 2a6426f

File tree

3 files changed

+56
-10
lines changed

3 files changed

+56
-10
lines changed

sklearn/model_selection/_search.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -858,6 +858,9 @@ def evaluate_candidates(candidate_params, cv=None, more_results=None):
858858
refit_end_time = time.time()
859859
self.refit_time_ = refit_end_time - refit_start_time
860860

861+
if hasattr(self.best_estimator_, "feature_names_in_"):
862+
self.feature_names_in_ = self.best_estimator_.feature_names_in_
863+
861864
# Store the only scorer not as a dict for single metric evaluation
862865
self.scorer_ = scorers
863866

@@ -1246,11 +1249,21 @@ class GridSearchCV(BaseSearchCV):
12461249
the underlying estimator is a classifier.
12471250
12481251
n_features_in_ : int
1249-
Number of features seen during :term:`fit`. Only defined if the
1250-
underlying estimator exposes such an attribute when fit.
1252+
Number of features seen during :term:`fit`. Only defined if
1253+
`best_estimator_` is defined (see the documentation for the `refit`
1254+
parameter for more details) and that `best_estimator_` exposes
1255+
`n_features_in_` when fit.
12511256
12521257
.. versionadded:: 0.24
12531258
1259+
feature_names_in_ : ndarray of shape (`n_features_in_`,)
1260+
Names of features seen during :term:`fit`. Only defined if
1261+
`best_estimator_` is defined (see the documentation for the `refit`
1262+
parameter for more details) and that `best_estimator_` exposes
1263+
`feature_names_in_` when fit.
1264+
1265+
.. versionadded:: 1.0
1266+
12541267
Notes
12551268
-----
12561269
The parameters selected are those that maximize the score of the left out
@@ -1595,11 +1608,21 @@ class RandomizedSearchCV(BaseSearchCV):
15951608
the underlying estimator is a classifier.
15961609
15971610
n_features_in_ : int
1598-
Number of features seen during :term:`fit`. Only defined if the
1599-
underlying estimator exposes such an attribute when fit.
1611+
Number of features seen during :term:`fit`. Only defined if
1612+
`best_estimator_` is defined (see the documentation for the `refit`
1613+
parameter for more details) and that `best_estimator_` exposes
1614+
`n_features_in_` when fit.
16001615
16011616
.. versionadded:: 0.24
16021617
1618+
feature_names_in_ : ndarray of shape (`n_features_in_`,)
1619+
Names of features seen during :term:`fit`. Only defined if
1620+
`best_estimator_` is defined (see the documentation for the `refit`
1621+
parameter for more details) and that `best_estimator_` exposes
1622+
`feature_names_in_` when fit.
1623+
1624+
.. versionadded:: 1.0
1625+
16031626
Notes
16041627
-----
16051628
The parameters selected are those that maximize the score of the held-out

sklearn/model_selection/_search_successive_halving.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -626,11 +626,21 @@ class HalvingGridSearchCV(BaseSuccessiveHalving):
626626
the underlying estimator is a classifier.
627627
628628
n_features_in_ : int
629-
Number of features seen during :term:`fit`. Only defined if the
630-
underlying estimator exposes such an attribute when fit.
629+
Number of features seen during :term:`fit`. Only defined if
630+
`best_estimator_` is defined (see the documentation for the `refit`
631+
parameter for more details) and that `best_estimator_` exposes
632+
`n_features_in_` when fit.
631633
632634
.. versionadded:: 0.24
633635
636+
feature_names_in_ : ndarray of shape (`n_features_in_`,)
637+
Names of features seen during :term:`fit`. Only defined if
638+
`best_estimator_` is defined (see the documentation for the `refit`
639+
parameter for more details) and that `best_estimator_` exposes
640+
`feature_names_in_` when fit.
641+
642+
.. versionadded:: 1.0
643+
634644
See Also
635645
--------
636646
:class:`HalvingRandomSearchCV`:
@@ -954,11 +964,21 @@ class HalvingRandomSearchCV(BaseSuccessiveHalving):
954964
the underlying estimator is a classifier.
955965
956966
n_features_in_ : int
957-
Number of features seen during :term:`fit`. Only defined if the
958-
underlying estimator exposes such an attribute when fit.
967+
Number of features seen during :term:`fit`. Only defined if
968+
`best_estimator_` is defined (see the documentation for the `refit`
969+
parameter for more details) and that `best_estimator_` exposes
970+
`n_features_in_` when fit.
959971
960972
.. versionadded:: 0.24
961973
974+
feature_names_in_ : ndarray of shape (`n_features_in_`,)
975+
Names of features seen during :term:`fit`. Only defined if
976+
`best_estimator_` is defined (see the documentation for the `refit`
977+
parameter for more details) and that `best_estimator_` exposes
978+
`feature_names_in_` when fit.
979+
980+
.. versionadded:: 1.0
981+
962982
See Also
963983
--------
964984
:class:`HalvingGridSearchCV`:

sklearn/tests/test_common.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -325,11 +325,14 @@ def test_check_n_features_in_after_fitting(estimator):
325325

326326
COLUMN_NAME_MODULES_TO_IGNORE = {
327327
"compose",
328-
"model_selection",
329328
}
330329

331330
_estimators_to_test = list(
332-
chain(_tested_estimators(), [make_pipeline(LogisticRegression(C=1))])
331+
chain(
332+
_tested_estimators(),
333+
[make_pipeline(LogisticRegression(C=1))],
334+
list(_generate_search_cv_instances()),
335+
)
333336
)
334337

335338

0 commit comments

Comments
 (0)
0