FEA Add metadata routing for SequentialFeatureSelector by OmarManzoor · Pull Request #29260 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

FEA Add metadata routing for SequentialFeatureSelector #29260

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jul 31, 2024
2 changes: 1 addition & 1 deletion doc/metadata_routing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ Meta-estimators and functions supporting metadata routing:
- :class:`sklearn.ensemble.BaggingClassifier`
- :class:`sklearn.ensemble.BaggingRegressor`
- :class:`sklearn.feature_selection.SelectFromModel`
- :class:`sklearn.feature_selection.SequentialFeatureSelector`
- :class:`sklearn.impute.IterativeImputer`
- :class:`sklearn.linear_model.ElasticNetCV`
- :class:`sklearn.linear_model.LarsCV`
Expand Down Expand Up @@ -324,4 +325,3 @@ Meta-estimators and tools not supporting metadata routing yet:
- :class:`sklearn.ensemble.AdaBoostRegressor`
- :class:`sklearn.feature_selection.RFE`
- :class:`sklearn.feature_selection.RFECV`
- :class:`sklearn.feature_selection.SequentialFeatureSelector`
5 changes: 5 additions & 0 deletions doc/whats_new/v1.6.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ more details.
params to the underlying regressor.
:pr:`29136` by :user:`Omar Salman <OmarManzoor>`.

- |Feature| :class:`feature_selection.SequentialFeatureSelector` now supports
metadata routing in its `fit` method and passes the corresponding params to
the :func:`model_selection.cross_val_score` function.
:pr:`29260` by :user:`Omar Salman <OmarManzoor>`.

- |Feature| :func:`model_selection.validation_curve` now supports metadata routing for
the `fit` method of its estimator and for its underlying CV splitter and scorer.
:pr:`29329` by :user:`Stefanie Senger <StefanieSenger>`.
Expand Down
69 changes: 61 additions & 8 deletions sklearn/feature_selection/_sequential.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,22 @@
import numpy as np

from ..base import BaseEstimator, MetaEstimatorMixin, _fit_context, clone, is_classifier
from ..metrics import get_scorer_names
from ..metrics import check_scoring, get_scorer_names
from ..model_selection import check_cv, cross_val_score
from ..utils._metadata_requests import (
MetadataRouter,
MethodMapping,
_raise_for_params,
_routing_enabled,
process_routing,
)
from ..utils._param_validation import HasMethods, Interval, RealNotInt, StrOptions
from ..utils._tags import _safe_tags
from ..utils.metadata_routing import _RoutingNotSupportedMixin
from ..utils.validation import check_is_fitted
from ._base import SelectorMixin


class SequentialFeatureSelector(
_RoutingNotSupportedMixin, SelectorMixin, MetaEstimatorMixin, BaseEstimator
):
class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin, BaseEstimator):
"""Transformer that performs Sequential Feature Selection.

This Sequential Feature Selector adds (forward selection) or
Expand Down Expand Up @@ -191,7 +195,7 @@ def __init__(
# SequentialFeatureSelector.estimator is not validated yet
prefer_skip_nested_validation=False
)
def fit(self, X, y=None):
def fit(self, X, y=None, **params):
"""Learn the features to select from X.

Parameters
Expand All @@ -204,11 +208,24 @@ def fit(self, X, y=None):
Target values. This parameter may be ignored for
unsupervised learning.

**params : dict, default=None
Parameters to be passed to the underlying `estimator`, `cv`
and `scorer` objects.

.. versionadded:: 1.6

Only available if `enable_metadata_routing=True`,
which can be set by using
``sklearn.set_config(enable_metadata_routing=True)``.
See :ref:`Metadata Routing User Guide <metadata_routing>` for
more details.

Returns
-------
self : object
Returns the instance itself.
"""
_raise_for_params(params, self, "fit")
tags = self._get_tags()
X = self._validate_data(
X,
Expand Down Expand Up @@ -251,9 +268,15 @@ def fit(self, X, y=None):

old_score = -np.inf
is_auto_select = self.tol is not None and self.n_features_to_select == "auto"

# We only need to verify the routing here and not use the routed params
# because internally the actual routing will also take place inside the
# `cross_val_score` function.
if _routing_enabled():
process_routing(self, "fit", **params)
for _ in range(n_iterations):
new_feature_idx, new_score = self._get_best_new_feature_score(
cloned_estimator, X, y, cv, current_mask
cloned_estimator, X, y, cv, current_mask, **params
)
if is_auto_select and ((new_score - old_score) < self.tol):
break
Expand All @@ -269,7 +292,7 @@ def fit(self, X, y=None):

return self

def _get_best_new_feature_score(self, estimator, X, y, cv, current_mask):
def _get_best_new_feature_score(self, estimator, X, y, cv, current_mask, **params):
# Return the best new feature and its score to add to the current_mask,
# i.e. return the best new feature and its score to add (resp. remove)
# when doing forward selection (resp. backward selection).
Expand All @@ -290,6 +313,7 @@ def _get_best_new_feature_score(self, estimator, X, y, cv, current_mask):
cv=cv,
scoring=self.scoring,
n_jobs=self.n_jobs,
params=params,
).mean()
new_feature_idx = max(scores, key=lambda feature_idx: scores[feature_idx])
return new_feature_idx, scores[new_feature_idx]
Expand All @@ -302,3 +326,32 @@ def _more_tags(self):
return {
"allow_nan": _safe_tags(self.estimator, key="allow_nan"),
}

def get_metadata_routing(self):
    """Build the metadata router for this meta-estimator.

    The router registers three consumers of metadata passed to `fit`:
    the wrapped estimator (its `fit`), the CV splitter (its `split`) and
    the scorer (its `score`).

    Please check :ref:`User Guide <metadata_routing>` on how the routing
    mechanism works.

    .. versionadded:: 1.6

    Returns
    -------
    routing : MetadataRouter
        A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
        routing information.
    """
    owner_name = self.__class__.__name__
    router = MetadataRouter(owner=owner_name)

    # `fit` of this object -> `fit` of the wrapped estimator.
    router.add(
        estimator=self.estimator,
        method_mapping=MethodMapping().add(caller="fit", callee="fit"),
    )

    # `fit` of this object -> `split` of the resolved CV splitter.
    resolved_cv = check_cv(self.cv, classifier=is_classifier(self.estimator))
    router.add(
        splitter=resolved_cv,
        method_mapping=MethodMapping().add(caller="fit", callee="split"),
    )

    # `fit` of this object -> `score` of the resolved scorer.
    resolved_scorer = check_scoring(self.estimator, scoring=self.scoring)
    router.add(
        scorer=resolved_scorer,
        method_mapping=MethodMapping().add(caller="fit", callee="score"),
    )

    return router
9 changes: 9 additions & 0 deletions sklearn/feature_selection/tests/test_sequential.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,3 +321,12 @@ def test_cv_generator_support():

sfs = SequentialFeatureSelector(knc, n_features_to_select=5, cv=splits)
sfs.fit(X, y)


def test_fit_rejects_params_with_no_routing_enabled():
    """Check that `fit` raises when extra params are given without routing.

    Metadata routing is not enabled in this test, so passing
    `sample_weight` to `fit` is expected to raise a `ValueError`.
    """
    X, y = make_classification(random_state=42)
    selector = SequentialFeatureSelector(estimator=LinearRegression())

    expected_message = "is only supported if"
    with pytest.raises(ValueError, match=expected_message):
        selector.fit(X, y, sample_weight=np.ones_like(y))
13 changes: 12 additions & 1 deletion sklearn/tests/test_metaestimators_metadata_routing.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,18 @@ def enable_slep006():
],
"method_mapping": {"fit": ["fit", "score"]},
},
{
"metaestimator": SequentialFeatureSelector,
"estimator_name": "estimator",
"estimator": "classifier",
"X": X,
"y": y,
"estimator_routing_methods": ["fit"],
"scorer_name": "scoring",
"scorer_routing_methods": ["fit"],
"cv_name": "cv",
"cv_routing_methods": ["fit"],
},
]
"""List containing all metaestimators to be tested and their settings

Expand Down Expand Up @@ -450,7 +462,6 @@ def enable_slep006():
AdaBoostRegressor(),
RFE(ConsumingClassifier()),
RFECV(ConsumingClassifier()),
SequentialFeatureSelector(ConsumingClassifier()),
]


Expand Down
Loading
0