8000 FIX Allow negative tol in SequentialFeatureSelector (#25664) · jeremiedbb/scikit-learn@7b59556 · GitHub
[go: up one dir, main page]

Skip to content

Commit 7b59556

Browse files
authored
FIX Allow negative tol in SequentialFeatureSelector (scikit-learn#25664)
1 parent 4b5cf19 commit 7b59556

File tree

3 files changed

+52
-1
lines changed

3 files changed

+52
-1
lines changed

doc/whats_new/v1.2.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,13 @@ Changelog
5757
and :class:`ensemble.BaggingRegressor`.
5858
:pr:`25477` by :user:`Tim Head <betatim>`.
5959

60+
:mod:`sklearn.feature_selection`
61+
................................
62+
63+
- |Fix| Fixed a regression where a negative `tol` would not be accepted any more by
64+
:class:`feature_selection.SequentialFeatureSelector`.
65+
:pr:`25664` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
66+
6067
:mod:`sklearn.isotonic`
6168
.......................
6269

sklearn/feature_selection/_sequential.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,11 @@ class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin, BaseEstimator
5757
tol : float, default=None
5858
If the score is not incremented by at least `tol` between two
5959
consecutive feature additions or removals, stop adding or removing.
60+
61+
`tol` can be negative when removing features using `direction="backward"`.
62+
It can be useful to reduce the number of features at the cost of a small
63+
decrease in the score.
64+
6065
`tol` is enabled only when `n_features_to_select` is `"auto"`.
6166
6267
.. versionadded:: 1.1
@@ -153,7 +158,7 @@ class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin, BaseEstimator
153158
Interval(Integral, 0, None, closed="neither"),
154159
Hidden(None),
155160
],
156-
"tol": [None, Interval(Real, 0, None, closed="neither")],
161+
"tol": [None, Interval(Real, None, None, closed="neither")],
157162
"direction": [StrOptions({"forward", "backward"})],
158163
"scoring": [None, StrOptions(set(get_scorer_names())), callable],
159164
"cv": ["cv_object"],
@@ -250,6 +255,9 @@ def fit(self, X, y=None):
250255
elif isinstance(self.n_features_to_select, Real):
251256
self.n_features_to_select_ = int(n_features * self.n_features_to_select)
252257

258+
if self.tol is not None and self.tol < 0 and self.direction == "forward":
259+
raise ValueError("tol must be positive when doing forward selection")
260+
253261
cloned_estimator = clone(self.estimator)
254262

255263
# the current mask corresponds to the set of features:

sklearn/feature_selection/tests/test_sequential.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,3 +278,39 @@ def test_no_y_validation_model_fit(y):
278278

279279
with pytest.raises((TypeError, ValueError)):
280280
sfs.fit(X, y)
281+
282+
283+
def test_forward_neg_tol_error():
284+
"""Check that we raise an error when tol<0 and direction='forward'"""
285+
X, y = make_regression(n_features=10, random_state=0)
286+
sfs = SequentialFeatureSelector(
287+
LinearRegression(),
288+
n_features_to_select="auto",
289+
direction="forward",
290+
tol=-1e-3,
291+
)
292+
293+
with pytest.raises(ValueError, match="tol must be positive"):
294+
sfs.fit(X, y)
295+
296+
297+
def test_backward_neg_tol():
298+
"""Check that SequentialFeatureSelector works negative tol
299+
300+
non-regression test for #25525
301+
"""
302+
X, y = make_regression(n_features=10, random_state=0)
303+
lr = LinearRegression()
304+
initial_score = lr.fit(X, y).score(X, y)
305+
306+
sfs = SequentialFeatureSelector(
307+
lr,
308+
n_features_to_select="auto",
309+
direction="backward",
310+
tol=-1e-3,
311+
)
312+
Xr = sfs.fit_transform(X, y)
313+
new_score = lr.fit(Xr, y).score(Xr, y)
314+
315+
assert 0 < sfs.get_support().sum() < X.shape[1]
316+
assert new_score < initial_score

0 commit comments

Comments
 (0)
0