Fix sklearn.feature_selection.SequentialFeatureSelector: select features as long as score gets better · murata-yu/scikit-learn@e501ae8 · GitHub

Commit e501ae8

Fix sklearn.feature_selection.SequentialFeatureSelector: select features only as long as the score gets better. scikit-learn#20137
Add a `censored_rate` parameter to SequentialFeatureSelector. If `censored_rate` is not None, fitting is aborted once `new_score` is lower than `old_score * (1 + censored_rate)`.
1 parent c1cc67d commit e501ae8
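
For illustration, here is a minimal sketch of the stopping rule described in the commit message. The score values are hypothetical; `old_score`, `new_score`, and `censored_rate` are the names used in the patch:

    # Hypothetical values illustrating the abort condition from this commit.
    old_score = 0.90         # CV score after the previous iteration
    censored_rate = 0.01     # demand at least a 1% relative improvement
    new_score = 0.905        # CV score with the best candidate feature added

    # 0.905 < 0.90 * (1 + 0.01) == 0.909, so selection would stop here.
    if new_score < old_score * (1 + censored_rate):
        print("abort: improvement below threshold")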

File tree

1 file changed: +32 −9 lines

sklearn/feature_selection/_sequential.py

Lines changed: 32 additions & 9 deletions
@@ -36,6 +36,10 @@ class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin,
         to select. If float between 0 and 1, it is the fraction of features to
         select.
 
+    censored_rate : float, default=None
+        The minimum rate of score improvement required to continue fitting.
+        If `None`, fitting is not aborted early.
+
     direction : {'forward', 'backward'}, default='forward'
         Whether to perform forward selection or backward selection.
 
@@ -106,11 +110,12 @@ class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin,
     >>> sfs.transform(X).shape
     (150, 3)
     """
-    def __init__(self, estimator, *, n_features_to_select=None,
+    def __init__(self, estimator, *, n_features_to_select=None, censored_rate=None,
                  direction='forward', scoring=None, cv=5, n_jobs=None):
 
         self.estimator = estimator
         self.n_features_to_select = n_features_to_select
+        self.censored_rate = censored_rate
         self.direction = direction
         self.scoring = scoring
         self.cv = cv
@@ -175,18 +180,34 @@ def fit(self, X, y):
             self.n_features_to_select_ if self.direction == 'forward'
             else n_features - self.n_features_to_select_
         )
-        for _ in range(n_iterations):
-            new_feature_idx = self._get_best_new_feature(cloned_estimator, X,
-                                                         y, current_mask)
-            current_mask[new_feature_idx] = True
 
-        if self.direction == 'backward':
-            current_mask = ~current_mask
+        if self.censored_rate is None:
+            for _ in range(n_iterations):
+                new_feature_idx, new_score = self._get_best_new_feature_score(
+                    cloned_estimator, X, y, current_mask)
+                current_mask[new_feature_idx] = True
+
+            if self.direction == 'backward':
+                current_mask = ~current_mask
+        else:
+            old_score = 0
+            for _ in range(n_iterations):
+                new_feature_idx, new_score = self._get_best_new_feature_score(
+                    cloned_estimator, X, y, current_mask)
+                if new_score < old_score * (1 + self.censored_rate):
+                    break
+
+                old_score = new_score
+                current_mask[new_feature_idx] = True
+
+            if self.direction == 'backward':
+                current_mask = ~current_mask
+
         self.support_ = current_mask
 
         return self
 
-    def _get_best_new_feature(self, estimator, X, y, current_mask):
+    def _get_best_new_feature_score(self, estimator, X, y, current_mask):
         # Return the best new feature to add to the current_mask, i.e. return
         # the best new feature to add (resp. remove) when doing forward
         # selection (resp. backward selection)
@@ -201,7 +222,9 @@ def _get_best_new_feature(self, estimator, X, y, current_mask):
             scores[feature_idx] = cross_val_score(
                 estimator, X_new, y, cv=self.cv, scoring=self.scoring,
                 n_jobs=self.n_jobs).mean()
-        return max(scores, key=lambda feature_idx: scores[feature_idx])
+
+        new_feature_idx = max(scores, key=lambda feature_idx: scores[feature_idx])
+        return new_feature_idx, scores[new_feature_idx]
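
Assuming this patched branch is installed, usage might look like the sketch below. Note that `censored_rate` exists only in this commit; it is not part of released scikit-learn:

    from sklearn.datasets import load_iris
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.feature_selection import SequentialFeatureSelector

    X, y = load_iris(return_X_y=True)
    knn = KNeighborsClassifier(n_neighbors=3)

    # Stop adding features once the cross-validated score improves by less
    # than 5% relative to the previous iteration (behaviour added here).
    sfs = SequentialFeatureSelector(knn, n_features_to_select=3,
                                    censored_rate=0.05)
    sfs.fit(X, y)
    print(sfs.get_support())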
