Fix sklearn.feature_selection.SequentialFeatureSelector Select featur… · murata-yu/scikit-learn@16f9dc2 · GitHub
[go: up one dir, main page]

Skip to content

Commit 16f9dc2

Browse files
committed
Fix sklearn.feature_selection.SequentialFeatureSelector Select features as long as score gets better. scikit-learn#20137
Add a `censored_rate` parameter to SequentialFeatureSelector. If `censored_rate` is not None, fitting is aborted when `new_score` is lower than `old_score * (1 + censored_rate)`.
1 parent c1cc67d commit 16f9dc2

File tree

1 file changed

+28
-9
lines changed

1 file changed

+28
-9
lines changed

sklearn/feature_selection/_sequential.py

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,12 @@ class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin,
106106
>>> sfs.transform(X).shape
107107
(150, 3)
108108
"""
109-
def __init__(self, estimator, *, n_features_to_select=None,
109+
def __init__(self, estimator, *, n_features_to_select=None, censored_rate=None,
110110
direction='forward', scoring=None, cv=5, n_jobs=None):
111111

112112
self.estimator = estimator
113113
self.n_features_to_select = n_features_to_select
114+
self.censored_rate = censored_rate
114115
self.direction = direction
115116
self.scoring = scoring
116117
self.cv = cv
@@ -175,18 +176,34 @@ def fit(self, X, y):
175176
self.n_features_to_select_ if self.direction == 'forward'
176177
else n_features - self.n_features_to_select_
177178
)
178-
for _ in range(n_iterations):
179-
new_feature_idx = self._get_best_new_feature(cloned_estimator, X,
180-
y, current_mask)
181-
current_mask[new_feature_idx] = True
182179

183-
if self.direction == 'backward':
184-
current_mask = ~current_mask
180+
if self.censored_rate is None:
181+
for _ in range(n_iterations):
182+
new_feature_idx, new_score = self._get_best_new_feature_score(cloned_estimator, X,
183+
y, current_mask)
184+
current_mask[new_feature_idx] = True
185+
186+
if self.direction == 'backward':
187+
current_mask = ~current_mask
188+
else:
189+
old_score = 0
190+
for _ in range(n_iterations):
191+
new_feature_idx, new_score = self._get_best_new_feature_score(cloned_estimator, X,
192+
y, current_mask)
193+
if new_score < old_score*(1+self.censored_rate):
194+
break
195+
196+
old_score = new_score
197+
current_mask[new_feature_idx] = True
198+
199+
if self.direction == 'backward':
200+
current_mask = ~current_mask
201+
185202
self.support_ = current_mask
186203

187204
return self
188205

189-
def _get_best_new_feature(self, estimator, X, y, current_mask):
206+
def _get_best_new_feature_score(self, estimator, X, y, current_mask):
190207
# Return the best new feature to add to the current_mask, i.e. return
191208
# the best new feature to add (resp. remove) when doing forward
192209
# selection (resp. backward selection)
@@ -201,7 +218,9 @@ def _get_best_new_feature(self, estimator, X, y, current_mask):
201218
scores[feature_idx] = cross_val_score(
202219
estimator, X_new, y, cv=self.cv, scoring=self.scoring,
203220
n_jobs=self.n_jobs).mean()
204-
return max(scores, key=lambda feature_idx: scores[feature_idx])
221+
222+
new_feature_idx = max(scores, key=lambda feature_idx: scores[feature_idx])
223+
return new_feature_idx, scores[new_feature_idx]
205224

206225
def _get_support_mask(self):
207226
check_is_fitted(self)

0 commit comments

Comments
 (0)
0