support for scorers that return nan; bugfix and additional unittest t… · scikit-learn/scikit-learn@712a4ba · GitHub
[go: up one dir, main page]

Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 712a4ba

Browse files
committed
support for scorers that return nan; bugfix and additional unittest to select features in a specified range
1 parent ef4d566 commit 712a4ba

File tree

2 files changed

+29
-6
lines changed

2 files changed

+29
-6
lines changed

sklearn/feature_selection/sequential_feature_selector.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -183,7 +183,7 @@ def fit(self, X, y):
183183

184184
if self.n_features_to_select[0] > self.n_features_to_select[1]:
185185
raise ValueError('The min n_features_to_select value must be'
186-
' larger than the max'
186+
' smaller than the max'
187187
' n_features_to_select value.')
188188

189189
if isinstance(self.n_features_to_select, tuple):
@@ -211,7 +211,7 @@ def fit(self, X, y):
211211
self.subsets_[k] = {
212212
'feature_subset_idx': k_idx,
213213
'cv_scores': k_score,
214-
'avg_score': k_score.mean()
214+
'avg_score': np.nanmean(k_score)
215215
}
216216

217217
best_subset = None
@@ -245,6 +245,8 @@ def fit(self, X, y):
245245
if select_in_range:
246246
max_score = float('-inf')
247247
for k in self.subsets_:
248+
if k < self.n_features_to_select[0] or k > self.n_features_to_select[1]:
249+
continue
248250
if self.subsets_[k]['avg_score'] > max_score:
249251
max_score = self.subsets_[k]['avg_score']
250252
best_subset = k
@@ -279,7 +281,7 @@ def _inclusion(self, orig_set, subset, X, y):
279281
for feature in remaining:
280282
new_subset = tuple(subset | {feature})
281283
cv_scores = self._calc_score(X, y, new_subset)
282-
all_avg_scores.append(cv_scores.mean())
284+
all_avg_scores.append(np.nanmean(cv_scores))
283285
all_cv_scores.append(cv_scores)
284286
all_subsets.append(new_subset)
285287
best = np.argmax(all_avg_scores)
@@ -299,7 +301,7 @@ def _exclusion(self, feature_set, X, y, fixed_feature=None):
299301
if fixed_feature and fixed_feature not in set(p):
300302
continue
301303
cv_scores = self._calc_score(X, y, p)
302-
all_avg_scores.append(cv_scores.mean())
304+
all_avg_scores.append(np.nanmean(cv_scores))
303305
all_cv_scores.append(cv_scores)
304306
all_subsets.append(p)
305307
best = np.argmax(all_avg_scores)

sklearn/feature_selection/tests/test_sequential_feature_selector.py

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ def test_kfeatures_type_5():
9393
y = iris.target
9494
knn = KNeighborsClassifier()
9595
expect = ('he min n_features_to_select value must be'
96-
' larger than the max n_features_to_select value.')
96+
' smaller than the max n_features_to_select value.')
9797
sfs = SFS(estimator=knn,
9898
n_features_to_select=(3, 1))
9999
assert_raise_message(ValueError, expect, sfs.fit, X, y)
@@ -221,7 +221,10 @@ def test_regression():
221221
assert round(sfs_r.score_, 4) == 0.2001
222222

223223

224-
def test_regression_in_tuplerange():
224+
def test_regression_in_tuplerange_forward():
225+
"""Test if selected features are within
226+
the selected tuple range when running forward selection
227+
"""
225228
boston = load_boston()
226229
X, y = boston.data, boston.target
227230
lr = LinearRegression()
@@ -234,6 +237,24 @@ def test_regression_in_tuplerange():
234237
assert round(sfs_r.score_, 4) == 0.2991, sfs_r.score_
235238

236239

240+
def test_regression_in_tuplerange_backward():
241+
"""Test if selected features are within
242+
the selected tuple range when running backward selection
243+
"""
244+
boston = load_boston()
245+
X, y = boston.data, boston.target
246+
lr = LinearRegression()
247+
248+
sfs_r = SFS(lr,
249+
n_features_to_select=(1, 5),
250+
forward=False,
251+
scoring='neg_mean_squared_error',
252+
cv=10)
253+
254+
sfs_r = sfs_r.fit(X, y)
255+
assert len(sfs_r.feature_subset_idx_) == 5
256+
257+
237258
def test_transform_not_fitted():
238259
iris = load_iris()
239260
X = iris.data

0 commit comments

Comments
 (0)
0