From 479f104fd9cb3f828a424f8d6fb8260d9beabf46 Mon Sep 17 00:00:00 2001
From: Gil Rutter
Date: Fri, 2 Sep 2016 11:01:53 +0100
Subject: [PATCH 1/5] Let RFE pass fit_params to estimator

---
 sklearn/feature_selection/rfe.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py
index bbe0cda392290..66985bf9e253c 100644
--- a/sklearn/feature_selection/rfe.py
+++ b/sklearn/feature_selection/rfe.py
@@ -120,7 +120,7 @@ def __init__(self, estimator, n_features_to_select=None, step=1,
     def _estimator_type(self):
         return self.estimator._estimator_type
 
-    def fit(self, X, y):
+    def fit(self, X, y, **fit_params):
         """Fit the RFE model and then the underlying estimator on the
            selected features.
 
@@ -131,10 +131,13 @@ def fit(self, X, y):
 
         y : array-like, shape = [n_samples]
             The target values.
+
+        **fit_params : kwargs
+            Additional parameters passed to the fit method of the estimator.
         """
-        return self._fit(X, y)
+        return self._fit(X, y, **fit_params)
 
-    def _fit(self, X, y, step_score=None):
+    def _fit(self, X, y, step_score=None, **fit_params):
         X, y = check_X_y(X, y, "csc")
         # Initialization
         n_features = X.shape[1]
@@ -166,7 +169,7 @@ def _fit(self, X, y, step_score=None):
             if self.verbose > 0:
                 print("Fitting estimator with %d features." % np.sum(support_))
 
-            estimator.fit(X[:, features], y)
+            estimator.fit(X[:, features], y, **fit_params)
 
             # Get coefs
             if hasattr(estimator, 'coef_'):

From 9fb10ffe0b1a1824b555c6ee6de1bd763bf677ea Mon Sep 17 00:00:00 2001
From: Gil Rutter
Date: Fri, 2 Sep 2016 16:05:28 +0100
Subject: [PATCH 2/5] Test for rfe sample weights

---
 sklearn/feature_selection/tests/test_rfe.py | 30 +++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py
index 1efc0279a9dc7..03e32f6eeeac5 100644
--- a/sklearn/feature_selection/tests/test_rfe.py
+++ b/sklearn/feature_selection/tests/test_rfe.py
@@ -75,6 +75,36 @@ def test_rfe_features_importance():
     assert_array_equal(rfe.get_support(), rfe_svc.get_support())
 
 
+def test_rfe_sample_weights():
+    iris = load_iris()
+    X = iris.data
+    y = iris.target
+
+    clf = SVC(kernel="linear")
+    rfe = RFE(estimator=clf, n_features_to_select=1)
+
+    sample_weight_test = 2
+
+    # Case 1 - double the weight of the class's features
+    w = np.ones(y.shape[0])
+    w[y == 2] = sample_weight_test
+
+    rfe.fit(X, y, sample_weight=w)
+    ranking_1 = rfe.ranking_.copy()
+
+    # Case 2 - duplicate the features of one class
+    extra_X = np.tile(X[y == 2], (sample_weight_test - 1, 1))
+    X2 = np.concatenate((X, extra_X), axis=0)
+
+    n_extra = (y == 2).sum() * (sample_weight_test - 1)
+    extra_Y = np.full(n_extra, 2, dtype=int)
+    y2 = np.concatenate((y, extra_Y), axis=0)
+
+    rfe.fit(X2, y2)
+    ranking_2 = rfe.ranking_.copy()
+
+    assert_array_equal(ranking_1, ranking_2)
+
 def test_rfe():
     generator = check_random_state(0)
     iris = load_iris()

From 748336473272909f55d0dd39a4dc65dd57fd42d0 Mon Sep 17 00:00:00 2001
From: Gil Rutter
Date: Fri, 2 Sep 2016 16:48:53 +0100
Subject: [PATCH 3/5] Switch from np.full to np.ones for compatibility. Make the class being tested a variable.

---
 sklearn/feature_selection/tests/test_rfe.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py
index 03e32f6eeeac5..3cd1d80d38a5a 100644
--- a/sklearn/feature_selection/tests/test_rfe.py
+++ b/sklearn/feature_selection/tests/test_rfe.py
@@ -84,20 +84,21 @@ def test_rfe_sample_weights():
     rfe = RFE(estimator=clf, n_features_to_select=1)
 
     sample_weight_test = 2
+    class_test = 2
 
     # Case 1 - double the weight of the class's features
     w = np.ones(y.shape[0])
-    w[y == 2] = sample_weight_test
+    w[y == class_test] = sample_weight_test
 
     rfe.fit(X, y, sample_weight=w)
     ranking_1 = rfe.ranking_.copy()
 
     # Case 2 - duplicate the features of one class
-    extra_X = np.tile(X[y == 2], (sample_weight_test - 1, 1))
+    extra_X = np.tile(X[y == class_test], (sample_weight_test - 1, 1))
     X2 = np.concatenate((X, extra_X), axis=0)
 
-    n_extra = (y == 2).sum() * (sample_weight_test - 1)
-    extra_Y = np.full(n_extra, 2, dtype=int)
+    n_extra = (y == class_test).sum() * (sample_weight_test - 1)
+    extra_Y = np.ones(n_extra, dtype=int) * class_test
     y2 = np.concatenate((y, extra_Y), axis=0)
 
     rfe.fit(X2, y2)

From 5870b7a6b84a1ad23369da1e6dfd69b26d51faa1 Mon Sep 17 00:00:00 2001
From: Gil Rutter
Date: Fri, 16 Sep 2016 12:02:40 +0100
Subject: [PATCH 4/5] Improve sample weights testing

- Test that the weighted feature ranking is different from the original feature ranking
- Clearer comments and variable names
---
 sklearn/feature_selection/tests/test_rfe.py | 26 ++++++++++++++-------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py
index 3cd1d80d38a5a..ce65e1a974586 100644
--- a/sklearn/feature_selection/tests/test_rfe.py
+++ b/sklearn/feature_selection/tests/test_rfe.py
@@ -2,7 +2,8 @@
 Testing Recursive feature elimination
 """
 import numpy as np
-from numpy.testing import assert_array_almost_equal, assert_array_equal
+from numpy.testing import (assert_array_almost_equal, assert_array_equal,
+                           assert_raises)
 from nose.tools import assert_equal, assert_true
 from scipy import sparse
 
@@ -86,25 +87,32 @@ def test_rfe_sample_weights():
     sample_weight_test = 2
     class_test = 2
 
-    # Case 1 - double the weight of the class's features
+    # Case 1 - original dataset
+    rfe.fit(X, y)
+    ranking_original = rfe.ranking_.copy()
+
+    # Case 2 - double the weight of one class's samples
     w = np.ones(y.shape[0])
     w[y == class_test] = sample_weight_test
 
     rfe.fit(X, y, sample_weight=w)
-    ranking_1 = rfe.ranking_.copy()
+    ranking_weights = rfe.ranking_.copy()
 
-    # Case 2 - duplicate the features of one class
+    # Case 3 - duplicate the samples of one class
     extra_X = np.tile(X[y == class_test], (sample_weight_test - 1, 1))
-    X2 = np.concatenate((X, extra_X), axis=0)
+    X_duplicate = np.concatenate((X, extra_X), axis=0)
 
     n_extra = (y == class_test).sum() * (sample_weight_test - 1)
     extra_Y = np.ones(n_extra, dtype=int) * class_test
-    y2 = np.concatenate((y, extra_Y), axis=0)
+    y_duplicate = np.concatenate((y, extra_Y), axis=0)
+
+    rfe.fit(X_duplicate, y_duplicate)
+    ranking_duplicate = rfe.ranking_.copy()
 
-    rfe.fit(X2, y2)
-    ranking_2 = rfe.ranking_.copy()
+    with assert_raises(AssertionError):
+        assert_array_equal(ranking_original, ranking_weights)
+    assert_array_equal(ranking_weights, ranking_duplicate)
 
-    assert_array_equal(ranking_1, ranking_2)
 
 def test_rfe():
     generator = check_random_state(0)
     iris = load_iris()

From a4b38504069fa5c8045ebd217f819a60c9e52b04 Mon Sep 17 00:00:00 2001
From: Gil Rutter
Date: Fri, 16 Sep 2016 12:51:38 +0100
Subject: [PATCH 5/5] Fix nosetests for old versions of numpy by not using assert_raises as a context manager.

---
 sklearn/feature_selection/tests/test_rfe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py
index ce65e1a974586..afa0c9a552cda 100644
--- a/sklearn/feature_selection/tests/test_rfe.py
+++ b/sklearn/feature_selection/tests/test_rfe.py
@@ -109,8 +109,8 @@ def test_rfe_sample_weights():
     rfe.fit(X_duplicate, y_duplicate)
     ranking_duplicate = rfe.ranking_.copy()
 
-    with assert_raises(AssertionError):
-        assert_array_equal(ranking_original, ranking_weights)
+    assert_raises(AssertionError, assert_array_equal, ranking_original,
+                  ranking_weights)
     assert_array_equal(ranking_weights, ranking_duplicate)
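A minimal usage sketch of what this patch series enables. It is not part of the patches themselves; it assumes a scikit-learn checkout with PATCH 1/5 applied and reuses the same estimator and data as test_rfe_sample_weights above.

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.feature_selection import RFE
    from sklearn.svm import SVC

    iris = load_iris()
    X, y = iris.data, iris.target

    # Double the weight of every class-2 sample, as the test above does.
    w = np.ones(y.shape[0])
    w[y == 2] = 2

    # With the patch, keyword arguments given to RFE.fit are forwarded to the
    # wrapped estimator's fit call at each elimination step, so the linear SVC
    # receives sample_weight here.
    rfe = RFE(estimator=SVC(kernel="linear"), n_features_to_select=1)
    rfe.fit(X, y, sample_weight=w)
    print(rfe.ranking_)

On an unpatched scikit-learn the extra keyword argument would simply raise a TypeError from RFE.fit, since its signature only accepts X and y.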