diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index bbe0cda392290..66985bf9e253c 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -120,7 +120,7 @@ def __init__(self, estimator, n_features_to_select=None, step=1, def _estimator_type(self): return self.estimator._estimator_type - def fit(self, X, y): + def fit(self, X, y, **fit_params): """Fit the RFE model and then the underlying estimator on the selected features. @@ -131,10 +131,13 @@ def fit(self, X, y): y : array-like, shape = [n_samples] The target values. + + **fit_params : kwargs + Additional parameters passed to the fit method of the estimator. """ - return self._fit(X, y) + return self._fit(X, y, **fit_params) - def _fit(self, X, y, step_score=None): + def _fit(self, X, y, step_score=None, **fit_params): X, y = check_X_y(X, y, "csc") # Initialization n_features = X.shape[1] @@ -166,7 +169,7 @@ def _fit(self, X, y, step_score=None): if self.verbose > 0: print("Fitting estimator with %d features." 
% np.sum(support_)) - estimator.fit(X[:, features], y) + estimator.fit(X[:, features], y, **fit_params) # Get coefs if hasattr(estimator, 'coef_'): diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index 1efc0279a9dc7..afa0c9a552cda 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -2,7 +2,8 @@ Testing Recursive feature elimination """ import numpy as np -from numpy.testing import assert_array_almost_equal, assert_array_equal +from numpy.testing import (assert_array_almost_equal, assert_array_equal, + assert_raises) from nose.tools import assert_equal, assert_true from scipy import sparse @@ -75,6 +76,44 @@ def test_rfe_features_importance(): assert_array_equal(rfe.get_support(), rfe_svc.get_support()) +def test_rfe_sample_weights(): + iris = load_iris() + X = iris.data + y = iris.target + + clf = SVC(kernel="linear") + rfe = RFE(estimator=clf, n_features_to_select=1) + + sample_weight_test = 2 + class_test = 2 + + # Case 1 - original dataset + rfe.fit(X, y) + ranking_original = rfe.ranking_.copy() + + # Case 2 - double the weight of one class's samples + w = np.ones(y.shape[0]) + w[y == class_test] = sample_weight_test + + rfe.fit(X, y, sample_weight=w) + ranking_weights = rfe.ranking_.copy() + + # Case 3 - duplicate the samples of one class + extra_X = np.tile(X[y == class_test], (sample_weight_test - 1, 1)) + X_duplicate = np.concatenate((X, extra_X), axis=0) + + n_extra = (y == class_test).sum() * (sample_weight_test - 1) + extra_Y = np.ones(n_extra, dtype=int) * class_test + y_duplicate = np.concatenate((y, extra_Y), axis=0) + + rfe.fit(X_duplicate, y_duplicate) + ranking_duplicate = rfe.ranking_.copy() + + assert_raises(AssertionError, assert_array_equal, ranking_original, + ranking_weights) + assert_array_equal(ranking_weights, ranking_duplicate) + + def test_rfe(): generator = check_random_state(0) iris = load_iris()