[MRG] Allow sample weights and other fit_params for RFE by g-rutter · Pull Request #7333 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

[MRG] Allow sample weights and other fit_params for RFE #7333

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions sklearn/feature_selection/rfe.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def __init__(self, estimator, n_features_to_select=None, step=1,
def _estimator_type(self):
return self.estimator._estimator_type

def fit(self, X, y):
def fit(self, X, y, **fit_params):
"""Fit the RFE model and then the underlying estimator on the selected
features.

Expand All @@ -131,10 +131,13 @@ def fit(self, X, y):

y : array-like, shape = [n_samples]
The target values.

**fit_params : kwargs
Additional parameter passed to the fit function of the estimator.
"""
return self._fit(X, y)
return self._fit(X, y, **fit_params)

def _fit(self, X, y, step_score=None):
def _fit(self, X, y, step_score=None, **fit_params):
X, y = check_X_y(X, y, "csc")
# Initialization
n_features = X.shape[1]
Expand Down Expand Up @@ -166,7 +169,7 @@ def _fit(self, X, y, step_score=None):
if self.verbose > 0:
print("Fitting estimator with %d features." % np.sum(support_))

estimator.fit(X[:, features], y)
estimator.fit(X[:, features], y, **fit_params)

# Get coefs
if hasattr(estimator, 'coef_'):
Expand Down
41 changes: 40 additions & 1 deletion sklearn/feature_selection/tests/test_rfe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
Testing Recursive feature elimination
"""
import numpy as np
from numpy.testing import assert_array_almost_equal, assert_array_equal
from numpy.testing import (assert_array_almost_equal, assert_array_equal,
assert_raises)
from nose.tools import assert_equal, assert_true
from scipy import sparse

Expand Down Expand Up @@ -75,6 +76,44 @@ def test_rfe_features_importance():
assert_array_equal(rfe.get_support(), rfe_svc.get_support())


def test_rfe_sample_weights():
    """Check that ``sample_weight`` given to ``RFE.fit`` affects the ranking.

    Three fits are run on the iris data with a linear-kernel SVC:

    1. the plain dataset;
    2. the plain dataset with the samples of one class given weight 2;
    3. a dataset in which the samples of that class are physically
       duplicated so each appears twice.

    The weighted ranking (2) is asserted to differ from the unweighted one
    (1) and to be equal to the ranking on the duplicated data (3).
    """
    iris = load_iris()
    data, target = iris.data, iris.target

    selector = RFE(estimator=SVC(kernel="linear"), n_features_to_select=1)

    weight_factor = 2
    boosted_class = 2
    # Boolean mask of the samples whose influence is increased.
    in_boosted_class = target == boosted_class

    # Case 1 - original dataset
    selector.fit(data, target)
    ranking_original = selector.ranking_.copy()

    # Case 2 - double the weight of one class's samples
    weights = np.ones(target.shape[0])
    weights[in_boosted_class] = weight_factor
    selector.fit(data, target, sample_weight=weights)
    ranking_weights = selector.ranking_.copy()

    # Case 3 - duplicate the samples of one class
    n_copies = weight_factor - 1
    extra_rows = np.tile(data[in_boosted_class], (n_copies, 1))
    extra_labels = np.full(in_boosted_class.sum() * n_copies, boosted_class,
                           dtype=int)
    data_duplicated = np.concatenate((data, extra_rows), axis=0)
    target_duplicated = np.concatenate((target, extra_labels), axis=0)
    selector.fit(data_duplicated, target_duplicated)
    ranking_duplicate = selector.ranking_.copy()

    # The weighted ranking must NOT match the unweighted one: the inner
    # equality assertion is expected to fail with AssertionError.
    assert_raises(AssertionError, assert_array_equal, ranking_original,
                  ranking_weights)
    # Weighting a sample by k must be equivalent to repeating it k times.
    assert_array_equal(ranking_weights, ranking_duplicate)


def test_rfe():
generator = check_random_state(0)
iris = load_iris()
Expand Down
0