@@ -390,7 +390,7 @@ def __init__(self, estimator, step=1, cv=None, scoring=None, verbose=0,
390
390
self .verbose = verbose
391
391
self .n_jobs = n_jobs
392
392
393
- def fit (self , X , y ):
393
+ def fit (self , X , y , groups = None ):
394
394
"""Fit the RFE model and automatically tune the number of selected
395
395
features.
396
396
@@ -403,6 +403,10 @@ def fit(self, X, y):
403
403
y : array-like, shape = [n_samples]
404
404
Target values (integers for classification, real numbers for
405
405
regression).
406
+
407
+ groups : array-like, shape = [n_samples], optional
408
+ Group labels for the samples used while splitting the dataset into
409
+ train/test sets.
406
410
"""
407
411
X , y = check_X_y (X , y , "csr" )
408
412
@@ -442,7 +446,7 @@ def fit(self, X, y):
442
446
443
447
scores = parallel (
444
448
func (rfe , self .estimator , X , y , train , test , scorer )
445
- for train , test in cv .split (X , y ))
449
+ for train , test in cv .split (X , y , groups ))
446
450
447
451
scores = np .sum (scores , axis = 0 )
448
452
n_features_to_select = max (
@@ -465,5 +469,5 @@ def fit(self, X, y):
465
469
466
470
# Fixing a normalization error, n is equal to get_n_splits(X, y, groups) - 1
467
471
# here, the scores are normalized by get_n_splits(X, y, groups)
468
- self .grid_scores_ = scores [::- 1 ] / cv .get_n_splits (X , y )
472
+ self .grid_scores_ = scores [::- 1 ] / cv .get_n_splits (X , y , groups )
469
473
return self
0 commit comments