@@ -108,9 +108,12 @@ class MinRedundancyMaxRelevance(BaseEstimator, SelectorMixin):

     Parameters
     ----------
-    n_features_to_select : None or int, optional (default=None)
-        Number of features to select. If None, half of the features
-        will be selected.
+    n_features_to_select : float or int, optional (default=0.5)
+        Number of features to select. A value greater than or equal to 1 is
+        interpreted as the absolute number of features to select. A value
+        within (0.0, 1.0) is interpreted as a fraction of the initial
+        number of features (rounded down). By default, half of the features
+        are selected.
     categorical_features : bool or array_like with shape (n_features),
             optional (default=False)
         If bool, then determines whether to consider all features categorical
@@ -124,8 +127,6 @@ class MinRedundancyMaxRelevance(BaseEstimator, SelectorMixin):

     Attributes
     ----------
-    n_features_ : int
-        Number of selected features.
     support_ : ndarray, shape (n_features,)
         Mask of selected features.
     relevance_ : ndarray, shape (n_features,)
@@ -147,7 +148,7 @@ class MinRedundancyMaxRelevance(BaseEstimator, SelectorMixin):
     .. [3] B. C. Ross "Mutual Information between Discrete and Continuous
        Data Sets". PLoS ONE 9(2), 2014.
     """
-    def __init__(self, n_features_to_select=None, categorical_features=False,
+    def __init__(self, n_features_to_select=0.5, categorical_features=False,
                  categorical_target=False, n_neighbors=3):
         self.n_features_to_select = n_features_to_select
         self.categorical_features = categorical_features
@@ -171,9 +172,19 @@ def fit(self, X, y):
         -------
         self
         """
-        X, y = check_X_y(X, y, accept_sparse='csc')
+        X, y = check_X_y(X, y, accept_sparse='csc',
+                         y_numeric=not self.categorical_target)

         n_features = X.shape[1]
+
+        if self.n_features_to_select >= 1:
+            n_features_to_select = int(self.n_features_to_select)
+        elif 0 < self.n_features_to_select < 1:
+            n_features_to_select = max(
+                1, int(self.n_features_to_select * n_features))
+        else:
+            raise ValueError("`n_features_to_select` must be positive.")
+
         if isinstance(self.categorical_features, bool):
             categorical_features = np.empty(n_features, dtype=bool)
             categorical_features.fill(self.categorical_features)
@@ -203,14 +214,9 @@ def fit(self, X, y):
                 xi, xj, categorical_features[i], categorical_features[j],
                 self.n_neighbors)

-        if self.n_features_to_select is None:
-            self.n_features_ = (n_features + 1) // 2
-        else:
-            self.n_features_ = self.n_features_to_select
-
         support = np.zeros(n_features, dtype=bool)
         support[np.argmax(relevance)] = True
-        for i in range(self.n_features_ - 1):
+        for i in range(n_features_to_select - 1):
             selected = np.nonzero(support)[0]
             candidates = np.nonzero(~support)[0]
             D = relevance[candidates]
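The last hunk ends at `D = relevance[candidates]`, so the redundancy handling that follows is not visible in the diff. For orientation, here is a minimal sketch of the greedy step this loop performs, assuming the standard MID (difference) criterion; the `greedy_mrmr` helper and the `R` computation are illustrative, not the file's actual code.

```python
import numpy as np

def greedy_mrmr(relevance, redundancy, n_features_to_select):
    """Sketch of greedy mRMR selection with the MID criterion.

    relevance  : shape (n_features,), estimated I(x_i; y).
    redundancy : shape (n_features, n_features), pairwise I(x_i; x_j).
    """
    n_features = len(relevance)
    support = np.zeros(n_features, dtype=bool)
    # Seed the selection with the single most relevant feature.
    support[np.argmax(relevance)] = True
    for _ in range(n_features_to_select - 1):
        selected = np.nonzero(support)[0]
        candidates = np.nonzero(~support)[0]
        D = relevance[candidates]
        # Mean redundancy of each candidate with the already selected set.
        R = redundancy[np.ix_(candidates, selected)].mean(axis=1)
        # Add the candidate maximizing relevance minus mean redundancy.
        support[candidates[np.argmax(D - R)]] = True
    return support
```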
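And a usage sketch for the new `n_features_to_select` semantics. The import path is hypothetical (the diff does not name the module), and the data is random, so the example only demonstrates the shapes involved.

```python
import numpy as np
# Hypothetical import path; the diff does not name the module.
from mrmr import MinRedundancyMaxRelevance

rng = np.random.RandomState(0)
X = rng.rand(100, 10)
y = rng.rand(100)

# A float in (0.0, 1.0) selects a fraction of the features:
# max(1, int(0.3 * 10)) == 3 features here.
selector = MinRedundancyMaxRelevance(n_features_to_select=0.3).fit(X, y)
print(selector.support_.sum())  # 3

# A value >= 1 is taken as the absolute number of features.
selector = MinRedundancyMaxRelevance(n_features_to_select=5).fit(X, y)
print(selector.transform(X).shape)  # (100, 5)
```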