@@ -299,10 +299,6 @@ def fit(self, X, y):
299
299
self .scores_ , self .pvalues_ = self .score_func (X , y )
300
300
self .scores_ = np .asarray (self .scores_ )
301
301
self .pvalues_ = np .asarray (self .pvalues_ )
302
- if len (np .unique (self .pvalues_ )) < len (self .pvalues_ ):
303
- warn ("Duplicate p-values. Result may depend on feature ordering."
304
- "There are probably duplicate features, or you used a "
305
- "classification score for a regression task." )
306
302
return self
307
303
308
304
@@ -315,10 +311,6 @@ def fit(self, X, y):
315
311
self .scores_ , self .pvalues_ = self .score_func (X , y )
316
312
self .scores_ = np .asarray (self .scores_ )
317
313
self .pvalues_ = np .asarray (self .pvalues_ )
318
- if len (np .unique (self .scores_ )) < len (self .scores_ ):
319
- warn ("Duplicate scores. Result may depend on feature ordering."
320
- "There are probably duplicate features, or you used a "
321
- "classification score for a regression task." )
322
314
return self
323
315
324
316
@@ -428,7 +420,11 @@ def _get_support_mask(self):
428
420
# from argsort, which we transform to a mask, which we probably
429
421
# transform back to indices later.
430
422
mask = np .zeros (scores .shape , dtype = bool )
431
- mask [np .argsort (scores )[- k :]] = 1
423
+
424
+ # Request a stable sort. Mergesort takes more memory (~40MB per
425
+ # megafeature on x86-64), but blows heapsort out of the water in
426
+ # terms of speed.
427
+ mask [np .argsort (scores , kind = "mergesort" )[- k :]] = 1
432
428
return mask
433
429
434
430
0 commit comments