scikit-learn · ogrisel · Aug 19, 2013 · Aug 19, 2013
diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py
@@ -481,6 +481,17 @@ def test_tied_pvalues():
         assert_not_in(9998, Xt)
 
 
+def test_tied_scores():
+    """Test for stable sorting in k-best with tied scores."""
+    # The three feature columns are identical, so their scores are tied.
+    X_train = np.array([[0, 0, 0], [1, 1, 1]])
+    y_train = [0, 1]
+    for n_features in [1, 2, 3]:
+        sel = SelectKBest(chi2, k=n_features).fit(X_train, y_train)
+        # Pass one explicit 2-D sample; expect the last k columns to be kept.
+        X_test = sel.transform([[0, 1, 2]])
+        assert_array_equal(X_test[0], np.arange(3)[-n_features:])
+
+
 def test_nans():
     """Assert that SelectKBest and SelectPercentile can handle NaNs."""
     # First feature has zero variance to confuse f_classif (ANOVA) and

diff --git a/sklearn/feature_selection/univariate_selection.py b/sklearn/feature_selection/univariate_selection.py
@@ -299,10 +299,6 @@ def fit(self, X, y):
         self.scores_, self.pvalues_ = self.score_func(X, y)
         self.scores_ = np.asarray(self.scores_)
         self.pvalues_ = np.asarray(self.pvalues_)
-        if len(np.unique(self.pvalues_)) < len(self.pvalues_):
-            warn("Duplicate p-values. Result may depend on feature ordering."
-                 "There are probably duplicate features, or you used a "
-                 "classification score for a regression task.")
         return self
 
 
@@ -315,10 +311,6 @@ def fit(self, X, y):
         self.scores_, self.pvalues_ = self.score_func(X, y)
         self.scores_ = np.asarray(self.scores_)
         self.pvalues_ = np.asarray(self.pvalues_)
-        if len(np.unique(self.scores_)) < len(self.scores_):
-            warn("Duplicate scores. Result may depend on feature ordering."
-                 "There are probably duplicate features, or you used a "
-                 "classification score for a regression task.")
         return self
 
 
@@ -428,7 +420,11 @@ def _get_support_mask(self):
         # from argsort, which we transform to a mask, which we probably
         # transform back to indices later.
         mask = np.zeros(scores.shape, dtype=bool)
-        mask[np.argsort(scores)[-k:]] = 1
+
+        # Request a stable sort. Mergesort takes more memory (~40MB per
+        # megafeature on x86-64), but blows heapsort out of the water in
+        # terms of speed.
+        mask[np.argsort(scores, kind="mergesort")[-k:]] = 1
         return mask