diff --git a/sklearn/feature_selection/base.py b/sklearn/feature_selection/base.py
index 7a5208dd49c16..16f2d274ace62 100644
--- a/sklearn/feature_selection/base.py
+++ b/sklearn/feature_selection/base.py
@@ -5,6 +5,7 @@
 # License: BSD 3 clause
 
 from abc import ABCMeta, abstractmethod
+from warnings import warn
 
 import numpy as np
 from scipy.sparse import issparse, csc_matrix
@@ -73,6 +74,11 @@ def transform(self, X):
         """
         X = check_array(X, accept_sparse='csr')
         mask = self.get_support()
         if len(mask) != X.shape[1]:
             raise ValueError("X has a different shape than during fitting.")
+        if not mask.any():
+            warn("No features were selected: either the data is"
+                 " too noisy or the selection test too strict.",
+                 UserWarning)
+            return np.empty(0).reshape((X.shape[0], 0))
         return check_array(X, accept_sparse='csr')[:, safe_mask(X, mask)]
diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py
index 769df7783a6ad..4d46d257c1189 100644
--- a/sklearn/feature_selection/tests/test_feature_select.py
+++ b/sklearn/feature_selection/tests/test_feature_select.py
@@ -16,6 +16,7 @@
 from sklearn.utils.testing import assert_less
 from sklearn.utils.testing import assert_warns
 from sklearn.utils.testing import ignore_warnings
+from sklearn.utils.testing import assert_warns_message
 from sklearn.utils import safe_mask
 
 from sklearn.datasets.samples_generator import (make_classification,
@@ -251,10 +252,13 @@ def test_select_kbest_zero():
                                shuffle=False, random_state=0)
 
     univariate_filter = SelectKBest(f_classif, k=0)
-    univariate_filter.fit(X, y).transform(X)
+    univariate_filter.fit(X, y)
     support = univariate_filter.get_support()
     gtruth = np.zeros(10, dtype=bool)
     assert_array_equal(support, gtruth)
+    X_selected = assert_warns_message(UserWarning, 'No features were selected',
+                                      univariate_filter.transform, X)
+    assert_equal(X_selected.shape, (20, 0))
 
 
 def test_select_fpr_classif():
@@ -585,3 +589,24 @@ def test_f_classif_constant_feature():
     X, y = make_classification(n_samples=10, n_features=5)
     X[:, 0] = 2.0
     assert_warns(UserWarning, f_classif, X, y)
+
+
+def test_no_feature_selected():
+    rng = np.random.RandomState(0)
+
+    # Generate random uncorrelated data: a strict univariate test should
+    # reject all the features
+    X = rng.rand(40, 10)
+    y = rng.randint(0, 4, size=40)
+    strict_selectors = [
+        SelectFwe(alpha=0.01).fit(X, y),
+        SelectFdr(alpha=0.01).fit(X, y),
+        SelectFpr(alpha=0.01).fit(X, y),
+        SelectPercentile(percentile=0).fit(X, y),
+        SelectKBest(k=0).fit(X, y),
+    ]
+    for selector in strict_selectors:
+        assert_array_equal(selector.get_support(), np.zeros(10))
+        X_selected = assert_warns_message(
+            UserWarning, 'No features were selected', selector.transform, X)
+        assert_equal(X_selected.shape, (40, 0))
diff --git a/sklearn/feature_selection/univariate_selection.py b/sklearn/feature_selection/univariate_selection.py
index 7e34ca2356b65..057d5b4980033 100644
--- a/sklearn/feature_selection/univariate_selection.py
+++ b/sklearn/feature_selection/univariate_selection.py
@@ -496,8 +496,10 @@ def _get_support_mask(self):
 
         alpha = self.alpha
         sv = np.sort(self.pvalues_)
-        threshold = sv[sv < alpha * np.arange(len(self.pvalues_))].max()
-        return self.pvalues_ <= threshold
+        selected = sv[sv < alpha * np.arange(len(self.pvalues_))]
+        if selected.size == 0:
+            return np.zeros_like(self.pvalues_, dtype=bool)
+        return self.pvalues_ <= selected.max()
 
 
 class SelectFwe(_BaseFilter):