From a18cb3d2434f784d01f889707cdd8fdb61c8accd Mon Sep 17 00:00:00 2001 From: Leandro Hermida Date: Sat, 23 Mar 2019 10:18:30 -0400 Subject: [PATCH 1/8] discrete_features str and value check --- sklearn/feature_selection/mutual_info_.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sklearn/feature_selection/mutual_info_.py b/sklearn/feature_selection/mutual_info_.py index d5d1b7fb59db9..3a32261e5b61d 100644 --- a/sklearn/feature_selection/mutual_info_.py +++ b/sklearn/feature_selection/mutual_info_.py @@ -247,8 +247,11 @@ def _estimate_mi(X, y, discrete_features='auto', discrete_target=False, X, y = check_X_y(X, y, accept_sparse='csc', y_numeric=not discrete_target) n_samples, n_features = X.shape - if discrete_features == 'auto': - discrete_features = issparse(X) + if isinstance(discrete_features, str): + if discrete_features == 'auto': + discrete_features = issparse(X) + else: + raise ValueError("Invalid string value for discrete_features.") if isinstance(discrete_features, bool): discrete_mask = np.empty(n_features, dtype=bool) From eb3f556f2c551cccbe4f392ba421be3218203ada Mon Sep 17 00:00:00 2001 From: Leandro Hermida Date: Tue, 26 Mar 2019 13:50:05 -0400 Subject: [PATCH 2/8] Update if logic --- sklearn/feature_selection/mutual_info_.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/sklearn/feature_selection/mutual_info_.py b/sklearn/feature_selection/mutual_info_.py index 3a32261e5b61d..2085352a231b5 100644 --- a/sklearn/feature_selection/mutual_info_.py +++ b/sklearn/feature_selection/mutual_info_.py @@ -10,7 +10,7 @@ from ..preprocessing import scale from ..utils import check_random_state from ..utils.fixes import _astype_copy_false -from ..utils.validation import check_X_y +from ..utils.validation import check_array, check_X_y from ..utils.multiclass import check_classification_targets @@ -247,17 +247,16 @@ def _estimate_mi(X, y, discrete_features='auto', discrete_target=False, X, y = check_X_y(X, y, accept_sparse='csc', y_numeric=not discrete_target) n_samples, n_features = X.shape - if isinstance(discrete_features, str): - if discrete_features == 'auto': - discrete_features = issparse(X) - else: - raise ValueError("Invalid string value for discrete_features.") - - if isinstance(discrete_features, bool): + if isinstance(discrete_features, (str, bool)): + if isinstance(discrete_features, str): + if discrete_features == 'auto': + discrete_features = issparse(X) + else: + raise ValueError("Invalid string value for discrete_features.") discrete_mask = np.empty(n_features, dtype=bool) discrete_mask.fill(discrete_features) else: - discrete_features = np.asarray(discrete_features) + discrete_features = check_array(discrete_features, ensure_2d=False) if discrete_features.dtype != 'bool': discrete_mask = np.zeros(n_features, dtype=bool) discrete_mask[discrete_features] = True From 835bea861eedf7b74413a6bff16110e3c3ff2544 Mon Sep 17 00:00:00 2001 From: Leandro Hermida Date: Tue, 26 Mar 2019 13:50:27 -0400 Subject: [PATCH 3/8] Add discrete_features bad str value test --- sklearn/feature_selection/tests/test_mutual_info.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py index 7ba7d37e8ff6c..379661a197c5b 100644 --- a/sklearn/feature_selection/tests/test_mutual_info.py +++ b/sklearn/feature_selection/tests/test_mutual_info.py @@ -185,6 +185,8 @@ def test_mutual_info_options(): for mutual_info in (mutual_info_regression, mutual_info_classif): assert_raises(ValueError, mutual_info_regression, X_csr, y, discrete_features=False) + assert_raises(ValueError, mutual_info, X, y, + discrete_features='manual') mi_1 = mutual_info(X, y, discrete_features='auto', random_state=0) mi_2 = mutual_info(X, y, discrete_features=False, random_state=0) From 2e789212822cef10fdfde7be2ae04ff5d1dcc3c0 Mon Sep 17 00:00:00 2001 From: Leandro Hermida Date: Tue, 26 Mar 2019 14:02:46 -0400 Subject: [PATCH 4/8] Remove unnecessary nested isinstance str check --- sklearn/feature_selection/mutual_info_.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sklearn/feature_selection/mutual_info_.py b/sklearn/feature_selection/mutual_info_.py index 2085352a231b5..0cca99470e5be 100644 --- a/sklearn/feature_selection/mutual_info_.py +++ b/sklearn/feature_selection/mutual_info_.py @@ -248,11 +248,10 @@ def _estimate_mi(X, y, discrete_features='auto', discrete_target=False, n_samples, n_features = X.shape if isinstance(discrete_features, (str, bool)): - if isinstance(discrete_features, str): - if discrete_features == 'auto': - discrete_features = issparse(X) - else: - raise ValueError("Invalid string value for discrete_features.") + if discrete_features == 'auto': + discrete_features = issparse(X) + else: + raise ValueError("Invalid string value for discrete_features.") discrete_mask = np.empty(n_features, dtype=bool) discrete_mask.fill(discrete_features) else: From 7474d6d92688e695783c261ac0558a38507500c7 Mon Sep 17 00:00:00 2001 From: Leandro Hermida Date: Tue, 26 Mar 2019 16:57:52 -0400 Subject: [PATCH 5/8] Add back nested isinstance str check --- sklearn/feature_selection/mutual_info_.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sklearn/feature_selection/mutual_info_.py b/sklearn/feature_selection/mutual_info_.py index 0cca99470e5be..2085352a231b5 100644 --- a/sklearn/feature_selection/mutual_info_.py +++ b/sklearn/feature_selection/mutual_info_.py @@ -248,10 +248,11 @@ def _estimate_mi(X, y, discrete_features='auto', discrete_target=False, n_samples, n_features = X.shape if isinstance(discrete_features, (str, bool)): - if discrete_features == 'auto': - discrete_features = issparse(X) - else: - raise ValueError("Invalid string value for discrete_features.") + if isinstance(discrete_features, str): + if discrete_features == 'auto': + discrete_features = issparse(X) + else: + raise ValueError("Invalid string value for discrete_features.") discrete_mask = np.empty(n_features, dtype=bool) discrete_mask.fill(discrete_features) else: From 954d260286365a6fdbfc72b098bb6485120c572d Mon Sep 17 00:00:00 2001 From: Leandro Hermida Date: Thu, 28 Mar 2019 14:49:04 -0400 Subject: [PATCH 6/8] New/updates to tests --- .../feature_selection/tests/test_mutual_info.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py index 379661a197c5b..0fe437817ed28 100644 --- a/sklearn/feature_selection/tests/test_mutual_info.py +++ b/sklearn/feature_selection/tests/test_mutual_info.py @@ -183,20 +183,26 @@ def test_mutual_info_options(): X_csr = csr_matrix(X) for mutual_info in (mutual_info_regression, mutual_info_classif): - assert_raises(ValueError, mutual_info_regression, X_csr, y, + assert_raises(ValueError, mutual_info, X_csr, y, discrete_features=False) assert_raises(ValueError, mutual_info, X, y, discrete_features='manual') + assert_raises(ValueError, mutual_info, X_csr, y, + discrete_features=[True, False, True]) + assert_raises(IndexError, mutual_info, X, y, + discrete_features=[True, False, True, False]) + assert_raises(IndexError, mutual_info, X, y, discrete_features=[1, 4]) mi_1 = mutual_info(X, y, discrete_features='auto', random_state=0) mi_2 = mutual_info(X, y, discrete_features=False, random_state=0) - - mi_3 = mutual_info(X_csr, y, discrete_features='auto', - random_state=0) - mi_4 = mutual_info(X_csr, y, discrete_features=True, + mi_3 = mutual_info(X_csr, y, discrete_features='auto', random_state=0) + mi_4 = mutual_info(X_csr, y, discrete_features=True, random_state=0) + mi_5 = mutual_info(X, y, discrete_features=[True, False, True], random_state=0) + mi_6 = mutual_info(X, y, discrete_features=[0, 2], random_state=0) assert_array_equal(mi_1, mi_2) assert_array_equal(mi_3, mi_4) + assert_array_equal(mi_5, mi_6) assert not np.allclose(mi_1, mi_3) From 583a194205e9a77c79f7588765fe8dcf581f725e Mon Sep 17 00:00:00 2001 From: Leandro Hermida Date: Sun, 31 Mar 2019 17:53:05 -0400 Subject: [PATCH 7/8] Add v0.21 whats new entry --- doc/whats_new/v0.21.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index cdb6b0960f535..a08f2bd7e4c94 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -212,6 +212,15 @@ Support for Python 3.4 and below has been officially dropped. - |API| Deprecated :mod:`externals.six` since we have dropped support for Python 2.7. :issue:`12916` by :user:`Hanmin Qin `. +:mod:`sklearn.feature_selection` +..................... + +- |Fix| In :function:`feature_selection.mutual_info_classif` and + :function:`feature_selection.mutual_info_regression` fix logic handling + `discrete_features` property cases to prevent issues with future versions of + numpy. :issue:`13497` by :user:`Leandro Hermida ` and + :user:`Adrin Jalali `. + :mod:`sklearn.impute` ..................... From 1a06ff25ee89a1c6b1eb6f45715de545e7a7f9a1 Mon Sep 17 00:00:00 2001 From: Leandro Hermida Date: Mon, 1 Apr 2019 10:07:39 -0400 Subject: [PATCH 8/8] Undo v0.21 whats new entry --- doc/whats_new/v0.21.rst | 9 --------- 1 file changed, 9 deletions(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index a08f2bd7e4c94..cdb6b0960f535 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -212,15 +212,6 @@ Support for Python 3.4 and below has been officially dropped. - |API| Deprecated :mod:`externals.six` since we have dropped support for Python 2.7. :issue:`12916` by :user:`Hanmin Qin `. -:mod:`sklearn.feature_selection` -..................... - -- |Fix| In :function:`feature_selection.mutual_info_classif` and - :function:`feature_selection.mutual_info_regression` fix logic handling - `discrete_features` property cases to prevent issues with future versions of - numpy. :issue:`13497` by :user:`Leandro Hermida ` and - :user:`Adrin Jalali `. - :mod:`sklearn.impute` .....................