From c16cc90634c9432f0afa10a14665a738cf8b8c5e Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 30 Nov 2022 15:55:56 +0100 Subject: [PATCH 1/7] MAINT Array validation for DecisionBoundaryDisplay --- sklearn/inspection/_plot/decision_boundary.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/inspection/_plot/decision_boundary.py b/sklearn/inspection/_plot/decision_boundary.py index 86836a81f7207..f225c1693292d 100644 --- a/sklearn/inspection/_plot/decision_boundary.py +++ b/sklearn/inspection/_plot/decision_boundary.py @@ -4,6 +4,7 @@ from ...preprocessing import LabelEncoder from ...utils import check_matplotlib_support +from ...utils import check_array from ...utils import _safe_indexing from ...base import is_regressor from ...utils.validation import check_is_fitted, _is_arraylike_not_scalar @@ -296,6 +297,7 @@ def from_estimator( """ check_matplotlib_support(f"{cls.__name__}.from_estimator") check_is_fitted(estimator) + X = check_array(X) if not grid_resolution > 1: raise ValueError( From 4cfd3a0c31cac8f96837e2bbe02fa6de912331f9 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 30 Nov 2022 16:48:25 +0100 Subject: [PATCH 2/7] Fix failing test --- sklearn/inspection/_plot/decision_boundary.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sklearn/inspection/_plot/decision_boundary.py b/sklearn/inspection/_plot/decision_boundary.py index f225c1693292d..ca61e162d4c2b 100644 --- a/sklearn/inspection/_plot/decision_boundary.py +++ b/sklearn/inspection/_plot/decision_boundary.py @@ -4,7 +4,6 @@ from ...preprocessing import LabelEncoder from ...utils import check_matplotlib_support -from ...utils import check_array from ...utils import _safe_indexing from ...base import is_regressor from ...utils.validation import check_is_fitted, _is_arraylike_not_scalar @@ -297,7 +296,6 @@ def from_estimator( """ check_matplotlib_support(f"{cls.__name__}.from_estimator") check_is_fitted(estimator) - X = check_array(X) if not grid_resolution > 1: raise ValueError( @@ -328,6 +326,11 @@ def from_estimator( np.linspace(x1_min, x1_max, grid_resolution), ) if hasattr(X, "iloc"): + _, features_to_plot = X.shape + if features_to_plot != 2: + raise ValueError( + f"n_features must be equal to 2. Got {features_to_plot} instead." + ) # we need to preserve the feature names and therefore get an empty dataframe X_grid = X.iloc[[], :].copy() X_grid.iloc[:, 0] = xx0.ravel() From 245d2fb80b39b169642e9c486fb83ac0642772b5 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 30 Nov 2022 17:23:39 +0100 Subject: [PATCH 3/7] Perform validation before safe indexing --- sklearn/inspection/_plot/decision_boundary.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sklearn/inspection/_plot/decision_boundary.py b/sklearn/inspection/_plot/decision_boundary.py index ca61e162d4c2b..60ac6ba4e20cc 100644 --- a/sklearn/inspection/_plot/decision_boundary.py +++ b/sklearn/inspection/_plot/decision_boundary.py @@ -6,7 +6,9 @@ from ...utils import check_matplotlib_support from ...utils import _safe_indexing from ...base import is_regressor -from ...utils.validation import check_is_fitted, _is_arraylike_not_scalar +from ...utils.validation import check_is_fitted +from ...utils.validation import _is_arraylike_not_scalar +from ...utils.validation import _num_features def _check_boundary_response_method(estimator, response_method): @@ -316,6 +318,12 @@ def from_estimator( f"Got {plot_method} instead." ) + num_features = _num_features(X) + if num_features != 2: + raise ValueError( + f"n_features must be equal to 2. Got {num_features} instead." + ) + x0, x1 = _safe_indexing(X, 0, axis=1), _safe_indexing(X, 1, axis=1) x0_min, x0_max = x0.min() - eps, x0.max() + eps @@ -326,11 +334,6 @@ def from_estimator( np.linspace(x1_min, x1_max, grid_resolution), ) if hasattr(X, "iloc"): - _, features_to_plot = X.shape - if features_to_plot != 2: - raise ValueError( - f"n_features must be equal to 2. Got {features_to_plot} instead." - ) # we need to preserve the feature names and therefore get an empty dataframe X_grid = X.iloc[[], :].copy() X_grid.iloc[:, 0] = xx0.ravel() From c85910875c2c644e50663c97ba70065b5ee5e946 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 30 Nov 2022 17:33:36 +0100 Subject: [PATCH 4/7] Add test for error message --- .../_plot/tests/test_boundary_decision_display.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py index 8981c9d5a5e83..943314937afd9 100644 --- a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py +++ b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py @@ -38,6 +38,18 @@ def fitted_clf(): return LogisticRegression().fit(X, y) +def test_input_data_dimension(): + n_features, n_samples = 4, 10 + X = np.random.randn(n_samples, n_features) + y = np.random.randint(0, 2, n_samples) + + clf = LogisticRegression() + clf.fit(X, y) + msg = "n_features must be equal to 2. Got 4 instead." + with pytest.raises(ValueError, match=msg): + DecisionBoundaryDisplay.from_estimator(estimator=clf, X=X) + + def test_check_boundary_response_method_auto(): """Check _check_boundary_response_method behavior with 'auto'.""" From c5febc74d6af841cb928b4160d571b8c1f042cac Mon Sep 17 00:00:00 2001 From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com> Date: Wed, 30 Nov 2022 18:22:56 +0100 Subject: [PATCH 5/7] Apply suggestions from code review Co-authored-by: Guillaume Lemaitre --- sklearn/inspection/_plot/decision_boundary.py | 8 +++++--- .../_plot/tests/test_boundary_decision_display.py | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/sklearn/inspection/_plot/decision_boundary.py b/sklearn/inspection/_plot/decision_boundary.py index 60ac6ba4e20cc..22b4590d9bc3c 100644 --- a/sklearn/inspection/_plot/decision_boundary.py +++ b/sklearn/inspection/_plot/decision_boundary.py @@ -6,9 +6,11 @@ from ...utils import check_matplotlib_support from ...utils import _safe_indexing from ...base import is_regressor -from ...utils.validation import check_is_fitted -from ...utils.validation import _is_arraylike_not_scalar -from ...utils.validation import _num_features +from ...utils.validation import ( + check_is_fitted, + _is_arraylike_not_scalar, + _num_features, +) def _check_boundary_response_method(estimator, response_method): diff --git a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py index 943314937afd9..b462c285b7f5f 100644 --- a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py +++ b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py @@ -39,12 +39,12 @@ def fitted_clf(): def test_input_data_dimension(): + """Check that we raise an error when `X` does not have exactly 2 features.""" n_features, n_samples = 4, 10 X = np.random.randn(n_samples, n_features) y = np.random.randint(0, 2, n_samples) - clf = LogisticRegression() - clf.fit(X, y) + clf = LogisticRegression().fit(X, y) msg = "n_features must be equal to 2. Got 4 instead." with pytest.raises(ValueError, match=msg): DecisionBoundaryDisplay.from_estimator(estimator=clf, X=X) From 863bc5a33a60b91a03526718338c0c1a8ba1b055 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Thu, 1 Dec 2022 10:37:15 +0100 Subject: [PATCH 6/7] Use make_classification instead of numpy for pytest --- .../inspection/_plot/tests/test_boundary_decision_display.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py index b462c285b7f5f..2d46507ab212c 100644 --- a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py +++ b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py @@ -40,9 +40,7 @@ def fitted_clf(): def test_input_data_dimension(): """Check that we raise an error when `X` does not have exactly 2 features.""" - n_features, n_samples = 4, 10 - X = np.random.randn(n_samples, n_features) - y = np.random.randint(0, 2, n_samples) + X, y = make_classification(n_samples=10, n_features=4, random_state=0) clf = LogisticRegression().fit(X, y) msg = "n_features must be equal to 2. Got 4 instead." From edde77e1cebd364d8bca455dea6b927b5a43a55c Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Thu, 1 Dec 2022 14:43:54 +0100 Subject: [PATCH 7/7] Add entry to changelog for 1.2 --- doc/whats_new/v1.2.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index b2eb253068f56..735dd6d20f4b0 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -416,6 +416,10 @@ Changelog :pr:`18298` by :user:`Madhura Jayaratne ` and :user:`Guillaume Lemaitre `. +- |Fix| :class:`inspection.DecisionBoundaryDisplay` now raises error if input + data is not 2-dimensional. + :pr:`25077` by :user:`Arturo Amor `. + :mod:`sklearn.kernel_approximation` ...................................