From 5601fb5557fa7b0e3e4339de4d0b67b62a15e635 Mon Sep 17 00:00:00 2001 From: Akaash Date: Sun, 10 Sep 2023 15:03:34 +0100 Subject: [PATCH 1/5] Raise ValueError instead of RuntimeWarning --- sklearn/linear_model/_linear_loss.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_linear_loss.py b/sklearn/linear_model/_linear_loss.py index 4255706e284f1..1bd0e27ba2b21 100644 --- a/sklearn/linear_model/_linear_loss.py +++ b/sklearn/linear_model/_linear_loss.py @@ -291,13 +291,29 @@ def loss_gradient( if not self.base_loss.is_multiclass: grad = np.empty_like(coef, dtype=weights.dtype) - grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights + with np.errstate(all="raise"): + try: + grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights + except FloatingPointError: + raise ValueError( + "Overflow detected. Try scaling the target variable or" + " features, or using a different solver" + ) from None if self.fit_intercept: grad[-1] = grad_pointwise.sum() else: grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order="F") # grad_pointwise.shape = (n_samples, n_classes) - grad[:, :n_features] = grad_pointwise.T @ X + l2_reg_strength * weights + with np.errstate(all="raise"): + try: + grad[:, :n_features] = ( + grad_pointwise.T @ X + l2_reg_strength * weights + ) + except FloatingPointError: + raise ValueError( + "Overflow detected. Try scaling the target variable or" + " features, or using a different solver" + ) from None if self.fit_intercept: grad[:, -1] = grad_pointwise.sum(axis=0) if coef.ndim == 1: From 09e27d275d1f4020e17b94bcceb77262eac07479 Mon Sep 17 00:00:00 2001 From: Akaash Date: Sun, 10 Sep 2023 16:51:05 +0100 Subject: [PATCH 2/5] scale X from load_breast_cancer to avoid convergence error --- sklearn/ensemble/tests/test_stacking.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/tests/test_stacking.py b/sklearn/ensemble/tests/test_stacking.py index d15aa32077689..c56e1f748815d 100644 --- a/sklearn/ensemble/tests/test_stacking.py +++ b/sklearn/ensemble/tests/test_stacking.py @@ -435,7 +435,10 @@ def test_stacking_classifier_stratify_default(): final_estimator=LogisticRegression(), cv=KFold(shuffle=True, random_state=42), ), - *load_breast_cancer(return_X_y=True), + *[ + scale(data) if idx == 0 else data + for idx, data in list(enumerate(load_breast_cancer(return_X_y=True))) + ], ), ( StackingRegressor( @@ -502,7 +505,10 @@ def test_stacking_classifier_sample_weight_fit_param(): ], final_estimator=LogisticRegression(), ), - *load_breast_cancer(return_X_y=True), + *[ + scale(data) if idx == 0 else data + for idx, data in list(enumerate(load_breast_cancer(return_X_y=True))) + ], ), ( StackingRegressor( From 428c736d1eb04f0222ec99f962d8000b2a00aee1 Mon Sep 17 00:00:00 2001 From: Akaash Date: Sun, 10 Sep 2023 17:02:42 +0100 Subject: [PATCH 3/5] Remove ignore ConvergenceWarning from tests based on diabetes data --- sklearn/ensemble/tests/test_stacking.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/sklearn/ensemble/tests/test_stacking.py b/sklearn/ensemble/tests/test_stacking.py index c56e1f748815d..42eb2f88810b6 100644 --- a/sklearn/ensemble/tests/test_stacking.py +++ b/sklearn/ensemble/tests/test_stacking.py @@ -467,18 +467,15 @@ def test_stacking_with_sample_weight(stacker, X, y): X, y, total_sample_weight, random_state=42 ) - with ignore_warnings(category=ConvergenceWarning): - stacker.fit(X_train, y_train) + stacker.fit(X_train, y_train) y_pred_no_weight = stacker.predict(X_test) - with ignore_warnings(category=ConvergenceWarning): - stacker.fit(X_train, y_train, sample_weight=np.ones(y_train.shape)) + stacker.fit(X_train, y_train, sample_weight=np.ones(y_train.shape)) y_pred_unit_weight = stacker.predict(X_test) assert_allclose(y_pred_no_weight, y_pred_unit_weight) - with ignore_warnings(category=ConvergenceWarning): - stacker.fit(X_train, y_train, sample_weight=sample_weight_train) + stacker.fit(X_train, y_train, sample_weight=sample_weight_train) y_pred_biased = stacker.predict(X_test) assert np.abs(y_pred_no_weight - y_pred_biased).sum() > 0 @@ -493,7 +490,6 @@ def test_stacking_classifier_sample_weight_fit_param(): stacker.fit(X_iris, y_iris, sample_weight=np.ones(X_iris.shape[0])) -@pytest.mark.filterwarnings("ignore::sklearn.exceptions.ConvergenceWarning") @pytest.mark.parametrize( "stacker, X, y", [ From 0721f6bc729b92c59f791d76e8e86feb8e2a7c25 Mon Sep 17 00:00:00 2001 From: Akaash Date: Sun, 10 Sep 2023 17:24:21 +0100 Subject: [PATCH 4/5] Linting fix --- sklearn/ensemble/tests/test_stacking.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/ensemble/tests/test_stacking.py b/sklearn/ensemble/tests/test_stacking.py index 42eb2f88810b6..191b0dd49610e 100644 --- a/sklearn/ensemble/tests/test_stacking.py +++ b/sklearn/ensemble/tests/test_stacking.py @@ -26,7 +26,7 @@ StackingClassifier, StackingRegressor, ) -from sklearn.exceptions import ConvergenceWarning, NotFittedError +from sklearn.exceptions import NotFittedError from sklearn.linear_model import ( LinearRegression, LogisticRegression, @@ -42,7 +42,6 @@ from sklearn.utils._testing import ( assert_allclose, assert_allclose_dense_sparse, - ignore_warnings, ) from sklearn.utils.fixes import COO_CONTAINERS, CSC_CONTAINERS, CSR_CONTAINERS From 9585d68d0f5001f6fc6ddcbdc195d8cff4451a69 Mon Sep 17 00:00:00 2001 From: Akaash Date: Fri, 22 Sep 2023 20:12:33 +0100 Subject: [PATCH 5/5] Simplify scaling of X dataframe --- sklearn/ensemble/tests/test_stacking.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/sklearn/ensemble/tests/test_stacking.py b/sklearn/ensemble/tests/test_stacking.py index 191b0dd49610e..fc47cbdb978df 100644 --- a/sklearn/ensemble/tests/test_stacking.py +++ b/sklearn/ensemble/tests/test_stacking.py @@ -53,6 +53,8 @@ n_classes=3, random_state=42 ) X_binary, y_binary = make_classification(n_classes=2, random_state=42) +X_breast_cancer, y_breast_cancer = load_breast_cancer(return_X_y=True) +X_breast_cancer = scale(X_breast_cancer) @pytest.mark.parametrize( @@ -434,10 +436,8 @@ def test_stacking_classifier_stratify_default(): final_estimator=LogisticRegression(), cv=KFold(shuffle=True, random_state=42), ), - *[ - scale(data) if idx == 0 else data - for idx, data in list(enumerate(load_breast_cancer(return_X_y=True))) - ], + X_breast_cancer, + y_breast_cancer, ), ( StackingRegressor( @@ -500,10 +500,8 @@ def test_stacking_classifier_sample_weight_fit_param(): ], final_estimator=LogisticRegression(), ), - *[ - scale(data) if idx == 0 else data - for idx, data in list(enumerate(load_breast_cancer(return_X_y=True))) - ], + X_breast_cancer, + y_breast_cancer, ), ( StackingRegressor(