From 185f96e4de60a097cc9d6921b03701e6b877327f Mon Sep 17 00:00:00 2001 From: Jose Ortiz Date: Tue, 21 Mar 2023 10:17:43 -0700 Subject: [PATCH 1/6] [MRG] LogisticRegression: Check if l1_ratio is None when using ElasticNet LogisticRegression with `penalty=elasticnet` requires the user specify the `l1_ratio` between the L1 and L2 penalties, but the code currently does not check for this. Currently, an unhelpful error stating "unsupported operand type(s) for -: 'int' and 'NoneType'" is raised when `fold_coefs_` is calculated later in `fit()`. With the proposed change, it will be clear to the user that the error was not specifying `l1_ratio`. --- sklearn/linear_model/_logistic.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py index 6680d60cb4b1c..3390c4adf000b 100644 --- a/sklearn/linear_model/_logistic.py +++ b/sklearn/linear_model/_logistic.py @@ -1168,6 +1168,9 @@ def fit(self, X, y, sample_weight=None): "(penalty={})".format(self.penalty) ) + if self.penalty == "elasticnet" and self.l1_ratio is None: + raise ValueError("l1_ratio must be specified when penalty is elasticnet.") + # TODO(1.4): Remove "none" option if self.penalty == "none": warnings.warn( From fdb3aff5112695f43a1f0206dce3b4637bc0c51b Mon Sep 17 00:00:00 2001 From: Joey Ortiz Date: Tue, 21 Mar 2023 11:38:44 -0700 Subject: [PATCH 2/6] Added test to check that error message is helpful This test checks that the error that occurs when l1_ratio=None and penalty='elasticnet' is helpful, meaning it contains the word 'l1_ratio'. The error message for LogisticRegressionCV was already helpful: `ValueError: l1_ratios must be a list of numbers between 0 and 1; got (l1_ratios=None)` With the previous commit, the error message for LogisticRegression will be, too. --- sklearn/linear_model/tests/test_logistic.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 4353ba2388014..f680ad4de529c 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -226,6 +226,16 @@ def test_check_solver_option(LR): with pytest.raises(ValueError, match=msg): lr.fit(X, y) +@pytest.mark.parametrize("LR", [LogisticRegression, LogisticRegressionCV]) +def test_elasticnet_l1_ratio_none_err_helpful(LR): + """Test that the error that occurs when l1_ratio=None and penalty='elasticnet' + is helpful, meaning it contains the word 'l1_ratio'.""" + # Check that the error message contains the word 'l1_ratio'. + with pytest.raises(Exception, match=r".*l1_ratio.*"): + # Perform a simple LogisticRegression to trigger the error. + model = LR(penalty="elasticnet", l1_ratio=None, solver="saga") + model.fit(np.array([[1, 2], [3, 4]]), np.array([0, 1])) + @pytest.mark.parametrize("solver", ["lbfgs", "newton-cg", "sag", "saga"]) def test_multinomial_binary(solver): From fce348e5baeee941c4c7f533d263e12332fcd4d2 Mon Sep 17 00:00:00 2001 From: Joey Ortiz Date: Tue, 21 Mar 2023 11:41:36 -0700 Subject: [PATCH 3/6] Added a blank line to match the code formatting Added a blank line to match the formatting of the document, two blank lines between every test. --- sklearn/linear_model/tests/test_logistic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index f680ad4de529c..46d345f004c2f 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -226,6 +226,7 @@ def test_check_solver_option(LR): with pytest.raises(ValueError, match=msg): lr.fit(X, y) + @pytest.mark.parametrize("LR", [LogisticRegression, LogisticRegressionCV]) def test_elasticnet_l1_ratio_none_err_helpful(LR): """Test that the error that occurs when l1_ratio=None and penalty='elasticnet' From c94fe0138b97f3b811c4a3a909b37d1f4f8a0036 Mon Sep 17 00:00:00 2001 From: Joey Ortiz Date: Tue, 21 Mar 2023 11:57:21 -0700 Subject: [PATCH 4/6] Try to fix linter fail that says file would be reformatted --- sklearn/linear_model/tests/test_logistic.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 46d345f004c2f..1447765f6ef02 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -228,9 +228,7 @@ def test_check_solver_option(LR): @pytest.mark.parametrize("LR", [LogisticRegression, LogisticRegressionCV]) -def test_elasticnet_l1_ratio_none_err_helpful(LR): - """Test that the error that occurs when l1_ratio=None and penalty='elasticnet' - is helpful, meaning it contains the word 'l1_ratio'.""" +def test_elasticnet_l1_ratio_err_helpful(LR): # Check that the error message contains the word 'l1_ratio'. with pytest.raises(Exception, match=r".*l1_ratio.*"): # Perform a simple LogisticRegression to trigger the error. From 2e94e3ac5849d9c28a21a239c6fb992fda459fd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?= <34657725+jeremiedbb@users.noreply.github.com> Date: Wed, 22 Mar 2023 15:56:36 +0100 Subject: [PATCH 5/6] Update sklearn/linear_model/tests/test_logistic.py --- sklearn/linear_model/tests/test_logistic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 1447765f6ef02..ed4e31537f37c 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -229,10 +229,10 @@ def test_check_solver_option(LR): @pytest.mark.parametrize("LR", [LogisticRegression, LogisticRegressionCV]) def test_elasticnet_l1_ratio_err_helpful(LR): - # Check that the error message contains the word 'l1_ratio'. - with pytest.raises(Exception, match=r".*l1_ratio.*"): - # Perform a simple LogisticRegression to trigger the error. - model = LR(penalty="elasticnet", l1_ratio=None, solver="saga") + # Check that an informative error message is raised when penalty="elasticnet" + # but l1_ratio is not specified. + model = LR(penalty="elasticnet") + with pytest.raises(Exception, match="l1_ratio must be specified"): model.fit(np.array([[1, 2], [3, 4]]), np.array([0, 1])) From ac6aa63e4043e4c8abaacb385db36f2ac8de5b37 Mon Sep 17 00:00:00 2001 From: jeremiedbb Date: Wed, 22 Mar 2023 16:15:28 +0100 Subject: [PATCH 6/6] cln --- sklearn/linear_model/tests/test_logistic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index ed4e31537f37c..c3f439bd4f150 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -231,8 +231,8 @@ def test_check_solver_option(LR): def test_elasticnet_l1_ratio_err_helpful(LR): # Check that an informative error message is raised when penalty="elasticnet" # but l1_ratio is not specified. - model = LR(penalty="elasticnet") - with pytest.raises(Exception, match="l1_ratio must be specified"): + model = LR(penalty="elasticnet", solver="saga") + with pytest.raises(ValueError, match=r".*l1_ratio.*"): model.fit(np.array([[1, 2], [3, 4]]), np.array([0, 1]))