From 6877342824457851f35cd6d386fa58a9d3fba6f5 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Sat, 2 Mar 2019 00:14:54 +0100 Subject: [PATCH 01/26] ridge_regression: fix selecting solver in 'auto' mode --- sklearn/linear_model/ridge.py | 32 +++++++++++++++--------- sklearn/linear_model/tests/test_ridge.py | 22 ++++++++++++++++ 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index e1fc9b42438e4..279099befb4b7 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -368,11 +368,26 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto', return_n_iter=False, return_intercept=False, X_scale=None, X_offset=None): + has_sw = sample_weight is not None + + def _select_auto_mode(): + if return_intercept: + # only sag and saga support fitting intercept directly + return "sag" + if has_sw: + # this should be changed since all solvers support sample_weights + return "cholesky" + if sparse.issparse(X): + return "sparse_cg" + return "cholesky" + + if solver == 'auto': + solver = _select_auto_mode() + if return_intercept and sparse.issparse(X) and solver != 'sag': - if solver != 'auto': - warnings.warn("In Ridge, only 'sag' solver can currently fit the " - "intercept when X is sparse. Solver has been " - "automatically changed into 'sag'.") + warnings.warn("In Ridge, only 'sag' solver can currently fit the " + "intercept. Solver has been " + "automatically changed into 'sag'.") solver = 'sag' _dtype = [np.float64, np.float32] @@ -404,14 +419,7 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto', raise ValueError("Number of samples in X and y does not correspond:" " %d != %d" % (n_samples, n_samples_)) - has_sw = sample_weight is not None - if solver == 'auto': - # cholesky if it's a dense array and cg in any other case - if not sparse.issparse(X) or has_sw: - solver = 'cholesky' - else: - solver = 'sparse_cg' if has_sw: if np.atleast_1d(sample_weight).ndim > 1: @@ -555,7 +563,7 @@ def fit(self, X, y, sample_weight=None): # add the offset which was subtracted by _preprocess_data self.intercept_ += y_offset else: - if sparse.issparse(X): + if sparse.issparse(X) and self.solver == 'sparse_cg': # required to fit intercept with sparse_cg solver params = {'X_offset': X_offset, 'X_scale': X_scale} else: diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 4a0bd1d86e2ef..c824aec1d8b6e 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -837,6 +837,28 @@ def test_ridge_fit_intercept_sparse(): assert_array_almost_equal(dense.coef_, sparse.coef_) +@pytest.mark.parametrize('return_intercept', [False, True]) +@pytest.mark.parametrize('sample_weight', [None, np.ones(1000)]) +@pytest.mark.parametrize('arr_type', [np.array, sp.csr_matrix]) +def test_ridge_check_auto_modes(return_intercept, sample_weight, arr_type): + X = np.random.rand(1000, 3) + true_coefs = [1, 2, 0.1] + y = np.dot(X, true_coefs) + X_testing = arr_type(X) + + target = ridge_regression(X_testing, y, 1, + solver='auto', + sample_weight=sample_weight, + return_intercept=return_intercept + ) + try: + coef, intercept = target + assert_array_almost_equal(coef, true_coefs, decimal=1) + assert_array_almost_equal(intercept, 0, decimal=1) + except ValueError: + assert_array_almost_equal(target, true_coefs, decimal=1) + + def test_errors_and_values_helper(): ridgecv = _RidgeGCV() rng = check_random_state(42) From 39a387b47c5d46982f9ff250a2a2386feeb11ef1 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Sat, 2 Mar 2019 00:30:50 +0100 Subject: [PATCH 02/26] test for return_intercept=True and solver specified --- sklearn/linear_model/ridge.py | 9 +++++++-- sklearn/linear_model/tests/test_ridge.py | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 279099befb4b7..70b2d903b590b 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -384,12 +384,19 @@ def _select_auto_mode(): if solver == 'auto': solver = _select_auto_mode() + if solver not in ('sparse_cg', 'cholesky', 'svd', 'lsqr', 'sag', 'saga'): + raise ValueError('Solver %s not understood' % solver) + if return_intercept and sparse.issparse(X) and solver != 'sag': warnings.warn("In Ridge, only 'sag' solver can currently fit the " "intercept. Solver has been " "automatically changed into 'sag'.") solver = 'sag' + if return_intercept and solver not in ['sag', 'saga']: + raise ValueError("return_intercept=True is only supported with sag " + "and saga solvers") + _dtype = [np.float64, np.float32] # SAG needs X and y columns to be C-contiguous and np.float64 @@ -440,8 +447,6 @@ def _select_auto_mode(): if alpha.size == 1 and n_targets > 1: alpha = np.repeat(alpha, n_targets) - if solver not in ('sparse_cg', 'cholesky', 'svd', 'lsqr', 'sag', 'saga'): - raise ValueError('Solver %s not understood' % solver) n_iter = None if solver == 'sparse_cg': diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index c824aec1d8b6e..856945b3d0603 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -859,6 +859,27 @@ def test_ridge_check_auto_modes(return_intercept, sample_weight, arr_type): assert_array_almost_equal(target, true_coefs, decimal=1) +def test_ridge_regression_fail_with_return_intercept(): + # return_itercept is only supported by sag/saga + + X, y = make_regression(n_samples=1000, n_features=2, n_informative=2, + bias=10., random_state=42) + + for solver in ['sparse_cg', 'cholesky', 'svd', 'lsqr']: + with pytest.raises(ValueError, match="return_intercept=True is only"): + target = ridge_regression(X, y, 1, + solver=solver, + return_intercept=True + ) + + for solver in ['saga', 'sag']: + target = ridge_regression(X, y, 1, + solver=solver, + return_intercept=True + ) + assert len(target) == 2 + assert target[0].shape == (2,) + def test_errors_and_values_helper(): ridgecv = _RidgeGCV() rng = check_random_state(42) From 62afd26951c11eee014f2e6a444a1b779a711bc4 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Sat, 2 Mar 2019 00:33:35 +0100 Subject: [PATCH 03/26] fix linting issue --- sklearn/linear_model/tests/test_ridge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 856945b3d0603..52f5ed2637383 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -876,7 +876,7 @@ def test_ridge_regression_fail_with_return_intercept(): target = ridge_regression(X, y, 1, solver=solver, return_intercept=True - ) + ) assert len(target) == 2 assert target[0].shape == (2,) From d3e085f38aa7da521a18a169b33a6ae96e434457 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Sat, 2 Mar 2019 00:49:42 +0100 Subject: [PATCH 04/26] change solver and issue warning instead of raising an exception --- sklearn/linear_model/ridge.py | 6 +----- sklearn/linear_model/tests/test_ridge.py | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 70b2d903b590b..a4f38d5e32e35 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -387,16 +387,12 @@ def _select_auto_mode(): if solver not in ('sparse_cg', 'cholesky', 'svd', 'lsqr', 'sag', 'saga'): raise ValueError('Solver %s not understood' % solver) - if return_intercept and sparse.issparse(X) and solver != 'sag': + if return_intercept and solver != 'sag': warnings.warn("In Ridge, only 'sag' solver can currently fit the " "intercept. Solver has been " "automatically changed into 'sag'.") solver = 'sag' - if return_intercept and solver not in ['sag', 'saga']: - raise ValueError("return_intercept=True is only supported with sag " - "and saga solvers") - _dtype = [np.float64, np.float32] # SAG needs X and y columns to be C-contiguous and np.float64 diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 52f5ed2637383..0a4b992b072b2 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -865,21 +865,29 @@ def test_ridge_regression_fail_with_return_intercept(): X, y = make_regression(n_samples=1000, n_features=2, n_informative=2, bias=10., random_state=42) - for solver in ['sparse_cg', 'cholesky', 'svd', 'lsqr']: - with pytest.raises(ValueError, match="return_intercept=True is only"): + for solver in ['sparse_cg', 'cholesky', 'svd', 'lsqr', 'saga']: + with pytest.warns(UserWarning) as record: target = ridge_regression(X, y, 1, solver=solver, return_intercept=True ) + assert len(target) == 2 + assert target[0].shape == (2,) - for solver in ['saga', 'sag']: + for r in record: + r.message.args[0].startswith("return_intercept=True is only") + + with pytest.warns(None) as record: target = ridge_regression(X, y, 1, - solver=solver, + solver="sag", return_intercept=True ) assert len(target) == 2 assert target[0].shape == (2,) + # no warning should be raised + assert len(record) == 0 + def test_errors_and_values_helper(): ridgecv = _RidgeGCV() rng = check_random_state(42) From 596ae7f79236fa311adcd2c82584a2f33c2e13c7 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Sat, 2 Mar 2019 00:52:19 +0100 Subject: [PATCH 05/26] fix comment --- sklearn/linear_model/ridge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index a4f38d5e32e35..24af1792f63a5 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -372,7 +372,7 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto', def _select_auto_mode(): if return_intercept: - # only sag and saga support fitting intercept directly + # only sag supports fitting intercept directly return "sag" if has_sw: # this should be changed since all solvers support sample_weights From b0bfbe1ea5e7d73b8fcfb2b977f5dd91d0005b99 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Sat, 2 Mar 2019 00:59:57 +0100 Subject: [PATCH 06/26] run the return_intercept test on all solvers --- sklearn/linear_model/tests/test_ridge.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 0a4b992b072b2..6032dd206a39c 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -840,14 +840,22 @@ def test_ridge_fit_intercept_sparse(): @pytest.mark.parametrize('return_intercept', [False, True]) @pytest.mark.parametrize('sample_weight', [None, np.ones(1000)]) @pytest.mark.parametrize('arr_type', [np.array, sp.csr_matrix]) -def test_ridge_check_auto_modes(return_intercept, sample_weight, arr_type): +@pytest.mark.parametrize('solver', ['auto', 'sparse_cg', 'cholesky', 'lsqr', + 'sag', 'saga']) +def test_ridge_regression_check_arguments_validity(return_intercept, + sample_weight, arr_type, + solver): + """check if all combinations of arguments give valid estimations""" + + # test excludes 'svd' solver because it raises exception for sparse inputs + X = np.random.rand(1000, 3) true_coefs = [1, 2, 0.1] y = np.dot(X, true_coefs) X_testing = arr_type(X) target = ridge_regression(X_testing, y, 1, - solver='auto', + solver=solver, sample_weight=sample_weight, return_intercept=return_intercept ) From fdfbc50f32eb97d3c3f2d724cc84d3b9a2c25fbf Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Sat, 2 Mar 2019 01:02:15 +0100 Subject: [PATCH 07/26] change test name --- sklearn/linear_model/tests/test_ridge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 6032dd206a39c..81f8ee2bb559c 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -867,7 +867,7 @@ def test_ridge_regression_check_arguments_validity(return_intercept, assert_array_almost_equal(target, true_coefs, decimal=1) -def test_ridge_regression_fail_with_return_intercept(): +def test_ridge_regression_warns_with_return_intercept(): # return_itercept is only supported by sag/saga X, y = make_regression(n_samples=1000, n_features=2, n_informative=2, From 73bbd9106419476c6cdecc054630334cc82391fe Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Sun, 3 Mar 2019 00:21:47 +0100 Subject: [PATCH 08/26] inline select auto mode --- sklearn/linear_model/ridge.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 24af1792f63a5..dda7b0aceff2a 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -370,19 +370,17 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto', has_sw = sample_weight is not None - def _select_auto_mode(): + if solver == 'auto': if return_intercept: # only sag supports fitting intercept directly - return "sag" - if has_sw: + solver = "sag" + elif has_sw: # this should be changed since all solvers support sample_weights - return "cholesky" - if sparse.issparse(X): - return "sparse_cg" - return "cholesky" - - if solver == 'auto': - solver = _select_auto_mode() + solver = "cholesky" + elif sparse.issparse(X): + solver = "sparse_cg" + else: + solver = "cholesky" if solver not in ('sparse_cg', 'cholesky', 'svd', 'lsqr', 'sag', 'saga'): raise ValueError('Solver %s not understood' % solver) From 70f7560b3ad02e61ca5e77a1bbd03546ab67e40c Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Sun, 3 Mar 2019 00:36:22 +0100 Subject: [PATCH 09/26] update whats new doc --- doc/whats_new/v0.21.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index b2df99d9a131b..d46d412603e69 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -177,9 +177,9 @@ Support for Python 3.4 and below has been officially dropped. - |Enhancement| Minimized the validation of X in :class:`ensemble.AdaBoostClassifier` and :class:`ensemble.AdaBoostRegressor` :issue:`13174` by :user:`Christos Aridas `. - + - |Enhancement| :class:`ensemble.IsolationForest` now exposes ``warm_start`` - parameter, allowing iterative addition of trees to an isolation + parameter, allowing iterative addition of trees to an isolation forest. :issue:`13496` by :user:`Peter Marko `. - |Efficiency| Make :class:`ensemble.IsolationForest` more memory efficient @@ -340,6 +340,11 @@ Support for Python 3.4 and below has been officially dropped. deterministic when trained in a multi-class setting on several threads. :issue:`13422` by :user:`Clément Doumouro `. +- |Fix| Fixed bug in :func:`linear_model.ridge.ridge_regression` that + caused unhandled exception for arguments ``return_intercept=True`` and + ``solver=auto`` (default) or any other solver different from ``sag``. + :issue:`13363` by :user:`Bartosz Telenczuk ` + :mod:`sklearn.manifold` ............................ From b494e76f89ce84cc3e6e8c3666d552deecc75d1d Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Wed, 6 Mar 2019 22:55:08 +0100 Subject: [PATCH 10/26] remove try... except... --- sklearn/linear_model/tests/test_ridge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 81f8ee2bb559c..7864f9ae5061c 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -859,11 +859,11 @@ def test_ridge_regression_check_arguments_validity(return_intercept, sample_weight=sample_weight, return_intercept=return_intercept ) - try: + if return_intercept: coef, intercept = target assert_array_almost_equal(coef, true_coefs, decimal=1) assert_array_almost_equal(intercept, 0, decimal=1) - except ValueError: + else: assert_array_almost_equal(target, true_coefs, decimal=1) From 3d44669808e116d34bf33b9712bc0f10c04ae063 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Wed, 6 Mar 2019 22:59:30 +0100 Subject: [PATCH 11/26] update error message --- sklearn/linear_model/ridge.py | 3 ++- sklearn/linear_model/tests/test_ridge.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index dda7b0aceff2a..53265ae2d42a7 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -383,7 +383,8 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto', solver = "cholesky" if solver not in ('sparse_cg', 'cholesky', 'svd', 'lsqr', 'sag', 'saga'): - raise ValueError('Solver %s not understood' % solver) + raise ValueError("Known solver are 'sparse_cg', 'cholesky', 'svd'" + " 'lsqr', 'sag' or 'saga'. Got %s." % solver) if return_intercept and solver != 'sag': warnings.warn("In Ridge, only 'sag' solver can currently fit the " diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 7864f9ae5061c..d0af6a246c2a3 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -778,7 +778,8 @@ def test_raises_value_error_if_solver_not_supported(): wrong_solver = "This is not a solver (MagritteSolveCV QuantumBitcoin)" exception = ValueError - message = "Solver %s not understood" % wrong_solver + message = ("Known solver are 'sparse_cg', 'cholesky', 'svd'" + " 'lsqr', 'sag' or 'saga'. Got %s." % wrong_solver) def func(): X = np.eye(3) From 9848429ae67d6f173a25aa64d3f72aea05f3644f Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Wed, 13 Mar 2019 00:56:22 +0100 Subject: [PATCH 12/26] fix typo --- sklearn/linear_model/ridge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 53265ae2d42a7..64c26f1b708ee 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -383,7 +383,7 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto', solver = "cholesky" if solver not in ('sparse_cg', 'cholesky', 'svd', 'lsqr', 'sag', 'saga'): - raise ValueError("Known solver are 'sparse_cg', 'cholesky', 'svd'" + raise ValueError("Known solvers are 'sparse_cg', 'cholesky', 'svd'" " 'lsqr', 'sag' or 'saga'. Got %s." % solver) if return_intercept and solver != 'sag': From 81573aaa39f1c6c2689209088e40ccc838c0ba74 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Fri, 15 Mar 2019 13:00:26 +0100 Subject: [PATCH 13/26] fix test (didn't update error message) --- sklearn/linear_model/tests/test_ridge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index d0af6a246c2a3..3a9394d6c7711 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -778,7 +778,7 @@ def test_raises_value_error_if_solver_not_supported(): wrong_solver = "This is not a solver (MagritteSolveCV QuantumBitcoin)" exception = ValueError - message = ("Known solver are 'sparse_cg', 'cholesky', 'svd'" + message = ("Known solvers are 'sparse_cg', 'cholesky', 'svd'" " 'lsqr', 'sag' or 'saga'. Got %s." % wrong_solver) def func(): From a2473ff21c287840b17b12f76f5522e475204849 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Thu, 21 Mar 2019 00:48:10 +0100 Subject: [PATCH 14/26] match warning --- sklearn/linear_model/tests/test_ridge.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 3a9394d6c7711..33823ef5b3fff 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -875,7 +875,7 @@ def test_ridge_regression_warns_with_return_intercept(): bias=10., random_state=42) for solver in ['sparse_cg', 'cholesky', 'svd', 'lsqr', 'saga']: - with pytest.warns(UserWarning) as record: + with pytest.warns(UserWarning, match="In Ridge, only 'sag' solver"): target = ridge_regression(X, y, 1, solver=solver, return_intercept=True @@ -883,9 +883,6 @@ def test_ridge_regression_warns_with_return_intercept(): assert len(target) == 2 assert target[0].shape == (2,) - for r in record: - r.message.args[0].startswith("return_intercept=True is only") - with pytest.warns(None) as record: target = ridge_regression(X, y, 1, solver="sag", From 10c116cfde3ef258c34ad6d7b1518fbd9599188f Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Thu, 21 Mar 2019 01:18:30 +0100 Subject: [PATCH 15/26] change the auto selection sequence --- doc/whats_new/v0.21.rst | 5 +++++ sklearn/linear_model/ridge.py | 7 ++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index d46d412603e69..e3d9129229096 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -345,6 +345,11 @@ Support for Python 3.4 and below has been officially dropped. ``solver=auto`` (default) or any other solver different from ``sag``. :issue:`13363` by :user:`Bartosz Telenczuk ` +- |API| :func:`linear_model.ridge.ridge_regression` will choose ``sparse_cg`` + solver for sparse inputs when ``solver=auto`` and ``sample_weight`` + is provided (previously `cholesky` solver was selected). :issue:`13363` + by :user:`Bartosz Telenczuk ` + :mod:`sklearn.manifold` ............................ diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 64c26f1b708ee..2a37c1a9e6964 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -374,13 +374,10 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto', if return_intercept: # only sag supports fitting intercept directly solver = "sag" - elif has_sw: - # this should be changed since all solvers support sample_weights + elif not sparse.issparse(X): solver = "cholesky" - elif sparse.issparse(X): - solver = "sparse_cg" else: - solver = "cholesky" + solver = "sparse_cg" if solver not in ('sparse_cg', 'cholesky', 'svd', 'lsqr', 'sag', 'saga'): raise ValueError("Known solvers are 'sparse_cg', 'cholesky', 'svd'" From 05ba6272635d45c0fb77c22e0201e447a3c1e512 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Thu, 21 Mar 2019 01:18:45 +0100 Subject: [PATCH 16/26] use assert_allclose --- sklearn/linear_model/tests/test_ridge.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 33823ef5b3fff..d3b5d542bdff7 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -7,6 +7,7 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal +from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_greater @@ -862,10 +863,10 @@ def test_ridge_regression_check_arguments_validity(return_intercept, ) if return_intercept: coef, intercept = target - assert_array_almost_equal(coef, true_coefs, decimal=1) - assert_array_almost_equal(intercept, 0, decimal=1) + assert_allclose(coef, true_coefs, atol=0.1) + assert_allclose(intercept, 0, atol=0.1) else: - assert_array_almost_equal(target, true_coefs, decimal=1) + assert_allclose(target, true_coefs, atol=0.1) def test_ridge_regression_warns_with_return_intercept(): From 90a7e564e9c23d567c2cbfbb39388de3e3d9be87 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Thu, 21 Mar 2019 14:13:56 +0100 Subject: [PATCH 17/26] changed atol to 0.03 --- sklearn/linear_model/tests/test_ridge.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index d3b5d542bdff7..7e51a8e50b781 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -863,10 +863,10 @@ def test_ridge_regression_check_arguments_validity(return_intercept, ) if return_intercept: coef, intercept = target - assert_allclose(coef, true_coefs, atol=0.1) - assert_allclose(intercept, 0, atol=0.1) + assert_allclose(coef, true_coefs, atol=0.03) + assert_allclose(intercept, 0, atol=0.03) else: - assert_allclose(target, true_coefs, atol=0.1) + assert_allclose(target, true_coefs, atol=0.03) def test_ridge_regression_warns_with_return_intercept(): From 97a532663b52f89ab91998cb509fe6671234fa60 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Tue, 9 Apr 2019 14:30:33 +0200 Subject: [PATCH 18/26] added rtol=0 --- sklearn/linear_model/tests/test_ridge.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 7e51a8e50b781..d0aa9d02748e7 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -863,10 +863,10 @@ def test_ridge_regression_check_arguments_validity(return_intercept, ) if return_intercept: coef, intercept = target - assert_allclose(coef, true_coefs, atol=0.03) - assert_allclose(intercept, 0, atol=0.03) + assert_allclose(coef, true_coefs, rtol=0, atol=0.03) + assert_allclose(intercept, 0, rtol=0, atol=0.03) else: - assert_allclose(target, true_coefs, atol=0.03) + assert_allclose(target, true_coefs, rtol=0, atol=0.03) def test_ridge_regression_warns_with_return_intercept(): From 06190130db5c4713f5c26f22dc6dc322739768b6 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Wed, 17 Apr 2019 01:18:19 +0200 Subject: [PATCH 19/26] use fixed random_state --- sklearn/linear_model/tests/test_ridge.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index d0aa9d02748e7..fc4bdc417dc5e 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -851,7 +851,8 @@ def test_ridge_regression_check_arguments_validity(return_intercept, # test excludes 'svd' solver because it raises exception for sparse inputs - X = np.random.rand(1000, 3) + rng = check_random_state(42) + X = rng.rand(1000, 3) true_coefs = [1, 2, 0.1] y = np.dot(X, true_coefs) X_testing = arr_type(X) From 64d21df952117d0be4981671e275e9733bc3a444 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Wed, 17 Apr 2019 01:30:20 +0200 Subject: [PATCH 20/26] update whats new --- doc/whats_new/v0.21.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index e3d9129229096..b00ac7f39bc07 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -340,7 +340,9 @@ Support for Python 3.4 and below has been officially dropped. deterministic when trained in a multi-class setting on several threads. :issue:`13422` by :user:`Clément Doumouro `. -- |Fix| Fixed bug in :func:`linear_model.ridge.ridge_regression` that +- |Fix| Fixed bug in :func:`linear_model.ridge.ridge_regression`, + :class:`linear_model.ridge.Ridge` and + :class:`linear_model.ridge.ridge.RidgeClassifier` that caused unhandled exception for arguments ``return_intercept=True`` and ``solver=auto`` (default) or any other solver different from ``sag``. :issue:`13363` by :user:`Bartosz Telenczuk ` From 1d7ca02eea93c7963936ae6f14e53f7bccfd7562 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Wed, 17 Apr 2019 01:30:34 +0200 Subject: [PATCH 21/26] currently -> directly --- sklearn/linear_model/ridge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 2a37c1a9e6964..4fe89e1d5ea1e 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -384,7 +384,7 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto', " 'lsqr', 'sag' or 'saga'. Got %s." % solver) if return_intercept and solver != 'sag': - warnings.warn("In Ridge, only 'sag' solver can currently fit the " + warnings.warn("In Ridge, only 'sag' solver can directly fit the " "intercept. Solver has been " "automatically changed into 'sag'.") solver = 'sag' From 604f7d10f66247dd291f1d2b71654f349b9edd2d Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Wed, 17 Apr 2019 13:51:26 +0200 Subject: [PATCH 22/26] use more strict test --- sklearn/linear_model/tests/test_ridge.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index fc4bdc417dc5e..c8c3b08abb2ba 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -855,19 +855,26 @@ def test_ridge_regression_check_arguments_validity(return_intercept, X = rng.rand(1000, 3) true_coefs = [1, 2, 0.1] y = np.dot(X, true_coefs) + intercept = 0. + if return_intercept: + intercept = 10000. + y += intercept X_testing = arr_type(X) - target = ridge_regression(X_testing, y, 1, + alpha, atol, tol = 1e-3, 1e-4, 1e-6 + target = ridge_regression(X_testing, y, alpha=alpha, solver=solver, sample_weight=sample_weight, - return_intercept=return_intercept + return_intercept=return_intercept, + tol=tol, ) + if return_intercept: coef, intercept = target - assert_allclose(coef, true_coefs, rtol=0, atol=0.03) - assert_allclose(intercept, 0, rtol=0, atol=0.03) + assert_allclose(coef, true_coefs, rtol=0, atol=atol) + assert_allclose(intercept, intercept, rtol=0, atol=atol) else: - assert_allclose(target, true_coefs, rtol=0, atol=0.03) + assert_allclose(target, true_coefs, rtol=0, atol=atol) def test_ridge_regression_warns_with_return_intercept(): From 79d04d6b61527cd3749b3550dbea1390df22b4c1 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Wed, 17 Apr 2019 22:35:17 +0200 Subject: [PATCH 23/26] fix test for true_intercept --- sklearn/linear_model/tests/test_ridge.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index c8c3b08abb2ba..e9c033e0b74b6 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -855,26 +855,26 @@ def test_ridge_regression_check_arguments_validity(return_intercept, X = rng.rand(1000, 3) true_coefs = [1, 2, 0.1] y = np.dot(X, true_coefs) - intercept = 0. + true_intercept = 0. if return_intercept: - intercept = 10000. - y += intercept + true_intercept = 10000. + y += true_intercept X_testing = arr_type(X) alpha, atol, tol = 1e-3, 1e-4, 1e-6 - target = ridge_regression(X_testing, y, alpha=alpha, - solver=solver, - sample_weight=sample_weight, - return_intercept=return_intercept, - tol=tol, - ) + out = ridge_regression(X_testing, y, alpha=alpha, + solver=solver, + sample_weight=sample_weight, + return_intercept=return_intercept, + tol=tol, + ) if return_intercept: - coef, intercept = target + coef, intercept = out assert_allclose(coef, true_coefs, rtol=0, atol=atol) - assert_allclose(intercept, intercept, rtol=0, atol=atol) + assert_allclose(intercept, true_intercept, rtol=0, atol=atol) else: - assert_allclose(target, true_coefs, rtol=0, atol=atol) + assert_allclose(out, true_coefs, rtol=0, atol=atol) def test_ridge_regression_warns_with_return_intercept(): From 9e1aefbb393ff32ac269ead62a4af480584a18f3 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Wed, 17 Apr 2019 22:45:25 +0200 Subject: [PATCH 24/26] changed warning to exception --- sklearn/linear_model/ridge.py | 7 ++-- sklearn/linear_model/tests/test_ridge.py | 45 +++++++++++++----------- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 4fe89e1d5ea1e..625f534779d9d 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -384,10 +384,9 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto', " 'lsqr', 'sag' or 'saga'. Got %s." % solver) if return_intercept and solver != 'sag': - warnings.warn("In Ridge, only 'sag' solver can directly fit the " - "intercept. Solver has been " - "automatically changed into 'sag'.") - solver = 'sag' + raise ValueError("In Ridge, only 'sag' solver can directly fit the " + "intercept. Solver has been " + "automatically changed into 'sag'.") _dtype = [np.float64, np.float32] diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index e9c033e0b74b6..3a2497c1d0cda 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -834,9 +834,7 @@ def test_ridge_fit_intercept_sparse(): # test the solver switch and the corresponding warning for solver in ['saga', 'lsqr']: sparse = Ridge(alpha=1., tol=1.e-15, solver=solver, fit_intercept=True) - assert_warns(UserWarning, sparse.fit, X_csr, y) - assert_almost_equal(dense.intercept_, sparse.intercept_) - assert_array_almost_equal(dense.coef_, sparse.coef_) + assert_raises_regex(ValueError, "In Ridge,", sparse.fit, X_csr, y) @pytest.mark.parametrize('return_intercept', [False, True]) @@ -862,6 +860,18 @@ def test_ridge_regression_check_arguments_validity(return_intercept, X_testing = arr_type(X) alpha, atol, tol = 1e-3, 1e-4, 1e-6 + + if solver not in ['sag', 'auto'] and return_intercept: + assert_raises_regex(ValueError, + "In Ridge, only 'sag' solver", + ridge_regression, X_testing, y, + alpha=alpha, + solver=solver, + sample_weight=sample_weight, + return_intercept=return_intercept, + tol=tol) + return + out = ridge_regression(X_testing, y, alpha=alpha, solver=solver, sample_weight=sample_weight, @@ -884,24 +894,17 @@ def test_ridge_regression_warns_with_return_intercept(): bias=10., random_state=42) for solver in ['sparse_cg', 'cholesky', 'svd', 'lsqr', 'saga']: - with pytest.warns(UserWarning, match="In Ridge, only 'sag' solver"): - target = ridge_regression(X, y, 1, - solver=solver, - return_intercept=True - ) - assert len(target) == 2 - assert target[0].shape == (2,) - - with pytest.warns(None) as record: - target = ridge_regression(X, y, 1, - solver="sag", - return_intercept=True - ) - assert len(target) == 2 - assert target[0].shape == (2,) - - # no warning should be raised - assert len(record) == 0 + + + assert_raises_regex(ValueError, + "In Ridge, only 'sag' solver", + ridge_regression, X, y, 1, solver=solver, + return_intercept=True) + + target = ridge_regression(X, y, 1, + solver="sag", + return_intercept=True + ) def test_errors_and_values_helper(): ridgecv = _RidgeGCV() From d4cd2804700bd4831be4ef69ace0ad25e2822859 Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Wed, 17 Apr 2019 22:49:24 +0200 Subject: [PATCH 25/26] remove redundant test (covered by test_ridge_regression_check_arguments_validity) --- sklearn/linear_model/tests/test_ridge.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 3a2497c1d0cda..7bfb617d4beff 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -887,25 +887,6 @@ def test_ridge_regression_check_arguments_validity(return_intercept, assert_allclose(out, true_coefs, rtol=0, atol=atol) -def test_ridge_regression_warns_with_return_intercept(): - # return_itercept is only supported by sag/saga - - X, y = make_regression(n_samples=1000, n_features=2, n_informative=2, - bias=10., random_state=42) - - for solver in ['sparse_cg', 'cholesky', 'svd', 'lsqr', 'saga']: - - - assert_raises_regex(ValueError, - "In Ridge, only 'sag' solver", - ridge_regression, X, y, 1, solver=solver, - return_intercept=True) - - target = ridge_regression(X, y, 1, - solver="sag", - return_intercept=True - ) - def test_errors_and_values_helper(): ridgecv = _RidgeGCV() rng = check_random_state(42) From 4ed33b5ec2254c569fedd0b84d70c03394a1339a Mon Sep 17 00:00:00 2001 From: Bartosz Telenczuk Date: Thu, 18 Apr 2019 00:46:16 +0200 Subject: [PATCH 26/26] updated exception message and added entry in whats new --- doc/whats_new/v0.21.rst | 4 ++++ sklearn/linear_model/ridge.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index b00ac7f39bc07..259b943b4273d 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -347,6 +347,10 @@ Support for Python 3.4 and below has been officially dropped. ``solver=auto`` (default) or any other solver different from ``sag``. :issue:`13363` by :user:`Bartosz Telenczuk ` +- |Fix| :func:`linear_model.ridge.ridge_regression` will now raise an exception + if ``return_intercept=True`` and solver is different from ``sag``. Previously, + only warning was issued. :issue:`13363` by :user:`Bartosz Telenczuk ` + - |API| :func:`linear_model.ridge.ridge_regression` will choose ``sparse_cg`` solver for sparse inputs when ``solver=auto`` and ``sample_weight`` is provided (previously `cholesky` solver was selected). :issue:`13363` diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 625f534779d9d..c9ef770135ef3 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -385,8 +385,8 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto', if return_intercept and solver != 'sag': raise ValueError("In Ridge, only 'sag' solver can directly fit the " - "intercept. Solver has been " - "automatically changed into 'sag'.") + "intercept. Please change solver to 'sag' or set " + "return_intercept=False.") _dtype = [np.float64, np.float32]