diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
index b2df99d9a131b..259b943b4273d 100644
--- a/doc/whats_new/v0.21.rst
+++ b/doc/whats_new/v0.21.rst
@@ -177,9 +177,9 @@ Support for Python 3.4 and below has been officially dropped.
 - |Enhancement| Minimized the validation of X in
   :class:`ensemble.AdaBoostClassifier` and :class:`ensemble.AdaBoostRegressor`
   :issue:`13174` by :user:`Christos Aridas <chkoar>`.
- 
+
 - |Enhancement| :class:`ensemble.IsolationForest` now exposes ``warm_start``
-  parameter, allowing iterative addition of trees to an isolation 
+  parameter, allowing iterative addition of trees to an isolation
   forest. :issue:`13496` by :user:`Peter Marko <petibear>`.
 
 - |Efficiency| Make :class:`ensemble.IsolationForest` more memory efficient
@@ -340,6 +340,22 @@ Support for Python 3.4 and below has been officially dropped.
   deterministic when trained in a multi-class setting on several threads.
   :issue:`13422` by :user:`Clément Doumouro <ClemDoum>`.
 
+- |Fix| Fixed a bug in :func:`linear_model.ridge.ridge_regression`,
+  :class:`linear_model.ridge.Ridge` and
+  :class:`linear_model.ridge.RidgeClassifier` that
+  caused an unhandled exception for arguments ``return_intercept=True`` and
+  ``solver=auto`` (default) or any other solver different from ``sag``.
+  :issue:`13363` by :user:`Bartosz Telenczuk <btel>`.
+
+- |Fix| :func:`linear_model.ridge.ridge_regression` will now raise an exception
+  if ``return_intercept=True`` and the solver is different from ``sag``.
+  Previously, only a warning was issued. :issue:`13363` by :user:`Bartosz Telenczuk <btel>`.
+
+- |API| :func:`linear_model.ridge.ridge_regression` will choose the ``sparse_cg``
+  solver for sparse inputs when ``solver=auto`` and ``sample_weight``
+  is provided (previously the ``cholesky`` solver was selected). :issue:`13363`
+  by :user:`Bartosz Telenczuk <btel>`.
+
 :mod:`sklearn.manifold`
 ............................
 
diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py
index e1fc9b42438e4..c9ef770135ef3 100644
--- a/sklearn/linear_model/ridge.py
+++ b/sklearn/linear_model/ridge.py
@@ -368,12 +368,25 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
                       return_n_iter=False, return_intercept=False,
                       X_scale=None, X_offset=None):
 
-    if return_intercept and sparse.issparse(X) and solver != 'sag':
-        if solver != 'auto':
-            warnings.warn("In Ridge, only 'sag' solver can currently fit the "
-                          "intercept when X is sparse. Solver has been "
-                          "automatically changed into 'sag'.")
-        solver = 'sag'
+    has_sw = sample_weight is not None
+
+    if solver == 'auto':
+        if return_intercept:
+            # only 'sag' supports fitting the intercept directly
+            solver = "sag"
+        elif not sparse.issparse(X):
+            solver = "cholesky"
+        else:
+            solver = "sparse_cg"
+
+    if solver not in ('sparse_cg', 'cholesky', 'svd', 'lsqr', 'sag', 'saga'):
+        raise ValueError("Known solvers are 'sparse_cg', 'cholesky', 'svd',"
+                         " 'lsqr', 'sag' or 'saga'. Got %s." % solver)
+
+    if return_intercept and solver != 'sag':
+        raise ValueError("In Ridge, only 'sag' solver can directly fit the "
+                         "intercept. Please change solver to 'sag' or set "
+                         "return_intercept=False.")
 
     _dtype = [np.float64, np.float32]
 
@@ -404,14 +417,7 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
         raise ValueError("Number of samples in X and y does not correspond:"
                          " %d != %d" % (n_samples, n_samples_))
 
-    has_sw = sample_weight is not None
 
-    if solver == 'auto':
-        # cholesky if it's a dense array and cg in any other case
-        if not sparse.issparse(X) or has_sw:
-            solver = 'cholesky'
-        else:
-            solver = 'sparse_cg'
 
     if has_sw:
         if np.atleast_1d(sample_weight).ndim > 1:
@@ -432,8 +438,6 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
     if alpha.size == 1 and n_targets > 1:
         alpha = np.repeat(alpha, n_targets)
 
-    if solver not in ('sparse_cg', 'cholesky', 'svd', 'lsqr', 'sag', 'saga'):
-        raise ValueError('Solver %s not understood' % solver)
 
     n_iter = None
     if solver == 'sparse_cg':
@@ -555,7 +559,7 @@ def fit(self, X, y, sample_weight=None):
             # add the offset which was subtracted by _preprocess_data
             self.intercept_ += y_offset
         else:
-            if sparse.issparse(X):
+            if sparse.issparse(X) and self.solver == 'sparse_cg':
                 # required to fit intercept with sparse_cg solver
                 params = {'X_offset': X_offset, 'X_scale': X_scale}
             else:
diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index 4a0bd1d86e2ef..7bfb617d4beff 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -7,6 +7,7 @@
 
 from sklearn.utils.testing import assert_almost_equal
 from sklearn.utils.testing import assert_array_almost_equal
+from sklearn.utils.testing import assert_allclose
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_greater
@@ -778,7 +779,8 @@ def test_raises_value_error_if_solver_not_supported():
     wrong_solver = "This is not a solver (MagritteSolveCV QuantumBitcoin)"
 
     exception = ValueError
-    message = "Solver %s not understood" % wrong_solver
+    message = ("Known solvers are 'sparse_cg', 'cholesky', 'svd',"
+               " 'lsqr', 'sag' or 'saga'. Got %s." % wrong_solver)
 
     def func():
         X = np.eye(3)
@@ -832,9 +834,57 @@ def test_ridge_fit_intercept_sparse():
-    # test the solver switch and the corresponding warning
+    # unsupported solvers raise an error when fitting intercept with sparse X
     for solver in ['saga', 'lsqr']:
         sparse = Ridge(alpha=1., tol=1.e-15, solver=solver, fit_intercept=True)
-        assert_warns(UserWarning, sparse.fit, X_csr, y)
-        assert_almost_equal(dense.intercept_, sparse.intercept_)
-        assert_array_almost_equal(dense.coef_, sparse.coef_)
+        assert_raises_regex(ValueError, "In Ridge,", sparse.fit, X_csr, y)
+
+
+@pytest.mark.parametrize('return_intercept', [False, True])
+@pytest.mark.parametrize('sample_weight', [None, np.ones(1000)])
+@pytest.mark.parametrize('arr_type', [np.array, sp.csr_matrix])
+@pytest.mark.parametrize('solver', ['auto', 'sparse_cg', 'cholesky', 'lsqr',
+                                    'sag', 'saga'])
+def test_ridge_regression_check_arguments_validity(return_intercept,
+                                                   sample_weight, arr_type,
+                                                   solver):
+    """Check that all combinations of arguments give valid estimations."""
+
+    # the test excludes the 'svd' solver because it raises an exception
+    # for sparse inputs
+
+    rng = check_random_state(42)
+    X = rng.rand(1000, 3)
+    true_coefs = [1, 2, 0.1]
+    y = np.dot(X, true_coefs)
+    true_intercept = 0.
+    if return_intercept:
+        true_intercept = 10000.
+    y += true_intercept
+    X_testing = arr_type(X)
+
+    alpha, atol, tol = 1e-3, 1e-4, 1e-6
+
+    if solver not in ['sag', 'auto'] and return_intercept:
+        assert_raises_regex(ValueError,
+                            "In Ridge, only 'sag' solver",
+                            ridge_regression, X_testing, y,
+                            alpha=alpha,
+                            solver=solver,
+                            sample_weight=sample_weight,
+                            return_intercept=return_intercept,
+                            tol=tol)
+        return
+
+    out = ridge_regression(X_testing, y, alpha=alpha,
+                           solver=solver,
+                           sample_weight=sample_weight,
+                           return_intercept=return_intercept,
+                           tol=tol,
+                           )
+
+    if return_intercept:
+        coef, intercept = out
+        assert_allclose(coef, true_coefs, rtol=0, atol=atol)
+        assert_allclose(intercept, true_intercept, rtol=0, atol=atol)
+    else:
+        assert_allclose(out, true_coefs, rtol=0, atol=atol)
 
 
 def test_errors_and_values_helper():
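
For reference, a minimal usage sketch of the behaviour this patch introduces (not part of the diff; it assumes a scikit-learn build with the change applied, and the data and shapes are made up for illustration):

    import numpy as np
    from scipy import sparse
    from sklearn.linear_model import ridge_regression

    rng = np.random.RandomState(0)
    X = sparse.csr_matrix(rng.rand(100, 3))
    y = rng.rand(100)

    # With return_intercept=True, 'auto' now resolves to 'sag', the only
    # solver that can fit the intercept directly.
    coef, intercept = ridge_regression(X, y, alpha=1., solver='auto',
                                       return_intercept=True)

    # Any explicit solver other than 'sag' now raises instead of silently
    # switching to 'sag' with a warning, as it did before this patch.
    try:
        ridge_regression(X, y, alpha=1., solver='lsqr', return_intercept=True)
    except ValueError as exc:
        print(exc)  # In Ridge, only 'sag' solver can directly fit the ...

    # With sample_weight on sparse input and solver='auto', 'sparse_cg' is
    # now selected (previously 'cholesky').
    coef = ridge_regression(X, y, alpha=1., solver='auto',
                            sample_weight=np.ones(100))

Raising instead of silently switching keeps the returned coefficients consistent with the solver the caller explicitly asked for.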