From d972da240324a70b8e7905a07c4c298de68a0ef7 Mon Sep 17 00:00:00 2001 From: Tom DLT Date: Wed, 7 Oct 2015 18:14:44 +0200 Subject: [PATCH 1/2] FIX temporary fix for sparse ridge with intercept fitting --- doc/whats_new.rst | 6 +++ sklearn/linear_model/ridge.py | 50 +++++++++++++++++++----- sklearn/linear_model/tests/test_ridge.py | 27 +++++++++++-- 3 files changed, 71 insertions(+), 12 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 57e0fec119c84..7dfe235a9f79e 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -263,6 +263,12 @@ Bug fixes - Fixed a bug in :class:`naive_bayes.GaussianNB` which caused classification results to depend on scale. By `Jake Vanderplas`_. + - Fixed temporarily :class:`linear_model.Ridge`, which was incorrect + when fitting the intercept in the case of sparse data. The fix + automatically changes the solver to 'sag' in this case. + (`#5360 `_) + By `Tom Dupre la Tour`_. + API changes summary ------------------- diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 96c18a4e18073..104e175a334dc 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -194,7 +194,7 @@ def _solve_svd(X, y, alpha): def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', max_iter=None, tol=1e-3, verbose=0, random_state=None, - return_n_iter=False): + return_n_iter=False, return_intercept=False): """Solve the ridge equation by the method of normal equations. Read more in the :ref:`User Guide `. @@ -268,6 +268,11 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', If True, the method also returns `n_iter`, the actual number of iteration performed by the solver. + return_intercept : boolean, default False + If True, the method also returns the intercept, and the solver + is automatically changed to 'sag'. This is only a temporary fix + for fitting the intercept with sparse data. + Returns ------- coef : array, shape = [n_features] or [n_targets, n_features] @@ -281,6 +286,12 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', ----- This function won't compute the intercept. """ + if return_intercept and solver != 'sag': + warnings.warn("In Ridge, only 'sag' solver can currently fit the " + "intercept when X is sparse. Solver has been " + "automatically changed into 'sag'.") + solver = 'sag' + # SAG needs X and y columns to be C-contiguous and np.float64 if solver == 'sag': X = check_array(X, accept_sparse=['csr'], @@ -375,14 +386,22 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', coef = np.empty((y.shape[1], n_features)) n_iter = np.empty(y.shape[1], dtype=np.int32) + intercept = np.zeros((y.shape[1], )) for i, (alpha_i, target) in enumerate(zip(alpha, y.T)): + start = {'coef': np.zeros(n_features + int(return_intercept))} coef_, n_iter_, _ = sag_solver( X, target.ravel(), sample_weight, 'squared', alpha_i, max_iter, tol, verbose, random_state, False, max_squared_sum, - dict()) - coef[i] = coef_ + start) + if return_intercept: + coef[i] = coef_[:-1] + intercept[i] = coef_[-1] + else: + coef[i] = coef_ n_iter[i] = n_iter_ + if intercept.shape[0] == 1: + intercept = intercept[0] coef = np.asarray(coef) if solver == 'svd': @@ -395,7 +414,11 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', # When y was passed as a 1d-array, we flatten the coefficients. coef = coef.ravel() - if return_n_iter: + if return_n_iter and return_intercept: + return coef, n_iter, intercept + elif return_intercept: + return coef, intercept + elif return_n_iter: return coef, n_iter else: return coef @@ -428,12 +451,21 @@ def fit(self, X, y, sample_weight=None): X, y, self.fit_intercept, self.normalize, self.copy_X, sample_weight=sample_weight) - self.coef_, self.n_iter_ = ridge_regression( - X, y, alpha=self.alpha, sample_weight=sample_weight, - max_iter=self.max_iter, tol=self.tol, solver=self.solver, - random_state=self.random_state, return_n_iter=True) + if sparse.issparse(X) and self.fit_intercept: + self.coef_, self.n_iter_, self.intercept_ = ridge_regression( + X, y, alpha=self.alpha, sample_weight=sample_weight, + max_iter=self.max_iter, tol=self.tol, solver=self.solver, + random_state=self.random_state, return_n_iter=True, + return_intercept=True) + self.intercept_ += y_mean + else: + self.coef_, self.n_iter_ = ridge_regression( + X, y, alpha=self.alpha, sample_weight=sample_weight, + max_iter=self.max_iter, tol=self.tol, solver=self.solver, + random_state=self.random_state, return_n_iter=True, + return_intercept=False) + self._set_intercept(X_mean, y_mean, X_std) - self._set_intercept(X_mean, y_mean, X_std) return self diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 11dc02f4b0645..6f0b00b1dc54f 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -11,6 +11,7 @@ from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import ignore_warnings +from sklearn.utils.testing import assert_warns from sklearn import datasets from sklearn.metrics import mean_squared_error @@ -26,6 +27,7 @@ from sklearn.linear_model.ridge import RidgeClassifierCV from sklearn.linear_model.ridge import _solve_cholesky from sklearn.linear_model.ridge import _solve_cholesky_kernel +from sklearn.datasets import make_regression from sklearn.grid_search import GridSearchCV @@ -413,11 +415,11 @@ def _test_ridge_classifiers(filter_): def _test_tolerance(filter_): - ridge = Ridge(tol=1e-5) + ridge = Ridge(tol=1e-5, fit_intercept=False) ridge.fit(filter_(X_diabetes), y_diabetes) score = ridge.score(filter_(X_diabetes), y_diabetes) - ridge2 = Ridge(tol=1e-3) + ridge2 = Ridge(tol=1e-3, fit_intercept=False) ridge2.fit(filter_(X_diabetes), y_diabetes) score2 = ridge2.score(filter_(X_diabetes), y_diabetes) @@ -449,7 +451,7 @@ def test_ridge_cv_sparse_svd(): def test_ridge_sparse_svd(): X = sp.csc_matrix(rng.rand(100, 10)) y = rng.rand(100) - ridge = Ridge(solver='svd') + ridge = Ridge(solver='svd', fit_intercept=False) assert_raises(TypeError, ridge.fit, X, y) @@ -694,3 +696,22 @@ def test_n_iter(): reg = Ridge(solver=solver, max_iter=1, tol=1e-1) reg.fit(X, y_n) assert_equal(reg.n_iter_, None) + + +def test_ridge_fit_intercept_sparse(): + X, y = make_regression(n_samples=1000, n_features=2, n_informative=2, + bias=10., random_state=42) + X_csr = sp.csr_matrix(X) + + dense = Ridge(alpha=1., tol=1.e-15, solver='sag', fit_intercept=True) + sparse = Ridge(alpha=1., tol=1.e-15, solver='sag', fit_intercept=True) + dense.fit(X, y) + sparse.fit(X_csr, y) + assert_almost_equal(dense.intercept_, sparse.intercept_) + assert_array_almost_equal(dense.coef_, sparse.coef_) + + # test the solver switch and the corresponding warning + sparse = Ridge(alpha=1., tol=1.e-15, solver='lsqr', fit_intercept=True) + assert_warns(UserWarning, sparse.fit, X_csr, y) + assert_almost_equal(dense.intercept_, sparse.intercept_) + assert_array_almost_equal(dense.coef_, sparse.coef_) From db49cdd6acc8740604fe043603ef39d04774d0ba Mon Sep 17 00:00:00 2001 From: TomDLT Date: Tue, 13 Oct 2015 10:24:10 +0200 Subject: [PATCH 2/2] DOC update docstring and warning --- sklearn/linear_model/ridge.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 104e175a334dc..c8e4f91c0b550 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -269,9 +269,10 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', iteration performed by the solver. return_intercept : boolean, default False - If True, the method also returns the intercept, and the solver - is automatically changed to 'sag'. This is only a temporary fix - for fitting the intercept with sparse data. + If True and if X is sparse, the method also returns the intercept, + and the solver is automatically changed to 'sag'. This is only a + temporary fix for fitting the intercept with sparse data. For dense + data, use sklearn.linear_model.center_data before your regression. Returns ------- @@ -282,11 +283,15 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', The actual number of iteration performed by the solver. Only returned if `return_n_iter` is True. + intercept : float or array, shape = [n_targets] + The intercept of the model. Only returned if `return_intercept` + is True and if X is a scipy sparse array. + Notes ----- This function won't compute the intercept. """ - if return_intercept and solver != 'sag': + if return_intercept and sparse.issparse(X) and solver != 'sag': warnings.warn("In Ridge, only 'sag' solver can currently fit the " "intercept when X is sparse. Solver has been " "automatically changed into 'sag'.") @@ -451,6 +456,7 @@ def fit(self, X, y, sample_weight=None): X, y, self.fit_intercept, self.normalize, self.copy_X, sample_weight=sample_weight) + # temporary fix for fitting the intercept with sparse data using 'sag' if sparse.issparse(X) and self.fit_intercept: self.coef_, self.n_iter_, self.intercept_ = ridge_regression( X, y, alpha=self.alpha, sample_weight=sample_weight,