-
-
Notifications
You must be signed in to change notification settings - Fork 25.9k
[MRG+2] temporary fix for sparse ridge with intercept fitting #5360
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -194,7 +194,7 @@ def _solve_svd(X, y, alpha): | |
|
||
def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', | ||
max_iter=None, tol=1e-3, verbose=0, random_state=None, | ||
return_n_iter=False): | ||
return_n_iter=False, return_intercept=False): | ||
"""Solve the ridge equation by the method of normal equations. | ||
|
||
Read more in the :ref:`User Guide <ridge_regression>`. | ||
|
@@ -268,6 +268,12 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', | |
If True, the method also returns `n_iter`, the actual number of | ||
iteration performed by the solver. | ||
|
||
return_intercept : boolean, default False | ||
If True and if X is sparse, the method also returns the intercept, | ||
and the solver is automatically changed to 'sag'. This is only a | ||
temporary fix for fitting the intercept with sparse data. For dense | ||
data, use sklearn.linear_model.center_data before your regression. | ||
|
||
Returns | ||
------- | ||
coef : array, shape = [n_features] or [n_targets, n_features] | ||
|
@@ -277,10 +283,20 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', | |
The actual number of iteration performed by the solver. | ||
Only returned if `return_n_iter` is True. | ||
|
||
intercept : float or array, shape = [n_targets] | ||
The intercept of the model. Only returned if `return_intercept` | ||
is True and if X is a scipy sparse array. | ||
|
||
Notes | ||
----- | ||
This function won't compute the intercept. | ||
""" | ||
if return_intercept and sparse.issparse(X) and solver != 'sag': | ||
warnings.warn("In Ridge, only 'sag' solver can currently fit the " | ||
"intercept when X is sparse. Solver has been " | ||
"automatically changed into 'sag'.") | ||
solver = 'sag' | ||
|
||
# SAG needs X and y columns to be C-contiguous and np.float64 | ||
if solver == 'sag': | ||
X = check_array(X, accept_sparse=['csr'], | ||
|
@@ -375,14 +391,22 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', | |
|
||
coef = np.empty((y.shape[1], n_features)) | ||
n_iter = np.empty(y.shape[1], dtype=np.int32) | ||
intercept = np.zeros((y.shape[1], )) | ||
for i, (alpha_i, target) in enumerate(zip(alpha, y.T)): | ||
start = {'coef': np.zeros(n_features + int(return_intercept))} | ||
coef_, n_iter_, _ = sag_solver( | ||
X, target.ravel(), sample_weight, 'squared', alpha_i, | ||
max_iter, tol, verbose, random_state, False, max_squared_sum, | ||
dict()) | ||
coef[i] = coef_ | ||
start) | ||
if return_intercept: | ||
coef[i] = coef_[:-1] | ||
intercept[i] = coef_[-1] | ||
else: | ||
coef[i] = coef_ | ||
n_iter[i] = n_iter_ | ||
|
||
if intercept.shape[0] == 1: | ||
intercept = intercept[0] | ||
coef = np.asarray(coef) | ||
|
||
if solver == 'svd': | ||
|
@@ -395,7 +419,11 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', | |
# When y was passed as a 1d-array, we flatten the coefficients. | ||
coef = coef.ravel() | ||
|
||
if return_n_iter: | ||
if return_n_iter and return_intercept: | ||
return coef, n_iter, intercept | ||
elif return_intercept: | ||
return coef, intercept | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You didn't document the return in the docstring. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, I'll change that |
||
elif return_n_iter: | ||
return coef, n_iter | ||
else: | ||
return coef | ||
|
@@ -428,12 +456,22 @@ def fit(self, X, y, sample_weight=None): | |
X, y, self.fit_intercept, self.normalize, self.copy_X, | ||
sample_weight=sample_weight) | ||
|
||
self.coef_, self.n_iter_ = ridge_regression( | ||
X, y, alpha=self.alpha, sample_weight=sample_weight, | ||
max_iter=self.max_iter, tol=self.tol, solver=self.solver, | ||
random_state=self.random_state, return_n_iter=True) | ||
# temporary fix for fitting the intercept with sparse data using 'sag' | ||
if sparse.issparse(X) and self.fit_intercept: | ||
self.coef_, self.n_iter_, self.intercept_ = ridge_regression( | ||
X, y, alpha=self.alpha, sample_weight=sample_weight, | ||
max_iter=self.max_iter, tol=self.tol, solver=self.solver, | ||
random_state=self.random_state, return_n_iter=True, | ||
return_intercept=True) | ||
self.intercept_ += y_mean | ||
else: | ||
self.coef_, self.n_iter_ = ridge_regression( | ||
X, y, alpha=self.alpha, sample_weight=sample_weight, | ||
max_iter=self.max_iter, tol=self.tol, solver=self.solver, | ||
random_state=self.random_state, return_n_iter=True, | ||
return_intercept=False) | ||
self._set_intercept(X_mean, y_mean, X_std) | ||
|
||
self._set_intercept(X_mean, y_mean, X_std) | ||
return self | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ | |
from sklearn.utils.testing import assert_raises | ||
from sklearn.utils.testing import assert_raise_message | ||
from sklearn.utils.testing import ignore_warnings | ||
from sklearn.utils.testing import assert_warns | ||
|
||
from sklearn import datasets | ||
from sklearn.metrics import mean_squared_error | ||
|
@@ -26,6 +27,7 @@ | |
from sklearn.linear_model.ridge import RidgeClassifierCV | ||
from sklearn.linear_model.ridge import _solve_cholesky | ||
from sklearn.linear_model.ridge import _solve_cholesky_kernel | ||
from sklearn.datasets import make_regression | ||
|
||
from sklearn.grid_search import GridSearchCV | ||
|
||
|
@@ -413,11 +415,11 @@ def _test_ridge_classifiers(filter_): | |
|
||
|
||
def _test_tolerance(filter_): | ||
ridge = Ridge(tol=1e-5) | ||
ridge = Ridge(tol=1e-5, fit_intercept=False) | ||
ridge.fit(filter_(X_diabetes), y_diabetes) | ||
score = ridge.score(filter_(X_diabetes), y_diabetes) | ||
|
||
ridge2 = Ridge(tol=1e-3) | ||
ridge2 = Ridge(tol=1e-3, fit_intercept=False) | ||
ridge2.fit(filter_(X_diabetes), y_diabetes) | ||
score2 = ridge2.score(filter_(X_diabetes), y_diabetes) | ||
|
||
|
@@ -449,7 +451,7 @@ def test_ridge_cv_sparse_svd(): | |
def test_ridge_sparse_svd(): | ||
X = sp.csc_matrix(rng.rand(100, 10)) | ||
y = rng.rand(100) | ||
ridge = Ridge(solver='svd') | ||
ridge = Ridge(solver='svd', fit_intercept=False) | ||
assert_raises(TypeError, ridge.fit, X, y) | ||
|
||
|
||
|
@@ -694,3 +696,22 @@ def test_n_iter(): | |
reg = Ridge(solver=solver, max_iter=1, tol=1e-1) | ||
reg.fit(X, y_n) | ||
assert_equal(reg.n_iter_, None) | ||
|
||
|
||
def test_ridge_fit_intercept_sparse(): | ||
X, y = make_regression(n_samples=1000, n_features=2, n_informative=2, | ||
bias=10., random_state=42) | ||
X_csr = sp.csr_matrix(X) | ||
|
||
dense = Ridge(alpha=1., tol=1.e-15, solver='sag', fit_intercept=True) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is there a check that the solution to the dense model doesn't depend on the solver? (I hope so). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
sparse = Ridge(alpha=1., tol=1.e-15, solver='sag', fit_intercept=True) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you please also add the case where There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
dense.fit(X, y) | ||
sparse.fit(X_csr, y) | ||
assert_almost_equal(dense.intercept_, sparse.intercept_) | ||
assert_array_almost_equal(dense.coef_, sparse.coef_) | ||
|
||
# test the solver switch and the corresponding warning | ||
sparse = Ridge(alpha=1., tol=1.e-15, solver='lsqr', fit_intercept=True) | ||
assert_warns(UserWarning, sparse.fit, X_csr, y) | ||
assert_almost_equal(dense.intercept_, sparse.intercept_) | ||
assert_array_almost_equal(dense.coef_, sparse.coef_) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doesn't this raise a warning even when the data isn't sparse?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are right, I did not think about the public use of
`ridge_regression`.
I'll change that.