[MRG+2] temporary fix for sparse ridge with intercept fitting by TomDLT · Pull Request #5360 · scikit-learn/scikit-learn · GitHub
Merged · 2 commits · Oct 13, 2015
6 changes: 6 additions & 0 deletions doc/whats_new.rst
@@ -263,6 +263,12 @@ Bug fixes
- Fixed a bug in :class:`naive_bayes.GaussianNB` which caused classification
results to depend on scale. By `Jake Vanderplas`_.

- Temporary fix for :class:`linear_model.Ridge`, which was incorrect
  when fitting the intercept in the case of sparse data: the solver is
  now automatically changed to 'sag' in this case.
  (`#5360 <https://github.com/scikit-learn/scikit-learn/pull/5360>`_)
By `Tom Dupre la Tour`_.

API changes summary
-------------------

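A minimal sketch of the behavior the changelog entry describes, using made-up data (the values here are illustrative, not from the PR): fitting Ridge on sparse input with an intercept now goes through the 'sag' solver, so the intercept is actually estimated. Passing solver='sag' explicitly avoids the solver-switch warning.

import numpy as np
import scipy.sparse as sp
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X = sp.csr_matrix(rng.rand(50, 3))
y = rng.rand(50) + 5.0  # non-zero mean, so the intercept matters

# 'sag' requested explicitly, so no solver-switch warning is emitted
model = Ridge(alpha=1.0, fit_intercept=True, solver='sag')
model.fit(X, y)
print(model.intercept_, model.coef_)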
56 changes: 47 additions & 9 deletions sklearn/linear_model/ridge.py
Expand Up @@ -194,7 +194,7 @@ def _solve_svd(X, y, alpha):

 def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
                      max_iter=None, tol=1e-3, verbose=0, random_state=None,
-                     return_n_iter=False):
+                     return_n_iter=False, return_intercept=False):
"""Solve the ridge equation by the method of normal equations.

Read more in the :ref:`User Guide <ridge_regression>`.
@@ -268,6 +268,12 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
If True, the method also returns `n_iter`, the actual number of
iterations performed by the solver.

return_intercept : boolean, default False
If True and if X is sparse, the method also returns the intercept,
and the solver is automatically changed to 'sag'. This is only a
temporary fix for fitting the intercept with sparse data. For dense
data, use sklearn.linear_model.center_data before your regression.

Returns
-------
coef : array, shape = [n_features] or [n_targets, n_features]
@@ -277,10 +283,20 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
The actual number of iterations performed by the solver.
Only returned if `return_n_iter` is True.

intercept : float or array, shape = [n_targets]
The intercept of the model. Only returned if `return_intercept`
is True and if X is a scipy sparse array.

Notes
-----
This function won't compute the intercept, except in the sparse case
enabled by `return_intercept` above.
"""
if return_intercept and sparse.issparse(X) and solver != 'sag':
warnings.warn("In Ridge, only 'sag' solver can currently fit the "
"intercept when X is sparse. Solver has been "
"automatically changed into 'sag'.")
solver = 'sag'
[Review comment from Member]
Doesn't this raise a warning even when the data isn't sparse?

[Reply from Member Author (TomDLT)]
You are right, I did not think about the public use of ridge_regression.
I'll change that.

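To make the branch above concrete, here is a sketch (made-up data) of calling the public ridge_regression with a non-'sag' solver on sparse input; the function warns and switches to 'sag' before solving.

import warnings

import numpy as np
import scipy.sparse as sp
from sklearn.linear_model import ridge_regression

rng = np.random.RandomState(42)
X = sp.csr_matrix(rng.rand(30, 4))
y = rng.rand(30)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    coef, intercept = ridge_regression(X, y, alpha=1.0, solver='lsqr',
                                       return_intercept=True)

print(caught[0].message)  # the solver-switch warning shown above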
# SAG needs X and y columns to be C-contiguous and np.float64
if solver == 'sag':
X = check_array(X, accept_sparse=['csr'],
@@ -375,14 +391,22 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',

         coef = np.empty((y.shape[1], n_features))
         n_iter = np.empty(y.shape[1], dtype=np.int32)
+        intercept = np.zeros((y.shape[1], ))
         for i, (alpha_i, target) in enumerate(zip(alpha, y.T)):
+            start = {'coef': np.zeros(n_features + int(return_intercept))}
             coef_, n_iter_, _ = sag_solver(
                 X, target.ravel(), sample_weight, 'squared', alpha_i,
                 max_iter, tol, verbose, random_state, False, max_squared_sum,
-                dict())
-            coef[i] = coef_
+                start)
+            if return_intercept:
+                coef[i] = coef_[:-1]
+                intercept[i] = coef_[-1]
+            else:
+                coef[i] = coef_
             n_iter[i] = n_iter_

+        if intercept.shape[0] == 1:
+            intercept = intercept[0]
         coef = np.asarray(coef)

if solver == 'svd':
@@ -395,7 +419,11 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
         # When y was passed as a 1d-array, we flatten the coefficients.
         coef = coef.ravel()

-    if return_n_iter:
+    if return_n_iter and return_intercept:
+        return coef, n_iter, intercept
+    elif return_intercept:
+        return coef, intercept
[Review comment from Member]
You didn't document the return in the docstring.

[Reply from Member Author (TomDLT)]
Thanks, I'll change that.

+    elif return_n_iter:
         return coef, n_iter
     else:
         return coef
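Taken together, the two flags yield four possible return shapes. A quick sketch of all four calls (made-up sparse data; 'sag' is used so the intercept path is valid):

import numpy as np
import scipy.sparse as sp
from sklearn.linear_model import ridge_regression

rng = np.random.RandomState(0)
X = sp.csr_matrix(rng.rand(40, 5))
y = rng.rand(40)

coef = ridge_regression(X, y, alpha=1.0, solver='sag')
coef, n_iter = ridge_regression(X, y, alpha=1.0, solver='sag',
                                return_n_iter=True)
coef, intercept = ridge_regression(X, y, alpha=1.0, solver='sag',
                                   return_intercept=True)
coef, n_iter, intercept = ridge_regression(X, y, alpha=1.0, solver='sag',
                                           return_n_iter=True,
                                           return_intercept=True)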
@@ -428,12 +456,22 @@ def fit(self, X, y, sample_weight=None):
             X, y, self.fit_intercept, self.normalize, self.copy_X,
             sample_weight=sample_weight)

-        self.coef_, self.n_iter_ = ridge_regression(
-            X, y, alpha=self.alpha, sample_weight=sample_weight,
-            max_iter=self.max_iter, tol=self.tol, solver=self.solver,
-            random_state=self.random_state, return_n_iter=True)
+        # temporary fix for fitting the intercept with sparse data using 'sag'
+        if sparse.issparse(X) and self.fit_intercept:
+            self.coef_, self.n_iter_, self.intercept_ = ridge_regression(
+                X, y, alpha=self.alpha, sample_weight=sample_weight,
+                max_iter=self.max_iter, tol=self.tol, solver=self.solver,
+                random_state=self.random_state, return_n_iter=True,
+                return_intercept=True)
+            self.intercept_ += y_mean
+        else:
+            self.coef_, self.n_iter_ = ridge_regression(
+                X, y, alpha=self.alpha, sample_weight=sample_weight,
+                max_iter=self.max_iter, tol=self.tol, solver=self.solver,
+                random_state=self.random_state, return_n_iter=True,
+                return_intercept=False)
+            self._set_intercept(X_mean, y_mean, X_std)

-        self._set_intercept(X_mean, y_mean, X_std)
         return self
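For contrast with the sparse branch: on dense input the intercept still comes from _set_intercept, which recovers it from the column means removed before solving. A sketch of that identity, intercept_ = mean(y) - dot(mean(X), coef_), with made-up data:

import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X = rng.rand(100, 3)
y = np.dot(X, np.array([1.0, 2.0, 3.0])) + 10.0

model = Ridge(alpha=1.0, fit_intercept=True).fit(X, y)

# the intercept is recovered from the centering offsets and the coefficients
manual = y.mean() - np.dot(X.mean(axis=0), model.coef_)
print(np.allclose(model.intercept_, manual))  # True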


27 changes: 24 additions & 3 deletions sklearn/linear_model/tests/test_ridge.py
@@ -11,6 +11,7 @@
from sklearn.utils.testing import assert_raises
from sklearn.utils.testing import assert_raise_message
from sklearn.utils.testing import ignore_warnings
from sklearn.utils.testing import assert_warns

from sklearn import datasets
from sklearn.metrics import mean_squared_error
@@ -26,6 +27,7 @@
from sklearn.linear_model.ridge import RidgeClassifierCV
from sklearn.linear_model.ridge import _solve_cholesky
from sklearn.linear_model.ridge import _solve_cholesky_kernel
from sklearn.datasets import make_regression

from sklearn.grid_search import GridSearchCV

@@ -413,11 +415,11 @@ def _test_ridge_classifiers(filter_):


 def _test_tolerance(filter_):
-    ridge = Ridge(tol=1e-5)
+    ridge = Ridge(tol=1e-5, fit_intercept=False)
     ridge.fit(filter_(X_diabetes), y_diabetes)
     score = ridge.score(filter_(X_diabetes), y_diabetes)

-    ridge2 = Ridge(tol=1e-3)
+    ridge2 = Ridge(tol=1e-3, fit_intercept=False)
     ridge2.fit(filter_(X_diabetes), y_diabetes)
     score2 = ridge2.score(filter_(X_diabetes), y_diabetes)

@@ -449,7 +451,7 @@ def test_ridge_cv_sparse_svd():
 def test_ridge_sparse_svd():
     X = sp.csc_matrix(rng.rand(100, 10))
     y = rng.rand(100)
-    ridge = Ridge(solver='svd')
+    ridge = Ridge(solver='svd', fit_intercept=False)
     assert_raises(TypeError, ridge.fit, X, y)


@@ -694,3 +696,22 @@ def test_n_iter():
reg = Ridge(solver=solver, max_iter=1, tol=1e-1)
reg.fit(X, y_n)
assert_equal(reg.n_iter_, None)


def test_ridge_fit_intercept_sparse():
X, y = make_regression(n_samples=1000, n_features=2, n_informative=2,
bias=10., random_state=42)
X_csr = sp.csr_matrix(X)

dense = Ridge(alpha=1., tol=1.e-15, solver='sag', fit_intercept=True)
[Review comment from Member]
Is there a check that the solution to the dense model doesn't depend on the
solver? (I hope so.)

[Reply from Member Author (TomDLT)]
test_ridge_individual_penalties does check it indirectly, right?


sparse = Ridge(alpha=1., tol=1.e-15, solver='sag', fit_intercept=True)
[Review comment from Member]
Could you please also add the case where solver != 'sag' and check for the
solver switch warning with assert_warns?

[Reply from Member Author (TomDLT)]
Done.


dense.fit(X, y)
sparse.fit(X_csr, y)
assert_almost_equal(dense.intercept_, sparse.intercept_)
assert_array_almost_equal(dense.coef_, sparse.coef_)

# test the solver switch and the corresponding warning
sparse = Ridge(alpha=1., tol=1.e-15, solver='lsqr', fit_intercept=True)
assert_warns(UserWarning, sparse.fit, X_csr, y)
assert_almost_equal(dense.intercept_, sparse.intercept_)
assert_array_almost_equal(dense.coef_, sparse.coef_)
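On the reviewer's question about solver-independence of the dense solution, a standalone sketch (not part of the test suite) that checks it directly; the tolerances here are illustrative:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge

X, y = make_regression(n_samples=1000, n_features=2, bias=10.,
                       random_state=42)

solutions = []
for solver in ('cholesky', 'lsqr', 'sparse_cg', 'sag'):
    model = Ridge(alpha=1., tol=1e-10, solver=solver,
                  fit_intercept=True).fit(X, y)
    solutions.append(np.hstack([model.intercept_, model.coef_]))

# all dense solvers should agree up to solver tolerance
for sol in solutions[1:]:
    print(np.allclose(solutions[0], sol, atol=1e-4))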