FIX temporary fix for sparse ridge with intercept fitting · scikit-learn/scikit-learn@934c158 · GitHub

Commit 934c158

committed
FIX temporary fix for sparse ridge with intercept fitting
1 parent 8d273a1 commit 934c158

File tree: 3 files changed, +71 −12 lines

  doc/whats_new.rst
  sklearn/linear_model/ridge.py
  sklearn/linear_model/tests/test_ridge.py

doc/whats_new.rst

Lines changed: 6 additions & 0 deletions
@@ -263,6 +263,12 @@ Bug fixes
 - Fixed a bug in :class:`naive_bayes.GaussianNB` which caused classification
   results to depend on scale. By `Jake Vanderplas`_.
 
+- Fixed temporarily :class:`linear_model.Ridge`, which was incorrect
+  when fitting the intercept in the case of sparse data. The fix
+  automatically changes the solver to 'sag' in this case.
+  (`#5360 <https://github.com/scikit-learn/scikit-learn/pull/5360>`_)
+  By `Tom Dupre la Tour`_.
+
 API changes summary
 -------------------

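As a quick illustration of the behaviour described by this changelog entry, the snippet below fits Ridge on sparse input with an intercept and shows the automatic solver switch. The data setup is invented for the example and is not part of the changeset; it assumes the scikit-learn version that contains this commit.

import warnings
import numpy as np
from scipy import sparse
from sklearn.linear_model import Ridge

# Illustrative random sparse design matrix (any CSR/CSC input behaves the same).
rng = np.random.RandomState(0)
X = sparse.csr_matrix(rng.rand(50, 3))
y = rng.rand(50)

# With sparse X and fit_intercept=True, the intercept is now fitted via 'sag'.
model = Ridge(alpha=1.0, solver='sag', fit_intercept=True).fit(X, y)
print(model.intercept_, model.coef_)

# Requesting any other solver in this configuration emits the UserWarning
# added in ridge_regression and silently switches to 'sag'.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    Ridge(alpha=1.0, solver='lsqr', fit_intercept=True).fit(X, y)
print(caught[0].category)  # <class 'UserWarning'>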
sklearn/linear_model/ridge.py

Lines changed: 41 additions & 9 deletions
@@ -194,7 +194,7 @@ def _solve_svd(X, y, alpha):
 
 def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
                      max_iter=None, tol=1e-3, verbose=0, random_state=None,
-                     return_n_iter=False):
+                     return_n_iter=False, return_intercept=False):
     """Solve the ridge equation by the method of normal equations.
 
     Read more in the :ref:`User Guide <ridge_regression>`.

@@ -268,6 +268,11 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
         If True, the method also returns `n_iter`, the actual number of
         iteration performed by the solver.
 
+    return_intercept : boolean, default False
+        If True, the method also returns the intercept, and the solver
+        is automatically changed to 'sag'. This is only a temporary fix
+        for fitting the intercept with sparse data.
+
     Returns
     -------
     coef : array, shape = [n_features] or [n_targets, n_features]

@@ -281,6 +286,12 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
     -----
     This function won't compute the intercept.
     """
+    if return_intercept and solver != 'sag':
+        warnings.warn("In Ridge, only 'sag' solver can currently fit the "
+                      "intercept when X is sparse. Solver has been "
+                      "automatically changed into 'sag'.")
+        solver = 'sag'
+
     # SAG needs X and y columns to be C-contiguous and np.float64
     if solver == 'sag':
         X = check_array(X, accept_sparse=['csr'],

@@ -375,14 +386,22 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
 
         coef = np.empty((y.shape[1], n_features))
         n_iter = np.empty(y.shape[1], dtype=np.int32)
+        intercept = np.zeros((y.shape[1], ))
         for i, (alpha_i, target) in enumerate(zip(alpha, y.T)):
+            start = {'coef': np.zeros(n_features + int(return_intercept))}
             coef_, n_iter_, _ = sag_solver(
                 X, target.ravel(), sample_weight, 'squared', alpha_i,
                 max_iter, tol, verbose, random_state, False, max_squared_sum,
-                dict())
-            coef[i] = coef_
+                start)
+            if return_intercept:
+                coef[i] = coef_[:-1]
+                intercept[i] = coef_[-1]
+            else:
+                coef[i] = coef_
             n_iter[i] = n_iter_
 
+        if intercept.shape[0] == 1:
+            intercept = intercept[0]
         coef = np.asarray(coef)
 
     if solver == 'svd':

@@ -395,7 +414,11 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
         # When y was passed as a 1d-array, we flatten the coefficients.
         coef = coef.ravel()
 
-    if return_n_iter:
+    if return_n_iter and return_intercept:
+        return coef, n_iter, intercept
+    elif return_intercept:
+        return coef, intercept
+    elif return_n_iter:
         return coef, n_iter
     else:
        return coef

@@ -428,12 +451,21 @@ def fit(self, X, y, sample_weight=None):
             X, y, self.fit_intercept, self.normalize, self.copy_X,
             sample_weight=sample_weight)
 
-        self.coef_, self.n_iter_ = ridge_regression(
-            X, y, alpha=self.alpha, sample_weight=sample_weight,
-            max_iter=self.max_iter, tol=self.tol, solver=self.solver,
-            random_state=self.random_state, return_n_iter=True)
+        if sparse.issparse(X) and self.fit_intercept:
+            self.coef_, self.n_iter_, self.intercept_ = ridge_regression(
+                X, y, alpha=self.alpha, sample_weight=sample_weight,
+                max_iter=self.max_iter, tol=self.tol, solver=self.solver,
+                random_state=self.random_state, return_n_iter=True,
+                return_intercept=True)
+            self.intercept_ += y_mean
+        else:
+            self.coef_, self.n_iter_ = ridge_regression(
+                X, y, alpha=self.alpha, sample_weight=sample_weight,
+                max_iter=self.max_iter, tol=self.tol, solver=self.solver,
+                random_state=self.random_state, return_n_iter=True,
+                return_intercept=False)
+            self._set_intercept(X_mean, y_mean, X_std)
 
-        self._set_intercept(X_mean, y_mean, X_std)
         return self
 

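The diff above also extends the return contract of ridge_regression itself. A minimal sketch of calling it directly with the new return_intercept flag (the input data is invented for illustration; the call assumes this version of scikit-learn):

import numpy as np
from scipy import sparse
from sklearn.linear_model import ridge_regression

rng = np.random.RandomState(42)
X = sparse.csr_matrix(rng.rand(40, 3))
y = rng.rand(40)

# return_intercept=True forces the 'sag' path and appends the intercept to the
# returned values; combined with return_n_iter=True the order is
# (coef, n_iter, intercept), matching the if/elif chain at the end of the function.
coef, intercept = ridge_regression(X, y, alpha=1.0, solver='sag',
                                   return_intercept=True)
coef, n_iter, intercept = ridge_regression(X, y, alpha=1.0, solver='sag',
                                           return_n_iter=True,
                                           return_intercept=True)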
sklearn/linear_model/tests/test_ridge.py

Lines changed: 24 additions & 3 deletions
@@ -11,6 +11,7 @@
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_raise_message
 from sklearn.utils.testing import ignore_warnings
+from sklearn.utils.testing import assert_warns
 
 from sklearn import datasets
 from sklearn.metrics import mean_squared_error

@@ -26,6 +27,7 @@
 from sklearn.linear_model.ridge import RidgeClassifierCV
 from sklearn.linear_model.ridge import _solve_cholesky
 from sklearn.linear_model.ridge import _solve_cholesky_kernel
+from sklearn.datasets import make_regression
 
 from sklearn.grid_search import GridSearchCV
 

@@ -413,11 +415,11 @@ def _test_ridge_classifiers(filter_):
 
 
 def _test_tolerance(filter_):
-    ridge = Ridge(tol=1e-5)
+    ridge = Ridge(tol=1e-5, fit_intercept=False)
     ridge.fit(filter_(X_diabetes), y_diabetes)
     score = ridge.score(filter_(X_diabetes), y_diabetes)
 
-    ridge2 = Ridge(tol=1e-3)
+    ridge2 = Ridge(tol=1e-3, fit_intercept=False)
     ridge2.fit(filter_(X_diabetes), y_diabetes)
     score2 = ridge2.score(filter_(X_diabetes), y_diabetes)
 

@@ -449,7 +451,7 @@ def test_ridge_cv_sparse_svd():
 def test_ridge_sparse_svd():
     X = sp.csc_matrix(rng.rand(100, 10))
     y = rng.rand(100)
-    ridge = Ridge(solver='svd')
+    ridge = Ridge(solver='svd', fit_intercept=False)
     assert_raises(TypeError, ridge.fit, X, y)
 
 

@@ -694,3 +696,22 @@ def test_n_iter():
         reg = Ridge(solver=solver, max_iter=1, tol=1e-1)
         reg.fit(X, y_n)
         assert_equal(reg.n_iter_, None)
+
+
+def test_ridge_fit_intercept_sparse():
+    X, y = make_regression(n_samples=1000, n_features=2, n_informative=2,
+                           bias=10., random_state=42)
+    X_csr = sp.csr_matrix(X)
+
+    dense = Ridge(alpha=1., tol=1.e-15, solver='sag', fit_intercept=True)
+    sparse = Ridge(alpha=1., tol=1.e-15, solver='sag', fit_intercept=True)
+    dense.fit(X, y)
+    sparse.fit(X_csr, y)
+    assert_almost_equal(dense.intercept_, sparse.intercept_)
+    assert_array_almost_equal(dense.coef_, sparse.coef_)
+
+    # test the solver switch and the corresponding warning
+    sparse = Ridge(alpha=1., tol=1.e-15, solver='lsqr', fit_intercept=True)
+    assert_warns(UserWarning, sparse.fit, X_csr, y)
+    assert_almost_equal(dense.intercept_, sparse.intercept_)
+    assert_array_almost_equal(dense.coef_, sparse.coef_)

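For reference, the new test can also be exercised directly from an interactive session, without going through the full test runner (assuming a scikit-learn install or checkout that ships the test modules):

# Runs the dense-vs-sparse comparison and the solver-switch warning check
# added above; it raises an AssertionError if the results diverge.
from sklearn.linear_model.tests.test_ridge import test_ridge_fit_intercept_sparse
test_ridge_fit_intercept_sparse()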
0 commit comments