Merge pull request #5360 from TomDLT/sparseridge · scikit-learn/scikit-learn@90922ea · GitHub

Commit 90922ea

Tom Dupré la Tour committed
Merge pull request #5360 from TomDLT/sparseridge
[MRG+2] temporary fix for sparse ridge with intercept fitting
2 parents 7fe902c + db49cdd commit 90922ea

File tree

3 files changed: +77, -12 lines

doc/whats_new.rst

Lines changed: 6 additions & 0 deletions
@@ -263,6 +263,12 @@ Bug fixes
 - Fixed a bug in :class:`naive_bayes.GaussianNB` which caused classification
   results to depend on scale. By `Jake Vanderplas`_.

+- Fixed temporarily :class:`linear_model.Ridge`, which was incorrect
+  when fitting the intercept in the case of sparse data. The fix
+  automatically changes the solver to 'sag' in this case.
+  (`#5360 <https://github.com/scikit-learn/scikit-learn/pull/5360>`_)
+  By `Tom Dupre la Tour`_.
+
 API changes summary
 -------------------
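As a quick, hedged illustration of the behavior this entry describes (the data and parameters below are made up for the example, not taken from the PR): after this fix, fitting Ridge on sparse input with fit_intercept=True produces a correct intercept via the 'sag' solver.

    import numpy as np
    import scipy.sparse as sp
    from sklearn.linear_model import Ridge

    rng = np.random.RandomState(42)
    X = sp.csr_matrix(rng.rand(100, 5))   # sparse design matrix
    y = rng.rand(100)

    # Requesting any other solver together with sparse X and
    # fit_intercept=True now warns and switches to 'sag' automatically.
    ridge = Ridge(alpha=1.0, solver='sag', fit_intercept=True)
    ridge.fit(X, y)
    print(ridge.intercept_, ridge.coef_)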

sklearn/linear_model/ridge.py

Lines changed: 47 additions & 9 deletions
@@ -194,7 +194,7 @@ def _solve_svd(X, y, alpha):

 def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
                      max_iter=None, tol=1e-3, verbose=0, random_state=None,
-                     return_n_iter=False):
+                     return_n_iter=False, return_intercept=False):
     """Solve the ridge equation by the method of normal equations.

     Read more in the :ref:`User Guide <ridge_regression>`.
@@ -268,6 +268,12 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
         If True, the method also returns `n_iter`, the actual number of
         iteration performed by the solver.

+    return_intercept : boolean, default False
+        If True and if X is sparse, the method also returns the intercept,
+        and the solver is automatically changed to 'sag'. This is only a
+        temporary fix for fitting the intercept with sparse data. For dense
+        data, use sklearn.linear_model.center_data before your regression.
+
     Returns
     -------
     coef : array, shape = [n_features] or [n_targets, n_features]
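A minimal sketch of calling ridge_regression with the new return_intercept flag documented above (the data is illustrative; the module path is as in this diff):

    import numpy as np
    import scipy.sparse as sp
    from sklearn.linear_model.ridge import ridge_regression

    rng = np.random.RandomState(0)
    X = sp.csr_matrix(rng.rand(50, 4))
    y = rng.rand(50)

    # With sparse X, return_intercept=True uses 'sag' and returns the
    # intercept alongside the coefficients.
    coef, intercept = ridge_regression(X, y, alpha=1.0, solver='sag',
                                       return_intercept=True)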
@@ -277,10 +283,20 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
         The actual number of iteration performed by the solver.
         Only returned if `return_n_iter` is True.

+    intercept : float or array, shape = [n_targets]
+        The intercept of the model. Only returned if `return_intercept`
+        is True and if X is a scipy sparse array.
+
     Notes
     -----
     This function won't compute the intercept.
     """
+    if return_intercept and sparse.issparse(X) and solver != 'sag':
+        warnings.warn("In Ridge, only 'sag' solver can currently fit the "
+                      "intercept when X is sparse. Solver has been "
+                      "automatically changed into 'sag'.")
+        solver = 'sag'
+
     # SAG needs X and y columns to be C-contiguous and np.float64
     if solver == 'sag':
         X = check_array(X, accept_sparse=['csr'],
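The solver switch added above is observable from user code. A hedged sketch of catching the warning (the solver choice is arbitrary; the setup is repeated so the snippet stands alone):

    import warnings
    import numpy as np
    import scipy.sparse as sp
    from sklearn.linear_model.ridge import ridge_regression

    X = sp.csr_matrix(np.random.RandomState(0).rand(50, 4))
    y = np.random.RandomState(1).rand(50)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        coef, intercept = ridge_regression(X, y, alpha=1.0, solver='cholesky',
                                           return_intercept=True)
    # caught[0].message states that the solver was changed into 'sag'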
@@ -375,14 +391,22 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',

         coef = np.empty((y.shape[1], n_features))
         n_iter = np.empty(y.shape[1], dtype=np.int32)
+        intercept = np.zeros((y.shape[1], ))
         for i, (alpha_i, target) in enumerate(zip(alpha, y.T)):
+            start = {'coef': np.zeros(n_features + int(return_intercept))}
             coef_, n_iter_, _ = sag_solver(
                 X, target.ravel(), sample_weight, 'squared', alpha_i,
                 max_iter, tol, verbose, random_state, False, max_squared_sum,
-                dict())
-            coef[i] = coef_
+                start)
+            if return_intercept:
+                coef[i] = coef_[:-1]
+                intercept[i] = coef_[-1]
+            else:
+                coef[i] = coef_
             n_iter[i] = n_iter_

+        if intercept.shape[0] == 1:
+            intercept = intercept[0]
         coef = np.asarray(coef)

     if solver == 'svd':
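In the 'sag' branch above, the warm-start vector gets one extra slot when return_intercept is set, and the solver returns the intercept as the last entry of the coefficient vector. A toy illustration of that split (the array is hypothetical, not real solver output):

    import numpy as np

    coef_ = np.array([0.5, -1.2, 3.0])        # pretend sag_solver output
    coef, intercept = coef_[:-1], coef_[-1]   # features first, intercept last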
@@ -395,7 +419,11 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
     # When y was passed as a 1d-array, we flatten the coefficients.
     coef = coef.ravel()

-    if return_n_iter:
+    if return_n_iter and return_intercept:
+        return coef, n_iter, intercept
+    elif return_intercept:
+        return coef, intercept
+    elif return_n_iter:
         return coef, n_iter
     else:
         return coef
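The branches above give four possible return signatures depending on the two flags; a short sketch of unpacking the richest and the simplest (illustrative sparse data again):

    import numpy as np
    import scipy.sparse as sp
    from sklearn.linear_model.ridge import ridge_regression

    X = sp.csr_matrix(np.random.RandomState(0).rand(50, 4))
    y = np.random.RandomState(1).rand(50)

    # Both flags set: (coef, n_iter, intercept)
    coef, n_iter, intercept = ridge_regression(
        X, y, alpha=1.0, solver='sag',
        return_n_iter=True, return_intercept=True)

    # Neither flag set: coef alone
    coef = ridge_regression(X, y, alpha=1.0, solver='sag')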
@@ -428,12 +456,22 @@ def fit(self, X, y, sample_weight=None):
             X, y, self.fit_intercept, self.normalize, self.copy_X,
             sample_weight=sample_weight)

-        self.coef_, self.n_iter_ = ridge_regression(
-            X, y, alpha=self.alpha, sample_weight=sample_weight,
-            max_iter=self.max_iter, tol=self.tol, solver=self.solver,
-            random_state=self.random_state, return_n_iter=True)
+        # temporary fix for fitting the intercept with sparse data using 'sag'
+        if sparse.issparse(X) and self.fit_intercept:
+            self.coef_, self.n_iter_, self.intercept_ = ridge_regression(
+                X, y, alpha=self.alpha, sample_weight=sample_weight,
+                max_iter=self.max_iter, tol=self.tol, solver=self.solver,
+                random_state=self.random_state, return_n_iter=True,
+                return_intercept=True)
+            self.intercept_ += y_mean
+        else:
+            self.coef_, self.n_iter_ = ridge_regression(
+                X, y, alpha=self.alpha, sample_weight=sample_weight,
+                max_iter=self.max_iter, tol=self.tol, solver=self.solver,
+                random_state=self.random_state, return_n_iter=True,
+                return_intercept=False)
+            self._set_intercept(X_mean, y_mean, X_std)

-        self._set_intercept(X_mean, y_mean, X_std)
         return self
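A note on the `self.intercept_ += y_mean` line in the sparse branch: the preprocessing step centers y but cannot center a sparse X, so the solver fits against y - y_mean and the learned intercept must be shifted back. A hedged sketch of that bookkeeping done outside the estimator (that only y is centered is an assumption drawn from this diff):

    import numpy as np
    import scipy.sparse as sp
    from sklearn.linear_model.ridge import ridge_regression

    rng = np.random.RandomState(0)
    X = sp.csr_matrix(rng.rand(50, 4))
    y = rng.rand(50) + 10.0               # target with a large offset

    y_mean = y.mean()
    coef, intercept = ridge_regression(X, y - y_mean, alpha=1.0, solver='sag',
                                       return_intercept=True)
    intercept += y_mean                   # mirrors self.intercept_ += y_mean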

sklearn/linear_model/tests/test_ridge.py

Lines changed: 24 additions & 3 deletions
@@ -11,6 +11,7 @@
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_raise_message
 from sklearn.utils.testing import ignore_warnings
+from sklearn.utils.testing import assert_warns

 from sklearn import datasets
 from sklearn.metrics import mean_squared_error
@@ -26,6 +27,7 @@
 from sklearn.linear_model.ridge import RidgeClassifierCV
 from sklearn.linear_model.ridge import _solve_cholesky
 from sklearn.linear_model.ridge import _solve_cholesky_kernel
+from sklearn.datasets import make_regression

 from sklearn.grid_search import GridSearchCV

@@ -413,11 +415,11 @@ def _test_ridge_classifiers(filter_):


 def _test_tolerance(filter_):
-    ridge = Ridge(tol=1e-5)
+    ridge = Ridge(tol=1e-5, fit_intercept=False)
     ridge.fit(filter_(X_diabetes), y_diabetes)
     score = ridge.score(filter_(X_diabetes), y_diabetes)

-    ridge2 = Ridge(tol=1e-3)
+    ridge2 = Ridge(tol=1e-3, fit_intercept=False)
     ridge2.fit(filter_(X_diabetes), y_diabetes)
     score2 = ridge2.score(filter_(X_diabetes), y_diabetes)

@@ -449,7 +451,7 @@ def test_ridge_cv_sparse_svd():
 def test_ridge_sparse_svd():
     X = sp.csc_matrix(rng.rand(100, 10))
     y = rng.rand(100)
-    ridge = Ridge(solver='svd')
+    ridge = Ridge(solver='svd', fit_intercept=False)
     assert_raises(TypeError, ridge.fit, X, y)

@@ -694,3 +696,22 @@ def test_n_iter():
         reg = Ridge(solver=solver, max_iter=1, tol=1e-1)
         reg.fit(X, y_n)
         assert_equal(reg.n_iter_, None)
+
+
+def test_ridge_fit_intercept_sparse():
+    X, y = make_regression(n_samples=1000, n_features=2, n_informative=2,
+                           bias=10., random_state=42)
+    X_csr = sp.csr_matrix(X)
+
+    dense = Ridge(alpha=1., tol=1.e-15, solver='sag', fit_intercept=True)
+    sparse = Ridge(alpha=1., tol=1.e-15, solver='sag', fit_intercept=True)
+    dense.fit(X, y)
+    sparse.fit(X_csr, y)
+    assert_almost_equal(dense.intercept_, sparse.intercept_)
+    assert_array_almost_equal(dense.coef_, sparse.coef_)
+
+    # test the solver switch and the corresponding warning
+    sparse = Ridge(alpha=1., tol=1.e-15, solver='lsqr', fit_intercept=True)
+    assert_warns(UserWarning, sparse.fit, X_csr, y)
+    assert_almost_equal(dense.intercept_, sparse.intercept_)
+    assert_array_almost_equal(dense.coef_, sparse.coef_)

0 commit comments