|
33 | 33 | from ..exceptions import ConvergenceWarning
|
34 | 34 |
|
35 | 35 |
|
36 |
| -def _solve_sparse_cg(X, y, alpha, max_iter=None, tol=1e-3, verbose=0): |
| 36 | +def _solve_sparse_cg(X, y, alpha, max_iter=None, tol=1e-3, verbose=0, |
| 37 | + X_offset=None, X_scale=None): |
| 38 | + |
| 39 | + def _get_rescaled_operator(X): |
| 40 | + |
| 41 | + X_offset_scale = X_offset / X_scale |
| 42 | + |
| 43 | + def matvec(b): |
| 44 | + return X.dot(b) - b.dot(X_offset_scale) |
| 45 | + |
| 46 | + def rmatvec(b): |
| 47 | + return X.T.dot(b) - X_offset_scale * np.sum(b) |
| 48 | + |
| 49 | + X1 = sparse.linalg.LinearOperator(shape=X.shape, |
| 50 | + matvec=matvec, |
| 51 | + rmatvec=rmatvec) |
| 52 | + return X1 |
| 53 | + |
37 | 54 | n_samples, n_features = X.shape
|
38 |
| - X1 = sp_linalg.aslinearoperator(X) |
| 55 | + |
| 56 | + if X_offset is None or X_scale is None: |
| 57 | + X1 = sp_linalg.aslinearoperator(X) |
| 58 | + else: |
| 59 | + X1 = _get_rescaled_operator(X) |
| 60 | + |
39 | 61 | coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)
|
40 | 62 |
|
41 | 63 | if n_features > n_samples:
|
@@ -326,6 +348,25 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
|
326 | 348 | -----
|
327 | 349 | This function won't compute the intercept.
|
328 | 350 | """
|
| 351 | + |
| 352 | + return _ridge_regression(X, y, alpha, |
| 353 | + sample_weight=sample_weight, |
| 354 | + solver=solver, |
| 355 | + max_iter=max_iter, |
| 356 | + tol=tol, |
| 357 | + verbose=verbose, |
| 358 | + random_state=random_state, |
| 359 | + return_n_iter=return_n_iter, |
| 360 | + return_intercept=return_intercept, |
| 361 | + X_scale=None, |
| 362 | + X_offset=None) |
| 363 | + |
| 364 | + |
| 365 | +def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto', |
| 366 | + max_iter=None, tol=1e-3, verbose=0, random_state=None, |
| 367 | + return_n_iter=False, return_intercept=False, |
| 368 | + X_scale=None, X_offset=None): |
| 369 | + |
329 | 370 | if return_intercept and sparse.issparse(X) and solver != 'sag':
|
330 | 371 | if solver != 'auto':
|
331 | 372 | warnings.warn("In Ridge, only 'sag' solver can currently fit the "
|
@@ -395,7 +436,12 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
|
395 | 436 |
|
396 | 437 | n_iter = None
|
397 | 438 | if solver == 'sparse_cg':
|
398 |
| - coef = _solve_sparse_cg(X, y, alpha, max_iter, tol, verbose) |
| 439 | + coef = _solve_sparse_cg(X, y, alpha, |
| 440 | + max_iter=max_iter, |
| 441 | + tol=tol, |
| 442 | + verbose=verbose, |
| 443 | + X_offset=X_offset, |
| 444 | + X_scale=X_scale) |
399 | 445 |
|
400 | 446 | elif solver == 'lsqr':
|
401 | 447 | coef, n_iter = _solve_lsqr(X, y, alpha, max_iter, tol)
|
@@ -492,24 +538,35 @@ def fit(self, X, y, sample_weight=None):
|
492 | 538 | np.atleast_1d(sample_weight).ndim > 1):
|
493 | 539 | raise ValueError("Sample weights must be 1D array or scalar")
|
494 | 540 |
|
| 541 | + # when X is sparse we only remove offset from y |
495 | 542 | X, y, X_offset, y_offset, X_scale = self._preprocess_data(
|
496 | 543 | X, y, self.fit_intercept, self.normalize, self.copy_X,
|
497 |
| - sample_weight=sample_weight) |
| 544 | + sample_weight=sample_weight, return_mean=True) |
498 | 545 |
|
499 | 546 | # temporary fix for fitting the intercept with sparse data using 'sag'
|
500 |
| - if sparse.issparse(X) and self.fit_intercept: |
501 |
| - self.coef_, self.n_iter_, self.intercept_ = ridge_regression( |
| 547 | + if (sparse.issparse(X) and self.fit_intercept and |
| 548 | + self.solver != 'sparse_cg'): |
| 549 | + self.coef_, self.n_iter_, self.intercept_ = _ridge_regression( |
502 | 550 | X, y, alpha=self.alpha, sample_weight=sample_weight,
|
503 | 551 | max_iter=self.max_iter, tol=self.tol, solver=self.solver,
|
504 | 552 | random_state=self.random_state, return_n_iter=True,
|
505 | 553 | return_intercept=True)
|
| 554 | + # add the offset which was subtracted by _preprocess_data |
506 | 555 | self.intercept_ += y_offset
|
507 | 556 | else:
|
508 |
| - self.coef_, self.n_iter_ = ridge_regression( |
| 557 | + if sparse.issparse(X): |
| 558 | + # required to fit intercept with sparse_cg solver |
| 559 | + params = {'X_offset': X_offset, 'X_scale': X_scale} |
| 560 | + else: |
| 561 | + # for dense matrices or when intercept is set to 0 |
| 562 | + params = {} |
| 563 | + |
| 564 | + self.coef_, self.n_iter_ = _ridge_regression( |
509 | 565 | X, y, alpha=self.alpha, sample_weight=sample_weight,
|
510 | 566 | max_iter=self.max_iter, tol=self.tol, solver=self.solver,
|
511 | 567 | random_state=self.random_state, return_n_iter=True,
|
512 |
| - return_intercept=False) |
| 568 | + return_intercept=False, **params) |
| 569 | + |
513 | 570 | self._set_intercept(X_offset, y_offset, X_scale)
|
514 | 571 |
|
515 | 572 | return self
|
|