ENH implement sag_ridge · scikit-learn/scikit-learn@13921bd · GitHub
ENH implement sag_ridge
1 parent 578bd5b commit 13921bd

File tree: 6 files changed, +286 -200 lines

sklearn/linear_model/ridge.py

Lines changed: 32 additions & 13 deletions
@@ -18,6 +18,7 @@
 from scipy.sparse import linalg as sp_linalg
 
 from .base import LinearClassifierMixin, LinearModel
+from .sag import sag_ridge
 from ..base import RegressorMixin
 from ..utils.extmath import safe_sparse_dot
 from ..utils import check_X_y
@@ -220,8 +221,9 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
         The default value is determined by scipy.sparse.linalg.
 
     sample_weight : float or numpy array of shape [n_samples]
-        Individual weights for each sample. If sample_weight is set, then
-        the solver will automatically be set to 'cholesky'
+        Individual weights for each sample. If sample_weight is set, and
+        if the solver is not in {'cholesky', 'sag'}, then the solver will
+        automatically be set to 'cholesky'.
 
     solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg'}
         Solver to use in the computational routines:
@@ -245,7 +247,11 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
          scipy.sparse.linalg.lsqr. It is the fatest but may not be available
          in old scipy versions. It also uses an iterative procedure.
 
-        All three solvers support both dense and sparse data.
+        - 'sag' uses a Stochastic Average Gradient descent. It also uses an
+          iterative procedure, and is faster than other solvers when both
+          n_samples and n_features are large.
+
+        All last four solvers support both dense and sparse data.
 
     tol : float
         Precision of the solution.
@@ -285,7 +291,7 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
     if solver == 'auto':
         # cholesky if it's a dense array and cg in
         # any other case
-        if not sparse.issparse(X) or has_sw:
+        if not sparse.issparse(X) or (has_sw and solver != 'sag'):
             solver = 'cholesky'
         else:
             solver = 'sparse_cg'
@@ -299,8 +305,9 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
         if np.atleast_1d(sample_weight).ndim > 1:
             raise ValueError("Sample weights must be 1D array or scalar")
 
-        # Sample weight can be implemented via a simple rescaling.
-        X, y = _rescale_data(X, y, sample_weight)
+        if solver == 'cholesky':
+            # Sample weight can be implemented via a simple rescaling.
+            X, y = _rescale_data(X, y, sample_weight)
 
     # There should be either 1 or n_targets penalties
     alpha = np.asarray(alpha).ravel()
@@ -312,13 +319,13 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
     if alpha.size == 1 and n_targets > 1:
         alpha = np.repeat(alpha, n_targets)
 
-    if solver not in ('sparse_cg', 'cholesky', 'svd', 'lsqr'):
+    if solver not in ('sparse_cg', 'cholesky', 'svd', 'lsqr', 'sag'):
         raise ValueError('Solver %s not understood' % solver)
 
     if solver == 'sparse_cg':
         coef = _solve_sparse_cg(X, y, alpha, max_iter, tol, verbose)
 
-    elif solver == "lsqr":
+    elif solver == 'lsqr':
         coef = _solve_lsqr(X, y, alpha, max_iter, tol)
 
     elif solver == 'cholesky':
@@ -339,6 +346,12 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
                 # use SVD solver if matrix is singular
                 solver = 'svd'
 
+    elif solver == 'sag':
+        coef = [sag_ridge(X, target.ravel(), alpha_i, sample_weight,
+                          max_iter, tol, verbose)
+                for alpha_i, target in zip(alpha, y.T)]
+        coef = np.asarray(coef)
+
     if solver == 'svd':
         if sparse.issparse(X):
             raise TypeError('SVD solver does not support sparse'
@@ -414,9 +427,10 @@ class Ridge(_BaseRidge, RegressorMixin):
         to false, no intercept will be used in calculations
         (e.g. data is expected to be already centered).
 
-    max_iter : int, optional
+    max_iter : int, optional for 'sparse_cg' and 'lsqr' solvers
         Maximum number of iterations for conjugate gradient solver.
-        The default value is determined by scipy.sparse.linalg.
+        For 'sparse_cg' and 'lsqr' solvers, the default value is determined
+        by scipy.sparse.linalg. For 'sag' solver, the default value is 1000.
 
     normalize : boolean, optional, default False
         If True, the regressors X will be normalized before regression.
@@ -442,7 +456,11 @@ class Ridge(_BaseRidge, RegressorMixin):
          scipy.sparse.linalg.lsqr. It is the fatest but may not be available
          in old scipy versions. It also uses an iterative procedure.
 
-        All three solvers support both dense and sparse data.
+        - 'sag' uses a Stochastic Average Gradient descent. It also uses an
+          iterative procedure, and is faster than other solvers when both
+          n_samples and n_features are large.
+
+        All last four solvers support both dense and sparse data.
 
     tol : float
         Precision of the solution.
@@ -527,15 +545,16 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge):
     normalize : boolean, optional, default False
         If True, the regressors X will be normalized before regression.
 
-    solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg'}
+    solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag'}
        Solver to use in the computational
        routines. 'svd' will use a Singular value decomposition to obtain
        the solution, 'cholesky' will use the standard
        scipy.linalg.solve function, 'sparse_cg' will use the
        conjugate gradient solver as found in
        scipy.sparse.linalg.cg while 'auto' will chose the most
        appropriate depending on the matrix X. 'lsqr' uses
-        a direct regularized least-squares routine provided by scipy.
+        a direct regularized least-squares routine provided by scipy,
+        and 'sag' uses a Stochastic Average Gradient descent.
 
     tol : float
         Precision of the solution.
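
Editorial note: for context, a minimal usage sketch of the option the updated docstrings describe. With this change, solver='sag' can be requested either through the ridge_regression function or through the Ridge estimator, and a sample_weight no longer forces the solver back to 'cholesky'. This is illustrative only, assumes this branch is installed, and uses made-up synthetic data.

import numpy as np
from sklearn.linear_model import Ridge, ridge_regression

rng = np.random.RandomState(0)
X = rng.randn(500, 100)                            # largish n_samples and n_features
y = X.dot(rng.randn(100)) + 0.1 * rng.randn(500)
sample_weight = rng.rand(500)

# Function API: ridge_regression now dispatches to sag_ridge once per
# (alpha, target) pair, passing sample_weight straight through.
coef = ridge_regression(X, y, alpha=1.0, solver='sag',
                        sample_weight=sample_weight, max_iter=1000, tol=0.001)

# Estimator API: the same solver choice through the Ridge class.
reg = Ridge(alpha=1.0, solver='sag', max_iter=1000, tol=0.001).fit(X, y)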

sklearn/linear_model/sag.py

Lines changed: 94 additions & 27 deletions
@@ -11,7 +11,7 @@
 from ..utils.seq_dataset import ArrayDataset, CSRDataset
 from ..externals import six
 from ..externals.joblib import Parallel, delayed
-from .sag_fast import Log, SquaredLoss
+from .sag_fast import LogLoss, SquaredLoss
 from .sag_fast import sag_sparse, get_auto_eta
 
 MAX_INT = np.iinfo(np.int32).max
@@ -21,15 +21,94 @@
 SPARSE_INTERCEPT_DECAY = 0.01
 
 
+def make_dataset(X, y, sample_weight, random_state):
+    # check which type of Sequential Dataset is needed
+    if sp.issparse(X):
+        dataset = CSRDataset(X.data, X.indptr, X.indices,
+                             y, sample_weight,
+                             seed=random_state.randint(MAX_INT))
+        intercept_decay = SPARSE_INTERCEPT_DECAY
+    else:
+        dataset = ArrayDataset(X, y, sample_weight,
+                               seed=random_state.randint(MAX_INT))
+        intercept_decay = 1.0
+
+    return dataset, intercept_decay
+
+
+def sag_ridge(X, y, alpha=1e-4, sample_weight=None, max_iter=1000, tol=0.001,
+              verbose=0):
+    """SAG solver for Ridge regression"""
+
+    # TODO
+    if max_iter is None:
+        warnings.warn("sag solver requires 'max_iter' to be not None. "
+                      "max_iter is set to 1000", RuntimeWarning)
+        max_iter = 1000
+
+    n_samples, n_features = X.shape[0], X.shape[1]
+    alpha = float(alpha) / n_samples
+    fit_intercept = False
+
+    # initialization
+    if sample_weight is None:
+        sample_weight = np.ones(n_samples, dtype=np.float64, order='C')
+    coef_init = np.zeros(n_features, dtype=np.float64, order='C')
+    intercept_init = 0.0
+    weight_pos = 1
+    weight_neg = 1
+
+    # TODO: *_init (with a boolean warm-start) as parameters ?
+    intercept_sum_gradient_init = 0.0
+    sum_gradient_init = np.zeros(n_features, dtype=np.float64, order='C')
+    gradient_memory_init = np.zeros(n_samples, dtype=np.float64, order='C')
+    seen_init = np.zeros(n_samples, dtype=np.int32, order='C')
+    num_seen_init = 0
+
+    # TODO: add a random_state in parameters ?
+    random_state = check_random_state(42)
+
+    dataset, intercept_decay = make_dataset(X, y, sample_weight, random_state)
+
+    # set the eta0 at 1 / 4L where L is the max sum of
+    # squares for over all samples
+    step_size = get_auto_eta(dataset, alpha, n_samples, SquaredLoss(),
+                             fit_intercept)
+    if step_size * alpha == 1:
+        raise ZeroDivisionError("Current sag implementation does not handle "
+                                "the case step_size * alpha == 1")
+    print alpha
+    intercept_, num_seen, max_iter_reached, intercept_sum_gradient = \
+        sag_sparse(dataset, coef_init.ravel(),
+                   intercept_init, n_samples,
+                   n_features, tol,
+                   max_iter,
+                   SquaredLoss(),
+                   step_size, alpha,
+                   sum_gradient_init.ravel(),
+                   gradient_memory_init.ravel(),
+                   seen_init.ravel(),
+                   num_seen_init,
+                   weight_pos, weight_neg,
+                   fit_intercept,
+                   intercept_sum_gradient_init,
+                   intercept_decay,
+                   verbose)
+
+    if max_iter_reached:
+        warnings.warn("The max_iter was reached which means "
+                      "the coef_ did not converge", ConvergenceWarning)
+
+    return coef_init
+
+
 def sag_logistic(X, y, coef_init, alpha=1e-4, sample_weight=None,
                  max_iter=1000, tol=0.001, verbose=0, random_state=None):
     """SAG solver for LogisticRegression"""
 
     n_samples, n_features = X.shape[0], X.shape[1]
+    alpha = float(alpha) / n_samples
 
-    alpha = alpha / n_samples
-
-    # initialize all parameters if there is no init
     if sample_weight is None:
         sample_weight = np.ones(n_samples, dtype=np.float64, order='C')
 
@@ -47,32 +126,28 @@ def sag_logistic(X, y, coef_init, alpha=1e-4, sample_weight=None,
     gradient_memory_init = np.zeros(n_samples, dtype=np.float64, order='C')
     seen_init = np.zeros(n_samples, dtype=np.int32, order='C')
     num_seen_init = 0
+
     weight_pos = 1
     weight_neg = 1
 
     random_state = check_random_state(random_state)
 
-    # check which type of Sequential Dataset is needed
-    if sp.issparse(X):
-        dataset = CSRDataset(X.data, X.indptr, X.indices,
-                             y, sample_weight,
-                             seed=random_state.randint(MAX_INT))
-        intercept_decay = SPARSE_INTERCEPT_DECAY
-    else:
-        dataset = ArrayDataset(X, y, sample_weight,
-                               seed=random_state.randint(MAX_INT))
-        intercept_decay = 1.0
+    dataset, intercept_decay = make_dataset(X, y, sample_weight, random_state)
 
     # set the eta0 at 1 / 4L where L is the max sum of
     # squares for over all samples
-    step_size = get_auto_eta(dataset, alpha, n_samples, Log(), fit_intercept)
+    step_size = get_auto_eta(dataset, alpha, n_samples, LogLoss(),
+                             fit_intercept)
+    if step_size * alpha == 1.:
+        raise ZeroDivisionError("Current sag implementation does not handle "
+                                "the case step_size * alpha == 1")
 
     intercept_, num_seen, max_iter_reached, intercept_sum_gradient = \
         sag_sparse(dataset, coef_init.ravel(),
                    intercept_init, n_samples,
                    n_features, tol,
                    max_iter,
-                   Log(),
+                   LogLoss(),
                    step_size, alpha,
                    sum_gradient_init.ravel(),
                    gradient_memory_init.ravel(),
@@ -163,16 +238,8 @@ def _fit(self, X, y, coef_init=None, intercept_init=None,
 
         random_state = check_random_state(self.random_state)
 
-        # check which type of Sequential Dataset is needed
-        if sp.issparse(X):
-            dataset = CSRDataset(X.data, X.indptr, X.indices,
-                                 y, sample_weight,
-                                 seed=random_state.randint(MAX_INT))
-            intercept_decay = SPARSE_INTERCEPT_DECAY
-        else:
-            dataset = ArrayDataset(X, y, sample_weight,
-                                   seed=random_state.randint(MAX_INT))
-            intercept_decay = 1.0
+        dataset, intercept_decay = make_dataset(X, y, sample_weight,
+                                                random_state)
 
         # set the eta0 if needed, 'auto' is 1 / 4L where L is the max sum of
         # squares for over all samples
@@ -317,7 +384,7 @@ def __init__(self, alpha=0.0001,
                  eta0='auto', class_weight=None, warm_start=False):
         self.n_jobs = n_jobs
         self.class_weight = class_weight
-        self.loss_function = Log()
+        self.loss_function = LogLoss()
         super(SAGClassifier, self).__init__(alpha=alpha,
                                             fit_intercept=fit_intercept,
                                             max_iter=max_iter,
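
Editorial note: the dataset-construction block that used to be duplicated in sag_logistic and SAGBase._fit is now the single make_dataset helper added above. A small sketch of how it is meant to be called, assuming this branch is installed; the module path and names come from the diff, the data is made up.

import numpy as np
import scipy.sparse as sp
from sklearn.utils import check_random_state
from sklearn.linear_model.sag import make_dataset

random_state = check_random_state(0)
X_dense = random_state.randn(100, 20)
y = random_state.randn(100)
sample_weight = np.ones(100)

# Dense input yields an ArrayDataset and an undamped intercept update (decay 1.0).
dataset, intercept_decay = make_dataset(X_dense, y, sample_weight, random_state)

# Sparse input yields a CSRDataset and the damped SPARSE_INTERCEPT_DECAY (0.01).
X_sparse = sp.csr_matrix(X_dense)
dataset, intercept_decay = make_dataset(X_sparse, y, sample_weight, random_state)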
