from scipy.sparse import linalg as sp_linalg
from .base import LinearClassifierMixin, LinearModel, _rescale_data
- from .sag import sag_ridge
+ from .sag import sag_solver
from .sag_fast import get_max_squared_sum
from ..base import RegressorMixin
from ..utils.extmath import safe_sparse_dot
@@ -193,7 +193,8 @@ def _solve_svd(X, y, alpha):
def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
-                      max_iter=None, tol=1e-3, verbose=0, random_state=None):
+                      max_iter=None, tol=1e-3, verbose=0, random_state=None,
+                      return_n_iter=False):
    """Solve the ridge equation by the method of normal equations.

    Read more in the :ref:`User Guide <ridge_regression>`.
@@ -244,8 +245,11 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
in old scipy versions. It also uses an iterative procedure.

- 'sag' uses a Stochastic Average Gradient descent. It also uses an
- iterative procedure, and is faster than other solvers when both
- n_samples and n_features are large.
+ 'sag' uses a Stochastic Average Gradient descent. It also uses an
+ iterative procedure, and is often faster than other solvers when
+ both n_samples and n_features are large. Note that 'sag' fast
+ convergence is only guaranteed on features with approximately the
+ same scale. You can preprocess the data with a scaler from
+ sklearn.preprocessing.

All last four solvers support both dense and sparse data.
@@ -260,11 +264,19 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
The seed of the pseudo random number generator to use when
shuffling the data. Used in 'sag' solver.
+ return_n_iter : boolean, default False
+     If True, the method also returns `n_iter`, the actual number of
+     iterations performed by the solver.
+
Returns
-------
coef : array, shape = [n_features] or [n_targets, n_features]
    Weight vector(s).

+ n_iter : int, optional
+     The actual number of iterations performed by the solver.
+     Only returned if `return_n_iter` is True.
+
Notes
-----
This function won't compute the intercept.
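For review context, a minimal sketch of how the new `return_n_iter` flag could be exercised once this lands. The toy data below is invented for illustration; only the `ridge_regression` signature changed in this hunk is assumed:

```python
import numpy as np
from sklearn.linear_model import ridge_regression

# Purely illustrative data.
rng = np.random.RandomState(42)
X = rng.randn(50, 3)
y = np.dot(X, np.array([1.0, -2.0, 0.5])) + 0.01 * rng.randn(50)

# Default behaviour is unchanged: only the coefficients come back.
coef = ridge_regression(X, y, alpha=1.0)

# With return_n_iter=True, an iterative solver such as 'sag' also
# reports how many iterations it actually performed.
coef, n_iter = ridge_regression(X, y, alpha=1.0, solver='sag',
                                max_iter=1000, tol=1e-4, random_state=0,
                                return_n_iter=True)
print(coef, n_iter)
```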
@@ -364,9 +376,10 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
coef = np.empty((y.shape[1], n_features))
n_iter = np.empty(y.shape[1], dtype=np.int32)
for i, (alpha_i, target) in enumerate(zip(alpha, y.T)):
-     coef_, n_iter_ = sag_ridge(
-         X, target.ravel(), sample_weight, alpha_i, max_iter, tol,
-         verbose, random_state, False, max_squared_sum)
+     coef_, n_iter_, _ = sag_solver(
+         X, target.ravel(), sample_weight, 'squared', alpha_i,
+         max_iter, tol, verbose, random_state, False, max_squared_sum,
+         dict())
    coef[i] = coef_
    n_iter[i] = n_iter_
@@ -382,7 +395,10 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
# When y was passed as a 1d-array, we flatten the coefficients.
coef = coef.ravel()

- return coef, n_iter
+ if return_n_iter:
+     return coef, n_iter
+ else:
+     return coef


class _BaseRidge(six.with_metaclass(ABCMeta, LinearModel)):
@@ -415,7 +431,7 @@ def fit(self, X, y, sample_weight=None):
self.coef_, self.n_iter_ = ridge_regression(
    X, y, alpha=self.alpha, sample_weight=sample_weight,
    max_iter=self.max_iter, tol=self.tol, solver=self.solver,
-     random_state=self.random_state)
+     random_state=self.random_state, return_n_iter=True)

self._set_intercept(X_mean, y_mean, X_std)
return self
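Since `fit` now calls `ridge_regression` with `return_n_iter=True`, the estimator exposes the iteration count through the new `n_iter_` attribute documented later in this diff. A small sketch of what that looks like from the estimator side (data invented for illustration):

```python
import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
y = np.dot(X, rng.randn(5)) + 0.1 * rng.randn(100)

# With an iterative solver, n_iter_ records how many iterations were
# actually run; non-iterative solvers leave it as None.
reg = Ridge(alpha=1.0, solver='sag', max_iter=1000, tol=1e-4,
            random_state=0)
reg.fit(X, y)
print(reg.n_iter_)
```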
@@ -479,8 +495,11 @@ class Ridge(_BaseRidge, RegressorMixin):
in old scipy versions. It also uses an iterative procedure.

- 'sag' uses a Stochastic Average Gradient descent. It also uses an
- iterative procedure, and is faster than other solvers when both
- n_samples and n_features are large.
+ 'sag' uses a Stochastic Average Gradient descent. It also uses an
+ iterative procedure, and is often faster than other solvers when
+ both n_samples and n_features are large. Note that 'sag' fast
+ convergence is only guaranteed on features with approximately the
+ same scale. You can preprocess the data with a scaler from
+ sklearn.preprocessing.

All last four solvers support both dense and sparse data.
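The scaling caveat added to the docstring above is worth illustrating. A minimal sketch (data and scale factors invented) of rescaling with `sklearn.preprocessing.StandardScaler` before using `solver='sag'`:

```python
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(0)
# Two features on wildly different scales: SAG converges slowly here.
X = np.hstack([rng.randn(200, 1), 1e3 * rng.randn(200, 1)])
y = np.dot(X, np.array([2.0, 1e-3])) + rng.randn(200)

# Rescale the features so SAG's fast-convergence guarantee applies.
X_scaled = StandardScaler().fit_transform(X)
reg = Ridge(alpha=1.0, solver='sag', random_state=0).fit(X_scaled, y)
print(reg.coef_)
```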
@@ -624,15 +643,13 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge):
coef_ : array, shape (n_features,) or (n_classes, n_features)
    Weight vector(s).

- <<<<<<< HEAD
intercept_ : float | array, shape = (n_targets,)
    Independent term in decision function. Set to 0.0 if
    ``fit_intercept = False``.
- =======
+
n_iter_ : array or None, shape (n_targets,)
    Actual number of iterations for each target. Available only for
    sag and lsqr solvers. Other solvers will return None.
- >>>>>>> ENH add n_iter in ridge

See also
--------