@@ -226,9 +226,17 @@ def _solve_svd(X, y, alpha):
226
226
return np .dot (Vt .T , d_UT_y ).T
227
227
228
228
229
+ def _get_valid_accept_sparse (is_X_sparse , solver ):
230
+ if is_X_sparse and solver in ['auto' , 'sag' , 'saga' ]:
231
+ return 'csr'
232
+ else :
233
+ return ['csr' , 'csc' , 'coo' ]
234
+
235
+
229
236
def ridge_regression (X , y , alpha , sample_weight = None , solver = 'auto' ,
230
237
max_iter = None , tol = 1e-3 , verbose = 0 , random_state = None ,
231
- return_n_iter = False , return_intercept = False ):
238
+ return_n_iter = False , return_intercept = False ,
239
+ check_input = True ):
232
240
"""Solve the ridge equation by the method of normal equations.
233
241
234
242
Read more in the :ref:`User Guide <ridge_regression>`.
@@ -332,6 +340,11 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
332
340
333
341
.. versionadded:: 0.17
334
342
343
+ check_input : boolean, default True
344
+ If False, the input arrays X and y will not be checked.
345
+
346
+ .. versionadded:: 0.21
347
+
335
348
Returns
336
349
-------
337
350
coef : array, shape = [n_features] or [n_targets, n_features]
@@ -360,13 +373,14 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
360
373
return_n_iter = return_n_iter ,
361
374
return_intercept = return_intercept ,
362
375
X_scale = None ,
363
- X_offset = None )
376
+ X_offset = None ,
377
+ check_input = check_input )
364
378
365
379
366
380
def _ridge_regression (X , y , alpha , sample_weight = None , solver = 'auto' ,
367
381
max_iter = None , tol = 1e-3 , verbose = 0 , random_state = None ,
368
382
return_n_iter = False , return_intercept = False ,
369
- X_scale = None , X_offset = None ):
383
+ X_scale = None , X_offset = None , check_input = True ):
370
384
371
385
has_sw = sample_weight is not None
372
386
@@ -388,17 +402,12 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
388
402
"intercept. Please change solver to 'sag' or set "
389
403
"return_intercept=False." )
390
404
391
- _dtype = [np .float64 , np .float32 ]
392
-
393
- # SAG needs X and y columns to be C-contiguous and np.float64
394
- if solver in ['sag' , 'saga' ]:
395
- X = check_array (X , accept_sparse = ['csr' ],
396
- dtype = np .float64 , order = 'C' )
397
- y = check_array (y , dtype = np .float64 , ensure_2d = False , order = 'F' )
398
- else :
399
- X = check_array (X , accept_sparse = ['csr' , 'csc' , 'coo' ],
400
- dtype = _dtype )
401
- y = check_array (y , dtype = X .dtype , ensure_2d = False )
405
+ if check_input :
406
+ _dtype = [np .float64 , np .float32 ]
407
+ _accept_sparse = _get_valid_accept_sparse (sparse .issparse (X ), solver )
408
+ X = check_array (X , accept_sparse = _accept_sparse , dtype = _dtype ,
409
+ order = "C" )
410
+ y = check_array (y , dtype = X .dtype , ensure_2d = False , order = "C" )
402
411
check_consistent_length (X , y )
403
412
404
413
n_samples , n_features = X .shape
@@ -417,8 +426,6 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
417
426
raise ValueError ("Number of samples in X and y does not correspond:"
418
427
" %d != %d" % (n_samples , n_samples_ ))
419
428
420
-
421
-
422
429
if has_sw :
423
430
if np .atleast_1d (sample_weight ).ndim > 1 :
424
431
raise ValueError ("Sample weights must be 1D array or scalar" )
@@ -438,7 +445,6 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
438
445
if alpha .size == 1 and n_targets > 1 :
439
446
alpha = np .repeat (alpha , n_targets )
440
447
441
-
442
448
n_iter = None
443
449
if solver == 'sparse_cg' :
444
450
coef = _solve_sparse_cg (X , y , alpha ,
@@ -461,7 +467,6 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
461
467
except linalg .LinAlgError :
462
468
# use SVD solver if matrix is singular
463
469
solver = 'svd'
464
-
465
470
else :
466
471
try :
467
472
coef = _solve_cholesky (X , y , alpha )
@@ -473,11 +478,12 @@ def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
473
478
# precompute max_squared_sum for all targets
474
479
max_squared_sum = row_norms (X , squared = True ).max ()
475
480
476
- coef = np .empty ((y .shape [1 ], n_features ))
481
+ coef = np .empty ((y .shape [1 ], n_features ), dtype = X . dtype )
477
482
n_iter = np .empty (y .shape [1 ], dtype = np .int32 )
478
- intercept = np .zeros ((y .shape [1 ], ))
483
+ intercept = np .zeros ((y .shape [1 ], ), dtype = X . dtype )
479
484
for i , (alpha_i , target ) in enumerate (zip (alpha , y .T )):
480
- init = {'coef' : np .zeros ((n_features + int (return_intercept ), 1 ))}
485
+ init = {'coef' : np .zeros ((n_features + int (return_intercept ), 1 ),
486
+ dtype = X .dtype )}
481
487
coef_ , n_iter_ , _ = sag_solver (
482
488
X , target .ravel (), sample_weight , 'squared' , alpha_i , 0 ,
483
489
max_iter , tol , verbose , random_state , False , max_squared_sum ,
@@ -530,13 +536,13 @@ def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
530
536
531
537
def fit (self , X , y , sample_weight = None ):
532
538
533
- if self . solver in ( 'sag' , 'saga' ):
534
- _dtype = np .float64
535
- else :
536
- # all other solvers work at both float precision levels
537
- _dtype = [ np . float64 , np . float32 ]
538
-
539
- X , y = check_X_y ( X , y , [ 'csr' , 'csc' , 'coo' ], dtype = _dtype ,
539
+ # all other solvers work at both float precision levels
540
+ _dtype = [ np .float64 , np . float32 ]
541
+ _accept_sparse = _get_valid_accept_sparse ( sparse . issparse ( X ),
542
+ self . solver )
543
+ X , y = check_X_y ( X , y ,
544
+ accept_sparse = _accept_sparse ,
545
+ dtype = _dtype ,
540
546
multi_output = True , y_numeric = True )
541
547
542
548
if ((sample_weight is not None ) and
@@ -555,7 +561,7 @@ def fit(self, X, y, sample_weight=None):
555
561
X , y , alpha = self .alpha , sample_weight = sample_weight ,
556
562
max_iter = self .max_iter , tol = self .tol , solver = self .solver ,
557
563
random_state = self .random_state , return_n_iter = True ,
558
- return_intercept = True )
564
+ return_intercept = True , check_input = False )
559
565
# add the offset which was subtracted by _preprocess_data
560
566
self .intercept_ += y_offset
561
567
else :
@@ -570,8 +576,7 @@ def fit(self, X, y, sample_weight=None):
570
576
X , y , alpha = self .alpha , sample_weight = sample_weight ,
571
577
max_iter = self .max_iter , tol = self .tol , solver = self .solver ,
572
578
random_state = self .random_state , return_n_iter = True ,
573
- return_intercept = False , ** params )
574
-
579
+ return_intercept = False , check_input = False , ** params )
575
580
self ._set_intercept (X_offset , y_offset , X_scale )
576
581
577
582
return self
@@ -893,8 +898,9 @@ def fit(self, X, y, sample_weight=None):
893
898
-------
894
899
self : returns an instance of self.
895
900
"""
896
- check_X_y (X , y , accept_sparse = ['csr' , 'csc' , 'coo' ],
897
- multi_output = True )
901
+ _accept_sparse = _get_valid_accept_sparse (sparse .issparse (X ),
902
+ self .solver )
903
+ check_X_y (X , y , accept_sparse = _accept_sparse , multi_output = True )
898
904
899
905
self ._label_binarizer = LabelBinarizer (pos_label = 1 , neg_label = - 1 )
900
906
Y = self ._label_binarizer .fit_transform (y )
@@ -1077,10 +1083,13 @@ def fit(self, X, y, sample_weight=None):
1077
1083
-------
1078
1084
self : object
1079
1085
"""
1080
- X , y = check_X_y (X , y , ['csr' , 'csc' , 'coo' ], dtype = np .float64 ,
1086
+ X , y = check_X_y (X , y ,
1087
+ accept_sparse = ['csr' , 'csc' , 'coo' ],
1088
+ dtype = [np .float64 , np .float32 ],
1081
1089
multi_output = True , y_numeric = True )
1082
1090
if sample_weight is not None and not isinstance (sample_weight , float ):
1083
- sample_weight = check_array (sample_weight , ensure_2d = False )
1091
+ sample_weight = check_array (sample_weight , ensure_2d = False ,
1092
+ dtype = X .dtype )
1084
1093
n_samples , n_features = X .shape
1085
1094
1086
1095
X , y , X_offset , y_offset , X_scale = LinearModel ._preprocess_data (
0 commit comments