34
34
from ..utils .seq_dataset import ArrayDataset , CSRDataset
35
35
36
36
37
- ###
38
- ### TODO: intercept for all models
39
- ### We should define a common function to center data instead of
40
- ### repeating the same code inside each fit method.
37
+ # TODO: intercept for all models
41
38
42
- ### TODO: bayesian_ridge_regression and bayesian_regression_ard
43
- ### should be squashed into its respective objects.
39
+ # TODO: bayesian_ridge_regression and bayesian_regression_ard
40
+ # should be squashed into its respective objects.
44
41
45
42
SPARSE_INTERCEPT_DECAY = 0.01
46
43
# For sparse data intercept updates are scaled by this decay factor to avoid
@@ -69,27 +66,32 @@ def make_dataset(X, y, sample_weight, random_state=None):
69
66
return dataset , intercept_decay
70
67
71
68
72
- def sparse_center_data (X , y , fit_intercept , normalize = False ):
69
+ def sparse_center_data (X , y , fit_intercept , standardize = False ,
70
+ normalize = None ):
73
71
"""
74
72
Compute information needed to center data to have mean zero along
75
73
axis 0. Be aware that X will not be centered since it would break
76
- the sparsity, but will be normalized if asked so.
74
+ the sparsity, but will be standardized if requested.
77
75
"""
76
+ if normalize is not None :
77
+ warnings .warn ("The `normalize` parameter is not in use anymore from "
78
+ "version 0.17 and will be removed in 0.19. If you want "
79
+ "to standardize the data instead, use "
80
+ "`standardize=True`" , DeprecationWarning )
81
+
78
82
if fit_intercept :
79
83
# we might require not to change the csr matrix sometimes
80
- # store a copy if normalize is True.
84
+ # store a copy if standardize is True.
81
85
# Change dtype to float64 since mean_variance_axis accepts
82
86
# it that way.
83
87
if sp .isspmatrix (X ) and X .getformat () == 'csr' :
84
- X = sp .csr_matrix (X , copy = normalize , dtype = np .float64 )
88
+ X = sp .csr_matrix (X , copy = standardize , dtype = np .float64 )
85
89
else :
86
- X = sp .csc_matrix (X , copy = normalize , dtype = np .float64 )
90
+ X = sp .csc_matrix (X , copy = standardize , dtype = np .float64 )
87
91
88
92
X_mean , X_var = mean_variance_axis (X , axis = 0 )
89
- if normalize :
93
+ if standardize :
90
94
# transform variance to std in-place
91
- # XXX: currently scaled to variance=n_samples to match center_data
92
- X_var *= X .shape [0 ]
93
95
X_std = np .sqrt (X_var , X_var )
94
96
del X_var
95
97
X_std [X_std == 0 ] = 1
@@ -106,15 +108,21 @@ def sparse_center_data(X, y, fit_intercept, normalize=False):
106
108
return X , y , X_mean , y_mean , X_std
107
109
108
110
109
- def center_data (X , y , fit_intercept , normalize = False , copy = True ,
110
- sample_weight = None ):
111
+ def center_data (X , y , fit_intercept , standardize = False , normalize = None ,
112
+ copy = True , sample_weight = None ):
111
113
"""
112
114
Centers data to have mean zero along axis 0. This is here because
113
115
nearly all linear models will want their data to be centered.
114
116
115
117
If sample_weight is not None, then the weighted mean of X and y
116
118
is zero, and not the mean itself
117
119
"""
120
+ if normalize is not None :
121
+ warnings .warn ("The `normalize` parameter is not in use anymore from "
122
+ "version 0.17 and will be removed in 0.19. If you want "
123
+ "to standardize the data instead, use "
124
+ "`standardize=True`" , DeprecationWarning )
125
+
118
126
X = as_float_array (X , copy )
119
127
if fit_intercept :
120
128
if isinstance (sample_weight , numbers .Number ):
@@ -125,9 +133,8 @@ def center_data(X, y, fit_intercept, normalize=False, copy=True,
125
133
else :
126
134
X_mean = np .average (X , axis = 0 , weights = sample_weight )
127
135
X -= X_mean
128
- if normalize :
129
- # XXX: currently scaled to variance=n_samples
130
- X_std = np .sqrt (np .sum (X ** 2 , axis = 0 ))
136
+ if standardize :
137
+ X_std = np .sqrt (np .mean (X ** 2 , axis = 0 ))
131
138
X_std [X_std == 0 ] = 1
132
139
X /= X_std
133
140
else :
@@ -356,8 +363,8 @@ class LinearRegression(LinearModel, RegressorMixin):
356
363
to false, no intercept will be used in calculations
357
364
(e.g. data is expected to be already centered).
358
365
359
- normalize : boolean, optional, default False
360
- If True, the regressors X will be normalized before regression.
366
+ standardize : boolean, optional, default False
367
+ If True, the regressors X will be standardized before regression.
361
368
362
369
copy_X : boolean, optional, default True
363
370
If True, X will be copied; else, it may be overwritten.
@@ -385,13 +392,26 @@ class LinearRegression(LinearModel, RegressorMixin):
385
392
386
393
"""
387
394
388
- def __init__ (self , fit_intercept = True , normalize = False , copy_X = True ,
389
- n_jobs = 1 ):
395
+ def __init__ (self , fit_intercept = True , standardize = False , normalize = None ,
396
+ copy_X = True , n_jobs = 1 ):
397
+ if normalize is not None :
398
+ warnings .warn ("The `normalize` parameter is not in use anymore "
399
+ "from version 0.17 and will be removed in 0.19. If "
400
+ "you want the data to be standardized instead, use "
401
+ "`standardize=True`" , DeprecationWarning )
390
402
self .fit_intercept = fit_intercept
391
- self .normalize = normalize
403
+ self .standardize = standardize
392
404
self .copy_X = copy_X
393
405
self .n_jobs = n_jobs
394
406
407
+ @property
408
+ @deprecated ("The `normalize` attribute is not in use anymore "
409
+ "from version 0.17 and will be removed in 0.19. If "
410
+ "you want the data to be standardized instead, use "
411
+ "`standardize=True`" )
412
+ def normalize (self ):
413
+ return None
414
+
395
415
def fit (self , X , y , sample_weight = None ):
396
416
"""
397
417
Fit linear model.
@@ -416,11 +436,13 @@ def fit(self, X, y, sample_weight=None):
416
436
X , y = check_X_y (X , y , accept_sparse = ['csr' , 'csc' , 'coo' ],
417
437
y_numeric = True , multi_output = True )
418
438
419
- if ((sample_weight is not None ) and np .atleast_1d (sample_weight ).ndim > 1 ):
439
+ if ((sample_weight is not None ) and
440
+ np .atleast_1d (sample_weight ).ndim > 1 ):
420
441
sample_weight = column_or_1d (sample_weight , warn = True )
421
442
422
443
X , y , X_mean , y_mean , X_std = self ._center_data (
423
- X , y , self .fit_intercept , self .normalize , self .copy_X ,
444
+ X , y , fit_intercept = self .fit_intercept ,
445
+ standardize = self .standardize , copy = self .copy_X ,
424
446
sample_weight = sample_weight )
425
447
426
448
if sample_weight is not None :
@@ -450,24 +472,25 @@ def fit(self, X, y, sample_weight=None):
450
472
return self
451
473
452
474
453
- def _pre_fit (X , y , Xy , precompute , normalize , fit_intercept , copy ):
475
+ def _pre_fit (X , y , Xy , precompute , standardize , fit_intercept , copy ):
454
476
"""Aux function used at beginning of fit in linear models"""
455
477
n_samples , n_features = X .shape
456
478
457
479
if sparse .isspmatrix (X ):
458
480
precompute = False
459
481
X , y , X_mean , y_mean , X_std = sparse_center_data (
460
- X , y , fit_intercept , normalize )
482
+ X , y , fit_intercept = fit_intercept , standardize = standardize )
461
483
else :
462
484
# copy was done in fit if necessary
463
485
X , y , X_mean , y_mean , X_std = center_data (
464
- X , y , fit_intercept , normalize , copy = copy )
486
+ X , y , fit_intercept = fit_intercept , standardize = standardize ,
487
+ copy = copy )
465
488
if hasattr (precompute , '__array__' ) and (
466
- fit_intercept and not np .allclose (X_mean , np .zeros (n_features ))
467
- or normalize and not np .allclose (X_std , np .ones (n_features ))):
489
+ fit_intercept and not np .allclose (X_mean , np .zeros (n_features )) or
490
+ standardize and not np .allclose (X_std , np .ones (n_features ))):
468
491
warnings .warn ("Gram matrix was provided but X was centered"
469
492
" to fit intercept, "
470
- "or X was normalized : recomputing Gram matrix." ,
493
+ "or X was standardized : recomputing Gram matrix." ,
471
494
UserWarning )
472
495
# recompute Gram
473
496
precompute = 'auto'
0 commit comments