34
34
from ..utils .seq_dataset import ArrayDataset , CSRDataset
35
35
36
36
37
- ###
38
- ### TODO: intercept for all models
39
- ### We should define a common function to center data instead of
40
- ### repeating the same code inside each fit method.
37
+ # TODO: intercept for all models
41
38
42
- ### TODO: bayesian_ridge_regression and bayesian_regression_ard
43
- ### should be squashed into its respective objects.
39
+ # TODO: bayesian_ridge_regression and bayesian_regression_ard
40
+ # should be squashed into its respective objects.
44
41
45
42
SPARSE_INTERCEPT_DECAY = 0.01
46
43
# For sparse data intercept updates are scaled by this decay factor to avoid
@@ -69,12 +66,9 @@ def make_dataset(X, y, sample_weight, random_state=None):
69
66
return dataset , intercept_decay
70
67
71
68
72
- def sparse_center_data (X , y , fit_intercept , normalize = False ):
73
- """
74
- Compute information needed to center data to have mean zero along
75
- axis 0. Be aware that X will not be centered since it would break
76
- the sparsity, but will be normalized if asked so.
77
- """
69
+ # TODO: this reproduces the behavior prior to 0.17
70
+ # Must be removed in 0.19
71
+ def _sparse_center_data (X , y , fit_intercept , normalize = None ):
78
72
if fit_intercept :
79
73
A3E2
# we might require not to change the csr matrix sometimes
80
74
# store a copy if normalize is True.
@@ -106,15 +100,96 @@ def sparse_center_data(X, y, fit_intercept, normalize=False):
106
100
return X , y , X_mean , y_mean , X_std
107
101
108
102
109
- def center_data (X , y , fit_intercept , normalize = False , copy = True ,
110
- sample_weight = None ):
103
+ def sparse_center_data (X , y , fit_intercept , standardize = False ,
104
+ normalize = None ):
105
+ """
106
+ Compute information needed to center data to have mean zero along
107
+ axis 0. Be aware that X will not be centered since it would break
108
+ the sparsity, but will be standardized if asked so.
109
+ """
110
+ if normalize is not None :
111
+ warnings .warn ("The `normalize` parameter is not in use anymore from "
112
+ "version 0.17 and will be removed in 0.19. If you want "
113
+ "to standardize the data instead, use"
114
+ "`standardize=True`" , DeprecationWarning )
115
+ return _sparse_center_data (X , y , fit_intercept , normalize )
116
+
117
+ if fit_intercept :
118
+ # the caller may need the csr matrix left unchanged, so
119
+ # store a copy if standardize is True.
120
+ # Change dtype to float64 since mean_variance_axis accepts
121
+ # it that way.
122
+ if sp .isspmatrix (X ) and X .getformat () == 'csr' :
123
+ X = sp .csr_matrix (X , copy = standardize , dtype = np .float64 )
124
+ else :
125
+ X = sp .csc_matrix (X , copy = standardize , dtype = np .float64 )
126
+
127
+ X_mean , X_var = mean_variance_axis (X , axis = 0 )
128
+ if standardize :
129
+ # transform variance to std in-place
130
+ X_std = np .sqrt (X_var , X_var )
131
+ del X_var
132
+ X_std [X_std == 0 ] = 1
133
+ inplace_column_scale (X , 1. / X_std )
134
+ else :
135
+ X_std = np .ones (X .shape [1 ])
136
+ y_mean = y .mean (axis = 0 )
137
+ y = y - y_mean
138
+ else :
139
+ X_mean = np .zeros (X .shape [1 ])
140
+ X_std = np .ones (X .shape [1 ])
141
+ y_mean = 0. if y .ndim == 1 else np .zeros (y .shape [1 ], dtype = X .dtype )
142
+
143
+ return X , y , X_mean , y_mean , X_std
144
+
145
+
146
+ # TODO: this reproduces the behavior prior to 0.17
147
+ # Must be removed in 0.19
148
+ def _center_data (X , y , fit_intercept , normalize = False , copy = True ,
149
+ sample_weight = None ):
150
+
151
+ X = as_float_array (X , copy )
152
+ if fit_intercept :
153
+ if isinstance (sample_weight , numbers .Number ):
154
+ sample_weight = None
155
+ if sp .issparse (X ):
156
+ X_mean = np .zeros (X .shape [1 ])
157
+ X_std = np .ones (X .shape [1 ])
158
+ else :
159
+ X_mean = np .average (X , axis = 0 , weights = sample_weight )
160
+ X -= X_mean
161
+ if normalize :
162
+ # XXX: currently scaled to variance=n_samples
163
+ X_std = np .sqrt (np .sum (X ** 2 , axis = 0 ))
164
+ X_std [X_std == 0 ] = 1
165
+ X /= X_std
166
+ else :
167
+ X_std = np .ones (X .shape [1 ])
168
+ y_mean = np .average (y , axis = 0 , weights = sample_weight )
169
+ y = y - y_mean
170
+ else :
171
+ X_mean = np .zeros (X .shape [1 ])
172
+ X_std = np .ones (X .shape [1 ])
173
+ y_mean = 0. if y .ndim == 1 else np .zeros (y .shape [1 ], dtype = X .dtype )
174
+ return X , y , X_mean , y_mean , X_std
175
+
176
+
177
+ def center_data (X , y , fit_intercept , standardize = False , normalize = None ,
178
+ copy = True , sample_weight = None ):
111
179
"""
112
180
Centers data to have mean zero along axis 0. This is here because
113
181
nearly all linear models will want their data to be centered.
114
182
115
183
If sample_weight is not None, then the weighted mean of X and y
116
184
is zero, and not the mean itself
117
185
"""
186
+ if normalize is not None :
187
+ warnings .warn ("The `normalize` parameter is not in use anymore from "
188
+ "version 0.17 and will be removed in 0.19. If you want "
189
+ "to standardize the data instead, use"
190
+ "`standardize=True`" , DeprecationWarning )
191
+ return _center_data (X , y , fit_intercept , normalize , copy , sample_weight )
192
+
118
193
X = as_float_array (X , copy )
119
194
if fit_intercept :
120
195
if isinstance (sample_weight , numbers .Number ):
@@ -125,9 +200,8 @@ def center_data(X, y, fit_intercept, normalize=False, copy=True,
125
200
else :
126
201
X_mean = np .average (X , axis = 0 , weights = sample_weight )
127
202
X -= X_mean
128
- if normalize :
129
- # XXX: currently scaled to variance=n_samples
130
- X_std = np .sqrt (np .sum (X ** 2 , axis = 0 ))
203
+ if standardize :
204
+ X_std = np .sqrt (np .mean (X ** 2 , axis = 0 ))
131
205
X_std [X_std == 0 ] = 1
132
206
X /= X_std
133
207
else :
@@ -356,8 +430,8 @@ class LinearRegression(LinearModel, RegressorMixin):
356
430
to false, no intercept will be used in calculations
357
431
(e.g. data is expected to be already centered).
358
432
359
- normalize : boolean, optional, default False
360
- If True, the regressors X will be normalized before regression.
433
+ standardize : boolean, optional, default False
434
+ If True, the regressors X will be standardized before regression.
361
435
362
436
copy_X : boolean, optional, default True
363
437
If True, X will be copied; else, it may be overwritten.
@@ -385,13 +459,26 @@ class LinearRegression(LinearModel, RegressorMixin):
385
459
386
460
"""
387
461
388
- def __init__ (self , fit_intercept = True , normalize = False , copy_X = True ,
389
- n_jobs = 1 ):
462
+ def __init__ (self , fit_intercept = True , standardize = False , normalize = None ,
463
+ copy_X = True , n_jobs = 1 ):
464
+ if normalize is not None :
465
+ warnings .warn ("The `normalize` parameter is not in use anymore "
466
+ "from version 0.17 and will be removed in 0.19. If "
467
+ "you want the data to be standardized instead, use "
468
+ "`standardize=True`" , DeprecationWarning )
390
469
self .fit_intercept = fit_intercept
391
- self .normalize = normalize
470
+ self .standardize = standardize
392
471
self .copy_X = copy_X
393
472
self .n_jobs = n_jobs
394
473
474
+ @property
475
+ @deprecated ("The `normalize` attribute is not in use anymore "
476
+ "from version 0.17 and will be removed in 0.19. If "
477
+ "you want the data to be standardized instead, use "
478
+ "`standardize=True`" )
479
+ def normalize (self ):
480
+ return None
481
+
395
482
def fit (self , X , y , sample_weight = None ):
396
483
"""
397
484
Fit linear model.
@@ -416,11 +503,13 @@ def fit(self, X, y, sample_weight=None):
416
503
X , y = check_X_y (X , y , accept_sparse = ['csr' , 'csc' , 'coo' ],
417
504
y_numeric = True , multi_output = True )
418
505
419
- if ((sample_weight is not None ) and np .atleast_1d (sample_weight ).ndim > 1 ):
506
+ if ((sample_weight is not None ) and
507
+ np .atleast_1d (sample_weight ).ndim > 1 ):
420
508
sample_weight = column_or_1d (sample_weight , warn = True )
421
509
422
510
X , y , X_mean , y_mean , X_std = self ._center_data (
423
- X , y , self .fit_intercept , self .normalize , self .copy_X ,
511
+ X , y , fit_intercept = self .fit_intercept ,
512
+ standardize = self .standardize , copy = self .copy_X ,
424
513
sample_weight = sample_weight )
425
514
426
515
if sample_weight is not None :
@@ -450,24 +539,25 @@ def fit(self, X, y, sample_weight=None):
450
539
return self
451
540
452
541
453
- def _pre_fit (X , y , Xy , precompute , normalize , fit_intercept , copy ):
542
+ def _pre_fit (X , y , Xy , precompute , standardize , fit_intercept , copy ):
454
543
"""Aux function used at beginning of fit in linear models"""
455
544
n_samples , n_features = X .shape
456
545
457
546
if sparse .isspmatrix (X ):
458
547
precompute = False
459
548
X , y , X_mean , y_mean , X_std = sparse_center_data (
460
- X , y , fit_intercept , normalize )
549
+ X , y , fit_intercept = fit_intercept , standardize = standardize )
461
550
else :
462
551
# copy was done in fit if necessary
463
552
X , y , X_mean , y_mean , X_std = center_data (
464
- X , y , fit_intercept , normalize , copy = copy )
553
+ X , y , fit_intercept = fit_intercept , standardize = standardize ,
554
+ copy = copy )
465
555
if hasattr (precompute , '__array__' ) and (
466
- fit_intercept and not np .allclose (X_mean , np .zeros (n_features ))
467
- or normalize and not np .allclose (X_std , np .ones (n_features ))):
556
+ fit_intercept and not np .allclose (X_mean , np .zeros (n_features )) or
557
+ standardize and not np .allclose (X_std , np .ones (n_features ))):
468
558
warnings .warn ("Gram matrix was provided but X was centered"
469
559
" to fit intercept, "
470
- "or X was normalized : recomputing Gram matrix." ,
560
+ "or X was standardized : recomputing Gram matrix." ,
471
561
UserWarning )
472
562
# recompute Gram
473
563
precompute = 'auto'
0 commit comments