8000 normalized param deprecated + test fixed · scikit-learn/scikit-learn@f1356ff · GitHub

Commit f1356ff

Author: giorgiop (committed)

normalized param deprecated + test fixed

1 parent e8d2b93 · commit f1356ff

File tree: 4 files changed (+208, -89 lines)

sklearn/linear_model/base.py

Lines changed: 120 additions & 30 deletions
@@ -34,13 +34,10 @@
 from ..utils.seq_dataset import ArrayDataset, CSRDataset


-###
-### TODO: intercept for all models
-### We should define a common function to center data instead of
-### repeating the same code inside each fit method.
+# TODO: intercept for all models

-### TODO: bayesian_ridge_regression and bayesian_regression_ard
-### should be squashed into its respective objects.
+# TODO: bayesian_ridge_regression and bayesian_regression_ard
+# should be squashed into its respective objects.

 SPARSE_INTERCEPT_DECAY = 0.01
 # For sparse data intercept updates are scaled by this decay factor to avoid
@@ -69,12 +66,9 @@ def make_dataset(X, y, sample_weight, random_state=None):
     return dataset, intercept_decay


-def sparse_center_data(X, y, fit_intercept, normalize=False):
-    """
-    Compute information needed to center data to have mean zero along
-    axis 0. Be aware that X will not be centered since it would break
-    the sparsity, but will be normalized if asked so.
-    """
+# TODO: this reproduces the behavior prior to 0.17
+# Must be removed in 0.19
+def _sparse_center_data(X, y, fit_intercept, normalize=None):
     if fit_intercept:
         # we might require not to change the csr matrix sometimes
         # store a copy if normalize is True.
@@ -106,15 +100,96 @@ def sparse_center_data(X, y, fit_intercept, normalize=False):
     return X, y, X_mean, y_mean, X_std


-def center_data(X, y, fit_intercept, normalize=False, copy=True,
-                sample_weight=None):
+def sparse_center_data(X, y, fit_intercept, standardize=False,
+                       normalize=None):
+    """
+    Compute information needed to center data to have mean zero along
+    axis 0. Be aware that X will not be centered since it would break
+    the sparsity, but will be standardized if asked so.
+    """
+    if normalize is not None:
+        warnings.warn("The `normalize` parameter is not in use anymore from "
+                      "version 0.17 and will be removed in 0.19. If you want "
+                      "to standardize the data instead, use "
+                      "`standardize=True`", DeprecationWarning)
+        return _sparse_center_data(X, y, fit_intercept, normalize)
+
+    if fit_intercept:
+        # we might require not to change the csr matrix sometimes
+        # store a copy if standardize is True.
+        # Change dtype to float64 since mean_variance_axis accepts
+        # it that way.
+        if sp.isspmatrix(X) and X.getformat() == 'csr':
+            X = sp.csr_matrix(X, copy=standardize, dtype=np.float64)
+        else:
+            X = sp.csc_matrix(X, copy=standardize, dtype=np.float64)
+
+        X_mean, X_var = mean_variance_axis(X, axis=0)
+        if standardize:
+            # transform variance to std in-place
+            X_std = np.sqrt(X_var, X_var)
+            del X_var
+            X_std[X_std == 0] = 1
+            inplace_column_scale(X, 1. / X_std)
+        else:
+            X_std = np.ones(X.shape[1])
+        y_mean = y.mean(axis=0)
+        y = y - y_mean
+    else:
+        X_mean = np.zeros(X.shape[1])
+        X_std = np.ones(X.shape[1])
+        y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
+
+    return X, y, X_mean, y_mean, X_std
+
+
+# TODO: this reproduces the behavior prior to 0.17
+# Must be removed in 0.19
+def _center_data(X, y, fit_intercept, normalize=False, copy=True,
+                 sample_weight=None):
+
+    X = as_float_array(X, copy)
+    if fit_intercept:
+        if isinstance(sample_weight, numbers.Number):
+            sample_weight = None
+        if sp.issparse(X):
+            X_mean = np.zeros(X.shape[1])
+            X_std = np.ones(X.shape[1])
+        else:
+            X_mean = np.average(X, axis=0, weights=sample_weight)
+            X -= X_mean
+            if normalize:
+                # XXX: currently scaled to variance=n_samples
+                X_std = np.sqrt(np.sum(X ** 2, axis=0))
+                X_std[X_std == 0] = 1
+                X /= X_std
+            else:
+                X_std = np.ones(X.shape[1])
+        y_mean = np.average(y, axis=0, weights=sample_weight)
+        y = y - y_mean
+    else:
+        X_mean = np.zeros(X.shape[1])
+        X_std = np.ones(X.shape[1])
+        y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
+    return X, y, X_mean, y_mean, X_std
+
+
+def center_data(X, y, fit_intercept, standardize=False, normalize=None,
+                copy=True, sample_weight=None):
     """
     Centers data to have mean zero along axis 0. This is here because
     nearly all linear models will want their data to be centered.

     If sample_weight is not None, then the weighted mean of X and y
     is zero, and not the mean itself
     """
+    if normalize is not None:
+        warnings.warn("The `normalize` parameter is not in use anymore from "
+                      "version 0.17 and will be removed in 0.19. If you want "
+                      "to standardize the data instead, use "
+                      "`standardize=True`", DeprecationWarning)
+        return _center_data(X, y, fit_intercept, normalize, copy,
+                            sample_weight)
+
     X = as_float_array(X, copy)
     if fit_intercept:
         if isinstance(sample_weight, numbers.Number):
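A quick illustration of the dispatch logic above (an illustrative snippet, not part of the commit): passing the deprecated `normalize` argument warns and routes through the legacy `_center_data` helper, while `standardize=True` takes the new code path.

import warnings
import numpy as np
from sklearn.linear_model.base import center_data  # module path as of this commit

X = np.array([[1., 2.], [3., 4.], [5., 6.]])
y = np.array([1., 2., 3.])

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Old-style call: falls back to _center_data and warns.
    center_data(X, y, fit_intercept=True, normalize=True)
assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# New-style call: silent, rescales columns to unit variance.
X_c, y_c, X_mean, y_mean, X_std = center_data(
    X, y, fit_intercept=True, standardize=True)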
@@ -125,9 +200,8 @@ def center_data(X, y, fit_intercept, normalize=False, copy=True,
         else:
             X_mean = np.average(X, axis=0, weights=sample_weight)
             X -= X_mean
-            if normalize:
-                # XXX: currently scaled to variance=n_samples
-                X_std = np.sqrt(np.sum(X ** 2, axis=0))
+            if standardize:
+                X_std = np.sqrt(np.mean(X ** 2, axis=0))
                 X_std[X_std == 0] = 1
                 X /= X_std
             else:
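The switch from np.sum to np.mean in the new branch changes the scaling semantics, not just the name: the old `normalize` divided each centered column by its L2 norm, whereas `standardize` divides by the column standard deviation, giving unit variance. A standalone sketch with illustrative data:

import numpy as np

rng = np.random.RandomState(0)
Xc = rng.randn(100, 3) * [1., 5., 10.]
Xc -= Xc.mean(axis=0)                          # already centered

# Old `normalize` scaling: unit column norm, variance ends up ~1/n_samples.
X_old = Xc / np.sqrt(np.sum(Xc ** 2, axis=0))
print(X_old.var(axis=0))                       # ~[0.01, 0.01, 0.01]

# New `standardize` scaling: unit variance per column.
X_new = Xc / np.sqrt(np.mean(Xc ** 2, axis=0))
print(X_new.var(axis=0))                       # ~[1., 1., 1.]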
@@ -356,8 +430,8 @@ class LinearRegression(LinearModel, RegressorMixin):
         to false, no intercept will be used in calculations
         (e.g. data is expected to be already centered).

-    normalize : boolean, optional, default False
-        If True, the regressors X will be normalized before regression.
+    standardize : boolean, optional, default False
+        If True, the regressors X will be standardized before regression.

     copy_X : boolean, optional, default True
         If True, X will be copied; else, it may be overwritten.
@@ -385,13 +459,26 @@ class LinearRegression(LinearModel, RegressorMixin):

     """

-    def __init__(self, fit_intercept=True, normalize=False, copy_X=True,
-                 n_jobs=1):
+    def __init__(self, fit_intercept=True, standardize=False, normalize=None,
+                 copy_X=True, n_jobs=1):
+        if normalize is not None:
+            warnings.warn("The `normalize` parameter is not in use anymore "
+                          "from version 0.17 and will be removed in 0.19. If "
+                          "you want the data to be standardized instead, use "
+                          "`standardize=True`", DeprecationWarning)
         self.fit_intercept = fit_intercept
-        self.normalize = normalize
+        self.standardize = standardize
         self.copy_X = copy_X
         self.n_jobs = n_jobs

+    @property
+    @deprecated("The `normalize` attribute is not in use anymore "
+                "from version 0.17 and will be removed in 0.19. If "
+                "you want the data to be standardized instead, use "
+                "`standardize=True`")
+    def normalize(self):
+        return None
+
     def fit(self, X, y, sample_weight=None):
         """
         Fit linear model.
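The @property/@deprecated pairing added above is a reusable pattern for retiring a public attribute: the removed attribute becomes a read-only accessor that warns on every access. A minimal self-contained sketch (MyEstimator is hypothetical):

import warnings
from sklearn.utils import deprecated

class MyEstimator(object):
    def __init__(self, standardize=False):
        self.standardize = standardize

    # Decorators apply bottom-up: deprecated() wraps the getter so it warns,
    # then property makes it a read-only attribute.
    @property
    @deprecated("`normalize` is deprecated; use `standardize` instead")
    def normalize(self):
        return None

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    MyEstimator().normalize                    # attribute access triggers the warning
assert any(issubclass(w.category, DeprecationWarning) for w in caught)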
@@ -416,11 +503,13 @@ def fit(self, X, y, sample_weight=None):
         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
                          y_numeric=True, multi_output=True)

-        if ((sample_weight is not None) and np.atleast_1d(sample_weight).ndim > 1):
+        if ((sample_weight is not None) and
+                np.atleast_1d(sample_weight).ndim > 1):
             sample_weight = column_or_1d(sample_weight, warn=True)

         X, y, X_mean, y_mean, X_std = self._center_data(
-            X, y, self.fit_intercept, self.normalize, self.copy_X,
+            X, y, fit_intercept=self.fit_intercept,
+            standardize=self.standardize, copy=self.copy_X,
             sample_weight=sample_weight)

         if sample_weight is not None:
@@ -450,24 +539,25 @@ def fit(self, X, y, sample_weight=None):
         return self


-def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy):
+def _pre_fit(X, y, Xy, precompute, standardize, fit_intercept, copy):
     """Aux function used at beginning of fit in linear models"""
     n_samples, n_features = X.shape

     if sparse.isspmatrix(X):
         precompute = False
         X, y, X_mean, y_mean, X_std = sparse_center_data(
-            X, y, fit_intercept, normalize)
+            X, y, fit_intercept=fit_intercept, standardize=standardize)
     else:
         # copy was done in fit if necessary
         X, y, X_mean, y_mean, X_std = center_data(
-            X, y, fit_intercept, normalize, copy=copy)
+            X, y, fit_intercept=fit_intercept, standardize=standardize,
+            copy=copy)
     if hasattr(precompute, '__array__') and (
-        fit_intercept and not np.allclose(X_mean, np.zeros(n_features))
-        or normalize and not np.allclose(X_std, np.ones(n_features))):
+            fit_intercept and not np.allclose(X_mean, np.zeros(n_features)) or
+            standardize and not np.allclose(X_std, np.ones(n_features))):
         warnings.warn("Gram matrix was provided but X was centered"
                       " to fit intercept, "
-                      "or X was normalized : recomputing Gram matrix.",
+                      "or X was standardized : recomputing Gram matrix.",
                       UserWarning)
         # recompute Gram
         precompute = 'auto'
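The np.allclose guard in `_pre_fit` exists because a user-supplied Gram matrix computed on the raw X is stale once X has been centered or standardized. A small sketch of the mismatch (illustrative data only):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(50, 4) + 3.                    # columns with nonzero means
gram = np.dot(X.T, X)                        # Gram matrix precomputed on raw X

Xc = X - X.mean(axis=0)                      # what fit_intercept=True works on
print(np.allclose(gram, np.dot(Xc.T, Xc)))   # False -> Gram must be recomputed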

sklearn/linear_model/bayes.py

Lines changed: 38 additions & 38 deletions
@@ -139,7 +139,7 @@ def fit(self, X, y):
             X, y, self.fit_intercept, self.normalize, self.copy_X)
         n_samples, n_features = X.shape

-        ### Initialization of the values of the parameters
+        # Initialization of the values of the parameters
         alpha_ = 1. / np.var(y)
         lambda_ = 1.
@@ -156,10 +156,10 @@ def fit(self, X, y):
         U, S, Vh = linalg.svd(X, full_matrices=False)
         eigen_vals_ = S ** 2

-        ### Convergence loop of the bayesian ridge regression
+        # Convergence loop of the bayesian ridge regression
         for iter_ in range(self.n_iter):

-            ### Compute mu and sigma
+            # Compute mu and sigma
             # sigma_ = lambda_ / alpha_ * np.eye(n_features) + np.dot(X.T, X)
             # coef_ = sigma_^-1 * XT * y
             if n_samples > n_features:
@@ -178,28 +178,28 @@ def fit(self, X, y):
             logdet_sigma_[:n_samples] += alpha_ * eigen_vals_
             logdet_sigma_ = - np.sum(np.log(logdet_sigma_))

-            ### Update alpha and lambda
+            # Update alpha and lambda
             rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)
-            gamma_ = (np.sum((alpha_ * eigen_vals_)
-                      / (lambda_ + alpha_ * eigen_vals_)))
-            lambda_ = ((gamma_ + 2 * lambda_1)
-                       / (np.sum(coef_ ** 2) + 2 * lambda_2))
-            alpha_ = ((n_samples - gamma_ + 2 * alpha_1)
-                      / (rmse_ + 2 * alpha_2))
-
-            ### Compute the objective function
+            gamma_ = (np.sum((alpha_ * eigen_vals_) /
+                      (lambda_ + alpha_ * eigen_vals_)))
+            lambda_ = ((gamma_ + 2 * lambda_1) /
+                       (np.sum(coef_ ** 2) + 2 * lambda_2))
+            alpha_ = ((n_samples - gamma_ + 2 * alpha_1) /
+                      (rmse_ + 2 * alpha_2))
+
+            # Compute the objective function
             if self.compute_score:
                 s = lambda_1 * log(lambda_) - lambda_2 * lambda_
                 s += alpha_1 * log(alpha_) - alpha_2 * alpha_
-                s += 0.5 * (n_features * log(lambda_)
-                            + n_samples * log(alpha_)
-                            - alpha_ * rmse_
-                            - (lambda_ * np.sum(coef_ ** 2))
-                            - logdet_sigma_
-                            - n_samples * log(2 * np.pi))
+                s += 0.5 * (n_features * log(lambda_) +
+                            n_samples * log(alpha_) -
+                            alpha_ * rmse_ -
+                            (lambda_ * np.sum(coef_ ** 2)) -
+                            logdet_sigma_ -
+                            n_samples * log(2 * np.pi))
                 self.scores_.append(s)

-            ### Check for convergence
+            # Check for convergence
             if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:
                 if verbose:
                     print("Convergence after ", str(iter_), " iterations")
@@ -354,7 +354,7 @@ def fit(self, X, y):
         X, y, X_mean, y_mean, X_std = self._center_data(
             X, y, self.fit_intercept, self.normalize, self.copy_X)

-        ### Launch the convergence loop
+        # Launch the convergence loop
         keep_lambda = np.ones(n_features, dtype=bool)

         lambda_1 = self.lambda_1
@@ -363,51 +363,51 @@ def fit(self, X, y):
         alpha_2 = self.alpha_2
         verbose = self.verbose

-        ### Initialization of the values of the parameters
+        # Initialization of the values of the parameters
         alpha_ = 1. / np.var(y)
         lambda_ = np.ones(n_features)

         self.scores_ = list()
         coef_old_ = None

-        ### Iterative procedure of ARDRegression
+        # Iterative procedure of ARDRegression
         for iter_ in range(self.n_iter):
-            ### Compute mu and sigma (using Woodbury matrix identity)
+            # Compute mu and sigma (using Woodbury matrix identity)
             sigma_ = pinvh(np.eye(n_samples) / alpha_ +
                            np.dot(X[:, keep_lambda] *
                                   np.reshape(1. / lambda_[keep_lambda], [1, -1]),
                                   X[:, keep_lambda].T))
-            sigma_ = np.dot(sigma_, X[:, keep_lambda]
-                            * np.reshape(1. / lambda_[keep_lambda], [1, -1]))
-            sigma_ = - np.dot(np.reshape(1. / lambda_[keep_lambda], [-1, 1])
-                              * X[:, keep_lambda].T, sigma_)
+            sigma_ = np.dot(sigma_, X[:, keep_lambda] *
+                            np.reshape(1. / lambda_[keep_lambda], [1, -1]))
+            sigma_ = - np.dot(np.reshape(1. / lambda_[keep_lambda], [-1, 1]) *
+                              X[:, keep_lambda].T, sigma_)
             sigma_.flat[::(sigma_.shape[1] + 1)] += 1. / lambda_[keep_lambda]
             coef_[keep_lambda] = alpha_ * np.dot(
                 sigma_, np.dot(X[:, keep_lambda].T, y))

-            ### Update alpha and lambda
+            # Update alpha and lambda
             rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)
             gamma_ = 1. - lambda_[keep_lambda] * np.diag(sigma_)
-            lambda_[keep_lambda] = ((gamma_ + 2. * lambda_1)
-                                    / ((coef_[keep_lambda]) ** 2
-                                       + 2. * lambda_2))
-            alpha_ = ((n_samples - gamma_.sum() + 2. * alpha_1)
-                      / (rmse_ + 2. * alpha_2))
+            lambda_[keep_lambda] = ((gamma_ + 2. * lambda_1) /
+                                    ((coef_[keep_lambda]) ** 2 +
+                                     2. * lambda_2))
+            alpha_ = ((n_samples - gamma_.sum() + 2. * alpha_1) /
+                      (rmse_ + 2. * alpha_2))

-            ### Prune the weights with a precision over a threshold
+            # Prune the weights with a precision over a threshold
             keep_lambda = lambda_ < self.threshold_lambda
             coef_[~keep_lambda] = 0

-            ### Compute the objective function
+            # Compute the objective function
             if self.compute_score:
                 s = (lambda_1 * np.log(lambda_) - lambda_2 * lambda_).sum()
                 s += alpha_1 * log(alpha_) - alpha_2 * alpha_
-                s += 0.5 * (fast_logdet(sigma_) + n_samples * log(alpha_)
-                            + np.sum(np.log(lambda_)))
+                s += 0.5 * (fast_logdet(sigma_) + n_samples * log(alpha_) +
+                            np.sum(np.log(lambda_)))
                 s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_ ** 2).sum())
                 self.scores_.append(s)

-            ### Check for convergence
+            # Check for convergence
             if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:
                 if verbose:
                     print("Converged after %s iterations" % iter_)
