comments addressed · scikit-learn/scikit-learn@43d0f2c · GitHub

Commit 43d0f2c

Author: giorgiop
Message: comments addressed
1 parent: 1b6b3c6

File tree

7 files changed: +74 -72 lines changed
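This commit renames the values returned by the linear-model preprocessing helpers: X_mean becomes X_offset, y_mean becomes y_offset, and X_std/X_norm become X_scale, since these quantities are an offset that was subtracted and a scale that was divided out, not always a mean or a standard deviation. A minimal sketch of the post-commit signature, assuming the private helper keeps the import path sklearn.linear_model.base at this point in history:

    import numpy as np
    from sklearn.linear_model.base import _preprocess_data  # private API

    X = np.array([[1., 2.], [3., 4.], [5., 6.]])
    y = np.array([1., 2., 3.])

    # Five return values: the preprocessed data plus the statistics needed
    # to undo the preprocessing when setting the intercept.
    X_pre, y_pre, X_offset, y_offset, X_scale = _preprocess_data(
        X, y, fit_intercept=True, normalize=False)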

sklearn/linear_model/base.py

Lines changed: 39 additions & 37 deletions
@@ -85,7 +85,7 @@ def sparse_center_data(X, y, fit_intercept, normalize=False):
         else:
             X = sp.csc_matrix(X, copy=normalize, dtype=np.float64)
 
-        X_mean, X_var = mean_variance_axis(X, axis=0)
+        X_offset, X_var = mean_variance_axis(X, axis=0)
         if normalize:
             # transform variance to std in-place
             X_var *= X.shape[0]
@@ -95,14 +95,14 @@ def sparse_center_data(X, y, fit_intercept, normalize=False):
             inplace_column_scale(X, 1. / X_std)
         else:
             X_std = np.ones(X.shape[1])
-        y_mean = y.mean(axis=0)
-        y = y - y_mean
+        y_offset = y.mean(axis=0)
+        y = y - y_offset
     else:
-        X_mean = np.zeros(X.shape[1])
+        X_offset = np.zeros(X.shape[1])
         X_std = np.ones(X.shape[1])
-        y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
+        y_offset = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
 
-    return X, y, X_mean, y_mean, X_std
+    return X, y, X_offset, y_offset, X_std
 
 
 @deprecated("center_data will be removed in "
@@ -120,24 +120,25 @@ def center_data(X, y, fit_intercept, normalize=False, copy=True,
         if isinstance(sample_weight, numbers.Number):
             sample_weight = None
         if sp.issparse(X):
-            X_mean = np.zeros(X.shape[1])
+            X_offset = np.zeros(X.shape[1])
             X_std = np.ones(X.shape[1])
         else:
-            X_mean = np.average(X, axis=0, weights=sample_weight)
-            X -= X_mean
+            X_offset = np.average(X, axis=0, weights=sample_weight)
+            X -= X_offset
+            # XXX: currently scaled to variance=n_samples
             if normalize:
                 X_std = np.sqrt(np.sum(X ** 2, axis=0))
                 X_std[X_std == 0] = 1
                 X /= X_std
             else:
                 X_std = np.ones(X.shape[1])
-        y_mean = np.average(y, axis=0, weights=sample_weight)
-        y = y - y_mean
+        y_offset = np.average(y, axis=0, weights=sample_weight)
+        y = y - y_offset
     else:
-        X_mean = np.zeros(X.shape[1])
+        X_offset = np.zeros(X.shape[1])
         X_std = np.ones(X.shape[1])
-        y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
-    return X, y, X_mean, y_mean, X_std
+        y_offset = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
+    return X, y, X_offset, y_offset, X_std
 
 
 def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
@@ -150,10 +151,11 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
 
         X = (X - X_offset) / X_scale
 
-    If sample_weight is not None, then the weighted mean of X and y
-    is zero, and not the mean itself. If return_mean=True, the mean, eventually
-    weighted, is returned, independently of whether X was centered (option used
-    for optimization with sparse data in coordinate_descend).
+    X_scale is the L2 norm of X - X_offset. If sample_weight is not None,
+    then the weighted mean of X and y is zero, and not the mean itself. If
+    return_mean=True, the mean, eventually weighted, is returned, independently
+    of whether X was centered (option used for optimization with sparse data in
+    coordinate_descend).
 
     This is here because nearly all linear models will want their data to be
     centered.
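Per the amended docstring, X_scale is the L2 norm of the centered columns, so normalize=True yields unit-norm rather than unit-variance features. A NumPy-only sketch of that contract, reimplementing the formula rather than calling the private helper:

    import numpy as np

    X = np.array([[1., 10.], [2., 20.], [3., 30.]])
    X_offset = X.mean(axis=0)                           # what fit_intercept subtracts
    X_centered = X - X_offset
    X_scale = np.sqrt(np.sum(X_centered ** 2, axis=0))  # L2 norm per column
    X_scale[X_scale == 0] = 1                           # guard constant columns
    X_pre = X_centered / X_scale                        # columns now have unit L2 norm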
@@ -210,11 +212,11 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
 
 def _rescale_data(X, y, sample_weight):
     """Rescale data so as to support sample_weight"""
-    sample_weight = sample_weight * np.ones(y.shape[0])
+    n_samples = X.shape[0]
+    sample_weight = sample_weight * np.ones(n_samples)
     sample_weight = np.sqrt(sample_weight)
-    sw_matrix = np.diag(sample_weight)
-    if sp.issparse(X) or sp.issparse(y):
-        sw_matrix = sparse.dia_matrix(sw_matrix)
+    sw_matrix = sparse.dia_matrix((sample_weight, 0),
+                                  shape=(n_samples, n_samples))
     X = safe_sparse_dot(sw_matrix, X)
     y = safe_sparse_dot(sw_matrix, y)
     return X, y
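The rewrite of _rescale_data drops the dense np.diag matrix, which costs O(n_samples**2) memory, in favor of a sparse diagonal in all cases. A hedged sketch of the row-weighting it performs, on made-up data:

    import numpy as np
    from scipy import sparse
    from sklearn.utils.extmath import safe_sparse_dot

    X = np.arange(6.).reshape(3, 2)
    y = np.array([1., 2., 3.])
    sample_weight = np.array([1., 4., 9.])

    n_samples = X.shape[0]
    sw = np.sqrt(sample_weight * np.ones(n_samples))
    # Diagonal stored sparsely: O(n) memory instead of O(n^2) with np.diag.
    sw_matrix = sparse.dia_matrix((sw, 0), shape=(n_samples, n_samples))
    X_w = safe_sparse_dot(sw_matrix, X)  # row i multiplied by sqrt(w_i)
    y_w = safe_sparse_dot(sw_matrix, y)  # so OLS on (X_w, y_w) is weighted LS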
@@ -267,12 +269,12 @@ def predict(self, X):
 
     _preprocess_data = staticmethod(_preprocess_data)
 
-    def _set_intercept(self, X_mean, y_mean, X_norm):
+    def _set_intercept(self, X_offset, y_offset, X_scale):
         """Set the intercept_
         """
         if self.fit_intercept:
-            self.coef_ = self.coef_ / X_norm
-            self.intercept_ = y_mean - np.dot(X_mean, self.coef_.T)
+            self.coef_ = self.coef_ / X_scale
+            self.intercept_ = y_offset - np.dot(X_offset, self.coef_.T)
         else:
             self.intercept_ = 0.
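_set_intercept undoes the preprocessing: coefficients learnt on scaled data are divided by X_scale, and the intercept is recovered from the offsets. A numeric sketch with toy values standing in for the preprocessing outputs:

    import numpy as np

    X_offset = np.array([2.0, 5.0])   # column means removed before fitting
    y_offset = 3.0                    # target mean removed before fitting
    X_scale = np.array([1.0, 2.0])    # column norms divided out
    coef = np.array([0.5, 1.5])       # learnt on the preprocessed data

    coef = coef / X_scale                          # back to original units
    intercept = y_offset - np.dot(X_offset, coef)  # shift the hyperplane back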

@@ -425,11 +427,11 @@ class LinearRegression(LinearModel, RegressorMixin):
 
     normalize : boolean, optional, default False
         If True, the regressors X will be normalized before regression.
-        When the regressors are normalized, the fitted `coef_` are the same
-        independently of the number of training samples; hence, hyperparameters
-        learnt by cross-validation will be compatible among different training
-        and validation sets. The same property is not valid for standardized
-        data. However, if you wish to standardize, please use
+        This parameter is ignored when `fit_intercept` is set to `False`.
+        When the regressors are normalized, note that this makes the
+        hyperparameters learnt more robust and almost independent of the number
+        of samples. The same property is not valid for standardized data.
+        However, if you wish to standardize, please use
         `preprocessing.StandardScaler` before calling `fit` on an estimator
         with `normalize=False`.
 
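The amended docstring steers users who want true standardization to preprocessing.StandardScaler with normalize left off. A minimal sketch of that recommendation as a pipeline:

    import numpy as np
    from sklearn.linear_model import LinearRegression
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    rng = np.random.RandomState(0)
    X = rng.randn(20, 3)
    y = np.dot(X, np.array([1., 2., 3.])) + 0.5

    # Standardize explicitly, then fit with normalize at its False default.
    model = make_pipeline(StandardScaler(), LinearRegression())
    model.fit(X, y)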
@@ -510,7 +512,7 @@ def fit(self, X, y, sample_weight=None):
         if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:
             raise ValueError("Sample weights must be 1D array or scalar")
 
-        X, y, X_mean, y_mean, X_norm = self._preprocess_data(
+        X, y, X_offset, y_offset, X_scale = self._preprocess_data(
             X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
             copy=self.copy_X, sample_weight=sample_weight)

@@ -537,7 +539,7 @@ def fit(self, X, y, sample_weight=None):
 
         if y.ndim == 1:
             self.coef_ = np.ravel(self.coef_)
-        self._set_intercept(X_mean, y_mean, X_norm)
+        self._set_intercept(X_offset, y_offset, X_scale)
         return self
 

@@ -547,16 +549,16 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy):
 
     if sparse.isspmatrix(X):
         precompute = False
-        X, y, X_mean, y_mean, X_norm = _preprocess_data(
+        X, y, X_offset, y_offset, X_scale = _preprocess_data(
             X, y, fit_intercept=fit_intercept, normalize=normalize,
             return_mean=True)
     else:
         # copy was done in fit if necessary
-        X, y, X_mean, y_mean, X_norm = _preprocess_data(
+        X, y, X_offset, y_offset, X_scale = _preprocess_data(
             X, y, fit_intercept=fit_intercept, normalize=normalize, copy=copy)
     if hasattr(precompute, '__array__') and (
-            fit_intercept and not np.allclose(X_mean, np.zeros(n_features)) or
-            normalize and not np.allclose(X_norm, np.ones(n_features))):
+            fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or
+            normalize and not np.allclose(X_scale, np.ones(n_features))):
         warnings.warn("Gram matrix was provided but X was centered"
                       " to fit intercept, "
                       "or X was normalized : recomputing Gram matrix.",
@@ -593,4 +595,4 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy):
                           order='F')
         np.dot(y.T, X, out=Xy.T)
 
-    return X, y, X_mean, y_mean, X_norm, precompute, Xy
+    return X, y, X_offset, y_offset, X_scale, precompute, Xy
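_pre_fit recomputes a user-supplied Gram matrix whenever centering or normalization has changed X, because X.T dot X is not invariant under those transforms. A short sketch of the invariance failure that the np.allclose checks guard against:

    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.randn(5, 3)
    gram = np.dot(X.T, X)            # precomputed on the raw X

    X_centered = X - X.mean(axis=0)
    # Centering changes the Gram matrix, so the precomputed one is stale
    # and _pre_fit warns before recomputing it.
    assert not np.allclose(gram, np.dot(X_centered.T, X_centered))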

sklearn/linear_model/bayes.py

Lines changed: 4 additions & 4 deletions
@@ -142,7 +142,7 @@ def fit(self, X, y):
         self : returns an instance of self.
         """
         X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True)
-        X, y, X_mean, y_mean, X_std = self._preprocess_data(
+        X, y, X_offset, y_offset, X_scale = self._preprocess_data(
             X, y, self.fit_intercept, self.normalize, self.copy_X)
         n_samples, n_features = X.shape
 
@@ -217,7 +217,7 @@ def fit(self, X, y):
         self.lambda_ = lambda_
         self.coef_ = coef_
 
-        self._set_intercept(X_mean, y_mean, X_std)
+        self._set_intercept(X_offset, y_offset, X_scale)
         return self
 
 
@@ -365,7 +365,7 @@ def fit(self, X, y):
         n_samples, n_features = X.shape
         coef_ = np.zeros(n_features)
 
-        X, y, X_mean, y_mean, X_std = self._preprocess_data(
+        X, y, X_offset, y_offset, X_scale = self._preprocess_data(
             X, y, self.fit_intercept, self.normalize, self.copy_X)
 
         # Launch the convergence loop
@@ -432,5 +432,5 @@ def fit(self, X, y):
         self.alpha_ = alpha_
         self.sigma_ = sigma_
         self.lambda_ = lambda_
-        self._set_intercept(X_mean, y_mean, X_std)
+        self._set_intercept(X_offset, y_offset, X_scale)
         return self

sklearn/linear_model/coordinate_descent.py

Lines changed: 18 additions & 18 deletions
@@ -94,10 +94,10 @@ def _alpha_grid(X, y, Xy=None, l1_ratio=1.0, fit_intercept=True,
         if sparse_center:
             # Workaround to find alpha_max for sparse matrices.
             # since we should not destroy the sparsity of such matrices.
-            _, _, X_mean, _, X_std = _preprocess_data(X, y, fit_intercept,
+            _, _, X_offset, _, X_scale = _preprocess_data(X, y, fit_intercept,
                                                       normalize,
                                                       return_mean=True)
-            mean_dot = X_mean * np.sum(y)
+            mean_dot = X_offset * np.sum(y)
 
     if Xy.ndim == 1:
         Xy = Xy[:, np.newaxis]
@@ -106,7 +106,7 @@ def _alpha_grid(X, y, Xy=None, l1_ratio=1.0, fit_intercept=True,
     if fit_intercept:
         Xy -= mean_dot[:, np.newaxis]
     if normalize:
-        Xy /= X_std[:, np.newaxis]
+        Xy /= X_scale[:, np.newaxis]
 
     alpha_max = (np.sqrt(np.sum(Xy ** 2, axis=1)).max() /
                  (n_samples * l1_ratio))
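For dense data the grid's largest value reduces to max_j |x_j.T y| / (n_samples * l1_ratio), the smallest alpha at which every Lasso coefficient is exactly zero; the sparse branch above only reconstructs the centered X.T y without densifying X. A hedged dense-case sketch:

    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.randn(50, 4)
    y = rng.randn(50)
    n_samples, l1_ratio = X.shape[0], 1.0   # l1_ratio=1.0: pure Lasso

    X = X - X.mean(axis=0)                  # centering, as fit_intercept implies
    Xy = np.dot(X.T, y)[:, np.newaxis]
    alpha_max = np.sqrt(np.sum(Xy ** 2, axis=1)).max() / (n_samples * l1_ratio)
    # Any alpha >= alpha_max drives all Lasso coefficients to zero.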
@@ -391,17 +391,17 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
 
     # MultiTaskElasticNet does not support sparse matrices
     if not multi_output and sparse.isspmatrix(X):
-        if 'X_mean' in params:
+        if 'X_offset' in params:
             # As sparse matrices are not actually centered we need this
             # to be passed to the CD solver.
-            X_sparse_scaling = params['X_mean'] / params['X_std']
+            X_sparse_scaling = params['X_offset'] / params['X_scale']
         else:
             X_sparse_scaling = np.zeros(n_features)
 
     # X should be normalized and fit already if function is called
     # from ElasticNet.fit
     if check_input:
-        X, y, X_mean, y_mean, X_std, precompute, Xy = \
+        X, y, X_offset, y_offset, X_scale, precompute, Xy = \
             _pre_fit(X, y, Xy, precompute, normalize=False,
                      fit_intercept=False, copy=False)
     if alphas is None:
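The X_offset / X_scale ratio exists because subtracting a nonzero column mean from a sparse matrix would fill in every stored zero; the CD solver therefore applies the centering implicitly. A toy demonstration of the densification being avoided:

    import numpy as np
    from scipy import sparse

    X = sparse.csr_matrix(np.array([[0., 1.],
                                    [0., 0.],
                                    [2., 0.]]))   # 3 of 6 entries stored
    X_offset = np.asarray(X.mean(axis=0)).ravel()

    # Explicit centering destroys sparsity: every zero becomes -mean.
    X_centered = X.toarray() - X_offset
    print(np.count_nonzero(X_centered))           # all 6 entries now nonzero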
@@ -673,7 +673,7 @@ def fit(self, X, y, check_input=True):
                              multi_output=True, y_numeric=True)
             y = check_array(y, dtype=np.float64, order='F', copy=False,
                             ensure_2d=False)
-        X, y, X_mean, y_mean, X_std, precompute, Xy = \
+        X, y, X_offset, y_offset, X_scale, precompute, Xy = \
             _pre_fit(X, y, None, self.precompute, self.normalize,
                      self.fit_intercept, copy=False)
         if y.ndim == 1:
@@ -710,7 +710,7 @@ def fit(self, X, y, check_input=True):
                 precompute=precompute, Xy=this_Xy,
                 fit_intercept=False, normalize=False, copy_X=True,
                 verbose=False, tol=self.tol, positive=self.positive,
-                X_mean=X_mean, X_std=X_std, return_n_iter=True,
+                X_offset=X_offset, X_scale=X_scale, return_n_iter=True,
                 coef_init=coef_[k], max_iter=self.max_iter,
                 random_state=self.random_state,
                 selection=self.selection,
@@ -723,7 +723,7 @@ def fit(self, X, y, check_input=True):
             self.n_iter_ = self.n_iter_[0]
 
         self.coef_, self.dual_gap_ = map(np.squeeze, [coef_, dual_gaps_])
-        self._set_intercept(X_mean, y_mean, X_std)
+        self._set_intercept(X_offset, y_offset, X_scale)
 
         # return self for chaining fit and predict calls
         return self
@@ -963,14 +963,14 @@ def _path_residuals(X, y, train, test, path, path_params, alphas=None,
         # Fall back to default enet_multitask
         precompute = False
 
-    X_train, y_train, X_mean, y_mean, X_std, precompute, Xy = \
+    X_train, y_train, X_offset, y_offset, X_scale, precompute, Xy = \
         _pre_fit(X_train, y_train, None, precompute, normalize, fit_intercept,
                  copy=False)
 
     path_params = path_params.copy()
     path_params['Xy'] = Xy
-    path_params['X_mean'] = X_mean
-    path_params['X_std'] = X_std
+    path_params['X_offset'] = X_offset
+    path_params['X_scale'] = X_scale
     path_params['precompute'] = precompute
     path_params['copy_X'] = False
     path_params['alphas'] = alphas
@@ -987,14 +987,14 @@ def _path_residuals(X, y, train, test, path, path_params, alphas=None,
     if y.ndim == 1:
         # Doing this so that it becomes coherent with multioutput.
         coefs = coefs[np.newaxis, :, :]
-        y_mean = np.atleast_1d(y_mean)
+        y_offset = np.atleast_1d(y_offset)
         y_test = y_test[:, np.newaxis]
 
     if normalize:
-        nonzeros = np.flatnonzero(X_std)
-        coefs[:, nonzeros] /= X_std[nonzeros][:, np.newaxis]
+        nonzeros = np.flatnonzero(X_scale)
+        coefs[:, nonzeros] /= X_scale[nonzeros][:, np.newaxis]
 
-    intercepts = y_mean[:, np.newaxis] - np.dot(X_mean, coefs)
+    intercepts = y_offset[:, np.newaxis] - np.dot(X_offset, coefs)
     if sparse.issparse(X_test):
         n_order, n_features, n_alphas = coefs.shape
         # Work around for sparse matices since coefs is a 3-D numpy array.
@@ -1700,7 +1700,7 @@ def fit(self, X, y):
             raise ValueError("X and y have inconsistent dimensions (%d != %d)"
                              % (n_samples, y.shape[0]))
 
-        X, y, X_mean, y_mean, X_std = _preprocess_data(
+        X, y, X_offset, y_offset, X_scale = _preprocess_data(
             X, y, self.fit_intercept, self.normalize, copy=False)
 
         if not self.warm_start or self.coef_ is None:
@@ -1721,7 +1721,7 @@ def fit(self, X, y):
             self.coef_, l1_reg, l2_reg, X, y, self.max_iter, self.tol,
             check_random_state(self.random_state), random)
 
-        self._set_intercept(X_mean, y_mean, X_std)
+        self._set_intercept(X_offset, y_offset, X_scale)
 
         if self.dual_gap_ > self.eps_:
             warnings.warn('Objective did not converge, you might want'

sklearn/linear_model/least_angle.py

Lines changed: 2 additions & 2 deletions
@@ -635,7 +635,7 @@ def fit(self, X, y, Xy=None):
         X, y = check_X_y(X, y, y_numeric=True, multi_output=True)
         n_features = X.shape[1]
 
-        X, y, X_mean, y_mean, X_std = self._preprocess_data(X, y,
+        X, y, X_offset, y_offset, X_scale = self._preprocess_data(X, y,
                                                             self.fit_intercept,
                                                             self.normalize,
                                                             self.copy_X)
@@ -702,7 +702,7 @@ def fit(self, X, y, Xy=None):
         if n_targets == 1:
             self.alphas_ = self.alphas_[0]
             self.n_iter_ = self.n_iter_[0]
-        self._set_intercept(X_mean, y_mean, X_std)
+        self._set_intercept(X_offset, y_offset, X_scale)
         return self
 

sklearn/linear_model/omp.py

Lines changed: 2 additions & 2 deletions
@@ -636,7 +636,7 @@ def fit(self, X, y):
         X, y = check_X_y(X, y, multi_output=True, y_numeric=True)
         n_features = X.shape[1]
 
-        X, y, X_mean, y_mean, X_std, Gram, Xy = \
+        X, y, X_offset, y_offset, X_scale, Gram, Xy = \
             _pre_fit(X, y, None, self.precompute, self.normalize,
                      self.fit_intercept, copy=True)
 
@@ -664,7 +664,7 @@ def fit(self, X, y):
                                copy_Gram=True, copy_Xy=True,
                                return_n_iter=True)
         self.coef_ = coef_.T
-        self._set_intercept(X_mean, y_mean, X_std)
+        self._set_intercept(X_offset, y_offset, X_scale)
         return self
 

sklearn/linear_model/randomized_l1.py

Lines changed: 4 additions & 4 deletions
@@ -94,7 +94,7 @@ def fit(self, X, y):
         X = as_float_array(X, copy=False)
         n_samples, n_features = X.shape
 
-        X, y, X_mean, y_mean, X_std = \
+        X, y, X_offset, y_offset, X_scale = \
            self._preprocess_data(X, y, self.fit_intercept, self.normalize)
 
         estimator_func, params = self._make_estimator_and_params(X, y)
@@ -515,9 +515,9 @@ def _make_estimator_and_params(self, X, y):
 
     def _preprocess_data(self, X, y, fit_intercept, normalize=False):
         """Center the data in X but not in y"""
-        X, _, Xmean, _, X_std = _preprocess_data(X, y, fit_intercept,
-                                                 normalize=normalize)
-        return X, y, Xmean, y, X_std
+        X, _, X_offset, _, X_scale = _preprocess_data(X, y, fit_intercept,
+                                                      normalize=normalize)
+        return X, y, X_offset, y, X_scale
 
 
 ###############################################################################
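Note the override's contract: for the randomized sparse models only X is centered, and the original y is returned in both the y and y_offset slots so that classification labels pass through untouched. A hedged reimplementation of the same pattern (the real method lives on the estimator and delegates to the private _preprocess_data):

    import numpy as np

    def preprocess_X_only(X, y):
        """Center X but leave the targets y alone."""
        X_offset = X.mean(axis=0)
        X_scale = np.ones(X.shape[1])
        # y appears twice: once as the (unchanged) data, once in the
        # slot where a y offset would normally go.
        return X - X_offset, y, X_offset, y, X_scale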
