comments addressed · scikit-learn/scikit-learn@effdf97 · GitHub
[go: up one dir, main page]

Skip to content

Commit effdf97

Browse files
author
giorgiop
committed
comments addressed
1 parent ea666c5 commit effdf97

File tree

7 files changed

+63
-63
lines changed

7 files changed

+63
-63
lines changed

sklearn/linear_model/base.py

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def sparse_center_data(X, y, fit_intercept, normalize=False):
8585
else:
8686
X = sp.csc_matrix(X, copy=normalize, dtype=np.float64)
8787

88-
X_mean, X_var = mean_variance_axis(X, axis=0)
88+
X_offset, X_var = mean_variance_axis(X, axis=0)
8989
if normalize:
9090
# transform variance to std in-place
9191
X_var *= X.shape[0]
@@ -95,14 +95,14 @@ def sparse_center_data(X, y, fit_intercept, normalize=False):
9595
inplace_column_scale(X, 1. / X_std)
9696
else:
9797
X_std = np.ones(X.shape[1])
98-
y_mean = y.mean(axis=0)
99-
y = y - y_mean
98+
y_offset = y.mean(axis=0)
99+
y = y - y_offset
100100
else:
101-
X_mean = np.zeros(X.shape[1])
101+
X_offset = np.zeros(X.shape[1])
102102
X_std = np.ones(X.shape[1])
103-
y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
103+
y_offset = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
104104

105-
return X, y, X_mean, y_mean, X_std
105+
return X, y, X_offset, y_offset, X_std
106106

107107

108108
@deprecated("center_data will be removed in "
@@ -120,24 +120,24 @@ def center_data(X, y, fit_intercept, normalize=False, copy=True,
120120
if isinstance(sample_weight, numbers.Number):
121121
sample_weight = None
122122
if sp.issparse(X):
123-
X_mean = np.zeros(X.shape[1])
123+
X_offset = np.zeros(X.shape[1])
124124
X_std = np.ones(X.shape[1])
125125
else:
126-
X_mean = np.average(X, axis=0, weights=sample_weight)
127-
X -= X_mean
126+
X_offset = np.average(X, axis=0, weights=sample_weight)
127+
X -= X_offset
128128
if normalize:
129129
X_std = np.sqrt(np.sum(X ** 2, axis=0))
130130
X_std[X_std == 0] = 1
131131
X /= X_std
132132
else:
133133
X_std = np.ones(X.shape[1])
134-
y_mean = np.average(y, axis=0, weights=sample_weight)
135-
y = y - y_mean
134+
y_offset = np.average(y, axis=0, weights=sample_weight)
135+
y = y - y_offset
136136
else:
137-
X_mean = np.zeros(X.shape[1])
137+
X_offset = np.zeros(X.shape[1])
138138
X_std = np.ones(X.shape[1])
139-
y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
140-
return X, y, X_mean, y_mean, X_std
139+
y_offset = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
140+
return X, y, X_offset, y_offset, X_std
141141

142142

143143
def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
@@ -210,11 +210,11 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
210210

211211
def _rescale_data(X, y, sample_weight):
212212
"""Rescale data so as to support sample_weight"""
213-
sample_weight = sample_weight * np.ones(y.shape[0])
213+
n_samples = X.shape[0]
214+
sample_weight = sample_weight * np.ones(n_samples)
214215
sample_weight = np.sqrt(sample_weight)
215-
sw_matrix = np.diag(sample_weight)
216-
if sp.issparse(X) or sp.issparse(y):
217-
sw_matrix = sparse.dia_matrix(sw_matrix)
216+
sw_matrix = sparse.dia_matrix((sample_weight, 0),
217+
shape=(n_samples, n_samples))
218218
X = safe_sparse_dot(sw_matrix, X)
219219
y = safe_sparse_dot(sw_matrix, y)
220220
return X, y
@@ -267,12 +267,12 @@ def predict(self, X):
267267

268268
_preprocess_data = staticmethod(_preprocess_data)
269269

270-
def _set_intercept(self, X_mean, y_mean, X_norm):
270+
def _set_intercept(self, X_offset, y_offset, X_scale):
271271
"""Set the intercept_
272272
"""
273273
if self.fit_intercept:
274-
self.coef_ = self.coef_ / X_norm
275-
self.intercept_ = y_mean - np.dot(X_mean, self.coef_.T)
274+
self.coef_ = self.coef_ / X_scale
275+
self.intercept_ = y_offset - np.dot(X_offset, self.coef_.T)
276276
else:
277277
self.intercept_ = 0.
278278

@@ -510,7 +510,7 @@ def fit(self, X, y, sample_weight=None):
510510
if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:
511511
raise ValueError("Sample weights must be 1D array or scalar")
512512

513-
X, y, X_mean, y_mean, X_norm = self._preprocess_data(
513+
X, y, X_offset, y_offset, X_scale = self._preprocess_data(
514514
X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
515515
copy=self.copy_X, sample_weight=sample_weight)
516516

@@ -537,7 +537,7 @@ def fit(self, X, y, sample_weight=None):
537537

538538
if y.ndim == 1:
539539
self.coef_ = np.ravel(self.coef_)
540-
self._set_intercept(X_mean, y_mean, X_norm)
540+
self._set_intercept(X_offset, y_offset, X_scale)
541541
return self
542542

543543

@@ -547,16 +547,16 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy):
547547

548548
if sparse.isspmatrix(X):
549549
precompute = False
550-
X, y, X_mean, y_mean, X_norm = _preprocess_data(
550+
X, y, X_offset, y_offset, X_scale = _preprocess_data(
551551
X, y, fit_intercept=fit_intercept, normalize=normalize,
552552
return_mean=True)
553553
else:
554554
# copy was done in fit if necessary
555-
X, y, X_mean, y_mean, X_norm = _preprocess_data(
555+
X, y, X_offset, y_offset, X_scale = _preprocess_data(
556556
X, y, fit_intercept=fit_intercept, normalize=normalize, copy=copy)
557557
if hasattr(precompute, '__array__') and (
558-
fit_intercept and not np.allclose(X_mean, np.zeros(n_features)) or
559-
normalize and not np.allclose(X_norm, np.ones(n_features))):
558+
fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or
559+
normalize and not np.allclose(X_scale, np.ones(n_features))):
560560
warnings.warn("Gram matrix was provided but X was centered"
561561
" to fit intercept, "
562562
"or X was normalized : recomputing Gram matrix.",
@@ -593,4 +593,4 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy):
593593
order='F')
594594
np.dot(y.T, X, out=Xy.T)
595595

596-
return X, y, X_mean, y_mean, X_norm, precompute, Xy
596+
return X, y, X_offset, y_offset, X_scale, precompute, Xy

sklearn/linear_model/bayes.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def fit(self, X, y):
142142
self : returns an instance of self.
143143
"""
144144
X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True)
145-
X, y, X_mean, y_mean, X_std = self._preprocess_data(
145+
X, y, X_offset, y_offset, X_scale = self._preprocess_data(
146146
X, y, self.fit_intercept, self.normalize, self.copy_X)
147147
n_samples, n_features = X.shape
148148

@@ -217,7 +217,7 @@ def fit(self, X, y):
217217
self.lambda_ = lambda_
218218
self.coef_ = coef_
219219

220-
self._set_intercept(X_mean, y_mean, X_std)
220+
self._set_intercept(X_offset, y_offset, X_scale)
221221
return self
222222

223223

@@ -365,7 +365,7 @@ def fit(self, X, y):
365365
n_samples, n_features = X.shape
366366
coef_ = np.zeros(n_features)
367367

368-
X, y, X_mean, y_mean, X_std = self._preprocess_data(
368+
X, y, X_offset, y_offset, X_scale = self._preprocess_data(
369369
X, y, self.fit_intercept, self.normalize, self.copy_X)
370370

371371
# Launch the convergence loop
@@ -432,5 +432,5 @@ def fit(self, X, y):
432432
self.alpha_ = alpha_
433433
self.sigma_ = sigma_
434434
self.lambda_ = lambda_
435-
self._set_intercept(X_mean, y_mean, X_std)
435+
self._set_intercept(X_offset, y_offset, X_scale)
436436
return self

sklearn/linear_model/coordinate_descent.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,10 @@ def _alpha_grid(X, y, Xy=None, l1_ratio=1.0, fit_intercept=True,
9494
if sparse_center:
9595
# Workaround to find alpha_max for sparse matrices.
9696
# since we should not destroy the sparsity of such matrices.
97-
_, _, X_mean, _, X_std = _preprocess_data(X, y, fit_intercept,
97+
_, _, X_offset, _, X_scale = _preprocess_data(X, y, fit_intercept,
9898
normalize,
9999
return_mean=True)
100-
mean_dot = X_mean * np.sum(y)
100+
mean_dot = X_offset * np.sum(y)
101101

102102
if Xy.ndim == 1:
103103
Xy = Xy[:, np.newaxis]
@@ -106,7 +106,7 @@ def _alpha_grid(X, y, Xy=None, l1_ratio=1.0, fit_intercept=True,
106106
if fit_intercept:
107107
Xy -= mean_dot[:, np.newaxis]
108108
if normalize:
109-
Xy /= X_std[:, np.newaxis]
109+
Xy /= X_scale[:, np.newaxis]
110110

111111
alpha_max = (np.sqrt(np.sum(Xy ** 2, axis=1)).max() /
112112
(n_samples * l1_ratio))
@@ -391,17 +391,17 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
391391

392392
# MultiTaskElasticNet does not support sparse matrices
393393
if not multi_output and sparse.isspmatrix(X):
394-
if 'X_mean' in params:
394+
if 'X_offset' in params:
395395
# As sparse matrices are not actually centered we need this
396396
# to be passed to the CD solver.
397-
X_sparse_scaling = params['X_mean'] / params['X_std']
397+
X_sparse_scaling = params['X_offset'] / params['X_scale']
398398
else:
399399
X_sparse_scaling = np.zeros(n_features)
400400

401401
# X should be normalized and fit already if function is called
402402
# from ElasticNet.fit
403403
if check_input:
404-
X, y, X_mean, y_mean, X_std, precompute, Xy = \
404+
X, y, X_offset, y_offset, X_scale, precompute, Xy = \
405405
_pre_fit(X, y, Xy, precompute, normalize=False,
406406
fit_intercept=False, copy=False)
407407
if alphas is None:
@@ -673,7 +673,7 @@ def fit(self, X, y, check_input=True):
673673
multi_output=True, y_numeric=True)
674674
y = check_array(y, dtype=np.float64, order='F', copy=False,
675675
ensure_2d=False)
676-
X, y, X_mean, y_mean, X_std, precompute, Xy = \
676+
X, y, X_offset, y_offset, X_scale, precompute, Xy = \
677677
_pre_fit(X, y, None, self.precompute, self.normalize,
678678
self.fit_intercept, copy=False)
679679
if y.ndim == 1:
@@ -710,7 +710,7 @@ def fit(self, X, y, check_input=True):
710710
precompute=precompute, Xy=this_Xy,
711711
fit_intercept=False, normalize=False, copy_X=True,
712712
verbose=False, tol=self.tol, positive=self.positive,
713-
X_mean=X_mean, X_std=X_std, return_n_iter=True,
713+
X_offset=X_offset, X_scale=X_scale, return_n_iter=True,
714714
coef_init=coef_[k], max_iter=self.max_iter,
715715
random_state=self.random_state,
716716
selection=self.selection,
@@ -723,7 +723,7 @@ def fit(self, X, y, check_input=True):
723723
self.n_iter_ = self.n_iter_[0]
724724

725725
self.coef_, self.dual_gap_ = map(np.squeeze, [coef_, dual_gaps_])
726-
self._set_intercept(X_mean, y_mean, X_std)
726+
self._set_intercept(X_offset, y_offset, X_scale)
727727

728728
# return self for chaining fit and predict calls
729729
return self
@@ -963,14 +963,14 @@ def _path_residuals(X, y, train, test, path, path_params, alphas=None,
963963
# Fall back to default enet_multitask
964964
precompute = False
965965

966-
X_train, y_train, X_mean, y_mean, X_std, precompute, Xy = \
966+
X_train, y_train, X_offset, y_offset, X_scale, precompute, Xy = \
967967
_pre_fit(X_train, y_train, None, precompute, normalize, fit_intercept,
968968
copy=False)
969969

970970
path_params = path_params.copy()
971971
path_params['Xy'] = Xy
972-
path_params['X_mean'] = X_mean
973-
path_params['X_std'] = X_std
972+
path_params['X_offset'] = X_offset
973+
path_params['X_scale'] = X_scale
974974
path_params['precompute'] = precompute
975975
path_params['copy_X'] = False
976976
path_params['alphas'] = alphas
@@ -987,14 +987,14 @@ def _path_residuals(X, y, train, test, path, path_params, alphas=None,
987987
if y.ndim == 1:
988988
# Doing this so that it becomes coherent with multioutput.
989989
coefs = coefs[np.newaxis, :, :]
990-
y_mean = np.atleast_1d(y_mean)
990+
y_offset = np.atleast_1d(y_offset)
991991
y_test = y_test[:, np.newaxis]
992992

993993
if normalize:
994-
nonzeros = np.flatnonzero(X_std)
995-
coefs[:, nonzeros] /= X_std[nonzeros][:, np.newaxis]
994+
nonzeros = np.flatnonzero(X_scale)
995+
coefs[:, nonzeros] /= X_scale[nonzeros][:, np.newaxis]
996996

997-
intercepts = y_mean[:, np.newaxis] - np.dot(X_mean, coefs)
997+
intercepts = y_offset[:, np.newaxis] - np.dot(X_offset, coefs)
998998
if sparse.issparse(X_test):
999999
n_order, n_features, n_alphas = coefs.shape
10001000
# Work around for sparse matices since coefs is a 3-D numpy array.
@@ -1700,7 +1700,7 @@ def fit(self, X, y):
17001700
raise ValueError("X and y have inconsistent dimensions (%d != %d)"
17011701
% (n_samples, y.shape[0]))
17021702

1703-
X, y, X_mean, y_mean, X_std = _preprocess_data(
1703+
X, y, X_offset, y_offset, X_scale = _preprocess_data(
17041704
X, y, self.fit_intercept, self.normalize, copy=False)
17051705

17061706
if not self.warm_start or self.coef_ is None:
@@ -1721,7 +1721,7 @@ def fit(self, X, y):
17211721
self.coef_, l1_reg, l2_reg, X, y, self.max_iter, self.tol,
17221722
check_random_state(self.random_state), random)
17231723

1724-
self._set_intercept(X_mean, y_mean, X_std)
1724+
self._set_intercept(X_offset, y_offset, X_scale)
17251725

17261726
if self.dual_gap_ > self.eps_:
17271727
warnings.warn('Objective did not converge, you might want'

sklearn/linear_model/least_angle.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -635,7 +635,7 @@ def fit(self, X, y, Xy=None):
635635
X, y = check_X_y(X, y, y_numeric=True, multi_output=True)
636636
n_features = X.shape[1]
637637

638-
X, y, X_mean, y_mean, X_std = self._preprocess_data(X, y,
638+
X, y, X_offset, y_offset, X_scale = self._preprocess_data(X, y,
639639
self.fit_intercept,
640640
self.normalize,
641641
self.copy_X)
@@ -702,7 +702,7 @@ def fit(self, X, y, Xy=None):
702702
if n_targets == 1:
703703
self.alphas_ = self.alphas_[0]
704704
self.n_iter_ = self.n_iter_[0]
705-
self._set_intercept(X_mean, y_mean, X_std)
705+
self._set_intercept(X_offset, y_offset, X_scale)
706706
return self
707707

708708

sklearn/linear_model/omp.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,7 @@ def fit(self, X, y):
636636
X, y = check_X_y(X, y, multi_output=True, y_numeric=True)
637637
n_features = X.shape[1]
638638

639-
X, y, X_mean, y_mean, X_std, Gram, Xy = \
639+
X, y, X_offset, y_offset, X_scale, Gram, Xy = \
640640
_pre_fit(X, y, None, self.precompute, self.normalize,
641641
self.fit_intercept, copy=True)
642642

@@ -664,7 +664,7 @@ def fit(self, X, y):
664664
copy_Gram=True, copy_Xy=True,
665665
return_n_iter=True)
666666
self.coef_ = coef_.T
667-
self._set_intercept(X_mean, y_mean, X_std)
667+
self._set_intercept(X_offset, y_offset, X_scale)
668668
return self
669669

670670

sklearn/linear_model/randomized_l1.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def fit(self, X, y):
9494
X = as_float_array(X, copy=False)
9595
n_samples, n_features = X.shape
9696

97-
X, y, X_mean, y_mean, X_std = \
97+
X, y, X_offset, y_offset, X_scale = \
9898
self._preprocess_data(X, y, self.fit_intercept, self.normalize)
9999

100100
estimator_func, params = self._make_estimator_and_params(X, y)
@@ -515,9 +515,9 @@ def _make_estimator_and_params(self, X, y):
515515

516516
def _preprocess_data(self, X, y, fit_intercept, normalize=False):
517517
"""Center the data in X but not in y"""
518-
X, _, Xmean, _, X_std = _preprocess_data(X, y, fit_intercept,
519-
normalize=normalize)
520-
return X, y, Xmean, y, X_std
518+
X, _, X_offset, _, X_scale = _preprocess_data(X, y, fit_intercept,
519+
normalize=normalize)
520+
return X, y, X_offset, y, X_scale
521521

522522

523523
###############################################################################

sklearn/linear_model/ridge.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,7 @@ def fit(self, X, y, sample_weight=None):
463463
np.atleast_1d(sample_weight).ndim > 1):
464464
raise ValueError("Sample weights must be 1D array or scalar")
465465

466-
X, y, X_mean, y_mean, X_std = self._preprocess_data(
466+
X, y, X_offset, y_offset, X_scale = self._preprocess_data(
467467
X, y, self.fit_intercept, self.normalize, self.copy_X,
468468
sample_weight=sample_weight)
469469

@@ -474,14 +474,14 @@ def fit(self, X, y, sample_weight=None):
474474
max_iter=self.max_iter, tol=self.tol, solver=self.solver,
475475
random_state=self.random_state, return_n_iter=True,
476476
return_intercept=True)
477-
self.intercept_ += y_mean
477+
self.intercept_ += y_offset
478478
else:
479479
self.coef_, self.n_iter_ = ridge_regression(
480480
X, y, alpha=self.alpha, sample_weight=sample_weight,
481481
max_iter=self.max_iter, tol=self.tol, solver=self.solver,
482482
random_state=self.random_state, return_n_iter=True,
483483
return_intercept=False)
484-
self._set_intercept(X_mean, y_mean, X_std)
484+
self._set_intercept(X_offset, y_offset, X_scale)
485485

486486
return self
487487

@@ -935,7 +935,7 @@ def fit(self, X, y, sample_weight=None):
935935

936936
n_samples, n_features = X.shape
937937

938-
X, y, X_mean, y_mean, X_std = LinearModel._preprocess_data(
938+
X, y, X_offset, y_offset, X_scale = LinearModel._preprocess_data(
939939
X, y, self.fit_intercept, self.normalize, self.copy_X,
940940
sample_weight=sample_weight)
941941

@@ -1003,7 +1003,7 @@ def identity_estimator():
10031003
self.dual_coef_ = C[best]
10041004
self.coef_ = safe_sparse_dot(self.dual_coef_.T, X)
10051005

1006-
self._set_intercept(X_mean, y_mean, X_std)
1006+
self._set_intercept(X_offset, y_offset, X_scale)
10071007

10081008
if self.store_cv_values:
10091009
if len(y.shape) == 1:

0 commit comments

Comments
 (0)
0