comments addressed · scikit-learn/scikit-learn@43d0f2c · GitHub

Commit 43d0f2c

Author: giorgiop
Message: comments addressed
1 parent: 1b6b3c6

File tree

7 files changed: +74 -72 lines changed
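This commit renames the values returned by the linear-model preprocessing helpers: X_mean becomes X_offset, y_mean becomes y_offset, and X_std/X_norm become X_scale, since these quantities are an offset that was subtracted and a scale that was divided out, not always a mean or a standard deviation. A minimal sketch of the post-commit signature, assuming the private helper keeps the import path sklearn.linear_model.base at this point in history:

    import numpy as np
    from sklearn.linear_model.base import _preprocess_data  # private API

    X = np.array([[1., 2.], [3., 4.], [5., 6.]])
    y = np.array([1., 2., 3.])

    # Five return values: the preprocessed data plus the statistics needed
    # to undo the preprocessing when setting the intercept.
    X_pre, y_pre, X_offset, y_offset, X_scale = _preprocess_data(
        X, y, fit_intercept=True, normalize=False)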

sklearn/linear_model/base.py

Lines changed: 39 additions & 37 deletions
@@ -85,7 +85,7 @@ def sparse_center_data(X, y, fit_intercept, normalize=False):
         else:
             X = sp.csc_matrix(X, copy=normalize, dtype=np.float64)
 
-        X_mean, X_var = mean_variance_axis(X, axis=0)
+        X_offset, X_var = mean_variance_axis(X, axis=0)
         if normalize:
             # transform variance to std in-place
             X_var *= X.shape[0]
@@ -95,14 +95,14 @@ def sparse_center_data(X, y, fit_intercept, normalize=False):
             inplace_column_scale(X, 1. / X_std)
         else:
             X_std = np.ones(X.shape[1])
-        y_mean = y.mean(axis=0)
-        y = y - y_mean
+        y_offset = y.mean(axis=0)
+        y = y - y_offset
     else:
-        X_mean = np.zeros(X.shape[1])
+        X_offset = np.zeros(X.shape[1])
         X_std = np.ones(X.shape[1])
-        y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
+        y_offset = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
 
-    return X, y, X_mean, y_mean, X_std
+    return X, y, X_offset, y_offset, X_std
 
 
 @deprecated("center_data will be removed in "
@@ -120,24 +120,25 @@ def center_data(X, y, fit_intercept, normalize=False, copy=True,
         if isinstance(sample_weight, numbers.Number):
             sample_weight = None
         if sp.issparse(X):
-            X_mean = np.zeros(X.shape[1])
+            X_offset = np.zeros(X.shape[1])
             X_std = np.ones(X.shape[1])
         else:
-            X_mean = np.average(X, axis=0, weights=sample_weight)
-            X -= X_mean
+            X_offset = np.average(X, axis=0, weights=sample_weight)
+            X -= X_offset
+            # XXX: currently scaled to variance=n_samples
             if normalize:
                 X_std = np.sqrt(np.sum(X ** 2, axis=0))
                 X_std[X_std == 0] = 1
                 X /= X_std
             else:
                 X_std = np.ones(X.shape[1])
-        y_mean = np.average(y, axis=0, weights=sample_weight)
-        y = y - y_mean
+        y_offset = np.average(y, axis=0, weights=sample_weight)
+        y = y - y_offset
     else:
-        X_mean = np.zeros(X.shape[1])
+        X_offset = np.zeros(X.shape[1])
         X_std = np.ones(X.shape[1])
-        y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
-    return X, y, X_mean, y_mean, X_std
+        y_offset = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
+    return X, y, X_offset, y_offset, X_std
 
 
 def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
@@ -150,10 +151,11 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
 
         X = (X - X_offset) / X_scale
 
-    If sample_weight is not None, then the weighted mean of X and y
-    is zero, and not the mean itself. If return_mean=True, the mean, eventually
-    weighted, is returned, independently of whether X was centered (option used
-    for optimization with sparse data in coordinate_descend).
+    X_scale is the L2 norm of X - X_offset. If sample_weight is not None,
+    then the weighted mean of X and y is zero, and not the mean itself. If
+    return_mean=True, the mean, eventually weighted, is returned, independently
+    of whether X was centered (option used for optimization with sparse data in
+    coordinate_descend).
 
     This is here because nearly all linear models will want their data to be
     centered.
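Per the amended docstring, X_scale is the L2 norm of the centered columns, so normalize=True yields unit-norm rather than unit-variance features. A NumPy-only sketch of that contract, reimplementing the formula rather than calling the private helper:

    import numpy as np

    X = np.array([[1., 10.], [2., 20.], [3., 30.]])
    X_offset = X.mean(axis=0)                           # what fit_intercept subtracts
    X_centered = X - X_offset
    X_scale = np.sqrt(np.sum(X_centered ** 2, axis=0))  # L2 norm per column
    X_scale[X_scale == 0] = 1                           # guard constant columns
    X_pre = X_centered / X_scale                        # columns now have unit L2 norm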
@@ -210,11 +212,11 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
 
 def _rescale_data(X, y, sample_weight):
     """Rescale data so as to support sample_weight"""
-    sample_weight = sample_weight * np.ones(y.shape[0])
+    n_samples = X.shape[0]
+    sample_weight = sample_weight * np.ones(n_samples)
     sample_weight = np.sqrt(sample_weight)
-    sw_matrix = np.diag(sample_weight)
-    if sp.issparse(X) or sp.issparse(y):
-        sw_matrix = sparse.dia_matrix(sw_matrix)
+    sw_matrix = sparse.dia_matrix((sample_weight, 0),
+                                  shape=(n_samples, n_samples))
     X = safe_sparse_dot(sw_matrix, X)
     y = safe_sparse_dot(sw_matrix, y)
     return X, y
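The rewrite of _rescale_data drops the dense np.diag matrix, which costs O(n_samples**2) memory, in favor of a sparse diagonal in all cases. A hedged sketch of the row-weighting it performs, on made-up data:

    import numpy as np
    from scipy import sparse
    from sklearn.utils.extmath import safe_sparse_dot

    X = np.arange(6.).reshape(3, 2)
    y = np.array([1., 2., 3.])
    sample_weight = np.array([1., 4., 9.])

    n_samples = X.shape[0]
    sw = np.sqrt(sample_weight * np.ones(n_samples))
    # Diagonal stored sparsely: O(n) memory instead of O(n^2) with np.diag.
    sw_matrix = sparse.dia_matrix((sw, 0), shape=(n_samples, n_samples))
    X_w = safe_sparse_dot(sw_matrix, X)  # row i multiplied by sqrt(w_i)
    y_w = safe_sparse_dot(sw_matrix, y)  # so OLS on (X_w, y_w) is weighted LS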
@@ -267,12 +269,12 @@ def predict(self, X):
 
     _preprocess_data = staticmethod(_preprocess_data)
 
-    def _set_intercept(self, X_mean, y_mean, X_norm):
+    def _set_intercept(self, X_offset, y_offset, X_scale):
         """Set the intercept_
         """
         if self.fit_intercept:
-            self.coef_ = self.coef_ / X_norm
-            self.intercept_ = y_mean - np.dot(X_mean, self.coef_.T)
+            self.coef_ = self.coef_ / X_scale
+            self.intercept_ = y_offset - np.dot(X_offset, self.coef_.T)
         else:
             self.intercept_ = 0.
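_set_intercept undoes the preprocessing: coefficients learnt on scaled data are divided by X_scale, and the intercept is recovered from the offsets. A numeric sketch with toy values standing in for the preprocessing outputs:

    import numpy as np

    X_offset = np.array([2.0, 5.0])   # column means removed before fitting
    y_offset = 3.0                    # target mean removed before fitting
    X_scale = np.array([1.0, 2.0])    # column norms divided out
    coef = np.array([0.5, 1.5])       # learnt on the preprocessed data

    coef = coef / X_scale                          # back to original units
    intercept = y_offset - np.dot(X_offset, coef)  # shift the hyperplane back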

@@ -425,11 +427,11 @@ class LinearRegression(LinearModel, RegressorMixin):
 
     normalize : boolean, optional, default False
         If True, the regressors X will be normalized before regression.
-        When the regressors are normalized, the fitted `coef_` are the same
-        independently of the number of training samples; hence, hyperparameters
-        learnt by cross-validation will be compatible among different training
-        and validation sets. The same property is not valid for standardized
-        data. However, if you wish to standardize, please use
+        This parameter is ignored when `fit_intercept` is set to `False`.
+        When the regressors are normalized, note that this makes the
+        hyperparameters learnt more robust and almost independent of the number
+        of samples. The same property is not valid for standardized data.
+        However, if you wish to standardize, please use
         `preprocessing.StandardScaler` before calling `fit` on an estimator
         with `normalize=False`.
 
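The amended docstring steers users who want true standardization to preprocessing.StandardScaler with normalize left off. A minimal sketch of that recommendation as a pipeline:

    import numpy as np
    from sklearn.linear_model import LinearRegression
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    rng = np.random.RandomState(0)
    X = rng.randn(20, 3)
    y = np.dot(X, np.array([1., 2., 3.])) + 0.5

    # Standardize explicitly, then fit with normalize at its False default.
    model = make_pipeline(StandardScaler(), LinearRegression())
    model.fit(X, y)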
@@ -510,7 +512,7 @@ def fit(self, X, y, sample_weight=None):
         if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:
             raise ValueError("Sample weights must be 1D array or scalar")
 
-        X, y, X_mean, y_mean, X_norm = self._preprocess_data(
+        X, y, X_offset, y_offset, X_scale = self._preprocess_data(
             X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
             copy=self.copy_X, sample_weight=sample_weight)

@@ -537,7 +539,7 @@ def fit(self, X, y, sample_weight=None):
 
         if y.ndim == 1:
             self.coef_ = np.ravel(self.coef_)
-        self._set_intercept(X_mean, y_mean, X_norm)
+        self._set_intercept(X_offset, y_offset, X_scale)
         return self
 

@@ -547,16 +549,16 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy):
 
     if sparse.isspmatrix(X):
         precompute = False
-        X, y, X_mean, y_mean, X_norm = _preprocess_data(
+        X, y, X_offset, y_offset, X_scale = _preprocess_data(
             X, y, fit_intercept=fit_intercept, normalize=normalize,
             return_mean=True)
     else:
         # copy was done in fit if necessary
-        X, y, X_mean, y_mean, X_norm = _preprocess_data(
+        X, y, X_offset, y_offset, X_scale = _preprocess_data(
             X, y, fit_intercept=fit_intercept, normalize=normalize, copy=copy)
     if hasattr(precompute, '__array__') and (
-            fit_intercept and not np.allclose(X_mean, np.zeros(n_features)) or
-            normalize and not np.allclose(X_norm, np.ones(n_features))):
+            fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or
+            normalize and not np.allclose(X_scale, np.ones(n_features))):
         warnings.warn("Gram matrix was provided but X was centered"
                       " to fit intercept, "
                       "or X was normalized : recomputing Gram matrix.",
@@ -593,4 +595,4 @@ def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy):
                           order='F')
         np.dot(y.T, X, out=Xy.T)
 
-    return X, y, X_mean, y_mean, X_norm, precompute, Xy
+    return X, y, X_offset, y_offset, X_scale, precompute, Xy
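_pre_fit recomputes a user-supplied Gram matrix whenever centering or normalization has changed X, because X.T dot X is not invariant under those transforms. A short sketch of the invariance failure that the np.allclose checks guard against:

    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.randn(5, 3)
    gram = np.dot(X.T, X)            # precomputed on the raw X

    X_centered = X - X.mean(axis=0)
    # Centering changes the Gram matrix, so the precomputed one is stale
    # and _pre_fit warns before recomputing it.
    assert not np.allclose(gram, np.dot(X_centered.T, X_centered))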

sklearn/linear_model/bayes.py

Lines changed: 4 additions & 4 deletions
@@ -142,7 +142,7 @@ def fit(self, X, y):
         self : returns an instance of self.
         """
         X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True)
-        X, y, X_mean, y_mean, X_std = self._preprocess_data(
+        X, y, X_offset, y_offset, X_scale = self._preprocess_data(
             X, y, self.fit_intercept, self.normalize, self.copy_X)
         n_samples, n_features = X.shape
 
@@ -217,7 +217,7 @@ def fit(self, X, y):
         self.lambda_ = lambda_
         self.coef_ = coef_
 
-        self._set_intercept(X_mean, y_mean, X_std)
+        self._set_intercept(X_offset, y_offset, X_scale)
         return self
 
 
@@ -365,7 +365,7 @@ def fit(self, X, y):
         n_samples, n_features = X.shape
         coef_ = np.zeros(n_features)
 
-        X, y, X_mean, y_mean, X_std = self._preprocess_data(
+        X, y, X_offset, y_offset, X_scale = self._preprocess_data(
             X, y, self.fit_intercept, self.normalize, self.copy_X)
 
         # Launch the convergence loop
@@ -432,5 +432,5 @@ def fit(self, X, y):
         self.alpha_ = alpha_
         self.sigma_ = sigma_
         self.lambda_ = lambda_
-        self._set_intercept(X_mean, y_mean, X_std)
+        self._set_intercept(X_offset, y_offset, X_scale)
         return self

sklearn/linear_model/coordinate_descent.py

Lines changed: 18 additions & 18 deletions
@@ -94,10 +94,10 @@ def _alpha_grid(X, y, Xy=None, l1_ratio=1.0, fit_intercept=True,
         if sparse_center:
             # Workaround to find alpha_max for sparse matrices.
             # since we should not destroy the sparsity of such matrices.
-            _, _, X_mean, _, X_std = _preprocess_data(X, y, fit_intercept,
+            _, _, X_offset, _, X_scale = _preprocess_data(X, y, fit_intercept,
                                                       normalize,
                                                       return_mean=True)
-            mean_dot = X_mean * np.sum(y)
+            mean_dot = X_offset * np.sum(y)
 
     if Xy.ndim == 1:
         Xy = Xy[:, np.newaxis]
@@ -106,7 +106,7 @@ def _alpha_grid(X, y, Xy=None, l1_ratio=1.0, fit_intercept=True,
     if fit_intercept:
         Xy -= mean_dot[:, np.newaxis]
     if normalize:
-        Xy /= X_std[:, np.newaxis]
+        Xy /= X_scale[:, np.newaxis]
 
     alpha_max = (np.sqrt(np.sum(Xy ** 2, axis=1)).max() /
                  (n_samples * l1_ratio))
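For dense data the grid's largest value reduces to max_j |x_j.T y| / (n_samples * l1_ratio), the smallest alpha at which every Lasso coefficient is exactly zero; the sparse branch above only reconstructs the centered X.T y without densifying X. A hedged dense-case sketch:

    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.randn(50, 4)
    y = rng.randn(50)
    n_samples, l1_ratio = X.shape[0], 1.0   # l1_ratio=1.0: pure Lasso

    X = X - X.mean(axis=0)                  # centering, as fit_intercept implies
    Xy = np.dot(X.T, y)[:, np.newaxis]
    alpha_max = np.sqrt(np.sum(Xy ** 2, axis=1)).max() / (n_samples * l1_ratio)
    # Any alpha >= alpha_max drives all Lasso coefficients to zero.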
@@ -391,17 +391,17 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
 
     # MultiTaskElasticNet does not support sparse matrices
     if not multi_output and sparse.isspmatrix(X):
-        if 'X_mean' in params:
+        if 'X_offset' in params:
             # As sparse matrices are not actually centered we need this
             # to be passed to the CD solver.
-            X_sparse_scaling = params['X_mean'] / params['X_std']
+            X_sparse_scaling = params['X_offset'] / params['X_scale']
         else:
             X_sparse_scaling = np.zeros(n_features)
 
     # X should be normalized and fit already if function is called
     # from ElasticNet.fit
     if check_input:
-        X, y, X_mean, y_mean, X_std, precompute, Xy = \
+        X, y, X_offset, y_offset, X_scale, precompute, Xy = \
             _pre_fit(X, y, Xy, precompute, normalize=False,
                      fit_intercept=False, copy=False)
     if alphas is None:
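The X_offset / X_scale ratio exists because subtracting a nonzero column mean from a sparse matrix would fill in every stored zero; the CD solver therefore applies the centering implicitly. A toy demonstration of the densification being avoided:

    import numpy as np
    from scipy import sparse

    X = sparse.csr_matrix(np.array([[0., 1.],
                                    [0., 0.],
                                    [2., 0.]]))   # 3 of 6 entries stored
    X_offset = np.asarray(X.mean(axis=0)).ravel()

    # Explicit centering destroys sparsity: every zero becomes -mean.
    X_centered = X.toarray() - X_offset
    print(np.count_nonzero(X_centered))           # all 6 entries now nonzero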
@@ -673,7 +673,7 @@ def fit(self, X, y, check_input=True):
                              multi_output=True, y_numeric=True)
             y = check_array(y, dtype=np.float64, order='F', copy=False,
                             ensure_2d=False)
-        X, y, X_mean, y_mean, X_std, precompute, Xy = \
+        X, y, X_offset, y_offset, X_scale, precompute, Xy = \
             _pre_fit(X, y, None, self.precompute, self.normalize,
                      self.fit_intercept, copy=False)
         if y.ndim == 1:
@@ -710,7 +710,7 @@ def fit(self, X, y, check_input=True):
                 precompute=precompute, Xy=this_Xy,
                 fit_intercept=False, normalize=False, copy_X=True,
                 verbose=False, tol=self.tol, positive=self.positive,
-                X_mean=X_mean, X_std=X_std, return_n_iter=True,
+                X_offset=X_offset, X_scale=X_scale, return_n_iter=True,
                 coef_init=coef_[k], max_iter=self.max_iter,
                 random_state=self.random_state,
                 selection=self.selection,
@@ -723,7 +723,7 @@ def fit(self, X, y, check_input=True):
             self.n_iter_ = self.n_iter_[0]
 
         self.coef_, self.dual_gap_ = map(np.squeeze, [coef_, dual_gaps_])
-        self._set_intercept(X_mean, y_mean, X_std)
+        self._set_intercept(X_offset, y_offset, X_scale)
 
         # return self for chaining fit and predict calls
         return self
@@ -963,14 +963,14 @@ def _path_residuals(X, y, train, test, path, path_params, alphas=None,
         # Fall back to default enet_multitask
         precompute = False
 
-    X_train, y_train, X_mean, y_mean, X_std, precompute, Xy = \
+    X_train, y_train, X_offset, y_offset, X_scale, precompute, Xy = \
         _pre_fit(X_train, y_train, None, precompute, normalize, fit_intercept,
                  copy=False)
 
     path_params = path_params.copy()
     path_params['Xy'] = Xy
-    path_params['X_mean'] = X_mean
-    path_params['X_std'] = X_std
+    path_params['X_offset'] = X_offset
+    path_params['X_scale'] = X_scale
     path_params['precompute'] = precompute
     path_params['copy_X'] = False
     path_params['alphas'] = alphas
@@ -987,14 +987,14 @@ def _path_residuals(X, y, train, test, path, path_params, alphas=None,
     if y.ndim == 1:
         # Doing this so that it becomes coherent with multioutput.
         coefs = coefs[np.newaxis, :, :]
-        y_mean = np.atleast_1d(y_mean)
+        y_offset = np.atleast_1d(y_offset)
         y_test = y_test[:, np.newaxis]
 
     if normalize:
-        nonzeros = np.flatnonzero(X_std)
-        coefs[:, nonzeros] /= X_std[nonzeros][:, np.newaxis]
+        nonzeros = np.flatnonzero(X_scale)
+        coefs[:, nonzeros] /= X_scale[nonzeros][:, np.newaxis]
 
-    intercepts = y_mean[:, np.newaxis] - np.dot(X_mean, coefs)
+    intercepts = y_offset[:, np.newaxis] - np.dot(X_offset, coefs)
     if sparse.issparse(X_test):
         n_order, n_features, n_alphas = coefs.shape
         # Work around for sparse matices since coefs is a 3-D numpy array.
@@ -1700,7 +1700,7 @@ def fit(self, X, y):
             raise ValueError("X and y have inconsistent dimensions (%d != %d)"
                              % (n_samples, y.shape[0]))
 
-        X, y, X_mean, y_mean, X_std = _preprocess_data(
+        X, y, X_offset, y_offset, X_scale = _preprocess_data(
             X, y, self.fit_intercept, self.normalize, copy=False)
 
         if not self.warm_start or self.coef_ is None:
@@ -1721,7 +1721,7 @@ def fit(self, X, y):
             self.coef_, l1_reg, l2_reg, X, y, self.max_iter, self.tol,
             check_random_state(self.random_state), random)
 
-        self._set_intercept(X_mean, y_mean, X_std)
+        self._set_intercept(X_offset, y_offset, X_scale)
 
         if self.dual_gap_ > self.eps_:
             warnings.warn('Objective did not converge, you might want'

sklearn/linear_model/least_angle.py

Lines changed: 2 additions & 2 deletions
@@ -635,7 +635,7 @@ def fit(self, X, y, Xy=None):
         X, y = check_X_y(X, y, y_numeric=True, multi_output=True)
         n_features = X.shape[1]
 
-        X, y, X_mean, y_mean, X_std = self._preprocess_data(X, y,
+        X, y, X_offset, y_offset, X_scale = self._preprocess_data(X, y,
                                                             self.fit_intercept,
                                                             self.normalize,
                                                             self.copy_X)
@@ -702,7 +702,7 @@ def fit(self, X, y, Xy=None):
         if n_targets == 1:
             self.alphas_ = self.alphas_[0]
             self.n_iter_ = self.n_iter_[0]
-        self._set_intercept(X_mean, y_mean, X_std)
+        self._set_intercept(X_offset, y_offset, X_scale)
         return self
 

sklearn/linear_model/omp.py

Lines changed: 2 additions & 2 deletions
@@ -636,7 +636,7 @@ def fit(self, X, y):
         X, y = check_X_y(X, y, multi_output=True, y_numeric=True)
         n_features = X.shape[1]
 
-        X, y, X_mean, y_mean, X_std, Gram, Xy = \
+        X, y, X_offset, y_offset, X_scale, Gram, Xy = \
             _pre_fit(X, y, None, self.precompute, self.normalize,
                      self.fit_intercept, copy=True)
 
@@ -664,7 +664,7 @@ def fit(self, X, y):
                                copy_Gram=True, copy_Xy=True,
                                return_n_iter=True)
         self.coef_ = coef_.T
-        self._set_intercept(X_mean, y_mean, X_std)
+        self._set_intercept(X_offset, y_offset, X_scale)
         return self
 

sklearn/linear_model/randomized_l1.py

Lines changed: 4 additions & 4 deletions
@@ -94,7 +94,7 @@ def fit(self, X, y):
         X = as_float_array(X, copy=False)
         n_samples, n_features = X.shape
 
-        X, y, X_mean, y_mean, X_std = \
+        X, y, X_offset, y_offset, X_scale = \
            self._preprocess_data(X, y, self.fit_intercept, self.normalize)
 
         estimator_func, params = self._make_estimator_and_params(X, y)
@@ -515,9 +515,9 @@ def _make_estimator_and_params(self, X, y):
 
     def _preprocess_data(self, X, y, fit_intercept, normalize=False):
         """Center the data in X but not in y"""
-        X, _, Xmean, _, X_std = _preprocess_data(X, y, fit_intercept,
-                                                 normalize=normalize)
-        return X, y, Xmean, y, X_std
+        X, _, X_offset, _, X_scale = _preprocess_data(X, y, fit_intercept,
+                                                      normalize=normalize)
+        return X, y, X_offset, y, X_scale
 
 
 ###############################################################################
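Note the override's contract: for the randomized sparse models only X is centered, and the original y is returned in both the y and y_offset slots so that classification labels pass through untouched. A hedged reimplementation of the same pattern (the real method lives on the estimator and delegates to the private _preprocess_data):

    import numpy as np

    def preprocess_X_only(X, y):
        """Center X but leave the targets y alone."""
        X_offset = X.mean(axis=0)
        X_scale = np.ones(X.shape[1])
        # y appears twice: once as the (unchanged) data, once in the
        # slot where a y offset would normally go.
        return X - X_offset, y, X_offset, y, X_scale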
