8000 normalized param deprecated + test fixed · scikit-learn/scikit-learn@f1356ff · GitHub

Commit f1356ff

Author: giorgiop (committed)

normalized param deprecated + test fixed

1 parent e8d2b93 · commit f1356ff

File tree: 4 files changed (+208, -89 lines)

sklearn/linear_model/base.py

Lines changed: 120 additions & 30 deletions
@@ -34,13 +34,10 @@
 from ..utils.seq_dataset import ArrayDataset, CSRDataset


-###
-### TODO: intercept for all models
-### We should define a common function to center data instead of
-### repeating the same code inside each fit method.
+# TODO: intercept for all models

-### TODO: bayesian_ridge_regression and bayesian_regression_ard
-### should be squashed into its respective objects.
+# TODO: bayesian_ridge_regression and bayesian_regression_ard
+# should be squashed into its respective objects.

 SPARSE_INTERCEPT_DECAY = 0.01
 # For sparse data intercept updates are scaled by this decay factor to avoid
@@ -69,12 +66,9 @@ def make_dataset(X, y, sample_weight, random_state=None):
     return dataset, intercept_decay


-def sparse_center_data(X, y, fit_intercept, normalize=False):
-    """
-    Compute information needed to center data to have mean zero along
-    axis 0. Be aware that X will not be centered since it would break
-    the sparsity, but will be normalized if asked so.
-    """
+# TODO: this reproduces the behavior prior to 0.17
+# Must be removed in 0.19
+def _sparse_center_data(X, y, fit_intercept, normalize=None):
     if fit_intercept:
         # we might require not to change the csr matrix sometimes
         # store a copy if normalize is True.
@@ -106,15 +100,96 @@ def sparse_center_data(X, y, fit_intercept, normalize=False):
     return X, y, X_mean, y_mean, X_std


-def center_data(X, y, fit_intercept, normalize=False, copy=True,
-                sample_weight=None):
+def sparse_center_data(X, y, fit_intercept, standardize=False,
+                       normalize=None):
+    """
+    Compute information needed to center data to have mean zero along
+    axis 0. Be aware that X will not be centered since it would break
+    the sparsity, but will be standardized if asked so.
+    """
+    if normalize is not None:
+        warnings.warn("The `normalize` parameter is not in use anymore from "
+                      "version 0.17 and will be removed in 0.19. If you want "
+                      "to standardize the data instead, use "
+                      "`standardize=True`", DeprecationWarning)
+        return _sparse_center_data(X, y, fit_intercept, normalize)
+
+    if fit_intercept:
+        # we might require not to change the csr matrix sometimes
+        # store a copy if standardize is True.
+        # Change dtype to float64 since mean_variance_axis accepts
+        # it that way.
+        if sp.isspmatrix(X) and X.getformat() == 'csr':
+            X = sp.csr_matrix(X, copy=standardize, dtype=np.float64)
+        else:
+            X = sp.csc_matrix(X, copy=standardize, dtype=np.float64)
+
+        X_mean, X_var = mean_variance_axis(X, axis=0)
+        if standardize:
+            # transform variance to std in-place
+            X_std = np.sqrt(X_var, X_var)
+            del X_var
+            X_std[X_std == 0] = 1
+            inplace_column_scale(X, 1. / X_std)
+        else:
+            X_std = np.ones(X.shape[1])
+        y_mean = y.mean(axis=0)
+        y = y - y_mean
+    else:
+        X_mean = np.zeros(X.shape[1])
+        X_std = np.ones(X.shape[1])
+        y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
+
+    return X, y, X_mean, y_mean, X_std
+
+
+# TODO: this reproduces the behavior prior to 0.17
+# Must be removed in 0.19
+def _center_data(X, y, fit_intercept, normalize=False, copy=True,
+                 sample_weight=None):
+
+    X = as_float_array(X, copy)
+    if fit_intercept:
+        if isinstance(sample_weight, numbers.Number):
+            sample_weight = None
+        if sp.issparse(X):
+            X_mean = np.zeros(X.shape[1])
+            X_std = np.ones(X.shape[1])
+        else:
+            X_mean = np.average(X, axis=0, weights=sample_weight)
+            X -= X_mean
+            if normalize:
+                # XXX: currently scaled to variance=n_samples
+                X_std = np.sqrt(np.sum(X ** 2, axis=0))
+                X_std[X_std == 0] = 1
+                X /= X_std
+            else:
+                X_std = np.ones(X.shape[1])
+        y_mean = np.average(y, axis=0, weights=sample_weight)
+        y = y - y_mean
+    else:
+        X_mean = np.zeros(X.shape[1])
+        X_std = np.ones(X.shape[1])
+        y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
+    return X, y, X_mean, y_mean, X_std
+
+
+def center_data(X, y, fit_intercept, standardize=False, normalize=None,
+                copy=True, sample_weight=None):
     """
     Centers data to have mean zero along axis 0. This is here because
     nearly all linear models will want their data to be centered.

     If sample_weight is not None, then the weighted mean of X and y
     is zero, and not the mean itself
     """
+    if normalize is not None:
+        warnings.warn("The `normalize` parameter is not in use anymore from "
+                      "version 0.17 and will be removed in 0.19. If you want "
+                      "to standardize the data instead, use "
+                      "`standardize=True`", DeprecationWarning)
+        return _center_data(X, y, fit_intercept, normalize, copy,
+                            sample_weight)
+
     X = as_float_array(X, copy)
     if fit_intercept:
         if isinstance(sample_weight, numbers.Number):
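A quick illustration of the dispatch logic above (an illustrative snippet, not part of the commit): passing the deprecated `normalize` argument warns and routes through the legacy `_center_data` helper, while `standardize=True` takes the new code path.

import warnings
import numpy as np
from sklearn.linear_model.base import center_data  # module path as of this commit

X = np.array([[1., 2.], [3., 4.], [5., 6.]])
y = np.array([1., 2., 3.])

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Old-style call: falls back to _center_data and warns.
    center_data(X, y, fit_intercept=True, normalize=True)
assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# New-style call: silent, rescales columns to unit variance.
X_c, y_c, X_mean, y_mean, X_std = center_data(
    X, y, fit_intercept=True, standardize=True)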
@@ -125,9 +200,8 @@ def center_data(X, y, fit_intercept, normalize=False, copy=True,
         else:
             X_mean = np.average(X, axis=0, weights=sample_weight)
             X -= X_mean
-            if normalize:
-                # XXX: currently scaled to variance=n_samples
-                X_std = np.sqrt(np.sum(X ** 2, axis=0))
+            if standardize:
+                X_std = np.sqrt(np.mean(X ** 2, axis=0))
                 X_std[X_std == 0] = 1
                 X /= X_std
             else:
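The switch from np.sum to np.mean in the new branch changes the scaling semantics, not just the name: the old `normalize` divided each centered column by its L2 norm, whereas `standardize` divides by the column standard deviation, giving unit variance. A standalone sketch with illustrative data:

import numpy as np

rng = np.random.RandomState(0)
Xc = rng.randn(100, 3) * [1., 5., 10.]
Xc -= Xc.mean(axis=0)                          # already centered

# Old `normalize` scaling: unit column norm, variance ends up ~1/n_samples.
X_old = Xc / np.sqrt(np.sum(Xc ** 2, axis=0))
print(X_old.var(axis=0))                       # ~[0.01, 0.01, 0.01]

# New `standardize` scaling: unit variance per column.
X_new = Xc / np.sqrt(np.mean(Xc ** 2, axis=0))
print(X_new.var(axis=0))                       # ~[1., 1., 1.]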
@@ -356,8 +430,8 @@ class LinearRegression(LinearModel, RegressorMixin):
         to false, no intercept will be used in calculations
         (e.g. data is expected to be already centered).

-    normalize : boolean, optional, default False
-        If True, the regressors X will be normalized before regression.
+    standardize : boolean, optional, default False
+        If True, the regressors X will be standardized before regression.

     copy_X : boolean, optional, default True
         If True, X will be copied; else, it may be overwritten.
@@ -385,13 +459,26 @@ class LinearRegression(LinearModel, RegressorMixin):

     """

-    def __init__(self, fit_intercept=True, normalize=False, copy_X=True,
-                 n_jobs=1):
+    def __init__(self, fit_intercept=True, standardize=False, normalize=None,
+                 copy_X=True, n_jobs=1):
+        if normalize is not None:
+            warnings.warn("The `normalize` parameter is not in use anymore "
+                          "from version 0.17 and will be removed in 0.19. If "
+                          "you want the data to be standardized instead, use "
+                          "`standardize=True`", DeprecationWarning)
         self.fit_intercept = fit_intercept
-        self.normalize = normalize
+        self.standardize = standardize
         self.copy_X = copy_X
         self.n_jobs = n_jobs

+    @property
+    @deprecated("The `normalize` attribute is not in use anymore "
+                "from version 0.17 and will be removed in 0.19. If "
+                "you want the data to be standardized instead, use "
+                "`standardize=True`")
+    def normalize(self):
+        return None
+
     def fit(self, X, y, sample_weight=None):
         """
         Fit linear model.
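The @property/@deprecated pairing added above is a reusable pattern for retiring a public attribute: the removed attribute becomes a read-only accessor that warns on every access. A minimal self-contained sketch (MyEstimator is hypothetical):

import warnings
from sklearn.utils import deprecated

class MyEstimator(object):
    def __init__(self, standardize=False):
        self.standardize = standardize

    # Decorators apply bottom-up: deprecated() wraps the getter so it warns,
    # then property makes it a read-only attribute.
    @property
    @deprecated("`normalize` is deprecated; use `standardize` instead")
    def normalize(self):
        return None

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    MyEstimator().normalize                    # attribute access triggers the warning
assert any(issubclass(w.category, DeprecationWarning) for w in caught)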
@@ -416,11 +503,13 @@ def fit(self, X, y, sample_weight=None):
         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
                          y_numeric=True, multi_output=True)

-        if ((sample_weight is not None) and np.atleast_1d(sample_weight).ndim > 1):
+        if ((sample_weight is not None) and
+                np.atleast_1d(sample_weight).ndim > 1):
             sample_weight = column_or_1d(sample_weight, warn=True)

         X, y, X_mean, y_mean, X_std = self._center_data(
-            X, y, self.fit_intercept, self.normalize, self.copy_X,
+            X, y, fit_intercept=self.fit_intercept,
+            standardize=self.standardize, copy=self.copy_X,
             sample_weight=sample_weight)

         if sample_weight is not None:
@@ -450,24 +539,25 @@ def fit(self, X, y, sample_weight=None):
         return self


-def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy):
+def _pre_fit(X, y, Xy, precompute, standardize, fit_intercept, copy):
     """Aux function used at beginning of fit in linear models"""
     n_samples, n_features = X.shape

     if sparse.isspmatrix(X):
         precompute = False
         X, y, X_mean, y_mean, X_std = sparse_center_data(
-            X, y, fit_intercept, normalize)
+            X, y, fit_intercept=fit_intercept, standardize=standardize)
     else:
         # copy was done in fit if necessary
         X, y, X_mean, y_mean, X_std = center_data(
-            X, y, fit_intercept, normalize, copy=copy)
+            X, y, fit_intercept=fit_intercept, standardize=standardize,
+            copy=copy)
     if hasattr(precompute, '__array__') and (
-        fit_intercept and not np.allclose(X_mean, np.zeros(n_features))
-        or normalize and not np.allclose(X_std, np.ones(n_features))):
+            fit_intercept and not np.allclose(X_mean, np.zeros(n_features)) or
+            standardize and not np.allclose(X_std, np.ones(n_features))):
         warnings.warn("Gram matrix was provided but X was centered"
                       " to fit intercept, "
-                      "or X was normalized : recomputing Gram matrix.",
+                      "or X was standardized : recomputing Gram matrix.",
                       UserWarning)
         # recompute Gram
         precompute = 'auto'
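The np.allclose guard in `_pre_fit` exists because a user-supplied Gram matrix computed on the raw X is stale once X has been centered or standardized. A small sketch of the mismatch (illustrative data only):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(50, 4) + 3.                    # columns with nonzero means
gram = np.dot(X.T, X)                        # Gram matrix precomputed on raw X

Xc = X - X.mean(axis=0)                      # what fit_intercept=True works on
print(np.allclose(gram, np.dot(Xc.T, Xc)))   # False -> Gram must be recomputed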

sklearn/linear_model/bayes.py

Lines changed: 38 additions & 38 deletions
@@ -139,7 +139,7 @@ def fit(self, X, y):
             X, y, self.fit_intercept, self.normalize, self.copy_X)
         n_samples, n_features = X.shape

-        ### Initialization of the values of the parameters
+        # Initialization of the values of the parameters
         alpha_ = 1. / np.var(y)
         lambda_ = 1.
@@ -156,10 +156,10 @@ def fit(self, X, y):
         U, S, Vh = linalg.svd(X, full_matrices=False)
         eigen_vals_ = S ** 2

-        ### Convergence loop of the bayesian ridge regression
+        # Convergence loop of the bayesian ridge regression
         for iter_ in range(self.n_iter):

-            ### Compute mu and sigma
+            # Compute mu and sigma
             # sigma_ = lambda_ / alpha_ * np.eye(n_features) + np.dot(X.T, X)
             # coef_ = sigma_^-1 * XT * y
             if n_samples > n_features:
@@ -178,28 +178,28 @@ def fit(self, X, y):
             logdet_sigma_[:n_samples] += alpha_ * eigen_vals_
             logdet_sigma_ = - np.sum(np.log(logdet_sigma_))

-            ### Update alpha and lambda
+            # Update alpha and lambda
             rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)
-            gamma_ = (np.sum((alpha_ * eigen_vals_)
-                      / (lambda_ + alpha_ * eigen_vals_)))
-            lambda_ = ((gamma_ + 2 * lambda_1)
-                       / (np.sum(coef_ ** 2) + 2 * lambda_2))
-            alpha_ = ((n_samples - gamma_ + 2 * alpha_1)
-                      / (rmse_ + 2 * alpha_2))
-
-            ### Compute the objective function
+            gamma_ = (np.sum((alpha_ * eigen_vals_) /
+                      (lambda_ + alpha_ * eigen_vals_)))
+            lambda_ = ((gamma_ + 2 * lambda_1) /
+                       (np.sum(coef_ ** 2) + 2 * lambda_2))
+            alpha_ = ((n_samples - gamma_ + 2 * alpha_1) /
+                      (rmse_ + 2 * alpha_2))
+
+            # Compute the objective function
             if self.compute_score:
                 s = lambda_1 * log(lambda_) - lambda_2 * lambda_
                 s += alpha_1 * log(alpha_) - alpha_2 * alpha_
-                s += 0.5 * (n_features * log(lambda_)
-                            + n_samples * log(alpha_)
-                            - alpha_ * rmse_
-                            - (lambda_ * np.sum(coef_ ** 2))
-                            - logdet_sigma_
-                            - n_samples * log(2 * np.pi))
+                s += 0.5 * (n_features * log(lambda_) +
+                            n_samples * log(alpha_) -
+                            alpha_ * rmse_ -
+                            (lambda_ * np.sum(coef_ ** 2)) -
+                            logdet_sigma_ -
+                            n_samples * log(2 * np.pi))
                 self.scores_.append(s)

-            ### Check for convergence
+            # Check for convergence
             if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:
                 if verbose:
                     print("Convergence after ", str(iter_), " iterations")
@@ -354,7 +354,7 @@ def fit(self, X, y):
         X, y, X_mean, y_mean, X_std = self._center_data(
             X, y, self.fit_intercept, self.normalize, self.copy_X)

-        ### Launch the convergence loop
+        # Launch the convergence loop
         keep_lambda = np.ones(n_features, dtype=bool)

         lambda_1 = self.lambda_1
@@ -363,51 +363,51 @@ def fit(self, X, y):
         alpha_2 = self.alpha_2
         verbose = self.verbose

-        ### Initialization of the values of the parameters
+        # Initialization of the values of the parameters
         alpha_ = 1. / np.var(y)
         lambda_ = np.ones(n_features)

         self.scores_ = list()
         coef_old_ = None

-        ### Iterative procedure of ARDRegression
+        # Iterative procedure of ARDRegression
         for iter_ in range(self.n_iter):
-            ### Compute mu and sigma (using Woodbury matrix identity)
+            # Compute mu and sigma (using Woodbury matrix identity)
             sigma_ = pinvh(np.eye(n_samples) / alpha_ +
                            np.dot(X[:, keep_lambda] *
                                   np.reshape(1. / lambda_[keep_lambda], [1, -1]),
                                   X[:, keep_lambda].T))
-            sigma_ = np.dot(sigma_, X[:, keep_lambda]
-                            * np.reshape(1. / lambda_[keep_lambda], [1, -1]))
-            sigma_ = - np.dot(np.reshape(1. / lambda_[keep_lambda], [-1, 1])
-                              * X[:, keep_lambda].T, sigma_)
+            sigma_ = np.dot(sigma_, X[:, keep_lambda] *
+                            np.reshape(1. / lambda_[keep_lambda], [1, -1]))
+            sigma_ = - np.dot(np.reshape(1. / lambda_[keep_lambda], [-1, 1]) *
+                              X[:, keep_lambda].T, sigma_)
             sigma_.flat[::(sigma_.shape[1] + 1)] += 1. / lambda_[keep_lambda]
             coef_[keep_lambda] = alpha_ * np.dot(
                 sigma_, np.dot(X[:, keep_lambda].T, y))

-            ### Update alpha and lambda
+            # Update alpha and lambda
             rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)
             gamma_ = 1. - lambda_[keep_lambda] * np.diag(sigma_)
-            lambda_[keep_lambda] = ((gamma_ + 2. * lambda_1)
-                                    / ((coef_[keep_lambda]) ** 2
-                                       + 2. * lambda_2))
-            alpha_ = ((n_samples - gamma_.sum() + 2. * alpha_1)
-                      / (rmse_ + 2. * alpha_2))
+            lambda_[keep_lambda] = ((gamma_ + 2. * lambda_1) /
+                                    ((coef_[keep_lambda]) ** 2 +
+                                     2. * lambda_2))
+            alpha_ = ((n_samples - gamma_.sum() + 2. * alpha_1) /
+                      (rmse_ + 2. * alpha_2))

-            ### Prune the weights with a precision over a threshold
+            # Prune the weights with a precision over a threshold
             keep_lambda = lambda_ < self.threshold_lambda
             coef_[~keep_lambda] = 0

-            ### Compute the objective function
+            # Compute the objective function
             if self.compute_score:
                 s = (lambda_1 * np.log(lambda_) - lambda_2 * lambda_).sum()
                 s += alpha_1 * log(alpha_) - alpha_2 * alpha_
-                s += 0.5 * (fast_logdet(sigma_) + n_samples * log(alpha_)
-                            + np.sum(np.log(lambda_)))
+                s += 0.5 * (fast_logdet(sigma_) + n_samples * log(alpha_) +
+                            np.sum(np.log(lambda_)))
                 s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_ ** 2).sum())
                 self.scores_.append(s)

-            ### Check for convergence
+            # Check for convergence
             if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:
                 if verbose:
                     print("Converged after %s iterations" % iter_)
