From d277a43cea35a719f82272d668eaf90dd8b05245 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Wed, 10 Feb 2021 09:54:16 +0100
Subject: [PATCH 01/54] add ridge to the test + fix the test

---
 sklearn/linear_model/tests/test_coordinate_descent.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index a5acb4aa25da2..b0a88dd330fe8 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -393,7 +393,10 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params):
     "estimator, is_sparse, with_mean",
     [(LinearRegression, True, False),
      (LinearRegression, False, True),
-     (LinearRegression, False, False)]
+     (LinearRegression, False, False),
+     (Ridge, True, False),
+     (Ridge, False, True),
+     (Ridge, False, False)]
 )
 def test_linear_model_sample_weights_normalize_in_pipeline(
         estimator, is_sparse, with_mean
@@ -425,7 +428,9 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
         StandardScaler(with_mean=with_mean),
         estimator(normalize=False)
     )
-    kwargs = {reg_with_scaler.steps[-1][0] + '__sample_weight':
+    kwargs = {reg_with_scaler.steps[0][0] + '__sample_weight':
+              sample_weight,
+              reg_with_scaler.steps[-1][0] + '__sample_weight':
               sample_weight}
     reg_with_scaler.fit(X_train, y_train, **kwargs)
 

From 03695d7cd9b2a06afe6cfca31d2323d625ae9904 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Wed, 10 Feb 2021 10:35:19 +0100
Subject: [PATCH 02/54] changing alpha for Ridge in a pipeline

---
 .../tests/test_coordinate_descent.py          | 48 +++++++++++++------
 1 file changed, 33 insertions(+), 15 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index b0a88dd330fe8..c94306c161847 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -391,12 +391,13 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params):
 @pytest.mark.filterwarnings("ignore:'normalize' was deprecated")
 @pytest.mark.parametrize(
     "estimator, is_sparse, with_mean",
-    [(LinearRegression, True, False),
-     (LinearRegression, False, True),
-     (LinearRegression, False, False),
-     (Ridge, True, False),
+    [#(LinearRegression, True, False),
+     #(LinearRegression, False, True),
+     #(LinearRegression, False, False),
+     #(Ridge, True, False),
      (Ridge, False, True),
-     (Ridge, False, False)]
+     #(Ridge, False, False)
+     ]
 )
 def test_linear_model_sample_weights_normalize_in_pipeline(
         estimator, is_sparse, with_mean
@@ -406,27 +407,37 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
     # LinearRegression with no normalize in a pipeline with a StandardScaler
     # and set sample_weight.
     rng = np.random.RandomState(0)
-    X, y = make_regression(n_samples=20, n_features=5, noise=1e-2,
-                           random_state=rng)
+    #X, y = make_regression(n_samples=20, n_features=5, noise=1e-2,
+    #                       random_state=rng)
+    n_samples, n_features = 100, 2
+    w = rng.randn(n_features)
+    X = rng.randn(n_samples, n_features)
+    X += 20  # make features non-zero mean
+    y = X.dot(w)  # XXX : should add some intercept
+
+    params = {"solver": 'sparse_cg', 'tol': 1e-12, "alpha": 1.}
     # make sure the data is not centered to make the problem more
     # difficult
-    X += 10
+    #X += 10
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5,
                                                         random_state=rng)
     if is_sparse:
         X_train = sparse.csr_matrix(X_train)
         X_test = _convert_container(X_train, 'sparse')
 
-    sample_weight = rng.rand(X_train.shape[0])
+    sample_weight = 0.1 * rng.rand(X_train.shape[0])
+    new_params = dict(alpha=params['alpha'] * X_train.shape[0])
+
 
     # linear estimator with explicit sample_weight
-    reg_with_normalize = estimator(normalize=True)
+    reg_with_normalize = estimator(normalize=True, fit_intercept=True,
+                                   **params)
     reg_with_normalize.fit(X_train, y_train, sample_weight=sample_weight)
 
     # linear estimator in a pipeline
     reg_with_scaler = make_pipeline(
         StandardScaler(with_mean=with_mean),
-        estimator(normalize=False)
+        estimator(normalize=False, fit_intercept=True, **new_params)
     )
     kwargs = {reg_with_scaler.steps[0][0] + '__sample_weight':
               sample_weight,
@@ -437,10 +448,17 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
     y_pred_norm = reg_with_normalize.predict(X_test)
     y_pred_pip = reg_with_scaler.predict(X_test)
 
-    assert_allclose(
-        reg_with_normalize.coef_ * reg_with_scaler[0].scale_,
-        reg_with_scaler[1].coef_
-    )
+    # assert_allclose(
+    #    reg_with_normalize.coef_ * reg_with_scaler[0].scale_,
+    #     reg_with_scaler[1].coef_
+    #)
+    #assert_allclose(y_pred_norm, y_pred_pip)
+    y_train_mean = np.average(y_train, weights=sample_weight)
+    X_train_mean = np.average(X_train, weights=sample_weight, axis=0)
+    assert reg_with_scaler[1].intercept_ == pytest.approx(y_train_mean)
+    assert (reg_with_normalize.intercept_ ==
+            pytest.approx(y_train_mean -
+                          reg_with_normalize.coef_.dot(X_train_mean)))
     assert_allclose(y_pred_norm, y_pred_pip)
 
 

From 66cf82dcca57bcbad94c287f835f3d98cc0da82b Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Wed, 10 Feb 2021 11:28:47 +0100
Subject: [PATCH 03/54] updated the test to include update in alpha

---
 .../tests/test_coordinate_descent.py          | 69 +++++++++++++++----
 1 file changed, 55 insertions(+), 14 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index c94306c161847..3afd4b318dc31 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -386,7 +386,7 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params):
                           model_normalize.coef_.dot(X_train.mean(0))))
     assert_allclose(y_pred_normalize, y_pred_standardize)
 
-
+'''
 # FIXME: 'normalize' to be removed in 1.2
 @pytest.mark.filterwarnings("ignore:'normalize' was deprecated")
 @pytest.mark.parametrize(
@@ -399,26 +399,52 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params):
      #(Ridge, False, False)
      ]
 )
+'''
+# FIXME: 'normalize' to be removed in 1.2
+@pytest.mark.filterwarnings("ignore:'normalize' was deprecated")
+@pytest.mark.parametrize(
+    "estimator, params",
+    [(Lasso, {"tol": 1e-16, "alpha": 0.1}),
+     # (LassoLars, {"alpha": 0.1}), (unexpected sample_weight)
+     # (RidgeClassifier, {"solver": 'sparse_cg', "alpha": 0.1}),
+     (ElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.1}),
+     (ElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.1}),
+     (Ridge, {"solver": 'sparse_cg', 'tol': 1e-12, "alpha": 0.1}),
+     (BayesianRidge, {}),
+     # (ARDRegression, {}), (unexpected sample_weight)
+     # (OrthogonalMatchingPursuit, {}), (unexpected sample_weight)
+     # (MultiTaskElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.1}), (unexpected sample_weight)
+     # (MultiTaskElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.1}), (unexpected sample_weight)
+     # (MultiTaskLasso, {"tol": 1e-16, "alpha": 0.1}), (unexpected sample_weight)
+     # (Lars, {}), (unexpected sample_weight)
+     (LinearRegression, {}),
+     # (LassoLarsIC, {}) (unexpected sample_weight)
+     ]
+)
+@pytest.mark.parametrize(
+    "is_sparse",
+    [False] #, True]
+)
+@pytest.mark.parametrize(
+    "with_mean",
+    [True, False]
+)
 def test_linear_model_sample_weights_normalize_in_pipeline(
-        estimator, is_sparse, with_mean
+        with_mean, is_sparse, estimator, params
 ):
     # Test that the results for running linear regression LinearRegression with
     # sample_weight set and with normalize set to True gives similar results as
     # LinearRegression with no normalize in a pipeline with a StandardScaler
     # and set sample_weight.
+    model_name = estimator.__name__
+
     rng = np.random.RandomState(0)
-    #X, y = make_regression(n_samples=20, n_features=5, noise=1e-2,
-    #                       random_state=rng)
-    n_samples, n_features = 100, 2
-    w = rng.randn(n_features)
-    X = rng.randn(n_samples, n_features)
-    X += 20  # make features non-zero mean
-    y = X.dot(w)  # XXX : should add some intercept
+    X, y = make_regression(n_samples=20, n_features=5, noise=1e-2,
+                           random_state=rng)
 
-    params = {"solver": 'sparse_cg', 'tol': 1e-12, "alpha": 1.}
     # make sure the data is not centered to make the problem more
     # difficult
-    #X += 10
+    X += 10
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5,
                                                         random_state=rng)
     if is_sparse:
@@ -426,8 +452,6 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
         X_test = _convert_container(X_train, 'sparse')
 
     sample_weight = 0.1 * rng.rand(X_train.shape[0])
-    new_params = dict(alpha=params['alpha'] * X_train.shape[0])
-
 
     # linear estimator with explicit sample_weight
     reg_with_normalize = estimator(normalize=True, fit_intercept=True,
@@ -437,8 +461,25 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
     # linear estimator in a pipeline
     reg_with_scaler = make_pipeline(
         StandardScaler(with_mean=with_mean),
-        estimator(normalize=False, fit_intercept=True, **new_params)
+        estimator(normalize=False, fit_intercept=True, **params)
     )
+    if 'alpha' in params:
+        # reg_with_scaler.set_params(alpha=params['alpha'])
+        if model_name in ['Lasso', 'LassoLars', 'MultiTaskLasso']:
+            new_params = dict(
+                alpha=params['alpha'] * np.sqrt(X_train.shape[0]))
+        if model_name in ['Ridge', 'RidgeClassifier']:
+            new_params = dict(alpha=params['alpha'] * X_train.shape[0])
+    if model_name in ['ElasticNet', 'MultiTaskElasticNet']:
+        if params['l1_ratio'] == 1:
+            new_params = dict(
+                alpha=params['alpha'] * np.sqrt(X_train.shape[0]))
+        if params['l1_ratio'] == 0:
+            new_params = dict(alpha=params['alpha'] * X_train.shape[0])
+
+    if 'new_params' in locals():
+        reg_with_scaler[1].set_params(**new_params)
+
     kwargs = {reg_with_scaler.steps[0][0] + '__sample_weight':
               sample_weight,
               reg_with_scaler.steps[-1][0] + '__sample_weight':

From 165a5e10179ce31a8cdb3e071025ad2deb5f7a56 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Wed, 10 Feb 2021 11:32:34 +0100
Subject: [PATCH 04/54] updated normalize to include sample_weight when x is
 sparse

---
 sklearn/linear_model/_base.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index f84d4234c193c..0904072cf51b6 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -33,6 +33,7 @@
 from ..utils.validation import _deprecate_positional_args
 from ..utils import check_random_state
 from ..utils.extmath import safe_sparse_dot
+from ..utils.extmath import _incremental_weighted_mean_and_var
 from ..utils.sparsefuncs import mean_variance_axis, inplace_column_scale
 from ..utils.fixes import sparse_lsqr
 from ..utils._seq_dataset import ArrayDataset32, CSRDataset32
@@ -40,7 +41,6 @@
 from ..utils.validation import check_is_fitted, _check_sample_weight
 
 from ..utils.fixes import delayed
-from ..preprocessing import normalize as f_normalize
 
 # TODO: bayesian_ridge_regression and bayesian_regression_ard
 # should be squashed into its respective objects.
@@ -229,12 +229,12 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
 
     if fit_intercept:
         if sp.issparse(X):
-            X_offset, X_var = mean_variance_axis(X, axis=0)
+            X_offset, X_var = mean_variance_axis(X, axis=0,
+                                                 weights=sample_weight)
             if not return_mean:
                 X_offset[:] = X.dtype.type(0)
 
             if normalize:
-
                 # TODO: f_normalize could be used here as well but the function
                 # inplace_csr_row_normalize_l2 must be changed such that it
                 # can return also the norms computed internally
@@ -249,13 +249,19 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
                 X_scale = np.ones(X.shape[1], dtype=X.dtype)
 
         else:
-            X_offset = np.average(X, axis=0, weights=sample_weight)
+            X_offset, X_var, _ = \
+                _incremental_weighted_mean_and_var(X, sample_weight,
+                                                   last_mean=0.,
+                                                   last_variance=0.,
+                                                   last_weight_sum=0.)
             X -= X_offset
+
             if normalize:
-                X, X_scale = f_normalize(X, axis=0, copy=False,
-                                         return_norm=True)
+                X_scale = np.sqrt(X_var) * np.sqrt(len(X))
+                X = X / X_scale
             else:
                 X_scale = np.ones(X.shape[1], dtype=X.dtype)
+
         y_offset = np.average(y, axis=0, weights=sample_weight)
         y = y - y_offset
     else:

From 41366669c7b078d4b3b4dc507a6212406a2aacb1 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Wed, 10 Feb 2021 13:36:30 +0100
Subject: [PATCH 05/54] update the old test for the correct normalize

---
 sklearn/linear_model/tests/test_base.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index 75cc9dd5fd8f1..f0b31ce66169a 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -470,10 +470,11 @@ def test_preprocess_data_weighted():
     expected_X_mean = np.average(X, axis=0, weights=sample_weight)
     expected_y_mean = np.average(y, axis=0, weights=sample_weight)
 
-    # XXX: if normalize=True, should we expect a weighted standard deviation?
-    #      Currently not weighted, but calculated with respect to weighted mean
-    expected_X_norm = (np.sqrt(X.shape[0]) *
-                       np.mean((X - expected_X_mean) ** 2, axis=0) ** .5)
+    X_sample_weight_avg = np.average(X, weights=sample_weight, axis=0)
+    X_sample_weight_var = np.average((X-X_sample_weight_avg)**2,
+                                     weights=sample_weight,
+                                     axis=0)
+    expected_X_norm = np.sqrt(X_sample_weight_var) * np.sqrt(len(X))
 
     Xt, yt, X_mean, y_mean, X_norm = \
         _preprocess_data(X, y, fit_intercept=True, normalize=False,

From ff266b4f2c2130ccfa8590d3c98bbcaef8442b07 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Wed, 10 Feb 2021 14:05:12 +0100
Subject: [PATCH 06/54] keep working on the pipeline test

---
 .../tests/test_coordinate_descent.py          | 35 +++----------------
 1 file changed, 4 insertions(+), 31 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index 3afd4b318dc31..c3a0b58b28009 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -386,39 +386,17 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params):
                           model_normalize.coef_.dot(X_train.mean(0))))
     assert_allclose(y_pred_normalize, y_pred_standardize)
 
-'''
-# FIXME: 'normalize' to be removed in 1.2
-@pytest.mark.filterwarnings("ignore:'normalize' was deprecated")
-@pytest.mark.parametrize(
-    "estimator, is_sparse, with_mean",
-    [#(LinearRegression, True, False),
-     #(LinearRegression, False, True),
-     #(LinearRegression, False, False),
-     #(Ridge, True, False),
-     (Ridge, False, True),
-     #(Ridge, False, False)
-     ]
-)
-'''
+
 # FIXME: 'normalize' to be removed in 1.2
 @pytest.mark.filterwarnings("ignore:'normalize' was deprecated")
 @pytest.mark.parametrize(
     "estimator, params",
     [(Lasso, {"tol": 1e-16, "alpha": 0.1}),
-     # (LassoLars, {"alpha": 0.1}), (unexpected sample_weight)
      # (RidgeClassifier, {"solver": 'sparse_cg', "alpha": 0.1}),
      (ElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.1}),
      (ElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.1}),
      (Ridge, {"solver": 'sparse_cg', 'tol': 1e-12, "alpha": 0.1}),
-     (BayesianRidge, {}),
-     # (ARDRegression, {}), (unexpected sample_weight)
-     # (OrthogonalMatchingPursuit, {}), (unexpected sample_weight)
-     # (MultiTaskElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.1}), (unexpected sample_weight)
-     # (MultiTaskElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.1}), (unexpected sample_weight)
-     # (MultiTaskLasso, {"tol": 1e-16, "alpha": 0.1}), (unexpected sample_weight)
-     # (Lars, {}), (unexpected sample_weight)
      (LinearRegression, {}),
-     # (LassoLarsIC, {}) (unexpected sample_weight)
      ]
 )
 @pytest.mark.parametrize(
@@ -451,7 +429,7 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
         X_train = sparse.csr_matrix(X_train)
         X_test = _convert_container(X_train, 'sparse')
 
-    sample_weight = 0.1 * rng.rand(X_train.shape[0])
+    sample_weight = rng.rand(X_train.shape[0])
 
     # linear estimator with explicit sample_weight
     reg_with_normalize = estimator(normalize=True, fit_intercept=True,
@@ -465,12 +443,12 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
     )
     if 'alpha' in params:
         # reg_with_scaler.set_params(alpha=params['alpha'])
-        if model_name in ['Lasso', 'LassoLars', 'MultiTaskLasso']:
+        if model_name in ['Lasso']:
             new_params = dict(
                 alpha=params['alpha'] * np.sqrt(X_train.shape[0]))
         if model_name in ['Ridge', 'RidgeClassifier']:
             new_params = dict(alpha=params['alpha'] * X_train.shape[0])
-    if model_name in ['ElasticNet', 'MultiTaskElasticNet']:
+    if model_name in ['ElasticNet']:
         if params['l1_ratio'] == 1:
             new_params = dict(
                 alpha=params['alpha'] * np.sqrt(X_train.shape[0]))
@@ -489,11 +467,6 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
     y_pred_norm = reg_with_normalize.predict(X_test)
     y_pred_pip = reg_with_scaler.predict(X_test)
 
-    # assert_allclose(
-    #    reg_with_normalize.coef_ * reg_with_scaler[0].scale_,
-    #     reg_with_scaler[1].coef_
-    #)
-    #assert_allclose(y_pred_norm, y_pred_pip)
     y_train_mean = np.average(y_train, weights=sample_weight)
     X_train_mean = np.average(X_train, weights=sample_weight, axis=0)
     assert reg_with_scaler[1].intercept_ == pytest.approx(y_train_mean)

From b386af7066f69fd7085e06bffaf40a2b2d8f8a64 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Wed, 10 Feb 2021 14:59:19 +0100
Subject: [PATCH 07/54] add test for sparse and sample_weight when testing for
 _preprocess_data

---
 sklearn/linear_model/tests/test_base.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index f0b31ce66169a..4fd856841567a 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -461,11 +461,16 @@ def test_preprocess_data_multioutput():
         assert_array_almost_equal(yt, y - y_mean)
 
 
-def test_preprocess_data_weighted():
+@pytest.mark.parametrize(
+    "is_sparse",
+    [False, True]
+)
+def test_preprocess_data_weighted(is_sparse):
     n_samples = 200
     n_features = 2
     X = rng.rand(n_samples, n_features)
     y = rng.rand(n_samples)
+
     sample_weight = rng.rand(n_samples)
     expected_X_mean = np.average(X, axis=0, weights=sample_weight)
     expected_y_mean = np.average(y, axis=0, weights=sample_weight)
@@ -476,22 +481,27 @@ def test_preprocess_data_weighted():
                                      axis=0)
     expected_X_norm = np.sqrt(X_sample_weight_var) * np.sqrt(len(X))
 
+    if is_sparse:
+        X = sparse.csr_matrix(X)
+
     Xt, yt, X_mean, y_mean, X_norm = \
         _preprocess_data(X, y, fit_intercept=True, normalize=False,
-                         sample_weight=sample_weight)
+                         sample_weight=sample_weight, return_mean=True)
     assert_array_almost_equal(X_mean, expected_X_mean)
     assert_array_almost_equal(y_mean, expected_y_mean)
     assert_array_almost_equal(X_norm, np.ones(n_features))
-    assert_array_almost_equal(Xt, X - expected_X_mean)
+    if not is_sparse:
+        assert_array_almost_equal(Xt, X - expected_X_mean)
     assert_array_almost_equal(yt, y - expected_y_mean)
 
     Xt, yt, X_mean, y_mean, X_norm = \
         _preprocess_data(X, y, fit_intercept=True, normalize=True,
-                         sample_weight=sample_weight)
+                         sample_weight=sample_weight, return_mean=True)
     assert_array_almost_equal(X_mean, expected_X_mean)
     assert_array_almost_equal(y_mean, expected_y_mean)
     assert_array_almost_equal(X_norm, expected_X_norm)
-    assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_norm)
+    if not is_sparse:
+        assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_norm)
     assert_array_almost_equal(yt, y - expected_y_mean)
 
 

From 2a6c2134ec647eb5623b94cfd7f8bd89d167273f Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Wed, 10 Feb 2021 16:58:35 +0100
Subject: [PATCH 08/54] corrected test with dtype, 1 test remaining to be
 corrected

---
 sklearn/linear_model/tests/test_base.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index 4fd856841567a..50fb485470236 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -490,7 +490,9 @@ def test_preprocess_data_weighted(is_sparse):
     assert_array_almost_equal(X_mean, expected_X_mean)
     assert_array_almost_equal(y_mean, expected_y_mean)
     assert_array_almost_equal(X_norm, np.ones(n_features))
-    if not is_sparse:
+    if is_sparse:
+        assert_array_almost_equal(Xt.toarray(), X.toarray() - expected_X_mean)
+    else:
         assert_array_almost_equal(Xt, X - expected_X_mean)
     assert_array_almost_equal(yt, y - expected_y_mean)
 
@@ -500,8 +502,14 @@ def test_preprocess_data_weighted(is_sparse):
     assert_array_almost_equal(X_mean, expected_X_mean)
     assert_array_almost_equal(y_mean, expected_y_mean)
     assert_array_almost_equal(X_norm, expected_X_norm)
-    if not is_sparse:
-        assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_norm)
+    if is_sparse:
+        assert_array_almost_equal(
+            Xt.toarray(), (X.toarray() - expected_X_mean) / expected_X_norm
+            )
+    else:
+        assert_array_almost_equal(
+            Xt, (X - expected_X_mean) / expected_X_norm
+            )
     assert_array_almost_equal(yt, y - expected_y_mean)
 
 

From 82c6344602fa3453af0a6c4b4455f56f8e7258d1 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Wed, 10 Feb 2021 16:59:30 +0100
Subject: [PATCH 09/54] cleanup

---
 sklearn/linear_model/tests/test_coordinate_descent.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index c3a0b58b28009..b3de4fb5d812e 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -401,11 +401,11 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params):
 )
 @pytest.mark.parametrize(
     "is_sparse",
-    [False] #, True]
+    [False]  # , True]
 )
 @pytest.mark.parametrize(
     "with_mean",
-    [True, False]
+    [True]  # , False]
 )
 def test_linear_model_sample_weights_normalize_in_pipeline(
         with_mean, is_sparse, estimator, params

From 187833c7ea7a66d5bbe0eb120e91ea270f51a576 Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort <alexandre.gramfort@m4x.org>
Date: Wed, 10 Feb 2021 21:10:19 +0100
Subject: [PATCH 10/54] use _incremental_mean_and_var

---
 sklearn/linear_model/_base.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index 0904072cf51b6..ae0c82052a6a5 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -33,7 +33,7 @@
 from ..utils.validation import _deprecate_positional_args
 from ..utils import check_random_state
 from ..utils.extmath import safe_sparse_dot
-from ..utils.extmath import _incremental_weighted_mean_and_var
+from ..utils.extmath import _incremental_mean_and_var
 from ..utils.sparsefuncs import mean_variance_axis, inplace_column_scale
 from ..utils.fixes import sparse_lsqr
 from ..utils._seq_dataset import ArrayDataset32, CSRDataset32
@@ -250,10 +250,11 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
 
         else:
             X_offset, X_var, _ = \
-                _incremental_weighted_mean_and_var(X, sample_weight,
-                                                   last_mean=0.,
-                                                   last_variance=0.,
-                                                   last_weight_sum=0.)
+                _incremental_mean_and_var(X,
+                                          last_mean=0.,
+                                          last_variance=0.,
+                                          last_sample_count=0.,
+                                          sample_weight=sample_weight)
             X -= X_offset
 
             if normalize:

From 153d5c2fbaa200a0eedd485dc414749d47dd68a8 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Wed, 10 Feb 2021 21:24:29 +0100
Subject: [PATCH 11/54] update X_offset and X_var to be of the correct type

---
 sklearn/linear_model/_base.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index ae0c82052a6a5..ce8cfc2ca3e9d 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -249,12 +249,16 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
                 X_scale = np.ones(X.shape[1], dtype=X.dtype)
 
         else:
+            xtype = X.dtype
+
             X_offset, X_var, _ = \
                 _incremental_mean_and_var(X,
                                           last_mean=0.,
                                           last_variance=0.,
                                           last_sample_count=0.,
                                           sample_weight=sample_weight)
+            X_offset = X_offset.astype(xtype)
+            X_var = X_var.astype(xtype)
             X -= X_offset
 
             if normalize:

From ae9ea27aa3ee98990fd81c1c5259668c089b8f74 Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort <alexandre.gramfort@m4x.org>
Date: Wed, 10 Feb 2021 21:25:43 +0100
Subject: [PATCH 12/54] fix some tests

---
 sklearn/linear_model/_base.py           | 4 ++++
 sklearn/linear_model/tests/test_base.py | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index ae0c82052a6a5..b384f4e6186df 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -255,6 +255,10 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
                                           last_variance=0.,
                                           last_sample_count=0.,
                                           sample_weight=sample_weight)
+
+            X_var = X_var.astype(X.dtype)
+            X_offset = X_offset.astype(X.dtype)
+
             X -= X_offset
 
             if normalize:
diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index 50fb485470236..fab9aa20b6db7 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -476,7 +476,7 @@ def test_preprocess_data_weighted(is_sparse):
     expected_y_mean = np.average(y, axis=0, weights=sample_weight)
 
     X_sample_weight_avg = np.average(X, weights=sample_weight, axis=0)
-    X_sample_weight_var = np.average((X-X_sample_weight_avg)**2,
+    X_sample_weight_var = np.average((X - X_sample_weight_avg)**2,
                                      weights=sample_weight,
                                      axis=0)
     expected_X_norm = np.sqrt(X_sample_weight_var) * np.sqrt(len(X))
@@ -491,7 +491,7 @@ def test_preprocess_data_weighted(is_sparse):
     assert_array_almost_equal(y_mean, expected_y_mean)
     assert_array_almost_equal(X_norm, np.ones(n_features))
     if is_sparse:
-        assert_array_almost_equal(Xt.toarray(), X.toarray() - expected_X_mean)
+        assert_array_almost_equal(Xt.toarray(), X.toarray())
     else:
         assert_array_almost_equal(Xt, X - expected_X_mean)
     assert_array_almost_equal(yt, y - expected_y_mean)
@@ -504,7 +504,7 @@ def test_preprocess_data_weighted(is_sparse):
     assert_array_almost_equal(X_norm, expected_X_norm)
     if is_sparse:
         assert_array_almost_equal(
-            Xt.toarray(), (X.toarray() - expected_X_mean) / expected_X_norm
+            Xt.toarray(), X.toarray() / expected_X_norm
             )
     else:
         assert_array_almost_equal(

From 1f433ac7a7c9181ef216cd033399f29448c66f8f Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort <alexandre.gramfort@m4x.org>
Date: Wed, 10 Feb 2021 21:40:04 +0100
Subject: [PATCH 13/54] fix more tests: avoid zero X_scale in _preprocess_data

---
 sklearn/linear_model/_base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index 016f0a3c77be5..5cba9803b459c 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -263,6 +263,7 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
 
             if normalize:
                 X_scale = np.sqrt(X_var) * np.sqrt(len(X))
+                X_scale[X_scale == 0.0] = 1.0
                 X = X / X_scale
             else:
                 X_scale = np.ones(X.shape[1], dtype=X.dtype)

From 563074e2fd12805a9534fe3b5cd0b55c7b0634d3 Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort <alexandre.gramfort@m4x.org>
Date: Wed, 10 Feb 2021 21:57:09 +0100
Subject: [PATCH 14/54] more fixes

---
 .../tests/test_coordinate_descent.py          | 27 ++++++++++++-------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index b3de4fb5d812e..1f4f3885387c8 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -29,6 +29,7 @@
 from sklearn.utils._testing import _convert_container
 from sklearn.utils._testing import TempMemmap
 from sklearn.utils.fixes import parse_version
+from sklearn.utils.sparsefuncs import mean_variance_axis
 
 from sklearn.linear_model import (
     ARDRegression,
@@ -391,7 +392,8 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params):
 @pytest.mark.filterwarnings("ignore:'normalize' was deprecated")
 @pytest.mark.parametrize(
     "estimator, params",
-    [(Lasso, {"tol": 1e-16, "alpha": 0.1}),
+    [
+     (Lasso, {"tol": 1e-16, "alpha": 0.1}),
      # (RidgeClassifier, {"solver": 'sparse_cg', "alpha": 0.1}),
      (ElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.1}),
      (ElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.1}),
@@ -400,12 +402,12 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params):
      ]
 )
 @pytest.mark.parametrize(
-    "is_sparse",
-    [False]  # , True]
-)
-@pytest.mark.parametrize(
-    "with_mean",
-    [True]  # , False]
+    "is_sparse, with_mean", [
+        (False, True),
+        (False, False),
+        (True, False)
+        # No need to test sparse and with_mean=True
+    ]
 )
 def test_linear_model_sample_weights_normalize_in_pipeline(
         with_mean, is_sparse, estimator, params
@@ -416,6 +418,9 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
     # and set sample_weight.
     model_name = estimator.__name__
 
+    if model_name in ['Lasso', 'ElasticNet'] and is_sparse:
+        pytest.skip(f'{model_name} does not suppert sample_weight with sparse')
+
     rng = np.random.RandomState(0)
     X, y = make_regression(n_samples=20, n_features=5, noise=1e-2,
                            random_state=rng)
@@ -442,7 +447,6 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
         estimator(normalize=False, fit_intercept=True, **params)
     )
     if 'alpha' in params:
-        # reg_with_scaler.set_params(alpha=params['alpha'])
         if model_name in ['Lasso']:
             new_params = dict(
                 alpha=params['alpha'] * np.sqrt(X_train.shape[0]))
@@ -468,8 +472,11 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
     y_pred_pip = reg_with_scaler.predict(X_test)
 
     y_train_mean = np.average(y_train, weights=sample_weight)
-    X_train_mean = np.average(X_train, weights=sample_weight, axis=0)
-    assert reg_with_scaler[1].intercept_ == pytest.approx(y_train_mean)
+    if is_sparse:
+        X_train_mean, _ = mean_variance_axis(X_train, axis=0,
+                                             weights=sample_weight)
+    else:
+        X_train_mean = np.average(X_train, weights=sample_weight, axis=0)
     assert (reg_with_normalize.intercept_ ==
             pytest.approx(y_train_mean -
                           reg_with_normalize.coef_.dot(X_train_mean)))

From 0e98592c69669dbe08f7b530ccb58dfeee0449a7 Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort <alexandre.gramfort@m4x.org>
Date: Wed, 10 Feb 2021 21:58:30 +0100
Subject: [PATCH 15/54] more fixes

---
 sklearn/linear_model/tests/test_coordinate_descent.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index 1f4f3885387c8..42db2fa9c06cd 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -394,7 +394,7 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params):
     "estimator, params",
     [
      (Lasso, {"tol": 1e-16, "alpha": 0.1}),
-     # (RidgeClassifier, {"solver": 'sparse_cg', "alpha": 0.1}),
+     (RidgeClassifier, {"solver": 'sparse_cg', "alpha": 0.1}),
      (ElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.1}),
      (ElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.1}),
      (Ridge, {"solver": 'sparse_cg', 'tol': 1e-12, "alpha": 0.1}),
@@ -425,6 +425,9 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
     X, y = make_regression(n_samples=20, n_features=5, noise=1e-2,
                            random_state=rng)
 
+    if is_classifier(estimator):
+        y = np.sign(y)
+
     # make sure the data is not centered to make the problem more
     # difficult
     X += 10

From 29ce06037979f5e4aeba242dcd41e77cf9b9e94c Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort <alexandre.gramfort@m4x.org>
Date: Wed, 10 Feb 2021 22:06:42 +0100
Subject: [PATCH 16/54] typo

---
 sklearn/linear_model/tests/test_coordinate_descent.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index 42db2fa9c06cd..95e56b8263781 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -419,7 +419,7 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
     model_name = estimator.__name__
 
     if model_name in ['Lasso', 'ElasticNet'] and is_sparse:
-        pytest.skip(f'{model_name} does not suppert sample_weight with sparse')
+        pytest.skip(f'{model_name} does not support sample_weight with sparse')
 
     rng = np.random.RandomState(0)
     X, y = make_regression(n_samples=20, n_features=5, noise=1e-2,

From 65528f499ad4ac6d15895f3846f0608266c83739 Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort <alexandre.gramfort@m4x.org>
Date: Wed, 10 Feb 2021 22:19:04 +0100
Subject: [PATCH 17/54] factorize code

---
 .../tests/test_coordinate_descent.py          | 53 ++++++++-----------
 1 file changed, 21 insertions(+), 32 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index 95e56b8263781..e58c630ebcb8a 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -303,6 +303,24 @@ def test_lasso_cv_positive_constraint():
     assert min(clf_constrained.coef_) >= 0
 
 
+def _scale_alpha(estimator, n_samples):
+    if 'alpha' not in estimator.get_params():
+        return
+
+    model_name = estimator.__class__.__name__
+    if model_name in ['Lasso', 'LassoLars', 'MultiTaskLasso']:
+        alpha = estimator.alpha * np.sqrt(n_samples)
+    if model_name in ['Ridge', 'RidgeClassifier']:
+        alpha = estimator.alpha * n_samples
+    if model_name in ['ElasticNet', 'MultiTaskElasticNet']:
+        if estimator.l1_ratio == 1:
+            alpha = estimator.alpha * np.sqrt(n_samples)
+        if estimator.l1_ratio == 0:
+            alpha = estimator.alpha * n_samples
+
+    estimator.set_params(alpha=alpha)
+
+
 # FIXME: 'normalize' to be removed in 1.2
 @pytest.mark.filterwarnings("ignore:'normalize' was deprecated")
 @pytest.mark.parametrize(
@@ -329,7 +347,6 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params):
     # in the pipeline and with normalize set to False
 
     # normalize is True
-    model_name = LinearModel.__name__
     model_normalize = LinearModel(normalize=True, fit_intercept=True, **params)
 
     pipeline = make_pipeline(
@@ -356,22 +373,7 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params):
 
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
 
-    if 'alpha' in params:
-        model_normalize.set_params(alpha=params['alpha'])
-        if model_name in ['Lasso', 'LassoLars', 'MultiTaskLasso']:
-            new_params = dict(
-                alpha=params['alpha'] * np.sqrt(X_train.shape[0]))
-        if model_name in ['Ridge', 'RidgeClassifier']:
-            new_params = dict(alpha=params['alpha'] * X_train.shape[0])
-    if model_name in ['ElasticNet', 'MultiTaskElasticNet']:
-        if params['l1_ratio'] == 1:
-            new_params = dict(
-                alpha=params['alpha'] * np.sqrt(X_train.shape[0]))
-        if params['l1_ratio'] == 0:
-            new_params = dict(alpha=params['alpha'] * X_train.shape[0])
-
-    if 'new_params' in locals():
-        pipeline[1].set_params(**new_params)
+    _scale_alpha(pipeline[1], X_train.shape[0])
 
     model_normalize.fit(X_train, y_train)
     y_pred_normalize = model_normalize.predict(X_test)
@@ -449,21 +451,8 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
         StandardScaler(with_mean=with_mean),
         estimator(normalize=False, fit_intercept=True, **params)
     )
-    if 'alpha' in params:
-        if model_name in ['Lasso']:
-            new_params = dict(
-                alpha=params['alpha'] * np.sqrt(X_train.shape[0]))
-        if model_name in ['Ridge', 'RidgeClassifier']:
-            new_params = dict(alpha=params['alpha'] * X_train.shape[0])
-    if model_name in ['ElasticNet']:
-        if params['l1_ratio'] == 1:
-            new_params = dict(
-                alpha=params['alpha'] * np.sqrt(X_train.shape[0]))
-        if params['l1_ratio'] == 0:
-            new_params = dict(alpha=params['alpha'] * X_train.shape[0])
-
-    if 'new_params' in locals():
-        reg_with_scaler[1].set_params(**new_params)
+
+    _scale_alpha(reg_with_scaler[1], X_train.shape[0])
 
     kwargs = {reg_with_scaler.steps[0][0] + '__sample_weight':
               sample_weight,

From 84fbd1a5fde104384458e34a4ce5c5d5cd206d93 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Wed, 10 Feb 2021 22:29:37 +0100
Subject: [PATCH 18/54] added whats new

---
 doc/whats_new/v1.0.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index d073ffd80bdf7..46b1bba325ab0 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -131,6 +131,12 @@ Changelog
   :pr:`17743` by :user:`Maria Telenczuk <maikia>` and
   :user:`Alexandre Gramfort <agramfort>`.
 
+- |Fix|: Fixed a bug in linear_model._base._preprocess_data when
+  `normalize=True` and `sample_weight` is set. `sample_weight` now weights
+  standard deviation as expected.
+  :pr:`19426` by :user:`Alexandre Gramfort <agramfort>` and
+  :user:`Maria Telenczuk <maikia>`.
+
 :mod:`sklearn.metrics`
 ......................
 

From 61702a9b53258440305cd4add902d2cdbbf1bd55 Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort <alexandre.gramfort@m4x.org>
Date: Thu, 11 Feb 2021 09:01:39 +0100
Subject: [PATCH 19/54] nitpick on what's new

---
 doc/whats_new/v1.0.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index 46b1bba325ab0..9af28f4abd476 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -131,9 +131,9 @@ Changelog
   :pr:`17743` by :user:`Maria Telenczuk <maikia>` and
   :user:`Alexandre Gramfort <agramfort>`.
 
-- |Fix|: Fixed a bug in linear_model._base._preprocess_data when
-  `normalize=True` and `sample_weight` is set. `sample_weight` now weights
-  standard deviation as expected.
+- |Fix|: `sample_weight` are now fully taken into account in linear models
+  when `normalize=True` for both feature centering and feature
+  scaling.
   :pr:`19426` by :user:`Alexandre Gramfort <agramfort>` and
   :user:`Maria Telenczuk <maikia>`.
 

From 9b9ee563dcb38a9ab9b4e979b6be7bcffe5dd2b4 Mon Sep 17 00:00:00 2001
From: Maria Telenczuk <maja_ka@hotmail.com>
Date: Thu, 11 Feb 2021 09:38:05 +0100
Subject: [PATCH 20/54] Update sklearn/linear_model/tests/test_base.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/linear_model/tests/test_base.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index fab9aa20b6db7..c6a88e64f0039 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -461,10 +461,7 @@ def test_preprocess_data_multioutput():
         assert_array_almost_equal(yt, y - y_mean)
 
 
-@pytest.mark.parametrize(
-    "is_sparse",
-    [False, True]
-)
+@pytest.mark.parametrize("is_sparse", [False, True])
 def test_preprocess_data_weighted(is_sparse):
     n_samples = 200
     n_features = 2

From e7b6e9246d1fc4269c99bda30cb99a827d9ced71 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 09:44:06 +0100
Subject: [PATCH 21/54] restructure the code (handle normalize in a separate block)

---
 sklearn/linear_model/_base.py | 50 ++++++++++++++---------------------
 1 file changed, 20 insertions(+), 30 deletions(-)

diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index 5cba9803b459c..4a46578a0e660 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -229,44 +229,34 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
 
     if fit_intercept:
         if sp.issparse(X):
-            X_offset, X_var = mean_variance_axis(X, axis=0,
-                                                 weights=sample_weight)
+            X_offset, X_var = mean_variance_axis(
+                X, axis=0, weights=sample_weight
+            )
+
             if not return_mean:
                 X_offset[:] = X.dtype.type(0)
-
-            if normalize:
-                # TODO: f_normalize could be used here as well but the function
-                # inplace_csr_row_normalize_l2 must be changed such that it
-                # can return also the norms computed internally
-
-                # transform variance to norm in-place
-                X_var *= X.shape[0]
-                X_scale = np.sqrt(X_var, X_var)
-                del X_var
-                X_scale[X_scale == 0] = 1
-                inplace_column_scale(X, 1. / X_scale)
-            else:
-                X_scale = np.ones(X.shape[1], dtype=X.dtype)
-
         else:
-            X_offset, X_var, _ = \
-                _incremental_mean_and_var(X,
-                                          last_mean=0.,
-                                          last_variance=0.,
-                                          last_sample_count=0.,
-                                          sample_weight=sample_weight)
+            X_offset, X_var, _ = _incremental_mean_and_var(
+                X, last_mean=0., last_variance=0., last_sample_count=0.,
+                sample_weight=sample_weight
+            )
 
             X_offset = X_offset.astype(X.dtype)
-            X_var = X_var.astype(X.dtype)
-
             X -= X_offset
 
-            if normalize:
-                X_scale = np.sqrt(X_var) * np.sqrt(len(X))
-                X_scale[X_scale == 0.0] = 1.0
-                X = X / X_scale
+        X_var = X_var.astype(X.dtype)
+
+        if normalize:
+            X_var *= X.shape[0]
+            X_scale = np.sqrt(X_var, X_var)
+            del X_var
+            X_scale[X_scale == 0] = 1
+            if sp.issparse(X):
+                inplace_column_scale(X, 1. / X_scale)
             else:
-                X_scale = np.ones(X.shape[1], dtype=X.dtype)
+                X /= X_scale
+        else:
+            X_scale = np.ones(X.shape[1], dtype=X.dtype)
 
         y_offset = np.average(y, axis=0, weights=sample_weight)
         y = y - y_offset

From b366488caf1165d01e438ba83ef531324219777e Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 09:45:39 +0100
Subject: [PATCH 22/54] remove del x_var

---
 sklearn/linear_model/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index 4a46578a0e660..171f08fc6dfaf 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -249,7 +249,7 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
         if normalize:
             X_var *= X.shape[0]
             X_scale = np.sqrt(X_var, X_var)
-            del X_var
+
             X_scale[X_scale == 0] = 1
             if sp.issparse(X):
                 inplace_column_scale(X, 1. / X_scale)

From e055335536c55c7da5954f1062cdc438d80a3f0c Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 09:58:38 +0100
Subject: [PATCH 23/54] update the docstring of the test

---
 sklearn/linear_model/tests/test_coordinate_descent.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index e58c630ebcb8a..dec26e598e2a2 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -414,10 +414,10 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params):
 def test_linear_model_sample_weights_normalize_in_pipeline(
         with_mean, is_sparse, estimator, params
 ):
-    # Test that the results for running linear regression LinearRegression with
-    # sample_weight set and with normalize set to True gives similar results as
-    # LinearRegression with no normalize in a pipeline with a StandardScaler
-    # and set sample_weight.
+    # Test that the results for running linear model with sample_weight
+    # and with normalize set to True gives similar results as the same linear
+    # model with normalize set to False in a pipeline with
+    # a StandardScaler and sample_weight.
     model_name = estimator.__name__
 
     if model_name in ['Lasso', 'ElasticNet'] and is_sparse:

From 0db7fe1140156fbe0235b89e5f65a95f2997af49 Mon Sep 17 00:00:00 2001
From: Maria Telenczuk <maja_ka@hotmail.com>
Date: Thu, 11 Feb 2021 09:59:57 +0100
Subject: [PATCH 24/54] Update sklearn/linear_model/tests/test_base.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/linear_model/tests/test_base.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index c6a88e64f0039..67410b3c0c60d 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -500,13 +500,14 @@ def test_preprocess_data_weighted(is_sparse):
     assert_array_almost_equal(y_mean, expected_y_mean)
     assert_array_almost_equal(X_norm, expected_X_norm)
     if is_sparse:
+        # X is not centered
         assert_array_almost_equal(
             Xt.toarray(), X.toarray() / expected_X_norm
-            )
+        )
     else:
         assert_array_almost_equal(
             Xt, (X - expected_X_mean) / expected_X_norm
-            )
+        )
     assert_array_almost_equal(yt, y - expected_y_mean)
 
 

From faaec9c01e1a69bd3c180ccbbd9155f9855886dc Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 10:32:25 +0100
Subject: [PATCH 25/54] improve the docstrings of the tests

---
 sklearn/linear_model/tests/test_coordinate_descent.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index dec26e598e2a2..a4ab09c43c65b 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -304,6 +304,9 @@ def test_lasso_cv_positive_constraint():
 
 
 def _scale_alpha(estimator, n_samples):
+    """"Rescale the parameter alpha from when the estimator is evoked with
+    normalize set to True to when it is evoked in a Pipeline with normalize set
+    to False and with a StandardScaler."""
     if 'alpha' not in estimator.get_params():
         return
 
@@ -321,7 +324,9 @@ def _scale_alpha(estimator, n_samples):
     estimator.set_params(alpha=alpha)
 
 
-# FIXME: 'normalize' to be removed in 1.2
+# FIXME: 'normalize' to be removed in 1.2 for all the models excluding:
+# OrthogonalMatchingPursuit, Lars, LassoLars, LarsCV, LassoLarsCV
+# for which it is to be removed in 1.4
 @pytest.mark.filterwarnings("ignore:'normalize' was deprecated")
 @pytest.mark.parametrize(
     "LinearModel, params",

From f9f2b872f992c8b0bfdafa42b4a5c5c5d560f309 Mon Sep 17 00:00:00 2001
From: Maria Telenczuk <maja_ka@hotmail.com>
Date: Thu, 11 Feb 2021 10:41:25 +0100
Subject: [PATCH 26/54] Update
 sklearn/linear_model/tests/test_coordinate_descent.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 .../linear_model/tests/test_coordinate_descent.py    | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index a4ab09c43c65b..7fa9688b95c6e 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -400,12 +400,12 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params):
 @pytest.mark.parametrize(
     "estimator, params",
     [
-     (Lasso, {"tol": 1e-16, "alpha": 0.1}),
-     (RidgeClassifier, {"solver": 'sparse_cg', "alpha": 0.1}),
-     (ElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.1}),
-     (ElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.1}),
-     (Ridge, {"solver": 'sparse_cg', 'tol': 1e-12, "alpha": 0.1}),
-     (LinearRegression, {}),
+         (Lasso, {"tol": 1e-16, "alpha": 0.1}),
+         (RidgeClassifier, {"solver": 'sparse_cg', "alpha": 0.1}),
+         (ElasticNet, {"tol": 1e-16, 'l1_ratio': 1, "alpha": 0.1}),
+         (ElasticNet, {"tol": 1e-16, 'l1_ratio': 0, "alpha": 0.1}),
+         (Ridge, {"solver": 'sparse_cg', 'tol': 1e-12, "alpha": 0.1}),
+         (LinearRegression, {}),
      ]
 )
 @pytest.mark.parametrize(

From 1e41bbdfa3b81e61a1e184e0e955a66c0d16a287 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 10:48:47 +0100
Subject: [PATCH 27/54] change the order of the params

---
 sklearn/linear_model/tests/test_coordinate_descent.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index 7fa9688b95c6e..9dbd5c22317c7 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -417,7 +417,7 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params):
     ]
 )
 def test_linear_model_sample_weights_normalize_in_pipeline(
-        with_mean, is_sparse, estimator, params
+        is_sparse, with_mean, estimator, params
 ):
     # Test that the results for running linear model with sample_weight
     # and with normalize set to True gives similar results as the same linear

From 06035b7d963322027a6cdd1b89a4427454a5b398 Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort <alexandre.gramfort@m4x.org>
Date: Thu, 11 Feb 2021 11:30:50 +0100
Subject: [PATCH 28/54] add inplace in function name to make it explicit

---
 sklearn/linear_model/tests/test_coordinate_descent.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index 9dbd5c22317c7..c94f7a078ce74 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -303,7 +303,7 @@ def test_lasso_cv_positive_constraint():
     assert min(clf_constrained.coef_) >= 0
 
 
-def _scale_alpha(estimator, n_samples):
+def _scale_alpha_inplace(estimator, n_samples):
     """"Rescale the parameter alpha from when the estimator is evoked with
     normalize set to True to when it is evoked in a Pipeline with normalize set
     to False and with a StandardScaler."""
@@ -378,7 +378,7 @@ def test_model_pipeline_same_as_normalize_true(LinearModel, params):
 
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
 
-    _scale_alpha(pipeline[1], X_train.shape[0])
+    _scale_alpha_inplace(pipeline[1], X_train.shape[0])
 
     model_normalize.fit(X_train, y_train)
     y_pred_normalize = model_normalize.predict(X_test)
@@ -457,7 +457,7 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
         estimator(normalize=False, fit_intercept=True, **params)
     )
 
-    _scale_alpha(reg_with_scaler[1], X_train.shape[0])
+    _scale_alpha_inplace(reg_with_scaler[1], X_train.shape[0])
 
     kwargs = {reg_with_scaler.steps[0][0] + '__sample_weight':
               sample_weight,

From f5fd4d1f79089f7fe2a03f686e540f999b7825e4 Mon Sep 17 00:00:00 2001
From: Maria Telenczuk <maja_ka@hotmail.com>
Date: Thu, 11 Feb 2021 13:12:32 +0100
Subject: [PATCH 29/54] Update sklearn/linear_model/tests/test_base.py

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 sklearn/linear_model/tests/test_base.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index 67410b3c0c60d..85566f98a996b 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -465,7 +465,16 @@ def test_preprocess_data_multioutput():
 def test_preprocess_data_weighted(is_sparse):
     n_samples = 200
     n_features = 2
+    # Generate random data with 50% of zero values to make sure
+    # that the sparse variant of this test is actually sparse. This also
+    # shifts the mean value for each columns in X further away from
+    # zero.
     X = rng.rand(n_samples, n_features)
+    X[X < 0.5] = 0.
+    
+    # Scale the first feature of X to be 10 larger than the other to
+    # better check the impact of feature scaling.
+    X[:, 0] *= 10
     y = rng.rand(n_samples)
 
     sample_weight = rng.rand(n_samples)

From 53a8d5390cf6030de3d535c371837afa2fa50491 Mon Sep 17 00:00:00 2001
From: Maria Telenczuk <maja_ka@hotmail.com>
Date: Thu, 11 Feb 2021 13:13:22 +0100
Subject: [PATCH 30/54] Update sklearn/linear_model/tests/test_base.py

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 sklearn/linear_model/tests/test_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index 85566f98a996b..e765dc1b5a9ea 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -485,7 +485,7 @@ def test_preprocess_data_weighted(is_sparse):
     X_sample_weight_var = np.average((X - X_sample_weight_avg)**2,
                                      weights=sample_weight,
                                      axis=0)
-    expected_X_norm = np.sqrt(X_sample_weight_var) * np.sqrt(len(X))
+    expected_X_norm = np.sqrt(X_sample_weight_var) * np.sqrt(n_samples)
 
     if is_sparse:
         X = sparse.csr_matrix(X)

From 15f5ad722d94f942a4e3b4a7ce8d353b923c3b06 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 13:18:38 +0100
Subject: [PATCH 31/54] replace model name checks with isinstance()

---
 sklearn/linear_model/tests/test_coordinate_descent.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index c94f7a078ce74..d451c4aa16d21 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -310,12 +310,11 @@ def _scale_alpha_inplace(estimator, n_samples):
     if 'alpha' not in estimator.get_params():
         return
 
-    model_name = estimator.__class__.__name__
-    if model_name in ['Lasso', 'LassoLars', 'MultiTaskLasso']:
+    if isinstance(estimator, (Lasso, LassoLars, MultiTaskLasso)):
         alpha = estimator.alpha * np.sqrt(n_samples)
-    if model_name in ['Ridge', 'RidgeClassifier']:
+    if isinstance(estimator, (Ridge, RidgeClassifier)):
         alpha = estimator.alpha * n_samples
-    if model_name in ['ElasticNet', 'MultiTaskElasticNet']:
+    if isinstance(estimator, (ElasticNet, MultiTaskElasticNet)):
         if estimator.l1_ratio == 1:
             alpha = estimator.alpha * np.sqrt(n_samples)
         if estimator.l1_ratio == 0:

From 9ac0a66747ce5584d39b5be4729112d91011e64e Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 13:29:50 +0100
Subject: [PATCH 32/54] update the test

---
 sklearn/linear_model/tests/test_coordinate_descent.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index d451c4aa16d21..ff99ecad83725 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -317,8 +317,11 @@ def _scale_alpha_inplace(estimator, n_samples):
     if isinstance(estimator, (ElasticNet, MultiTaskElasticNet)):
         if estimator.l1_ratio == 1:
             alpha = estimator.alpha * np.sqrt(n_samples)
-        if estimator.l1_ratio == 0:
+        elif estimator.l1_ratio == 0:
             alpha = estimator.alpha * n_samples
+        else:
+            # To avoid silent errors in case of refactoring
+            raise NotImplementedError
 
     estimator.set_params(alpha=alpha)
 

From 75025b914de87276e831fe7eab984a57ee4f19b0 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 13:31:51 +0100
Subject: [PATCH 33/54] cleanup

---
 sklearn/linear_model/tests/test_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index e765dc1b5a9ea..735979520ce45 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -471,7 +471,7 @@ def test_preprocess_data_weighted(is_sparse):
     # zero.
     X = rng.rand(n_samples, n_features)
     X[X < 0.5] = 0.
-    
+
     # Scale the first feature of X to be 10 larger than the other to
     # better check the impact of feature scaling.
     X[:, 0] *= 10

From b29d37f42055a1fd578f3c455b4fd8a62fe846b4 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 14:07:57 +0100
Subject: [PATCH 34/54] set negative values of X to zero in the test

---
 sklearn/linear_model/tests/test_coordinate_descent.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index ff99ecad83725..485e5762335bf 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -439,7 +439,8 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
 
     # make sure the data is not centered to make the problem more
     # difficult
-    X += 10
+    X[X < 0] = 0
+
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5,
                                                         random_state=rng)
     if is_sparse:

From cfe824d30a414dda67ce846e02c917a49195f94e Mon Sep 17 00:00:00 2001
From: Maria Telenczuk <maja_ka@hotmail.com>
Date: Thu, 11 Feb 2021 14:08:49 +0100
Subject: [PATCH 35/54] Update
 sklearn/linear_model/tests/test_coordinate_descent.py

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 sklearn/linear_model/tests/test_coordinate_descent.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index 485e5762335bf..ce8e472eaa839 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -304,9 +304,12 @@ def test_lasso_cv_positive_constraint():
 
 
 def _scale_alpha_inplace(estimator, n_samples):
-    """"Rescale the parameter alpha from when the estimator is evoked with
+    """Rescale the alpha param to check equivalence with StandardScaler
+    
+    Rescale the alpha parameter from when the estimator is evoked with
     normalize set to True to when it is evoked in a Pipeline with normalize set
-    to False and with a StandardScaler."""
+    to False and with a StandardScaler.
+    """
     if 'alpha' not in estimator.get_params():
         return
 

From 2efe5c38127ccaee7d1ead2d1346ccdea4d1b433 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 15:22:08 +0100
Subject: [PATCH 36/54] cleanup

---
 sklearn/linear_model/tests/test_coordinate_descent.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index ce8e472eaa839..afa121cd49f40 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -304,9 +304,7 @@ def test_lasso_cv_positive_constraint():
 
 
 def _scale_alpha_inplace(estimator, n_samples):
-    """Rescale the alpha param to check equivalence with StandardScaler
-    
-    Rescale the alpha parameter from when the estimator is evoked with
+    """Rescale the parameter alpha from when the estimator is evoked with
     normalize set to True to when it is evoked in a Pipeline with normalize set
     to False and with a StandardScaler.
     """
@@ -441,7 +439,7 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
         y = np.sign(y)
 
     # make sure the data is not centered to make the problem more
-    # difficult
+    # difficult + add 0s for the sparse case
     X[X < 0] = 0
 
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5,

From e062745c27e8e92ad39c262e73e19979962fe68b Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 15:39:15 +0100
Subject: [PATCH 37/54] towards comparing the results of the StandardScaler and
 _preprocess_data

---
 sklearn/linear_model/tests/test_base.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index 735979520ce45..f6bc1f982bb1e 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -1,5 +1,6 @@
 # Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
 #         Fabian Pedregosa <fabian.pedregosa@inria.fr>
+#         Maria Telenczuk <https://github.com/maikia>
 #
 # License: BSD 3 clause
 
@@ -24,6 +25,7 @@
 from sklearn.datasets import make_sparse_uncorrelated
 from sklearn.datasets import make_regression
 from sklearn.datasets import load_iris
+from sklearn.preprocessing import StandardScaler
 
 rng = np.random.RandomState(0)
 rtol = 1e-6
@@ -490,6 +492,7 @@ def test_preprocess_data_weighted(is_sparse):
     if is_sparse:
         X = sparse.csr_matrix(X)
 
+    # normalize is False
     Xt, yt, X_mean, y_mean, X_norm = \
         _preprocess_data(X, y, fit_intercept=True, normalize=False,
                          sample_weight=sample_weight, return_mean=True)
@@ -502,9 +505,11 @@ def test_preprocess_data_weighted(is_sparse):
         assert_array_almost_equal(Xt, X - expected_X_mean)
     assert_array_almost_equal(yt, y - expected_y_mean)
 
+    # normalize is True
     Xt, yt, X_mean, y_mean, X_norm = \
         _preprocess_data(X, y, fit_intercept=True, normalize=True,
                          sample_weight=sample_weight, return_mean=True)
+
     assert_array_almost_equal(X_mean, expected_X_mean)
     assert_array_almost_equal(y_mean, expected_y_mean)
     assert_array_almost_equal(X_norm, expected_X_norm)
@@ -513,10 +518,16 @@ def test_preprocess_data_weighted(is_sparse):
         assert_array_almost_equal(
             Xt.toarray(), X.toarray() / expected_X_norm
         )
+        scaler = StandardScaler(with_mean=False).fit(
+            X, sample_weight=sample_weight)
     else:
         assert_array_almost_equal(
             Xt, (X - expected_X_mean) / expected_X_norm
         )
+        scaler = StandardScaler(with_mean=True).fit(
+            X, sample_weight=sample_weight)
+        assert_array_almost_equal(scaler.mean_, X_mean)
+    assert_array_almost_equal(scaler.transform(X), Xt)
     assert_array_almost_equal(yt, y - expected_y_mean)
 
 

From 4733d975da355fc8fbd1e54c6dd45df847feef4f Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 16:15:29 +0100
Subject: [PATCH 38/54] fix the test

---
 sklearn/linear_model/tests/test_base.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index f6bc1f982bb1e..73d137f4d981b 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -520,6 +520,9 @@ def test_preprocess_data_weighted(is_sparse):
         )
         scaler = StandardScaler(with_mean=False).fit(
             X, sample_weight=sample_weight)
+        assert_array_almost_equal(
+            scaler.transform(X).toarray() / np.sqrt(n_samples), Xt.toarray()
+            )
     else:
         assert_array_almost_equal(
             Xt, (X - expected_X_mean) / expected_X_norm
@@ -527,7 +530,7 @@ def test_preprocess_data_weighted(is_sparse):
         scaler = StandardScaler(with_mean=True).fit(
             X, sample_weight=sample_weight)
         assert_array_almost_equal(scaler.mean_, X_mean)
-    assert_array_almost_equal(scaler.transform(X), Xt)
+        assert_array_almost_equal(scaler.transform(X) / np.sqrt(n_samples), Xt)
     assert_array_almost_equal(yt, y - expected_y_mean)
 
 

From 66ac2972f2ee666b8ad2c9a650e561ba5c15fda9 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 18:28:17 +0100
Subject: [PATCH 39/54] clean up the tests according to the reviewer guidelines

---
 .../tests/test_coordinate_descent.py          | 26 ++++++++++---------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index afa121cd49f40..59b7aa5133eb3 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -13,6 +13,7 @@
 from sklearn.datasets import make_regression
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import make_pipeline
+from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
 
 from sklearn.exceptions import ConvergenceWarning
@@ -450,24 +451,25 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
 
     sample_weight = rng.rand(X_train.shape[0])
 
-    # linear estimator with explicit sample_weight
+    # linear estimator with explicit sample_weight, normalize = True
     reg_with_normalize = estimator(normalize=True, fit_intercept=True,
                                    **params)
     reg_with_normalize.fit(X_train, y_train, sample_weight=sample_weight)
 
-    # linear estimator in a pipeline
-    reg_with_scaler = make_pipeline(
-        StandardScaler(with_mean=with_mean),
-        estimator(normalize=False, fit_intercept=True, **params)
-    )
+    # linear estimator in a pipeline with a StandardScaler, normalize=False
+    linear_regressor = estimator(normalize=False, fit_intercept=True, **params)
+    _scale_alpha_inplace(linear_regressor, X_train.shape[0])  # rescale alpha
+    reg_with_scaler = Pipeline([
+        ("scaler", StandardScaler(with_mean=with_mean)),
+        ("linear_regressor", linear_regressor)
+    ])
 
-    _scale_alpha_inplace(reg_with_scaler[1], X_train.shape[0])
+    fit_params = {
+        "scaler__sample_weight":  sample_weight,
+        "linear_regressor__sample_weight": sample_weight,
+    }
 
-    kwargs = {reg_with_scaler.steps[0][0] + '__sample_weight':
-              sample_weight,
-              reg_with_scaler.steps[-1][0] + '__sample_weight':
-              sample_weight}
-    reg_with_scaler.fit(X_train, y_train, **kwargs)
+    reg_with_scaler.fit(X_train, y_train, **fit_params)
 
     y_pred_norm = reg_with_normalize.predict(X_test)
     y_pred_pip = reg_with_scaler.predict(X_test)

From 32ebc775df6616e88b16517fbbb7a21de0458f13 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 18:30:30 +0100
Subject: [PATCH 40/54] more readability improvements

---
 sklearn/linear_model/tests/test_coordinate_descent.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index 59b7aa5133eb3..58a96446e42fb 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -471,8 +471,8 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
 
     reg_with_scaler.fit(X_train, y_train, **fit_params)
 
-    y_pred_norm = reg_with_normalize.predict(X_test)
-    y_pred_pip = reg_with_scaler.predict(X_test)
+    y_pred_nomalize = reg_with_normalize.predict(X_test)
+    y_pred_scaler = reg_with_scaler.predict(X_test)
 
     y_train_mean = np.average(y_train, weights=sample_weight)
     if is_sparse:
@@ -483,7 +483,7 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
     assert (reg_with_normalize.intercept_ ==
             pytest.approx(y_train_mean -
                           reg_with_normalize.coef_.dot(X_train_mean)))
-    assert_allclose(y_pred_norm, y_pred_pip)
+    assert_allclose(y_pred_nomalize,  y_pred_scaler)
 
 
 # FIXME: 'normalize' to be removed in 1.2

From 760285c0d3f92e3a0af75fa5d9e7b8a2b6313f4e Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 18:32:25 +0100
Subject: [PATCH 41/54] add more comments to the test

---
 sklearn/linear_model/tests/test_coordinate_descent.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index 58a96446e42fb..349193490199d 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -471,6 +471,8 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
 
     reg_with_scaler.fit(X_train, y_train, **fit_params)
 
+    # Check that the 2 regressions models are exactly equivalent in the
+    # sense that they predict exactly the same outcome.
     y_pred_nomalize = reg_with_normalize.predict(X_test)
     y_pred_scaler = reg_with_scaler.predict(X_test)
 

From 41f32b8c21048f5d6dfc7903e45604aca0bce194 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 20:40:21 +0100
Subject: [PATCH 42/54] further improve the test readability

---
 sklearn/linear_model/tests/test_coordinate_descent.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index 349193490199d..096338ab5d172 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -451,7 +451,7 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
 
     sample_weight = rng.rand(X_train.shape[0])
 
-    # linear estimator with explicit sample_weight, normalize = True
+    # linear estimator with built-in feature normalization
     reg_with_normalize = estimator(normalize=True, fit_intercept=True,
                                    **params)
     reg_with_normalize.fit(X_train, y_train, sample_weight=sample_weight)
@@ -475,6 +475,7 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
     # sense that they predict exactly the same outcome.
     y_pred_nomalize = reg_with_normalize.predict(X_test)
     y_pred_scaler = reg_with_scaler.predict(X_test)
+    assert_allclose(y_pred_nomalize,  y_pred_scaler)
 
     y_train_mean = np.average(y_train, weights=sample_weight)
     if is_sparse:
@@ -485,7 +486,6 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
     assert (reg_with_normalize.intercept_ ==
             pytest.approx(y_train_mean -
                           reg_with_normalize.coef_.dot(X_train_mean)))
-    assert_allclose(y_pred_nomalize,  y_pred_scaler)
 
 
 # FIXME: 'normalize' to be removed in 1.2

From 0ab04c2a1ca8c35d587a41aa5e521a014be75891 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 20:46:06 +0100
Subject: [PATCH 43/54] move standardscaler test to the end + add explanation

---
 sklearn/linear_model/tests/test_base.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index 73d137f4d981b..6551736d6056c 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -518,15 +518,22 @@ def test_preprocess_data_weighted(is_sparse):
         assert_array_almost_equal(
             Xt.toarray(), X.toarray() / expected_X_norm
         )
+    else:
+        assert_array_almost_equal(
+            Xt, (X - expected_X_mean) / expected_X_norm
+        )
+
+    # _preprocess_data with normalize=True scales the data by the feature-wise
+    # euclidean norms while StandardScaler scales the data by the feature-wise
+    # standard deviations.
+    # The two are equivalent up to a ration of np.sqrt(n_samples)
+    if is_sparse:
         scaler = StandardScaler(with_mean=False).fit(
             X, sample_weight=sample_weight)
         assert_array_almost_equal(
             scaler.transform(X).toarray() / np.sqrt(n_samples), Xt.toarray()
             )
     else:
-        assert_array_almost_equal(
-            Xt, (X - expected_X_mean) / expected_X_norm
-        )
         scaler = StandardScaler(with_mean=True).fit(
             X, sample_weight=sample_weight)
         assert_array_almost_equal(scaler.mean_, X_mean)

From d34ab61705b39fb703f4a346a717a4f9fc36ad11 Mon Sep 17 00:00:00 2001
From: Maria Telenczuk <maja_ka@hotmail.com>
Date: Thu, 11 Feb 2021 20:46:42 +0100
Subject: [PATCH 44/54] Update sklearn/linear_model/tests/test_base.py

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 sklearn/linear_model/tests/test_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index 6551736d6056c..e5e999b9f568c 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -466,7 +466,7 @@ def test_preprocess_data_multioutput():
 @pytest.mark.parametrize("is_sparse", [False, True])
 def test_preprocess_data_weighted(is_sparse):
     n_samples = 200
-    n_features = 2
+    n_features = 4
     # Generate random data with 50% of zero values to make sure
     # that the sparse variant of this test is actually sparse. This also
     # shifts the mean value for each columns in X further away from

From 013ca9716a89dab33eca24f7cf7f5166d1bb04a7 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 20:48:32 +0100
Subject: [PATCH 45/54] add edge test cases

---
 sklearn/linear_model/tests/test_base.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index e5e999b9f568c..7fad21af09ebe 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -477,6 +477,10 @@ def test_preprocess_data_weighted(is_sparse):
     # Scale the first feature of X to be 10 larger than the other to
     # better check the impact of feature scaling.
     X[:, 0] *= 10
+    # Constant non-zero feature
+    X[:, 2] = 1.
+    # Constant zero feature (non-materialized in the sparse case)
+    X[:, 3] = 0.
     y = rng.rand(n_samples)
 
     sample_weight = rng.rand(n_samples)

From 2f6553cec21e69ae1729acec74b4b62b8b5dedcf Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 20:53:51 +0100
Subject: [PATCH 46/54] cleaning up

---
 sklearn/linear_model/tests/test_coordinate_descent.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index 096338ab5d172..132c2f3de2352 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -473,10 +473,10 @@ def test_linear_model_sample_weights_normalize_in_pipeline(
 
     # Check that the 2 regressions models are exactly equivalent in the
     # sense that they predict exactly the same outcome.
-    y_pred_nomalize = reg_with_normalize.predict(X_test)
+    y_pred_normalize = reg_with_normalize.predict(X_test)
     y_pred_scaler = reg_with_scaler.predict(X_test)
-    assert_allclose(y_pred_nomalize,  y_pred_scaler)
-
+    assert_allclose(y_pred_normalize,  y_pred_scaler)
+    # Check intercept computation when normalize is True
     y_train_mean = np.average(y_train, weights=sample_weight)
     if is_sparse:
         X_train_mean, _ = mean_variance_axis(X_train, axis=0,

From 4a8938a8d104fe4dc379b6d9655bcafddbe396f9 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 21:38:52 +0100
Subject: [PATCH 47/54] dealing with first edge case

---
 sklearn/linear_model/_base.py           | 11 +++++++----
 sklearn/linear_model/tests/test_base.py |  4 +++-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index 171f08fc6dfaf..d328033a6c8de 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -249,12 +249,15 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
         if normalize:
             X_var *= X.shape[0]
             X_scale = np.sqrt(X_var, X_var)
-
-            X_scale[X_scale == 0] = 1
+            if np.any(X_scale == 0):
+                X_scale_ = X_scale.copy()
+                X_scale_[X_scale_ == 0] = 1
+            else:
+                X_scale_ = X_scale
             if sp.issparse(X):
-                inplace_column_scale(X, 1. / X_scale)
+                inplace_column_scale(X, 1. / X_scale_)
             else:
-                X /= X_scale
+                X /= X_scale_
         else:
             X_scale = np.ones(X.shape[1], dtype=X.dtype)
 
diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index 7fad21af09ebe..ddf353b524e9d 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -478,7 +478,7 @@ def test_preprocess_data_weighted(is_sparse):
     # better check the impact of feature scaling.
     X[:, 0] *= 10
     # Constant non-zero feature
-    X[:, 2] = 1.
+    # X[:, 2] = 1.
     # Constant zero feature (non-materialized in the sparse case)
     X[:, 3] = 0.
     y = rng.rand(n_samples)
@@ -517,6 +517,8 @@ def test_preprocess_data_weighted(is_sparse):
     assert_array_almost_equal(X_mean, expected_X_mean)
     assert_array_almost_equal(y_mean, expected_y_mean)
     assert_array_almost_equal(X_norm, expected_X_norm)
+
+    expected_X_norm[expected_X_norm == 0] = 1
     if is_sparse:
         # X is not centered
         assert_array_almost_equal(

From 98610658ff3f4aaaccd5298c618c0cae0c372562 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 22:21:14 +0100
Subject: [PATCH 48/54] numerical error dealt with in tests

---
 sklearn/linear_model/tests/test_base.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index ddf353b524e9d..80f93843e7efd 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -478,7 +478,7 @@ def test_preprocess_data_weighted(is_sparse):
     # better check the impact of feature scaling.
     X[:, 0] *= 10
     # Constant non-zero feature
-    # X[:, 2] = 1.
+    X[:, 2] = 1.
     # Constant zero feature (non-materialized in the sparse case)
     X[:, 3] = 0.
     y = rng.rand(n_samples)
@@ -518,15 +518,21 @@ def test_preprocess_data_weighted(is_sparse):
     assert_array_almost_equal(y_mean, expected_y_mean)
     assert_array_almost_equal(X_norm, expected_X_norm)
 
-    expected_X_norm[expected_X_norm == 0] = 1
+    if np.any(expected_X_norm == 0):
+        expected_X_norm_ = expected_X_norm.copy()
+        expected_X_norm_[expected_X_norm_ == 0] = 1
+    else:
+        expected_X_norm_ = expected_X_norm
+    # avoid roundoff errors
+    expected_X_norm_[expected_X_norm_ < 1e-10] = 1
     if is_sparse:
         # X is not centered
         assert_array_almost_equal(
-            Xt.toarray(), X.toarray() / expected_X_norm
+            Xt.toarray(), X.toarray() / expected_X_norm_
         )
     else:
         assert_array_almost_equal(
-            Xt, (X - expected_X_mean) / expected_X_norm
+            Xt, (X - expected_X_mean) / expected_X_norm_
         )
 
     # _preprocess_data with normalize=True scales the data by the feature-wise

From 0b88fd643eb83e1a32e7bfed8437de4a459f4ea6 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Thu, 11 Feb 2021 22:57:48 +0100
Subject: [PATCH 49/54] cleanup

---
 sklearn/linear_model/_base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index d328033a6c8de..f1c4212dd28a8 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -255,6 +255,7 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
             else:
                 X_scale_ = X_scale
             if sp.issparse(X):
+                # import pdb; pdb.set_trace()
                 inplace_column_scale(X, 1. / X_scale_)
             else:
                 X /= X_scale_

From 19548adf86ba3c907891ec4dde7464db58ee6c91 Mon Sep 17 00:00:00 2001
From: maikia <telenczukm@gmail.com>
Date: Fri, 12 Feb 2021 22:59:03 +0100
Subject: [PATCH 50/54] comment out troublesome edge test case

---
 sklearn/linear_model/tests/test_base.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index 80f93843e7efd..1775a02fdbe7a 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -478,7 +478,8 @@ def test_preprocess_data_weighted(is_sparse):
     # better check the impact of feature scaling.
     X[:, 0] *= 10
     # Constant non-zero feature
-    X[:, 2] = 1.
+    # X[:, 2] = 1. # this edge case is not passing for sparse data because of
+    # the roundoff error and should be addressed elsewhere
     # Constant zero feature (non-materialized in the sparse case)
     X[:, 3] = 0.
     y = rng.rand(n_samples)
@@ -518,13 +519,13 @@ def test_preprocess_data_weighted(is_sparse):
     assert_array_almost_equal(y_mean, expected_y_mean)
     assert_array_almost_equal(X_norm, expected_X_norm)
 
-    if np.any(expected_X_norm == 0):
+    # avoid roundoff errors and division by 0
+    if np.any(expected_X_norm < 5e-15):
         expected_X_norm_ = expected_X_norm.copy()
-        expected_X_norm_[expected_X_norm_ == 0] = 1
+        expected_X_norm_[expected_X_norm_ < 5e-15] = 1
     else:
         expected_X_norm_ = expected_X_norm
-    # avoid roundoff errors
-    expected_X_norm_[expected_X_norm_ < 1e-10] = 1
+
     if is_sparse:
         # X is not centered
         assert_array_almost_equal(
@@ -542,6 +543,7 @@ def test_preprocess_data_weighted(is_sparse):
     if is_sparse:
         scaler = StandardScaler(with_mean=False).fit(
             X, sample_weight=sample_weight)
+
         assert_array_almost_equal(
             scaler.transform(X).toarray() / np.sqrt(n_samples), Xt.toarray()
             )

From 7cd9e517ad37fa3fc687b546e7ced0472028e2e5 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Fri, 19 Feb 2021 18:32:59 +0100
Subject: [PATCH 51/54] Fix handling of near constant features

---
 sklearn/linear_model/_base.py           | 17 +++---
 sklearn/linear_model/tests/test_base.py | 74 ++++++++++++-------------
 2 files changed, 42 insertions(+), 49 deletions(-)

diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index f1c4212dd28a8..d804fbda1e68a 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -244,21 +244,18 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
             X_offset = X_offset.astype(X.dtype)
             X -= X_offset
 
-        X_var = X_var.astype(X.dtype)
+        X_var = X_var.astype(X.dtype, copy=False)
 
         if normalize:
             X_var *= X.shape[0]
-            X_scale = np.sqrt(X_var, X_var)
-            if np.any(X_scale == 0):
-                X_scale_ = X_scale.copy()
-                X_scale_[X_scale_ == 0] = 1
-            else:
-                X_scale_ = X_scale
+            X_scale = np.sqrt(X_var, out=X_var)
+            near_zero_mask = X_scale < np.finfo(X_scale.dtype).eps
+            if np.any(near_zero_mask):
+                X_scale[near_zero_mask] = 1
             if sp.issparse(X):
-                # import pdb; pdb.set_trace()
-                inplace_column_scale(X, 1. / X_scale_)
+                inplace_column_scale(X, 1. / X_scale)
             else:
-                X /= X_scale_
+                X /= X_scale
         else:
             X_scale = np.ones(X.shape[1], dtype=X.dtype)
 
diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index 1775a02fdbe7a..0d0f546118090 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -409,31 +409,31 @@ def test_preprocess_data():
     X = rng.rand(n_samples, n_features)
     y = rng.rand(n_samples)
     expected_X_mean = np.mean(X, axis=0)
-    expected_X_norm = np.std(X, axis=0) * np.sqrt(X.shape[0])
+    expected_X_scale = np.std(X, axis=0) * np.sqrt(X.shape[0])
     expected_y_mean = np.mean(y, axis=0)
 
-    Xt, yt, X_mean, y_mean, X_norm = \
+    Xt, yt, X_mean, y_mean, X_scale = \
         _preprocess_data(X, y, fit_intercept=False, normalize=False)
     assert_array_almost_equal(X_mean, np.zeros(n_features))
     assert_array_almost_equal(y_mean, 0)
-    assert_array_almost_equal(X_norm, np.ones(n_features))
+    assert_array_almost_equal(X_scale, np.ones(n_features))
     assert_array_almost_equal(Xt, X)
     assert_array_almost_equal(yt, y)
 
-    Xt, yt, X_mean, y_mean, X_norm = \
+    Xt, yt, X_mean, y_mean, X_scale = \
         _preprocess_data(X, y, fit_intercept=True, normalize=False)
     assert_array_almost_equal(X_mean, expected_X_mean)
     assert_array_almost_equal(y_mean, expected_y_mean)
-    assert_array_almost_equal(X_norm, np.ones(n_features))
+    assert_array_almost_equal(X_scale, np.ones(n_features))
     assert_array_almost_equal(Xt, X - expected_X_mean)
     assert_array_almost_equal(yt, y - expected_y_mean)
 
-    Xt, yt, X_mean, y_mean, X_norm = \
+    Xt, yt, X_mean, y_mean, X_scale = \
         _preprocess_data(X, y, fit_intercept=True, normalize=True)
     assert_array_almost_equal(X_mean, expected_X_mean)
     assert_array_almost_equal(y_mean, expected_y_mean)
-    assert_array_almost_equal(X_norm, expected_X_norm)
-    assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_norm)
+    assert_array_almost_equal(X_scale, expected_X_scale)
+    assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_scale)
     assert_array_almost_equal(yt, y - expected_y_mean)
 
 
@@ -492,18 +492,21 @@ def test_preprocess_data_weighted(is_sparse):
     X_sample_weight_var = np.average((X - X_sample_weight_avg)**2,
                                      weights=sample_weight,
                                      axis=0)
-    expected_X_norm = np.sqrt(X_sample_weight_var) * np.sqrt(n_samples)
+    expected_X_scale = np.sqrt(X_sample_weight_var) * np.sqrt(n_samples)
+
+    # near constant fetures should not be scaled
+    expected_X_scale[expected_X_scale < 1e-15] = 1
 
     if is_sparse:
         X = sparse.csr_matrix(X)
 
     # normalize is False
-    Xt, yt, X_mean, y_mean, X_norm = \
+    Xt, yt, X_mean, y_mean, X_scale = \
         _preprocess_data(X, y, fit_intercept=True, normalize=False,
                          sample_weight=sample_weight, return_mean=True)
     assert_array_almost_equal(X_mean, expected_X_mean)
     assert_array_almost_equal(y_mean, expected_y_mean)
-    assert_array_almost_equal(X_norm, np.ones(n_features))
+    assert_array_almost_equal(X_scale, np.ones(n_features))
     if is_sparse:
         assert_array_almost_equal(Xt.toarray(), X.toarray())
     else:
@@ -511,29 +514,22 @@ def test_preprocess_data_weighted(is_sparse):
     assert_array_almost_equal(yt, y - expected_y_mean)
 
     # normalize is True
-    Xt, yt, X_mean, y_mean, X_norm = \
+    Xt, yt, X_mean, y_mean, X_scale = \
         _preprocess_data(X, y, fit_intercept=True, normalize=True,
                          sample_weight=sample_weight, return_mean=True)
 
     assert_array_almost_equal(X_mean, expected_X_mean)
     assert_array_almost_equal(y_mean, expected_y_mean)
-    assert_array_almost_equal(X_norm, expected_X_norm)
-
-    # avoid roundoff errors and division by 0
-    if np.any(expected_X_norm < 5e-15):
-        expected_X_norm_ = expected_X_norm.copy()
-        expected_X_norm_[expected_X_norm_ < 5e-15] = 1
-    else:
-        expected_X_norm_ = expected_X_norm
+    assert_array_almost_equal(X_scale, expected_X_scale)
 
     if is_sparse:
         # X is not centered
         assert_array_almost_equal(
-            Xt.toarray(), X.toarray() / expected_X_norm_
+            Xt.toarray(), X.toarray() / expected_X_scale
         )
     else:
         assert_array_almost_equal(
-            Xt, (X - expected_X_mean) / expected_X_norm_
+            Xt, (X - expected_X_mean) / expected_X_scale
         )
 
     # _preprocess_data with normalize=True scales the data by the feature-wise
@@ -563,33 +559,33 @@ def test_sparse_preprocess_data_with_return_mean():
     X = X.tolil()
     y = rng.rand(n_samples)
     XA = X.toarray()
-    expected_X_norm = np.std(XA, axis=0) * np.sqrt(X.shape[0])
+    expected_X_scale = np.std(XA, axis=0) * np.sqrt(X.shape[0])
 
-    Xt, yt, X_mean, y_mean, X_norm = \
+    Xt, yt, X_mean, y_mean, X_scale = \
         _preprocess_data(X, y, fit_intercept=False, normalize=False,
                          return_mean=True)
     assert_array_almost_equal(X_mean, np.zeros(n_features))
     assert_array_almost_equal(y_mean, 0)
-    assert_array_almost_equal(X_norm, np.ones(n_features))
+    assert_array_almost_equal(X_scale, np.ones(n_features))
     assert_array_almost_equal(Xt.A, XA)
     assert_array_almost_equal(yt, y)
 
-    Xt, yt, X_mean, y_mean, X_norm = \
+    Xt, yt, X_mean, y_mean, X_scale = \
         _preprocess_data(X, y, fit_intercept=True, normalize=False,
                          return_mean=True)
     assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
     assert_array_almost_equal(y_mean, np.mean(y, axis=0))
-    assert_array_almost_equal(X_norm, np.ones(n_features))
+    assert_array_almost_equal(X_scale, np.ones(n_features))
     assert_array_almost_equal(Xt.A, XA)
     assert_array_almost_equal(yt, y - np.mean(y, axis=0))
 
-    Xt, yt, X_mean, y_mean, X_norm = \
+    Xt, yt, X_mean, y_mean, X_scale = \
         _preprocess_data(X, y, fit_intercept=True, normalize=True,
                          return_mean=True)
     assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
     assert_array_almost_equal(y_mean, np.mean(y, axis=0))
-    assert_array_almost_equal(X_norm, expected_X_norm)
-    assert_array_almost_equal(Xt.A, XA / expected_X_norm)
+    assert_array_almost_equal(X_scale, expected_X_scale)
+    assert_array_almost_equal(Xt.A, XA / expected_X_scale)
     assert_array_almost_equal(yt, y - np.mean(y, axis=0))
 
 
@@ -638,19 +634,19 @@ def test_dtype_preprocess_data():
     for fit_intercept in [True, False]:
         for normalize in [True, False]:
 
-            Xt_32, yt_32, X_mean_32, y_mean_32, X_norm_32 = _preprocess_data(
+            Xt_32, yt_32, X_mean_32, y_mean_32, X_scale_32 = _preprocess_data(
                 X_32, y_32, fit_intercept=fit_intercept, normalize=normalize,
                 return_mean=True)
 
-            Xt_64, yt_64, X_mean_64, y_mean_64, X_norm_64 = _preprocess_data(
+            Xt_64, yt_64, X_mean_64, y_mean_64, X_scale_64 = _preprocess_data(
                 X_64, y_64, fit_intercept=fit_intercept, normalize=normalize,
                 return_mean=True)
 
-            Xt_3264, yt_3264, X_mean_3264, y_mean_3264, X_norm_3264 = (
+            Xt_3264, yt_3264, X_mean_3264, y_mean_3264, X_scale_3264 = (
                 _preprocess_data(X_32, y_64, fit_intercept=fit_intercept,
                                  normalize=normalize, return_mean=True))
 
-            Xt_6432, yt_6432, X_mean_6432, y_mean_6432, X_norm_6432 = (
+            Xt_6432, yt_6432, X_mean_6432, y_mean_6432, X_scale_6432 = (
                 _preprocess_data(X_64, y_32, fit_intercept=fit_intercept,
                                  normalize=normalize, return_mean=True))
 
@@ -658,25 +654,25 @@ def test_dtype_preprocess_data():
             assert yt_32.dtype == np.float32
             assert X_mean_32.dtype == np.float32
             assert y_mean_32.dtype == np.float32
-            assert X_norm_32.dtype == np.float32
+            assert X_scale_32.dtype == np.float32
 
             assert Xt_64.dtype == np.float64
             assert yt_64.dtype == np.float64
             assert X_mean_64.dtype == np.float64
             assert y_mean_64.dtype == np.float64
-            assert X_norm_64.dtype == np.float64
+            assert X_scale_64.dtype == np.float64
 
             assert Xt_3264.dtype == np.float32
             assert yt_3264.dtype == np.float32
             assert X_mean_3264.dtype == np.float32
             assert y_mean_3264.dtype == np.float32
-            assert X_norm_3264.dtype == np.float32
+            assert X_scale_3264.dtype == np.float32
 
             assert Xt_6432.dtype == np.float64
             assert yt_6432.dtype == np.float64
             assert X_mean_6432.dtype == np.float64
             assert y_mean_6432.dtype == np.float64
-            assert X_norm_6432.dtype == np.float64
+            assert X_scale_6432.dtype == np.float64
 
             assert X_32.dtype == np.float32
             assert y_32.dtype == np.float32
@@ -687,7 +683,7 @@ def test_dtype_preprocess_data():
             assert_array_almost_equal(yt_32, yt_64)
             assert_array_almost_equal(X_mean_32, X_mean_64)
             assert_array_almost_equal(y_mean_32, y_mean_64)
-            assert_array_almost_equal(X_norm_32, X_norm_64)
+            assert_array_almost_equal(X_scale_32, X_scale_64)
 
 
 @pytest.mark.parametrize('n_targets', [None, 2])

From 596c036f9834591c3faf8453c5da91ed4182a829 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Fri, 19 Feb 2021 18:42:49 +0100
Subject: [PATCH 52/54] Update sklearn/linear_model/tests/test_base.py

---
 sklearn/linear_model/tests/test_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index 0d0f546118090..fb140235d2a62 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -535,7 +535,7 @@ def test_preprocess_data_weighted(is_sparse):
     # _preprocess_data with normalize=True scales the data by the feature-wise
     # euclidean norms while StandardScaler scales the data by the feature-wise
     # standard deviations.
-    # The two are equivalent up to a ration of np.sqrt(n_samples)
+    # The two are equivalent up to a ratio of np.sqrt(n_samples)
     if is_sparse:
         scaler = StandardScaler(with_mean=False).fit(
             X, sample_weight=sample_weight)

From 1c8956466cdd301c25485214b8f89392ab454972 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Sat, 20 Feb 2021 00:27:03 +0100
Subject: [PATCH 53/54] Reenable failing edge case

---
 sklearn/linear_model/_base.py           | 5 +----
 sklearn/linear_model/tests/test_base.py | 7 +++----
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index d804fbda1e68a..61005cb4b5d4a 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -232,7 +232,6 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
             X_offset, X_var = mean_variance_axis(
                 X, axis=0, weights=sample_weight
             )
-
             if not return_mean:
                 X_offset[:] = X.dtype.type(0)
         else:
@@ -249,9 +248,7 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
         if normalize:
             X_var *= X.shape[0]
             X_scale = np.sqrt(X_var, out=X_var)
-            near_zero_mask = X_scale < np.finfo(X_scale.dtype).eps
-            if np.any(near_zero_mask):
-                X_scale[near_zero_mask] = 1
+            X_scale[X_scale < 10 * np.finfo(X_scale.dtype).eps] = 1.
             if sp.issparse(X):
                 inplace_column_scale(X, 1. / X_scale)
             else:
diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index fb140235d2a62..06266711d80aa 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -478,8 +478,7 @@ def test_preprocess_data_weighted(is_sparse):
     # better check the impact of feature scaling.
     X[:, 0] *= 10
     # Constant non-zero feature
-    # X[:, 2] = 1. # this edge case is not passing for sparse data because of
-    # the roundoff error and should be addressed elsewhere
+    X[:, 2] = 1.
     # Constant zero feature (non-materialized in the sparse case)
     X[:, 3] = 0.
     y = rng.rand(n_samples)
@@ -494,8 +493,8 @@ def test_preprocess_data_weighted(is_sparse):
                                      axis=0)
     expected_X_scale = np.sqrt(X_sample_weight_var) * np.sqrt(n_samples)
 
-    # near constant fetures should not be scaled
-    expected_X_scale[expected_X_scale < 1e-15] = 1
+    # near constant features should not be scaled
+    expected_X_scale[expected_X_scale < 10 * np.finfo(np.float64).eps] = 1
 
     if is_sparse:
         X = sparse.csr_matrix(X)

From b0a9090efa294cafe7eee9e77da51ba236c64909 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 22 Feb 2021 11:12:54 +0100
Subject: [PATCH 54/54] Disable constant non-zero edge case in tests

---
 sklearn/linear_model/tests/test_base.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index 06266711d80aa..56ee18f5f0d06 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -477,8 +477,12 @@ def test_preprocess_data_weighted(is_sparse):
     # Scale the first feature of X to be 10 larger than the other to
     # better check the impact of feature scaling.
     X[:, 0] *= 10
-    # Constant non-zero feature
-    X[:, 2] = 1.
+
+    # Constant non-zero feature: this edge case is currently not handled
+    # correctly for sparse data, see:
+    # https://github.com/scikit-learn/scikit-learn/issues/19450
+    # X[:, 2] = 1.
+
     # Constant zero feature (non-materialized in the sparse case)
     X[:, 3] = 0.
     y = rng.rand(n_samples)