Fix Ridge sparse + sample_weight + intercept (#22899) · scikit-learn/scikit-learn@d76f87c · GitHub
[go: up one dir, main page]

Skip to content

Commit d76f87c

Browse files
jeremiedbb and ogrisel authored
Fix Ridge sparse + sample_weight + intercept (#22899)
Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
1 parent c6395d2 commit d76f87c

File tree

6 files changed

+90
-53
lines changed

6 files changed

+90
-53
lines changed

doc/whats_new/v1.1.rst

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -618,9 +618,14 @@ Changelog
618618
:class:`linear_model.ARDRegression` now preserve float32 dtype. :pr:`9087` by
619619
:user:`Arthur Imbert <Henley13>` and :pr:`22525` by :user:`Meekail Zain <micky774>`.
620620

621-
- |Fix| The `intercept_` attribute of :class:`LinearRegression` is now correctly
622-
computed in the presence of sample weights when the input is sparse.
623-
:pr:`22891` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
621+
- |Fix| The `coef_` and `intercept_` attributes of :class:`LinearRegression` are now
622+
correctly computed in the presence of sample weights when the input is sparse.
623+
:pr:`22891` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
624+
625+
- |Fix| The `coef_` and `intercept_` attributes of :class:`Ridge` with
626+
`solver="sparse_cg"` and `solver="lbfgs"` are now correctly computed in the presence
627+
of sample weights when the input is sparse.
628+
:pr:`22899` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
624629

625630
:mod:`sklearn.manifold`
626631
.......................

sklearn/linear_model/_base.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -325,14 +325,11 @@ def _preprocess_data(
325325
# sample_weight makes the refactoring tricky.
326326

327327

328-
def _rescale_data(X, y, sample_weight, sqrt_sample_weight=True):
328+
def _rescale_data(X, y, sample_weight):
329329
"""Rescale data sample-wise by square root of sample_weight.
330330
331331
For many linear models, this enables easy support for sample_weight.
332332
333-
Set sqrt_sample_weight=False if the square root of the sample weights has already
334-
been done prior to calling this function.
335-
336333
Returns
337334
-------
338335
X_rescaled : {array-like, sparse matrix}
@@ -343,12 +340,11 @@ def _rescale_data(X, y, sample_weight, sqrt_sample_weight=True):
343340
sample_weight = np.asarray(sample_weight)
344341
if sample_weight.ndim == 0:
345342
sample_weight = np.full(n_samples, sample_weight, dtype=sample_weight.dtype)
346-
if sqrt_sample_weight:
347-
sample_weight = np.sqrt(sample_weight)
348-
sw_matrix = sparse.dia_matrix((sample_weight, 0), shape=(n_samples, n_samples))
343+
sample_weight_sqrt = np.sqrt(sample_weight)
344+
sw_matrix = sparse.dia_matrix((sample_weight_sqrt, 0), shape=(n_samples, n_samples))
349345
X = safe_sparse_dot(sw_matrix, X)
350346
y = safe_sparse_dot(sw_matrix, y)
351-
return X, y
347+
return X, y, sample_weight_sqrt
352348

353349

354350
class LinearModel(BaseEstimator, metaclass=ABCMeta):
@@ -695,8 +691,7 @@ def fit(self, X, y, sample_weight=None):
695691
)
696692

697693
# Sample weight can be implemented via a simple rescaling.
698-
sample_weight_sqrt = np.sqrt(sample_weight)
699-
X, y = _rescale_data(X, y, sample_weight_sqrt, sqrt_sample_weight=False)
694+
X, y, sample_weight_sqrt = _rescale_data(X, y, sample_weight)
700695

701696
if self.positive:
702697
if y.ndim < 2:
@@ -844,7 +839,7 @@ def _pre_fit(
844839
sample_weight=sample_weight,
845840
)
846841
if sample_weight is not None:
847-
X, y = _rescale_data(X, y, sample_weight=sample_weight)
842+
X, y, _ = _rescale_data(X, y, sample_weight=sample_weight)
848843

849844
# FIXME: 'normalize' to be removed in 1.2
850845
if hasattr(precompute, "__array__"):

sklearn/linear_model/_bayes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ def fit(self, X, y, sample_weight=None):
253253

254254
if sample_weight is not None:
255255
# Sample weight can be implemented via a simple rescaling.
256-
X, y = _rescale_data(X, y, sample_weight)
256+
X, y, _ = _rescale_data(X, y, sample_weight)
257257

258258
self.X_offset_ = X_offset_
259259
self.X_scale_ = X_scale_

sklearn/linear_model/_ridge.py

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,28 @@
4141

4242

4343
def _solve_sparse_cg(
44-
X, y, alpha, max_iter=None, tol=1e-3, verbose=0, X_offset=None, X_scale=None
44+
X,
45+
y,
46+
alpha,
47+
max_iter=None,
48+
tol=1e-3,
49+
verbose=0,
50+
X_offset=None,
51+
X_scale=None,
52+
sample_weight_sqrt=None,
4553
):
54+
if sample_weight_sqrt is None:
55+
sample_weight_sqrt = np.ones(X.shape[0], dtype=X.dtype)
56+
4657
def _get_rescaled_operator(X):
4758

4859
X_offset_scale = X_offset / X_scale
4960

5061
def matvec(b):
51-
return X.dot(b) - b.dot(X_offset_scale)
62+
return X.dot(b) - sample_weight_sqrt * b.dot(X_offset_scale)
5263

5364
def rmatvec(b):
54-
return X.T.dot(b) - X_offset_scale * np.sum(b)
65+
return X.T.dot(b) - X_offset_scale * b.dot(sample_weight_sqrt)
5566

5667
X1 = sparse.linalg.LinearOperator(shape=X.shape, matvec=matvec, rmatvec=rmatvec)
5768
return X1
@@ -241,7 +252,15 @@ def _solve_svd(X, y, alpha):
241252

242253

243254
def _solve_lbfgs(
244-
X, y, alpha, positive=True, max_iter=None, tol=1e-3, X_offset=None, X_scale=None
255+
X,
256+
y,
257+
alpha,
258+
positive=True,
259+
max_iter=None,
260+
tol=1e-3,
261+
X_offset=None,
262+
X_scale=None,
263+
sample_weight_sqrt=None,
245264
):
246265
"""Solve ridge regression with LBFGS.
247266
@@ -269,6 +288,9 @@ def _solve_lbfgs(
269288
else:
270289
X_offset_scale = None
271290

291+
if sample_weight_sqrt is None:
292+
sample_weight_sqrt = np.ones(X.shape[0], dtype=X.dtype)
293+
272294
coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)
273295

274296
for i in range(y.shape[1]):
@@ -278,11 +300,11 @@ def _solve_lbfgs(
278300
def func(w):
279301
residual = X.dot(w) - y_column
280302
if X_offset_scale is not None:
281-
residual -= w.dot(X_offset_scale)
303+
residual -= sample_weight_sqrt * w.dot(X_offset_scale)
282304
f = 0.5 * residual.dot(residual) + 0.5 * alpha[i] * w.dot(w)
283305
grad = X.T @ residual + alpha[i] * w
284306
if X_offset_scale is not None:
285-
grad -= X_offset_scale * np.sum(residual)
307+
grad -= X_offset_scale * residual.dot(sample_weight_sqrt)
286308

287309
return f, grad
288310

@@ -568,7 +590,7 @@ def _ridge_regression(
568590
if solver not in ["sag", "saga"]:
569591
# SAG supports sample_weight directly. For other solvers,
570592
# we implement sample_weight via a simple rescaling.
571-
X, y = _rescale_data(X, y, sample_weight)
593+
X, y, sample_weight_sqrt = _rescale_data(X, y, sample_weight)
572594

573595
# Some callers of this method might pass alpha as single
574596
# element array which already has been validated.
@@ -603,6 +625,7 @@ def _ridge_regression(
603625
verbose=verbose,
604626
X_offset=X_offset,
605627
X_scale=X_scale,
628+
sample_weight_sqrt=sample_weight_sqrt if has_sw else None,
606629
)
607630

608631
elif solver == "lsqr":
@@ -673,6 +696,7 @@ def _ridge_regression(
673696
max_iter=max_iter,
674697
X_offset=X_offset,
675698
X_scale=X_scale,
699+
sample_weight_sqrt=sample_weight_sqrt if has_sw else None,
676700
)
677701

678702
if solver == "svd":
@@ -804,7 +828,7 @@ def fit(self, X, y, sample_weight=None):
804828

805829
else:
806830
if sparse.issparse(X) and self.fit_intercept:
807-
# required to fit intercept with sparse_cg solver
831+
# required to fit intercept with sparse_cg and lbfgs solver
808832
params = {"X_offset": X_offset, "X_scale": X_scale}
809833
else:
810834
# for dense matrices or when intercept is set to 0
@@ -1910,8 +1934,7 @@ def fit(self, X, y, sample_weight=None):
19101934
n_samples = X.shape[0]
19111935

19121936
if sample_weight is not None:
1913-
X, y = _rescale_data(X, y, sample_weight)
1914-
sqrt_sw = np.sqrt(sample_weight)
1937+
X, y, sqrt_sw = _rescale_data(X, y, sample_weight)
19151938
else:
19161939
sqrt_sw = np.ones(n_samples, dtype=X.dtype)
19171940

sklearn/linear_model/tests/test_base.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -692,12 +692,12 @@ def test_rescale_data_dense(n_targets):
692692
y = rng.rand(n_samples)
693693
else:
694694
y = rng.rand(n_samples, n_targets)
695-
rescaled_X, rescaled_y = _rescale_data(X, y, sample_weight)
696-
rescaled_X2 = X * np.sqrt(sample_weight)[:, np.newaxis]
695+
rescaled_X, rescaled_y, sqrt_sw = _rescale_data(X, y, sample_weight)
696+
rescaled_X2 = X * sqrt_sw[:, np.newaxis]
697697
if n_targets is None:
698-
rescaled_y2 = y * np.sqrt(sample_weight)
698+
rescaled_y2 = y * sqrt_sw
699699
else:
700-
rescaled_y2 = y * np.sqrt(sample_weight)[:, np.newaxis]
700+
rescaled_y2 = y * sqrt_sw[:, np.newaxis]
701701
assert_array_almost_equal(rescaled_X, rescaled_X2)
702702
assert_array_almost_equal(rescaled_y, rescaled_y2)
703703

sklearn/linear_model/tests/test_ridge.py

Lines changed: 38 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,33 +1363,41 @@ def test_n_iter():
13631363

13641364

13651365
@pytest.mark.parametrize("solver", ["sparse_cg", "lbfgs", "auto"])
1366-
def test_ridge_fit_intercept_sparse(solver):
1366+
@pytest.mark.parametrize("with_sample_weight", [True, False])
1367+
def test_ridge_fit_intercept_sparse(solver, with_sample_weight, global_random_seed):
1368+
"""Check that ridge finds the same coefs and intercept on dense and sparse input
1369+
in the presence of sample weights.
1370+
1371+
For now only sparse_cg and lbfgs can correctly fit an intercept
1372+
with sparse X with default tol and max_iter.
1373+
'sag' is tested separately in test_ridge_fit_intercept_sparse_sag because it
1374+
requires more iterations and should raise a warning if default max_iter is used.
1375+
Other solvers raise an exception, as checked in
1376+
test_ridge_fit_intercept_sparse_error
1377+
"""
13671378
positive = solver == "lbfgs"
13681379
X, y = _make_sparse_offset_regression(
1369-
n_features=20, random_state=0, positive=positive
1380+
n_features=20, random_state=global_random_seed, positive=positive
13701381
)
1371-
X_csr = sp.csr_matrix(X)
13721382

1373-
# for now only sparse_cg and lbfgs can correctly fit an intercept
1374-
# with sparse X with default tol and max_iter.
1375-
# sag is tested separately in test_ridge_fit_intercept_sparse_sag
1376-
# because it requires more iterations and should raise a warning if default
1377-
# max_iter is used.
1378-
# other solvers raise an exception, as checked in
1379-
# test_ridge_fit_intercept_sparse_error
1380-
#
1383+
sample_weight = None
1384+
if with_sample_weight:
1385+
rng = np.random.RandomState(global_random_seed)
1386+
sample_weight = 1.0 + rng.uniform(size=X.shape[0])
1387+
13811388
# "auto" should switch to "sparse_cg" when X is sparse
13821389
# so the reference we use for both ("auto" and "sparse_cg") is
13831390
# Ridge(solver="sparse_cg"), fitted using the dense representation (note
13841391
# that "sparse_cg" can fit sparse or dense data)
1385-
dense_ridge = Ridge(solver="sparse_cg", tol=1e-12)
1392+
dense_solver = "sparse_cg" if solver == "auto" else solver
1393+
dense_ridge = Ridge(solver=dense_solver, tol=1e-12, positive=positive)
13861394
sparse_ridge = Ridge(solver=solver, tol=1e-12, positive=positive)
1387-
dense_ridge.fit(X, y)
1388-
with warnings.catch_warnings():
1389-
warnings.simplefilter("error", UserWarning)
1390-
sparse_ridge.fit(X_csr, y)
1391-
assert np.allclose(dense_ridge.intercept_, sparse_ridge.intercept_)
1392-
assert np.allclose(dense_ridge.coef_, sparse_ridge.coef_)
1395+
1396+
dense_ridge.fit(X, y, sample_weight=sample_weight)
1397+
sparse_ridge.fit(sp.csr_matrix(X), y, sample_weight=sample_weight)
1398+
1399+
assert_allclose(dense_ridge.intercept_, sparse_ridge.intercept_)
1400+
assert_allclose(dense_ridge.coef_, sparse_ridge.coef_)
13931401

13941402

13951403
@pytest.mark.parametrize("solver", ["saga", "lsqr", "svd", "cholesky"])
@@ -1402,23 +1410,29 @@ def test_ridge_fit_intercept_sparse_error(solver):
14021410
sparse_ridge.fit(X_csr, y)
14031411

14041412

1405-
def test_ridge_fit_intercept_sparse_sag():
1413+
@pytest.mark.parametrize("with_sample_weight", [True, False])
1414+
def test_ridge_fit_intercept_sparse_sag(with_sample_weight, global_random_seed):
14061415
X, y = _make_sparse_offset_regression(
1407-
n_features=5, n_samples=20, random_state=0, X_offset=5.0
1416+
n_features=5, n_samples=20, random_state=global_random_seed, X_offset=5.0
14081417
)
1418+
if with_sample_weight:
1419+
rng = np.random.RandomState(global_random_seed)
1420+
sample_weight = 1.0 + rng.uniform(size=X.shape[0])
1421+
else:
1422+
sample_weight = None
14091423
X_csr = sp.csr_matrix(X)
14101424

14111425
params = dict(
14121426
alpha=1.0, solver="sag", fit_intercept=True, tol=1e-10, max_iter=100000
14131427
)
14141428
dense_ridge = Ridge(**params)
14151429
sparse_ridge = Ridge(**params)
1416-
dense_ridge.fit(X, y)
1430+
dense_ridge.fit(X, y, sample_weight=sample_weight)
14171431
with warnings.catch_warnings():
14181432
warnings.simplefilter("error", UserWarning)
1419-
sparse_ridge.fit(X_csr, y)
1420-
assert np.allclose(dense_ridge.intercept_, sparse_ridge.intercept_, rtol=1e-4)
1421-
assert np.allclose(dense_ridge.coef_, sparse_ridge.coef_, rtol=1e-4)
1433+
sparse_ridge.fit(X_csr, y, sample_weight=sample_weight)
1434+
assert_allclose(dense_ridge.intercept_, sparse_ridge.intercept_, rtol=1e-4)
1435+
assert_allclose(dense_ridge.coef_, sparse_ridge.coef_, rtol=1e-4)
14221436
with pytest.warns(UserWarning, match='"sag" solver requires.*'):
14231437
Ridge(solver="sag").fit(X_csr, y)
14241438

0 commit comments

Comments (0)