FEA add newton-lsmr solver to LogisticRegression and GLMs by lorentzenchr · Pull Request #25462 · scikit-learn/scikit-learn · GitHub
Status: Open. Wants to merge 44 commits into base: main.
Changes from all commits (44 commits, all by lorentzenchr):
bfb29a0  ENH add NewtonLSMRSolver (Nov 4, 2022)
9c3fd7f  TST add test_solver_on_ill_conditioned_X (Nov 6, 2022)
3874810  ENH add multinomial to LSMR (Nov 19, 2022)
86da909  ENH add newton-lsmr to LogisticRegression (Jan 23, 2023)
1d13089  CLN fix dtype and tests (Jan 24, 2023)
4e1a696  ENH speed up LDL by better handling q==0 (Jan 24, 2023)
7ef5877  ENH speed up LDL by using einsum and q_inv (Jan 25, 2023)
65d7a87  Merge branch 'main' into glm_newton_lsmr_only (Feb 13, 2023)
74dab94  TST fix atol in test_multinomial_identifiability_properties (Feb 13, 2023)
0dc87cf  DOC add whatsnew (Feb 13, 2023)
8e78465  TST reduce tolerances (Feb 23, 2023)
3da89d5  TST skip LinearOperator transpose for scipy<1.4 (Feb 23, 2023)
298e63e  TST fix skipif (Feb 24, 2023)
27ddf56  TST loosen rtol a bit (Feb 24, 2023)
d26004d  TST make tests pass for all random seeds (Feb 24, 2023)
59c1322  DOC add comment about initial A_norm (Feb 24, 2023)
a84b938  Merge branch 'main' into glm_newton_lsmr_only (Jun 2, 2023)
c50c859  FIX warning condition and cleaner for loop (Jun 3, 2023)
cf9facc  ENH improve Multinomial_LDL_Decomposition by precomputation (Jun 3, 2023)
fc1cb24  DOC improve docstrings and comments (Jun 4, 2023)
e7368e7  ENH inner stopping criterion for LSMR with forcing sequence (Jun 4, 2023)
7295409  DOC improve docs and comments of Newton solvers (Jun 6, 2023)
3ea7d98  ENH set initial A_norm based on n_samples, n_features and l2_reg_stre… (Jun 11, 2023)
99a9508  CLN address review comments (Jun 11, 2023)
d108ffc  TST set lsmr to xfail in test_solver_on_ill_conditioned_X like other … (Jun 11, 2023)
ab075fe  Doc docstring solver arg in LogisticRegression (Jun 12, 2023)
e4c0ee3  ENH set btol back to self.tol (Jun 14, 2023)
39030c4  TST higher tol in test_glm_regression_hstacked_X (Jun 14, 2023)
83ce34f  ENH increase conlim to make more tests pass (Jun 15, 2023)
8846903  Merge branch 'main' into glm_newton_lsmr_only (Jun 16, 2023)
b887390  ENH verbose print total number of LSMR iterations (Jun 17, 2023)
df94b5f  DOC inner_solve sets self.lsmr_iter (Jun 26, 2023)
23332fc  DOC enhance Taylor series and comments (Jun 22, 2023)
861be08  ENH more robust choice of atol (with some memory) (Jun 26, 2023)
973329a  Merge branch 'main' into glm_newton_lsmr_only (Jun 27, 2023)
8627b0a  Merge branch 'main' into glm_newton_lsmr_only (Nov 15, 2023)
3e2d1ea  FIX adapt to new loss and gradient scaling (Nov 15, 2023)
9409891  FIX sw_sum for newton-lsmr (Nov 16, 2023)
a49d25b  CLN fix import order (Nov 16, 2023)
bdd5230  Merge branch 'main' (Nov 16, 2023)
52381c2  FIX tests with sw_sum (Nov 16, 2023)
fe7bddd  Merge branch 'main' into glm_newton_lsmr_only (Mar 16, 2024)
2e80557  Merge branch 'main' into glm_newton_lsmr_only (Apr 12, 2024)
71d2733  CLN address review comments, move to 1.5 (Apr 12, 2024)
11 changes: 11 additions & 0 deletions doc/whats_new/v1.5.rst
@@ -264,6 +264,17 @@ Changelog
:mod:`sklearn.linear_model`
...........................

- |Enhancement| :class:`linear_model.LogisticRegression`,
:class:`linear_model.LogisticRegressionCV`, :class:`linear_model.GammaRegressor`,
:class:`linear_model.PoissonRegressor` and :class:`linear_model.TweedieRegressor` got
a new solver `solver="newton-lsmr"`. This is a second-order (Newton) optimization
routine: in each Newton step, the equation for the Newton direction is cast as a
least squares problem, a scheme known as iteratively reweighted least squares
(IRLS), which is then solved iteratively by LSMR. Because LSMR relies only on
matrix-vector multiplications, sparse matrices are supported as well. The new solver
may be worth a try especially for multiclass problems; see the usage sketch below
this diff.
:pr:`25462` by :user:`Christian Lorentzen <lorentzenchr>`.

- |Fix| :class:`linear_model.ElasticNet`, :class:`linear_model.ElasticNetCV`,
:class:`linear_model.Lasso` and :class:`linear_model.LassoCV` now explicitly don't
accept large sparse data formats. :pr:`27576` by :user:`Stefanie Senger
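A minimal usage sketch of the new option described in the changelog entry above. The synthetic data and parameter values are illustrative only, and `solver="newton-lsmr"` requires a scikit-learn build that includes this PR:

import numpy as np
from scipy import sparse
from sklearn.linear_model import LogisticRegression, PoissonRegressor

rng = np.random.default_rng(42)
X = rng.standard_normal((1000, 20))
y_class = rng.integers(0, 3, size=1000)    # multiclass target
y_count = rng.poisson(lam=3.0, size=1000)  # non-negative count target

# Multiclass problems are highlighted above as a good fit for this solver.
clf = LogisticRegression(solver="newton-lsmr").fit(X, y_class)

# LSMR needs only matrix-vector products, so sparse X is supported as well.
reg = PoissonRegressor(solver="newton-lsmr").fit(sparse.csr_matrix(X), y_count)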
602 changes: 593 additions & 9 deletions sklearn/linear_model/_glm/_newton_solver.py

Large diffs are not rendered by default.

71 changes: 63 additions & 8 deletions sklearn/linear_model/_glm/glm.py
@@ -25,7 +25,12 @@
from ...utils.optimize import _check_optimize_result
from ...utils.validation import _check_sample_weight, check_is_fitted
from .._linear_loss import LinearModelLoss
from ._newton_solver import NewtonCholeskySolver, NewtonSolver
from ._newton_solver import NewtonCholeskySolver, NewtonLSMRSolver, NewtonSolver

NEWTON_SOLVER = {
"newton-cholesky": NewtonCholeskySolver,
"newton-lsmr": NewtonLSMRSolver,
}


class _GeneralizedLinearRegressor(RegressorMixin, BaseEstimator):
@@ -65,7 +70,7 @@ class _GeneralizedLinearRegressor(RegressorMixin, BaseEstimator):
Specifies if a constant (a.k.a. bias or intercept) should be
added to the linear predictor (X @ coef + intercept).

solver : {'lbfgs', 'newton-cholesky'}, default='lbfgs'
solver : {'lbfgs', 'newton-cholesky', 'newton-lsmr'}, default='lbfgs'
Algorithm to use in the optimization problem:

'lbfgs'
@@ -81,6 +86,20 @@ class _GeneralizedLinearRegressor(RegressorMixin, BaseEstimator):

.. versionadded:: 1.2

'newton-lsmr'
Uses Newton-Raphson steps formulated as iteratively reweighted least
squares (IRLS), each solved by LSMR. Unlike `newton-cholesky`, this
solver never materializes the Hessian matrix explicitly. Instead, it
exploits the Hessian's structure to solve the least squares problems
incrementally through a series of matrix-vector operations, where the
matrices have block structure with block sizes scaling as
`(n_samples, n_features)` and `(n_samples, n_classes)`, thereby limiting
memory requirements.
Additionally, it is numerically more stable than `newton-cholesky` for
ill-conditioned `X`.

.. versionadded:: 1.5

max_iter : int, default=100
The maximal number of iterations for the solver.
Values must be in the range `[1, inf)`.
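The 'newton-lsmr' entry above describes Newton steps recast as IRLS and solved by LSMR. Below is a hedged, self-contained sketch of one such step for the single-output case, using a Poisson GLM with log link. It is not the PR's NewtonLSMRSolver, and the multinomial block structure mentioned in the docstring is out of scope here:

import numpy as np
from scipy.sparse.linalg import LinearOperator, lsmr

rng = np.random.default_rng(0)
n_samples, n_features, l2 = 200, 5, 1.0
X = rng.standard_normal((n_samples, n_features))
y = rng.poisson(np.exp(X @ rng.uniform(-0.3, 0.3, size=n_features)))
coef = np.zeros(n_features)  # current Newton iterate

mu = np.exp(X @ coef)              # inverse log link
q = (mu - y) / n_samples           # per-sample gradient term
grad = X.T @ q + l2 * coef         # gradient of the L2-penalized loss
sqrt_w = np.sqrt(mu / n_samples)   # loss Hessian is X' diag(mu/n) X

# Only matrix-vector products with A = diag(sqrt_w) @ X are needed, so a
# scipy.sparse X would work unchanged.
A = LinearOperator(
    (n_samples, n_features),
    matvec=lambda v: sqrt_w * (X @ v),
    rmatvec=lambda u: X.T @ (sqrt_w * u),
    dtype=X.dtype,
)

# IRLS working response: lsmr solves min ||A z - b||^2 + l2 * ||z||^2, whose
# normal equations read (X' W X + l2*I) z = X' W X coef - X' q. That right-hand
# side equals H coef - grad, so z is exactly the Newton update coef + d.
b = sqrt_w * (X @ coef) - q / sqrt_w
z = lsmr(A, b, damp=np.sqrt(l2), atol=1e-10, btol=1e-10)[0]
newton_step = z - coef

# Sanity check against an explicitly materialized Hessian.
H = X.T @ ((mu / n_samples)[:, None] * X) + l2 * np.eye(n_features)
assert np.allclose(newton_step, np.linalg.solve(H, -grad), atol=1e-6)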
@@ -140,7 +159,7 @@ class _GeneralizedLinearRegressor(RegressorMixin, BaseEstimator):
"alpha": [Interval(Real, 0.0, None, closed="left")],
"fit_intercept": ["boolean"],
"solver": [
StrOptions({"lbfgs", "newton-cholesky"}),
StrOptions({"lbfgs", "newton-cholesky", "newton-lsmr"}),
Hidden(type),
],
"max_iter": [Interval(Integral, 1, None, closed="left")],
@@ -284,8 +303,8 @@ def fit(self, X, y, sample_weight=None):
)
self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
coef = opt_res.x
elif self.solver == "newton-cholesky":
sol = NewtonCholeskySolver(
elif self.solver in NEWTON_SOLVER.keys():
sol = NEWTON_SOLVER[self.solver](
coef=coef,
linear_loss=linear_loss,
l2_reg_strength=l2_reg_strength,
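The hunk above (truncated in this view) replaces the hard-coded `newton-cholesky` branch with a lookup in the `NEWTON_SOLVER` dict defined earlier in this file, so both Newton variants share one code path. A hedged sketch of the same pattern, with trivial placeholder classes rather than the real solvers:

class NewtonCholeskySolver:  # placeholder, stands in for the real solver
    def __init__(self, **kwargs):
        self.kwargs = kwargs

class NewtonLSMRSolver:  # placeholder, stands in for the real solver
    def __init__(self, **kwargs):
        self.kwargs = kwargs

NEWTON_SOLVER = {
    "newton-cholesky": NewtonCholeskySolver,
    "newton-lsmr": NewtonLSMRSolver,
}

def make_newton_solver(name, **solver_kwargs):
    # `fit` does the same lookup via self.solver; invalid names are rejected
    # earlier by the estimator's parameter constraints (StrOptions), so a
    # plain KeyError here is acceptable.
    return NEWTON_SOLVER[name](**solver_kwargs)

sol = make_newton_solver("newton-lsmr", l2_reg_strength=1.0)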
@@ -483,7 +502,7 @@ class PoissonRegressor(_GeneralizedLinearRegressor):
Specifies if a constant (a.k.a. bias or intercept) should be
added to the linear predictor (`X @ coef + intercept`).

solver : {'lbfgs', 'newton-cholesky'}, default='lbfgs'
solver : {'lbfgs', 'newton-cholesky', 'newton-lsmr'}, default='lbfgs'
Algorithm to use in the optimization problem:

'lbfgs'
@@ -499,6 +518,18 @@ class PoissonRegressor(_GeneralizedLinearRegressor):

.. versionadded:: 1.2

'newton-lsmr'
Uses Newton-Raphson steps formulated as iteratively reweighted least
squares (IRLS), each solved by LSMR. Unlike `newton-cholesky`, this
solver never materializes the Hessian matrix explicitly. Instead, it
exploits the Hessian's structure to solve the least squares problems
incrementally through a series of matrix-vector operations on matrices
of size `(n_samples, n_features)`.
Additionally, it is numerically more stable than `newton-cholesky` for
ill-conditioned `X`.

.. versionadded:: 1.5

max_iter : int, default=100
The maximal number of iterations for the solver.
Values must be in the range `[1, inf)`.
@@ -614,7 +645,7 @@ class GammaRegressor(_GeneralizedLinearRegressor):
Specifies if a constant (a.k.a. bias or intercept) should be
added to the linear predictor `X @ coef_ + intercept_`.

solver : {'lbfgs', 'newton-cholesky'}, default='lbfgs'
solver : {'lbfgs', 'newton-cholesky', 'newton-lsmr'}, default='lbfgs'
Algorithm to use in the optimization problem:

'lbfgs'
Expand All @@ -630,6 +661,18 @@ class GammaRegressor(_GeneralizedLinearRegressor):

.. versionadded:: 1.2

'newton-lsmr'
Uses Newton-Raphson steps formulated as iteratively reweighted least
squares (IRLS), each solved by LSMR. Unlike `newton-cholesky`, this
solver never materializes the Hessian matrix explicitly. Instead, it
exploits the Hessian's structure to solve the least squares problems
incrementally through a series of matrix-vector operations on matrices
of size `(n_samples, n_features)`.
Additionally, it is numerically more stable than `newton-cholesky` for
ill-conditioned `X`.

.. versionadded:: 1.5

max_iter : int, default=100
The maximal number of iterations for the solver.
Values must be in the range `[1, inf)`.
@@ -776,7 +819,7 @@ class TweedieRegressor(_GeneralizedLinearRegressor):
- 'log' for ``power > 0``, e.g. for Poisson, Gamma and Inverse Gaussian
distributions

solver : {'lbfgs', 'newton-cholesky'}, default='lbfgs'
solver : {'lbfgs', 'newton-cholesky', 'newton-lsmr'}, default='lbfgs'
Algorithm to use in the optimization problem:

'lbfgs'
Expand All @@ -792,6 +835,18 @@ class TweedieRegressor(_GeneralizedLinearRegressor):

.. versionadded:: 1.2

'newton-lsmr'
Uses Newton-Raphson steps formulated as iteratively reweighted least
squares (IRLS), each solved by LSMR. Unlike `newton-cholesky`, this
solver never materializes the Hessian matrix explicitly. Instead, it
exploits the Hessian's structure to solve the least squares problems
incrementally through a series of matrix-vector operations on matrices
of size `(n_samples, n_features)`.
Additionally, it is numerically more stable than `newton-cholesky` for
ill-conditioned `X`.

.. versionadded:: 1.5

max_iter : int, default=100
The maximal number of iterations for the solver.
Values must be in the range `[1, inf)`.