ENH edge cases in LSMR · scikit-learn/scikit-learn@55f456f · GitHub

Commit 55f456f

ENH edge cases in LSMR
1 parent 5608bd1 commit 55f456f

2 files changed, +33 -25 lines

sklearn/linear_model/_glm/glm.py

Lines changed: 24 additions & 16 deletions
@@ -681,11 +681,10 @@ def update_gradient_hessian(self, X, y, sample_weight):
             hessian_out=self.h,
             n_threads=self.n_threads,
         )
-        # For non-canonical link functions and far away from the optimum, we take
-        # care that the hessian is at least non-negative. Tiny positive values are set
-        # to zero, too.
+        # For non-canonical link functions and far away from the optimum, we take care
+        # that the hessian is positive.
         eps = 16 * np.finfo(y.dtype).eps
-        self.h[self.h <= eps] = 0
+        self.h[self.h <= eps] = eps

         n_features = X.shape[1]
         # This duplicates a bit of code from LinearModelLoss.gradient.
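A minimal sketch, outside the commit, of what the clamp above guarantees: once every entry of the Hessian diagonal is at least eps, the later division by sqrt(h) needs no special-casing of h == 0. The arrays h and g below are made up for illustration.

import numpy as np

# Hypothetical per-sample Hessian diagonal and gradient, for illustration only.
h = np.array([2.5e-1, 0.0, -1e-20, 1.3], dtype=np.float64)
g = np.array([0.1, -0.2, 0.3, -0.4], dtype=np.float64)

eps = 16 * np.finfo(h.dtype).eps
h[h <= eps] = eps            # clamp to a tiny positive value instead of zero

print(g / np.sqrt(h))        # finite everywhere, no h == 0 rows to exclude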
@@ -704,13 +703,8 @@ def inner_solve(self, X, y, sample_weight):
         """
         n_samples, n_features = X.shape
         sqrt_h = np.sqrt(self.h)
-        # Take care of h = 0. Tiny h are already set to 0.
-        # If h = 0 we can exclude the corresponding row of X such that the value of b
-        # becomes irrelevant. We set it -g as if h = 1.
-        g_over_h_sqrt = self.g
-        g_over_h_sqrt[sqrt_h > 0] /= sqrt_h[sqrt_h > 0]

-        b = np.r_[-g_over_h_sqrt, -self.sqrt_P * self.coef[:n_features]]
+        b = np.r_[-self.g / sqrt_h, -self.sqrt_P * self.coef[:n_features]]

         if self.linear_loss.fit_intercept:
             n_dof = n_features + 1
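A small sketch, with hypothetical shapes and names rather than the estimator's internals, of the right-hand side assembled above: with h strictly positive, the stacked vector b can be built in a single expression.

import numpy as np

rng = np.random.default_rng(0)
n_samples, n_features = 6, 3

g = rng.standard_normal(n_samples)      # hypothetical gradient terms per sample
h = np.full(n_samples, 0.5)             # hypothetical Hessian diagonal, already clamped > 0
sqrt_P = np.full(n_features, 0.1)       # hypothetical square root of the L2 penalty
coef = rng.standard_normal(n_features)

sqrt_h = np.sqrt(h)
b = np.r_[-g / sqrt_h, -sqrt_P * coef]  # shape (n_samples + n_features,)
print(b.shape)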
@@ -766,6 +760,7 @@ def rmatvec(x):
             damp=0,
             atol=self.tol / (max(1, self.A_norm) * max(1, self.r_norm)),
             btol=self.tol,
+            maxiter=max(n_samples, n_dof),  # default is min(A.shape)
             show=self.verbose >= 3,
         )
         # We store the estimated Frobenius norm of A and norm of residual r in
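A sketch of the scipy.sparse.linalg.lsmr call pattern with an explicit maxiter, using a plain dense matrix in place of the estimator's linear operator; lsmr's default maxiter is min(A.shape), which can cut the inner solve off early on ill-conditioned problems.

import numpy as np
from scipy.sparse.linalg import lsmr

rng = np.random.default_rng(0)
A = rng.standard_normal((50, 5))
b = rng.standard_normal(50)

result = lsmr(A, b, damp=0, atol=1e-8, btol=1e-8, maxiter=max(A.shape), show=False)
x, istop, itn, normr, normar, norma, conda, normx = result
print(istop, itn)  # istop == 7 would mean maxiter was hit before convergence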
@@ -780,15 +775,28 @@ def rmatvec(x):
             conda,
             normx,
         ) = result
-        # LSMR reached maxiter.
-        eps = 4 * np.finfo(self.gradient.dtype).eps
-        if istop == 7:
+        # LSMR reached maxiter or did produce an excessively large newton step.
+        # Note: We could detect too large steps by comparing norms(coef_newton) = normx
+        # with norm(gradient). Instead of the gradient, we use the already available
+        # condition number of A.
+        if istop == 7 or normx > 1e2 * conda:
             if self.gradient_step == 0:
                 # We only need to throw this warning once.
+                if istop == 7:
+                    msg = (
+                        f"The inner solver of {self.__class__.__name__} reached "
+                        "maxiter={itn} before the other stopping conditions were "
+                        "satisfied at iteration #{self.iteration}. "
+                    )
+                else:
+                    msg = (
+                        f"The inner solver of {self.__class__.__name__} produced an"
+                        " excessively large newton step at iteration"
+                        " #{self.iteration}. "
+                    )
                 warnings.warn(
-                    f"The inner solver of {self.__class__.__name__} reached "
-                    "maxiter={itn} before the other stopping conditions were "
-                    "satisfied at iteration #{self.iteration}. It will now try a "
+                    msg
+                    + "It will now try a "
                     "simple gradient step. "
                     "Note that this warning is only raised once, the problem may, "
                     " however, occur in several or all iterations. Set verbose >= 1"

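Below is a sketch, not part of the commit, of the failure check introduced above, with made-up data and a plain flag standing in for the solver's gradient-step fallback.

import numpy as np
from scipy.sparse.linalg import lsmr

rng = np.random.default_rng(0)
A = rng.standard_normal((20, 4))
b = rng.standard_normal(20)

x, istop, itn, normr, normar, norma, conda, normx = lsmr(A, b, maxiter=max(A.shape))

# istop == 7: LSMR hit maxiter; normx >> conda: the Newton step is suspiciously
# large relative to the estimated condition number of A.
if istop == 7 or normx > 1e2 * conda:
    use_gradient_step = True   # fall back to a simple gradient step
else:
    use_gradient_step = False
print(use_gradient_step)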
sklearn/linear_model/_glm/tests/test_glm.py

Lines changed: 9 additions & 9 deletions
@@ -397,9 +397,9 @@ def test_glm_regression_unpenalized(solver, fit_intercept, glm_dataset):
     # we get a solution, i.e. a (non-unique) minimum of the objective function ...
     assert_allclose(model.predict(X), y)
     assert_allclose(model._get_loss().link.inverse(X @ coef + intercept), y)
-    if (solver in ["lbfgs", "newton-qr-cholesky"] and fit_intercept) or solver in [
-        "newton-cholesky"
-    ]:
+    if (
+        solver in ["lbfgs", "newton-lsmr", "newton-qr-cholesky"] and fit_intercept
+    ) or solver in ["newton-cholesky"]:
         # But it is not the minimum norm solution. Otherwise the norms would be
         # equal.
         norm_solution = (1 + 1e-12) * np.linalg.norm(np.r_[intercept, coef])
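An aside on the norm check these tests rely on, with made-up data rather than the glm_dataset fixture: for an underdetermined interpolation problem the pseudoinverse gives the minimum-norm solution, and any other interpolating coefficient vector has a strictly larger norm.

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((3, 6))          # wide X: underdetermined problem
y = rng.standard_normal(3)

coef_min_norm = np.linalg.pinv(X) @ y    # minimum-norm interpolating solution
null_dir = np.linalg.svd(X)[2][-1]       # a direction in the null space of X
coef_other = coef_min_norm + null_dir    # still interpolates, but larger norm

assert np.allclose(X @ coef_other, y)
assert np.linalg.norm(coef_other) > np.linalg.norm(coef_min_norm)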
@@ -472,9 +472,9 @@ def test_glm_regression_unpenalized_hstacked_X(solver, fit_intercept, glm_dataset):
     # As it is an underdetermined problem, prediction = y. The following shows that
     # we get a solution, i.e. a (non-unique) minimum of the objective function ...
     assert_allclose(model.predict(X), y)
-    if (solver in ["lbfgs", "newton-qr-cholesky"] and fit_intercept) or solver in [
-        "newton-cholesky"
-    ]:
+    if (
+        solver in ["lbfgs", "newton-lsmr", "newton-qr-cholesky"] and fit_intercept
+    ) or solver in ["newton-cholesky"]:
         # FIXME: Same as in test_glm_regression_unpenalized.
         # But it is not the minimum norm solution. Otherwise the norms would be
         # equal.
@@ -540,9 +540,9 @@ def test_glm_regression_unpenalized_vstacked_X(solver, fit_intercept, glm_dataset):
     # As it is an underdetermined problem, prediction = y. The following shows that
     # we get a solution, i.e. a (non-unique) minimum of the objective function ...
     assert_allclose(model.predict(X), y)
-    if (solver in ["lbfgs", "newton-qr-cholesky"] and fit_intercept) or solver in [
-        "newton-cholesky"
-    ]:
+    if (
+        solver in ["lbfgs", "newton-lsmr", "newton-qr-cholesky"] and fit_intercept
+    ) or solver in ["newton-cholesky"]:
         # FIXME: Same as in test_glm_regression_unpenalized.
         # But it is not the minimum norm solution. Otherwise the norms would be
         # equal.
