diff --git a/doc/whats_new/v0.23.rst b/doc/whats_new/v0.23.rst
index 4c489c1887815..c4fa3818aa1bc 100644
--- a/doc/whats_new/v0.23.rst
+++ b/doc/whats_new/v0.23.rst
@@ -298,6 +298,11 @@ Changelog
   of strictly inferior for maximum of `absgrad` and `tol` in
   `utils.optimize._newton_cg`.
   :pr:`16266` by :user:`Rushabh Vasani <rushabh-v>`.
 
+- |Enhancement| :class:`linear_model.LassoLars` and
+  :class:`linear_model.Lars` now support a `jitter` parameter that adds
+  random noise to the target. This might help with stability in some edge
+  cases. :pr:`15179` by :user:`angelaambroz`.
+
 :mod:`sklearn.metrics`
 ......................
 
diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py
index a3781cf981710..bc71d7a1fccbd 100644
--- a/sklearn/linear_model/_least_angle.py
+++ b/sklearn/linear_model/_least_angle.py
@@ -21,6 +21,7 @@
 from ..base import RegressorMixin, MultiOutputMixin
 # mypy error: Module 'sklearn.utils' has no attribute 'arrayfuncs'
 from ..utils import arrayfuncs, as_float_array  # type: ignore
+from ..utils import check_random_state
 from ..model_selection import check_cv
 from ..exceptions import ConvergenceWarning
 
@@ -800,6 +801,16 @@ class Lars(MultiOutputMixin, RegressorMixin, LinearModel):
         setting ``fit_path`` to ``False`` will lead to a speedup, especially
         with a small alpha.
 
+    jitter : float, default=None
+        Upper bound on a uniform noise parameter to be added to the
+        `y` values, to satisfy the model's assumption of
+        one-at-a-time computations. Might help with stability.
+
+    random_state : int, RandomState instance or None (default)
+        Determines random number generation for jittering. Pass an int
+        for reproducible output across multiple function calls.
+        See :term:`Glossary <random_state>`. Ignored if `jitter` is None.
+
     Attributes
     ----------
     alphas_ : array-like of shape (n_alphas + 1,) | list of n_targets such \
@@ -846,7 +857,8 @@ class Lars(MultiOutputMixin, RegressorMixin, LinearModel):
 
     def __init__(self, fit_intercept=True, verbose=False, normalize=True,
                  precompute='auto', n_nonzero_coefs=500,
-                 eps=np.finfo(np.float).eps, copy_X=True, fit_path=True):
+                 eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
+                 jitter=None, random_state=None):
         self.fit_intercept = fit_intercept
         self.verbose = verbose
         self.normalize = normalize
@@ -855,6 +867,8 @@ def __init__(self, fit_intercept=True, verbose=False, normalize=True,
         self.eps = eps
         self.copy_X = copy_X
         self.fit_path = fit_path
+        self.jitter = jitter
+        self.random_state = random_state
 
     @staticmethod
     def _get_gram(precompute, X, y):
@@ -954,6 +968,12 @@ def fit(self, X, y, Xy=None):
         else:
             max_iter = self.max_iter
 
+        if self.jitter is not None:
+            rng = check_random_state(self.random_state)
+
+            noise = rng.uniform(high=self.jitter, size=len(y))
+            y = y + noise
+
         self._fit(X, y, max_iter=max_iter, alpha=alpha, fit_path=self.fit_path,
                   Xy=Xy)
 
@@ -1031,6 +1051,16 @@ class LassoLars(Lars):
     algorithm are typically in congruence with the solution of the
     coordinate descent Lasso estimator.
 
+    jitter : float, default=None
+        Upper bound on a uniform noise parameter to be added to the
+        `y` values, to satisfy the model's assumption of
+        one-at-a-time computations. Might help with stability.
+
+    random_state : int, RandomState instance or None (default)
+        Determines random number generation for jittering. Pass an int
+        for reproducible output across multiple function calls.
+        See :term:`Glossary <random_state>`. Ignored if `jitter` is None.
+
     Attributes
     ----------
     alphas_ : array-like of shape (n_alphas + 1,) | list of n_targets such \
@@ -1083,7 +1113,7 @@ class LassoLars(Lars):
     def __init__(self, alpha=1.0, fit_intercept=True, verbose=False,
                  normalize=True, precompute='auto', max_iter=500,
                  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
-                 positive=False):
+                 positive=False, jitter=None, random_state=None):
         self.alpha = alpha
         self.fit_intercept = fit_intercept
         self.max_iter = max_iter
@@ -1094,6 +1124,8 @@ def __init__(self, alpha=1.0, fit_intercept=True, verbose=False,
         self.copy_X = copy_X
         self.eps = eps
         self.fit_path = fit_path
+        self.jitter = jitter
+        self.random_state = random_state
 
 
 ###############################################################################
diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py
index 6e7c1fb37096a..e198dfb15e323 100644
--- a/sklearn/linear_model/tests/test_least_angle.py
+++ b/sklearn/linear_model/tests/test_least_angle.py
@@ -6,6 +6,7 @@
 import pytest
 from scipy import linalg
 
+from sklearn.base import clone
 from sklearn.model_selection import train_test_split
 from sklearn.utils._testing import assert_allclose
 from sklearn.utils._testing import assert_array_almost_equal
@@ -17,6 +18,7 @@
 from sklearn import linear_model, datasets
 from sklearn.linear_model._least_angle import _lars_path_residues
 from sklearn.linear_model import LassoLarsIC, lars_path
+from sklearn.linear_model import Lars, LassoLars
 
 # TODO: use another dataset that has multiple drops
 diabetes = datasets.load_diabetes()
@@ -733,6 +735,28 @@ def test_lasso_lars_fit_copyX_behaviour(copy_X):
     assert copy_X == np.array_equal(X, X_copy)
 
 
+@pytest.mark.parametrize('est', (LassoLars(alpha=1e-3), Lars()))
+def test_lars_with_jitter(est):
+    # Test that a small amount of jitter helps stability,
+    # using example provided in issue #2746
+
+    X = np.array([[0.0, 0.0, 0.0, -1.0, 0.0],
+                  [0.0, -1.0, 0.0, 0.0, 0.0]])
+    y = [-2.5, -2.5]
+    expected_coef = [0, 2.5, 0, 2.5, 0]
+
+    # Set fit_intercept to False since the target is constant and we want
+    # to check the value of coef. coef would be all zeros otherwise.
+    est.set_params(fit_intercept=False)
+    est_jitter = clone(est).set_params(jitter=10e-8, random_state=0)
+
+    est.fit(X, y)
+    est_jitter.fit(X, y)
+
+    assert np.mean((est.coef_ - est_jitter.coef_) ** 2) > .1
+    np.testing.assert_allclose(est_jitter.coef_, expected_coef, rtol=1e-3)
+
+
 def test_X_none_gram_not_none():
     with pytest.raises(ValueError,
                        match="X cannot be None if Gram is not None"):
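
For context, a minimal usage sketch of the new `jitter` parameter, built only from the test case above (the degenerate design from issue #2746). It assumes this branch is installed; the `alpha`, `jitter`, and `random_state` values simply mirror the test, and the expected coefficients are the `expected_coef` the test asserts against.

```python
import numpy as np
from sklearn.linear_model import LassoLars

# Degenerate design from issue #2746: exact ties between features make the
# one-at-a-time LARS updates unstable.
X = np.array([[0.0, 0.0, 0.0, -1.0, 0.0],
              [0.0, -1.0, 0.0, 0.0, 0.0]])
y = [-2.5, -2.5]

# Default behaviour is unchanged (jitter=None): coef_ may come out degenerate
# on this input.
plain = LassoLars(alpha=1e-3, fit_intercept=False).fit(X, y)

# With a tiny uniform perturbation added to y, the ties are broken;
# random_state makes the added noise (and hence coef_) reproducible.
jittered = LassoLars(alpha=1e-3, fit_intercept=False,
                     jitter=10e-8, random_state=0).fit(X, y)

print(plain.coef_)
print(jittered.coef_)  # close to [0, 2.5, 0, 2.5, 0], per the test above
```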