From 2160badc1c65e7b33814a981df8a2ad6d467761d Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 29 Jun 2020 12:32:37 +0200 Subject: [PATCH 1/9] filterwarnings in the test_least angle test --- .../linear_model/tests/test_least_angle.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index 1bfb4fc8c090a..40390f2c38f67 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -113,6 +113,8 @@ def test_all_precomputed(): assert_array_almost_equal(expected, got) +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.filterwarnings('ignore: `rcond` parameter will change') # numpy deprecation def test_lars_lstsq(): @@ -198,6 +200,8 @@ def test_no_path_all_precomputed(): assert alpha_ == alphas_[-1] +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize( 'classifier', [linear_model.Lars, linear_model.LarsCV, linear_model.LassoLarsIC]) @@ -221,6 +225,8 @@ def test_singular_matrix(): assert_array_almost_equal(coef_path.T, [[0, 0], [1, 0]]) +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") def test_rank_deficient_design(): # consistency test that checks that LARS Lasso is handling rank # deficient input data (with n_features < rank) in the same way @@ -248,6 +254,8 @@ def test_rank_deficient_design(): assert obj_lars < obj_cd * (1. + 1e-8) +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") def test_lasso_lars_vs_lasso_cd(): # Test that LassoLars and Lasso using coordinate descent give the # same results. @@ -285,6 +293,8 @@ def test_lasso_lars_vs_lasso_cd(): assert error < 0.01 +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") def test_lasso_lars_vs_lasso_cd_early_stopping(): # Test that LassoLars and Lasso using coordinate descent give the # same results when early stopping is used. @@ -311,6 +321,8 @@ def test_lasso_lars_vs_lasso_cd_early_stopping(): assert error < 0.01 +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") def test_lasso_lars_path_length(): # Test that the path length of the LassoLars is right lasso = linear_model.LassoLars() @@ -354,6 +366,8 @@ def test_lasso_lars_vs_lasso_cd_ill_conditioned(): assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1) +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") def test_lasso_lars_vs_lasso_cd_ill_conditioned2(): # Create an ill-conditioned situation in which the LARS has to go # far in the path to converge, and check that LARS and coordinate @@ -403,6 +417,8 @@ def test_lars_n_nonzero_coefs(verbose=False): assert len(lars.alphas_) == 7 +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @ignore_warnings def test_multitarget(): # Assure that estimators receiving multidimensional y do the right thing @@ -431,6 +447,8 @@ def test_multitarget(): assert_array_almost_equal(Y_pred[:, k], y_pred) +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") def test_lars_cv(): # Test the LassoLarsCV object by checking that the optimal alpha # increases as the number of samples increases. @@ -447,6 +465,8 @@ def test_lars_cv(): assert not hasattr(lars_cv, 'n_nonzero_coefs') +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") def test_lars_cv_max_iter(recwarn): warnings.simplefilter('always') with np.errstate(divide='raise', invalid='raise'): @@ -466,6 +486,8 @@ def test_lars_cv_max_iter(recwarn): assert recorded_warnings == [] +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") def test_lasso_lars_ic(): # Test the LassoLarsIC object by checking that # - some good features are selected. @@ -536,6 +558,8 @@ def test_lars_path_positive_constraint(): 'LassoLarsIC': {}} +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") def test_estimatorclasses_positive_constraint(): # testing the transmissibility for the positive option of all estimator # classes in this same function here @@ -555,6 +579,8 @@ def test_estimatorclasses_positive_constraint(): assert min(estimator.coef_) >= 0 +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") def test_lasso_lars_vs_lasso_cd_positive(): # Test that LassoLars and Lasso using coordinate descent give the # same results when using the positive option @@ -607,6 +633,8 @@ def test_lasso_lars_vs_lasso_cd_positive(): assert error < 0.01 +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") def test_lasso_lars_vs_R_implementation(): # Test that sklearn LassoLars implementation agrees with the LassoLars # implementation available in R (lars library) under the following @@ -703,6 +731,8 @@ def test_lasso_lars_vs_R_implementation(): ########################################################################### +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize('copy_X', [True, False]) def test_lasso_lars_copyX_behaviour(copy_X): """ @@ -719,6 +749,8 @@ def test_lasso_lars_copyX_behaviour(copy_X): assert copy_X == np.array_equal(X, X_copy) +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize('copy_X', [True, False]) def test_lasso_lars_fit_copyX_behaviour(copy_X): """ @@ -734,6 +766,8 @@ def test_lasso_lars_fit_copyX_behaviour(copy_X): assert copy_X == np.array_equal(X, X_copy) +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") @pytest.mark.parametrize('est', (LassoLars(alpha=1e-3), Lars())) def test_lars_with_jitter(est): # Test that a small amount of jitter helps stability, From cc3c32ef1594c2c01abb37de6823a4d004ac4d27 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 29 Jun 2020 12:47:34 +0200 Subject: [PATCH 2/9] updated the warnings when normalize is used --- sklearn/linear_model/_least_angle.py | 106 ++++++++++++++++-- .../linear_model/tests/test_least_angle.py | 9 +- 2 files changed, 105 insertions(+), 10 deletions(-) diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py index d89d9affae479..fab1bb92f0cc1 100644 --- a/sklearn/linear_model/_least_angle.py +++ b/sklearn/linear_model/_least_angle.py @@ -779,6 +779,10 @@ class Lars(MultiOutputMixin, RegressorMixin, LinearModel): :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. + .. deprecated:: 0.24 + ``normalize`` was deprecated in version 0.24 and will be removed in + 0.26. + precompute : bool, 'auto' or array-like , default='auto' Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram @@ -859,8 +863,8 @@ class Lars(MultiOutputMixin, RegressorMixin, LinearModel): positive = False @_deprecate_positional_args - def __init__(self, *, fit_intercept=True, verbose=False, normalize=True, - precompute='auto', n_nonzero_coefs=500, + def __init__(self, *, fit_intercept=True, verbose=False, + normalize='deprecate', precompute='auto', n_nonzero_coefs=500, eps=np.finfo(float).eps, copy_X=True, fit_path=True, jitter=None, random_state=None): self.fit_intercept = fit_intercept @@ -889,7 +893,7 @@ def _fit(self, X, y, max_iter, alpha, fit_path, Xy=None): n_features = X.shape[1] X, y, X_offset, y_offset, X_scale = self._preprocess_data( - X, y, self.fit_intercept, self.normalize, self.copy_X) + X, y, self.fit_intercept, self._normalize, self.copy_X) if y.ndim == 1: y = y[:, np.newaxis] @@ -963,6 +967,28 @@ def fit(self, X, y, Xy=None): self : object returns an instance of self. """ + + if self.normalize == 'deprecate': + self._normalize = True + else: + self._normalize = self.normalize + + if not self._normalize: + warnings.warn( + "'normalize' was deprecated in version 0.24 and will be" + " removed in 0.26.", FutureWarning + ) + else: + warnings.warn( + "'normalize' was deprecated in version 0.24 and will be" + " removed in 0.26. If you wish to keep an equivalent" + " behaviour, use Pipeline with a StandardScaler in a" + " preprocessing stage:" + " model = make_pipeline( \n" + " StandardScaler(), \n" + " {type(self).__name__}())", FutureWarning + ) + X, y = self._validate_data(X, y, y_numeric=True, multi_output=True) alpha = getattr(self, 'alpha', 0.) @@ -1020,6 +1046,10 @@ class LassoLars(Lars): :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. + .. deprecated:: 0.24 + ``normalize`` was deprecated in version 0.24 and will be removed in + 0.26. + precompute : bool, 'auto' or array-like, default='auto' Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram @@ -1116,7 +1146,7 @@ class LassoLars(Lars): @_deprecate_positional_args def __init__(self, alpha=1.0, *, fit_intercept=True, verbose=False, - normalize=True, precompute='auto', max_iter=500, + normalize='deprecate', precompute='auto', max_iter=500, eps=np.finfo(float).eps, copy_X=True, fit_path=True, positive=False, jitter=None, random_state=None): self.alpha = alpha @@ -1199,6 +1229,10 @@ def _lars_path_residues(X_train, y_train, X_test, y_test, Gram=None, :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. + .. deprecated:: 0.24 + ``normalize`` was deprecated in version 0.24 and will be removed in + 0.26. + max_iter : int, default=500 Maximum number of iterations to perform. @@ -1285,6 +1319,10 @@ class LarsCV(Lars): :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. + .. deprecated:: 0.24 + ``normalize`` was deprecated in version 0.24 and will be removed in + 0.26. + precompute : bool, 'auto' or array-like , default='auto' Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix @@ -1374,7 +1412,7 @@ class LarsCV(Lars): @_deprecate_positional_args def __init__(self, *, fit_intercept=True, verbose=False, max_iter=500, - normalize=True, precompute='auto', cv=None, + normalize='deprecate', precompute='auto', cv=None, max_n_alphas=1000, n_jobs=None, eps=np.finfo(float).eps, copy_X=True): self.max_iter = max_iter @@ -1406,6 +1444,27 @@ def fit(self, X, y): self : object returns an instance of self. """ + if self.normalize == 'deprecate': + self._normalize = True + else: + self._normalize = self.normalize + + if not self._normalize: + warnings.warn( + "'normalize' was deprecated in version 0.24 and will be" + " removed in 0.26.", FutureWarning + ) + else: + warnings.warn( + "'normalize' was deprecated in version 0.24 and will be" + " removed in 0.26. If you wish to keep an equivalent" + " behaviour, use Pipeline with a StandardScaler in a" + " preprocessing stage:" + " model = make_pipeline( \n" + " StandardScaler(), \n" + " {type(self).__name__}())", FutureWarning + ) + X, y = self._validate_data(X, y, y_numeric=True) X = as_float_array(X, copy=self.copy_X) y = as_float_array(y, copy=self.copy_X) @@ -1425,7 +1484,7 @@ def fit(self, X, y): delayed(_lars_path_residues)( X[train], y[train], X[test], y[test], Gram=Gram, copy=False, method=self.method, verbose=max(0, self.verbose - 1), - normalize=self.normalize, fit_intercept=self.fit_intercept, + normalize=self._normalize, fit_intercept=self.fit_intercept, max_iter=self.max_iter, eps=self.eps, positive=self.positive) for train, test in cv.split(X, y)) all_alphas = np.concatenate(list(zip(*cv_paths))[0]) @@ -1503,6 +1562,10 @@ class LassoLarsCV(LarsCV): :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. + .. deprecated:: 0.24 + ``normalize`` was deprecated in version 0.24 and will be removed in + 0.26. + precompute : bool or 'auto' , default='auto' Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram matrix @@ -1616,7 +1679,7 @@ class LassoLarsCV(LarsCV): @_deprecate_positional_args def __init__(self, *, fit_intercept=True, verbose=False, max_iter=500, - normalize=True, precompute='auto', cv=None, + normalize='deprecate', precompute='auto', cv=None, max_n_alphas=1000, n_jobs=None, eps=np.finfo(float).eps, copy_X=True, positive=False): self.fit_intercept = fit_intercept @@ -1670,6 +1733,10 @@ class LassoLarsIC(LassoLars): :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. + .. deprecated:: 0.24 + ``normalize`` was deprecated in version 0.24 and will be removed in + 0.26. + precompute : bool, 'auto' or array-like, default='auto' Whether to use a precomputed Gram matrix to speed up calculations. If set to ``'auto'`` let us decide. The Gram @@ -1750,7 +1817,7 @@ class LassoLarsIC(LassoLars): """ @_deprecate_positional_args def __init__(self, criterion='aic', *, fit_intercept=True, verbose=False, - normalize=True, precompute='auto', max_iter=500, + normalize='deprecate', precompute='auto', max_iter=500, eps=np.finfo(float).eps, copy_X=True, positive=False): self.criterion = criterion self.fit_intercept = fit_intercept @@ -1787,12 +1854,33 @@ def fit(self, X, y, copy_X=None): self : object returns an instance of self. """ + if self.normalize == 'deprecate': + self._normalize = True + else: + self._normalize = self.normalize + + if not self._normalize: + warnings.warn( + "'normalize' was deprecated in version 0.24 and will be" + " removed in 0.26.", FutureWarning + ) + else: + warnings.warn( + "'normalize' was deprecated in version 0.24 and will be" + " removed in 0.26. If you wish to keep an equivalent" + " behaviour, use Pipeline with a StandardScaler in a" + " preprocessing stage:" + " model = make_pipeline( \n" + " StandardScaler(), \n" + " {type(self).__name__}())", FutureWarning + ) + if copy_X is None: copy_X = self.copy_X X, y = self._validate_data(X, y, y_numeric=True) X, y, Xmean, ymean, Xstd = LinearModel._preprocess_data( - X, y, self.fit_intercept, self.normalize, copy_X) + X, y, self.fit_intercept, self._normalize, copy_X) max_iter = self.max_iter Gram = self.precompute diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index 40390f2c38f67..ab3ea44f2b43d 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -397,6 +397,8 @@ def objective_function(coef): assert lars_obj < cd_obj * (1. + 1e-8) +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") def test_lars_add_features(): # assure that at least some features get added if necessary # test for 6d2b4c @@ -408,6 +410,8 @@ def test_lars_add_features(): assert np.all(np.isfinite(clf.coef_)) +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") def test_lars_n_nonzero_coefs(verbose=False): lars = linear_model.Lars(n_nonzero_coefs=6, verbose=verbose) lars.fit(X, y) @@ -483,7 +487,10 @@ def test_lars_cv_max_iter(recwarn): # Materialize the string representation of the warning to get a more # informative error message in case of AssertionError. recorded_warnings = [str(w) for w in recwarn] - assert recorded_warnings == [] + # FIXME: when 'normalize' is removed set exchange below for: + # assert len(recorded_warnings) == [] + assert len(recorded_warnings) == 1 + assert 'normalize\' was deprecated in version 0.24' in recorded_warnings[0] # FIXME: 'normalize' to be removed in 0.26 From 4ef05dcbe19e90c5f72f4f2d616559647d06dcb3 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 29 Jun 2020 12:53:36 +0200 Subject: [PATCH 3/9] added a test to check for warnings --- .../linear_model/tests/test_least_angle.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index ab3ea44f2b43d..759015d59abc1 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -13,11 +13,13 @@ from sklearn.utils._testing import assert_warns from sklearn.utils._testing import TempMemmap from sklearn.utils.fixes import np_version, parse_version +from sklearn.utils import check_random_state from sklearn.exceptions import ConvergenceWarning from sklearn import linear_model, datasets from sklearn.linear_model._least_angle import _lars_path_residues from sklearn.linear_model import LassoLarsIC, lars_path from sklearn.linear_model import Lars, LassoLars +from sklearn.linear_model import LarsCV, LassoLarsCV # TODO: use another dataset that has multiple drops diabetes = datasets.load_diabetes() @@ -27,6 +29,34 @@ n_samples = y.size +# FIXME: 'normalize' to be removed in 0.26 +@pytest.mark.parametrize('LeastAngleModel', [Lars, + LassoLars, + LarsCV, + LassoLarsCV, + LassoLarsIC]) +@pytest.mark.parametrize( + 'normalize, n_warnings, warning', + [(True, 1, FutureWarning), + (False, 1, FutureWarning), + ("deprecate", 1, FutureWarning)] +) +def test_assure_warning_when_normalize(LeastAngleModel, + normalize, n_warnings, warning): + # check that we issue a FutureWarning when normalize was set + rng = check_random_state(0) + n_samples = 200 + n_features = 2 + X = rng.randn(n_samples, n_features) + X[X < 0.1] = 0. + y = rng.rand(n_samples) + + model = LeastAngleModel(normalize=normalize) + with pytest.warns(warning) as record: + model.fit(X, y) + assert len(record) == n_warnings + + def test_simple(): # Principle of Lars is to keep covariances tied and decreasing From de4c858a17e9b3c453f6bd18defe62b8502b4d03 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Fri, 18 Jun 2021 08:55:25 +0200 Subject: [PATCH 4/9] fix docstring test --- sklearn/tests/test_docstring_parameters.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 025b7fb6c08ba..bf59b35385a4c 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -245,6 +245,11 @@ def test_fit_docstring_attributes(name, Estimator): if Estimator.__name__ in ( "OrthogonalMatchingPursuit", "OrthogonalMatchingPursuitCV", + "Lars", + "LarsCV", + "LassoLars", + "LassoLarsCV", + "LassoLarsIC", ): est.set_params(normalize=False) From af93767e7445f402b289fb301d17368c2ea9fecc Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Fri, 18 Jun 2021 08:58:00 +0200 Subject: [PATCH 5/9] update what's new --- doc/whats_new/v1.0.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 4ff9fefe7f191..e4bff3c124dc5 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -369,6 +369,12 @@ Changelog False in 1.2 and will be removed in 1.4. :pr:`17750` by :user:`Maria Telenczuk ` and :user:`Alexandre Gramfort `. + The ``normalize`` parameter of :class:`linear_model.Lars` + :class:`linear_model.LarsCV` :class:`linear_model.LassoLars` + :class:`linear_model.LassoLarsCV` :class:`linear_model.LassoLarsIC` + will default to False in 1.2 and will be removed in 1.4. + :pr:`17769` by :user:`Maria Telenczuk ` and + :user:`Alexandre Gramfort `. - |Fix| `sample_weight` are now fully taken into account in linear models when `normalize=True` for both feature centering and feature From 8083c363f21deb2e29c14d3edf3521119bff7e00 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Fri, 18 Jun 2021 09:18:18 +0200 Subject: [PATCH 6/9] fix docstrings --- sklearn/linear_model/_least_angle.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py index d13bc90218cdb..6e48e1cd358cc 100644 --- a/sklearn/linear_model/_least_angle.py +++ b/sklearn/linear_model/_least_angle.py @@ -945,9 +945,9 @@ class Lars(MultiOutputMixin, RegressorMixin, LinearModel): Examples -------- >>> from sklearn import linear_model - >>> reg = linear_model.Lars(n_nonzero_coefs=1) + >>> reg = linear_model.Lars(n_nonzero_coefs=1, normalize=False) >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111]) - Lars(n_nonzero_coefs=1) + Lars(n_nonzero_coefs=1, normalize=False) >>> print(reg.coef_) [ 0. -1.11...] @@ -1256,11 +1256,11 @@ class LassoLars(Lars): Examples -------- >>> from sklearn import linear_model - >>> reg = linear_model.LassoLars(alpha=0.01) + >>> reg = linear_model.LassoLars(alpha=0.01, normalize=False) >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1]) - LassoLars(alpha=0.01) + LassoLars(alpha=0.01, normalize=False) >>> print(reg.coef_) - [ 0. -0.963257...] + [ 0. -0.955...] See Also -------- @@ -1567,13 +1567,13 @@ class LarsCV(Lars): >>> from sklearn.linear_model import LarsCV >>> from sklearn.datasets import make_regression >>> X, y = make_regression(n_samples=200, noise=4.0, random_state=0) - >>> reg = LarsCV(cv=5).fit(X, y) + >>> reg = LarsCV(cv=5, normalize=False).fit(X, y) >>> reg.score(X, y) 0.9996... >>> reg.alpha_ - 0.0254... + 0.2961... >>> reg.predict(X[:1,]) - array([154.0842...]) + array([154.3996...]) See Also -------- @@ -1846,13 +1846,13 @@ class LassoLarsCV(LarsCV): >>> from sklearn.linear_model import LassoLarsCV >>> from sklearn.datasets import make_regression >>> X, y = make_regression(noise=4.0, random_state=0) - >>> reg = LassoLarsCV(cv=5).fit(X, y) + >>> reg = LassoLarsCV(cv=5, normalize=False).fit(X, y) >>> reg.score(X, y) - 0.9992... + 0.9993... >>> reg.alpha_ - 0.0484... + 0.3972... >>> reg.predict(X[:1,]) - array([-77.8723...]) + array([-78.4831...]) Notes ----- @@ -2009,9 +2009,9 @@ class LassoLarsIC(LassoLars): Examples -------- >>> from sklearn import linear_model - >>> reg = linear_model.LassoLarsIC(criterion='bic') + >>> reg = linear_model.LassoLarsIC(criterion='bic', normalize=False) >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111]) - LassoLarsIC(criterion='bic') + LassoLarsIC(criterion='bic', normalize=False) >>> print(reg.coef_) [ 0. -1.11...] From 3f082f775f535dba8e39213c336633bcdaa36022 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Fri, 18 Jun 2021 09:24:23 +0200 Subject: [PATCH 7/9] update doc --- doc/modules/linear_model.rst | 6 +++--- examples/linear_model/plot_lasso_model_selection.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 4b76c35245d36..f3f9b77152ab2 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -520,11 +520,11 @@ function of the norm of its coefficients. :: >>> from sklearn import linear_model - >>> reg = linear_model.LassoLars(alpha=.1) + >>> reg = linear_model.LassoLars(alpha=.1, normalize=False) >>> reg.fit([[0, 0], [1, 1]], [0, 1]) - LassoLars(alpha=0.1) + LassoLars(alpha=0.1, normalize=False) >>> reg.coef_ - array([0.717157..., 0. ]) + array([0.6..., 0. ]) .. topic:: Examples: diff --git a/examples/linear_model/plot_lasso_model_selection.py b/examples/linear_model/plot_lasso_model_selection.py index 73fc94fb94600..845a86c3bda4a 100644 --- a/examples/linear_model/plot_lasso_model_selection.py +++ b/examples/linear_model/plot_lasso_model_selection.py @@ -68,13 +68,13 @@ # ############################################################################# # LassoLarsIC: least angle regression with BIC/AIC criterion -model_bic = LassoLarsIC(criterion='bic') +model_bic = LassoLarsIC(criterion='bic', normalize=False) t1 = time.time() model_bic.fit(X, y) t_bic = time.time() - t1 alpha_bic_ = model_bic.alpha_ -model_aic = LassoLarsIC(criterion='aic') +model_aic = LassoLarsIC(criterion='aic', normalize=False) model_aic.fit(X, y) alpha_aic_ = model_aic.alpha_ @@ -129,7 +129,7 @@ def plot_ic_criterion(model, name, color): # Compute paths print("Computing regularization path using the Lars lasso...") t1 = time.time() -model = LassoLarsCV(cv=20).fit(X, y) +model = LassoLarsCV(cv=20, normalize=False).fit(X, y) t_lasso_lars_cv = time.time() - t1 # Display results From 84f93c96fe41e41da2f9d2df8ee39c21d95cff58 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Fri, 18 Jun 2021 11:27:56 +0200 Subject: [PATCH 8/9] black --- .../linear_model/tests/test_least_angle.py | 21 +++++++------------ 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index fdb41f3cbe498..6de282759c704 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -26,25 +26,19 @@ # FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.parametrize('LeastAngleModel', [Lars, - LassoLars, - LarsCV, - LassoLarsCV, - LassoLarsIC]) @pytest.mark.parametrize( - 'normalize, n_warnings', - [(True, 0), - (False, 0), - ("deprecated", 1)] + "LeastAngleModel", [Lars, LassoLars, LarsCV, LassoLarsCV, LassoLarsIC] ) -def test_assure_warning_when_normalize(LeastAngleModel, - normalize, n_warnings): +@pytest.mark.parametrize( + "normalize, n_warnings", [(True, 0), (False, 0), ("deprecated", 1)] +) +def test_assure_warning_when_normalize(LeastAngleModel, normalize, n_warnings): # check that we issue a FutureWarning when normalize was set rng = check_random_state(0) n_samples = 200 n_features = 2 X = rng.randn(n_samples, n_features) - X[X < 0.1] = 0. + X[X < 0.1] = 0.0 y = rng.rand(n_samples) model = LeastAngleModel(normalize=normalize) @@ -517,8 +511,7 @@ def test_lars_cv_max_iter(recwarn): # FIXME: when 'normalize' is removed set exchange below for: # assert len(recorded_warnings) == [] assert len(recorded_warnings) == 1 - assert ('normalize\' will be set to False in version 1.2' - in recorded_warnings[0]) + assert "normalize' will be set to False in version 1.2" in recorded_warnings[0] # FIXME: 'normalize' to be removed in 1.4 From 6ecd958b76d1da368088b085ea4d6c90679bf48e Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Fri, 18 Jun 2021 13:06:23 +0200 Subject: [PATCH 9/9] fixes --- .../linear_model/tests/test_least_angle.py | 63 ++++++++----------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index 6de282759c704..469ffa50e4050 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -24,6 +24,11 @@ Xy = np.dot(X.T, y) n_samples = y.size +# FIXME: 'normalize' to be removed in 1.4 +filterwarnings_normalize = pytest.mark.filterwarnings( + "ignore:The default of 'normalize'" +) + # FIXME: 'normalize' to be removed in 1.4 @pytest.mark.parametrize( @@ -133,7 +138,7 @@ def test_all_precomputed(): # FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:The default of 'normalize'") +@filterwarnings_normalize @pytest.mark.filterwarnings("ignore: `rcond` parameter will change") # numpy deprecation def test_lars_lstsq(): @@ -224,8 +229,7 @@ def test_no_path_all_precomputed(): assert alpha_ == alphas_[-1] -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:The default of 'normalize'") +@filterwarnings_normalize @pytest.mark.parametrize( "classifier", [linear_model.Lars, linear_model.LarsCV, linear_model.LassoLarsIC] ) @@ -249,8 +253,7 @@ def test_singular_matrix(): assert_array_almost_equal(coef_path.T, [[0, 0], [1, 0]]) -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:The default of 'normalize'") +@filterwarnings_normalize def test_rank_deficient_design(): # consistency test that checks that LARS Lasso is handling rank # deficient input data (with n_features < rank) in the same way @@ -272,8 +275,7 @@ def test_rank_deficient_design(): assert obj_lars < obj_cd * (1.0 + 1e-8) -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:The default of 'normalize'") +@filterwarnings_normalize def test_lasso_lars_vs_lasso_cd(): # Test that LassoLars and Lasso using coordinate descent give the # same results. @@ -309,8 +311,7 @@ def test_lasso_lars_vs_lasso_cd(): assert error < 0.01 -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:The default of 'normalize'") +@filterwarnings_normalize def test_lasso_lars_vs_lasso_cd_early_stopping(): # Test that LassoLars and Lasso using coordinate descent give the # same results when early stopping is used. @@ -339,8 +340,7 @@ def test_lasso_lars_vs_lasso_cd_early_stopping(): assert error < 0.01 -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:The default of 'normalize'") +@filterwarnings_normalize def test_lasso_lars_path_length(): # Test that the path length of the LassoLars is right lasso = linear_model.LassoLars() @@ -383,8 +383,7 @@ def test_lasso_lars_vs_lasso_cd_ill_conditioned(): assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1) -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:The default of 'normalize'") +@filterwarnings_normalize def test_lasso_lars_vs_lasso_cd_ill_conditioned2(): # Create an ill-conditioned situation in which the LARS has to go # far in the path to converge, and check that LARS and coordinate @@ -415,8 +414,7 @@ def objective_function(coef): assert lars_obj < cd_obj * (1.0 + 1e-8) -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:The default of 'normalize'") +@filterwarnings_normalize def test_lars_add_features(): # assure that at least some features get added if necessary # test for 6d2b4c @@ -427,8 +425,7 @@ def test_lars_add_features(): assert np.all(np.isfinite(clf.coef_)) -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:The default of 'normalize'") +@filterwarnings_normalize def test_lars_n_nonzero_coefs(verbose=False): lars = linear_model.Lars(n_nonzero_coefs=6, verbose=verbose) lars.fit(X, y) @@ -438,8 +435,7 @@ def test_lars_n_nonzero_coefs(verbose=False): assert len(lars.alphas_) == 7 -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:The default of 'normalize'") +@filterwarnings_normalize @ignore_warnings def test_multitarget(): # Assure that estimators receiving multidimensional y do the right thing @@ -472,8 +468,7 @@ def test_multitarget(): assert_array_almost_equal(Y_pred[:, k], y_pred) -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:The default of 'normalize'") +@filterwarnings_normalize def test_lars_cv(): # Test the LassoLarsCV object by checking that the optimal alpha # increases as the number of samples increases. @@ -490,8 +485,7 @@ def test_lars_cv(): assert not hasattr(lars_cv, "n_nonzero_coefs") -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:The default of 'normalize'") +@filterwarnings_normalize def test_lars_cv_max_iter(recwarn): warnings.simplefilter("always") with np.errstate(divide="raise", invalid="raise"): @@ -514,8 +508,7 @@ def test_lars_cv_max_iter(recwarn): assert "normalize' will be set to False in version 1.2" in recorded_warnings[0] -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") +@filterwarnings_normalize def test_lasso_lars_ic(): # Test the LassoLarsIC object by checking that # - some good features are selected. @@ -591,8 +584,7 @@ def test_lars_path_positive_constraint(): } -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") +@filterwarnings_normalize def test_estimatorclasses_positive_constraint(): # testing the transmissibility for the positive option of all estimator # classes in this same function here @@ -614,8 +606,7 @@ def test_estimatorclasses_positive_constraint(): assert min(estimator.coef_) >= 0 -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") +@filterwarnings_normalize def test_lasso_lars_vs_lasso_cd_positive(): # Test that LassoLars and Lasso using coordinate descent give the # same results when using the positive option @@ -669,8 +660,7 @@ def test_lasso_lars_vs_lasso_cd_positive(): assert error < 0.01 -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") +@filterwarnings_normalize def test_lasso_lars_vs_R_implementation(): # Test that sklearn LassoLars implementation agrees with the LassoLars # implementation available in R (lars library) under the following @@ -815,8 +805,7 @@ def test_lasso_lars_vs_R_implementation(): ########################################################################### -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") +@filterwarnings_normalize @pytest.mark.parametrize("copy_X", [True, False]) def test_lasso_lars_copyX_behaviour(copy_X): """ @@ -833,8 +822,7 @@ def test_lasso_lars_copyX_behaviour(copy_X): assert copy_X == np.array_equal(X, X_copy) -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") +@filterwarnings_normalize @pytest.mark.parametrize("copy_X", [True, False]) def test_lasso_lars_fit_copyX_behaviour(copy_X): """ @@ -850,8 +838,7 @@ def test_lasso_lars_fit_copyX_behaviour(copy_X): assert copy_X == np.array_equal(X, X_copy) -# FIXME: 'normalize' to be removed in 1.4 -@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") +@filterwarnings_normalize @pytest.mark.parametrize("est", (LassoLars(alpha=1e-3), Lars())) def test_lars_with_jitter(est): # Test that a small amount of jitter helps stability, @@ -903,6 +890,7 @@ def test_copy_X_with_auto_gram(): ), ) @pytest.mark.parametrize("dtype", (np.float32, np.float64)) +@filterwarnings_normalize def test_lars_dtype_match(LARS, has_coef_path, args, dtype): # The test ensures that the fit method preserves input dtype rng = np.random.RandomState(0) @@ -928,6 +916,7 @@ def test_lars_dtype_match(LARS, has_coef_path, args, dtype): (LassoLarsCV, True, {"max_iter": 5}), ), ) +@filterwarnings_normalize def test_lars_numeric_consistency(LARS, has_coef_path, args): # The test ensures numerical consistency between trained coefficients # of float32 and float64.