From ccc97b002e77ba1ac176754a170dd7eec3992941 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 16:35:56 +0200 Subject: [PATCH 01/17] init deprecate normalize in bayse models --- sklearn/linear_model/_bayes.py | 97 ++++++++-------------------------- 1 file changed, 22 insertions(+), 75 deletions(-) diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py index e568407899f6f..96fabefa83075 100644 --- a/sklearn/linear_model/_bayes.py +++ b/sklearn/linear_model/_bayes.py @@ -8,6 +8,7 @@ from math import log import numpy as np from scipy import linalg +import warnings from ._base import LinearModel, _rescale_data from ..base import RegressorMixin @@ -22,60 +23,45 @@ class BayesianRidge(RegressorMixin, LinearModel): """Bayesian ridge regression. - Fit a Bayesian ridge model. See the Notes section for details on this implementation and the optimization of the regularization parameters lambda (precision of the weights) and alpha (precision of the noise). - Read more in the :ref:`User Guide `. - Parameters ---------- n_iter : int, default=300 Maximum number of iterations. Should be greater than or equal to 1. - tol : float, default=1e-3 Stop the algorithm if w has converged. - alpha_1 : float, default=1e-6 Hyper-parameter : shape parameter for the Gamma distribution prior over the alpha parameter. - alpha_2 : float, default=1e-6 Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the alpha parameter. - lambda_1 : float, default=1e-6 Hyper-parameter : shape parameter for the Gamma distribution prior over the lambda parameter. - lambda_2 : float, default=1e-6 Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the lambda parameter. - alpha_init : float, default=None Initial value for alpha (precision of the noise). If not set, alpha_init is 1/Var(y). - .. versionadded:: 0.22 - lambda_init : float, default=None Initial value for lambda (precision of the weights). If not set, lambda_init is 1. - .. versionadded:: 0.22 - compute_score : bool, default=False If True, compute the log marginal likelihood at each iteration of the optimization. - fit_intercept : bool, default=True Whether to calculate the intercept for this model. The intercept is not treated as a probabilistic parameter and thus has no associated variance. If set to False, no intercept will be used in calculations (i.e. data is expected to be centered). - normalize : bool, default=False This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by @@ -83,42 +69,34 @@ class BayesianRidge(RegressorMixin, LinearModel): If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. - + .. deprecated:: 0.24 + ``normalize`` was deprecated in version 0.24 and will be removed in + 0.26. copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. - verbose : bool, default=False Verbose mode when fitting the model. - - Attributes ---------- coef_ : array-like of shape (n_features,) Coefficients of the regression model (mean of distribution) - intercept_ : float Independent term in decision function. Set to 0.0 if ``fit_intercept = False``. - alpha_ : float Estimated precision of the noise. - lambda_ : float Estimated precision of the weights. - sigma_ : array-like of shape (n_features, n_features) Estimated variance-covariance matrix of the weights - scores_ : array-like of shape (n_iter_+1,) If computed_score is True, value of the log marginal likelihood (to be maximized) at each iteration of the optimization. The array starts with the value of the log marginal likelihood obtained for the initial values of alpha and lambda and ends with the value obtained for the estimated alpha and lambda. - n_iter_ : int The actual number of iterations to reach the stopping criterion. - Examples -------- >>> from sklearn import linear_model @@ -127,7 +105,6 @@ class BayesianRidge(RegressorMixin, LinearModel): BayesianRidge() >>> clf.predict([[1, 1]]) array([1.]) - Notes ----- There exist several strategies to perform Bayesian ridge regression. This @@ -137,12 +114,10 @@ class BayesianRidge(RegressorMixin, LinearModel): View of Automatic Relevance Determination (Wipf and Nagarajan, 2008) these update rules do not guarantee that the marginal likelihood is increasing between two consecutive iterations of the optimization. - References ---------- D. J. C. MacKay, Bayesian Interpolation, Computation and Neural Systems, Vol. 4, No. 3, 1992. - M. E. Tipping, Sparse Bayesian Learning and the Relevance Vector Machine, Journal of Machine Learning Research, Vol. 1, 2001. """ @@ -150,7 +125,7 @@ class BayesianRidge(RegressorMixin, LinearModel): def __init__(self, *, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, lambda_1=1.e-6, lambda_2=1.e-6, alpha_init=None, lambda_init=None, compute_score=False, fit_intercept=True, - normalize=False, copy_X=True, verbose=False): + normalize='deprecate', copy_X=True, verbose=False): self.n_iter = n_iter self.tol = tol self.alpha_1 = alpha_1 @@ -167,24 +142,25 @@ def __init__(self, *, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, def fit(self, X, y, sample_weight=None): """Fit the model - Parameters ---------- X : ndarray of shape (n_samples, n_features) Training data y : ndarray of shape (n_samples,) Target values. Will be cast to X's dtype if necessary - sample_weight : ndarray of shape (n_samples,), default=None Individual weights for each sample - .. versionadded:: 0.20 parameter *sample_weight* support to BayesianRidge. - Returns ------- self : returns an instance of self. """ + if self.normalize != "deprecate": + warnings.warn("'normalize' was deprecated in version 0.24 and will" + " be removed in 0.26.", FutureWarning) + else: + self.normalize = False if self.n_iter < 1: raise ValueError('n_iter should be greater than or equal to 1.' @@ -293,23 +269,18 @@ def fit(self, X, y, sample_weight=None): def predict(self, X, return_std=False): """Predict using the linear model. - In addition to the mean of the predictive distribution, also its standard deviation can be returned. - Parameters ---------- X : {array-like, sparse matrix} of shape (n_samples, n_features) Samples. - return_std : bool, default=False Whether to return the standard deviation of posterior prediction. - Returns ------- y_mean : array-like of shape (n_samples,) Mean of predictive distribution of query points. - y_std : array-like of shape (n_samples,) Standard deviation of predictive distribution of query points. """ @@ -326,7 +297,6 @@ def predict(self, X, return_std=False): def _update_coef_(self, X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_): """Update posterior mean and compute corresponding rmse. - Posterior mean is given by coef_ = scaled_sigma_ * X.T * y where scaled_sigma_ = (lambda_/alpha_ * np.eye(n_features) + np.dot(X.T, X))^-1 @@ -383,51 +353,39 @@ def _log_marginal_likelihood(self, n_samples, n_features, eigen_vals, class ARDRegression(RegressorMixin, LinearModel): """Bayesian ARD regression. - Fit the weights of a regression model, using an ARD prior. The weights of the regression model are assumed to be in Gaussian distributions. Also estimate the parameters lambda (precisions of the distributions of the weights) and alpha (precision of the distribution of the noise). The estimation is done by an iterative procedures (Evidence Maximization) - Read more in the :ref:`User Guide `. - Parameters ---------- n_iter : int, default=300 Maximum number of iterations. - tol : float, default=1e-3 Stop the algorithm if w has converged. - alpha_1 : float, default=1e-6 Hyper-parameter : shape parameter for the Gamma distribution prior over the alpha parameter. - alpha_2 : float, default=1e-6 Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the alpha parameter. - lambda_1 : float, default=1e-6 Hyper-parameter : shape parameter for the Gamma distribution prior over the lambda parameter. - lambda_2 : float, default=1e-6 Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the lambda parameter. - compute_score : bool, default=False If True, compute the objective function at each step of the model. - threshold_lambda : float, default=10 000 threshold for removing (pruning) weights with high precision from the computation. - fit_intercept : bool, default=True whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered). - normalize : bool, default=False This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by @@ -435,34 +393,28 @@ class ARDRegression(RegressorMixin, LinearModel): If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. - + .. deprecated:: 0.24 + ``normalize`` was deprecated in version 0.24 and will be removed in + 0.26. copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. - verbose : bool, default=False Verbose mode when fitting the model. - Attributes ---------- coef_ : array-like of shape (n_features,) Coefficients of the regression model (mean of distribution) - alpha_ : float estimated precision of the noise. - lambda_ : array-like of shape (n_features,) estimated precisions of the weights. - sigma_ : array-like of shape (n_features, n_features) estimated variance-covariance matrix of the weights - scores_ : float if computed, value of the objective function (to be maximized) - intercept_ : float Independent term in decision function. Set to 0.0 if ``fit_intercept = False``. - Examples -------- >>> from sklearn import linear_model @@ -471,17 +423,14 @@ class ARDRegression(RegressorMixin, LinearModel): ARDRegression() >>> clf.predict([[1, 1]]) array([1.]) - Notes ----- For an example, see :ref:`examples/linear_model/plot_ard.py `. - References ---------- D. J. C. MacKay, Bayesian nonlinear modeling for the prediction competition, ASHRAE Transactions, 1994. - R. Salakhutdinov, Lecture notes on Statistical Machine Learning, http://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=15 Their beta is our ``self.alpha_`` @@ -493,8 +442,8 @@ class ARDRegression(RegressorMixin, LinearModel): @_deprecate_positional_args def __init__(self, *, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False, - threshold_lambda=1.e+4, fit_intercept=True, normalize=False, - copy_X=True, verbose=False): + threshold_lambda=1.e+4, fit_intercept=True, + normalize='deprecate', copy_X=True, verbose=False): self.n_iter = n_iter self.tol = tol self.fit_intercept = fit_intercept @@ -511,9 +460,7 @@ def __init__(self, *, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, def fit(self, X, y): """Fit the ARDRegression model according to the given training data and parameters. - Iterative procedure to maximize the evidence - Parameters ---------- X : array-like of shape (n_samples, n_features) @@ -521,11 +468,16 @@ def fit(self, X, y): n_features is the number of features. y : array-like of shape (n_samples,) Target values (integers). Will be cast to X's dtype if necessary - Returns ------- self : returns an instance of self. """ + if self.normalize != "deprecate": + warnings.warn("'normalize' was deprecated in version 0.24 and will" + " be removed in 0.26.", FutureWarning) + else: + self.normalize = False + X, y = self._validate_data(X, y, dtype=np.float64, y_numeric=True, ensure_min_samples=2) @@ -642,23 +594,18 @@ def _update_sigma(self, X, alpha_, lambda_, keep_lambda): def predict(self, X, return_std=False): """Predict using the linear model. - In addition to the mean of the predictive distribution, also its standard deviation can be returned. - Parameters ---------- X : {array-like, sparse matrix} of shape (n_samples, n_features) Samples. - return_std : bool, default=False Whether to return the standard deviation of posterior prediction. - Returns ------- y_mean : array-like of shape (n_samples,) Mean of predictive distribution of query points. - y_std : array-like of shape (n_samples,) Standard deviation of predictive distribution of query points. """ @@ -671,4 +618,4 @@ def predict(self, X, return_std=False): X = X[:, self.lambda_ < self.threshold_lambda] sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1) y_std = np.sqrt(sigmas_squared_data + (1. / self.alpha_)) - return y_mean, y_std + return y_mean, y_std \ No newline at end of file From 3d871f80146b42959988f387c8a156eb8b299bde Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 18:01:14 +0200 Subject: [PATCH 02/17] updated warning message and self.normalize for self._normalize --- sklearn/linear_model/_bayes.py | 44 +++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py index 96fabefa83075..92a4b93348164 100644 --- a/sklearn/linear_model/_bayes.py +++ b/sklearn/linear_model/_bayes.py @@ -157,10 +157,24 @@ def fit(self, X, y, sample_weight=None): self : returns an instance of self. """ if self.normalize != "deprecate": - warnings.warn("'normalize' was deprecated in version 0.24 and will" - " be removed in 0.26.", FutureWarning) + if not self.normalize: + warnings.warn( + "'normalize' was deprecated in version 0.24 and will be" + " removed in 0.26.", FutureWarning + ) + else: + warnings.warn( + "'normalize' was deprecated in version 0.24 and will be" + " removed in 0.26. If you wish to keep an equivalent" + " behaviour, use Pipeline with a StandardScaler in a" + " preprocessing stage:" + " model = make_pipeline( \n" + " StandardScaler(with_mean=False), \n" + " {type(self).__name__}())", FutureWarning + ) + self._normalize = self.normalize else: - self.normalize = False + self._normalize = False if self.n_iter < 1: raise ValueError('n_iter should be greater than or equal to 1.' @@ -473,10 +487,24 @@ def fit(self, X, y): self : returns an instance of self. """ if self.normalize != "deprecate": - warnings.warn("'normalize' was deprecated in version 0.24 and will" - " be removed in 0.26.", FutureWarning) + if not self.normalize: + warnings.warn( + "'normalize' was deprecated in version 0.24 and will be" + " removed in 0.26.", FutureWarning + ) + else: + warnings.warn( + "'normalize' was deprecated in version 0.24 and will be" + " removed in 0.26. If you wish to keep an equivalent" + " behaviour, use Pipeline with a StandardScaler in a" + " preprocessing stage:" + " model = make_pipeline( \n" + " StandardScaler(with_mean=False), \n" + " {type(self).__name__}())", FutureWarning + ) + self._normalize = self.normalize else: - self.normalize = False + self._normalize = False X, y = self._validate_data(X, y, dtype=np.float64, y_numeric=True, ensure_min_samples=2) @@ -485,7 +513,7 @@ def fit(self, X, y): coef_ = np.zeros(n_features) X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data( - X, y, self.fit_intercept, self.normalize, self.copy_X) + X, y, self.fit_intercept, self._normalize, self.copy_X) # Launch the convergence loop keep_lambda = np.ones(n_features, dtype=bool) @@ -613,7 +641,7 @@ def predict(self, X, return_std=False): if return_std is False: return y_mean else: - if self.normalize: + if self._normalize: X = (X - self.X_offset_) / self.X_scale_ X = X[:, self.lambda_ < self.threshold_lambda] sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1) From 1572523720118d5d828ce592918cf9f1daead95c Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 18:04:45 +0200 Subject: [PATCH 03/17] make tests pass --- sklearn/linear_model/_bayes.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py index 92a4b93348164..a4fdbd29058e7 100644 --- a/sklearn/linear_model/_bayes.py +++ b/sklearn/linear_model/_bayes.py @@ -69,9 +69,11 @@ class BayesianRidge(RegressorMixin, LinearModel): If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. + .. deprecated:: 0.24 ``normalize`` was deprecated in version 0.24 and will be removed in 0.26. + copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. verbose : bool, default=False @@ -187,7 +189,7 @@ def fit(self, X, y, sample_weight=None): dtype=X.dtype) X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data( - X, y, self.fit_intercept, self.normalize, self.copy_X, + X, y, self.fit_intercept, self._normalize, self.copy_X, sample_weight=sample_weight) if sample_weight is not None: @@ -302,7 +304,7 @@ def predict(self, X, return_std=False): if return_std is False: return y_mean else: - if self.normalize: + if self._normalize: X = (X - self.X_offset_) / self.X_scale_ sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1) y_std = np.sqrt(sigmas_squared_data + (1. / self.alpha_)) @@ -407,9 +409,11 @@ class ARDRegression(RegressorMixin, LinearModel): If you wish to standardize, please use :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. + .. deprecated:: 0.24 ``normalize`` was deprecated in version 0.24 and will be removed in 0.26. + copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. verbose : bool, default=False From 1e6ad062ca25a037f4af511448a5f2391a14e905 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 18:05:26 +0200 Subject: [PATCH 04/17] clean up --- sklearn/linear_model/_bayes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py index a4fdbd29058e7..18bd0d58b8c9f 100644 --- a/sklearn/linear_model/_bayes.py +++ b/sklearn/linear_model/_bayes.py @@ -650,4 +650,4 @@ def predict(self, X, return_std=False): X = X[:, self.lambda_ < self.threshold_lambda] sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1) y_std = np.sqrt(sigmas_squared_data + (1. / self.alpha_)) - return y_mean, y_std \ No newline at end of file + return y_mean, y_std From f0c755396a5b4d360d3308dc189b398d8b862ab4 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 18:09:33 +0200 Subject: [PATCH 05/17] added test that the warning is raised --- sklearn/linear_model/tests/test_bayes.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/sklearn/linear_model/tests/test_bayes.py b/sklearn/linear_model/tests/test_bayes.py index ff3ac13c2d7f6..7a75aee6a1ddd 100644 --- a/sklearn/linear_model/tests/test_bayes.py +++ b/sklearn/linear_model/tests/test_bayes.py @@ -23,6 +23,29 @@ diabetes = datasets.load_diabetes() +@pytest.mark.parametrize('BayesModel', [ARDRegression, BayesianRidge]) +@pytest.mark.parametrize( + 'normalize, n_warnings, warning', + [(True, 1, FutureWarning), + (False, 1, FutureWarning), + ("deprecate", 0, None)] +) +def test_assure_warning_when_normalize(BayesModel, + normalize, n_warnings, warning): + # check that we issue a FutureWarning when normalize was set + rng = check_random_state(0) + n_samples = 200 + n_features = 2 + X = rng.randn(n_samples, n_features) + X[X < 0.1] = 0. + y = rng.rand(n_samples) + + model = BayesModel(normalize=normalize) + with pytest.warns(warning) as record: + model.fit(X, y) + assert len(record) == n_warnings + + def test_n_iter(): """Check value of n_iter.""" X = np.array([[1], [2], [6], [8], [10]]) From 66a282793d15eb64bdd9ee57464dc3784f6dcce7 Mon Sep 17 00:00:00 2001 From: maikia Date: Fri, 26 Jun 2020 18:11:11 +0200 Subject: [PATCH 06/17] clean up --- sklearn/linear_model/tests/test_bayes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/linear_model/tests/test_bayes.py b/sklearn/linear_model/tests/test_bayes.py index 7a75aee6a1ddd..5928a0cff82c1 100644 --- a/sklearn/linear_model/tests/test_bayes.py +++ b/sklearn/linear_model/tests/test_bayes.py @@ -23,6 +23,7 @@ diabetes = datasets.load_diabetes() +# FIXME: 'normalize' to be removed in 0.26 @pytest.mark.parametrize('BayesModel', [ARDRegression, BayesianRidge]) @pytest.mark.parametrize( 'normalize, n_warnings, warning', From 58835838535ed8736b500121b504e0cb0024d535 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 29 Jun 2020 10:12:26 +0200 Subject: [PATCH 07/17] removed False from standardScaler --- sklearn/linear_model/_bayes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py index 18bd0d58b8c9f..7d20060d54ec3 100644 --- a/sklearn/linear_model/_bayes.py +++ b/sklearn/linear_model/_bayes.py @@ -171,7 +171,7 @@ def fit(self, X, y, sample_weight=None): " behaviour, use Pipeline with a StandardScaler in a" " preprocessing stage:" " model = make_pipeline( \n" - " StandardScaler(with_mean=False), \n" + " StandardScaler(), \n" " {type(self).__name__}())", FutureWarning ) self._normalize = self.normalize @@ -503,7 +503,7 @@ def fit(self, X, y): " behaviour, use Pipeline with a StandardScaler in a" " preprocessing stage:" " model = make_pipeline( \n" - " StandardScaler(with_mean=False), \n" + " StandardScaler(), \n" " {type(self).__name__}())", FutureWarning ) self._normalize = self.normalize From cfd78d0bbd9bc03364325d8ccfe88bc62c34d454 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 26 Apr 2021 14:52:32 +0200 Subject: [PATCH 08/17] merge --- sklearn/linear_model/_bayes.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py index 3975380978bcb..2c2a677479020 100644 --- a/sklearn/linear_model/_bayes.py +++ b/sklearn/linear_model/_bayes.py @@ -99,8 +99,6 @@ class BayesianRidge(RegressorMixin, LinearModel): estimated alpha and lambda. n_iter_ : int The actual number of iterations to reach the stopping criterion. -<<<<<<< HEAD -======= X_offset_ : float If `normalize=True`, offset subtracted for centering data to a @@ -110,7 +108,6 @@ class BayesianRidge(RegressorMixin, LinearModel): If `normalize=True`, parameter used to scale data to a unit standard deviation. ->>>>>>> 6927fa26aedf48162314b675016180e3356ad557 Examples -------- >>> from sklearn import linear_model @@ -446,8 +443,6 @@ class ARDRegression(RegressorMixin, LinearModel): intercept_ : float Independent term in decision function. Set to 0.0 if ``fit_intercept = False``. -<<<<<<< HEAD -======= X_offset_ : float If `normalize=True`, offset subtracted for centering data to a @@ -457,7 +452,6 @@ class ARDRegression(RegressorMixin, LinearModel): If `normalize=True`, parameter used to scale data to a unit standard deviation. ->>>>>>> 6927fa26aedf48162314b675016180e3356ad557 Examples -------- >>> from sklearn import linear_model From dc4432dc1060d65011c745fa98e90a9093c373f9 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 26 Apr 2021 15:41:02 +0200 Subject: [PATCH 09/17] update whats new --- doc/whats_new/v1.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 3b3884e68e185..2d1b94498425a 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -261,6 +261,8 @@ Changelog Ridge, RidgeClassifier, RidgeCV or RidgeClassifierCV were deprecated in: :pr:`17772` by :user:`Maria Telenczuk ` and :user:`Alexandre Gramfort `. + BayesianRidge, ARDRegression were deprecated in: + :pr:`17746` by :user:`Maria Telenczuk `. - |Fix|: `sample_weight` are now fully taken into account in linear models when `normalize=True` for both feature centering and feature From 2f994a776ca9f4a86703ffb352a3e4742ab532cf Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 26 Apr 2021 16:04:35 +0200 Subject: [PATCH 10/17] updated deprecate normalize in bayes --- sklearn/linear_model/_bayes.py | 60 +++++++++------------------------- 1 file changed, 15 insertions(+), 45 deletions(-) diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py index 2c2a677479020..46362b34f0b10 100644 --- a/sklearn/linear_model/_bayes.py +++ b/sklearn/linear_model/_bayes.py @@ -8,10 +8,10 @@ from math import log import numpy as np from scipy import linalg -import warnings from ._base import LinearModel, _rescale_data from ..base import RegressorMixin +from ._base import _deprecate_normalize from ..utils.extmath import fast_logdet from scipy.linalg import pinvh from ..utils.validation import _check_sample_weight @@ -70,9 +70,9 @@ class BayesianRidge(RegressorMixin, LinearModel): :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. - .. deprecated:: 0.24 - ``normalize`` was deprecated in version 0.24 and will be removed in - 0.26. + .. deprecated:: 1.0 + ``normalize`` was deprecated in version 1.0 and will be removed in + 1.2. copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. @@ -167,25 +167,10 @@ def fit(self, X, y, sample_weight=None): ------- self : returns an instance of self. """ - if self.normalize != "deprecate": - if not self.normalize: - warnings.warn( - "'normalize' was deprecated in version 0.24 and will be" - " removed in 0.26.", FutureWarning - ) - else: - warnings.warn( - "'normalize' was deprecated in version 0.24 and will be" - " removed in 0.26. If you wish to keep an equivalent" - " behaviour, use Pipeline with a StandardScaler in a" - " preprocessing stage:" - " model = make_pipeline( \n" - " StandardScaler(), \n" - " {type(self).__name__}())", FutureWarning - ) - self._normalize = self.normalize - else: - self._normalize = False + self._normalize = _deprecate_normalize( + self.normalize, default=False, + estimator_name=self.__class__.__name__ + ) if self.n_iter < 1: raise ValueError('n_iter should be greater than or equal to 1.' @@ -420,9 +405,9 @@ class ARDRegression(RegressorMixin, LinearModel): :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. - .. deprecated:: 0.24 - ``normalize`` was deprecated in version 0.24 and will be removed in - 0.26. + .. deprecated:: 1.0 + ``normalize`` was deprecated in version 1.0 and will be removed in + 1.2. copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. @@ -509,25 +494,10 @@ def fit(self, X, y): ------- self : returns an instance of self. """ - if self.normalize != "deprecate": - if not self.normalize: - warnings.warn( - "'normalize' was deprecated in version 0.24 and will be" - " removed in 0.26.", FutureWarning - ) - else: - warnings.warn( - "'normalize' was deprecated in version 0.24 and will be" - " removed in 0.26. If you wish to keep an equivalent" - " behaviour, use Pipeline with a StandardScaler in a" - " preprocessing stage:" - " model = make_pipeline( \n" - " StandardScaler(), \n" - " {type(self).__name__}())", FutureWarning - ) - self._normalize = self.normalize - else: - self._normalize = False + self._normalize = _deprecate_normalize( + self.normalize, default=False, + estimator_name=self.__class__.__name__ + ) X, y = self._validate_data(X, y, dtype=np.float64, y_numeric=True, ensure_min_samples=2) From 990aacee064d51a5b66dc50b26da5c86f8504b31 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 26 Apr 2021 16:11:02 +0200 Subject: [PATCH 11/17] add bayes to common tests --- sklearn/linear_model/tests/test_common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_common.py b/sklearn/linear_model/tests/test_common.py index 96a996d18dac7..f255384be4167 100644 --- a/sklearn/linear_model/tests/test_common.py +++ b/sklearn/linear_model/tests/test_common.py @@ -12,6 +12,8 @@ from sklearn.linear_model import RidgeCV from sklearn.linear_model import RidgeClassifier from sklearn.linear_model import RidgeClassifierCV +from sklearn.linear_model import BayesianRidge +from sklearn.linear_model import ARDRegression from sklearn.utils import check_random_state @@ -24,7 +26,8 @@ ) @pytest.mark.parametrize( "estimator", - [LinearRegression, Ridge, RidgeCV, RidgeClassifier, RidgeClassifierCV] + [LinearRegression, Ridge, RidgeCV, RidgeClassifier, RidgeClassifierCV, + BayesianRidge, ARDRegression] ) # FIXME remove test in 1.2 def test_linear_model_normalize_deprecation_message( From ed9ad8dd47a85e80bf3c683dd283a7dd99628772 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 26 Apr 2021 16:14:34 +0200 Subject: [PATCH 12/17] update default of normalize to deprecated --- sklearn/linear_model/_bayes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py index 46362b34f0b10..ed8a8edd4e70b 100644 --- a/sklearn/linear_model/_bayes.py +++ b/sklearn/linear_model/_bayes.py @@ -136,7 +136,7 @@ class BayesianRidge(RegressorMixin, LinearModel): def __init__(self, *, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, lambda_1=1.e-6, lambda_2=1.e-6, alpha_init=None, lambda_init=None, compute_score=False, fit_intercept=True, - normalize='deprecate', copy_X=True, verbose=False): + normalize='deprecated', copy_X=True, verbose=False): self.n_iter = n_iter self.tol = tol self.alpha_1 = alpha_1 @@ -465,7 +465,7 @@ class ARDRegression(RegressorMixin, LinearModel): def __init__(self, *, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False, threshold_lambda=1.e+4, fit_intercept=True, - normalize='deprecate', copy_X=True, verbose=False): + normalize='deprecated', copy_X=True, verbose=False): self.n_iter = n_iter self.tol = tol self.fit_intercept = fit_intercept From b19040f784574281c1bfaaf8c86f6d8c4d34e9fa Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 26 Apr 2021 16:15:34 +0200 Subject: [PATCH 13/17] moving checking if the deprecated error message is raised to test_common --- sklearn/linear_model/tests/test_bayes.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/sklearn/linear_model/tests/test_bayes.py b/sklearn/linear_model/tests/test_bayes.py index a6ac954a4aceb..529702ff752ac 100644 --- a/sklearn/linear_model/tests/test_bayes.py +++ b/sklearn/linear_model/tests/test_bayes.py @@ -23,30 +23,6 @@ diabetes = datasets.load_diabetes() -# FIXME: 'normalize' to be removed in 0.26 -@pytest.mark.parametrize('BayesModel', [ARDRegression, BayesianRidge]) -@pytest.mark.parametrize( - 'normalize, n_warnings, warning', - [(True, 1, FutureWarning), - (False, 1, FutureWarning), - ("deprecate", 0, None)] -) -def test_assure_warning_when_normalize(BayesModel, - normalize, n_warnings, warning): - # check that we issue a FutureWarning when normalize was set - rng = check_random_state(0) - n_samples = 200 - n_features = 2 - X = rng.randn(n_samples, n_features) - X[X < 0.1] = 0. - y = rng.rand(n_samples) - - model = BayesModel(normalize=normalize) - with pytest.warns(warning) as record: - model.fit(X, y) - assert len(record) == n_warnings - - def test_n_iter(): """Check value of n_iter.""" X = np.array([[1], [2], [6], [8], [10]]) From 525b176ad01edd101ca919fb2203f366d45335d2 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 26 Apr 2021 16:17:20 +0200 Subject: [PATCH 14/17] filter warning when normalize is set --- sklearn/linear_model/tests/test_bayes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/linear_model/tests/test_bayes.py b/sklearn/linear_model/tests/test_bayes.py index 529702ff752ac..a22a0243cdcb7 100644 --- a/sklearn/linear_model/tests/test_bayes.py +++ b/sklearn/linear_model/tests/test_bayes.py @@ -274,6 +274,8 @@ def test_update_sigma(seed): np.testing.assert_allclose(sigma, sigma_woodbury) +# FIXME: 'normalize' to be removed in 1.2 in LinearRegression +@pytest.mark.filterwarnings("ignore:'normalize' was deprecated") def test_ard_regression_predict_normalize_true(): """Check that we can predict with `normalize=True` and `return_std=True`. Non-regression test for: From 77b47936b8fbdd3247882b43280a40ff91b1acd5 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 26 Apr 2021 16:31:51 +0200 Subject: [PATCH 15/17] cleanup --- sklearn/linear_model/_bayes.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py index ed8a8edd4e70b..5549d729d35de 100644 --- a/sklearn/linear_model/_bayes.py +++ b/sklearn/linear_model/_bayes.py @@ -27,41 +27,52 @@ class BayesianRidge(RegressorMixin, LinearModel): implementation and the optimization of the regularization parameters lambda (precision of the weights) and alpha (precision of the noise). Read more in the :ref:`User Guide `. + Parameters ---------- n_iter : int, default=300 Maximum number of iterations. Should be greater than or equal to 1. + tol : float, default=1e-3 Stop the algorithm if w has converged. + alpha_1 : float, default=1e-6 Hyper-parameter : shape parameter for the Gamma distribution prior over the alpha parameter. + alpha_2 : float, default=1e-6 Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the alpha parameter. + lambda_1 : float, default=1e-6 Hyper-parameter : shape parameter for the Gamma distribution prior over the lambda parameter. + lambda_2 : float, default=1e-6 Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the lambda parameter. + alpha_init : float, default=None Initial value for alpha (precision of the noise). If not set, alpha_init is 1/Var(y). .. versionadded:: 0.22 + lambda_init : float, default=None Initial value for lambda (precision of the weights). If not set, lambda_init is 1. .. versionadded:: 0.22 + compute_score : bool, default=False If True, compute the log marginal likelihood at each iteration of the optimization. + fit_intercept : bool, default=True Whether to calculate the intercept for this model. The intercept is not treated as a probabilistic parameter and thus has no associated variance. If set to False, no intercept will be used in calculations (i.e. data is expected to be centered). + normalize : bool, default=False This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by @@ -78,25 +89,33 @@ class BayesianRidge(RegressorMixin, LinearModel): If True, X will be copied; else, it may be overwritten. verbose : bool, default=False Verbose mode when fitting the model. + + Attributes ---------- coef_ : array-like of shape (n_features,) Coefficients of the regression model (mean of distribution) + intercept_ : float Independent term in decision function. Set to 0.0 if ``fit_intercept = False``. + alpha_ : float Estimated precision of the noise. + lambda_ : float Estimated precision of the weights. + sigma_ : array-like of shape (n_features, n_features) Estimated variance-covariance matrix of the weights + scores_ : array-like of shape (n_iter_+1,) If computed_score is True, value of the log marginal likelihood (to be maximized) at each iteration of the optimization. The array starts with the value of the log marginal likelihood obtained for the initial values of alpha and lambda and ends with the value obtained for the estimated alpha and lambda. + n_iter_ : int The actual number of iterations to reach the stopping criterion. @@ -116,6 +135,7 @@ class BayesianRidge(RegressorMixin, LinearModel): BayesianRidge() >>> clf.predict([[1, 1]]) array([1.]) + Notes ----- There exist several strategies to perform Bayesian ridge regression. This @@ -125,6 +145,7 @@ class BayesianRidge(RegressorMixin, LinearModel): View of Automatic Relevance Determination (Wipf and Nagarajan, 2008) these update rules do not guarantee that the marginal likelihood is increasing between two consecutive iterations of the optimization. + References ---------- D. J. C. MacKay, Bayesian Interpolation, Computation and Neural Systems, From c1b8570a5ace24d6582e411fb3bd6db11f8c4d40 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 26 Apr 2021 17:49:46 +0200 Subject: [PATCH 16/17] add line-spaces --- sklearn/linear_model/_bayes.py | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py index 5549d729d35de..d5e9af7ca6394 100644 --- a/sklearn/linear_model/_bayes.py +++ b/sklearn/linear_model/_bayes.py @@ -23,9 +23,11 @@ class BayesianRidge(RegressorMixin, LinearModel): """Bayesian ridge regression. + Fit a Bayesian ridge model. See the Notes section for details on this implementation and the optimization of the regularization parameters lambda (precision of the weights) and alpha (precision of the noise). + Read more in the :ref:`User Guide `. Parameters @@ -55,11 +57,13 @@ class BayesianRidge(RegressorMixin, LinearModel): alpha_init : float, default=None Initial value for alpha (precision of the noise). If not set, alpha_init is 1/Var(y). + .. versionadded:: 0.22 lambda_init : float, default=None Initial value for lambda (precision of the weights). If not set, lambda_init is 1. + .. versionadded:: 0.22 compute_score : bool, default=False @@ -150,6 +154,7 @@ class BayesianRidge(RegressorMixin, LinearModel): ---------- D. J. C. MacKay, Bayesian Interpolation, Computation and Neural Systems, Vol. 4, No. 3, 1992. + M. E. Tipping, Sparse Bayesian Learning and the Relevance Vector Machine, Journal of Machine Learning Research, Vol. 1, 2001. """ @@ -174,16 +179,20 @@ def __init__(self, *, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, def fit(self, X, y, sample_weight=None): """Fit the model + Parameters ---------- X : ndarray of shape (n_samples, n_features) Training data y : ndarray of shape (n_samples,) Target values. Will be cast to X's dtype if necessary + sample_weight : ndarray of shape (n_samples,), default=None Individual weights for each sample + .. versionadded:: 0.20 parameter *sample_weight* support to BayesianRidge. + Returns ------- self : returns an instance of self. @@ -300,18 +309,23 @@ def fit(self, X, y, sample_weight=None): def predict(self, X, return_std=False): """Predict using the linear model. + In addition to the mean of the predictive distribution, also its standard deviation can be returned. + Parameters ---------- X : {array-like, sparse matrix} of shape (n_samples, n_features) Samples. + return_std : bool, default=False Whether to return the standard deviation of posterior prediction. + Returns ------- y_mean : array-like of shape (n_samples,) Mean of predictive distribution of query points. + y_std : array-like of shape (n_samples,) Standard deviation of predictive distribution of query points. """ @@ -328,6 +342,7 @@ def predict(self, X, return_std=False): def _update_coef_(self, X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_): """Update posterior mean and compute corresponding rmse. + Posterior mean is given by coef_ = scaled_sigma_ * X.T * y where scaled_sigma_ = (lambda_/alpha_ * np.eye(n_features) + np.dot(X.T, X))^-1 @@ -390,34 +405,45 @@ class ARDRegression(RegressorMixin, LinearModel): Also estimate the parameters lambda (precisions of the distributions of the weights) and alpha (precision of the distribution of the noise). The estimation is done by an iterative procedures (Evidence Maximization) + Read more in the :ref:`User Guide `. + Parameters ---------- n_iter : int, default=300 Maximum number of iterations. + tol : float, default=1e-3 Stop the algorithm if w has converged. + alpha_1 : float, default=1e-6 Hyper-parameter : shape parameter for the Gamma distribution prior over the alpha parameter. + alpha_2 : float, default=1e-6 Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the alpha parameter. + lambda_1 : float, default=1e-6 Hyper-parameter : shape parameter for the Gamma distribution prior over the lambda parameter. + lambda_2 : float, default=1e-6 Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the lambda parameter. + compute_score : bool, default=False If True, compute the objective function at each step of the model. + threshold_lambda : float, default=10 000 threshold for removing (pruning) weights with high precision from the computation. + fit_intercept : bool, default=True whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered). + normalize : bool, default=False This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by @@ -432,20 +458,27 @@ class ARDRegression(RegressorMixin, LinearModel): copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. + verbose : bool, default=False Verbose mode when fitting the model. + Attributes ---------- coef_ : array-like of shape (n_features,) Coefficients of the regression model (mean of distribution) + alpha_ : float estimated precision of the noise. + lambda_ : array-like of shape (n_features,) estimated precisions of the weights. + sigma_ : array-like of shape (n_features, n_features) estimated variance-covariance matrix of the weights + scores_ : float if computed, value of the objective function (to be maximized) + intercept_ : float Independent term in decision function. Set to 0.0 if ``fit_intercept = False``. @@ -466,14 +499,17 @@ class ARDRegression(RegressorMixin, LinearModel): ARDRegression() >>> clf.predict([[1, 1]]) array([1.]) + Notes ----- For an example, see :ref:`examples/linear_model/plot_ard.py `. + References ---------- D. J. C. MacKay, Bayesian nonlinear modeling for the prediction competition, ASHRAE Transactions, 1994. + R. Salakhutdinov, Lecture notes on Statistical Machine Learning, http://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=15 Their beta is our ``self.alpha_`` @@ -503,7 +539,9 @@ def __init__(self, *, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, def fit(self, X, y): """Fit the ARDRegression model according to the given training data and parameters. + Iterative procedure to maximize the evidence + Parameters ---------- X : array-like of shape (n_samples, n_features) @@ -511,6 +549,7 @@ def fit(self, X, y): n_features is the number of features. y : array-like of shape (n_samples,) Target values (integers). Will be cast to X's dtype if necessary + Returns ------- self : returns an instance of self. @@ -639,18 +678,22 @@ def _update_sigma(self, X, alpha_, lambda_, keep_lambda): def predict(self, X, return_std=False): """Predict using the linear model. + In addition to the mean of the predictive distribution, also its standard deviation can be returned. + Parameters ---------- X : {array-like, sparse matrix} of shape (n_samples, n_features) Samples. return_std : bool, default=False Whether to return the standard deviation of posterior prediction. + Returns ------- y_mean : array-like of shape (n_samples,) Mean of predictive distribution of query points. + y_std : array-like of shape (n_samples,) Standard deviation of predictive distribution of query points. """ From f7afe98f6b9790970096cdeb40de43592b01d405 Mon Sep 17 00:00:00 2001 From: maikia Date: Mon, 26 Apr 2021 17:52:25 +0200 Subject: [PATCH 17/17] cleanup --- sklearn/linear_model/_bayes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py index d5e9af7ca6394..2eae8b5c13cee 100644 --- a/sklearn/linear_model/_bayes.py +++ b/sklearn/linear_model/_bayes.py @@ -91,6 +91,7 @@ class BayesianRidge(RegressorMixin, LinearModel): copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. + verbose : bool, default=False Verbose mode when fitting the model. @@ -400,6 +401,7 @@ def _log_marginal_likelihood(self, n_samples, n_features, eigen_vals, class ARDRegression(RegressorMixin, LinearModel): """Bayesian ARD regression. + Fit the weights of a regression model, using an ARD prior. The weights of the regression model are assumed to be in Gaussian distributions. Also estimate the parameters lambda (precisions of the distributions of the @@ -686,6 +688,7 @@ def predict(self, X, return_std=False): ---------- X : {array-like, sparse matrix} of shape (n_samples, n_features) Samples. + return_std : bool, default=False Whether to return the standard deviation of posterior prediction.