FIX Sample weight in BayesianRidge by antoinebaker · Pull Request #30644 · scikit-learn/scikit-learn · GitHub

FIX Sample weight in BayesianRidge #30644

Merged
@@ -0,0 +1,3 @@
- The update and initialization of the hyperparameters now properly handle
sample weights in :class:`linear_model.BayesianRidge`.
By :user:`Antoine Baker <antoinebaker>`.
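
For context, here is a minimal standalone sketch (not part of the patch) of the weighted statistics this diff introduces at the start of `fit`: the weight total `sw_sum` replaces `n_samples`, and the variance used to initialize the noise precision `alpha_` becomes a weighted variance of `y`. Variable names mirror the diff; the toy data is purely illustrative.

import numpy as np

# toy data and weights (illustrative only)
rng = np.random.default_rng(0)
y = rng.normal(size=10)
sample_weight = rng.uniform(0.5, 2.0, size=10)

# unweighted defaults
sw_sum = y.shape[0]  # n_samples
y_var = y.var()

# weighted counterparts used when sample_weight is given
sw_sum = sample_weight.sum()
y_mean = np.average(y, weights=sample_weight)
y_var = np.average((y - y_mean) ** 2, weights=sample_weight)

# initial noise precision, guarded against a zero variance
eps = np.finfo(np.float64).eps
alpha_ = 1.0 / (y_var + eps)
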
62 changes: 40 additions & 22 deletions sklearn/linear_model/_bayes.py
@@ -244,9 +244,15 @@ def fit(self, X, y, sample_weight=None):
y_numeric=True,
)
dtype = X.dtype
n_samples, n_features = X.shape

sw_sum = n_samples
y_var = y.var()
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X, dtype=dtype)
sw_sum = sample_weight.sum()
y_mean = np.average(y, weights=sample_weight)
y_var = np.average((y - y_mean) ** 2, weights=sample_weight)

X, y, X_offset_, y_offset_, X_scale_ = _preprocess_data(
X,
@@ -262,16 +268,14 @@ def fit(self, X, y, sample_weight=None):

self.X_offset_ = X_offset_
self.X_scale_ = X_scale_
n_samples, n_features = X.shape

# Initialization of the values of the parameters
eps = np.finfo(np.float64).eps
# Add `eps` in the denominator to omit division by zero if `np.var(y)`
# is zero
# Add `eps` in the denominator to omit division by zero
alpha_ = self.alpha_init
lambda_ = self.lambda_init
if alpha_ is None:
alpha_ = 1.0 / (np.var(y) + eps)
alpha_ = 1.0 / (y_var + eps)
if lambda_ is None:
lambda_ = 1.0

@@ -295,21 +299,28 @@ def fit(self, X, y, sample_weight=None):
# Convergence loop of the bayesian ridge regression
for iter_ in range(self.max_iter):
# update posterior mean coef_ based on alpha_ and lambda_ and
# compute corresponding rmse
coef_, rmse_ = self._update_coef_(
# compute corresponding sse (sum of squared errors)
coef_, sse_ = self._update_coef_(
X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_
)
if self.compute_score:
# compute the log marginal likelihood
s = self._log_marginal_likelihood(
n_samples, n_features, eigen_vals_, alpha_, lambda_, coef_, rmse_
n_samples,
n_features,
sw_sum,
eigen_vals_,
alpha_,
lambda_,
coef_,
sse_,
)
self.scores_.append(s)

# Update alpha and lambda according to (MacKay, 1992)
gamma_ = np.sum((alpha_ * eigen_vals_) / (lambda_ + alpha_ * eigen_vals_))
lambda_ = (gamma_ + 2 * lambda_1) / (np.sum(coef_**2) + 2 * lambda_2)
alpha_ = (n_samples - gamma_ + 2 * alpha_1) / (rmse_ + 2 * alpha_2)
alpha_ = (sw_sum - gamma_ + 2 * alpha_1) / (sse_ + 2 * alpha_2)

# Check for convergence
if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:
@@ -324,13 +335,20 @@ def fit(self, X, y, sample_weight=None):
# log marginal likelihood and posterior covariance
self.alpha_ = alpha_
self.lambda_ = lambda_
self.coef_, rmse_ = self._update_coef_(
self.coef_, sse_ = self._update_coef_(
X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_
)
if self.compute_score:
# compute the log marginal likelihood
s = self._log_marginal_likelihood(
n_samples, n_features, eigen_vals_, alpha_, lambda_, coef_, rmse_
n_samples,
n_features,
sw_sum,
eigen_vals_,
alpha_,
lambda_,
coef_,
sse_,
)
self.scores_.append(s)
self.scores_ = np.array(self.scores_)
@@ -378,7 +396,7 @@ def predict(self, X, return_std=False):
def _update_coef_(
self, X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_
):
"""Update posterior mean and compute corresponding rmse.
"""Update posterior mean and compute corresponding sse (sum of squared errors).

Posterior mean is given by coef_ = scaled_sigma_ * X.T * y where
scaled_sigma_ = (lambda_/alpha_ * np.eye(n_features)
@@ -394,12 +412,14 @@ def _update_coef_(
[X.T, U / (eigen_vals_ + lambda_ / alpha_)[None, :], U.T, y]
)

rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)
# Note: we do not need to explicitly use the weights in this sum because
# y and X were preprocessed by _rescale_data to handle the weights.
sse_ = np.sum((y - np.dot(X, coef_)) ** 2)

return coef_, rmse_
return coef_, sse_

def _log_marginal_likelihood(
self, n_samples, n_features, eigen_vals, alpha_, lambda_, coef, rmse
self, n_samples, n_features, sw_sum, eigen_vals, alpha_, lambda_, coef, sse
):
"""Log marginal likelihood."""
alpha_1 = self.alpha_1
@@ -421,11 +441,11 @@ def _log_marginal_likelihood(
score += alpha_1 * log(alpha_) - alpha_2 * alpha_
score += 0.5 * (
n_features * log(lambda_)
+ n_samples * log(alpha_)
- alpha_ * rmse
+ sw_sum * log(alpha_)
- alpha_ * sse
- lambda_ * np.sum(coef**2)
+ logdet_sigma
- n_samples * log(2 * np.pi)
- sw_sum * log(2 * np.pi)
)

return score
@@ -684,14 +704,12 @@ def update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_):
coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)

# Update alpha and lambda
rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)
sse_ = np.sum((y - np.dot(X, coef_)) ** 2)
gamma_ = 1.0 - lambda_[keep_lambda] * np.diag(sigma_)
lambda_[keep_lambda] = (gamma_ + 2.0 * lambda_1) / (
(coef_[keep_lambda]) ** 2 + 2.0 * lambda_2
)
alpha_ = (n_samples - gamma_.sum() + 2.0 * alpha_1) / (
rmse_ + 2.0 * alpha_2
)
alpha_ = (n_samples - gamma_.sum() + 2.0 * alpha_1) / (sse_ + 2.0 * alpha_2)

# Prune the weights with a precision over a threshold
keep_lambda = lambda_ < self.threshold_lambda
@@ -706,7 +724,7 @@ def update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_):
+ n_samples * log(alpha_)
+ np.sum(np.log(lambda_))
)
s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_**2).sum())
s -= 0.5 * (alpha_ * sse_ + (lambda_ * coef_**2).sum())
self.scores_.append(s)

# Check for convergence
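
To summarize the two ideas carried through the `_bayes.py` changes above, here is an illustrative sketch (not part of the changeset). It assumes, as the comment in `_update_coef_` indicates, that preprocessing rescales the rows of `X` and `y` by the square root of the sample weights, so a plain sum of squared errors on the rescaled data equals the weighted SSE. It also restates the MacKay (1992) style hyperparameter updates with `sw_sum` in place of `n_samples`; the same substitution appears in the log marginal likelihood, where `sw_sum * log(alpha_)` and `sw_sum * log(2 * pi)` replace the former `n_samples` terms. The numeric values below are placeholders.

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(20, 3))
y = rng.normal(size=20)
w = rng.uniform(0.5, 2.0, size=20)
coef = rng.normal(size=3)

# Rescaling rows by sqrt(w) makes a plain sum of squared errors on the
# rescaled data equal to the weighted sum of squared errors.
Xw = X * np.sqrt(w)[:, None]
yw = y * np.sqrt(w)
sse_on_rescaled = np.sum((yw - Xw @ coef) ** 2)
sse_weighted = np.sum(w * (y - X @ coef) ** 2)
assert np.isclose(sse_on_rescaled, sse_weighted)

# MacKay-style updates with the weight total replacing n_samples
# (gamma_ and the alpha_*/lambda_* priors are placeholder values here).
gamma_ = 1.5
alpha_1 = alpha_2 = lambda_1 = lambda_2 = 1e-6
sw_sum = w.sum()
sse_ = sse_weighted
lambda_ = (gamma_ + 2 * lambda_1) / (np.sum(coef**2) + 2 * lambda_2)
alpha_ = (sw_sum - gamma_ + 2 * alpha_1) / (sse_ + 2 * alpha_2)
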
9 changes: 0 additions & 9 deletions sklearn/utils/_test_common/instance_generator.py
@@ -836,15 +836,6 @@ def _yield_instances_for_check(check, estimator_orig):
"sample_weight is not equivalent to removing/repeating samples."
),
},
BayesianRidge: {
# TODO: fix sample_weight handling of this estimator, see meta-issue #16298
"check_sample_weight_equivalence_on_dense_data": (
"sample_weight is not equivalent to removing/repeating samples."
),
"check_sample_weight_equivalence_on_sparse_data": (
"sample_weight is not equivalent to removing/repeating samples."
),
},
BernoulliRBM: {
"check_methods_subset_invariance": ("fails for the decision_function method"),
"check_methods_sample_order_invariance": ("fails for the score_samples method"),
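Removing these entries means `BayesianRidge` is no longer exempt from the common sample-weight equivalence checks. A rough standalone illustration of the property those checks exercise is below; this is not the actual common test, the tolerances are chosen loosely, and the assertion is only expected to hold on versions that include this fix.

import numpy as np
from sklearn.linear_model import BayesianRidge

# Integer sample weights should behave like repeating the corresponding rows.
rng = np.random.default_rng(0)
X = rng.normal(size=(30, 4))
y = X @ np.array([1.0, -2.0, 0.5, 0.0]) + 0.1 * rng.normal(size=30)
w = rng.integers(1, 4, size=30)

est_weighted = BayesianRidge().fit(X, y, sample_weight=w)
est_repeated = BayesianRidge().fit(np.repeat(X, w, axis=0), np.repeat(y, w))

assert np.allclose(est_weighted.coef_, est_repeated.coef_, rtol=1e-4, atol=1e-6)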