FIX Sample weight in BayesianRidge (#30644) · scikit-learn/scikit-learn@9a53acf

Commit 9a53acf
FIX Sample weight in BayesianRidge (#30644)
1 parent 311bf6b commit 9a53acf

File tree: 3 files changed, +43 -31 lines changed
Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+- The update and initialization of the hyperparameters now properly handle
+  sample weights in :class:`linear_model.BayesianRidge`.
+  By :user:`Antoine Baker <antoinebaker>`.
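
In practice, the fix makes integer sample weights behave like repeated rows. A minimal sketch of the behavior described by this changelog entry, on assumed toy data (not part of the commit):

import numpy as np
from sklearn.linear_model import BayesianRidge

rng = np.random.RandomState(0)
X = rng.randn(20, 3)
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.randn(20)
w = rng.randint(1, 4, size=20)  # integer sample weights

# Weighted fit vs. fit on the sample-repeated dataset.
reg_w = BayesianRidge().fit(X, y, sample_weight=w)
reg_r = BayesianRidge().fit(np.repeat(X, w, axis=0), np.repeat(y, w))

print(np.allclose(reg_w.coef_, reg_r.coef_, atol=1e-6))  # expected: True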

sklearn/linear_model/_bayes.py

Lines changed: 40 additions & 22 deletions
@@ -244,9 +244,15 @@ def fit(self, X, y, sample_weight=None):
             y_numeric=True,
         )
         dtype = X.dtype
+        n_samples, n_features = X.shape

+        sw_sum = n_samples
+        y_var = y.var()
         if sample_weight is not None:
             sample_weight = _check_sample_weight(sample_weight, X, dtype=dtype)
+            sw_sum = sample_weight.sum()
+            y_mean = np.average(y, weights=sample_weight)
+            y_var = np.average((y - y_mean) ** 2, weights=sample_weight)

         X, y, X_offset_, y_offset_, X_scale_ = _preprocess_data(
             X,
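
The weighted variance computed here is what makes the alpha_ initialization below consistent with sample repetition: the weighted variance of y equals the plain variance of y with rows repeated according to their (integer) weights. A small sketch on assumed values:

import numpy as np

y = np.array([1.0, 2.0, 4.0])
w = np.array([1, 3, 2])

# Weighted mean and variance, as in the hunk above.
y_mean = np.average(y, weights=w)
y_var = np.average((y - y_mean) ** 2, weights=w)

# Identical to the unweighted variance of the repeated dataset.
print(np.isclose(y_var, np.repeat(y, w).var()))  # True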
@@ -262,16 +268,14 @@ def fit(self, X, y, sample_weight=None):

         self.X_offset_ = X_offset_
         self.X_scale_ = X_scale_
-        n_samples, n_features = X.shape

         # Initialization of the values of the parameters
         eps = np.finfo(np.float64).eps
-        # Add `eps` in the denominator to omit division by zero if `np.var(y)`
-        # is zero
+        # Add `eps` in the denominator to omit division by zero
         alpha_ = self.alpha_init
         lambda_ = self.lambda_init
         if alpha_ is None:
-            alpha_ = 1.0 / (np.var(y) + eps)
+            alpha_ = 1.0 / (y_var + eps)
         if lambda_ is None:
             lambda_ = 1.0

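The eps guard matters when y is constant, in which case the (weighted) variance is zero; a tiny sketch with assumed values:

import numpy as np

eps = np.finfo(np.float64).eps
y_var = 0.0  # e.g. a constant target
alpha_ = 1.0 / (y_var + eps)  # very large but finite noise precision
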
@@ -295,21 +299,28 @@ def fit(self, X, y, sample_weight=None):
         # Convergence loop of the bayesian ridge regression
         for iter_ in range(self.max_iter):
             # update posterior mean coef_ based on alpha_ and lambda_ and
-            # compute corresponding rmse
-            coef_, rmse_ = self._update_coef_(
+            # compute corresponding sse (sum of squared errors)
+            coef_, sse_ = self._update_coef_(
                 X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_
             )
             if self.compute_score:
                 # compute the log marginal likelihood
                 s = self._log_marginal_likelihood(
-                    n_samples, n_features, eigen_vals_, alpha_, lambda_, coef_, rmse_
+                    n_samples,
+                    n_features,
+                    sw_sum,
+                    eigen_vals_,
+                    alpha_,
+                    lambda_,
+                    coef_,
+                    sse_,
                 )
                 self.scores_.append(s)

             # Update alpha and lambda according to (MacKay, 1992)
             gamma_ = np.sum((alpha_ * eigen_vals_) / (lambda_ + alpha_ * eigen_vals_))
             lambda_ = (gamma_ + 2 * lambda_1) / (np.sum(coef_**2) + 2 * lambda_2)
-            alpha_ = (n_samples - gamma_ + 2 * alpha_1) / (rmse_ + 2 * alpha_2)
+            alpha_ = (sw_sum - gamma_ + 2 * alpha_1) / (sse_ + 2 * alpha_2)

             # Check for convergence
             if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:
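
Pulled out of the loop above, the (MacKay, 1992) updates with the sample-weight generalization look as follows. This is an illustrative sketch, not the estimator's code path; the 1e-6 defaults mirror BayesianRidge's hyper-hyperparameter defaults:

import numpy as np

def mackay_update(eigen_vals, coef, sse, sw_sum, alpha_, lambda_,
                  alpha_1=1e-6, alpha_2=1e-6, lambda_1=1e-6, lambda_2=1e-6):
    # Effective number of well-determined parameters.
    gamma = np.sum((alpha_ * eigen_vals) / (lambda_ + alpha_ * eigen_vals))
    # Precision of the prior over the coefficients.
    lambda_new = (gamma + 2 * lambda_1) / (np.sum(coef**2) + 2 * lambda_2)
    # Noise precision: the total sample weight, not the raw row count,
    # plays the role of the number of observations.
    alpha_new = (sw_sum - gamma + 2 * alpha_1) / (sse + 2 * alpha_2)
    return alpha_new, lambda_new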
@@ -324,13 +335,20 @@ def fit(self, X, y, sample_weight=None):
         # log marginal likelihood and posterior covariance
         self.alpha_ = alpha_
         self.lambda_ = lambda_
-        self.coef_, rmse_ = self._update_coef_(
+        self.coef_, sse_ = self._update_coef_(
             X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_
         )
         if self.compute_score:
             # compute the log marginal likelihood
             s = self._log_marginal_likelihood(
-                n_samples, n_features, eigen_vals_, alpha_, lambda_, coef_, rmse_
+                n_samples,
+                n_features,
+                sw_sum,
+                eigen_vals_,
+                alpha_,
+                lambda_,
+                coef_,
+                sse_,
             )
             self.scores_.append(s)
             self.scores_ = np.array(self.scores_)
@@ -378,7 +396,7 @@ def predict(self, X, return_std=False):
     def _update_coef_(
         self, X, y, n_samples, n_features, XT_y, U, Vh, eigen_vals_, alpha_, lambda_
     ):
-        """Update posterior mean and compute corresponding rmse.
+        """Update posterior mean and compute corresponding sse (sum of squared errors).

         Posterior mean is given by coef_ = scaled_sigma_ * X.T * y where
         scaled_sigma_ = (lambda_/alpha_ * np.eye(n_features)
@@ -394,12 +412,14 @@ def _update_coef_(
             [X.T, U / (eigen_vals_ + lambda_ / alpha_)[None, :], U.T, y]
         )

-        rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)
+        # Note: we do not need to explicitly use the weights in this sum because
+        # y and X were preprocessed by _rescale_data to handle the weights.
+        sse_ = np.sum((y - np.dot(X, coef_)) ** 2)

-        return coef_, rmse_
+        return coef_, sse_

     def _log_marginal_likelihood(
-        self, n_samples, n_features, eigen_vals, alpha_, lambda_, coef, rmse
+        self, n_samples, n_features, sw_sum, eigen_vals, alpha_, lambda_, coef, sse
     ):
         """Log marginal likelihood."""
         alpha_1 = self.alpha_1
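
The note added above relies on an identity worth spelling out: _rescale_data multiplies the rows of X and y by the square root of the sample weights, so the plain sum of squared errors on the rescaled data equals the weighted sum on the original data. A sketch on assumed toy data:

import numpy as np

rng = np.random.RandomState(0)
X, y = rng.randn(10, 2), rng.randn(10)
w = rng.rand(10)
coef = np.array([0.3, -0.7])

sqrt_w = np.sqrt(w)
X_r, y_r = X * sqrt_w[:, None], y * sqrt_w

sse_plain = np.sum((y_r - X_r @ coef) ** 2)
sse_weighted = np.sum(w * (y - X @ coef) ** 2)
print(np.isclose(sse_plain, sse_weighted))  # True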
@@ -421,11 +441,11 @@ def _log_marginal_likelihood(
         score += alpha_1 * log(alpha_) - alpha_2 * alpha_
         score += 0.5 * (
             n_features * log(lambda_)
-            + n_samples * log(alpha_)
-            - alpha_ * rmse
+            + sw_sum * log(alpha_)
+            - alpha_ * sse
             - lambda_ * np.sum(coef**2)
             + logdet_sigma
-            - n_samples * log(2 * np.pi)
+            - sw_sum * log(2 * np.pi)
         )

         return score
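
Up to the Gamma-prior terms on alpha_ and lambda_ added just before this hunk, the score assembled above is the log marginal likelihood with the total weight W = sum_i w_i in place of n_samples. As a sketch in LaTeX, writing p for n_features, m for the posterior mean coef_, and S for the (weighted) sum of squared errors:

\log p(y \mid \alpha, \lambda) \approx \frac{1}{2} \Big(
    p \log \lambda + W \log \alpha
    - \alpha S - \lambda \lVert m \rVert^2
    + \log \lvert \Sigma \rvert - W \log 2\pi
\Big)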
@@ -684,14 +704,12 @@ def update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_):
             coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)

             # Update alpha and lambda
-            rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)
+            sse_ = np.sum((y - np.dot(X, coef_)) ** 2)
             gamma_ = 1.0 - lambda_[keep_lambda] * np.diag(sigma_)
             lambda_[keep_lambda] = (gamma_ + 2.0 * lambda_1) / (
                 (coef_[keep_lambda]) ** 2 + 2.0 * lambda_2
             )
-            alpha_ = (n_samples - gamma_.sum() + 2.0 * alpha_1) / (
-                rmse_ + 2.0 * alpha_2
-            )
+            alpha_ = (n_samples - gamma_.sum() + 2.0 * alpha_1) / (sse_ + 2.0 * alpha_2)

             # Prune the weights with a precision over a threshold
             keep_lambda = lambda_ < self.threshold_lambda
@@ -706,7 +724,7 @@ def update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_):
                 + n_samples * log(alpha_)
                 + np.sum(np.log(lambda_))
             )
-            s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_**2).sum())
+            s -= 0.5 * (alpha_ * sse_ + (lambda_ * coef_**2).sum())
             self.scores_.append(s)

             # Check for convergence

sklearn/utils/_test_common/instance_generator.py

Lines changed: 0 additions & 9 deletions
@@ -836,15 +836,6 @@ def _yield_instances_for_check(check, estimator_orig):
                 "sample_weight is not equivalent to removing/repeating samples."
             ),
         },
-        BayesianRidge: {
-            # TODO: fix sample_weight handling of this estimator, see meta-issue #16298
-            "check_sample_weight_equivalence_on_dense_data": (
-                "sample_weight is not equivalent to removing/repeating samples."
-            ),
-            "check_sample_weight_equivalence_on_sparse_data": (
-                "sample_weight is not equivalent to removing/repeating samples."
-            ),
-        },
         BernoulliRBM: {
             "check_methods_subset_invariance": ("fails for the decision_function method"),
             "check_methods_sample_order_invariance": ("fails for the score_samples method"),
