@@ -244,9 +244,15 @@ def fit(self, X, y, sample_weight=None):
244
244
y_numeric = True ,
245
245
)
246
246
dtype = X .dtype
247
+ n_samples , n_features = X .shape
247
248
249
+ sw_sum = n_samples
250
+ y_var = y .var ()
248
251
if sample_weight is not None :
249
252
sample_weight = _check_sample_weight (sample_weight , X , dtype = dtype )
253
+ sw_sum = sample_weight .sum ()
254
+ y_mean = np .average (y , weights = sample_weight )
255
+ y_var = np .average ((y - y_mean ) ** 2 , weights = sample_weight )
250
256
251
257
X , y , X_offset_ , y_offset_ , X_scale_ = _preprocess_data (
252
258
X ,
@@ -262,16 +268,14 @@ def fit(self, X, y, sample_weight=None):
262
268
263
269
self .X_offset_ = X_offset_
264
270
self .X_scale_ = X_scale_
265
- n_samples , n_features = X .shape
266
271
267
272
# Initialization of the values of the parameters
268
273
eps = np .finfo (np .float64 ).eps
269
- # Add `eps` in the denominator to omit division by zero if `np.var(y)`
270
- # is zero
274
+ # Add `eps` in the denominator to omit division by zero
271
275
alpha_ = self .alpha_init
272
276
lambda_ = self .lambda_init
273
277
if alpha_ is None :
274
- alpha_ = 1.0 / (np . var ( y ) + eps )
278
+ alpha_ = 1.0 / (y_var + eps )
275
279
if lambda_ is None :
276
280
lambda_ = 1.0
277
281
@@ -295,21 +299,28 @@ def fit(self, X, y, sample_weight=None):
295
299
# Convergence loop of the bayesian ridge regression
296
300
for iter_ in range (self .max_iter ):
297
301
# update posterior mean coef_ based on alpha_ and lambda_ and
298
- # compute corresponding rmse
299
- coef_ , rmse_ = self ._update_coef_ (
302
+ # compute corresponding sse (sum of squared errors)
303
+ coef_ , sse_ = self ._update_coef_ (
300
304
X , y , n_samples , n_features , XT_y , U , Vh , eigen_vals_ , alpha_ , lambda_
301
305
)
302
306
if self .compute_score :
303
307
# compute the log marginal likelihood
304
308
s = self ._log_marginal_likelihood (
305
- n_samples , n_features , eigen_vals_ , alpha_ , lambda_ , coef_ , rmse_
309
+ n_samples ,
310
+ n_features ,
311
+ sw_sum ,
312
+ eigen_vals_ ,
313
+ alpha_ ,
314
+ lambda_ ,
315
+ coef_ ,
316
+ sse_ ,
306
317
)
307
318
self .scores_ .append (s )
308
319
309
320
# Update alpha and lambda according to (MacKay, 1992)
310
321
gamma_ = np .sum ((alpha_ * eigen_vals_ ) / (lambda_ + alpha_ * eigen_vals_ ))
311
322
lambda_ = (gamma_ + 2 * lambda_1 ) / (np .sum (coef_ ** 2 ) + 2 * lambda_2 )
312
- alpha_ = (n_samples - gamma_ + 2 * alpha_1 ) / (rmse_ + 2 * alpha_2 )
323
+ alpha_ = (sw_sum - gamma_ + 2 * alpha_1 ) / (sse_ + 2 * alpha_2 )
313
324
314
325
# Check for convergence
315
326
if iter_ != 0 and np .sum (np .abs (coef_old_ - coef_ )) < self .tol :
@@ -324,13 +335,20 @@ def fit(self, X, y, sample_weight=None):
324
335
# log marginal likelihood and posterior covariance
325
336
self .alpha_ = alpha_
326
337
self .lambda_ = lambda_
327
- self .coef_ , rmse_ = self ._update_coef_ (
338
+ self .coef_ , sse_ = self ._update_coef_ (
328
339
X , y , n_samples , n_features , XT_y , U , Vh , eigen_vals_ , alpha_ , lambda_
329
340
)
330
341
if self .compute_score :
331
342
# compute the log marginal likelihood
332
343
s = self ._log_marginal_likelihood (
333
- n_samples , n_features , eigen_vals_ , alpha_ , lambda_ , coef_ , rmse_
344
+ n_samples ,
345
+ n_features ,
346
+ sw_sum ,
347
+ eigen_vals_ ,
348
+ alpha_ ,
349
+ lambda_ ,
350
+ coef_ ,
351
+ sse_ ,
334
352
)
335
353
self .scores_ .append (s )
336
354
self .scores_ = np .array (self .scores_ )
@@ -378,7 +396,7 @@ def predict(self, X, return_std=False):
378
396
def _update_coef_ (
379
397
self , X , y , n_samples , n_features , XT_y , U , Vh , eigen_vals_ , alpha_ , lambda_
380
398
):
381
- """Update posterior mean and compute corresponding rmse .
399
+ """Update posterior mean and compute corresponding sse (sum of squared errors) .
382
400
383
401
Posterior mean is given by coef_ = scaled_sigma_ * X.T * y where
384
402
scaled_sigma_ = (lambda_/alpha_ * np.eye(n_features)
@@ -394,12 +412,14 @@ def _update_coef_(
394
412
[X .T , U / (eigen_vals_ + lambda_ / alpha_ )[None , :], U .T , y ]
395
413
)
396
414
397
- rmse_ = np .sum ((y - np .dot (X , coef_ )) ** 2 )
415
+ # Note: we do not need to explicitly use the weights in this sum because
416
+ # y and X were preprocessed by _rescale_data to handle the weights.
417
+ sse_ = np .sum ((y - np .dot (X , coef_ )) ** 2 )
398
418
399
- return coef_ , rmse_
419
+ return coef_ , sse_
400
420
401
421
def _log_marginal_likelihood (
402
- self , n_samples , n_features , eigen_vals , alpha_ , lambda_ , coef , rmse
422
+ self , n_samples , n_features , sw_sum , eigen_vals , alpha_ , lambda_ , coef , sse
403
423
):
404
424
"""Log marginal likelihood."""
405
425
alpha_1 = self .alpha_1
@@ -421,11 +441,11 @@ def _log_marginal_likelihood(
421
441
score += alpha_1 * log (alpha_ ) - alpha_2 * alpha_
422
442
score += 0.5 * (
423
443
n_features * log (lambda_ )
424
- + n_samples * log (alpha_ )
425
- - alpha_ * rmse
444
+ + sw_sum * log (alpha_ )
445
+ - alpha_ * sse
426
446
- lambda_ * np .sum (coef ** 2 )
427
447
+ logdet_sigma
428
- - n_samples * log (2 * np .pi )
448
+ - sw_sum * log (2 * np .pi )
429
449
)
430
450
431
451
return score
@@ -684,14 +704,12 @@ def update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_):
684
704
coef_ = update_coeff (X , y , coef_ , alpha_ , keep_lambda , sigma_ )
685
705
686
706
# Update alpha and lambda
687
- rmse_ = np .sum ((y - np .dot (X , coef_ )) ** 2 )
707
+ sse_ = np .sum ((y - np .dot (X , coef_ )) ** 2 )
688
708
gamma_ = 1.0 - lambda_ [keep_lambda ] * np .diag (sigma_ )
689
709
lambda_ [keep_lambda ] = (gamma_ + 2.0 * lambda_1 ) / (
690
710
(coef_ [keep_lambda ]) ** 2 + 2.0 * lambda_2
691
711
)
692
- alpha_ = (n_samples - gamma_ .sum () + 2.0 * alpha_1 ) / (
693
- rmse_ + 2.0 * alpha_2
694
- )
712
+ alpha_ = (n_samples - gamma_ .sum () + 2.0 * alpha_1 ) / (sse_ + 2.0 * alpha_2 )
695
713
696
714
# Prune the weights with a precision over a threshold
697
715
keep_lambda = lambda_ < self .threshold_lambda
@@ -706,7 +724,7 @@ def update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_):
706
724
+ n_samples * log (alpha_ )
707
725
+ np .sum (np .log (lambda_ ))
708
726
)
709
- s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_ ** 2 ).sum ())
727
+ s -= 0.5 * (alpha_ * sse_ + (lambda_ * coef_ ** 2 ).sum ())
710
728
self .scores_ .append (s )
711
729
712
730
# Check for convergence
0 commit comments