@@ -91,6 +91,9 @@ class BayesianRidge(LinearModel, RegressorMixin):
91
91
lambda_ : float
92
92
estimated precision of the weights.
93
93
94
+ sigma_ : array, shape = (n_features, n_features)
95
+ estimated variance-covariance matrix of the weights
96
+
94
97
scores_ : float
95
98
if computed, value of the objective function (to be maximized)
96
99
@@ -109,6 +112,16 @@ class BayesianRidge(LinearModel, RegressorMixin):
109
112
Notes
110
113
-----
111
114
See examples/linear_model/plot_bayesian_ridge.py for an example.
115
+
116
+ References
117
+ ----------
118
+ D. J. C. MacKay, Bayesian Interpolation, Computation and Neural Systems,
119
+ Vol. 4, No. 3, 1992.
120
+
121
+ R. Salakhutdinov, Lecture notes on Statistical Machine Learning,
122
+ http://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=15
123
+ Their beta is our self.alpha_
124
+ Their alpha is our self.lambda_
112
125
"""
113
126
114
127
def __init__ (self , n_iter = 300 , tol = 1.e-3 , alpha_1 = 1.e-6 , alpha_2 = 1.e-6 ,
@@ -142,8 +155,10 @@ def fit(self, X, y):
142
155
self : returns an instance of self.
143
156
"""
144
157
X , y = check_X_y (X , y , dtype = np .float64 , y_numeric = True )
145
- X , y , X_offset , y_offset , X_scale = self ._preprocess_data (
158
+ X , y , X_offset_ , y_offset_ , X_scale_ = self ._preprocess_data (
146
159
X , y , self .fit_intercept , self .normalize , self .copy_X )
160
+ self .X_offset_ = X_offset_
161
+ self .X_scale_ = X_scale_
147
162
n_samples , n_features = X .shape
148
163
149
164
# Initialization of the values of the parameters
@@ -171,7 +186,8 @@ def fit(self, X, y):
171
186
# coef_ = sigma_^-1 * XT * y
172
187
if n_samples > n_features :
173
188
coef_ = np .dot (Vh .T ,
174
- Vh / (eigen_vals_ + lambda_ / alpha_ )[:, None ])
189
+ Vh / (eigen_vals_ +
190
+ lambda_ / alpha_ )[:, np .newaxis ])
175
191
coef_ = np .dot (coef_ , XT_y )
176
192
if self .compute_score :
177
193
logdet_sigma_ = - np .sum (
@@ -216,10 +232,45 @@ def fit(self, X, y):
216
232
self .alpha_ = alpha_
217
233
self .lambda_ = lambda_
218
234
self .coef_ = coef_
235
+ sigma_ = np .dot (Vh .T ,
236
+ Vh / (eigen_vals_ + lambda_ / alpha_ )[:, np .newaxis ])
237
+ self .sigma_ = (1. / alpha_ ) * sigma_
219
238
220
- self ._set_intercept (X_offset , y_offset , X_scale )
239
+ self ._set_intercept (X_offset_ , y_offset_ , X_scale_ )
221
240
return self
222
241
242
+ def predict (self , X , return_std = False ):
243
+ """Predict using the linear model.
244
+
245
+ In addition to the mean of the predictive distribution, also its
246
+ standard deviation can be returned.
247
+
248
+ Parameters
249
+ ----------
250
+ X : {array-like, sparse matrix}, shape = (n_samples, n_features)
251
+ Samples.
252
+
253
+ return_std : boolean, optional
254
+ Whether to return the standard deviation of posterior prediction.
255
+
256
+ Returns
257
+ -------
258
+ y_mean : array, shape = (n_samples,)
259
+ Mean of predictive distribution of query points.
260
+
261
+ y_std : array, shape = (n_samples,)
262
+ Standard deviation of predictive distribution of query points.
263
+ """
264
+ y_mean = self ._decision_function (X )
265
+ if return_std is False :
266
+ return y_mean
267
+ else :
268
+ if self .normalize :
269
+ X = (X - self .X_offset_ ) / self .X_scale_
270
+ sigmas_squared_data = (np .dot (X , self .sigma_ ) * X ).sum (axis = 1 )
271
+ y_std = np .sqrt (sigmas_squared_data + (1. / self .alpha_ ))
272
+ return y_mean , y_std
273
+
223
274
224
275
###############################################################################
225
276
# ARD (Automatic Relevance Determination) regression
@@ -323,6 +374,19 @@ class ARDRegression(LinearModel, RegressorMixin):
323
374
Notes
324
375
-----
325
376
See examples/linear_model/plot_ard.py for an example.
377
+
378
+ References
379
+ ----------
380
+ D. J. C. MacKay, Bayesian nonlinear modeling for the prediction
381
+ competition, ASHRAE Transactions, 1994.
382
+
383
+ R. Salakhutdinov, Lecture notes on Statistical Machine Learning,
384
+ http://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=15
385
+ Their beta is our self.alpha_
386
+ Their alpha is our self.lambda_
387
+ ARD is a little different than the slide: only dimensions/features for
388
+ which self.lambda_ < self.threshold_lambda are kept and the rest are
389
+ discarded.
326
390
"""
327
391
328
392
def __init__ (self , n_iter = 300 , tol = 1.e-3 , alpha_1 = 1.e-6 , alpha_2 = 1.e-6 ,
@@ -365,7 +429,7 @@ def fit(self, X, y):
365
429
n_samples , n_features = X .shape
366
430
coef_ = np .zeros (n_features )
367
431
368
- X , y , X_offset , y_offset , X_scale = self ._preprocess_data (
432
+ X , y , X_offset_ , y_offset_ , X_scale_ = self ._preprocess_data (
369
433
X , y , self .fit_intercept , self .normalize , self .copy_X )
370
434
371
435
# Launch the convergence loop
@@ -417,7 +481,7 @@ def fit(self, X, y):
417
481
s = (lambda_1 * np .log (lambda_ ) - lambda_2 * lambda_ ).sum ()
418
482
s += alpha_1 * log (alpha_ ) - alpha_2 * alpha_
419
483
s += 0.5 * (fast_logdet (sigma_ ) + n_samples * log (alpha_ ) +
420
- np .sum (np .log (lambda_ )))
484
+ np .sum (np .log (lambda_ )))
421
485
s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_ ** 2 ).sum ())
422
486
self .scores_ .append (s )
423
487
@@ -432,5 +496,38 @@ def fit(self, X, y):
432
496
self .alpha_ = alpha_
433
497
self .sigma_ = sigma_
434
498
self .lambda_ = lambda_
435
- self ._set_intercept (X_offset , y_offset , X_scale )
499
+ self ._set_intercept (X_offset_ , y_offset_ , X_scale_ )
436
500
return self
501
+
502
+ def predict (self , X , return_std = False ):
503
+ """Predict using the linear model.
504
+
505
+ In addition to the mean of the predictive distribution, also its
506
+ standard deviation can be returned.
507
+
508
+ Parameters
509
+ ----------
510
+ X : {array-like, sparse matrix}, shape = (n_samples, n_features)
511
+ Samples.
512
+
513
+ return_std : boolean, optional
514
+ Whether to return the standard deviation of posterior prediction.
515
+
516
+ Returns
517
+ -------
518
+ y_mean : array, shape = (n_samples,)
519
+ Mean of predictive distribution of query points.
520
+
521
+ y_std : array, shape = (n_samples,)
522
+ Standard deviation of predictive distribution of query points.
523
+ """
524
+ y_mean = self ._decision_function (X )
525
+ if return_std is False :
526
+ return y_mean
527
+ else :
528
+ if self .normalize :
529
+ X = (X - self .X_offset_ ) / self .X_scale_
530
+ X = X [:, self .lambda_ < self .threshold_lambda ]
531
+ sigmas_squared_data = (np .dot (X , self .sigma_ ) * X ).sum (axis = 1 )
532
+ y_std = np .sqrt (sigmas_squared_data + (1. / self .alpha_ ))
533
+ return y_mean , y_std
0 commit comments