Commit 7b1c9af

FIX Use cho_solve when return_std=True for GaussianProcessRegressor (#19939)
1 parent 6a562d3 commit 7b1c9af

3 files changed: +49 / -39 lines changed

doc/whats_new/v0.24.rst

Lines changed: 7 additions & 0 deletions
@@ -52,6 +52,13 @@ Changelog
 :mod:`sklearn.gaussian_process`
 ...............................
 
+- |Fix| Avoid explicitly forming inverse covariance matrix in
+  :class:`gaussian_process.GaussianProcessRegressor` when set to output
+  standard deviation. With certain covariance matrices this inverse is unstable
+  to compute explicitly. Calling Cholesky solver mitigates this issue in
+  computation.
+  :pr:`19939` by :user:`Ian Halvic <iwhalvic>`.
+
 - |Fix| Avoid division by zero when scaling constant target in
   :class:`gaussian_process.GaussianProcessRegressor`. It was due to a std. dev.
   equal to 0. Now, such case is detected and the std. dev. is affected to 1

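For context on the changelog entry above: the commit replaces the explicitly formed inverse K_inv of the training covariance with a Cholesky solve. The snippet below is a minimal sketch, not part of the commit, comparing the two routes on a small random positive definite matrix used as a stand-in for the kernel matrix. Both compute the diagonal of K_trans @ K^-1 @ K_trans.T, but the cho_solve route never materializes K^-1, which is the point of the fix.

import numpy as np
from scipy.linalg import cholesky, cho_solve, solve_triangular

rng = np.random.RandomState(0)
A = rng.randn(6, 6)
K = A @ A.T + 1e-10 * np.eye(6)   # symmetric positive definite stand-in for the kernel matrix
K_trans = rng.randn(3, 6)         # stand-in cross-covariance between test and train points

L = cholesky(K, lower=True)

# Old route: build K^-1 explicitly from the Cholesky factor, then take a quadratic form.
L_inv = solve_triangular(L.T, np.eye(6))
K_inv = L_inv.dot(L_inv.T)
q_old = np.einsum("ij,ij->i", K_trans.dot(K_inv), K_inv.T.dot(K_trans.T).T) if False else \
    np.einsum("ij,ij->i", K_trans.dot(K_inv), K_trans)

# New route (what the commit switches to): solve K @ V = K_trans.T directly.
V = cho_solve((L, True), K_trans.T)
q_new = np.einsum("ij,ji->i", K_trans, V)

# Same quantity either way; the cho_solve route avoids the explicit inverse.
np.testing.assert_allclose(q_old, q_new)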
sklearn/gaussian_process/_gpr.py

Lines changed: 9 additions & 15 deletions
@@ -8,7 +8,7 @@
 from operator import itemgetter
 
 import numpy as np
-from scipy.linalg import cholesky, cho_solve, solve_triangular
+from scipy.linalg import cholesky, cho_solve
 import scipy.optimize
 
 from ..base import BaseEstimator, RegressorMixin, clone
@@ -270,8 +270,6 @@ def obj_func(theta, eval_gradient=True):
         K[np.diag_indices_from(K)] += self.alpha
         try:
             self.L_ = cholesky(K, lower=True)  # Line 2
-            # self.L_ changed, self._K_inv needs to be recomputed
-            self._K_inv = None
         except np.linalg.LinAlgError as exc:
             exc.args = ("The kernel, %s, is not returning a "
                         "positive definite matrix. Try gradually "
@@ -345,31 +343,27 @@ def predict(self, X, return_std=False, return_cov=False):
         else:  # Predict based on GP posterior
             K_trans = self.kernel_(X, self.X_train_)
             y_mean = K_trans.dot(self.alpha_)  # Line 4 (y_mean = f_star)
-
             # undo normalisation
             y_mean = self._y_train_std * y_mean + self._y_train_mean
 
             if return_cov:
-                v = cho_solve((self.L_, True), K_trans.T)  # Line 5
-                y_cov = self.kernel_(X) - K_trans.dot(v)  # Line 6
+                # Solve K @ V = K_trans.T
+                V = cho_solve((self.L_, True), K_trans.T)  # Line 5
+                y_cov = self.kernel_(X) - K_trans.dot(V)  # Line 6
 
                 # undo normalisation
                 y_cov = y_cov * self._y_train_std**2
 
                 return y_mean, y_cov
             elif return_std:
-                # cache result of K_inv computation
-                if self._K_inv is None:
-                    # compute inverse K_inv of K based on its Cholesky
-                    # decomposition L and its inverse L_inv
-                    L_inv = solve_triangular(self.L_.T,
-                                             np.eye(self.L_.shape[0]))
-                    self._K_inv = L_inv.dot(L_inv.T)
+                # Solve K @ V = K_trans.T
+                V = cho_solve((self.L_, True), K_trans.T)  # Line 5
 
                 # Compute variance of predictive distribution
+                # Use einsum to avoid explicitly forming the large matrix
+                # K_trans @ V just to extract its diagonal afterward.
                 y_var = self.kernel_.diag(X)
-                y_var -= np.einsum("ij,ij->i",
-                                   np.dot(K_trans, self._K_inv), K_trans)
+                y_var -= np.einsum("ij,ji->i", K_trans, V)
 
                 # Check if any of the variances is negative because of
                 # numerical issues. If yes: set the variance to 0.

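One detail of the predict() change above: in the return_std branch only the diagonal of K_trans @ V is needed, and the new einsum subscripts compute exactly that without allocating the full n_test x n_test product. A minimal sketch, not part of the commit, with small random stand-in arrays:

import numpy as np

rng = np.random.RandomState(0)
K_trans = rng.randn(4, 6)   # (n_test, n_train)
V = rng.randn(6, 4)         # (n_train, n_test), e.g. the result of a cho_solve as above

diag_full = np.diag(K_trans @ V)                   # builds the full 4 x 4 product first
diag_einsum = np.einsum("ij,ji->i", K_trans, V)    # computes only the diagonal

np.testing.assert_allclose(diag_full, diag_einsum)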
sklearn/gaussian_process/tests/test_gpr.py

Lines changed: 33 additions & 24 deletions
@@ -20,10 +20,12 @@
 from sklearn.gaussian_process.tests._mini_sequence_kernel import MiniSeqKernel
 from sklearn.exceptions import ConvergenceWarning
 
-from sklearn.utils._testing \
-    import (assert_array_less,
-            assert_almost_equal, assert_array_almost_equal,
-            assert_array_equal, assert_allclose)
+from sklearn.utils._testing import (
+    assert_array_less,
+    assert_almost_equal,
+    assert_array_almost_equal,
+    assert_allclose
+)
 
 
 def f(x):
@@ -185,7 +187,8 @@ def test_no_optimizer():
 
 
 @pytest.mark.parametrize('kernel', kernels)
-def test_predict_cov_vs_std(kernel):
+@pytest.mark.parametrize("target", [y, np.ones(X.shape[0], dtype=np.float64)])
+def test_predict_cov_vs_std(kernel, target):
     if sys.maxsize <= 2 ** 32 and sys.version_info[:2] == (3, 6):
         pytest.xfail("This test may fail on 32bit Py3.6")
 
@@ -455,25 +458,6 @@ def test_no_fit_default_predict():
     assert_array_almost_equal(y_cov1, y_cov2)
 
 
-@pytest.mark.parametrize('kernel', kernels)
-def test_K_inv_reset(kernel):
-    y2 = f(X2).ravel()
-
-    # Test that self._K_inv is reset after a new fit
-    gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
-    assert hasattr(gpr, '_K_inv')
-    assert gpr._K_inv is None
-    gpr.predict(X, return_std=True)
-    assert gpr._K_inv is not None
-    gpr.fit(X2, y2)
-    assert gpr._K_inv is None
-    gpr.predict(X2, return_std=True)
-    gpr2 = GaussianProcessRegressor(kernel=kernel).fit(X2, y2)
-    gpr2.predict(X2, return_std=True)
-    # the value of K_inv should be independent of the first fit
-    assert_array_equal(gpr._K_inv, gpr2._K_inv)
-
-
 def test_warning_bounds():
     kernel = RBF(length_scale_bounds=[1e-5, 1e-3])
     gpr = GaussianProcessRegressor(kernel=kernel)
@@ -569,3 +553,28 @@ def test_constant_target(kernel):
     assert_allclose(y_pred, y_constant)
     # set atol because we compare to zero
     assert_allclose(np.diag(y_cov), 0., atol=1e-9)
+
+
+def test_gpr_consistency_std_cov_non_invertible_kernel():
+    """Check the consistency between the returned std. dev. and the covariance.
+    Non-regression test for:
+    https://github.com/scikit-learn/scikit-learn/issues/19936
+    Inconsistencies were observed when the kernel cannot be inverted (or
+    numerically stable).
+    """
+    kernel = (C(8.98576054e+05, (1e-12, 1e12)) *
+              RBF([5.91326520e+02, 1.32584051e+03], (1e-12, 1e12)) +
+              WhiteKernel(noise_level=1e-5))
+    gpr = GaussianProcessRegressor(kernel=kernel, alpha=0, optimizer=None)
+    X_train = np.array([[0., 0.], [1.54919334, -0.77459667], [-1.54919334, 0.],
+                        [0., -1.54919334], [0.77459667, 0.77459667],
+                        [-0.77459667, 1.54919334]])
+    y_train = np.array([[-2.14882017e-10], [-4.66975823e+00], [4.01823986e+00],
+                        [-1.30303674e+00], [-1.35760156e+00],
+                        [3.31215668e+00]])
+    gpr.fit(X_train, y_train)
+    X_test = np.array([[-1.93649167, -1.93649167], [1.93649167, -1.93649167],
+                       [-1.93649167, 1.93649167], [1.93649167, 1.93649167]])
+    pred1, std = gpr.predict(X_test, return_std=True)
+    pred2, cov = gpr.predict(X_test, return_cov=True)
+    assert_allclose(std, np.sqrt(np.diagonal(cov)), rtol=1e-5)

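Outside pytest, the property the new test exercises can be checked on any fitted regressor: the standard deviations from return_std should match the square root of the diagonal of the covariance from return_cov. A minimal usage sketch, not part of the commit, on a small well-conditioned toy problem rather than the near-singular kernel used in the test:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

X_train = np.array([[0.0], [1.0], [2.0], [3.0]])
y_train = np.sin(X_train).ravel()

# Fixed kernel, no hyperparameter search, just to exercise predict().
gpr = GaussianProcessRegressor(kernel=RBF(length_scale=1.0), optimizer=None)
gpr.fit(X_train, y_train)

X_test = np.linspace(-1.0, 4.0, 7).reshape(-1, 1)
_, std = gpr.predict(X_test, return_std=True)
_, cov = gpr.predict(X_test, return_cov=True)

# The per-point standard deviation should equal the square root of the
# covariance diagonal, as the new test asserts for a near-singular kernel.
np.testing.assert_allclose(std, np.sqrt(np.diagonal(cov)), rtol=1e-5)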
0 commit comments