In both cases, the kernel's parameters are estimated using the maximum
likelihood principle.

-The figures illustrate the interpolating property of the Gaussian Process
-model as well as its probabilistic nature in the form of a pointwise 95%
-confidence interval.
-
-Note that the parameter ``alpha`` is applied as a Tikhonov
-regularization of the assumed covariance between the training points.
+The figures illustrate the interpolating property of the Gaussian Process model
+as well as its probabilistic nature in the form of a pointwise 95% confidence
+interval.

+Note that `alpha` is a parameter that controls the strength of the Tikhonov
+regularization applied to the covariance matrix of the training points.
"""

# Author: Vincent Dubourg <vincent.dubourg@gmail.com>
#         Jake Vanderplas <vanderplas@astro.washington.edu>
-#         Jan Hendrik Metzen <jhm@informatik.uni-bremen.de>s
+#         Jan Hendrik Metzen <jhm@informatik.uni-bremen.de>
+#         Guillaume Lemaitre <g.lemaitre58@gmail.com>
# License: BSD 3 clause

+# %%
+# Dataset generation
+# ------------------
+#
+# We will start by generating a synthetic dataset. The true generative process
+# is defined as :math:`f(x) = x \sin(x)`.
import numpy as np
-from matplotlib import pyplot as plt
-
-from sklearn.gaussian_process import GaussianProcessRegressor
-from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
-
-np.random.seed(1)
-
-
-def f(x):
-    """The function to predict."""
-    return x * np.sin(x)
-
-
-# ----------------------------------------------------------------------
-# First the noiseless case
-X = np.atleast_2d([1.0, 3.0, 5.0, 6.0, 7.0, 8.0]).T

-# Observations
-y = f(X).ravel()
+X = np.linspace(start=0, stop=10, num=1_000).reshape(-1, 1)
+y = np.squeeze(X * np.sin(X))

-# Mesh the input space for evaluations of the real function, the prediction and
-# its MSE
-x = np.atleast_2d(np.linspace(0, 10, 1000)).T
+# %%
+import matplotlib.pyplot as plt

-# Instantiate a Gaussian Process model
-kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
-gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
-
-# Fit to data using Maximum Likelihood Estimation of the parameters
-gp.fit(X, y)
-
-# Make the prediction on the meshed x-axis (ask for MSE as well)
-y_pred, sigma = gp.predict(x, return_std=True)
-
-# Plot the function, the prediction and the 95% confidence interval based on
-# the MSE
-plt.figure()
-plt.plot(x, f(x), "r:", label=r"$f(x) = x\,\sin(x)$")
-plt.plot(X, y, "r.", markersize=10, label="Observations")
-plt.plot(x, y_pred, "b-", label="Prediction")
-plt.fill(
-    np.concatenate([x, x[::-1]]),
-    np.concatenate([y_pred - 1.9600 * sigma, (y_pred + 1.9600 * sigma)[::-1]]),
+plt.plot(X, y, label=r"$f(x) = x \sin(x)$", linestyle="dotted")
+plt.legend()
+plt.xlabel("$x$")
+plt.ylabel("$f(x)$")
+_ = plt.title("True generative process")
+
+# %%
+# We will use this dataset in the next experiment to illustrate how Gaussian
+# Process regression works.
+#
+# Example with noise-free target
+# ------------------------------
+#
+# In this first example, we will use the true generative process without
+# adding any noise. For training the Gaussian Process regression, we will only
+# select a few samples.
+rng = np.random.RandomState(1)
+training_indices = rng.choice(np.arange(y.size), size=6, replace=False)
+X_train, y_train = X[training_indices], y[training_indices]
+
+# %%
+# Now, we fit a Gaussian process on these few training data samples. We will
+# use a radial basis function (RBF) kernel and a constant parameter to fit the
+# amplitude.
+from sklearn.gaussian_process import GaussianProcessRegressor
+from sklearn.gaussian_process.kernels import RBF
+
+kernel = 1 * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e2))
+gaussian_process = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
+gaussian_process.fit(X_train, y_train)
+gaussian_process.kernel_
+
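The hyperparameters shown by `kernel_` are the ones selected by maximizing the
log-marginal likelihood during `fit`. A minimal sketch of how to inspect them,
assuming the `gaussian_process` object fitted above (`kernel_.theta` and
`log_marginal_likelihood` are public scikit-learn API):

# Report the kernel hyperparameters chosen by maximum likelihood and the
# corresponding log-marginal likelihood of the fitted model.
print(gaussian_process.kernel_)
print(gaussian_process.log_marginal_likelihood(gaussian_process.kernel_.theta))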
+# %%
+# After fitting our model, we see that the hyperparameters of the kernel have
+# been optimized. Now, we will use our fitted model to compute the mean
+# prediction on the full dataset and plot the 95% confidence interval.
+mean_prediction, std_prediction = gaussian_process.predict(X, return_std=True)
+
+plt.plot(X, y, label=r"$f(x) = x \sin(x)$", linestyle="dotted")
+plt.scatter(X_train, y_train, label="Observations")
+plt.plot(X, mean_prediction, label="Mean prediction")
+plt.fill_between(
+    X.ravel(),
+    mean_prediction - 1.96 * std_prediction,
+    mean_prediction + 1.96 * std_prediction,
    alpha=0.5,
-    fc="b",
-    ec="None",
-    label="95% confidence interval",
+    label=r"95% confidence interval",
)
+plt.legend()
plt.xlabel("$x$")
plt.ylabel("$f(x)$")
-plt.ylim(-10, 20)
-plt.legend(loc="upper left")
-
-# ----------------------------------------------------------------------
-# now the noisy case
-X = np.linspace(0.1, 9.9, 20)
-X = np.atleast_2d(X).T
-
-# Observations and noise
-y = f(X).ravel()
-dy = 0.5 + 1.0 * np.random.random(y.shape)
-noise = np.random.normal(0, dy)
-y += noise
-
-# Instantiate a Gaussian Process model
-gp = GaussianProcessRegressor(kernel=kernel, alpha=dy ** 2, n_restarts_optimizer=10)
-
-# Fit to data using Maximum Likelihood Estimation of the parameters
-gp.fit(X, y)
-
-# Make the prediction on the meshed x-axis (ask for MSE as well)
-y_pred, sigma = gp.predict(x, return_std=True)
-
-# Plot the function, the prediction and the 95% confidence interval based on
-# the MSE
-plt.figure()
-plt.plot(x, f(x), "r:", label=r"$f(x) = x\,\sin(x)$")
-plt.errorbar(X.ravel(), y, dy, fmt="r.", markersize=10, label="Observations")
-plt.plot(x, y_pred, "b-", label="Prediction")
-plt.fill(
-    np.concatenate([x, x[::-1]]),
-    np.concatenate([y_pred - 1.9600 * sigma, (y_pred + 1.9600 * sigma)[::-1]]),
+_ = plt.title("Gaussian process regression on noise-free dataset")
+
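The interpolating property mentioned in the docstring can be verified
numerically: with noise-free targets, the posterior mean passes through the
training points and the predictive standard deviation shrinks towards zero
there. A small sketch, assuming the noise-free `gaussian_process` fitted above:

# Check the interpolation at the training points (noise-free case): the mean
# should match y_train and the standard deviation should be close to zero.
mean_at_train, std_at_train = gaussian_process.predict(X_train, return_std=True)
print(np.allclose(mean_at_train, y_train, atol=1e-6))  # expected: True
print(std_at_train.max())                              # expected: close to 0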
+# %%
+# We see that for a prediction made on a data point close to the one from the
+# training set, the 95% confidence interval has a small amplitude. Whenever a
+# sample falls far from the training data, our model's prediction is less
+# accurate and less precise (higher uncertainty).
+#
+# Example with noisy targets
+# --------------------------
+#
+# We can repeat a similar experiment, this time adding noise to the target.
+# This makes it possible to see the effect of the noise on the fitted model.
+#
+# We add some random Gaussian noise to the target with an arbitrary
+# standard deviation.
+noise_std = 0.75
+y_train_noisy = y_train + rng.normal(loc=0.0, scale=noise_std, size=y_train.shape)
+
+# %%
+# We create a similar Gaussian process model. In addition to the kernel, this
+# time we specify the parameter `alpha`, which can be interpreted as the
+# variance of additive Gaussian noise on the targets.
+gaussian_process = GaussianProcessRegressor(
+    kernel=kernel, alpha=noise_std ** 2, n_restarts_optimizer=9
+)
+gaussian_process.fit(X_train, y_train_noisy)
+mean_prediction, std_prediction = gaussian_process.predict(X, return_std=True)
+
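To make the Tikhonov remark from the docstring concrete: `alpha` is added to
the diagonal of the kernel matrix evaluated at the training points, so the
model effectively works with K(X_train, X_train) + alpha * I. A sketch of that
regularized covariance, assuming the objects defined above (scikit-learn
kernels are callable):

# Build the regularized training covariance used by the regressor internally:
# the kernel matrix plus alpha on its diagonal.
K = gaussian_process.kernel_(X_train)
K_regularized = K + noise_std ** 2 * np.eye(len(X_train))
print(K_regularized.shape)  # (6, 6): one row/column per training sample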
+# %%
+# Let's plot the mean prediction and the uncertainty region as before.
+plt.plot(X, y, label=r"$f(x) = x \sin(x)$", linestyle="dotted")
+plt.errorbar(
+    X_train,
+    y_train_noisy,
+    noise_std,
+    linestyle="None",
+    color="tab:blue",
+    marker=".",
+    markersize=10,
+    label="Observations",
+)
+plt.plot(X, mean_prediction, label="Mean prediction")
+plt.fill_between(
+    X.ravel(),
+    mean_prediction - 1.96 * std_prediction,
+    mean_prediction + 1.96 * std_prediction,
+    color="tab:orange",
    alpha=0.5,
-    fc="b",
-    ec="None",
-    label="95% confidence interval",
+    label=r"95% confidence interval",
)
+plt.legend()
plt.xlabel("$x$")
plt.ylabel("$f(x)$")
-plt.ylim(-10, 20)
-plt.legend(loc="upper left")
+_ = plt.title("Gaussian process regression on a noisy dataset")

-plt.show()
+# %%
+# The noise affects the predictions close to the training samples: the
+# predictive uncertainty near the training samples is larger because we
+# explicitly model a given level of target noise that is independent of the
+# input variable.
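Under the same assumptions as the earlier sketches, this behaviour can be
checked directly: modeling the noise through `alpha` keeps the predictive
uncertainty from collapsing at the training points.

# With alpha > 0, the standard deviation at the training inputs stays
# strictly positive instead of shrinking towards zero as in the noise-free case.
_, std_at_train_noisy = gaussian_process.predict(X_train, return_std=True)
print(std_at_train_noisy.min())  # expected: clearly above zero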