|
| 1 | +""" |
| 2 | +====================================================== |
| 3 | +Plot a confidence ellipse of a two-dimensional dataset |
| 4 | +====================================================== |
| 5 | +
|
| 6 | +This example shows how to plot a confidence ellipse of a |
| 7 | +two-dimensional dataset, using its pearson correlation coefficient. |
| 8 | +
|
| 9 | +The approach that is used to obtain the correct geometry is |
| 10 | +explained and proved here: |
| 11 | +
|
| 12 | +https://carstenschelp.github.io/2018/09/14/Plot_Confidence_Ellipse_001.html |
| 13 | +
|
| 14 | +The method avoids the use of an iterative eigen decomposition algorithm |
| 15 | +and makes use of the fact that a normalized covariance matrix (composed of |
| 16 | +pearson correlation coefficients and ones) is particularly easy to handle. |
| 17 | +""" |
| 18 | + |
| 19 | +############################################################################# |
| 20 | +# |
| 21 | +# The plotting function itself |
| 22 | +# """""""""""""""""""""""""""" |
| 23 | +# |
| 24 | +# This function plots the confidence ellipse of the covariance of the given |
| 25 | +# array-like variables x and y. The ellipse is plotted into the given |
| 26 | +# axes-object ax. |
| 27 | +# |
| 28 | +# The radiuses of the ellipse can be controlled by n_std which is the number |
| 29 | +# of standard deviations. The default value is 3 which makes the ellipse |
| 30 | +# enclose 99.7% of the points (given the data is normally distributed |
| 31 | +# like in these examples). |
| 32 | + |
| 33 | + |
| 34 | +import numpy as np |
| 35 | +import matplotlib.pyplot as plt |
| 36 | +from matplotlib.patches import Ellipse |
| 37 | +import matplotlib.transforms as transforms |
| 38 | + |
| 39 | + |
| 40 | +def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs): |
| 41 | + """ |
| 42 | + Create a plot of the covariance confidence ellipse of `x` and `y` |
| 43 | +
|
| 44 | + Parameters |
| 45 | + ---------- |
| 46 | + x, y : array_like, shape (n, ) |
| 47 | + Input data. |
| 48 | +
|
| 49 | + ax : matplotlib.axes.Axes |
| 50 | + The axes object to draw the ellipse into. |
| 51 | +
|
| 52 | + n_std : float |
| 53 | + The number of standard deviations to determine the ellipse's radiuses. |
| 54 | +
|
| 55 | + Returns |
| 56 | + ------- |
| 57 | + matplotlib.patches.Ellipse |
| 58 | +
|
| 59 | + Other parameters |
| 60 | + ---------------- |
| 61 | + kwargs : `~matplotlib.patches.Patch` properties |
| 62 | + """ |
| 63 | + if x.size != y.size: |
| 64 | + raise ValueError("x and y must be the same size") |
| 65 | + |
| 66 | + cov = np.cov(x, y) |
| 67 | + pearson = cov[0, 1]/np.sqrt(cov[0, 0] * cov[1, 1]) |
| 68 | + # Using a special case to obtain the eigenvalues of this |
| 69 | + # two-dimensionl dataset. |
| 70 | + ell_radius_x = np.sqrt(1 + pearson) |
| 71 | + ell_radius_y = np.sqrt(1 - pearson) |
| 72 | + ellipse = Ellipse((0, 0), |
| 73 | + width=ell_radius_x * 2, |
| 74 | + height=ell_radius_y * 2, |
| 75 | + facecolor=facecolor, |
| 76 | + **kwargs) |
| 77 | + |
| 78 | + # Calculating the stdandard deviation of x from |
| 79 | + # the squareroot of the variance and multiplying |
| 80 | + # with the given number of standard deviations. |
| 81 | + scale_x = np.sqrt(cov[0, 0]) * n_std |
| 82 | + mean_x = np.mean(x) |
| 83 | + |
| 84 | + # calculating the stdandard deviation of y ... |
| 85 | + scale_y = np.sqrt(cov[1, 1]) * n_std |
| 86 | + mean_y = np.mean(y) |
| 87 | + |
| 88 | + transf = transforms.Affine2D() \ |
| 89 | + .rotate_deg(45) \ |
| 90 | + .scale(scale_x, scale_y) \ |
| 91 | + .translate(mean_x, mean_y) |
| 92 | + |
| 93 | + ellipse.set_transform(transf + ax.transData) |
| 94 | + return ax.add_patch(ellipse) |
| 95 | + |
| 96 | + |
| 97 | +############################################################################# |
| 98 | +# |
| 99 | +# A helper function to create a correlated dataset |
| 100 | +# """""""""""""""""""""""""""""""""""""""""""""""" |
| 101 | +# |
| 102 | +# Creates a random two-dimesional dataset with the specified |
| 103 | +# two-dimensional mean (mu) and dimensions (scale). |
| 104 | +# The correlation can be controlled by the param 'dependency', |
| 105 | +# a 2x2 matrix. |
| 106 | + |
| 107 | +def get_correlated_dataset(n, dependency, mu, scale): |
| 108 | + latent = np.random.randn(n, 2) |
| 109 | + dependent = latent.dot(dependency) |
| 110 | + scaled = dependent * scale |
| 111 | + scaled_with_offset = scaled + mu |
| 112 | + # return x and y of the new, correlated dataset |
| 113 | + return scaled_with_offset[:, 0], scaled_with_offset[:, 1] |
| 114 | + |
| 115 | + |
| 116 | +############################################################################# |
| 117 | +# |
| 118 | +# Positive, negative and weak correlation |
| 119 | +# """"""""""""""""""""""""""""""""""""""" |
| 120 | +# |
| 121 | +# Note that the shape for the weak correlation (right) is an ellipse, |
| 122 | +# not a circle because x and y are differently scaled. |
| 123 | +# However, the fact that x and y are uncorrelated is shown by |
| 124 | +# the axes of the ellipse being aligned with the x- and y-axis |
| 125 | +# of the coordinate system. |
| 126 | + |
| 127 | +np.random.seed(0) |
| 128 | + |
| 129 | +PARAMETERS = { |
| 130 | + 'Positive correlation': np.array([[0.85, 0.35], |
| 131 | + [0.15, -0.65]]), |
| 132 | + 'Negative correlation': np.array([[0.9, -0.4], |
| 133 | + [0.1, -0.6]]), |
| 134 | + 'Weak correlation': np.array([[1, 0], |
| 135 | + [0, 1]]), |
| 136 | +} |
| 137 | + |
| 138 | +mu = 2, 4 |
| 139 | +scale = 3, 5 |
| 140 | + |
| 141 | +fig, axs = plt.subplots(1, 3, figsize=(9, 3)) |
| 142 | +for ax, (title, dependency) in zip(axs, PARAMETERS.items()): |
| 143 | + x, y = get_correlated_dataset(800, dependency, mu, scale) |
| 144 | + ax.scatter(x, y, s=0.5) |
| 145 | + |
| 146 | + ax.axvline(c='grey', lw=1) |
| 147 | + ax.axhline(c='grey', lw=1) |
| 148 | + |
| 149 | + confidence_ellipse(x, y, ax, edgecolor='red') |
| 150 | + |
| 151 | + ax.scatter(mu[0], mu[1], c='red', s=3) |
| 152 | + ax.set_title(title) |
| 153 | + |
| 154 | +plt.show() |
| 155 | + |
| 156 | + |
| 157 | +############################################################################# |
| 158 | +# |
| 159 | +# Different number of standard deviations |
| 160 | +# """"""""""""""""""""""""""""""""""""""" |
| 161 | +# |
| 162 | +# A plot with n_std = 3 (blue), 2 (purple) and 1 (red) |
| 163 | + |
| 164 | +fig, ax_nstd = plt.subplots(figsize=(6, 6)) |
| 165 | + |
| 166 | +dependency_nstd = np.array([ |
| 167 | + [0.8, 0.75], |
| 168 | + [-0.2, 0.35] |
| 169 | +]) |
| 170 | +mu = 0, 0 |
| 171 | +scale = 8, 5 |
| 172 | + |
| 173 | +ax_nstd.axvline(c='grey', lw=1) |
| 174 | +ax_nstd.axhline(c='grey', lw=1) |
| 175 | + |
| 176 | +x, y = get_correlated_dataset(500, dependency_nstd, mu, scale) |
| 177 | +ax_nstd.scatter(x, y, s=0.5) |
| 178 | + |
| 179 | +confidence_ellipse(x, y, ax_nstd, n_std=1, |
| 180 | + label=r'$1\sigma$', edgecolor='firebrick') |
| 181 | +confidence_ellipse(x, y, ax_nstd, n_std=2, |
| 182 | + label=r'$2\sigma$', edgecolor='fuchsia', linestyle='--') |
| 183 | +confidence_ellipse(x, y, ax_nstd, n_std=3, |
| 184 | + label=r'$3\sigma$', edgecolor='blue', linestyle=':') |
| 185 | + |
| 186 | +ax_nstd.scatter(mu[0], mu[1], c='red', s=3) |
| 187 | +ax_nstd.set_title('Different standard deviations') |
| 188 | +ax_nstd.legend() |
| 189 | +plt.show() |
| 190 | + |
| 191 | + |
| 192 | +############################################################################# |
| 193 | +# |
| 194 | +# Using the keyword arguments |
| 195 | +# """"""""""""""""""""""""""" |
| 196 | +# |
| 197 | +# Use the kwargs specified for matplotlib.patches.Patch in order |
| 198 | +# to have the ellipse rendered in different ways. |
| 199 | + |
| 200 | +fig, ax_kwargs = plt.subplots(figsize=(6, 6)) |
| 201 | +dependency_kwargs = np.array([ |
| 202 | + [-0.8, 0.5], |
| 203 | + [-0.2, 0.5] |
| 204 | +]) |
| 205 | +mu = 2, -3 |
| 206 | +scale = 6, 5 |
| 207 | + |
| 208 | +ax_kwargs.axvline(c='grey', lw=1) |
| 209 | +ax_kwargs.axhline(c='grey', lw=1) |
| 210 | + |
| 211 | +x, y = get_correlated_dataset(500, dependency_kwargs, mu, scale) |
| 212 | +# Plot the ellipse with zorder=0 in order to demonstrate |
| 213 | +# its transparency (caused by the use of alpha). |
| 214 | +confidence_ellipse(x, y, ax_kwargs, |
| 215 | + alpha=0.5, facecolor='pink', edgecolor='purple', zorder=0) |
| 216 | + |
| 217 | +ax_kwargs.scatter(x, y, s=0.5) |
| 218 | +ax_kwargs.scatter(mu[0], mu[1], c='red', s=3) |
| 219 | +ax_kwargs.set_title(f'Using kwargs') |
| 220 | + |
| 221 | +fig.subplots_adjust(hspace=0.25) |
| 222 | +plt.show() |
0 commit comments