8000 Merge pull request #13570 from CarstenSchelp/confidence_ellipse · matplotlib/matplotlib@4eb452a · GitHub
[go: up one dir, main page]

Skip to content

Commit 4eb452a

Browse files
authored
Merge pull request #13570 from CarstenSchelp/confidence_ellipse
Add new example for plotting a confidence_ellipse
2 parents f27cde1 + 566b6c8 commit 4eb452a

File tree

1 file changed

+222
-0
lines changed

1 file changed

+222
-0
lines changed
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
"""
2+
======================================================
3+
Plot a confidence ellipse of a two-dimensional dataset
4+
======================================================
5+
6+
This example shows how to plot a confidence ellipse of a
7+
two-dimensional dataset, using its pearson correlation coefficient.
8+
9+
The approach that is used to obtain the correct geometry is
10+
explained and proved here:
11+
12+
https://carstenschelp.github.io/2018/09/14/Plot_Confidence_Ellipse_001.html
13+
14+
The method avoids the use of an iterative eigen decomposition algorithm
15+
and makes use of the fact that a normalized covariance matrix (composed of
16+
pearson correlation coefficients and ones) is particularly easy to handle.
17+
"""
18+
19+
#############################################################################
20+
#
21+
# The plotting function itself
22+
# """"""""""""""""""""""""""""
23+
#
24+
# This function plots the confidence ellipse of the covariance of the given
25+
# array-like variables x and y. The ellipse is plotted into the given
26+
# axes-object ax.
27+
#
28+
# The radiuses of the ellipse can be controlled by n_std which is the number
29+
# of standard deviations. The default value is 3 which makes the ellipse
30+
# enclose 99.7% of the points (given the data is normally distributed
31+
# like in these examples).
32+
33+
34+
import numpy as np
35+
import matplotlib.pyplot as plt
36+
from matplotlib.patches import Ellipse
37+
import matplotlib.transforms as transforms
38+
39+
40+
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
41+
"""
42+
Create a plot of the covariance confidence ellipse of `x` and `y`
43+
44+
Parameters
45+
----------
46+
x, y : array_like, shape (n, )
47+
Input data.
48+
49+
ax : matplotlib.axes.Axes
50+
The axes object to draw the ellipse into.
51+
52+
n_std : float
53+
The number of standard deviations to determine the ellipse's radiuses.
54+
55+
Returns
56+
-------
57+
matplotlib.patches.Ellipse
58+
59+
Other parameters
60+
----------------
61+
kwargs : `~matplotlib.patches.Patch` properties
62+
"""
63+
if x.size != y.size:
64+
raise ValueError("x and y must be the same size")
65+
66+
cov = np.cov(x, y)
67+
pearson = cov[0, 1]/np.sqrt(cov[0, 0] * cov[1, 1])
68+
# Using a special case to obtain the eigenvalues of this
69+
# two-dimensionl dataset.
70+
ell_radius_x = np.sqrt(1 + pearson)
71+
ell_radius_y = np.sqrt(1 - pearson)
72+
ellipse = Ellipse((0, 0),
73+
width=ell_radius_x * 2,
74+
height=ell_radius_y * 2,
75+
facecolor=facecolor,
76+
**kwargs)
77+
78+
# Calculating the stdandard deviation of x from
79+
# the squareroot of the variance and multiplying
80+
# with the given number of standard deviations.
81+
scale_x = np.sqrt(cov[0, 0]) * n_std
82+
mean_x = np.mean(x)
83+
84+
# calculating the stdandard deviation of y ...
85+
scale_y = np.sqrt(cov[1, 1]) * n_std
86+
mean_y = np.mean(y)
87+
88+
transf = transforms.Affine2D() \
89+
.rotate_deg(45) \
90+
.scale(scale_x, scale_y) \
91+
.translate(mean_x, mean_y)
92+
93+
ellipse.set_transform(transf + ax.transData)
94+
return ax.add_patch(ellipse)
95+
96+
97+
#############################################################################
98+
#
99+
# A helper function to create a correlated dataset
100+
# """"""""""""""""""""""""""""""""""""""""""""""""
101+
#
102+
# Creates a random two-dimesional dataset with the specified
103+
# two-dimensional mean (mu) and dimensions (scale).
104+
# The correlation can be controlled by the param 'dependency',
105+
# a 2x2 matrix.
106+
107+
def get_correlated_dataset(n, dependency, mu, scale):
108+
latent = np.random.randn(n, 2)
109+
dependent = latent.dot(dependency)
110+
scaled = dependent * scale
111+
scaled_with_offset = scaled + mu
112+
# return x and y of the new, correlated dataset
113+
return scaled_with_offset[:, 0], scaled_with_offset[:, 1]
114+
115+
116+
#############################################################################
117+
#
118+
# Positive, negative and weak correlation
119+
# """""""""""""""""""""""""""""""""""""""
120+
#
121+
# Note that the shape for the weak correlation (right) is an ellipse,
122+
# not a circle because x and y are differently scaled.
123+
# However, the fact that x and y are uncorrelated is shown by
124+
# the axes of the ellipse being aligned with the x- and y-axis
125+
# of the coordinate system.
126+
127+
np.random.seed(0)
128+
129+
PARAMETERS = {
130+
'Positive correlation': np.array([[0.85, 0.35],
131+
[0.15, -0.65]]),
132+
'Negative correlation': np.array([[0.9, -0.4],
133+
[0.1, -0.6]]),
134+
'Weak correlation': np.array([[1, 0],
135+
[0, 1]]),
136+
}
137+
138+
mu = 2, 4
139+
scale = 3, 5
140+
141+
fig, axs = plt.subplots(1, 3, figsize=(9, 3))
142+
for ax, (title, dependency) in zip(axs, PARAMETERS.items()):
143+
x, y = get_correlated_dataset(800, dependency, mu, scale)
144+
ax.scatter(x, y, s=0.5)
145+
146+
ax.axvline(c='grey', lw=1)
147+
ax.axhline(c='grey', lw=1)
148+
149+
confidence_ellipse(x, y, ax, edgecolor='red')
150+
151+
ax.scatter(mu[0], mu[1], c='red', s=3)
152+
ax.set_title(title)
153+
154+
plt.show()
155+
156+
157+
#############################################################################
158+
#
159+
# Different number of standard deviations
160+
# """""""""""""""""""""""""""""""""""""""
161+
#
162+
# A plot with n_std = 3 (blue), 2 (purple) and 1 (red)
163+
164+
fig, ax_nstd = plt.subplots(figsize=(6, 6))
165+
166+
dependency_nstd = np.array([
167+
[0.8, 0.75],
168+
[-0.2, 0.35]
169+
])
170+
mu = 0, 0
171+
scale = 8, 5
172+
173+
ax_nstd.axvline(c='grey', lw=1)
174+
ax_nstd.axhline(c='grey', lw=1)
175+
176+
x, y = get_correlated_dataset(500, dependency_nstd, mu, scale)
177+
ax_nstd.scatter(x, y, s=0.5)
178+
179+
confidence_ellipse(x, y, ax_nstd, n_std=1,
180+
label=r'$1\sigma$', edgecolor='firebrick')
181+
confidence_ellipse(x, y, ax_nstd, n_std=2,
182+
label=r'$2\sigma$', edgecolor='fuchsia', linestyle='--')
183+
confidence_ellipse(x, y, ax_nstd, n_std=3,
184+
label=r'$3\sigma$', edgecolor='blue', linestyle=':')
185+
186+
ax_nstd.scatter(mu[0], mu[1], c='red', s=3)
187+
ax_nstd.set_title('Different standard deviations')
188+
ax_nstd.legend()
189+
plt.show()
190+
191+
192+
#############################################################################
193+
#
194+
# Using the keyword arguments
195+
# """""""""""""""""""""""""""
196+
#
197+
# Use the kwargs specified for matplotlib.patches.Patch in order
198+
# to have the ellipse rendered in different ways.
199+
200+
fig, ax_kwargs = plt.subplots(figsize=(6, 6))
201+
dependency_kwargs = np.array([
202+
[-0.8, 0.5],
203+
[-0.2, 0.5]
204+
])
205+
mu = 2, -3
206+
scale = 6, 5
207+
208+
ax_kwargs.axvline(c='grey', lw=1)
209+
ax_kwargs.axhline(c='grey', lw=1)
210+
211+
x, y = get_correlated_dataset(500, dependency_kwargs, mu, scale)
212+
# Plot the ellipse with zorder=0 in order to demonstrate
213+
# its transparency (caused by the use of alpha).
214+
confidence_ellipse(x, y, ax_kwargs,
215+
alpha=0.5, facecolor='pink', edgecolor='purple', zorder=0)
216+
217+
ax_kwargs.scatter(x, y, s=0.5)
218+
ax_kwargs.scatter(mu[0], mu[1], c='red', s=3)
219+
ax_kwargs.set_title(f'Using kwargs')
220+
221+
fig.subplots_adjust(hspace=0.25)
222+
plt.show()

0 commit comments

Comments
 (0)
0