-
-
Notifications
You must be signed in to change notification settings - Fork 7.9k
Add new example for plotting a confidence_ellipse #13570
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
9661375
b48f97d
e74b886
701e171
c845c88
2df575a
d413596
713a985
ef22b47
49a2869
856b27d
ea287e0
71b2b05
f0e1f91
47a87f8
55e0f51
b7df47b
31d35d3
dbd50b3
2d1fca7
566b6c8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,206 @@ | ||
""" | ||
====================================================== | ||
Plot a confidence ellipse of a two-dimensional dataset | ||
====================================================== | ||
|
||
This example shows how to plot a confidence ellipse of a | ||
two-dimensional dataset, using its Pearson correlation coefficient. | ||
|
||
The approach that is used to obtain the correct geometry is | ||
explained and proved here: | ||
|
||
https://carstenschelp.github.io/2018/09/14/Plot_Confidence_Ellipse_001.html | ||
""" | ||
|
||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
from matplotlib.patches import Ellipse | ||
import matplotlib.transforms as transforms | ||
|
||
|
||
def confidence_ellipse(x, y, ax, n_std=3.0, **kwargs):
    """
    Create a plot of the covariance confidence ellipse of *x* and *y*.

    Parameters
    ----------
    x, y : array_like, shape (n, )
        Input data.

    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.

    n_std : float
        The number of standard deviations to determine the ellipse's
        radii.

    Returns
    -------
    matplotlib.patches.Ellipse
        The ellipse patch that was added to *ax*.

    Raises
    ------
    ValueError
        If *x* and *y* do not have the same number of elements.

    Other Parameters
    ----------------
    kwargs : `~matplotlib.patches.Patch` properties
    """
    # Accept any array_like (lists, tuples, ...), not only ndarrays.
    x = np.asarray(x)
    y = np.asarray(y)
    if x.size != y.size:
        raise ValueError("x and y must be the same size")

    cov = np.cov(x, y)
    pearson = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])
    # Using a special case to obtain the eigenvalues of this
    # two-dimensional dataset.
    ell_radius_x = np.sqrt(1 + pearson)
    ell_radius_y = np.sqrt(1 - pearson)
    ellipse = Ellipse((0, 0),
                      width=ell_radius_x * 2,
                      height=ell_radius_y * 2,
                      **kwargs)

    # Calculating the standard deviation of x from the square root of
    # the variance and multiplying with the given number of standard
    # deviations.
    scale_x = np.sqrt(cov[0, 0]) * n_std
    mean_x = np.mean(x)

    # Calculating the standard deviation of y ...
    scale_y = np.sqrt(cov[1, 1]) * n_std
    mean_y = np.mean(y)

    # Rotate the ellipse by 45 degrees, stretch it by the scaled standard
    # deviations and finally move its centre to the mean of the data.
    transf = (transforms.Affine2D()
              .rotate_deg(45)
              .scale(scale_x, scale_y)
              .translate(mean_x, mean_y))

    ellipse.set_transform(transf + ax.transData)
    return ax.add_patch(ellipse)
|
||
QuLogic marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def get_correlated_dataset(n, dependency, mu, scale):
    """
    Create a random two-dimensional dataset with correlated x and y.

    Parameters
    ----------
    n : int
        Number of samples to draw.
    dependency : array_like, shape (2, 2)
        Matrix that is applied to the independent, normally distributed
        "latent" samples in order to make the two columns depend on each
        other.
    mu : array_like, shape (2, )
        Offset that is added to the two columns after scaling.
    scale : array_like, shape (2, )
        Factors the two columns are multiplied with before the offset is
        added.

    Returns
    -------
    x, y : ndarray, shape (n, )
        First and second column of the generated dataset.
    """
    latent = np.random.randn(n, 2)
    dependent = latent.dot(dependency)
    scaled = dependent * scale
    scaled_with_offset = scaled + mu
    # return x and y of the new, correlated dataset
    return scaled_with_offset[:, 0], scaled_with_offset[:, 1]
|
||
# One figure with a 2x3 grid of axes; all panels share their x and y
# limits so that the ellipses can be compared directly.
fig, ((ax_pos, ax_neg, ax_uncorrel),
      (ax_nstd1, ax_nstd2, ax_kwargs)) = plt.subplots(
    nrows=2, ncols=3, figsize=(9, 6), sharex=True, sharey=True)
# Fixed seed so that the example always produces the same datasets.
np.random.seed(1234)

# Demo top left: positive correlation

# Create a matrix that transforms the independent "latent" dataset
# into a dataset where x and y are correlated - positive correlation,
# in this case.
dependency_pos = np.array([
    [0.85, 0.35],
    [0.15, -0.65]
])
mu_pos = np.array([2, 4])
scale_pos = np.array([3, 5])

# Indicate the x- and y-axis
ax_pos.axvline(c='grey', lw=1)
ax_pos.axhline(c='grey', lw=1)

x, y = get_correlated_dataset(500, dependency_pos, mu_pos, scale_pos)
confidence_ellipse(x, y, ax_pos, facecolor='none', edgecolor='red')

# Also plot the dataset itself, for reference
ax_pos.scatter(x, y, s=0.5)
# Mark the mean ("mu")
ax_pos.scatter([mu_pos[0]], [mu_pos[1]], c='red', s=3)
ax_pos.set_title('Positive correlation')
|
||
# Demo top middle: negative correlation
dependency_neg = np.array([
    [0.9, -0.4],
    [0.1, -0.6]
])
mu = np.array([2, 4])
scale = np.array([3, 5])

# Indicate the x- and y-axes
ax_neg.axvline(c='grey', lw=1)
ax_neg.axhline(c='grey', lw=1)

x, y = get_correlated_dataset(500, dependency_neg, mu, scale)
confidence_ellipse(x, y, ax_neg, facecolor='none', edgecolor='red')
# Again, plot the dataset itself, for reference
ax_neg.scatter(x, y, s=0.5)
# Mark the mean ("mu")
ax_neg.scatter([mu[0]], [mu[1]], c='red', s=3)
ax_neg.set_title('Negative correlation')
|
||
# Demo top right: uncorrelated dataset
# The ellipse of this uncorrelated dataset is not a circle since x and y
# are differently scaled. However, the fact that x and y are uncorrelated
# is shown by the ellipse being aligned with the x- and y-axis.

in_dependency = np.array([
    [1, 0],
    [0, 1]
])
mu = np.array([2, 4])
scale = np.array([5, 3])

# Indicate the x- and y-axes
ax_uncorrel.axvline(c='grey', lw=1)
ax_uncorrel.axhline(c='grey', lw=1)

x, y = get_correlated_dataset(500, in_dependency, mu, scale)
confidence_ellipse(x, y, ax_uncorrel, facecolor='none', edgecolor='red')
# Plot the dataset itself, for reference
ax_uncorrel.scatter(x, y, s=0.5)
# Mark the mean ("mu")
ax_uncorrel.scatter([mu[0]], [mu[1]], c='red', s=3)
ax_uncorrel.set_title('Weak correlation')
|
||
# Demo bottom left and middle: different numbers of standard deviations
# In the confidence_ellipse function the default of the number
# of standard deviations is 3. For normally distributed two-dimensional
# data such an ellipse encloses about 98.9% of the points (not the 99.7%
# known from the one-dimensional case).
# This demo shows two plots of the same dataset with different
# values for "n_std".
dependency_nstd_1 = np.array([
    [0.8, 0.75],
    [-0.2, 0.35]
])
mu = np.array([0, 0])
scale = np.array([8, 5])

# Indicate the x- and y-axes in both panels of this demo
ax_nstd1.axvline(c='grey', lw=1)
ax_nstd1.axhline(c='grey', lw=1)
ax_nstd2.axvline(c='grey', lw=1)
ax_nstd2.axhline(c='grey', lw=1)

x, y = get_correlated_dataset(500, dependency_nstd_1, mu, scale)

# One standard deviation (red); the dashed grey ellipse shows three
# standard deviations for comparison.
ax_nstd1.scatter(x, y, s=0.5)
confidence_ellipse(x, y, ax_nstd1, n_std=1,
                   facecolor='none', edgecolor='red')
confidence_ellipse(x, y, ax_nstd1, n_std=3,
                   facecolor='none', edgecolor='gray', linestyle='--')

# Mark the mean ("mu")
ax_nstd1.scatter([mu[0]], [mu[1]], c='red', s=3)
ax_nstd1.set_title('One standard deviation')

# Two standard deviations (red), again with the three standard
# deviations ellipse (dashed grey) for comparison.
ax_nstd2.scatter(x, y, s=0.5)
confidence_ellipse(x, y, ax_nstd2, n_std=2,
                   facecolor='none', edgecolor='red')
confidence_ellipse(x, y, ax_nstd2, n_std=3,
                   facecolor='none', edgecolor='gray', linestyle='--')

# Mark the mean ("mu")
ax_nstd2.scatter([mu[0]], [mu[1]], c='red', s=3)
ax_nstd2.set_title('Two standard deviations')
|
||
|
||
# Demo bottom right: Using kwargs
dependency_kwargs = np.array([
    [-0.8, 0.5],
    [-0.2, 0.5]
])
mu = np.array([2, -3])
scale = np.array([6, 5])

# Indicate the x- and y-axes
ax_kwargs.axvline(c='grey', lw=1)
ax_kwargs.axhline(c='grey', lw=1)

x, y = get_correlated_dataset(500, dependency_kwargs, mu, scale)
# Now plot the dataset first ("under" the ellipse) in order to
# demonstrate the transparency of the ellipse (alpha).
ax_kwargs.scatter(x, y, s=0.5)
confidence_ellipse(x, y, ax_kwargs,
                   alpha=0.5, facecolor='pink', edgecolor='purple')

# Mark the mean ("mu")
ax_kwargs.scatter([mu[0]], [mu[1]], c='red', s=3)
ax_kwargs.set_title('Using kwargs')

# Leave some vertical room between the two rows for the titles.
fig.subplots_adjust(hspace=0.25)
plt.show()
Uh oh!
There was an error while loading. Please reload this page.