From ce0d59129f753acc0a08de8e0eb8ee3c24e6cfbd Mon Sep 17 00:00:00 2001 From: Antony Lee Date: Tue, 2 Apr 2019 11:11:07 +0200 Subject: [PATCH] Backport PR #13570: Add new example for plotting a confidence_ellipse --- examples/statistics/confidence_ellipse.py | 222 ++++++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 examples/statistics/confidence_ellipse.py diff --git a/examples/statistics/confidence_ellipse.py b/examples/statistics/confidence_ellipse.py new file mode 100644 index 000000000000..d83246d31d33 --- /dev/null +++ b/examples/statistics/confidence_ellipse.py @@ -0,0 +1,222 @@ +""" +====================================================== +Plot a confidence ellipse of a two-dimensional dataset +====================================================== + +This example shows how to plot a confidence ellipse of a +two-dimensional dataset, using its pearson correlation coefficient. + +The approach that is used to obtain the correct geometry is +explained and proved here: + +https://carstenschelp.github.io/2018/09/14/Plot_Confidence_Ellipse_001.html + +The method avoids the use of an iterative eigen decomposition algorithm +and makes use of the fact that a normalized covariance matrix (composed of +pearson correlation coefficients and ones) is particularly easy to handle. +""" + +############################################################################# +# +# The plotting function itself +# """""""""""""""""""""""""""" +# +# This function plots the confidence ellipse of the covariance of the given +# array-like variables x and y. The ellipse is plotted into the given +# axes-object ax. +# +# The radiuses of the ellipse can be controlled by n_std which is the number +# of standard deviations. The default value is 3 which makes the ellipse +# enclose 99.7% of the points (given the data is normally distributed +# like in these examples). + + +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.patches import Ellipse +import matplotlib.transforms as transforms + + +def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs): + """ + Create a plot of the covariance confidence ellipse of `x` and `y` + + Parameters + ---------- + x, y : array_like, shape (n, ) + Input data. + + ax : matplotlib.axes.Axes + The axes object to draw the ellipse into. + + n_std : float + The number of standard deviations to determine the ellipse's radiuses. + + Returns + ------- + matplotlib.patches.Ellipse + + Other parameters + ---------------- + kwargs : `~matplotlib.patches.Patch` properties + """ + if x.size != y.size: + raise ValueError("x and y must be the same size") + + cov = np.cov(x, y) + pearson = cov[0, 1]/np.sqrt(cov[0, 0] * cov[1, 1]) + # Using a special case to obtain the eigenvalues of this + # two-dimensionl dataset. + ell_radius_x = np.sqrt(1 + pearson) + ell_radius_y = np.sqrt(1 - pearson) + ellipse = Ellipse((0, 0), + width=ell_radius_x * 2, + height=ell_radius_y * 2, + facecolor=facecolor, + **kwargs) + + # Calculating the stdandard deviation of x from + # the squareroot of the variance and multiplying + # with the given number of standard deviations. + scale_x = np.sqrt(cov[0, 0]) * n_std + mean_x = np.mean(x) + + # calculating the stdandard deviation of y ... + scale_y = np.sqrt(cov[1, 1]) * n_std + mean_y = np.mean(y) + + transf = transforms.Affine2D() \ + .rotate_deg(45) \ + .scale(scale_x, scale_y) \ + .translate(mean_x, mean_y) + + ellipse.set_transform(transf + ax.transData) + return ax.add_patch(ellipse) + + +############################################################################# +# +# A helper function to create a correlated dataset +# """""""""""""""""""""""""""""""""""""""""""""""" +# +# Creates a random two-dimesional dataset with the specified +# two-dimensional mean (mu) and dimensions (scale). +# The correlation can be controlled by the param 'dependency', +# a 2x2 matrix. + +def get_correlated_dataset(n, dependency, mu, scale): + latent = np.random.randn(n, 2) + dependent = latent.dot(dependency) + scaled = dependent * scale + scaled_with_offset = scaled + mu + # return x and y of the new, correlated dataset + return scaled_with_offset[:, 0], scaled_with_offset[:, 1] + + +############################################################################# +# +# Positive, negative and weak correlation +# """"""""""""""""""""""""""""""""""""""" +# +# Note that the shape for the weak correlation (right) is an ellipse, +# not a circle because x and y are differently scaled. +# However, the fact that x and y are uncorrelated is shown by +# the axes of the ellipse being aligned with the x- and y-axis +# of the coordinate system. + +np.random.seed(0) + +PARAMETERS = { + 'Positive correlation': np.array([[0.85, 0.35], + [0.15, -0.65]]), + 'Negative correlation': np.array([[0.9, -0.4], + [0.1, -0.6]]), + 'Weak correlation': np.array([[1, 0], + [0, 1]]), +} + +mu = 2, 4 +scale = 3, 5 + +fig, axs = plt.subplots(1, 3, figsize=(9, 3)) +for ax, (title, dependency) in zip(axs, PARAMETERS.items()): + x, y = get_correlated_dataset(800, dependency, mu, scale) + ax.scatter(x, y, s=0.5) + + ax.axvline(c='grey', lw=1) + ax.axhline(c='grey', lw=1) + + confidence_ellipse(x, y, ax, edgecolor='red') + + ax.scatter(mu[0], mu[1], c='red', s=3) + ax.set_title(title) + +plt.show() + + +############################################################################# +# +# Different number of standard deviations +# """"""""""""""""""""""""""""""""""""""" +# +# A plot with n_std = 3 (blue), 2 (purple) and 1 (red) + +fig, ax_nstd = plt.subplots(figsize=(6, 6)) + +dependency_nstd = np.array([ + [0.8, 0.75], + [-0.2, 0.35] +]) +mu = 0, 0 +scale = 8, 5 + +ax_nstd.axvline(c='grey', lw=1) +ax_nstd.axhline(c='grey', lw=1) + +x, y = get_correlated_dataset(500, dependency_nstd, mu, scale) +ax_nstd.scatter(x, y, s=0.5) + +confidence_ellipse(x, y, ax_nstd, n_std=1, + label=r'$1\sigma$', edgecolor='firebrick') +confidence_ellipse(x, y, ax_nstd, n_std=2, + label=r'$2\sigma$', edgecolor='fuchsia', linestyle='--') +confidence_ellipse(x, y, ax_nstd, n_std=3, + label=r'$3\sigma$', edgecolor='blue', linestyle=':') + +ax_nstd.scatter(mu[0], mu[1], c='red', s=3) +ax_nstd.set_title('Different standard deviations') +ax_nstd.legend() +plt.show() + + +############################################################################# +# +# Using the keyword arguments +# """"""""""""""""""""""""""" +# +# Use the kwargs specified for matplotlib.patches.Patch in order +# to have the ellipse rendered in different ways. + +fig, ax_kwargs = plt.subplots(figsize=(6, 6)) +dependency_kwargs = np.array([ + [-0.8, 0.5], + [-0.2, 0.5] +]) +mu = 2, -3 +scale = 6, 5 + +ax_kwargs.axvline(c='grey', lw=1) +ax_kwargs.axhline(c='grey', lw=1) + +x, y = get_correlated_dataset(500, dependency_kwargs, mu, scale) +# Plot the ellipse with zorder=0 in order to demonstrate +# its transparency (caused by the use of alpha). +confidence_ellipse(x, y, ax_kwargs, + alpha=0.5, facecolor='pink', edgecolor='purple', zorder=0) + +ax_kwargs.scatter(x, y, s=0.5) +ax_kwargs.scatter(mu[0], mu[1], c='red', s=3) +ax_kwargs.set_title(f'Using kwargs') + +fig.subplots_adjust(hspace=0.25) +plt.show()