diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py
index 5912f7527fa02..be3bf4837eb9f 100644
--- a/examples/covariance/plot_covariance_estimation.py
+++ b/examples/covariance/plot_covariance_estimation.py
@@ -13,51 +13,15 @@
 :ref:`shrunk_covariance` estimators. In particular, it focuses on how to
 set the amount of regularization, i.e. how to choose the bias-variance
 trade-off.
-
-Here we compare 3 approaches:
-
-* Setting the parameter by cross-validating the likelihood on three folds
-  according to a grid of potential shrinkage parameters.
-
-* A close formula proposed by Ledoit and Wolf to compute
-  the asymptotically optimal regularization parameter (minimizing a MSE
-  criterion), yielding the :class:`~sklearn.covariance.LedoitWolf`
-  covariance estimate.
-
-* An improvement of the Ledoit-Wolf shrinkage, the
-  :class:`~sklearn.covariance.OAS`, proposed by Chen et al. Its
-  convergence is significantly better under the assumption that the data
-  are Gaussian, in particular for small samples.
-
-To quantify estimation error, we plot the likelihood of unseen data for
-different values of the shrinkage parameter. We also show the choices by
-cross-validation, or with the LedoitWolf and OAS estimates.
-
-Note that the maximum likelihood estimate corresponds to no shrinkage,
-and thus performs poorly. The Ledoit-Wolf estimate performs really well,
-as it is close to the optimal and is computational not costly. In this
-example, the OAS estimate is a bit further away. Interestingly, both
-approaches outperform cross-validation, which is significantly most
-computationally costly.
-
 """
 
-import numpy as np
-import matplotlib.pyplot as plt
-from scipy import linalg
-from sklearn.covariance import (
-    LedoitWolf,
-    OAS,
-    ShrunkCovariance,
-    log_likelihood,
-    empirical_covariance,
-)
-from sklearn.model_selection import GridSearchCV
+# %%
+# Generate sample data
+# --------------------
+import numpy as np
 
-# #############################################################################
-# Generate sample data
 n_features, n_samples = 40, 20
 np.random.seed(42)
 base_X_train = np.random.normal(size=(n_samples, n_features))
@@ -68,8 +32,13 @@
 X_train = np.dot(base_X_train, coloring_matrix)
 X_test = np.dot(base_X_test, coloring_matrix)
 
-# #############################################################################
+
+# %%
 # Compute the likelihood on test data
+# -----------------------------------
+
+from sklearn.covariance import ShrunkCovariance, empirical_covariance, log_likelihood
+from scipy import linalg
 
 # spanning a range of possible shrinkage coefficient values
 shrinkages = np.logspace(-2, 0, 30)
@@ -83,8 +52,29 @@
 emp_cov = empirical_covariance(X_train)
 loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))
 
-# #############################################################################
-# Compare different approaches to setting the parameter
+
+# %%
+# Compare different approaches to setting the regularization parameter
+# --------------------------------------------------------------------
+#
+# Here we compare 3 approaches:
+#
+# * Setting the parameter by cross-validating the likelihood on three folds
+#   according to a grid of potential shrinkage parameters.
+#
+# * A closed formula proposed by Ledoit and Wolf to compute
+#   the asymptotically optimal regularization parameter (minimizing an MSE
+#   criterion), yielding the :class:`~sklearn.covariance.LedoitWolf`
+#   covariance estimate.
+#
+# * An improvement of the Ledoit-Wolf shrinkage, the
+#   :class:`~sklearn.covariance.OAS`, proposed by Chen et al. Its
+#   convergence is significantly better under the assumption that the data
+#   are Gaussian, in particular for small samples.
+
+
+from sklearn.model_selection import GridSearchCV
+from sklearn.covariance import LedoitWolf, OAS
 
 # GridSearch for an optimal shrinkage coefficient
 tuned_parameters = [{"shrinkage": shrinkages}]
@@ -99,8 +89,17 @@
 oa = OAS()
 loglik_oa = oa.fit(X_train).score(X_test)
 
-# #############################################################################
+# %%
 # Plot results
+# ------------
+#
+#
+# To quantify estimation error, we plot the likelihood of unseen data for
+# different values of the shrinkage parameter. We also show the choices by
+# cross-validation, or with the LedoitWolf and OAS estimates.
+
+import matplotlib.pyplot as plt
+
 fig = plt.figure()
 plt.title("Regularized covariance: likelihood and shrinkage coefficient")
 plt.xlabel("Regularization parameter: shrinkage coefficient")
@@ -145,3 +144,13 @@
 
 plt.legend()
 plt.show()
+
+# %%
+# .. note::
+#
+#     The maximum likelihood estimate corresponds to no shrinkage,
+#     and thus performs poorly. The Ledoit-Wolf estimate performs really well,
+#     as it is close to the optimal and is not computationally costly. In this
+#     example, the OAS estimate is a bit further away. Interestingly, both
+#     approaches outperform cross-validation, which is significantly more
+#     computationally costly.
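Reviewer note: as a quick sanity check of the three approaches described in the new "Compare different approaches" section, here is a minimal sketch (not part of the patch) that prints the shrinkage value each method selects on similarly colored Gaussian data. The cv=3 argument mirrors the "three folds" wording in the text and is an assumption, as is the simplified data generation, which only stands in for the example's coloring-matrix setup; the example itself may rely on GridSearchCV's default splitting.

import numpy as np

from sklearn.covariance import OAS, LedoitWolf, ShrunkCovariance
from sklearn.model_selection import GridSearchCV

# Colored Gaussian data, loosely mirroring the example's 20 samples x 40 features.
rng = np.random.RandomState(42)
coloring_matrix = rng.normal(size=(40, 40))
X = rng.normal(size=(20, 40)) @ coloring_matrix

shrinkages = np.logspace(-2, 0, 30)

# 1. Cross-validated grid search over the shrinkage coefficient (cv=3 is an assumption).
cv = GridSearchCV(ShrunkCovariance(), [{"shrinkage": shrinkages}], cv=3).fit(X)

# 2. Ledoit-Wolf closed-form shrinkage.
lw = LedoitWolf().fit(X)

# 3. Oracle Approximating Shrinkage (OAS).
oa = OAS().fit(X)

print("cross-validated shrinkage:", cv.best_estimator_.shrinkage)
print("Ledoit-Wolf shrinkage:    ", lw.shrinkage_)
print("OAS shrinkage:            ", oa.shrinkage_)

This only compares the selected shrinkage coefficients; scoring held-out likelihood, as the example itself does, remains the stricter comparison.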