From 7221a691df8bbe82973e0d74cd4e7f571c6d6a76 Mon Sep 17 00:00:00 2001
From: darioka <cannone.dario@gmail.com>
Date: Sun, 17 Apr 2022 20:09:55 +0200
Subject: [PATCH 1/4] notebook style in covariance plot example

---
 .../covariance/plot_covariance_estimation.py  | 74 +++++++++++--------
 1 file changed, 43 insertions(+), 31 deletions(-)

diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py
index 5912f7527fa02..5c04cbc359dea 100644
--- a/examples/covariance/plot_covariance_estimation.py
+++ b/examples/covariance/plot_covariance_estimation.py
@@ -13,33 +13,6 @@
 :ref:`shrunk_covariance` estimators. In particular, it focuses on how to
 set the amount of regularization, i.e. how to choose the bias-variance
 trade-off.
-
-Here we compare 3 approaches:
-
-* Setting the parameter by cross-validating the likelihood on three folds
-  according to a grid of potential shrinkage parameters.
-
-* A close formula proposed by Ledoit and Wolf to compute
-  the asymptotically optimal regularization parameter (minimizing a MSE
-  criterion), yielding the :class:`~sklearn.covariance.LedoitWolf`
-  covariance estimate.
-
-* An improvement of the Ledoit-Wolf shrinkage, the
-  :class:`~sklearn.covariance.OAS`, proposed by Chen et al. Its
-  convergence is significantly better under the assumption that the data
-  are Gaussian, in particular for small samples.
-
-To quantify estimation error, we plot the likelihood of unseen data for
-different values of the shrinkage parameter. We also show the choices by
-cross-validation, or with the LedoitWolf and OAS estimates.
-
-Note that the maximum likelihood estimate corresponds to no shrinkage,
-and thus performs poorly. The Ledoit-Wolf estimate performs really well,
-as it is close to the optimal and is computational not costly. In this
-example, the OAS estimate is a bit further away. Interestingly, both
-approaches outperform cross-validation, which is significantly most
-computationally costly.
-
 """
 
 import numpy as np
@@ -56,8 +29,9 @@
 from sklearn.model_selection import GridSearchCV
 
 
-# #############################################################################
+# %%
 # Generate sample data
+# ---------------------------------------------------
 n_features, n_samples = 40, 20
 np.random.seed(42)
 base_X_train = np.random.normal(size=(n_samples, n_features))
@@ -68,8 +42,10 @@
 X_train = np.dot(base_X_train, coloring_matrix)
 X_test = np.dot(base_X_test, coloring_matrix)
 
-# #############################################################################
+
+# %%
 # Compute the likelihood on test data
+# ---------------------------------------------------
 
 # spanning a range of possible shrinkage coefficient values
 shrinkages = np.logspace(-2, 0, 30)
@@ -83,8 +59,26 @@
 emp_cov = empirical_covariance(X_train)
 loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))
 
-# #############################################################################
+
+# %%
 # Compare different approaches to setting the parameter
+# ---------------------------------------------------
+# 
+# Here we compare 3 approaches:
+#
+# * Setting the parameter by cross-validating the likelihood on three folds
+#   according to a grid of potential shrinkage parameters.
+#
+# * A closed-form formula proposed by Ledoit and Wolf to compute
+#   the asymptotically optimal regularization parameter (minimizing a MSE
+#   criterion), yielding the :class:`~sklearn.covariance.LedoitWolf`
+#   covariance estimate.
+#
+# * An improvement of the Ledoit-Wolf shrinkage, the
+#   :class:`~sklearn.covariance.OAS`, proposed by Chen et al. Its
+#   convergence is significantly better under the assumption that the data
+#   are Gaussian, in particular for small samples.
+
 
 # GridSearch for an optimal shrinkage coefficient
 tuned_parameters = [{"shrinkage": shrinkages}]
@@ -99,8 +93,15 @@
 oa = OAS()
 loglik_oa = oa.fit(X_train).score(X_test)
 
-# #############################################################################
+# %%
 # Plot results
+# ---------------------------------------------------
+#
+#
+# To quantify estimation error, we plot the likelihood of unseen data for
+# different values of the shrinkage parameter. We also show the choices by
+# cross-validation, or with the LedoitWolf and OAS estimates.
+
 fig = plt.figure()
 plt.title("Regularized covariance: likelihood and shrinkage coefficient")
 plt.xlabel("Regularization parameter: shrinkage coefficient")
@@ -145,3 +146,14 @@
 plt.legend()
 
 plt.show()
+
+# %%
+# .. note::
+#
+#    The maximum likelihood estimate corresponds to no shrinkage,
+#    and thus performs poorly. The Ledoit-Wolf estimate performs really well,
+#    as it is close to the optimal and is computational not costly. In this
+#    example, the OAS estimate is a bit further away. Interestingly, both
+#    approaches outperform cross-validation, which is significantly most
+#    computationally costly.
+

From ec9c063c764ff12fb8de7c376a26d5cc3f975a3d Mon Sep 17 00:00:00 2001
From: darioka <cannone.dario@gmail.com>
Date: Sun, 17 Apr 2022 20:15:28 +0200
Subject: [PATCH 2/4] just black

---
 examples/covariance/plot_covariance_estimation.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py
index 5c04cbc359dea..b4f9590640853 100644
--- a/examples/covariance/plot_covariance_estimation.py
+++ b/examples/covariance/plot_covariance_estimation.py
@@ -63,7 +63,7 @@
 # %%
 # Compare different approaches to setting the parameter
 # ---------------------------------------------------
-# 
+#
 # Here we compare 3 approaches:
 #
 # * Setting the parameter by cross-validating the likelihood on three folds
@@ -156,4 +156,3 @@
 #    example, the OAS estimate is a bit further away. Interestingly, both
 #    approaches outperform cross-validation, which is significantly most
 #    computationally costly.
-

From 1f8ca43a3e37b8e729fc4a4e09c20978c9749954 Mon Sep 17 00:00:00 2001
From: darioka <cannone.dario@gmail.com>
Date: Mon, 18 Apr 2022 16:29:49 +0200
Subject: [PATCH 3/4] Apply suggestions from code review

Co-authored-by: Jordan Silke <51223540+jsilke@users.noreply.github.com>
---
 examples/covariance/plot_covariance_estimation.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py
index b4f9590640853..6118e120edbbf 100644
--- a/examples/covariance/plot_covariance_estimation.py
+++ b/examples/covariance/plot_covariance_estimation.py
@@ -31,7 +31,7 @@
 
 # %%
 # Generate sample data
-# ---------------------------------------------------
+# --------------------
 n_features, n_samples = 40, 20
 np.random.seed(42)
 base_X_train = np.random.normal(size=(n_samples, n_features))
@@ -45,7 +45,7 @@
 
 # %%
 # Compute the likelihood on test data
-# ---------------------------------------------------
+# -----------------------------------
 
 # spanning a range of possible shrinkage coefficient values
 shrinkages = np.logspace(-2, 0, 30)
@@ -61,8 +61,8 @@
 
 
 # %%
-# Compare different approaches to setting the parameter
-# ---------------------------------------------------
+# Compare different approaches to setting the regularization parameter
+# --------------------------------------------------------------------
 #
 # Here we compare 3 approaches:
 #
@@ -95,7 +95,7 @@
 
 # %%
 # Plot results
-# ---------------------------------------------------
+# ------------
 #
 #
 # To quantify estimation error, we plot the likelihood of unseen data for
@@ -152,7 +152,7 @@
 #
 #    The maximum likelihood estimate corresponds to no shrinkage,
 #    and thus performs poorly. The Ledoit-Wolf estimate performs really well,
-#    as it is close to the optimal and is computational not costly. In this
+#    as it is close to the optimal and is not computationally costly. In this
 #    example, the OAS estimate is a bit further away. Interestingly, both
 #    approaches outperform cross-validation, which is significantly most
 #    computationally costly.

From e8e51cebde6219d03a8b56f254667b619a1a5147 Mon Sep 17 00:00:00 2001
From: darioka <cannone.dario@gmail.com>
Date: Mon, 18 Apr 2022 16:45:21 +0200
Subject: [PATCH 4/4] moved imports to the cell where they are used

---
 .../covariance/plot_covariance_estimation.py  | 24 +++++++++----------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py
index 6118e120edbbf..be3bf4837eb9f 100644
--- a/examples/covariance/plot_covariance_estimation.py
+++ b/examples/covariance/plot_covariance_estimation.py
@@ -15,23 +15,13 @@
 trade-off.
 """
 
-import numpy as np
-import matplotlib.pyplot as plt
-from scipy import linalg
-
-from sklearn.covariance import (
-    LedoitWolf,
-    OAS,
-    ShrunkCovariance,
-    log_likelihood,
-    empirical_covariance,
-)
-from sklearn.model_selection import GridSearchCV
-
 
 # %%
 # Generate sample data
 # --------------------
+
+import numpy as np
+
 n_features, n_samples = 40, 20
 np.random.seed(42)
 base_X_train = np.random.normal(size=(n_samples, n_features))
@@ -47,6 +37,9 @@
 # Compute the likelihood on test data
 # -----------------------------------
 
+from sklearn.covariance import ShrunkCovariance, empirical_covariance, log_likelihood
+from scipy import linalg
+
 # spanning a range of possible shrinkage coefficient values
 shrinkages = np.logspace(-2, 0, 30)
 negative_logliks = [
@@ -80,6 +73,9 @@
 #   are Gaussian, in particular for small samples.
 
 
+from sklearn.model_selection import GridSearchCV
+from sklearn.covariance import LedoitWolf, OAS
+
 # GridSearch for an optimal shrinkage coefficient
 tuned_parameters = [{"shrinkage": shrinkages}]
 cv = GridSearchCV(ShrunkCovariance(), tuned_parameters)
@@ -102,6 +98,8 @@
 # different values of the shrinkage parameter. We also show the choices by
 # cross-validation, or with the LedoitWolf and OAS estimates.
 
+import matplotlib.pyplot as plt
+
 fig = plt.figure()
 plt.title("Regularized covariance: likelihood and shrinkage coefficient")
 plt.xlabel("Regularization parameter: shrinkage coefficient")