TST: More tests for Ledoit-Wolf · GaelVaroquaux/scikit-learn@6a7eae5 · GitHub
[go: up one dir, main page]

Skip to content

Commit 6a7eae5

Browse files
committed
TST: More tests for Ledoit-Wolf
Fixes scikit-learn#6195. Indeed, scikit-learn#6195 was not a bug: the code in scikit-learn is correct. However, it is fairly hard to convince oneself that this is the case. This commit adds tests that are easier to read and to relate to the publication.
1 parent d3175db commit 6a7eae5

File tree

2 files changed

+38
-0
lines changed

2 files changed

+38
-0
lines changed

sklearn/covariance/shrunk_covariance_.py

Lines changed: 5 additions & 0 deletions
8000
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,9 @@ def ledoit_wolf_shrinkage(X, assume_centered=False, block_size=1000):
198198
if not assume_centered:
199199
X = X - X.mean(0)
200200

201+
# A non-blocked version of the computation is present in the tests
202+
# in tests/test_covariance.py
203+
201204
# number of blocks to split the covariance matrix into
202205
n_splits = int(n_features / block_size)
203206
X2 = X ** 2
@@ -232,6 +235,8 @@ def ledoit_wolf_shrinkage(X, assume_centered=False, block_size=1000):
232235
delta = delta_ - 2. * mu * emp_cov_trace.sum() + n_features * mu ** 2
233236
delta /= n_features
234237
# get final beta as the min between beta and delta
238+
# We do this to prevent shrinking more than "1", which would invert
239+
# the value of covariances
235240
beta = min(beta, delta)
236241
# finally get shrinkage
237242
shrinkage = 0 if beta == 0 else beta / delta

sklearn/covariance/tests/test_covariance.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ def test_ledoit_wolf():
112112
lw = LedoitWolf(assume_centered=True)
113113
lw.fit(X_centered)
114114
shrinkage_ = lw.shrinkage_
115+
115116
score_ = lw.score(X_centered)
116117
assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
117118
assume_centered=True),
@@ -186,6 +187,38 @@ def test_ledoit_wolf():
186187
assert(lw.precision_ is None)
187188

188189

190+
def _naive_ledoit_wolf_shrinkage(X):
    """Compute the Ledoit-Wolf shrinkage coefficient without blocking.

    A simple, non-blocked transcription of the formulas from
    "O. Ledoit and M. Wolf, A Well-Conditioned Estimator for
    Large-Dimensional Covariance Matrices"; beta and delta are given in
    the beginning of section 3.2. It is meant as an easy-to-read
    reference to compare the blocked implementation against.

    Parameters
    ----------
    X : 2D array, unpacked as (n_samples, n_features)
        Input data. It does not need to be centered beforehand.

    Returns
    -------
    shrinkage : float
        ``beta / delta`` with beta clipped to at most delta, or 0 when
        the clipped beta is exactly 0.
    """
    n_samples, n_features = X.shape
    # Center explicitly so that the squared-data term below is computed on
    # the same centered data as the covariance; the blocked implementation
    # likewise subtracts the column means before squaring.
    X = X - X.mean(0)
    emp_cov = empirical_covariance(X, assume_centered=False)
    mu = np.trace(emp_cov) / n_features
    # delta: squared Frobenius distance between emp_cov and mu * identity,
    # normalised by the number of features.
    delta_ = emp_cov.copy()
    delta_.flat[::n_features + 1] -= mu
    delta = (delta_ ** 2).sum() / n_features
    X2 = X ** 2
    beta_ = 1. / (n_features * n_samples) \
        * np.sum(np.dot(X2.T, X2) / n_samples - emp_cov ** 2)

    # Clip beta so that the resulting shrinkage never exceeds 1.
    beta = min(beta_, delta)
    # Same guard as the blocked implementation: avoid 0 / 0 when the
    # clipped beta is exactly zero.
    shrinkage = 0 if beta == 0 else beta / delta
    return shrinkage
210+
211+
212+
def test_ledoit_wolf_small():
    """Check the blocked Ledoit-Wolf shrinkage against the naive formulas."""
    # Restrict the data to its first four columns for the comparison.
    X_small = X[:, :4]

    estimator = LedoitWolf()
    estimator.fit(X_small)
    estimated = estimator.shrinkage_

    expected = _naive_ledoit_wolf_shrinkage(X_small)
    assert_almost_equal(estimated, expected)
220+
221+
189222
def test_ledoit_wolf_large():
190223
# test that ledoit_wolf doesn't error on data that is wider than block_size
191224
rng = np.random.RandomState(0)

0 commit comments

Comments
 (0)
0