ENH: Amend TestQuantile::test_quantile_identification_equation (#9211) · chunweiyuan/numpy@d684454 · GitHub
[go: up one dir, main page]

Skip to content

Commit d684454

Browse files
committed
ENH: Amend TestQuantile::test_quantile_identification_equation (numpy#9211)
1 parent c8e2343 commit d684454

File tree

1 file changed

+20
-18
lines changed

1 file changed

+20
-18
lines changed

numpy/lib/tests/test_function_base.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3683,30 +3683,32 @@ def test_quantile_scalar_nan(self):
36833683
def test_quantile_identification_equation(self, method, alpha):
36843684
# Test that the identification equation holds for the empirical
36853685
# CDF:
3686-
# E[V(x, Y)] = 0 <=> x is quantile
3686+
# dE[V(x, Y) * (x - y)]/dx = 0 <=> x is quantile
36873687
# with Y the random variable for which we have observed values and
36883688
# V(x, y) the canonical identification function for the quantile (at
36893689
# level alpha), see
3690-
# https://doi.org/10.48550/arXiv.0912.0902
3690+
# https://doi.org/10.48550/arXiv.0912.0902
3691+
3692+
# This test case sets up a uniform distribution [0, 1), for which
3693+
# the above formula has an analytical solution:
3694+
# x = alpha & E[V(x, Y) * (x - y)] = alpha / 2 * (1 - alpha),
3695+
# and tests that the numerical integral using np.quantile()'s result
3696+
# (actual) is reasonably close to the analytical result (expected).
36913697
rng = np.random.default_rng(4321)
3692-
# We choose n and alpha such that we cover 3 cases:
3693-
# - n * alpha is an integer
3694-
# - n * alpha is a float that gets rounded down
3695-
# - n * alpha is a float that gets rounded up
36963698
n = 102 # n * alpha = 20.4, 51. , 91.8
3697-
y = rng.random(n)
3699+
y = np.sort(rng.random(n))
3700+
3701+
# Here we compute the numerical integral (actual).
3702+
# NOTE: The following estimate of dy assumes no duplicate y values.
3703+
dy = np.diff(np.append(y, [1])) # a crude approximation of bin widths
3704+
dy = dy / np.sum(dy) # normalize to make sure bin widths sum to 1
36983705
x = np.quantile(y, alpha, method=method)
3699-
if method in ("higher",):
3700-
# These methods do not fulfill the identification equation.
3701-
assert np.abs(np.mean(self.V(x, y, alpha))) > 0.1 / n
3702-
elif int(n * alpha) == n * alpha:
3703-
# We can expect exact results, up to machine precision.
3704-
assert_allclose(np.mean(self.V(x, y, alpha)), 0, atol=1e-14)
3705-
else:
3706-
# V = (x >= y) - alpha cannot sum to zero exactly but within
3707-
# "sample precision".
3708-
assert_allclose(np.mean(self.V(x, y, alpha)), 0,
3709-
atol=1 / n / np.amin([alpha, 1 - alpha]))
3706+
3707+
# For a uniform distribution [0, 1), the minimum occurs at x = alpha,
3708+
# where E[V(x, Y) * (x - y)] = alpha / 2 * (1 - alpha).
3709+
actual = np.sum(self.V(x, y, alpha) * (x - y) * dy) # crude integral.
3710+
expected = alpha / 2 * (1 - alpha)
3711+
assert_allclose(actual, expected, rtol=1/n) # rtol ~ average bin width
37103712

37113713
@pytest.mark.parametrize("method", quantile_methods)
37123714
@pytest.mark.parametrize("alpha", [0.2, 0.5, 0.9])

0 commit comments

Comments
 (0)
0