@@ -3683,30 +3683,32 @@ def test_quantile_scalar_nan(self):
3683
3683
def test_quantile_identification_equation (self , method , alpha ):
3684
3684
# Test that the identification equation holds for the empirical
3685
3685
# CDF:
3686
- # E [V(x, Y)] = 0 <=> x is quantile
3686
+ # dE [V(x, Y) * (x - y)]/dx = 0 <=> x is quantile
3687
3687
# with Y the random variable for which we have observed values and
3688
3688
# V(x, y) the canonical identification function for the quantile (at
3689
3689
# level alpha), see
3690
- # https://doi.org/10.48550/arXiv.0912.0902
3690
+ # https://doi.org/10.48550/arXiv.0912.0902
3691
+
3692
+ # This test case sets up a uniform distribution [0, 1), for which
3693
+ # the above formula has an analytical solution:
3694
+ # x = alpha & E[V(x, Y) * (x - y)] = alpha / 2 * (1 - alpha),
3695
+ # and tests that the numerical integral using np.quantile()'s result
3696
+ # (actual) is reasonably close to the analytical result (expected).
3691
3697
rng = np .random .default_rng (4321 )
3692
- # We choose n and alpha such that we cover 3 cases:
3693
- # - n * alpha is an integer
3694
- # - n * alpha is a float that gets rounded down
3695
- # - n * alpha is a float that gest rounded up
3696
3698
n = 102 # n * alpha = 20.4, 51. , 91.8
3697
- y = rng .random (n )
3699
+ y = np .sort (rng .random (n ))
3700
+
3701
+ # Here we compute the numerical integral (actual).
3702
+ # NOTE: The following estimate of dy assumes no duplicate y values.
3703
+ dy = np .diff (np .append (y , [1 ])) # a crude approximation of bin widths
3704
+ dy = dy / np .sum (dy ) # normalize to make sure bin widths sum to 1
3698
3705
x = np .quantile (y , alpha , method = method )
3699
- if method in ("higher" ,):
3700
- # These methods do not fulfill the identification equation.
3701
- assert np .abs (np .mean (self .V (x , y , alpha ))) > 0.1 / n
3702
- elif int (n * alpha ) == n * alpha :
3703
- # We can expect exact results, up to machine precision.
3704
- assert_allclose (np .mean (self .V (x , y , alpha )), 0 , atol = 1e-14 )
3705
- else :
3706
- # V = (x >= y) - alpha cannot sum to zero exactly but within
3707
- # "sample precision".
3708
- assert_allclose (np .mean (self .V (x , y , alpha )), 0 ,
3709
- atol = 1 / n / np .amin ([alpha , 1 - alpha ]))
3706
+
3707
+ # For a uniform distribution (0, 1], the minimum occurs at x = alpha,
3708
+ # where E[V(x, Y) * (x - y)] = alpha / 2 * (1 - alpha).
3709
+ actual = np .sum (self .V (x , y , alpha ) * (x - y ) * dy ) # crude integral.
3710
+ expected = alpha / 2 * (1 - alpha )
3711
+ assert_allclose (actual , expected , rtol = 1 / n ) # rtol ~ average bin width
3710
3712
3711
3713
@pytest .mark .parametrize ("method" , quantile_methods )
3712
3714
@pytest .mark .parametrize ("alpha" , [0.2 , 0.5 , 0.9 ])
0 commit comments