BUG: Fix np.quantile([0, 1], 0, method='weibull') (#24710) · numpy/numpy@8fe55b0 · GitHub
[go: up one dir, main page]

Skip to content

Commit 8fe55b0

Browse files
authored
BUG: Fix np.quantile([0, 1], 0, method='weibull') (#24710)
Issue #24592 was opened because the np.quantile method gives incorrect answers for the weibull method. The problem: import numpy as np; x = np.arange(10); np.quantile(x, 0., method='weibull') # output 0.0, correct; np.quantile(x, 0, method='weibull') # output 9, incorrect!; np.quantile(x, 1, method='weibull') # raises an error. The value of np.quantile(x, q=0, method='weibull') is incorrect for integer inputs. Two options to address this: (1) Do not allow integer input for np.quantile. The documentation for the probability argument states that "q : array_like of float", so this seems reasonable. But this would be a behaviour change that might have a large impact, and there are unit tests that test for integer input as well. (2) Automatically cast the input argument q to float. This would also be a behaviour change and break existing tests. We rejected these two options because of the issues mentioned. Applying either one of the options only for weibull would make the input handling less uniform. The problem with weibull (and some other methods) is that inside np.lib._function_base_impl._quantile there are two paths: one for integer virtual indices and one for other types of virtual indices (in the code, the statement `if np.issubdtype(virtual_indexes.dtype, np.integer):`). The integer path is only valid for methods returning integer indices in the range [0, size(arr)). For the weibull method, something like q = [0, .5, 1] is cast to float, which results in float virtual_indices (which contain floats with integer values, but that is okay). We can either: i) Modify the code in the integer path to handle integers < 0 and > size(arr) - 1, or ii) Check whether the method supports integer output, and if not use the alternative path. We opted for the second option. Fixes #24592
1 parent c389ca6 commit 8fe55b0

File tree

2 files changed

+32
-12
lines changed

2 files changed

+32
-12
lines changed

numpy/lib/_function_base_impl.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
# _QuantileMethods is a dictionary listing all the supported methods to
5151
# compute quantile/percentile.
5252
#
53-
# Below virtual_index refer to the index of the element where the percentile
53+
# Below virtual_index refers to the index of the element where the percentile
5454
# would be found in the sorted sample.
5555
# When the sample contains exactly the percentile wanted, the virtual_index is
5656
# an integer to the index of this element.
@@ -68,7 +68,7 @@
6868
# Discrete methods
6969
inverted_cdf=dict(
7070
get_virtual_index=lambda n, quantiles: _inverted_cdf(n, quantiles),
71-
fix_gamma=lambda gamma, _: gamma, # should never be called
71+
fix_gamma=None, # should never be called
7272
),
7373
averaged_inverted_cdf=dict(
7474
get_virtual_index=lambda n, quantiles: (n * quantiles) - 1,
@@ -81,7 +81,7 @@
8181
closest_observation=dict(
8282
get_virtual_index=lambda n, quantiles: _closest_observation(n,
8383
quantiles),
84-
fix_gamma=lambda gamma, _: gamma, # should never be called
84+
fix_gamma=None, # should never be called
8585
),
8686
# Continuous methods
8787
interpolated_inverted_cdf=dict(
@@ -121,14 +121,12 @@
121121
lower=dict(
122122
get_virtual_index=lambda n, quantiles: np.floor(
123123
(n - 1) * quantiles).astype(np.intp),
124-
fix_gamma=lambda gamma, _: gamma,
125-
# should never be called, index dtype is int
124+
fix_gamma=None, # should never be called, index dtype is int
126125
),
127126
higher=dict(
128127
get_virtual_index=lambda n, quantiles: np.ceil(
129128
(n - 1) * quantiles).astype(np.intp),
130-
fix_gamma=lambda gamma, _: gamma,
131-
# should never be called, index dtype is int
129+
fix_gamma=None, # should never be called, index dtype is int
132130
),
133131
midpoint=dict(
134132
get_virtual_index=lambda n, quantiles: 0.5 * (
@@ -143,7 +141,7 @@
143141
nearest=dict(
144142
get_virtual_index=lambda n, quantiles: np.around(
145143
(n - 1) * quantiles).astype(np.intp),
146-
fix_gamma=lambda gamma, _: gamma,
144+
fix_gamma=None,
147145
# should never be called, index dtype is int
148146
))
149147

@@ -4866,15 +4864,23 @@ def _quantile(
48664864
# Virtual because it is a floating point value, not a valid index.
48674865
# The nearest neighbours are used for interpolation
48684866
try:
4869-
method = _QuantileMethods[method]
4867+
method_props = _QuantileMethods[method]
48704868
except KeyError:
48714869
raise ValueError(
48724870
f"{method!r} is not a valid method. Use one of: "
48734871
f"{_QuantileMethods.keys()}") from None
4874-
virtual_indexes = method["get_virtual_index"](values_count, quantiles)
4872+
virtual_indexes = method_props["get_virtual_index"](values_count,
4873+
quantiles)
48754874
virtual_indexes = np.asanyarray(virtual_indexes)
48764875

4877-
if np.issubdtype(virtual_indexes.dtype, np.integer):
4876+
if method_props["fix_gamma"] is None:
4877+
supports_integers = True
4878+
else:
4879+
int_virtual_indices = np.issubdtype(virtual_indexes.dtype,
4880+
np.integer)
4881+
supports_integers = method == 'linear' and int_virtual_indices
4882+
4883+
if supports_integers:
48784884
# No interpolation needed, take the points along axis
48794885
if supports_nans:
48804886
# may contain nan, which would sort to the end
@@ -4906,7 +4912,7 @@ def _quantile(
49064912
previous = arr[previous_indexes]
49074913
next = arr[next_indexes]
49084914
# --- Linear interpolation
4909-
gamma = _get_gamma(virtual_indexes, previous_indexes, method)
4915+
gamma = _get_gamma(virtual_indexes, previous_indexes, method_props)
49104916
result_shape = virtual_indexes.shape + (1,) * (arr.ndim - 1)
49114917
gamma = gamma.reshape(result_shape)
49124918
result = _lerp(previous,

numpy/lib/tests/test_function_base.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3738,6 +3738,13 @@ def test_quantile_preserve_int_type(self, dtype):
37383738
method="nearest")
37393739
assert res.dtype == dtype
37403740

3741+
@pytest.mark.parametrize("method", quantile_methods)
3742+
def test_q_zero_one(self, method):
3743+
# gh-24710
3744+
arr = [10, 11, 12]
3745+
quantile = np.quantile(arr, q = [0, 1], method=method)
3746+
assert_equal(quantile, np.array([10, 12]))
3747+
37413748
@pytest.mark.parametrize("method", quantile_methods)
37423749
def test_quantile_monotonic(self, method):
37433750
# GH 14685
@@ -3973,6 +3980,13 @@ def test_quantile_weights_raises_unsupported_methods(self, method):
39733980
with pytest.raises(ValueError, match=msg):
39743981
np.quantile(y, 0.5, weights=w, method=method)
39753982

3983+
def test_weibull_fraction(self):
3984+
arr = [Fraction(0, 1), Fraction(1, 10)]
3985+
quantile = np.quantile(arr, [0, ], method='weibull')
3986+
assert_equal(quantile, np.array(Fraction(0, 1)))
3987+
quantile = np.quantile(arr, [Fraction(1, 2)], method='weibull')
3988+
assert_equal(quantile, np.array(Fraction(1, 20)))
3989+
39763990

39773991
class TestLerp:
39783992
@hypothesis.given(t0=st.floats(allow_nan=False, allow_infinity=False,

0 commit comments

Comments
 (0)
0