chunweiyuan
diff --git a/‎numpy/lib/function_base.py
Lines changed: 65 additions & 40 deletions b/‎numpy/lib/function_base.py
Lines changed: 65 additions & 40 deletions
diff --git a/‎numpy/lib/tests/test_function_base.py
Lines changed: 56 additions & 18 deletions b/‎numpy/lib/tests/test_function_base.py
Lines changed: 56 additions & 18 deletions
diff --git a/‎numpy/lib/tests/test_nanfunctions.py
Lines changed: 4 additions & 18 deletions b/‎numpy/lib/tests/test_nanfunctions.py
Lines changed: 4 additions & 18 deletions
@@ -69,84 +69,108 @@
     # --- HYNDMAN and FAN METHODS
     # Discrete methods
     inverted_cdf=dict(
-        get_virtual_index=lambda n, quantiles: _inverted_cdf(n, quantiles),
-        fix_gamma=lambda gamma, _: gamma,  # should never be called
+        get_virtual_index=lambda n, quantiles: (n * quantiles) - 1,
+        fix_gamma=lambda g, _: _get_gamma_mask(
+            shape=g.shape,
+            default_value=1.,
+            conditioned_value=0.0,
+            where=g == 0),
+        discrete_shortcut=lambda n, quantiles: _inverted_cdf(n, quantiles),
     ),
     averaged_inverted_cdf=dict(
         get_virtual_index=lambda n, quantiles: (n * quantiles) - 1,
-        fix_gamma=lambda gamma, _: _get_gamma_mask(
-            shape=gamma.shape,
+        fix_gamma=lambda g, _: _get_gamma_mask(
+            shape=g.shape,
             default_value=1.,
             conditioned_value=0.5,
-            where=gamma == 0),
+            where=g == 0),
+        discrete_shortcut=None,
     ),
     closest_observation=dict(
-        get_virtual_index=lambda n, quantiles: _closest_observation(n,
-                                                                    quantiles),
-        fix_gamma=lambda gamma, _: gamma,  # should never be called
+        get_virtual_index=lambda n, quantiles: (n * quantiles) - 1 - 0.5,
+        fix_gamma=lambda g, index: _get_gamma_mask(
+            shape=g.shape,
+            default_value=1.,
+            conditioned_value=0.0,
+            where=(g == 0) & (np.floor(index) % 2 == 0)),
+        discrete_shortcut=(lambda n, quantiles:
+                           _closest_observation(n, quantiles)),
     ),
     # Continuous methods
     interpolated_inverted_cdf=dict(
         get_virtual_index=lambda n, quantiles:
         _compute_virtual_index(n, quantiles, 0, 1),
-        fix_gamma=lambda gamma, _: gamma,
+        fix_gamma=lambda g, _: g,
+        discrete_shortcut=None,
     ),
     hazen=dict(
         get_virtual_index=lambda n, quantiles:
         _compute_virtual_index(n, quantiles, 0.5, 0.5),
-        fix_gamma=lambda gamma, _: gamma,
+        fix_gamma=lambda g, _: g,
+        discrete_shortcut=None,
     ),
     weibull=dict(
         get_virtual_index=lambda n, quantiles:
         _compute_virtual_index(n, quantiles, 0, 0),
-        fix_gamma=lambda gamma, _: gamma,
+        fix_gamma=lambda g, _: g,
+        discrete_shortcut=None,
     ),
     # Default method.
     # To avoid some rounding issues, `(n-1) * quantiles` is preferred to
     # `_compute_virtual_index(n, quantiles, 1, 1)`.
     # They are mathematically equivalent.
     linear=dict(
         get_virtual_index=lambda n, quantiles: (n - 1) * quantiles,
-        fix_gamma=lambda gamma, _: gamma,
+        fix_gamma=lambda g, _: g,
+        discrete_shortcut=None,
     ),
     median_unbiased=dict(
         get_virtual_index=lambda n, quantiles:
         _compute_virtual_index(n, quantiles, 1 / 3.0, 1 / 3.0),
-        fix_gamma=lambda gamma, _: gamma,
+        fix_gamma=lambda g, _: g,
+        discrete_shortcut=None,
     ),
     normal_unbiased=dict(
         get_virtual_index=lambda n, quantiles:
         _compute_virtual_index(n, quantiles, 3 / 8.0, 3 / 8.0),
-        fix_gamma=lambda gamma, _: gamma,
+        fix_gamma=lambda g, _: g,
+        discrete_shortcut=None,
     ),
     # --- OTHER METHODS
     lower=dict(
-        get_virtual_index=lambda n, quantiles: np.floor(
+        get_virtual_index=lambda n, quantiles: (n - 1) * quantiles,
+        fix_gamma=lambda g, _: np.floor(g),
+        discrete_shortcut=lambda n, quantiles: np.floor(
             (n - 1) * quantiles).astype(np.intp),
-        fix_gamma=lambda gamma, _: gamma,
-        # should never be called, index dtype is int
     ),
     higher=dict(
-        get_virtual_index=lambda n, quantiles: np.ceil(
+        get_virtual_index=lambda n, quantiles: (n - 1) * quantiles,
+        fix_gamma=lambda g, _: np.ceil(g),
+        discrete_shortcut=lambda n, quantiles: np.ceil(
             (n - 1) * quantiles).astype(np.intp),
-        fix_gamma=lambda gamma, _: gamma,
-        # should never be called, index dtype is int
     ),
     midpoint=dict(
         get_virtual_index=lambda n, quantiles: 0.5 * (
                 np.floor((n - 1) * quantiles)
                 + np.ceil((n - 1) * quantiles)),
-        fix_gamma=lambda gamma, index: _get_gamma_mask(
-            shape=gamma.shape,
+        fix_gamma=lambda g, index: _get_gamma_mask(
+            shape=g.shape,
             default_value=0.5,
             conditioned_value=0.,
-            where=index % 1 == 0),
+            where=(index % 1 == 0) & (g == 0)),
+        discrete_shortcut=None,
     ),
     nearest=dict(
-        get_virtual_index=lambda n, quantiles: np.around(
+        get_virtual_index=lambda n, quantiles: (n - 1) * quantiles,
+        # fix_gamma here is meant to match the behavior of discrete_shortcut,
+        # because np.around rounds halves to the nearest even integer.
+        fix_gamma=lambda g, index: _get_gamma_mask(
+            shape=g.shape,
+            default_value=np.around(g),
+            conditioned_value=np.around(index) - np.around(index - g),
+            where=g == 0.5),
+        discrete_shortcut=lambda n, quantiles: np.around(
             (n - 1) * quantiles).astype(np.intp),
-        fix_gamma=lambda gamma, _: gamma,
-        # should never be called, index dtype is int
     ))
 
 
@@ -4765,7 +4789,7 @@ def _compute_virtual_index(n, quantiles, alpha: float, beta: float):
 
 def _get_gamma(virtual_indexes, previous_indexes, method):
     """
-    Compute gamma (a.k.a 'm' or 'weight') for the linear interpolation
+    Compute gamma (a.k.a 'm') for the linear interpolation
     of quantiles.
 
     virtual_indexes : array_like
@@ -4780,8 +4804,10 @@ def _get_gamma(virtual_indexes, previous_indexes, method):
     gamma is usually the fractional part of virtual_indexes but can be modified
     by the interpolation method.
     """
-    gamma = np.asanyarray(virtual_indexes - previous_indexes)
-    gamma = method["fix_gamma"](gamma, virtual_indexes)
+    # % 1 because index diff can be > 1 in weight space calculation
+    # when virtual index lies within a value of large weight.
+    g = np.asanyarray(virtual_indexes - previous_indexes) % 1
+    gamma = method["fix_gamma"](g, virtual_indexes)
     return np.asanyarray(gamma)
 
 
@@ -4899,12 +4925,6 @@ def _get_indexes(arr, virtual_indexes, valid_values_count):
     if indexes_below_bounds.any():
         previous_indexes[indexes_below_bounds] = 0
         next_indexes[indexes_below_bounds] = 0
-    if np.issubdtype(arr.dtype, np.inexact):
-        # After the sort, slices having NaNs will have for last element a NaN
-        virtual_indexes_nans = np.isnan(virtual_indexes)  # indexes have nans?
-        if virtual_indexes_nans.any():
-            previous_indexes[virtual_indexes_nans] = -1
-            next_indexes[virtual_indexes_nans] = -1
     previous_indexes = previous_indexes.astype(np.intp)
     next_indexes = next_indexes.astype(np.intp)
     return previous_indexes, next_indexes
@@ -5037,13 +5057,11 @@ def _get_weighted_quantile_values(arr1d, wgts1d):
                 np.interp(previous_w_indexes, w_index_bounds, real_indexes)
             next_indexes =\
                 np.interp(next_w_indexes, w_index_bounds, real_indexes)
-            indexes =\
-                np.interp(weight_space_indexes, w_index_bounds, real_indexes)
 
             # method-dependent gammas determine interpolation scheme between
             # neighboring values, and are computed in weight space.
-            gamma = _get_gamma(indexes, previous_indexes, method)
-
+            gamma =\
+                _get_gamma(weight_space_indexes, previous_w_indexes, method)
             previous = take(arr1d, previous_indexes.astype(int))
             next = take(arr1d, next_indexes.astype(int))
             return _lerp(previous, next, gamma, out=out)
@@ -5064,7 +5082,14 @@ def _get_weighted_quantile_values(arr1d, wgts1d):
         if axis != 0:  # moveaxis is slow, so only call it if necessary.
             arr = np.moveaxis(arr, axis, destination=0)
 
-        virtual_indexes = method["get_virtual_index"](values_count, quantiles)
+        if method["discrete_shortcut"]:  # lumps indexing + gamma interpolation
+            # discrete methods result in dtype = np.intp
+            virtual_indexes =\
+                method["discrete_shortcut"](values_count, quantiles)
+        else:
+            virtual_indexes =\
+                method["get_virtual_index"](values_count, quantiles)
+
         virtual_indexes = np.asanyarray(virtual_indexes)
 
         result, slices_having_nans =\
 
@@ -3785,12 +3785,7 @@ def test_quantile_add_and_multiply_constant(self, method, alpha):
             # "median_unbiased", "normal_unbiased", "midpoint"
             assert_allclose(q, np.quantile(y, alpha, method=method))
 
-    @pytest.mark.parametrize(
-        "method",
-        ['inverted_cdf', 'averaged_inverted_cdf', 'closest_observation',
-         'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear',
-         'median_unbiased', 'normal_unbiased',
-         'nearest', 'lower', 'higher', 'midpoint'])
+    @pytest.mark.parametrize("method", quantile_methods)
     def test_weights_all_ones(self, method):
         ar = np.arange(24).reshape(2, 3, 4)
         q = 0.5
@@ -3827,12 +3822,7 @@ def test_weights_all_ones(self, method):
         actual = np.quantile(ar, q=q, weights=weights, method=method)
         assert_almost_equal(actual, expected)
 
-    @pytest.mark.parametrize(
-        "method",
-        ['inverted_cdf', 'closest_observation',
-         'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear',
-         'median_unbiased', 'normal_unbiased',
-         'nearest', 'lower', 'higher', 'midpoint'])
+    @pytest.mark.parametrize("method", quantile_methods)
     def test_weights_on_multiple_axes(self, method):
         """Test supplying ND weights."""
         ar = np.arange(12).reshape(3, 4).astype(float)
@@ -3844,12 +3834,7 @@ def test_weights_on_multiple_axes(self, method):
                              weights=weights, method=method)
         assert_almost_equal(actual, expected)
 
-    @pytest.mark.parametrize(
-        "method",
-        ['inverted_cdf', 'averaged_inverted_cdf', 'closest_observation',
-         'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear',
-         'median_unbiased', 'normal_unbiased',
-         'nearest', 'lower', 'higher', 'midpoint'])
+    @pytest.mark.parametrize("method", quantile_methods)
     def test_various_weights(self, method):
         """Test various weights arg scenarios."""
         ar = np.arange(12).reshape(3, 4)
@@ -3919,6 +3904,59 @@ def test_weights_flags(self):
         with assert_raises_regex(ZeroDivisionError, "Weights sum to zero"):
             np.quantile(ar, q=q, axis=axis, weights=[0, 0])
 
+    @pytest.mark.parametrize("method", ["linear", "lower", "higher",
+                                        "midpoint", "nearest"])
+    def test_decimal_weights_for_linear_discrete_methods(self, method):
+        """Test consistency when interpolating between weight bands.
+
+        For these five methods, the virtual index is (n - 1) * quantile.
+        Therefore, if sum of weights == 3, then q=0.5 points to index = 1.
+        An array of [3, 4] with weights=[2, 1] --> [3, 3, 4],
+        and the 0.5 quantile sits at index=1, which is 3.
+
+        But if weights=[1.9, 1.1], then the index boundary for value 3
+        is [0, 0.9], and the index boundary for value 4 is [1.9, 2].
+        This means, the index=1 value is equivalent to interpolating
+        between 3 and 4, at q=0.1.
+
+        This test is designed to ensure our weighted quantile method is
+        faithful to this paradigm.
+        """
+        actual = np.quantile([3, 4], q=0.5, weights=[1.9, 1.1], method=method)
+        expected = np.quantile([3, 4], q=0.1, method=method)
+        assert_almost_equal(actual, expected)
+
+
+class TestQuantileMethods:
+    """Test algorithmic consistency within _QuantileMethods."""
+
+    @pytest.mark.parametrize("method", quantile_methods)
+    def test_discrete_shortcut(self, method):
+        """Check that discrete_shortcut agrees with the generic path.
+
+        For methods defining discrete_shortcut, its output must equal
+        previous_index + fix_gamma(...) built from get_virtual_index.
+        """
+        n = 10
+        ar = np.random.rand(10)
+        quantiles = np.array([0.0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0])
+
+        # methods whose discrete_shortcut entry is falsy (None) are not checked
+        if nfb._QuantileMethods[method]["discrete_shortcut"]:
+            actual =\
+                nfb._QuantileMethods[method]["discrete_shortcut"](n, quantiles)
+            # rebuild the same result through the generic index/gamma path
+            indexes =\
+                nfb._QuantileMethods[method]["get_virtual_index"](n, quantiles)
+            indexes[indexes < 0] = 0  # clamp negatives to index 0
+            previous_indexes, _ = nfb._get_indexes(ar, indexes, n)
+            previous_indexes[previous_indexes == -1] = n - 1  # -1 means last
+            gs = indexes - previous_indexes
+            gammas = nfb._QuantileMethods[method]["fix_gamma"](gs, indexes)
+            expected = previous_indexes + gammas
+
+            assert_almost_equal(actual, expected)
+
 
 class TestLerp:
     @hypothesis.given(t0=st.floats(allow_nan=False, allow_infinity=False,
 
@@ -5,6 +5,7 @@
 
 import numpy as np
 from numpy.core.numeric import normalize_axis_tuple
+import numpy.lib.function_base as nfb
 from numpy.lib.nanfunctions import _nan_mask, _replace_nan
 from numpy.testing import (
     assert_, assert_equal, assert_almost_equal, assert_raises,
@@ -1276,12 +1277,7 @@ def test_allnans(self, axis, dtype, array):
         assert np.isnan(out).all()
         assert out.dtype == array.dtype
 
-    @pytest.mark.parametrize(
-        "method",
-        ['inverted_cdf', 'averaged_inverted_cdf', 'closest_observation',
-         'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear',
-         'median_unbiased', 'normal_unbiased',
-         'nearest', 'lower', 'higher', 'midpoint'])
+    @pytest.mark.parametrize("method", list(nfb._QuantileMethods.keys()))
     def test_weights_all_ones(self, method):
         """Test that all weights == 1 gives same results as no weights."""
         ar = np.arange(24).reshape(2, 3, 4).astype(float)
@@ -1320,12 +1316,7 @@ def test_weights_all_ones(self, method):
         actual = np.nanquantile(ar, q=q, weights=weights, method=method)
         assert_almost_equal(actual, expected)
 
-    @pytest.mark.parametrize(
-        "method",
-        ['inverted_cdf', 'averaged_inverted_cdf', 'closest_observation',
-         'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear',
-         'median_unbiased', 'normal_unbiased',
-         'nearest', 'lower', 'higher', 'midpoint'])
+    @pytest.mark.parametrize("method", list(nfb._QuantileMethods.keys()))
     def test_multiple_axes(self, method):
         """Test that weights work on multiple axes."""
         ar = np.arange(12).reshape(3, 4).astype(float)
@@ -1339,12 +1330,7 @@ def test_multiple_axes(self, method):
                                 method=method)
         assert_almost_equal(actual, expected)
 
-    @pytest.mark.parametrize(
-        "method",
-        ['inverted_cdf', 'averaged_inverted_cdf', 'closest_observation',
-         'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear',
-         'median_unbiased', 'normal_unbiased',
-         'nearest', 'lower', 'higher', 'midpoint'])
+    @pytest.mark.parametrize("method", list(nfb._QuantileMethods.keys()))
     def test_various_weights(self, method):
         """Test various weights arg scenarios."""
         ar = np.arange(12).reshape(3, 4).astype(float)