numpy · charris · Dec 13, 2016 · Dec 11, 2016 · Dec 5, 2016 · Dec 11, 2016
diff --git a/doc/release/1.12.0-notes.rst b/doc/release/1.12.0-notes.rst
@@ -177,6 +177,11 @@ The following functions are changed: ``sum``, ``product``,
 The previous identity was 1, it is now -1. See entry in `Improvements`_ for
 more explanation.
 
+ma.median warns and returns nan when unmasked invalid values are encountered
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Similar to unmasked median the masked median `ma.median` now emits a Runtime
+warning and returns `NaN` in slices where an unmasked `NaN` is present.
+
 Greater consistancy in ``assert_almost_equal``
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The precision check for scalars has been changed to match that for arrays. It

diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
@@ -3994,23 +3994,7 @@ def _median(a, axis=None, out=None, overwrite_input=False):
     if np.issubdtype(a.dtype, np.inexact) and sz > 0:
         # warn and return nans like mean would
         rout = mean(part[indexer], axis=axis, out=out)
-        part = np.rollaxis(part, axis, part.ndim)
-        n = np.isnan(part[..., -1])
-        if rout.ndim == 0:
-            if n == True:
-                warnings.warn("Invalid value encountered in median",
-                              RuntimeWarning, stacklevel=3)
-                if out is not None:
-                    out[...] = a.dtype.type(np.nan)
-                    rout = out
-                else:
-                    rout = a.dtype.type(np.nan)
-        elif np.count_nonzero(n.ravel()) > 0:
-            warnings.warn("Invalid value encountered in median for" +
-                          " %d results" % np.count_nonzero(n.ravel()),
-                          RuntimeWarning, stacklevel=3)
-            rout[n] = np.nan
-        return rout
+        return np.lib.utils._median_nancheck(part, rout, axis, out)
     else:
         # if there are no nans
         # Use mean in odd and even case to coerce data type

diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
@@ -869,7 +869,7 @@ def _nanmedian(a, axis=None, out=None, overwrite_input=False):
     else:
         # for small medians use sort + indexing which is still faster than
         # apply_along_axis
-        if a.shape[axis] < 400:
+        if a.shape[axis] < 1000:
             return _nanmedian_small(a, axis, out, overwrite_input)
         result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
         if out is not None:
@@ -1134,7 +1134,7 @@ def _nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
     See nanpercentile for parameter usage
 
     """
-    if axis is None:
+    if axis is None or a.ndim == 1:
         part = a.ravel()
         result = _nanpercentile1d(part, q, overwrite_input, interpolation)
     else:

diff --git a/numpy/lib/tests/test_nanfunctions.py b/numpy/lib/tests/test_nanfunctions.py
@@ -693,18 +693,36 @@ def test_extended_axis_invalid(self):
     def test_float_special(self):
         with suppress_warnings() as sup:
             sup.filter(RuntimeWarning)
-            a = np.array([[np.inf,  np.nan], [np.nan, np.nan]])
-            assert_equal(np.nanmedian(a, axis=0), [np.inf,  np.nan])
-            assert_equal(np.nanmedian(a, axis=1), [np.inf,  np.nan])
-            assert_equal(np.nanmedian(a), np.inf)
-
-            # minimum fill value check
-            a = np.array([[np.nan, np.nan, np.inf], [np.nan, np.nan, np.inf]])
-            assert_equal(np.nanmedian(a, axis=1), np.inf)
-
-            # no mask path
-            a = np.array([[np.inf, np.inf], [np.inf, np.inf]])
-            assert_equal(np.nanmedian(a, axis=1), np.inf)
+            for inf in [np.inf, -np.inf]:
+                a = np.array([[inf,  np.nan], [np.nan, np.nan]])
+                assert_equal(np.nanmedian(a, axis=0), [inf,  np.nan])
+                assert_equal(np.nanmedian(a, axis=1), [inf,  np.nan])
+                assert_equal(np.nanmedian(a), inf)
+
+                # minimum fill value check
+                a = np.array([[np.nan, np.nan, inf],
+                             [np.nan, np.nan, inf]])
+                assert_equal(np.nanmedian(a), inf)
+                assert_equal(np.nanmedian(a, axis=0), [np.nan, np.nan, inf])
+                assert_equal(np.nanmedian(a, axis=1), inf)
+
+                # no mask path
+                a = np.array([[inf, inf], [inf, inf]])
+                assert_equal(np.nanmedian(a, axis=1), inf)
+
+                for i in range(0, 10):
+                    for j in range(1, 10):
+                        a = np.array([([np.nan] * i) + ([inf] * j)] * 2)
+                        assert_equal(np.nanmedian(a), inf)
+                        assert_equal(np.nanmedian(a, axis=1), inf)
+                        assert_equal(np.nanmedian(a, axis=0),
+                                     ([np.nan] * i) + [inf] * j)
+
+                        a = np.array([([np.nan] * i) + ([-inf] * j)] * 2)
+                        assert_equal(np.nanmedian(a), -inf)
+                        assert_equal(np.nanmedian(a, axis=1), -inf)
+                        assert_equal(np.nanmedian(a, axis=0),
+                                     ([np.nan] * i) + [-inf] * j)
 
 
 class TestNanFunctions_Percentile(TestCase):
@@ -805,7 +823,11 @@ def test_empty(self):
                 assert_(len(w) == 0)
 
     def test_scalar(self):
-        assert_(np.nanpercentile(0., 100) == 0.)
+        assert_equal(np.nanpercentile(0., 100), 0.)
+        a = np.arange(6)
+        r = np.nanpercentile(a, 50, axis=0)
+        assert_equal(r, 2.5)
+        assert_(np.isscalar(r))
 
     def test_extended_axis_invalid(self):
         d = np.ones((3, 5, 7, 11))

diff --git a/numpy/lib/utils.py b/numpy/lib/utils.py
@@ -8,6 +8,7 @@
 
 from numpy.core.numerictypes import issubclass_, issubsctype, issubdtype
 from numpy.core import ndarray, ufunc, asarray
+import numpy as np
 
 # getargspec and formatargspec were removed in Python 3.6
 from numpy.compat import getargspec, formatargspec
@@ -1119,4 +1120,49 @@ def safe_eval(source):
     import ast
 
     return ast.literal_eval(source)
+
+
+def _median_nancheck(data, result, axis, out):
+    """
+    Utility function to check median result from data for NaN values at the end
+    and return NaN in that case. Input result can also be a MaskedArray.
+
+    Parameters
+    ----------
+    data : array
+        Input data to median function
+    result : Array or MaskedArray
+        Result of median function
+    axis : {int, sequence of int, None}, optional
+        Axis or axes along which the median was computed.
+    out : ndarray, optional
+        Output array in which to place the result.
+    Returns
+    -------
+    median : scalar or ndarray
+        Median or NaN in axes which contained NaN in the input.
+    """
+    if data.size == 0:
+        return result
+    data = np.rollaxis(data, axis, data.ndim)
+    n = np.isnan(data[..., -1])
+    # masked NaN values are ok
+    if np.ma.isMaskedArray(n):
+        n = n.filled(False)
+    if result.ndim == 0:
+        if n == True:
+            warnings.warn("Invalid value encountered in median",
+                          RuntimeWarning, stacklevel=3)
+            if out is not None:
+                out[...] = data.dtype.type(np.nan)
+                result = out
+            else:
+                result = data.dtype.type(np.nan)
+    elif np.count_nonzero(n.ravel()) > 0:
+        warnings.warn("Invalid value encountered in median for" +
+                      " %d results" % np.count_nonzero(n.ravel()),
+                      RuntimeWarning, stacklevel=3)
+        result[n] = np.nan
+    return result
+
 #-----------------------------------------------------------------------------
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py
@@ -699,24 +699,45 @@ def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
         return r
 
 def _median(a, axis=None, out=None, overwrite_input=False):
+    # when an unmasked NaN is present return it, so we need to sort the NaN
+    # values behind the mask
+    if np.issubdtype(a.dtype, np.inexact):
+        fill_value = np.inf
+    else:
+        fill_value = None
     if overwrite_input:
         if axis is None:
             asorted = a.ravel()
-            asorted.sort()
+            asorted.sort(fill_value=fill_value)
         else:
-            a.sort(axis=axis)
+            a.sort(axis=axis, fill_value=fill_value)
             asorted = a
     else:
-        asorted = sort(a, axis=axis)
+        asorted = sort(a, axis=axis, fill_value=fill_value)
 
     if axis is None:
         axis = 0
     elif axis < 0:
         axis += asorted.ndim
 
     if asorted.ndim == 1:
+        counts = count(asorted)
         idx, odd = divmod(count(asorted), 2)
-        return asorted[idx + odd - 1 : idx + 1].mean(out=out)
+        mid = asorted[idx + odd - 1 : idx + 1]
+        if np.issubdtype(asorted.dtype, np.inexact) and asorted.size > 0:
+            # avoid inf / x = masked
+            s = mid.sum(out=out)
+            np.true_divide(s, 2., casting='unsafe')
+            s = np.lib.utils._median_nancheck(asorted, s, axis, out)
+        else:
+            s = mid.mean(out=out)
+
+        # if result is masked either the input contained enough
+        # minimum_fill_value so that it would be the median or all values
+        # masked
+        if np.ma.is_masked(s) and not np.all(asorted.mask):
+            return np.ma.minimum_fill_value(asorted)
+        return s
 
     counts = count(asorted, axis=axis)
     h = counts // 2
@@ -727,24 +748,35 @@ def _median(a, axis=None, out=None, overwrite_input=False):
     ind = np.meshgrid(*axes_grid, sparse=True, indexing='ij')
 
     # insert indices of low and high median
-    ind.insert(axis, np.maximum(0, h - 1))
+    ind.insert(axis, h - 1)
     low = asorted[tuple(ind)]
-    ind[axis] = h
+    ind[axis] = np.minimum(h, asorted.shape[axis] - 1)
     high = asorted[tuple(ind)]
 
     # duplicate high if odd number of elements so mean does nothing
     odd = counts % 2 == 1
-    if asorted.ndim > 1:
-        np.copyto(low, high, where=odd)
-    elif odd:
-        low = high
+    np.copyto(low, high, where=odd)
+    # not necessary for scalar True/False masks
+    try:
+        np.copyto(low.mask, high.mask, where=odd)
+    except:
+        pass
 
     if np.issubdtype(asorted.dtype, np.inexact):
         # avoid inf / x = masked
         s = np.ma.sum([low, high], axis=0, out=out)
         np.true_divide(s.data, 2., casting='unsafe', out=s.data)
+
+        s = np.lib.utils._median_nancheck(asorted, s, axis, out)
     else:
         s = np.ma.mean([low, high], axis=0, out=out)
+
+    # if result is masked either the input contained enough minimum_fill_value
+    # so that it would be the median or all values masked
+    if np.ma.is_masked(s):
+        rep = (~np.all(asorted.mask, axis=axis)) & s.mask
+        s.data[rep] = np.ma.minimum_fill_value(asorted)
+        s.mask[rep] = False
     return s