keflavich
diff --git a/‎CHANGES.rst
Lines changed: 4 additions & 0 deletions b/‎CHANGES.rst
Lines changed: 4 additions & 0 deletions
diff --git a/‎astropy/stats/funcs.py
Lines changed: 52 additions & 10 deletions b/‎astropy/stats/funcs.py
Lines changed: 52 additions & 10 deletions
diff --git a/‎astropy/stats/tests/test_funcs.py
Lines changed: 67 additions & 0 deletions b/‎astropy/stats/tests/test_funcs.py
Lines changed: 67 additions & 0 deletions
@@ -597,6 +597,10 @@ New Features
   - Added ``axis`` keyword to ``biweight_location`` and
     ``biweight_midvariance``. [#5127, #5158]
 
+  - ``median_absolute_deviation`` and ``mad_std`` have ``ignore_nan`` option
+    that will use ``np.ma.median`` with nans masked out or ``np.nanmedian``
+    instead of ``np.median`` when computing the median. [#5232]
+
 - ``astropy.table``
 
   - Allow renaming mixin columns. [#5469]
 
@@ -15,6 +15,10 @@
 import math
 import numpy as np
 
+from warnings import warn
+
+from ..utils.compat import NUMPY_LT_1_10
+from ..utils.decorators import deprecated_renamed_argument
 from ..utils import isiterable
 from ..extern.six.moves import range
 
@@ -718,22 +722,28 @@ def poisson_conf_interval(n, interval='root-n', sigma=1, background=0,
     return conf_interval
 
 
-def median_absolute_deviation(a, axis=None, func=None):
+@deprecated_renamed_argument('a', 'data', '2.0')
+def median_absolute_deviation(data, axis=None, func=None, ignore_nan=False):
     """
     Calculate the median absolute deviation (MAD).
 
     The MAD is defined as ``median(abs(a - median(a)))``.
 
     Parameters
     ----------
-    a : array-like
+    data : array-like
         Input array or object that can be converted to an array.
     axis : {int, sequence of int, None}, optional
         Axis along which the MADs are computed.  The default (`None`) is
         to compute the MAD of the flattened array.
     func : callable, optional
         The function used to compute the median. Defaults to `numpy.ma.median`
         for masked arrays, otherwise to `numpy.median`.
+    ignore_nan : bool
+        Ignore NaN values (treat them as if they are not in the array) when
+        computing the median.  This will use `numpy.ma.median` if ``axis`` is
+        specified, or `numpy.nanmedian` if ``axis==None`` and numpy's version
+        is >1.10 because nanmedian is slightly faster in this case.
 
     Returns
     -------
@@ -765,26 +775,52 @@ def median_absolute_deviation(a, axis=None, func=None):
         # See https://github.com/numpy/numpy/issues/7330 why using np.ma.median
         # for normal arrays should not be done (summary: np.ma.median always
         # returns an masked array even if the result should be scalar). (#4658)
-        if isinstance(a, np.ma.MaskedArray):
+        if isinstance(data, np.ma.MaskedArray):
+            is_masked = True
             func = np.ma.median
+            if ignore_nan:
+                data = np.ma.masked_invalid(data)
+        elif ignore_nan:
+            is_masked = False
+            func = np.nanmedian
         else:
+            is_masked = False
             func = np.median
+    else:
+        is_masked = None
 
-    a = np.asanyarray(a)
-    a_median = func(a, axis=axis)
+    if not ignore_nan and NUMPY_LT_1_10 and np.any(np.isnan(data)):
+        warn("Numpy versions <1.10 will return a number rather than NaN for "
+             "the median of arrays containing NaNs.  This behavior is "
+             "unlikely to be what you expect.")
+
+    data = np.asanyarray(data)
+    # np.nanmedian has `keepdims`, which is a good option if we're not allowing
+    # user-passed functions here
+    data_median = func(data, axis=axis)
 
     # broadcast the median array before subtraction
     if axis is not None:
         if isiterable(axis):
             for ax in sorted(list(axis)):
-                a_median = np.expand_dims(a_median, axis=ax)
+                data_median = np.expand_dims(data_median, axis=ax)
         else:
-            a_median = np.expand_dims(a_median, axis=axis)
+            data_median = np.expand_dims(data_median, axis=axis)
+
+    result = func(np.abs(data - data_median), axis=axis, overwrite_input=True)
+
+    if axis is None and np.ma.isMaskedArray(result):
+        # return scalar version
+        result = result.item()
+    elif np.ma.isMaskedArray(result) and is_masked == False:
+        # if the input array was not a masked array, we don't want to return a
+        # masked array
+        result = result.filled(fill_value=np.nan)
 
-    return func(np.abs(a - a_median), axis=axis)
+    return result
 
 
-def mad_std(data, axis=None, func=None):
+def mad_std(data, axis=None, func=None, ignore_nan=False):
     r"""
     Calculate a robust standard deviation using the `median absolute
     deviation (MAD)
@@ -811,6 +847,11 @@ def mad_std(data, axis=None, func=None):
     func : callable, optional
         The function used to compute the median. Defaults to `numpy.ma.median`
         for masked arrays, otherwise to `numpy.median`.
+    ignore_nan : bool
+        Ignore NaN values (treat them as if they are not in the array) when
+        computing the median.  This will use `numpy.ma.median` if ``axis`` is
+        specified, or `numpy.nanmedian` if ``axis=None`` and numpy's version is
+        >1.10 because nanmedian is slightly faster in this case.
 
     Returns
     -------
@@ -834,7 +875,8 @@ def mad_std(data, axis=None, func=None):
     """
 
     # NOTE: 1. / scipy.stats.norm.ppf(0.75) = 1.482602218505602
-    return median_absolute_deviation(data, axis=axis, func=func) * 1.482602218505602
+    MAD = median_absolute_deviation(data, axis=axis, func=func, ignore_nan=ignore_nan)
+    return MAD * 1.482602218505602
 
 
 def signal_to_noise_oir_ccd(t, source_eps, sky_eps, dark_eps, rd, npix,
 
@@ -7,6 +7,8 @@
 from numpy.random import randn, normal
 from numpy.testing import assert_equal
 from numpy.testing.utils import assert_allclose
+from astropy.utils.compat import NUMPY_LT_1_10, NUMPY_LT_1_11
+from ...tests.helper import catch_warnings
 
 try:
     import scipy  # pylint: disable=W0611
@@ -329,6 +331,48 @@ def test_mad_std():
         data = normal(5, 2, size=(100, 100))
         assert_allclose(funcs.mad_std(data), 2.0, rtol=0.05)
 
+@pytest.mark.xfail('not NUMPY_LT_1_10')
+def test_mad_std_scalar_return():
+    with NumpyRNGContext(12345):
+        data = normal(5, 2, size=(10, 10))
+        # make a masked array with no masked points
+        data = np.ma.masked_where(np.isnan(data), data)
+        rslt = funcs.mad_std(data)
+        # want a scalar result, NOT a masked array
+        assert np.isscalar(rslt)
+
+        data[5,5] = np.nan
+        rslt = funcs.mad_std(data, ignore_nan=True)
+        assert np.isscalar(rslt)
+        with catch_warnings() as warns:
+            rslt = funcs.mad_std(data)
+            assert np.isscalar(rslt)
+            assert not np.isnan(rslt)
+
+def test_mad_std_warns():
+    with NumpyRNGContext(12345):
+        data = normal(5, 2, size=(10, 10))
+        data[5,5] = np.nan
+
+        with catch_warnings() as warns:
+            rslt = funcs.mad_std(data, ignore_nan=False)
+            if NUMPY_LT_1_10:
+                w = warns[0]
+                assert str(w.message).startswith("Numpy versions <1.10 will return")
+            else:
+                assert np.isnan(rslt)
+
+def test_mad_std_withnan():
+    with NumpyRNGContext(12345):
+        data = np.empty([102,102])
+        data[:] = np.nan
+        data[1:-1,1:-1] = normal(5, 2, size=(100, 100))
+        assert_allclose(funcs.mad_std(data, ignore_nan=True), 2.0, rtol=0.05)
+
+    if not NUMPY_LT_1_10:
+        assert np.isnan(funcs.mad_std([1, 2, 3, 4, 5, np.nan]))
+    assert_allclose(funcs.mad_std([1, 2, 3, 4, 5, np.nan],  ignore_nan=True),
+                    1.482602218505602)
 
 def test_mad_std_with_axis():
     data = np.array([[1, 2, 3, 4],
@@ -340,6 +384,29 @@ def test_mad_std_with_axis():
     assert_allclose(funcs.mad_std(data, axis=0), result_axis0)
     assert_allclose(funcs.mad_std(data, axis=1), result_axis1)
 
+def test_mad_std_with_axis_and_nan():
+    data = np.array([[1, 2, 3, 4, np.nan],
+                     [4, 3, 2, 1, np.nan]])
+    # results follow data symmetry
+    result_axis0 = np.array([2.22390333, 0.74130111, 0.74130111,
+                             2.22390333, np.nan])
+    result_axis1 = np.array([1.48260222, 1.48260222])
+
+    assert_allclose(funcs.mad_std(data, axis=0, ignore_nan=True), result_axis0)
+    assert_allclose(funcs.mad_std(data, axis=1, ignore_nan=True), result_axis1)
+
+def test_mad_std_with_axis_and_nan_array_type():
+    # mad_std should return a masked array if given one, and not otherwise
+    data = np.array([[1, 2, 3, 4, np.nan],
+                     [4, 3, 2, 1, np.nan]])
+
+    result = funcs.mad_std(data, axis=0, ignore_nan=True)
+    assert not np.ma.isMaskedArray(result)
+
+    data = np.ma.masked_where(np.isnan(data), data)
+    result = funcs.mad_std(data, axis=0, ignore_nan=True)
+    assert np.ma.isMaskedArray(result)
+
 
 def test_gaussian_fwhm_to_sigma():
     fwhm = (2.0 * np.sqrt(2.0 * np.log(2.0)))