8000 Merge pull request #4307 from dfreese/feature/nanmedian · numpy/numpy@d150103 · GitHub
[go: up one dir, main page]

Skip to content

Commit d150103

Browse files
committed
Merge pull request #4307 from dfreese/feature/nanmedian
ENH: added functionality nanmedian to numpy
2 parents b2b3347 + beec75b commit d150103

File tree

3 files changed

+243
-6
lines changed

3 files changed

+243
-6
lines changed

doc/release/1.9.0-notes.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,11 @@ this new boolean argument.
148148
The number of times each unique item comes up in the input can now be
149149
obtained as an optional return value.
150150

151+
Support for median in nanfunctions
152+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
153+
The new ``np.nanmedian`` function computes the median of an ``ndarray``
154+
while ignoring any NaN values it contains.
155+
151156

152157
Improvements
153158
============

numpy/lib/nanfunctions.py

Lines changed: 144 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,14 @@
1717
from __future__ import division, absolute_import, print_function
1818

1919
import warnings
20+
import operator
2021
import numpy as np
21-
22+
from numpy.core.fromnumeric import partition
23+
from numpy.lib.function_base import _ureduce as _ureduce
2224

2325
__all__ = [
2426
'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean',
25-
'nanvar', 'nanstd'
27+
'nanmedian', 'nanvar', 'nanstd'
2628
]
2729

2830

@@ -601,6 +603,146 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False):
601603
return avg
602604

603605

606+
def _nanmedian1d(arr1d, overwrite_input=False): # This only works on 1d arrays
607+
"""
608+
Private function for rank 1 arrays. Compute the median ignoring NaNs.
609+
See nanmedian for parameter usage
610+
611+
"""
612+
c = np.isnan(arr1d)
613+
s = np.where(c)[0]
614+
if s.size == arr1d.size:
615+
warnings.warn("All-NaN slice encountered", RuntimeWarning)
616+
return np.nan
617+
elif s.size == 0:
618+
return np.median(arr1d, overwrite_input=overwrite_input)
619+
else:
620+
if overwrite_input:
621+
x = arr1d
622+
else:
623+
x = arr1d.copy()
624+
# select non-nans at end of array
625+
enonan = arr1d[-s.size:][~c[-s.size:]]
626+
# fill nans in beginning of array with non-nans of end
627+
x[s[:enonan.size]] = enonan
628+
# slice nans away
629+
return np.median(x[:-s.size], overwrite_input=True)
630+
631+
632+
def _nanmedian(a, axis=None, out=None, overwrite_input=False):
    """
    Private function that doesn't support extended axis or keepdims.
    These methods are extended to this function using _ureduce.
    See nanmedian for parameter usage.

    When `out` is given, the result is written into it and `out` itself
    is returned; otherwise the freshly computed result is returned.

    """
    if axis is None:
        # Median over the flattened array.
        result = _nanmedian1d(a.ravel(), overwrite_input)
    else:
        result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)

    if out is None:
        return result
    # Ellipsis assignment also works for a 0-d `out`, which is the shape of
    # the result when axis is None; `out[:]` raises IndexError in that case.
    out[...] = result
    return out
651+
652+
653+
def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False):
    """
    Compute the median along the specified axis, while ignoring NaNs.

    Returns the median of the array elements.

    .. versionadded:: 1.9.0

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array.
    axis : {int, sequence of int, None}, optional
        Axis or axes along which the medians are computed. The default
        (axis=None) is to compute the median along a flattened version of
        the array.
        A sequence of axes is supported since version 1.9.0.
    out : ndarray, optional
        Alternative output array in which to place the result. It must have
        the same shape and buffer length as the expected output, but the
        type (of the output) will be cast if necessary.
    overwrite_input : bool, optional
        If True, then allow use of memory of input array (a) for
        calculations. The input array will be modified by the call to
        median. This will save memory when you do not need to preserve
        the contents of the input array. Treat the input as undefined,
        but it will probably be fully or partially sorted. Default is
        False. Note that, if `overwrite_input` is True and the input
        is not already an ndarray, an error will be raised.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

    Returns
    -------
    median : ndarray
        A new array holding the result. If the input contains integers, or
        floats of smaller precision than 64, then the output data-type is
        float64.  Otherwise, the output data-type is the same as that of the
        input.

    See Also
    --------
    mean, median, percentile, nanmean

    Notes
    -----
    Given a vector V of length N, the median of V is the middle value of
    a sorted copy of V, ``V_sorted`` - i.e., ``V_sorted[(N-1)/2]``, when N is
    odd.  When N is even, it is the average of the two middle values of
    ``V_sorted``.

    Examples
    --------
    >>> a = np.array([[10.0, 7, 4], [3, 2, 1]])
    >>> a[0, 1] = np.nan
    >>> a
    array([[ 10.,  nan,   4.],
       [  3.,   2.,   1.]])
    >>> np.median(a)
    nan
    >>> np.nanmedian(a)
    3.0
    >>> np.nanmedian(a, axis=0)
    array([ 6.5,  2.,  2.5])
    >>> np.median(a, axis=1)
    array([ nan,   2.])
    >>> b = a.copy()
    >>> np.nanmedian(b, axis=1, overwrite_input=True)
    array([ 7.,  2.])
    >>> assert not np.all(a==b)
    >>> b = a.copy()
    >>> np.nanmedian(b, axis=None, overwrite_input=True)
    3.0
    >>> assert not np.all(a==b)

    """
    a = np.asanyarray(a)
    # apply_along_axis in _nanmedian doesn't handle empty arrays well,
    # so deal with them upfront by delegating to nanmean
    if 0 in a.shape:
        return np.nanmean(a, axis, out=out, keepdims=keepdims)

    # _ureduce returns the reduced result together with the shape to use
    # when the reduced axes are kept as size-one dimensions.
    r, k = _ureduce(a, func=_nanmedian, axis=axis, out=out,
                    overwrite_input=overwrite_input)
    if keepdims:
        return r.reshape(k)
    else:
        return r
744+
745+
604746
def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
605747
"""
606748
Compute the variance along the specified axis, while ignoring NaNs.

numpy/lib/tests/test_nanfunctions.py

Lines changed: 94 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,15 @@
1111

1212
# Test data
1313
_ndat = np.array([[0.6244, np.nan, 0.2692, 0.0116, np.nan, 0.1170],
14-
[0.5351, 0.9403, np.nan, 0.2100, 0.4759, 0.2833],
15-
[np.nan, np.nan, np.nan, 0.1042, np.nan, 0.5954],
14+
[0.5351, -0.9403, np.nan, 0.2100, 0.4759, 0.2833],
15+
[np.nan, np.nan, np.nan, 0.1042, np.nan, -0.5954],
1616
[0.1610, np.nan, np.nan, 0.1859, 0.3146, np.nan]])
1717

1818

1919
# Rows of _ndat with nans removed
2020
_rdat = [np.array([ 0.6244, 0.2692, 0.0116, 0.1170]),
21-
np.array([ 0.5351, 0.9403, 0.2100, 0.4759, 0.2833]),
22-
np.array([ 0.1042, 0.5954]),
21+
np.array([ 0.5351, -0.9403, 0.2100, 0.4759, 0.2833]),
22+
np.array([ 0.1042, -0.5954]),
2323
np.array([ 0.1610, 0.1859, 0.3146])]
2424

2525

@@ -527,5 +527,95 @@ def test_matrices(self):
527527
assert_(np.isscalar(res))
528528

529529

530+
class TestNanFunctions_Median(TestCase):
    """Tests for np.nanmedian."""

    def test_mutation(self):
        # Check that passed array is not modified.
        ndat = _ndat.copy()
        np.nanmedian(ndat)
        assert_equal(ndat, _ndat)

    def test_ndim(self):
        # Without keepdims the result must have the same number of
        # dimensions as np.median. This method needs its own name: a second
        # method called test_keepdims in the same class would replace it
        # and it would never run.
        mat = np.eye(3)
        for axis in [None, 0, 1]:
            tgt = np.median(mat, axis=axis, out=None, overwrite_input=False)
            res = np.nanmedian(mat, axis=axis, out=None,
                               overwrite_input=False)
            assert_(res.ndim == tgt.ndim)

    def test_out(self):
        # The result must be written into `out` and match np.median on
        # NaN-free data.
        mat = np.random.rand(3, 3)
        resout = np.zeros(3)
        tgt = np.median(mat, axis=1)
        res = np.nanmedian(mat, axis=1, out=resout)
        assert_almost_equal(res, resout)
        assert_almost_equal(res, tgt)

    def test_result_values(self):
        # Row-wise nanmedian must equal the median of each row with the
        # NaNs removed.
        tgt = [np.median(d) for d in _rdat]
        res = np.nanmedian(_ndat, axis=1)
        assert_almost_equal(res, tgt)

    def test_allnans(self):
        mat = np.array([np.nan]*9).reshape(3, 3)
        for axis in [None, 0, 1]:
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter('always')
                assert_(np.isnan(np.nanmedian(mat, axis=axis)).all())
                # One warning per all-NaN slice: a single flattened slice
                # for axis=None, three slices otherwise.
                if axis is None:
                    assert_(len(w) == 1)
                else:
                    assert_(len(w) == 3)
                assert_(issubclass(w[0].category, RuntimeWarning))
                # Check scalar
                assert_(np.isnan(np.nanmedian(np.nan)))
                if axis is None:
                    assert_(len(w) == 2)
                else:
                    assert_(len(w) == 4)
                assert_(issubclass(w[0].category, RuntimeWarning))

    def test_empty(self):
        mat = np.zeros((0, 3))
        for axis in [0, None]:
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter('always')
                assert_(np.isnan(np.nanmedian(mat, axis=axis)).all())
                assert_(len(w) == 1)
                assert_(issubclass(w[0].category, RuntimeWarning))
        for axis in [1]:
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter('always')
                assert_equal(np.nanmedian(mat, axis=axis), np.zeros([]))
                assert_(len(w) == 0)

    def test_scalar(self):
        assert_(np.nanmedian(0.) == 0.)

    def test_extended_axis_invalid(self):
        d = np.ones((3, 5, 7, 11))
        assert_raises(IndexError, np.nanmedian, d, axis=-5)
        assert_raises(IndexError, np.nanmedian, d, axis=(0, -5))
        assert_raises(IndexError, np.nanmedian, d, axis=4)
        assert_raises(IndexError, np.nanmedian, d, axis=(0, 4))
        assert_raises(ValueError, np.nanmedian, d, axis=(1, 1))

    def test_keepdims(self):
        # Reduced axes must be kept as size-one dimensions.
        d = np.ones((3, 5, 7, 11))
        assert_equal(np.nanmedian(d, axis=None, keepdims=True).shape,
                     (1, 1, 1, 1))
        assert_equal(np.nanmedian(d, axis=(0, 1), keepdims=True).shape,
                     (1, 1, 7, 11))
        assert_equal(np.nanmedian(d, axis=(0, 3), keepdims=True).shape,
                     (1, 5, 7, 1))
        assert_equal(np.nanmedian(d, axis=(1,), keepdims=True).shape,
                     (3, 1, 7, 11))
        assert_equal(np.nanmedian(d, axis=(0, 1, 2, 3), keepdims=True).shape,
                     (1, 1, 1, 1))
        assert_equal(np.nanmedian(d, axis=(0, 1, 3), keepdims=True).shape,
                     (1, 1, 7, 1))
616+
617+
618+
619+
530620
if __name__ == "__main__":
531621
run_module_suite()

0 commit comments

Comments
 (0)
0