ENH: adds np.nancumsum and np.nancumprod · numpy/numpy@a76b872 · GitHub
[go: up one dir, main page]

Skip to content

Commit a76b872

Browse files
committed
ENH: adds np.nancumsum and np.nancumprod
This PR adds an implementation of `nancumsum` and `nancumprod`. The actual function is a two-liner adapted from `nansum`. Its structure is adapted from PR: #5418
1 parent 1429c60 commit a76b872

File tree

4 files changed

+252
-16
lines changed

4 files changed

+252
-16
lines changed

doc/release/1.12.0-notes.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,11 @@ Add a hook in ``numpy/__init__.py`` to import a ``numpy/_distributor_init.py``
125125
file that will remain empty (bar a docstring) in the standard numpy source,
126126
but that can be overwritten by people making binary distributions of numpy.
127127

128+
New nanfunctions ``nancumsum`` and ``nancumprod`` added
129+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
130+
Nanfunctions ``nancumsum`` and ``nancumprod`` have been added to
131+
compute ``cumsum`` and ``cumprod`` by ignoring nans.
132+
128133
Improvements
129134
============
130135

doc/source/reference/routines.math.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ Sums, products, differences
5858
nansum
5959
cumprod
6060
cumsum
61+
nancumprod
62+
nancumsum
6163
diff
6264
ediff1d
6365
gradient

numpy/lib/nanfunctions.py

Lines changed: 139 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
- `nanargmax` -- index of maximum non-NaN value
1111
- `nansum` -- sum of non-NaN values
1212
- `nanprod` -- product of non-NaN values
13+
- `nancumsum` -- cumulative sum of non-NaN values
14+
- `nancumprod` -- cumulative product of non-NaN values
1315
- `nanmean` -- mean of non-NaN values
1416
- `nanvar` -- variance of non-NaN values
1517
- `nanstd` -- standard deviation of non-NaN values
@@ -27,6 +29,7 @@
2729
__all__ = [
2830
'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean',
2931
'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod',
32+
'nancumsum', 'nancumprod'
3033
]
3134

3235

@@ -493,7 +496,11 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
493496
494497
Returns
495498
-------
496-
y : ndarray or numpy scalar
499+
nansum : ndarray
500+
A new array holding the result is returned unless `out` is
501+
specified, in which case it is returned. The result has the same
502+
size as `a`, and the same shape as `a` if `axis` is not None
503+
or `a` is a 1-d array.
497504
498505
See Also
499506
--------
@@ -506,11 +513,6 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
506513
If both positive and negative infinity are present, the sum will be Not
507514
A Number (NaN).
508515
509-
Numpy integer arithmetic is modular. If the size of a sum exceeds the
510-
size of an integer accumulator, its value will wrap around and the
511-
result will be incorrect. Specifying ``dtype=double`` can alleviate
512-
that problem.
513-
514516
Examples
515517
--------
516518
>>> np.nansum(1)
@@ -539,7 +541,7 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
539541
def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
540542
"""
541543
Return the product of array elements over a given axis treating Not a
542-
Numbers (NaNs) as zero.
544+
Numbers (NaNs) as ones.
543545
544546
One is returned for slices that are all-NaN or empty.
545547
@@ -573,20 +575,15 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
573575
574576
Returns
575577
-------
576-
y : ndarray or numpy scalar
578+
nanprod : ndarray
579+
A new array holding the result is returned unless `out` is
580+
specified, in which case it is returned.
577581
578582
See Also
579583
--------
580584
numpy.prod : Product across array propagating NaNs.
581585
isnan : Show which elements are NaN.
582586
583-
Notes
584-
-----
585-
Numpy integer arithmetic is modular. If the size of a product exceeds
586-
the size of an integer accumulator, its value will wrap around and the
587-
result will be incorrect. Specifying ``dtype=double`` can alleviate
588-
that problem.
589-
590587
Examples
591588
--------
592589
>>> np.nanprod(1)
@@ -606,6 +603,133 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
606603
return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
607604

608605

606+
def nancumsum(a, axis=None, dtype=None, out=None):
607+
"""
608+
Return the cumulative sum of array elements over a given axis treating Not a
609+
Numbers (NaNs) as zero. The cumulative sum does not change when NaNs are
610+
encountered and leading NaNs are replaced by zeros.
611+
612+
Zeros are returned for slices that are all-NaN or empty.
613+
614+
.. versionadded:: 1.12.0
615+
616+
Parameters
617+
----------
618+
a : array_like
619+
Input array.
620+
axis : int, optional
621+
Axis along which the cumulative sum is computed. The default
622+
(None) is to compute the cumsum over the flattened array.
623+
dtype : dtype, optional
624+
Type of the returned array and of the accumulator in which the
625+
elements are summed. If `dtype` is not specified, it defaults
626+
to the dtype of `a`, unless `a` has an integer dtype with a
627+
precision less than that of the default platform integer. In
628+
that case, the default platform integer is used.
629+
out : ndarray, optional
630+
Alternative output array in which to place the result. It must
631+
have the same shape and buffer length as the expected output
632+
but the type will be cast if necessary. See `doc.ufuncs`
633+
(Section "Output arguments") for more details.
634+
635+
Returns
636+
-------
637+
nancumsum : ndarray
638+
A new array holding the result is returned unless `out` is
639+
specified, in which case it is returned. The result has the same
640+
size as `a`, and the same shape as `a` if `axis` is not None
641+
or `a` is a 1-d array.
642+
643+
See Also
644+
--------
645+
numpy.cumsum : Cumulative sum across array propagating NaNs.
646+
isnan : Show which elements are NaN.
647+
648+
Examples
649+
--------
650+
>>> np.nancumsum(1)
651+
array([1])
652+
>>> np.nancumsum([1])
653+
array([1])
654+
>>> np.nancumsum([1, np.nan])
655+
array([ 1., 1.])
656+
>>> a = np.array([[1, 2], [3, np.nan]])
657+
>>> np.nancumsum(a)
658+
array([ 1., 3., 6., 6.])
659+
>>> np.nancumsum(a, axis=0)
660+
array([[ 1., 2.],
661+
[ 4., 2.]])
662+
>>> np.nancumsum(a, axis=1)
663+
array([[ 1., 3.],
664+
[ 3., 3.]])
665+
666+
"""
667+
a, mask = _replace_nan(a, 0)
668+
return np.cumsum(a, axis=axis, dtype=dtype, out=out)
669+
670+
671+
def nancumprod(a, axis=None, dtype=None, out=None):
672+
"""
673+
Return the cumulative product of array elements over a given axis treating Not a
674+
Numbers (NaNs) as one. The cumulative product does not change when NaNs are
675+
encountered and leading NaNs are replaced by ones.
676+
677+
Ones are returned for slices that are all-NaN or empty.
678+
679+
.. versionadded:: 1.12.0
680+
681+
Parameters
682+
----------
683+
a : array_like
684+
Input array.
685+
axis : int, optional
686+
Axis along which the cumulative product is computed. By default
687+
the input is flattened.
688+
dtype : dtype, optional
689+
Type of the returned array, as well as of the accumulator in which
690+
the elements are multiplied. If *dtype* is not specified, it
691+
defaults to the dtype of `a`, unless `a` has an integer dtype with
692+
a precision less than that of the default platform integer. In
693+
that case, the default platform integer is used instead.
694+
out : ndarray, optional
695+
Alternative output array in which to place the result. It must
696+
have the same shape and buffer length as the expected output
697+
but the type of the resulting values will be cast if necessary.
698+
699+
Returns
700+
-------
701+
nancumprod : ndarray
702+
A new array holding the result is returned unless `out` is
703+
specified, in which case it is returned.
704+
705+
See Also
706+
--------
707+
numpy.cumprod : Cumulative product across array propagating NaNs.
708+
isnan : Show which elements are NaN.
709+
710+
Examples
711+
--------
712+
>>> np.nancumprod(1)
713+
array([1])
714+
>>> np.nancumprod([1])
715+
array([1])
716+
>>> np.nancumprod([1, np.nan])
717+
array([ 1., 1.])
718+
>>> a = np.array([[1, 2], [3, np.nan]])
719+
>>> np.nancumprod(a)
720+
array([ 1., 2., 6., 6.])
721+
>>> np.nancumprod(a, axis=0)
722+
array([[ 1., 2.],
723+
[ 3., 2.]])
724+
>>> np.nancumprod(a, axis=1)
725+
array([[ 1., 2.],
726+
[ 3., 3.]])
727+
728+
"""
729+
a, mask = _replace_nan(a, 1)
730+
return np.cumprod(a, axis=axis, dtype=dtype, out=out)
731+
732+
609733
def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
610734
"""
611735
Compute the arithmetic mean along the specified axis, ignoring NaNs.

numpy/lib/tests/test_nanfunctions.py

Lines changed: 106 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import numpy as np
66
from numpy.testing import (
77
run_module_suite, TestCase, assert_, assert_equal, assert_almost_equal,
8-
assert_raises, assert_array_equal
8+
assert_warns, assert_no_warnings, assert_raises, assert_array_equal
99
)
1010

1111

@@ -22,6 +22,18 @@
2222
np.array([0.1042, -0.5954]),
2323
np.array([0.1610, 0.1859, 0.3146])]
2424

25+
# Rows of _ndat with nans converted to ones
26+
_ndat_ones = np.array([[0.6244, 1.0, 0.2692, 0.0116, 1.0, 0.1170],
27+
[0.5351, -0.9403, 1.0, 0.2100, 0.4759, 0.2833],
28+
[1.0, 1.0, 1.0, 0.1042, 1.0, -0.5954],
29+
[0.1610, 1.0, 1.0, 0.1859, 0.3146, 1.0]])
30+
31+
# Rows of _ndat with nans converted to zeros
32+
_ndat_zeros = np.array([[0.6244, 0.0, 0.2692, 0.0116, 0.0, 0.1170],
33+
[0.5351, -0.9403, 0.0, 0.2100, 0.4759, 0.2833],
34+
[0.0, 0.0, 0.0, 0.1042, 0.0, -0.5954],
35+
[0.1610, 0.0, 0.0, 0.1859, 0.3146, 0.0]])
36+
2537

2638
class TestNanFunctions_MinMax(TestCase):
2739

@@ -241,6 +253,16 @@ def test_nanprod(self):
241253
for mat in self.integer_arrays():
242254
assert_equal(np.nanprod(mat), tgt)
243255

256+
def test_nancumsum(self):
257+
tgt = np.cumsum(self.mat)
258+
for mat in self.integer_arrays():
259+
assert_equal(np.nancumsum(mat), tgt)
260+
261+
def test_nancumprod(self):
262+
tgt = np.cumprod(self.mat)
263+
for mat in self.integer_arrays():
264+
assert_equal(np.nancumprod(mat), tgt)
265+
244266
def test_nanmean(self):
245267
tgt = np.mean(self.mat)
246268
for mat in self.integer_arrays():
@@ -388,6 +410,89 @@ def test_empty(self):
388410
assert_equal(res, tgt)
389411

390412

413+
class TestNanFunctions_CumSumProd(TestCase, SharedNanFunctionsTestsMixin):
414+
415+
nanfuncs = [np.nancumsum, np.nancumprod]
416+
stdfuncs = [np.cumsum, np.cumprod]
417+
418+
def test_allnans(self):
419+
for f, tgt_value in zip(self.nanfuncs, [0, 1]):
420+
# Unlike other nan-functions, sum/prod/cumsum/cumprod don't warn on all nan input
421+
with assert_no_warnings():
422+
res = f([np.nan]*3, axis=None)
423+
tgt = tgt_value*np.ones((3))
424+
assert_(np.array_equal(res, tgt), 'result is not %s * np.ones((3))' % (tgt_value))
425+
# Check scalar
426+
res = f(np.nan)
427+
tgt = tgt_value*np.ones((1))
428+
assert_(np.array_equal(res, tgt), 'result is not %s * np.ones((1))' % (tgt_value))
429+
# Check there is no warning for not all-nan
430+
f([0]*3, axis=None)
431+
432+
def test_empty(self):
433+
for f, tgt_value in zip(self.nanfuncs, [0, 1]):
434+
mat = np.zeros((0, 3))
435+
tgt = tgt_value*np.ones((0, 3))
436+
res = f(mat, axis=0)
437+
assert_equal(res, tgt)
438+
tgt = mat
439+
res = f(mat, axis=1)
440+
assert_equal(res, tgt)
441+
tgt = np.zeros((0))
442+
res = f(mat, axis=None)
443+
assert_equal(res, tgt)
444+
445+
def test_keepdims(self):
446+
for f, g in zip(self.nanfuncs, self.stdfuncs):
447+
mat = np.eye(3)
448+
for axis in [None, 0, 1]:
449+
tgt = f(mat, axis=axis, out=None)
450+
res = g(mat, axis=axis, out=None)
451+
assert_(res.ndim == tgt.ndim)
452+
453+
for f in self.nanfuncs:
454+
d = np.ones((3, 5, 7, 11))
455+
# Randomly set some elements to NaN:
456+
rs = np.random.RandomState(0)
457+
d[rs.rand(*d.shape) < 0.5] = np.nan
458+
res = f(d, axis=None)
459+
assert_equal(res.shape, (1155,))
460+
for axis in np.arange(4):
461+
res = f(d, axis=axis)
462+
assert_equal(res.shape, (3, 5, 7, 11))
463+
464+
def test_matrices(self):
465+
# Check that it works and that type and
466+
# shape are preserved
467+
mat = np.matrix(np.eye(3))
468+
for f in self.nanfuncs:
469+
for axis in np.arange(2):
470+
res = f(mat, axis=axis)
471+
assert_(isinstance(res, np.matrix))
472+
assert_(res.shape == (3, 3))
473+
res = f(mat)
474+
assert_(res.shape == (1, 3*3))
475+
476+
def test_result_values(self):
477+
for axis in (-2, -1, 0, 1, None):
478+
tgt = np.cumprod(_ndat_ones, axis=axis)
479+
res = np.nancumprod(_ndat, axis=axis)
480+
assert_almost_equal(res, tgt)
481+
tgt = np.cumsum(_ndat_zeros,axis=axis)
482+
res = np.nancumsum(_ndat, axis=axis)
483+
assert_almost_equal(res, tgt)
484+
485+
def test_out(self):
486+
mat = np.eye(3)
487+
for nf, rf in zip(self.nanfuncs, self.stdfuncs):
488+
resout = np.eye(3)
489+
for axis in (-2, -1, 0, 1):
490+
tgt = rf(mat, axis=axis)
491+
res = nf(mat, axis=axis, out=resout)
492+
assert_almost_equal(res, resout)
493+
assert_almost_equal(res, tgt)
494+
495+
391496
class TestNanFunctions_MeanVarStd(TestCase, SharedNanFunctionsTestsMixin):
392497

393498
nanfuncs = [np.nanmean, np.nanvar, np.nanstd]

0 commit comments

Comments
 (0)
0