ENH: missingdata: Add skipna=, keepdims= parameters to methods · numpy/numpy@dfd12cf · GitHub
[go: up one dir, main page]

Skip to content

Commit dfd12cf

Browse files
committed
ENH: missingdata: Add skipna=, keepdims= parameters to methods
Also fix some memory leaks, improve some type resolution code. The methods still have some issues with array subtypes that need working through.
1 parent 3ba3937 commit dfd12cf

File tree

7 files changed

+148
-229
lines changed

7 files changed

+148
-229
lines changed

doc/release/2.0.0-notes.rst

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ list of things that do and do not work with NA values:
2525
What works with NA:
2626
* Basic indexing and slicing, as well as full boolean mask indexing.
2727
* All element-wise ufuncs.
28-
* UFunc.reduce methods, with a new skipna parameter.
28+
* All UFunc.reduce methods, with a new skipna parameter.
2929
* The nditer object.
3030
* Array methods:
3131
+ ndarray.clip, ndarray.min, ndarray.max, ndarray.sum, ndarray.prod,
@@ -40,8 +40,6 @@ What doesn't work with NA:
4040
mechanism instead of the newer nditer.
4141
* Struct dtypes, which will have corresponding struct masks with
4242
one mask value per primitive field of the struct dtype.
43-
* UFunc.reduce of multi-dimensional arrays, with skipna=True and a ufunc
44-
that doesn't have an identity.
4543
* UFunc.accumulate, UFunc.reduceat.
4644
* Ufuncs calls with both NA masks and a where= mask at the same time.
4745
* np.logical_and, np.logical_or, np.all, and np.any don't satisfy the
@@ -59,7 +57,7 @@ Differences with R:
5957
* np.isna(nan) is False, but R's is.na(nan) is TRUE. This is because
6058
NumPy's NA is treated independently of the underlying data type.
6159
* Boolean indexing, where the result is compressed to just
62-
the elements with true in the mask, raises if the booelan mask
60+
the elements with true in the mask, raises if the boolean mask
6361
has an NA value in it. This is because that value could be either
6462
True or False, meaning the count of the output array is actually
6563
NA. R treats this case in a manner inconsistent with the NA model,

numpy/core/fromnumeric.py

Lines changed: 16 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import umath as um
1717
import numerictypes as nt
1818
from numeric import asarray, array, asanyarray, concatenate
19+
import _methods
1920
_dt_ = nt.sctype2char
2021

2122
import types
@@ -1482,12 +1483,12 @@ def sum(a, axis=None, dtype=None, out=None, skipna=False, keepdims=False):
14821483
try:
14831484
sum = a.sum
14841485
except AttributeError:
1485-
return um.add.reduce(a, axis=axis, dtype=dtype,
1486+
return _methods._sum(a, axis=axis, dtype=dtype,
14861487
out=out, skipna=skipna, keepdims=keepdims)
14871488
# NOTE: Dropping the skipna and keepdims parameters here...
14881489
return sum(axis=axis, dtype=dtype, out=out)
14891490
else:
1490-
return um.add.reduce(a, axis=axis, dtype=dtype,
1491+
return _methods._sum(a, axis=axis, dtype=dtype,
14911492
out=out, skipna=skipna, keepdims=keepdims)
14921493

14931494
def product (a, axis=None, dtype=None, out=None, skipna=False, keepdims=False):
@@ -1603,7 +1604,8 @@ def any(a, axis=None, out=None, skipna=False, keepdims=False):
16031604
(191614240, 191614240)
16041605
16051606
"""
1606-
return um.logical_or.reduce(a, axis=axis, out=out, skipna=skipna, keepdims=keepdims)
1607+
return _methods._any(a, axis=axis, out=out,
1608+
skipna=skipna, keepdims=keepdims)
16071609

16081610
def all(a, axis=None, out=None, skipna=False, keepdims=False):
16091611
"""
@@ -1674,7 +1676,8 @@ def all(a, axis=None, out=None, skipna=False, keepdims=False):
16741676
(28293632, 28293632, array([ True], dtype=bool))
16751677
16761678
"""
1677-
return um.logical_and.reduce(a, axis=axis, out=out, skipna=skipna, keepdims=keepdims)
1679+
return _methods._all(a, axis=axis, out=out,
1680+
skipna=skipna, keepdims=keepdims)
16781681

16791682
def cumsum (a, axis=None, dtype=None, out=None):
16801683
"""
@@ -1873,12 +1876,12 @@ def amax(a, axis=None, out=None, skipna=False, keepdims=False):
18731876
try:
18741877
amax = a.max
18751878
except AttributeError:
1876-
return um.maximum.reduce(a, axis=axis,
1879+
return _methods._amax(a, axis=axis,
18771880
out=out, skipna=skipna, keepdims=keepdims)
18781881
# NOTE: Dropping the skipna and keepdims parameters
18791882
return amax(axis=axis, out=out)
18801883
else:
1881-
return um.maximum.reduce(a, axis=axis,
1884+
return _methods._amax(a, axis=axis,
18821885
out=out, skipna=skipna, keepdims=keepdims)
18831886

18841887
def amin(a, axis=None, out=None, skipna=False, keepdims=False):
@@ -1947,12 +1950,12 @@ def amin(a, axis=None, out=None, skipna=False, keepdims=False):
19471950
try:
19481951
amin = a.min
19491952
except AttributeError:
1950-
return um.minimum.reduce(a, axis=axis,
1953+
return _methods._amin(a, axis=axis,
19511954
out=out, skipna=skipna, keepdims=keepdims)
19521955
# NOTE: Dropping the skipna and keepdims parameters
19531956
return amin(axis=axis, out=out)
19541957
else:
1955-
return um.minimum.reduce(a, axis=axis,
1958+
return _methods._amin(a, axis=axis,
19561959
out=out, skipna=skipna, keepdims=keepdims)
19571960

19581961
def alen(a):
@@ -2080,11 +2083,11 @@ def prod(a, axis=None, dtype=None, out=None, skipna=False, keepdims=False):
20802083
try:
20812084
prod = a.prod
20822085
except AttributeError:
2083-
return um.multiply.reduce(a, axis=axis, dtype=dtype,
2086+
return _methods._prod(a, axis=axis, dtype=dtype,
20842087
out=out, skipna=skipna, keepdims=keepdims)
20852088
return prod(axis=axis, dtype=dtype, out=out)
20862089
else:
2087-
return um.multiply.reduce(a, axis=axis, dtype=dtype,
2090+
return _methods._prod(a, axis=axis, dtype=dtype,
20882091
out=out, skipna=skipna, keepdims=keepdims)
20892092

20902093
def cumprod(a, axis=None, dtype=None, out=None):
@@ -2459,22 +2462,8 @@ def mean(a, axis=None, dtype=None, out=None, skipna=False, keepdims=False):
24592462
except AttributeError:
24602463
pass
24612464

2462-
arr = asarray(a)
2463-
2464-
# Upgrade bool, unsigned int, and int to float64
2465-
if dtype is None and arr.dtype.kind in ['b','u','i']:
2466-
ret = um.add.reduce(arr, axis=axis, dtype='f8',
2465+
return _methods._mean(a, axis=axis, dtype=dtype,
24672466
out=out, skipna=skipna, keepdims=keepdims)
2468-
else:
2469-
ret = um.add.reduce(arr, axis=axis, dtype=dtype,
2470-
out=out, skipna=skipna, keepdims=keepdims)
2471-
rcount = mu.count_reduce_items(arr, axis=axis,
2472-
skipna=skipna, keepdims=keepdims)
2473-
if isinstance(ret, mu.ndarray):
2474-
um.true_divide(ret, rcount, out=ret, casting='unsafe')
2475-
else:
2476-
ret = ret / float(rcount)
2477-
return ret
24782467

24792468

24802469
def std(a, axis=None, dtype=None, out=None, ddof=0,
@@ -2579,16 +2568,9 @@ def std(a, axis=None, dtype=None, out=None, ddof=0,
25792568
except AttributeError:
25802569
pass
25812570

2582-
ret = var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
2571+
return _methods._std(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
25832572
skipna=skipna, keepdims=keepdims)
25842573

2585-
if isinstance(ret, mu.ndarray):
2586-
um.sqrt(ret, out=ret)
2587-
else:
2588-
ret = um.sqrt(ret)
2589-
2590-
return ret
2591-
25922574
def var(a, axis=None, dtype=None, out=None, ddof=0,
25932575
skipna=False, keepdims=False):
25942576
"""
@@ -2692,43 +2674,6 @@ def var(a, axis=None, dtype=None, out=None, ddof=0,
26922674
except AttributeError:
26932675
pass
26942676

2695-
arr = asarray(a)
2696-
2697-
# First compute the mean, saving 'rcount' for reuse later
2698-
if dtype is None and arr.dtype.kind in ['b','u','i']:
2699-
arrmean = um.add.reduce(arr, axis=axis, dtype='f8',
2700-
skipna=skipna, keepdims=True)
2701-
else:
2702-
arrmean = um.add.reduce(arr, axis=axis, dtype=dtype,
2703-
skipna=skipna, keepdims=True)
2704-
rcount = mu.count_reduce_items(arr, axis=axis,
2705-
skipna=skipna, keepdims=True)
2706-
if isinstance(arrmean, mu.ndarray):
2707-
um.true_divide(arrmean, rcount, out=arrmean, casting='unsafe')
2708-
else:
2709-
arrmean = arrmean / float(rcount)
2710-
2711-
# arr - arrmean
2712-
x = arr - arrmean
2713-
2714-
# (arr - arrmean) ** 2
2715-
if arr.dtype.kind == 'c':
2716-
um.multiply(x, um.conjugate(x), out=x)
2717-
x = x.real
2718-
else:
2719-
um.multiply(x, x, out=x)
2720-
2721-
# add.reduce((arr - arrmean) ** 2, axis)
2722-
ret = um.add.reduce(x, axis=axis, dtype=dtype, out=out,
2677+
return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
27232678
skipna=skipna, keepdims=keepdims)
27242679

2725-
# add.reduce((arr - arrmean) ** 2, axis) / (n - ddof)
2726-
if not keepdims and isinstance(rcount, mu.ndarray):
2727-
rcount = rcount.squeeze(axis=axis)
2728-
rcount -= ddof
2729-
if isinstance(ret, mu.ndarray):
2730-
um.true_divide(ret, rcount, out=ret, casting='unsafe')
2731-
else:
2732-
ret = ret / float(rcount)
2733-
2734-
return ret

0 commit comments

Comments
 (0)
0