Adding np.nanmean(), nanstd(), and nanvar() by WeatherGod · Pull Request #3297 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

Adding np.nanmean(), nanstd(), and nanvar() #3297

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Updated comments and dtype tests in _methods.py
  • Loading branch information
WeatherGod committed May 31, 2013
commit 5be45b280b258e158b93163b937f8f9c08d30393
29 changes: 19 additions & 10 deletions numpy/core/_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

from numpy.core import multiarray as mu
from numpy.core import umath as um
from numpy.core.numeric import array, asanyarray, isnan
from numpy.core.numeric import array, asanyarray, isnan, issubdtype
from numpy.core import numerictypes as nt

def _amax(a, axis=None, out=None, keepdims=False):
return um.maximum.reduce(a, axis=axis,
Expand Down Expand Up @@ -46,8 +47,9 @@ def _count_reduce_items(arr, axis):
def _mean(a, axis=None, dtype=None, out=None, keepdims=False):
arr = asanyarray(a)

# Upgrade bool, unsigned int, and int to float64
if dtype is None and arr.dtype.kind in ['b','u','i']:
# Cast bool, unsigned int, and int to float64
if dtype is None and (issubdtype(arr.dtype, nt.integer) or
issubdtype(arr.dtype, nt.bool_)):
ret = um.add.reduce(arr, axis=axis, dtype='f8',
out=out, keepdims=keepdims)
else:
Expand All @@ -62,11 +64,14 @@ def _mean(a, axis=None, dtype=None, out=None, keepdims=False):
return ret

def _nanmean(a, axis=None, dtype=None, out=None, keepdims=False):
# Using array() instead of asanyarray() because the former always
# makes a copy, which is important due to the copyto() action later
arr = array(a, subok=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was going to say that it might be better to use a base class + wrap at the end (for matrix support, but matrix support is bad anyway...), but then the non-nan code does the same. Which makes me wonder: would it be sensible to just create a where= kwarg instead, making the nan-funcs just tiny wrappers? Of course I could dream about having where= for the usual ufunc.reduce, but I think it would probably require larger additions to the nditer.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think @mwiebe did something along those lines at one point with the NA work, but it got pulled out. I seriously want a where= kwarg in the ufunc architecture so that I can "fix" masked arrays making a copy of itself whenever one does a min or a max.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC, there is a where= in ufunc.__call__ now, but Mark didn't get around
to implementing it for ufunc.reduce. It would be great to have, definitely.

On Thu, May 2, 2013 at 9:09 AM, Benjamin Root notifications@github.com wrote:

In numpy/core/_methods.py:

@@ -61,6 +61,26 @@ def _mean(a, axis=None, dtype=None, out=None, keepdims=False):
ret = ret / float(rcount)
return ret

+def _nanmean(a, axis=None, dtype=None, out=None, keepdims=False):

  • arr = array(a, subok=True)

I think @mwiebe https://github.com/mwiebe did something along those
lines at one point with the NA work, but it got pulled out. I seriously
want a where= kwarg in the ufunc architecture so that I can "fix" masked
arrays making a copy of itself whenever one does a min or a max.


Reply to this email directly or view it on GitHub: https://github.com/numpy/numpy/pull/3297/files#r4054067

mask = isnan(arr)

# Upgrade bool, unsigned int, and int to float64
if dtype is None and arr.dtype.kind in ['b','u','i']:
# Cast bool, unsigned int, and int to float64
if dtype is None and (issubdtype(arr.dtype, nt.integer) or
issubdtype(arr.dtype, nt.bool_)):
ret = um.add.reduce(arr, axis=axis, dtype='f8',
out=out, keepdims=keepdims)
else:
Expand All @@ -86,7 +91,8 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0,
arr = asanyarray(a)

# First compute the mean, saving 'rcount' for reuse later
if dtype is None and arr.dtype.kind in ['b','u','i']:
if dtype is None and (issubdtype(arr.dtype, nt.integer) or
issubdtype(arr.dtype, nt.bool_)):
arrmean = um.add.reduce(arr, axis=axis, dtype='f8', keepdims=True)
else:
arrmean = um.add.reduce(arr, axis=axis, dtype=dtype, keepdims=True)
Expand All @@ -101,7 +107,7 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0,
x = arr - arrmean

# (arr - arrmean) ** 2
if arr.dtype.kind == 'c':
if issubdtype(arr.dtype, nt.complex_):
x = um.multiply(x, um.conjugate(x), out=x).real
else:
x = um.multiply(x, x, out=x)
Expand All @@ -123,11 +129,14 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0,

def _nanvar(a, axis=None, dtype=None, out=None, ddof=0,
keepdims=False):
# Using array() instead of asanyarray() because the former always
# makes a copy, which is important due to the copyto() action later
arr = array(a, subok=True)
mask = isnan(arr)

# First compute the mean, saving 'rcount' for reuse later
if dtype is None and arr.dtype.kind in ['b','u','i']:
if dtype is None and (issubdtype(arr.dtype, nt.integer) or
issubdtype(arr.dtype, nt.bool_)):
arrmean = um.add.reduce(arr, axis=axis, dtype='f8', keepdims=True)
else:
mu.copyto(arr, 0.0, where=mask)
Expand All @@ -142,10 +151,10 @@ def _nanvar(a, axis=None, dtype=None, out=None, ddof=0,

# arr - arrmean
x = arr - arrmean
x[mask] = 0.0
mu.copyto(x, 0.0, where=mask)

# (arr - arrmean) ** 2
if arr.dtype.kind == 'c':
if issubdtype(arr.dtype, nt.complex_):
x = um.multiply(x, um.conjugate(x), out=x).real
else:
x = um.multiply(x, x, out=x)
Expand Down
26 changes: 26 additions & 0 deletions numpy/core/tests/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -1334,6 +1334,19 @@ def setUp(self):
def test_basic(self):
assert_almost_equal(nanmean(self.A),self.real_mean)

def test_mutation(self):
# Because of the "messing around" we do to replace NaNs with zeros
# this is meant to ensure we don't actually replace the NaNs in the
# actual array.
a_copy = self.A.copy()
b_copy = self.B.copy()
with warnings.catch_warnings(record=True) as w:
warnings.filterwarnings('always', '', RuntimeWarning)
a_ret = nanmean(self.A)
assert_equal(self.A, a_copy)
b_ret = nanmean(self.B)
assert_equal(self.B, b_copy)

def test_allnans(self):
with warnings.catch_warnings(record=True) as w:
warnings.filterwarnings('always', '', RuntimeWarning)
Expand Down Expand Up @@ -1377,6 +1390,19 @@ def test_basic(self):
assert_almost_equal(nanvar(self.A),self.real_var)
assert_almost_equal(nanstd(self.A)**2,self.real_var)

def test_mutation(self):
# Because of the "messing around" we do to replace NaNs with zeros
# this is meant to ensure we don't actually replace the NaNs in the
# actual array.
with warnings.catch_warnings(record=True) as w:
warnings.filterwarnings('always', '', RuntimeWarning)
a_copy = self.A.copy()
b_copy = self.B.copy()
a_ret = nanvar(self.A)
assert_equal(self.A, a_copy)
b_ret = nanstd(self.B)
assert_equal(self.B, b_copy)

def test_ddof1(self):
assert_almost_equal(nanvar(self.A,ddof=1),
self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-1))
Expand Down
0