From 79f91c335425e04032776843e8e7f588ef0f0f85 Mon Sep 17 00:00:00 2001 From: Benjamin Root Date: Thu, 2 May 2013 00:50:39 -0400 Subject: [PATCH 1/6] Adding np.nanmean(), np.nanstd(), np.nanvar() --- numpy/core/_methods.py | 79 +++++++++++- numpy/core/fromnumeric.py | 263 +++++++++++++++++++++++++++++++++++++- 2 files changed, 340 insertions(+), 2 deletions(-) diff --git a/numpy/core/_methods.py b/numpy/core/_methods.py index 66a05e86814c..297d708a85c7 100644 --- a/numpy/core/_methods.py +++ b/numpy/core/_methods.py @@ -7,7 +7,7 @@ from numpy.core import multiarray as mu from numpy.core import umath as um -from numpy.core.numeric import asanyarray +from numpy.core.numeric import array, asanyarray, isnan def _amax(a, axis=None, out=None, keepdims=False): return um.maximum.reduce(a, axis=axis, @@ -61,6 +61,26 @@ def _mean(a, axis=None, dtype=None, out=None, keepdims=False): ret = ret / float(rcount) return ret +def _nanmean(a, axis=None, dtype=None, out=None, keepdims=False): + arr = array(a, subok=True) + mask = isnan(arr) + + # Upgrade bool, unsigned int, and int to float64 + if dtype is None and arr.dtype.kind in ['b','u','i']: + ret = um.add.reduce(arr, axis=axis, dtype='f8', + out=out, keepdims=keepdims) + else: + mu.copyto(arr, 0.0, where=mask) + ret = um.add.reduce(arr, axis=axis, dtype=dtype, + out=out, keepdims=keepdims) + rcount = (~mask).sum(axis=axis) + if isinstance(ret, mu.ndarray): + ret = um.true_divide(ret, rcount, + out=ret, casting='unsafe', subok=False) + else: + ret = ret / float(rcount) + return ret + def _var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): arr = asanyarray(a) @@ -101,6 +121,52 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0, return ret +def _nanvar(a, axis=None, dtype=None, out=None, ddof=0, + keepdims=False): + arr = array(a, subok=True) + mask = isnan(arr) + + # First compute the mean, saving 'rcount' for reuse later + if dtype is None and arr.dtype.kind in ['b','u','i']: + arrmean = um.add.reduce(arr, axis=axis, dtype='f8', keepdims=True) + else: + mu.copyto(arr, 0.0, where=mask) + arrmean = um.add.reduce(arr, axis=axis, dtype=dtype, + keepdims=True) + rcount = (~mask).sum(axis=axis, keepdims=True) + if isinstance(arrmean, mu.ndarray): + arrmean = um.true_divide(arrmean, rcount, + out=arrmean, casting='unsafe', subok=False) + else: + arrmean = arrmean / float(rcount) + + # arr - arrmean + x = arr - arrmean + x[mask] = 0.0 + + # (arr - arrmean) ** 2 + if arr.dtype.kind == 'c': + x = um.multiply(x, um.conjugate(x), out=x).real + else: + x = um.multiply(x, x, out=x) + + # add.reduce((arr - arrmean) ** 2, axis) + ret = um.add.reduce(x, axis=axis, dtype=dtype, out=out, + keepdims=keepdims) + + # add.reduce((arr - arrmean) ** 2, axis) / (n - ddof) + if not keepdims and isinstance(rcount, mu.ndarray): + rcount = rcount.squeeze(axis=axis) + rcount -= ddof + if isinstance(ret, mu.ndarray): + ret = um.true_divide(ret, rcount, + out=ret, casting='unsafe', subok=False) + else: + ret = ret / float(rcount) + + return ret + + def _std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) @@ -111,3 +177,14 @@ def _std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): ret = um.sqrt(ret) return ret + +def _nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): + ret = _nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims) + + if isinstance(ret, mu.ndarray): + ret = um.sqrt(ret, out=ret) + else: + ret = um.sqrt(ret) + + return ret diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index 5735b6124c55..c65139fdde6c 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -21,7 +21,8 @@ 'resize', 'diagonal', 'trace', 'ravel', 'nonzero', 'shape', 'compress', 'clip', 'sum', 'product', 'prod', 'sometrue', 'alltrue', 'any', 'all', 'cumsum', 'cumproduct', 'cumprod', 'ptp', 'ndim', - 'rank', 'size', 'around', 'round_', 'mean', 'std', 'var', 'squeeze', + 'rank', 'size', 'around', 'round_', 'mean', 'nanmean', + 'std', 'nanstd', 'var', 'nanvar', 'squeeze', 'amax', 'amin', ] @@ -2517,6 +2518,8 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=False): See Also -------- average : Weighted average + nanmean : Arithmetic mean while ignoring NaNs + var, nanvar Notes ----- @@ -2563,6 +2566,80 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=False): return _methods._mean(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) +def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): + """ + Compute the arithmetic mean along the specified axis, ignoring NaNs. + + Returns the average of the array elements. The average is taken over + the flattened array by default, otherwise over the specified axis. + `float64` intermediate and return values are used for integer inputs. + + Parameters + ---------- + a : array_like + Array containing numbers whose mean is desired. If `a` is not an + array, a conversion is attempted. + axis : int, optional + Axis along which the means are computed. The default is to compute + the mean of the flattened array. + dtype : data-type, optional + Type to use in computing the mean. For integer inputs, the default + is `float64`; for floating point inputs, it is the same as the + input dtype. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output, but the type will be cast if necessary. + See `doc.ufuncs` for details. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. + + Returns + ------- + m : ndarray, see dtype parameter above + If `out=None`, returns a new array containing the mean values, + otherwise a reference to the output array is returned. + + See Also + -------- + average : Weighted average + mean : Arithmetic mean taken while not ignoring NaNs + var, nanvar + + Notes + ----- + The arithmetic mean is the sum of the non-nan elements along the axis + divided by the number of non-nan elements. + + Note that for floating-point input, the mean is computed using the + same precision the input has. Depending on the input data, this can + cause the results to be inaccurate, especially for `float32`. + Specifying a higher-precision accumulator using the `dtype` keyword + can alleviate this issue. + + Examples + -------- + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.nanmean(a) + 2.6666666666666665 + >>> np.nanmean(a, axis=0) + array([ 2., 4.]) + >>> np.nanmean(a, axis=1) + array([ 1., 3.5]) + + """ + if not (type(a) is mu.ndarray): + try: + mean = a.nanmean + return mean(axis=axis, dtype=dtype, out=out) + except AttributeError: + pass + + return _methods._nanmean(a, axis=axis, dtype=dtype, + out=out, keepdims=keepdims) + def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ @@ -2605,6 +2682,7 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): See Also -------- var, mean + nanmean, nanstd numpy.doc.ufuncs : Section "Output arguments" Notes @@ -2665,6 +2743,97 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): return _methods._std(a, axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) +def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): + """ + Compute the standard deviation along the specified axis, while + ignoring NaNs. + + Returns the standard deviation, a measure of the spread of a distribution, + of the non-NaN array elements. The standard deviation is computed for the + flattened array by default, otherwise over the specified axis. + + Parameters + ---------- + a : array_like + Calculate the standard deviation of the non-NaN values. + axis : int, optional + Axis along which the standard deviation is computed. The default is + to compute the standard deviation of the flattened array. + dtype : dtype, optional + Type to use in computing the standard deviation. For arrays of + integer type the default is float64, for arrays of float types it is + the same as the array type. + out : ndarray, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output but the type (of the calculated + values) will be cast if necessary. + ddof : int, optional + Means Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + By default `ddof` is zero. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. + + Returns + ------- + standard_deviation : ndarray, see dtype parameter above. + If `out` is None, return a new array containing the standard deviation, + otherwise return a reference to the output array. + + See Also + -------- + var, mean, std + nanvar, nanmean + numpy.doc.ufuncs : Section "Output arguments" + + Notes + ----- + The standard deviation is the square root of the average of the squared + deviations from the mean, i.e., ``std = sqrt(mean(abs(x - x.mean())**2))``. + + The average squared deviation is normally calculated as + ``x.sum() / N``, where ``N = len(x)``. If, however, `ddof` is specified, + the divisor ``N - ddof`` is used instead. In standard statistical + practice, ``ddof=1`` provides an unbiased estimator of the variance + of the infinite population. ``ddof=0`` provides a maximum likelihood + estimate of the variance for normally distributed variables. The + standard deviation computed in this function is the square root of + the estimated variance, so even with ``ddof=1``, it will not be an + unbiased estimate of the standard deviation per se. + + Note that, for complex numbers, `std` takes the absolute + value before squaring, so that the result is always real and nonnegative. + + For floating-point input, the *std* is computed using the same + precision the input has. Depending on the input data, this can cause + the results to be inaccurate, especially for float32 (see example below). + Specifying a higher-accuracy accumulator using the `dtype` keyword can + alleviate this issue. + + Examples + -------- + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.nanstd(a) + 1.247219128924647 + >>> np.nanstd(a, axis=0) + array([ 1., 0.]) + >>> np.nanstd(a, axis=1) + array([ 0., 0.5]) + + """ + + if not (type(a) is mu.ndarray): + try: + nanstd = a.nanstd + return nanstd(axis=axis, dtype=dtype, out=out, ddof=ddof) + except AttributeError: + pass + + return _methods._nanstd(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims) + def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ @@ -2767,3 +2936,95 @@ def var(a, axis=None, dtype=None, out=None, ddof=0, return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) + + +def nanvar(a, axis=None, dtype=None, out=None, ddof=0, + keepdims=False): + """ + Compute the variance along the specified axis, while ignoring NaNs. + + Returns the variance of the array elements, a measure of the spread of a + distribution. The variance is computed for the flattened array by + default, otherwise over the specified axis. + + Parameters + ---------- + a : array_like + Array containing numbers whose variance is desired. If `a` is not an + array, a conversion is attempted. + axis : int, optional + Axis along which the variance is computed. The default is to compute + the variance of the flattened array. + dtype : data-type, optional + Type to use in computing the variance. For arrays of integer type + the default is `float32`; for arrays of float types it is the same as + the array type. + out : ndarray, optional + Alternate output array in which to place the result. It must have + the same shape as the expected output, but the type is cast if + necessary. + ddof : int, optional + "Delta Degrees of Freedom": the divisor used in the calculation is + ``N - ddof``, where ``N`` represents the number of elements. By + default `ddof` is zero. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. + + Returns + ------- + variance : ndarray, see dtype parameter above + If ``out=None``, returns a new array containing the variance; + otherwise, a reference to the output array is returned. + + See Also + -------- + std : Standard deviation + mean : Average + var : Variance while not ignoring NaNs + nanstd, nanmean + numpy.doc.ufuncs : Section "Output arguments" + + Notes + ----- + The variance is the average of the squared deviations from the mean, + i.e., ``var = mean(abs(x - x.mean())**2)``. + + The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``. + If, however, `ddof` is specified, the divisor ``N - ddof`` is used + instead. In standard statistical practice, ``ddof=1`` provides an + unbiased estimator of the variance of a hypothetical infinite population. + ``ddof=0`` provides a maximum likelihood estimate of the variance for + normally distributed variables. + + Note that for complex numbers, the absolute value is taken before + squaring, so that the result is always real and nonnegative. + + For floating-point input, the variance is computed using the same + precision the input has. Depending on the input data, this can cause + the results to be inaccurate, especially for `float32` (see example + below). Specifying a higher-accuracy accumulator using the ``dtype`` + keyword can alleviate this issue. + + Examples + -------- + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.var(a) + 1.5555555555555554 + >>> np.nanvar(a, axis=0) + array([ 1., 0.]) + >>> np.nanvar(a, axis=1) + array([ 0., 0.25]) + + """ + + if not (type(a) is mu.ndarray): + try: + nanvar = a.nanvar + return nanvar(axis=axis, dtype=dtype, out=out, ddof=ddof) + except AttributeError: + pass + + return _methods._nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims) From 64d2b20d61d9be6acfe253d268a819965434aad3 Mon Sep 17 00:00:00 2001 From: Benjamin Root Date: Wed, 15 May 2013 22:09:23 -0400 Subject: [PATCH 2/6] Added tests for nanmean(), nanvar(), nanstd() --- numpy/core/tests/test_numeric.py | 65 ++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index ed4e0b79eb94..9b4128bbe371 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -177,18 +177,36 @@ def test_mean(self): assert_(all(mean(A,0) == array([2.5,3.5,4.5]))) assert_(all(mean(A,1) == array([2.,5.]))) + def test_nanmean(self): + A = [[1, nan, nan], [nan, 4, 5]] + assert_(nanmean(A) == (10.0 / 3)) + assert_(all(nanmean(A,0) == array([1, 4, 5]))) + assert_(all(nanmean(A,1) == array([1, 4.5]))) + def test_std(self): A = [[1,2,3],[4,5,6]] assert_almost_equal(std(A), 1.707825127659933) assert_almost_equal(std(A,0), array([1.5, 1.5, 1.5])) assert_almost_equal(std(A,1), array([0.81649658, 0.81649658])) + def test_nanstd(self): + A = [[1, nan, nan], [nan, 4, 5]] + assert_almost_equal(nanstd(A), 1.699673171197595) + assert_almost_equal(nanstd(A,0), array([0.0, 0.0, 0.0])) + assert_almost_equal(nanstd(A,1), array([0.0, 0.5])) + def test_var(self): A = [[1,2,3],[4,5,6]] assert_almost_equal(var(A), 2.9166666666666665) assert_almost_equal(var(A,0), array([2.25, 2.25, 2.25])) assert_almost_equal(var(A,1), array([0.66666667, 0.66666667])) + def test_nanvar(self): + A = [[1, nan, nan], [nan, 4, 5]] + assert_almost_equal(nanvar(A), 2.88888888889) + assert_almost_equal(nanvar(A,0), array([0.0, 0.0, 0.0])) + assert_almost_equal(nanvar(A,1), array([0.0, 0.25])) + class TestBoolScalar(TestCase): def test_logical(self): @@ -1367,6 +1385,23 @@ def test_no_parameter_modification(self): assert_array_equal(x, array([inf, 1])) assert_array_equal(y, array([0, inf])) +class TestNaNMean(TestCase): + def setUp(self): + self.A = array([1,nan,-1,nan,nan,1,-1]) + self.B = array([nan, nan, nan, nan]) + self.real_mean = 0 + + def test_basic(self): + assert_almost_equal(nanmean(self.A),self.real_mean) + + def test_allnans(self): + assert_(isnan(nanmean(self.B))) + + def test_empty(self): + assert_(isnan(nanmean(array([])))) + + + class TestStdVar(TestCase): def setUp(self): @@ -1389,6 +1424,36 @@ def test_ddof2(self): assert_almost_equal(std(self.A,ddof=2)**2, self.real_var*len(self.A)/float(len(self.A)-2)) +class TestNaNStdVar(TestCase): + def setUp(self): + self.A = array([nan,1,-1,nan,1,nan,-1]) + self.B = array([nan, nan, nan, nan]) + self.real_var = 1 + + def test_basic(self): + assert_almost_equal(nanvar(self.A),self.real_var) + assert_almost_equal(nanstd(self.A)**2,self.real_var) + + def test_ddof1(self): + assert_almost_equal(nanvar(self.A,ddof=1), + self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-1)) + assert_almost_equal(nanstd(self.A,ddof=1)**2, + self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-1)) + + def test_ddof2(self): + assert_almost_equal(nanvar(self.A,ddof=2), + self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-2)) + assert_almost_equal(nanstd(self.A,ddof=2)**2, + self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-2)) + + def test_allnans(self): + assert_(isnan(nanvar(self.B))) + assert_(isnan(nanstd(self.B))) + + def test_empty(self): + assert_(isnan(nanvar(array([])))) + assert_(isnan(nanstd(array([])))) + class TestStdVarComplex(TestCase): def test_basic(self): From f58f89433656405fbd0a4298b48b4c8a32ff4df1 Mon Sep 17 00:00:00 2001 From: Benjamin Root Date: Sun, 19 May 2013 14:27:13 -0400 Subject: [PATCH 3/6] Tests now checks the warning state --- numpy/core/tests/test_numeric.py | 44 ++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 9b4128bbe371..5385c8bb1a74 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -3,6 +3,7 @@ import sys import platform from decimal import Decimal +import warnings import numpy as np from numpy.core import * @@ -177,6 +178,11 @@ def test_mean(self): assert_(all(mean(A,0) == array([2.5,3.5,4.5]))) assert_(all(mean(A,1) == array([2.,5.]))) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(isnan(mean([]))) + assert_(w[0].category is RuntimeWarning) + def test_nanmean(self): A = [[1, nan, nan], [nan, 4, 5]] assert_(nanmean(A) == (10.0 / 3)) @@ -189,6 +195,11 @@ def test_std(self): assert_almost_equal(std(A,0), array([1.5, 1.5, 1.5])) assert_almost_equal(std(A,1), array([0.81649658, 0.81649658])) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(isnan(std([]))) + assert_(w[0].category is RuntimeWarning) + def test_nanstd(self): A = [[1, nan, nan], [nan, 4, 5]] assert_almost_equal(nanstd(A), 1.699673171197595) @@ -201,6 +212,11 @@ def test_var(self): assert_almost_equal(var(A,0), array([2.25, 2.25, 2.25])) assert_almost_equal(var(A,1), array([0.66666667, 0.66666667])) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(isnan(mean([]))) + assert_(w[0].category is RuntimeWarning) + def test_nanvar(self): A = [[1, nan, nan], [nan, 4, 5]] assert_almost_equal(nanvar(A), 2.88888888889) @@ -1395,13 +1411,16 @@ def test_basic(self): assert_almost_equal(nanmean(self.A),self.real_mean) def test_allnans(self): - assert_(isnan(nanmean(self.B))) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(isnan(nanmean(self.B))) + assert_(w[0].category is RuntimeWarning) def test_empty(self): - assert_(isnan(nanmean(array([])))) - - - + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(isnan(nanmean(array([])))) + assert_(w[0].category is RuntimeWarning) class TestStdVar(TestCase): def setUp(self): @@ -1447,13 +1466,18 @@ def test_ddof2(self): self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-2)) def test_allnans(self): - assert_(isnan(nanvar(self.B))) - assert_(isnan(nanstd(self.B))) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(isnan(nanvar(self.B))) + assert_(isnan(nanstd(self.B))) + assert_(w[0].category is RuntimeWarning) def test_empty(self): - assert_(isnan(nanvar(array([])))) - assert_(isnan(nanstd(array([])))) - + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(isnan(nanvar(array([])))) + assert_(isnan(nanstd(array([])))) + assert_(w[0].category is RuntimeWarning) class TestStdVarComplex(TestCase): def test_basic(self): From 7c67eee1ff6dd5120cbead521689a7f92c01d604 Mon Sep 17 00:00:00 2001 From: Benjamin Root Date: Thu, 30 May 2013 22:38:56 -0400 Subject: [PATCH 4/6] Updated comments and dtype tests in _methods.py --- numpy/core/_methods.py | 29 +++++++++++++++++++---------- numpy/core/tests/test_numeric.py | 26 ++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/numpy/core/_methods.py b/numpy/core/_methods.py index 297d708a85c7..c317358e1aa2 100644 --- a/numpy/core/_methods.py +++ b/numpy/core/_methods.py @@ -7,7 +7,8 @@ from numpy.core import multiarray as mu from numpy.core import umath as um -from numpy.core.numeric import array, asanyarray, isnan +from numpy.core.numeric import array, asanyarray, isnan, issubdtype +from numpy.core import numerictypes as nt def _amax(a, axis=None, out=None, keepdims=False): return um.maximum.reduce(a, axis=axis, @@ -46,8 +47,9 @@ def _count_reduce_items(arr, axis): def _mean(a, axis=None, dtype=None, out=None, keepdims=False): arr = asanyarray(a) - # Upgrade bool, unsigned int, and int to float64 - if dtype is None and arr.dtype.kind in ['b','u','i']: + # Cast bool, unsigned int, and int to float64 + if dtype is None and (issubdtype(arr.dtype, nt.integer) or + issubdtype(arr.dtype, nt.bool_)): ret = um.add.reduce(arr, axis=axis, dtype='f8', out=out, keepdims=keepdims) else: @@ -62,11 +64,14 @@ def _mean(a, axis=None, dtype=None, out=None, keepdims=False): return ret def _nanmean(a, axis=None, dtype=None, out=None, keepdims=False): + # Using array() instead of asanyarray() because the former always + # makes a copy, which is important due to the copyto() action later arr = array(a, subok=True) mask = isnan(arr) - # Upgrade bool, unsigned int, and int to float64 - if dtype is None and arr.dtype.kind in ['b','u','i']: + # Cast bool, unsigned int, and int to float64 + if dtype is None and (issubdtype(arr.dtype, nt.integer) or + issubdtype(arr.dtype, nt.bool_)): ret = um.add.reduce(arr, axis=axis, dtype='f8', out=out, keepdims=keepdims) else: @@ -86,7 +91,8 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0, arr = asanyarray(a) # First compute the mean, saving 'rcount' for reuse later - if dtype is None and arr.dtype.kind in ['b','u','i']: + if dtype is None and (issubdtype(arr.dtype, nt.integer) or + issubdtype(arr.dtype, nt.bool_)): arrmean = um.add.reduce(arr, axis=axis, dtype='f8', keepdims=True) else: arrmean = um.add.reduce(arr, axis=axis, dtype=dtype, keepdims=True) @@ -101,7 +107,7 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0, x = arr - arrmean # (arr - arrmean) ** 2 - if arr.dtype.kind == 'c': + if issubdtype(arr.dtype, nt.complex_): x = um.multiply(x, um.conjugate(x), out=x).real else: x = um.multiply(x, x, out=x) @@ -123,11 +129,14 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0, def _nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): + # Using array() instead of asanyarray() because the former always + # makes a copy, which is important due to the copyto() action later arr = array(a, subok=True) mask = isnan(arr) # First compute the mean, saving 'rcount' for reuse later - if dtype is None and arr.dtype.kind in ['b','u','i']: + if dtype is None and (issubdtype(arr.dtype, nt.integer) or + issubdtype(arr.dtype, nt.bool_)): arrmean = um.add.reduce(arr, axis=axis, dtype='f8', keepdims=True) else: mu.copyto(arr, 0.0, where=mask) @@ -142,10 +151,10 @@ def _nanvar(a, axis=None, dtype=None, out=None, ddof=0, # arr - arrmean x = arr - arrmean - x[mask] = 0.0 + mu.copyto(x, 0.0, where=mask) # (arr - arrmean) ** 2 - if arr.dtype.kind == 'c': + if issubdtype(arr.dtype, nt.complex_): x = um.multiply(x, um.conjugate(x), out=x).real else: x = um.multiply(x, x, out=x) diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 5385c8bb1a74..c2183276d88d 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -1410,6 +1410,19 @@ def setUp(self): def test_basic(self): assert_almost_equal(nanmean(self.A),self.real_mean) + def test_mutation(self): + # Because of the "messing around" we do to replace NaNs with zeros + # this is meant to ensure we don't actually replace the NaNs in the + # actual array. + a_copy = self.A.copy() + b_copy = self.B.copy() + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + a_ret = nanmean(self.A) + assert_equal(self.A, a_copy) + b_ret = nanmean(self.B) + assert_equal(self.B, b_copy) + def test_allnans(self): with warnings.catch_warnings(record=True) as w: warnings.filterwarnings('always', '', RuntimeWarning) @@ -1453,6 +1466,19 @@ def test_basic(self): assert_almost_equal(nanvar(self.A),self.real_var) assert_almost_equal(nanstd(self.A)**2,self.real_var) + def test_mutation(self): + # Because of the "messing around" we do to replace NaNs with zeros + # this is meant to ensure we don't actually replace the NaNs in the + # actual array. + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + a_copy = self.A.copy() + b_copy = self.B.copy() + a_ret = nanvar(self.A) + assert_equal(self.A, a_copy) + b_ret = nanstd(self.B) + assert_equal(self.B, b_copy) + def test_ddof1(self): assert_almost_equal(nanvar(self.A,ddof=1), self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-1)) From 8f2cbf7005d294659a99cd6da2bbe234ea1e98a1 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Thu, 20 Jun 2013 20:44:54 -0600 Subject: [PATCH 5/6] MAINT: Separate nan functions into their own module. New files lib/nanfunctions.py and lib/tests/test_nanfunctions.py are added and both the previous and new nan functions and tests are moved into them. The existing nan functions moved from lib/function_base are: nansum, nanmin, nanmax, nanargmin, nanargmax The added nan functions moved from core/numeric are: nanmean, nanvar, nanstd --- numpy/core/_methods.py | 84 +--- numpy/core/fromnumeric.py | 274 +---------- numpy/core/tests/test_numeric.py | 101 ---- numpy/lib/__init__.py | 5 +- numpy/lib/function_base.py | 342 +------------ numpy/lib/nanfunctions.py | 678 ++++++++++++++++++++++++++ numpy/lib/tests/test_function_base.py | 133 +---- numpy/lib/tests/test_nanfunctions.py | 240 +++++++++ 8 files changed, 943 insertions(+), 914 deletions(-) create mode 100644 numpy/lib/nanfunctions.py create mode 100644 numpy/lib/tests/test_nanfunctions.py diff --git a/numpy/core/_methods.py b/numpy/core/_methods.py index c317358e1aa2..51731b9c221d 100644 --- a/numpy/core/_methods.py +++ b/numpy/core/_methods.py @@ -7,7 +7,7 @@ from numpy.core import multiarray as mu from numpy.core import umath as um -from numpy.core.numeric import array, asanyarray, isnan, issubdtype +from numpy.core.numeric import asanyarray, isnan, issubdtype from numpy.core import numerictypes as nt def _amax(a, axis=None, out=None, keepdims=False): @@ -63,29 +63,6 @@ def _mean(a, axis=None, dtype=None, out=None, keepdims=False): ret = ret / float(rcount) return ret -def _nanmean(a, axis=None, dtype=None, out=None, keepdims=False): - # Using array() instead of asanyarray() because the former always - # makes a copy, which is important due to the copyto() action later - arr = array(a, subok=True) - mask = isnan(arr) - - # Cast bool, unsigned int, and int to float64 - if dtype is None and (issubdtype(arr.dtype, nt.integer) or - issubdtype(arr.dtype, nt.bool_)): - ret = um.add.reduce(arr, axis=axis, dtype='f8', - out=out, keepdims=keepdims) - else: - mu.copyto(arr, 0.0, where=mask) - ret = um.add.reduce(arr, axis=axis, dtype=dtype, - out=out, keepdims=keepdims) - rcount = (~mask).sum(axis=axis) - if isinstance(ret, mu.ndarray): - ret = um.true_divide(ret, rcount, - out=ret, casting='unsafe', subok=False) - else: - ret = ret / float(rcount) - return ret - def _var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): arr = asanyarray(a) @@ -127,55 +104,6 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0, return ret -def _nanvar(a, axis=None, dtype=None, out=None, ddof=0, - keepdims=False): - # Using array() instead of asanyarray() because the former always - # makes a copy, which is important due to the copyto() action later - arr = array(a, subok=True) - mask = isnan(arr) - - # First compute the mean, saving 'rcount' for reuse later - if dtype is None and (issubdtype(arr.dtype, nt.integer) or - issubdtype(arr.dtype, nt.bool_)): - arrmean = um.add.reduce(arr, axis=axis, dtype='f8', keepdims=True) - else: - mu.copyto(arr, 0.0, where=mask) - arrmean = um.add.reduce(arr, axis=axis, dtype=dtype, - keepdims=True) - rcount = (~mask).sum(axis=axis, keepdims=True) - if isinstance(arrmean, mu.ndarray): - arrmean = um.true_divide(arrmean, rcount, - out=arrmean, casting='unsafe', subok=False) - else: - arrmean = arrmean / float(rcount) - - # arr - arrmean - x = arr - arrmean - mu.copyto(x, 0.0, where=mask) - - # (arr - arrmean) ** 2 - if issubdtype(arr.dtype, nt.complex_): - x = um.multiply(x, um.conjugate(x), out=x).real - else: - x = um.multiply(x, x, out=x) - - # add.reduce((arr - arrmean) ** 2, axis) - ret = um.add.reduce(x, axis=axis, dtype=dtype, out=out, - keepdims=keepdims) - - # add.reduce((arr - arrmean) ** 2, axis) / (n - ddof) - if not keepdims and isinstance(rcount, mu.ndarray): - rcount = rcount.squeeze(axis=axis) - rcount -= ddof - if isinstance(ret, mu.ndarray): - ret = um.true_divide(ret, rcount, - out=ret, casting='unsafe', subok=False) - else: - ret = ret / float(rcount) - - return ret - - def _std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) @@ -187,13 +115,3 @@ def _std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): return ret -def _nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): - ret = _nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, - keepdims=keepdims) - - if isinstance(ret, mu.ndarray): - ret = um.sqrt(ret, out=ret) - else: - ret = um.sqrt(ret) - - return ret diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index c65139fdde6c..c4c7d0e7dcf8 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -15,16 +15,13 @@ # functions that are now methods -__all__ = ['take', 'reshape', 'choose', 'repeat', 'put', - 'swapaxes', 'transpose', 'sort', 'argsort', 'argmax', 'argmin', - 'searchsorted', 'alen', - 'resize', 'diagonal', 'trace', 'ravel', 'nonzero', 'shape', - 'compress', 'clip', 'sum', 'product', 'prod', 'sometrue', 'alltrue', - 'any', 'all', 'cumsum', 'cumproduct', 'cumprod', 'ptp', 'ndim', - 'rank', 'size', 'around', 'round_', 'mean', 'nanmean', - 'std', 'nanstd', 'var', 'nanvar', 'squeeze', - 'amax', 'amin', - ] +__all__ = [ + 'take', 'reshape', 'choose', 'repeat', 'put', 'swapaxes', 'transpose', + 'sort', 'argsort', 'argmax', 'argmin', 'searchsorted', 'alen', + 'resize', 'diagonal', 'trace', 'ravel', 'nonzero', 'shape', 'compress', + 'clip', 'sum', 'product', 'prod', 'sometrue', 'alltrue', 'any', 'all', + 'cumsum', 'cumproduct', 'cumprod', 'ptp', 'ndim', 'rank', 'size', + 'around', 'round_', 'mean', 'std', 'var', 'squeeze', 'amax', 'amin', ] try: @@ -2566,81 +2563,6 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=False): return _methods._mean(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) -def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): - """ - Compute the arithmetic mean along the specified axis, ignoring NaNs. - - Returns the average of the array elements. The average is taken over - the flattened array by default, otherwise over the specified axis. - `float64` intermediate and return values are used for integer inputs. - - Parameters - ---------- - a : array_like - Array containing numbers whose mean is desired. If `a` is not an - array, a conversion is attempted. - axis : int, optional - Axis along which the means are computed. The default is to compute - the mean of the flattened array. - dtype : data-type, optional - Type to use in computing the mean. For integer inputs, the default - is `float64`; for floating point inputs, it is the same as the - input dtype. - out : ndarray, optional - Alternate output array in which to place the result. The default - is ``None``; if provided, it must have the same shape as the - expected output, but the type will be cast if necessary. - See `doc.ufuncs` for details. - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `arr`. - - Returns - ------- - m : ndarray, see dtype parameter above - If `out=None`, returns a new array containing the mean values, - otherwise a reference to the output array is returned. - - See Also - -------- - average : Weighted average - mean : Arithmetic mean taken while not ignoring NaNs - var, nanvar - - Notes - ----- - The arithmetic mean is the sum of the non-nan elements along the axis - divided by the number of non-nan elements. - - Note that for floating-point input, the mean is computed using the - same precision the input has. Depending on the input data, this can - cause the results to be inaccurate, especially for `float32`. - Specifying a higher-precision accumulator using the `dtype` keyword - can alleviate this issue. - - Examples - -------- - >>> a = np.array([[1, np.nan], [3, 4]]) - >>> np.nanmean(a) - 2.6666666666666665 - >>> np.nanmean(a, axis=0) - array([ 2., 4.]) - >>> np.nanmean(a, axis=1) - array([ 1., 3.5]) - - """ - if not (type(a) is mu.ndarray): - try: - mean = a.nanmean - return mean(axis=axis, dtype=dtype, out=out) - except AttributeError: - pass - - return _methods._nanmean(a, axis=axis, dtype=dtype, - out=out, keepdims=keepdims) - - def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ Compute the standard deviation along the specified axis. @@ -2743,97 +2665,6 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): return _methods._std(a, axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) -def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): - """ - Compute the standard deviation along the specified axis, while - ignoring NaNs. - - Returns the standard deviation, a measure of the spread of a distribution, - of the non-NaN array elements. The standard deviation is computed for the - flattened array by default, otherwise over the specified axis. - - Parameters - ---------- - a : array_like - Calculate the standard deviation of the non-NaN values. - axis : int, optional - Axis along which the standard deviation is computed. The default is - to compute the standard deviation of the flattened array. - dtype : dtype, optional - Type to use in computing the standard deviation. For arrays of - integer type the default is float64, for arrays of float types it is - the same as the array type. - out : ndarray, optional - Alternative output array in which to place the result. It must have - the same shape as the expected output but the type (of the calculated - values) will be cast if necessary. - ddof : int, optional - Means Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements. - By default `ddof` is zero. - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `arr`. - - Returns - ------- - standard_deviation : ndarray, see dtype parameter above. - If `out` is None, return a new array containing the standard deviation, - otherwise return a reference to the output array. - - See Also - -------- - var, mean, std - nanvar, nanmean - numpy.doc.ufuncs : Section "Output arguments" - - Notes - ----- - The standard deviation is the square root of the average of the squared - deviations from the mean, i.e., ``std = sqrt(mean(abs(x - x.mean())**2))``. - - The average squared deviation is normally calculated as - ``x.sum() / N``, where ``N = len(x)``. If, however, `ddof` is specified, - the divisor ``N - ddof`` is used instead. In standard statistical - practice, ``ddof=1`` provides an unbiased estimator of the variance - of the infinite population. ``ddof=0`` provides a maximum likelihood - estimate of the variance for normally distributed variables. The - standard deviation computed in this function is the square root of - the estimated variance, so even with ``ddof=1``, it will not be an - unbiased estimate of the standard deviation per se. - - Note that, for complex numbers, `std` takes the absolute - value before squaring, so that the result is always real and nonnegative. - - For floating-point input, the *std* is computed using the same - precision the input has. Depending on the input data, this can cause - the results to be inaccurate, especially for float32 (see example below). - Specifying a higher-accuracy accumulator using the `dtype` keyword can - alleviate this issue. - - Examples - -------- - >>> a = np.array([[1, np.nan], [3, 4]]) - >>> np.nanstd(a) - 1.247219128924647 - >>> np.nanstd(a, axis=0) - array([ 1., 0.]) - >>> np.nanstd(a, axis=1) - array([ 0., 0.5]) - - """ - - if not (type(a) is mu.ndarray): - try: - nanstd = a.nanstd - return nanstd(axis=axis, dtype=dtype, out=out, ddof=ddof) - except AttributeError: - pass - - return _methods._nanstd(a, axis=axis, dtype=dtype, out=out, ddof=ddof, - keepdims=keepdims) - def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ @@ -2937,94 +2768,3 @@ def var(a, axis=None, dtype=None, out=None, ddof=0, return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) - -def nanvar(a, axis=None, dtype=None, out=None, ddof=0, - keepdims=False): - """ - Compute the variance along the specified axis, while ignoring NaNs. - - Returns the variance of the array elements, a measure of the spread of a - distribution. The variance is computed for the flattened array by - default, otherwise over the specified axis. - - Parameters - ---------- - a : array_like - Array containing numbers whose variance is desired. If `a` is not an - array, a conversion is attempted. - axis : int, optional - Axis along which the variance is computed. The default is to compute - the variance of the flattened array. - dtype : data-type, optional - Type to use in computing the variance. For arrays of integer type - the default is `float32`; for arrays of float types it is the same as - the array type. - out : ndarray, optional - Alternate output array in which to place the result. It must have - the same shape as the expected output, but the type is cast if - necessary. - ddof : int, optional - "Delta Degrees of Freedom": the divisor used in the calculation is - ``N - ddof``, where ``N`` represents the number of elements. By - default `ddof` is zero. - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `arr`. - - Returns - ------- - variance : ndarray, see dtype parameter above - If ``out=None``, returns a new array containing the variance; - otherwise, a reference to the output array is returned. - - See Also - -------- - std : Standard deviation - mean : Average - var : Variance while not ignoring NaNs - nanstd, nanmean - numpy.doc.ufuncs : Section "Output arguments" - - Notes - ----- - The variance is the average of the squared deviations from the mean, - i.e., ``var = mean(abs(x - x.mean())**2)``. - - The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``. - If, however, `ddof` is specified, the divisor ``N - ddof`` is used - instead. In standard statistical practice, ``ddof=1`` provides an - unbiased estimator of the variance of a hypothetical infinite population. - ``ddof=0`` provides a maximum likelihood estimate of the variance for - normally distributed variables. - - Note that for complex numbers, the absolute value is taken before - squaring, so that the result is always real and nonnegative. - - For floating-point input, the variance is computed using the same - precision the input has. Depending on the input data, this can cause - the results to be inaccurate, especially for `float32` (see example - below). Specifying a higher-accuracy accumulator using the ``dtype`` - keyword can alleviate this issue. - - Examples - -------- - >>> a = np.array([[1, np.nan], [3, 4]]) - >>> np.var(a) - 1.5555555555555554 - >>> np.nanvar(a, axis=0) - array([ 1., 0.]) - >>> np.nanvar(a, axis=1) - array([ 0., 0.25]) - - """ - - if not (type(a) is mu.ndarray): - try: - nanvar = a.nanvar - return nanvar(axis=axis, dtype=dtype, out=out, ddof=ddof) - except AttributeError: - pass - - return _methods._nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, - keepdims=keepdims) diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index c2183276d88d..0f9dbd0c0c3e 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -183,12 +183,6 @@ def test_mean(self): assert_(isnan(mean([]))) assert_(w[0].category is RuntimeWarning) - def test_nanmean(self): - A = [[1, nan, nan], [nan, 4, 5]] - assert_(nanmean(A) == (10.0 / 3)) - assert_(all(nanmean(A,0) == array([1, 4, 5]))) - assert_(all(nanmean(A,1) == array([1, 4.5]))) - def test_std(self): A = [[1,2,3],[4,5,6]] assert_almost_equal(std(A), 1.707825127659933) @@ -200,12 +194,6 @@ def test_std(self): assert_(isnan(std([]))) assert_(w[0].category is RuntimeWarning) - def test_nanstd(self): - A = [[1, nan, nan], [nan, 4, 5]] - assert_almost_equal(nanstd(A), 1.699673171197595) - assert_almost_equal(nanstd(A,0), array([0.0, 0.0, 0.0])) - assert_almost_equal(nanstd(A,1), array([0.0, 0.5])) - def test_var(self): A = [[1,2,3],[4,5,6]] assert_almost_equal(var(A), 2.9166666666666665) @@ -217,12 +205,6 @@ def test_var(self): assert_(isnan(mean([]))) assert_(w[0].category is RuntimeWarning) - def test_nanvar(self): - A = [[1, nan, nan], [nan, 4, 5]] - assert_almost_equal(nanvar(A), 2.88888888889) - assert_almost_equal(nanvar(A,0), array([0.0, 0.0, 0.0])) - assert_almost_equal(nanvar(A,1), array([0.0, 0.25])) - class TestBoolScalar(TestCase): def test_logical(self): @@ -1401,40 +1383,6 @@ def test_no_parameter_modification(self): assert_array_equal(x, array([inf, 1])) assert_array_equal(y, array([0, inf])) -class TestNaNMean(TestCase): - def setUp(self): - self.A = array([1,nan,-1,nan,nan,1,-1]) - self.B = array([nan, nan, nan, nan]) - self.real_mean = 0 - - def test_basic(self): - assert_almost_equal(nanmean(self.A),self.real_mean) - - def test_mutation(self): - # Because of the "messing around" we do to replace NaNs with zeros - # this is meant to ensure we don't actually replace the NaNs in the - # actual array. - a_copy = self.A.copy() - b_copy = self.B.copy() - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - a_ret = nanmean(self.A) - assert_equal(self.A, a_copy) - b_ret = nanmean(self.B) - assert_equal(self.B, b_copy) - - def test_allnans(self): - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_(isnan(nanmean(self.B))) - assert_(w[0].category is RuntimeWarning) - - def test_empty(self): - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_(isnan(nanmean(array([])))) - assert_(w[0].category is RuntimeWarning) - class TestStdVar(TestCase): def setUp(self): self.A = array([1,-1,1,-1]) @@ -1456,55 +1404,6 @@ def test_ddof2(self): assert_almost_equal(std(self.A,ddof=2)**2, self.real_var*len(self.A)/float(len(self.A)-2)) -class TestNaNStdVar(TestCase): - def setUp(self): - self.A = array([nan,1,-1,nan,1,nan,-1]) - self.B = array([nan, nan, nan, nan]) - self.real_var = 1 - - def test_basic(self): - assert_almost_equal(nanvar(self.A),self.real_var) - assert_almost_equal(nanstd(self.A)**2,self.real_var) - - def test_mutation(self): - # Because of the "messing around" we do to replace NaNs with zeros - # this is meant to ensure we don't actually replace the NaNs in the - # actual array. - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - a_copy = self.A.copy() - b_copy = self.B.copy() - a_ret = nanvar(self.A) - assert_equal(self.A, a_copy) - b_ret = nanstd(self.B) - assert_equal(self.B, b_copy) - - def test_ddof1(self): - assert_almost_equal(nanvar(self.A,ddof=1), - self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-1)) - assert_almost_equal(nanstd(self.A,ddof=1)**2, - self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-1)) - - def test_ddof2(self): - assert_almost_equal(nanvar(self.A,ddof=2), - self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-2)) - assert_almost_equal(nanstd(self.A,ddof=2)**2, - self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-2)) - - def test_allnans(self): - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_(isnan(nanvar(self.B))) - assert_(isnan(nanstd(self.B))) - assert_(w[0].category is RuntimeWarning) - - def test_empty(self): - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_(isnan(nanvar(array([])))) - assert_(isnan(nanstd(array([])))) - assert_(w[0].category is RuntimeWarning) - class TestStdVarComplex(TestCase): def test_basic(self): A = array([1,1.j,-1,-1.j]) diff --git a/numpy/lib/__init__.py b/numpy/lib/__init__.py index 12acae95b583..64a8550c6b34 100644 --- a/numpy/lib/__init__.py +++ b/numpy/lib/__init__.py @@ -1,11 +1,14 @@ from __future__ import division, absolute_import, print_function +import math + from .info import __doc__ from numpy.version import version as __version__ from .type_check import * from .index_tricks import * from .function_base import * +from .nanfunctions import * from .shape_base import * from .stride_tricks import * from .twodim_base import * @@ -18,7 +21,6 @@ from .arraysetops import * from .npyio import * from .financial import * -import math from .arrayterator import * from .arraypad import * @@ -36,6 +38,7 @@ __all__ += arraysetops.__all__ __all__ += npyio.__all__ __all__ += financial.__all__ +__all__ += nanfunctions.__all__ from numpy.testing import Tester test = Tester().test diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index d782f454a5e7..3c05cdb98718 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -1,15 +1,14 @@ from __future__ import division, absolute_import, print_function __docformat__ = "restructuredtext en" -__all__ = ['select', 'piecewise', 'trim_zeros', 'copy', 'iterable', - 'percentile', 'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', - 'disp', 'extract', 'place', 'nansum', 'nanmax', 'nanargmax', - 'nanargmin', 'nanmin', 'vectorize', 'asarray_chkfinite', 'average', - 'histogram', 'histogramdd', 'bincount', 'digitize', 'cov', - 'corrcoef', 'msort', 'median', 'sinc', 'hamming', 'hanning', - 'bartlett', 'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc', - 'add_docstring', 'meshgrid', 'delete', 'insert', 'append', 'interp', - 'add_newdoc_ufunc'] +__all__ = [ + 'select', 'piecewise', 'trim_zeros', 'copy', 'iterable', 'percentile', + 'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', 'disp', + 'extract', 'place', 'vectorize', 'asarray_chkfinite', 'average', + 'histogram', 'histogramdd', 'bincount', 'digitize', 'cov', 'corrcoef', + 'msort', 'median', 'sinc', 'hamming', 'hanning', 'bartlett', + 'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc', 'add_docstring', + 'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc'] import warnings import types @@ -1361,331 +1360,6 @@ def place(arr, mask, vals): """ return _insert(arr, mask, vals) -def _nanop(op, fill, a, axis=None): - """ - General operation on arrays with not-a-number values. - - Parameters - ---------- - op : callable - Operation to perform. - fill : float - NaN values are set to fill before doing the operation. - a : array-like - Input array. - axis : {int, None}, optional - Axis along which the operation is computed. - By default the input is flattened. - - Returns - ------- - y : {ndarray, scalar} - Processed data. - - """ - y = array(a, subok=True) - - # We only need to take care of NaN's in floating point arrays - dt = y.dtype - if np.issubdtype(dt, np.integer) or np.issubdtype(dt, np.bool_): - return op(y, axis=axis) - - mask = isnan(a) - # y[mask] = fill - # We can't use fancy indexing here as it'll mess w/ MaskedArrays - # Instead, let's fill the array directly... - np.copyto(y, fill, where=mask) - res = op(y, axis=axis) - mask_all_along_axis = mask.all(axis=axis) - - # Along some axes, only nan's were encountered. As such, any values - # calculated along that axis should be set to nan. - if mask_all_along_axis.any(): - if np.isscalar(res): - res = np.nan - else: - res[mask_all_along_axis] = np.nan - - return res - -def nansum(a, axis=None): - """ - Return the sum of array elements over a given axis treating - Not a Numbers (NaNs) as zero. - - Parameters - ---------- - a : array_like - Array containing numbers whose sum is desired. If `a` is not an - array, a conversion is attempted. - axis : int, optional - Axis along which the sum is computed. The default is to compute - the sum of the flattened array. - - Returns - ------- - y : ndarray - An array with the same shape as a, with the specified axis removed. - If a is a 0-d array, or if axis is None, a scalar is returned with - the same dtype as `a`. - - See Also - -------- - numpy.sum : Sum across array including Not a Numbers. - isnan : Shows which elements are Not a Number (NaN). - isfinite: Shows which elements are not: Not a Number, positive and - negative infinity - - Notes - ----- - Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic - (IEEE 754). This means that Not a Number is not equivalent to infinity. - If positive or negative infinity are present the result is positive or - negative infinity. But if both positive and negative infinity are present, - the result is Not A Number (NaN). - - Arithmetic is modular when using integer types (all elements of `a` must - be finite i.e. no elements that are NaNs, positive infinity and negative - infinity because NaNs are floating point types), and no error is raised - on overflow. - - - Examples - -------- - >>> np.nansum(1) - 1 - >>> np.nansum([1]) - 1 - >>> np.nansum([1, np.nan]) - 1.0 - >>> a = np.array([[1, 1], [1, np.nan]]) - >>> np.nansum(a) - 3.0 - >>> np.nansum(a, axis=0) - array([ 2., 1.]) - - When positive infinity and negative infinity are present - - >>> np.nansum([1, np.nan, np.inf]) - inf - >>> np.nansum([1, np.nan, np.NINF]) - -inf - >>> np.nansum([1, np.nan, np.inf, np.NINF]) - nan - - """ - return _nanop(np.sum, 0, a, axis) - -def nanmin(a, axis=None): - """ - Return the minimum of an array or minimum along an axis, ignoring any NaNs. - - Parameters - ---------- - a : array_like - Array containing numbers whose minimum is desired. If `a` is not - an array, a conversion is attempted. - axis : int, optional - Axis along which the minimum is computed. The default is to compute - the minimum of the flattened array. - - Returns - ------- - nanmin : ndarray - An array with the same shape as `a`, with the specified axis removed. - If `a` is a 0-d array, or if axis is None, an ndarray scalar is - returned. The same dtype as `a` is returned. - - See Also - -------- - nanmax : - The maximum value of an array along a given axis, ignoring any NaNs. - amin : - The minimum value of an array along a given axis, propagating any NaNs. - fmin : - Element-wise minimum of two arrays, ignoring any NaNs. - minimum : - Element-wise minimum of two arrays, propagating any NaNs. - isnan : - Shows which elements are Not a Number (NaN). - isfinite: - Shows which elements are neither NaN nor infinity. - - amax, fmax, maximum - - Notes - ----- - Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic - (IEEE 754). This means that Not a Number is not equivalent to infinity. - Positive infinity is treated as a very large number and negative infinity - is treated as a very small (i.e. negative) number. - - If the input has a integer type the function is equivalent to np.min. - - Examples - -------- - >>> a = np.array([[1, 2], [3, np.nan]]) - >>> np.nanmin(a) - 1.0 - >>> np.nanmin(a, axis=0) - array([ 1., 2.]) - >>> np.nanmin(a, axis=1) - array([ 1., 3.]) - - When positive infinity and negative infinity are present: - - >>> np.nanmin([1, 2, np.nan, np.inf]) - 1.0 - >>> np.nanmin([1, 2, np.nan, np.NINF]) - -inf - - """ - a = np.asanyarray(a) - if axis is not None: - return np.fmin.reduce(a, axis) - else: - return np.fmin.reduce(a.flat) - -def nanargmin(a, axis=None): - """ - Return indices of the minimum values over an axis, ignoring NaNs. - - Parameters - ---------- - a : array_like - Input data. - axis : int, optional - Axis along which to operate. By default flattened input is used. - - Returns - ------- - index_array : ndarray - An array of indices or a single index value. - - See Also - -------- - argmin, nanargmax - - Examples - -------- - >>> a = np.array([[np.nan, 4], [2, 3]]) - >>> np.argmin(a) - 0 - >>> np.nanargmin(a) - 2 - >>> np.nanargmin(a, axis=0) - array([1, 1]) - >>> np.nanargmin(a, axis=1) - array([1, 0]) - - """ - return _nanop(np.argmin, np.inf, a, axis) - -def nanmax(a, axis=None): - """ - Return the maximum of an array or maximum along an axis, ignoring any NaNs. - - Parameters - ---------- - a : array_like - Array containing numbers whose maximum is desired. If `a` is not - an array, a conversion is attempted. - axis : int, optional - Axis along which the maximum is computed. The default is to compute - the maximum of the flattened array. - - Returns - ------- - nanmax : ndarray - An array with the same shape as `a`, with the specified axis removed. - If `a` is a 0-d array, or if axis is None, an ndarray scalar is - returned. The same dtype as `a` is returned. - - See Also - -------- - nanmin : - The minimum value of an array along a given axis, ignoring any NaNs. - amax : - The maximum value of an array along a given axis, propagating any NaNs. - fmax : - Element-wise maximum of two arrays, ignoring any NaNs. - maximum : - Element-wise maximum of two arrays, propagating any NaNs. - isnan : - Shows which elements are Not a Number (NaN). - isfinite: - Shows which elements are neither NaN nor infinity. - - amin, fmin, minimum - - Notes - ----- - Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic - (IEEE 754). This means that Not a Number is not equivalent to infinity. - Positive infinity is treated as a very large number and negative infinity - is treated as a very small (i.e. negative) number. - - If the input has a integer type the function is equivalent to np.max. - - Examples - -------- - >>> a = np.array([[1, 2], [3, np.nan]]) - >>> np.nanmax(a) - 3.0 - >>> np.nanmax(a, axis=0) - array([ 3., 2.]) - >>> np.nanmax(a, axis=1) - array([ 2., 3.]) - - When positive infinity and negative infinity are present: - - >>> np.nanmax([1, 2, np.nan, np.NINF]) - 2.0 - >>> np.nanmax([1, 2, np.nan, np.inf]) - inf - - """ - a = np.asanyarray(a) - if axis is not None: - return np.fmax.reduce(a, axis) - else: - return np.fmax.reduce(a.flat) - -def nanargmax(a, axis=None): - """ - Return indices of the maximum values over an axis, ignoring NaNs. - - Parameters - ---------- - a : array_like - Input data. - axis : int, optional - Axis along which to operate. By default flattened input is used. - - Returns - ------- - index_array : ndarray - An array of indices or a single index value. - - See Also - -------- - argmax, nanargmin - - Examples - -------- - >>> a = np.array([[np.nan, 4], [2, 3]]) - >>> np.argmax(a) - 0 - >>> np.nanargmax(a) - 1 - >>> np.nanargmax(a, axis=0) - array([1, 0]) - >>> np.nanargmax(a, axis=1) - array([1, 1]) - - """ - return _nanop(np.argmax, -np.inf, a, axis) - def disp(mesg, device=None, linefeed=True): """ Display a message on a device. diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py new file mode 100644 index 000000000000..f0e635791c7e --- /dev/null +++ b/numpy/lib/nanfunctions.py @@ -0,0 +1,678 @@ +"""Functions that ignore nan. + +""" +from __future__ import division, absolute_import, print_function + +import numpy as np + +__all__ = [ + 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', + 'nanvar', 'nanstd' + ] + + +def _nanmean(a, axis=None, dtype=None, out=None, keepdims=False): + # Using array() instead of asanyarray() because the former always + # makes a copy, which is important due to the copyto() action later + arr = np.array(a, subok=True) + mask = np.isnan(arr) + + # Cast bool, unsigned int, and int to float64 + if np.dtype is None and issubclass(arr.dtype.type, (np.integer, np.bool_)): + ret = np.add.reduce(arr, axis=axis, dtype='f8', + out=out, keepdims=keepdims) + else: + np.copyto(arr, 0.0, where=mask) + ret = np.add.reduce(arr, axis=axis, dtype=dtype, + out=out, keepdims=keepdims) + rcount = (~mask).sum(axis=axis) + if isinstance(ret, np.ndarray): + ret = np.true_divide(ret, rcount, out=ret, casting='unsafe', + subok=False) + else: + ret = ret / rcount + return ret + + +def _nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): + # Using array() instead of asanyarray() because the former always + # makes a copy, which is important due to the copyto() action later + arr = np.array(a, subok=True) + mask = np.isnan(arr) + + # First compute the mean, saving 'rcount' for reuse later + if dtype is None and issubclass(arr.dtype.type, (np.integer, np.bool_)): + arrmean = np.add.reduce(arr, axis=axis, dtype='f8', keepdims=True) + else: + np.copyto(arr, 0.0, where=mask) + arrmean = np.add.reduce(arr, axis=axis, dtype=dtype, keepdims=True) + rcount = (~mask).sum(axis=axis, keepdims=True) + if isinstance(arrmean, np.ndarray): + arrmean = np.true_divide(arrmean, rcount, + out=arrmean, casting='unsafe', subok=False) + else: + arrmean = arrmean / rcount + + # arr - arrmean + x = arr - arrmean + np.copyto(x, 0.0, where=mask) + + # (arr - arrmean) ** 2 + if issubclass(arr.dtype.type, np.complex_): + x = np.multiply(x, np.conjugate(x), out=x).real + else: + x = np.multiply(x, x, out=x) + + # add.reduce((arr - arrmean) ** 2, axis) + ret = np.add.reduce(x, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + + # add.reduce((arr - arrmean) ** 2, axis) / (n - ddof) + if not keepdims and isinstance(rcount, np.ndarray): + rcount = rcount.squeeze(axis=axis) + rcount -= ddof + if isinstance(ret, np.ndarray): + ret = np.true_divide(ret, rcount, out=ret, casting='unsafe', subok=False) + else: + ret = ret / rcount + + return ret + + +def _nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): + ret = _nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims) + + if isinstance(ret, np.ndarray): + ret = np.sqrt(ret, out=ret) + else: + ret = np.sqrt(ret) + + return ret + + +def _nanop(op, fill, a, axis=None): + """ + General operation on arrays with not-a-number values. + + Parameters + ---------- + op : callable + Operation to perform. + fill : float + NaN values are set to fill before doing the operation. + a : array-like + Input array. + axis : {int, None}, optional + Axis along which the operation is computed. + By default the input is flattened. + + Returns + ------- + y : {ndarray, scalar} + Processed data. + + """ + y = np.array(a, subok=True) + + # We only need to take care of NaN's in floating point arrays + dt = y.dtype + if np.issubdtype(dt, np.integer) or np.issubdtype(dt, np.bool_): + return op(y, axis=axis) + + mask = np.isnan(a) + # y[mask] = fill + # We can't use fancy indexing here as it'll mess w/ MaskedArrays + # Instead, let's fill the array directly... + np.copyto(y, fill, where=mask) + res = op(y, axis=axis) + mask_all_along_axis = mask.all(axis=axis) + + # Along some axes, only nan's were encountered. As such, any values + # calculated along that axis should be set to nan. + if mask_all_along_axis.any(): + if np.isscalar(res): + res = np.nan + else: + res[mask_all_along_axis] = np.nan + + return res + + +def nansum(a, axis=None): + """ + Return the sum of array elements over a given axis treating + Not a Numbers (NaNs) as zero. + + Parameters + ---------- + a : array_like + Array containing numbers whose sum is desired. If `a` is not an + array, a conversion is attempted. + axis : int, optional + Axis along which the sum is computed. The default is to compute + the sum of the flattened array. + + Returns + ------- + y : ndarray + An array with the same shape as a, with the specified axis removed. + If a is a 0-d array, or if axis is None, a scalar is returned with + the same dtype as `a`. + + See Also + -------- + numpy.sum : Sum across array including Not a Numbers. + isnan : Shows which elements are Not a Number (NaN). + isfinite: Shows which elements are not: Not a Number, positive and + negative infinity + + Notes + ----- + Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). This means that Not a Number is not equivalent to infinity. + If positive or negative infinity are present the result is positive or + negative infinity. But if both positive and negative infinity are present, + the result is Not A Number (NaN). + + Arithmetic is modular when using integer types (all elements of `a` must + be finite i.e. no elements that are NaNs, positive infinity and negative + infinity because NaNs are floating point types), and no error is raised + on overflow. + + + Examples + -------- + >>> np.nansum(1) + 1 + >>> np.nansum([1]) + 1 + >>> np.nansum([1, np.nan]) + 1.0 + >>> a = np.array([[1, 1], [1, np.nan]]) + >>> np.nansum(a) + 3.0 + >>> np.nansum(a, axis=0) + array([ 2., 1.]) + + When positive infinity and negative infinity are present + + >>> np.nansum([1, np.nan, np.inf]) + inf + >>> np.nansum([1, np.nan, np.NINF]) + -inf + >>> np.nansum([1, np.nan, np.inf, np.NINF]) + nan + + """ + return _nanop(np.sum, 0, a, axis) + + +def nanmin(a, axis=None): + """ + Return the minimum of an array or minimum along an axis, ignoring any NaNs. + + Parameters + ---------- + a : array_like + Array containing numbers whose minimum is desired. If `a` is not + an array, a conversion is attempted. + axis : int, optional + Axis along which the minimum is computed. The default is to compute + the minimum of the flattened array. + + Returns + ------- + nanmin : ndarray + An array with the same shape as `a`, with the specified axis removed. + If `a` is a 0-d array, or if axis is None, an ndarray scalar is + returned. The same dtype as `a` is returned. + + See Also + -------- + nanmax : + The maximum value of an array along a given axis, ignoring any NaNs. + amin : + The minimum value of an array along a given axis, propagating any NaNs. + fmin : + Element-wise minimum of two arrays, ignoring any NaNs. + minimum : + Element-wise minimum of two arrays, propagating any NaNs. + isnan : + Shows which elements are Not a Number (NaN). + isfinite: + Shows which elements are neither NaN nor infinity. + + amax, fmax, maximum + + Notes + ----- + Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). This means that Not a Number is not equivalent to infinity. + Positive infinity is treated as a very large number and negative infinity + is treated as a very small (i.e. negative) number. + + If the input has a integer type the function is equivalent to np.min. + + Examples + -------- + >>> a = np.array([[1, 2], [3, np.nan]]) + >>> np.nanmin(a) + 1.0 + >>> np.nanmin(a, axis=0) + array([ 1., 2.]) + >>> np.nanmin(a, axis=1) + array([ 1., 3.]) + + When positive infinity and negative infinity are present: + + >>> np.nanmin([1, 2, np.nan, np.inf]) + 1.0 + >>> np.nanmin([1, 2, np.nan, np.NINF]) + -inf + + """ + a = np.asanyarray(a) + if axis is not None: + return np.fmin.reduce(a, axis) + else: + return np.fmin.reduce(a.flat) + + +def nanargmin(a, axis=None): + """ + Return indices of the minimum values over an axis, ignoring NaNs. + + Parameters + ---------- + a : array_like + Input data. + axis : int, optional + Axis along which to operate. By default flattened input is used. + + Returns + ------- + index_array : ndarray + An array of indices or a single index value. + + See Also + -------- + argmin, nanargmax + + Examples + -------- + >>> a = np.array([[np.nan, 4], [2, 3]]) + >>> np.argmin(a) + 0 + >>> np.nanargmin(a) + 2 + >>> np.nanargmin(a, axis=0) + array([1, 1]) + >>> np.nanargmin(a, axis=1) + array([1, 0]) + + """ + return _nanop(np.argmin, np.inf, a, axis) + + +def nanmax(a, axis=None): + """ + Return the maximum of an array or maximum along an axis, ignoring any NaNs. + + Parameters + ---------- + a : array_like + Array containing numbers whose maximum is desired. If `a` is not + an array, a conversion is attempted. + axis : int, optional + Axis along which the maximum is computed. The default is to compute + the maximum of the flattened array. + + Returns + ------- + nanmax : ndarray + An array with the same shape as `a`, with the specified axis removed. + If `a` is a 0-d array, or if axis is None, an ndarray scalar is + returned. The same dtype as `a` is returned. + + See Also + -------- + nanmin : + The minimum value of an array along a given axis, ignoring any NaNs. + amax : + The maximum value of an array along a given axis, propagating any NaNs. + fmax : + Element-wise maximum of two arrays, ignoring any NaNs. + maximum : + Element-wise maximum of two arrays, propagating any NaNs. + isnan : + Shows which elements are Not a Number (NaN). + isfinite: + Shows which elements are neither NaN nor infinity. + + amin, fmin, minimum + + Notes + ----- + Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). This means that Not a Number is not equivalent to infinity. + Positive infinity is treated as a very large number and negative infinity + is treated as a very small (i.e. negative) number. + + If the input has a integer type the function is equivalent to np.max. + + Examples + -------- + >>> a = np.array([[1, 2], [3, np.nan]]) + >>> np.nanmax(a) + 3.0 + >>> np.nanmax(a, axis=0) + array([ 3., 2.]) + >>> np.nanmax(a, axis=1) + array([ 2., 3.]) + + When positive infinity and negative infinity are present: + + >>> np.nanmax([1, 2, np.nan, np.NINF]) + 2.0 + >>> np.nanmax([1, 2, np.nan, np.inf]) + inf + + """ + a = np.asanyarray(a) + if axis is not None: + return np.fmax.reduce(a, axis) + else: + return np.fmax.reduce(a.flat) + + +def nanargmax(a, axis=None): + """ + Return indices of the maximum values over an axis, ignoring NaNs. + + Parameters + ---------- + a : array_like + Input data. + axis : int, optional + Axis along which to operate. By default flattened input is used. + + Returns + ------- + index_array : ndarray + An array of indices or a single index value. + + See Also + -------- + argmax, nanargmin + + Examples + -------- + >>> a = np.array([[np.nan, 4], [2, 3]]) + >>> np.argmax(a) + 0 + >>> np.nanargmax(a) + 1 + >>> np.nanargmax(a, axis=0) + array([1, 0]) + >>> np.nanargmax(a, axis=1) + array([1, 1]) + + """ + return _nanop(np.argmax, -np.inf, a, axis) + + +def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): + """ + Compute the arithmetic mean along the specified axis, ignoring NaNs. + + Returns the average of the array elements. The average is taken over + the flattened array by default, otherwise over the specified axis. + `float64` intermediate and return values are used for integer inputs. + + Parameters + ---------- + a : array_like + Array containing numbers whose mean is desired. If `a` is not an + array, a conversion is attempted. + axis : int, optional + Axis along which the means are computed. The default is to compute + the mean of the flattened array. + dtype : data-type, optional + Type to use in computing the mean. For integer inputs, the default + is `float64`; for floating point inputs, it is the same as the + input dtype. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output, but the type will be cast if necessary. + See `doc.ufuncs` for details. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. + + Returns + ------- + m : ndarray, see dtype parameter above + If `out=None`, returns a new array containing the mean values, + otherwise a reference to the output array is returned. + + See Also + -------- + average : Weighted average + mean : Arithmetic mean taken while not ignoring NaNs + var, nanvar + + Notes + ----- + The arithmetic mean is the sum of the non-nan elements along the axis + divided by the number of non-nan elements. + + Note that for floating-point input, the mean is computed using the + same precision the input has. Depending on the input data, this can + cause the results to be inaccurate, especially for `float32`. + Specifying a higher-precision accumulator using the `dtype` keyword + can alleviate this issue. + + Examples + -------- + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.nanmean(a) + 2.6666666666666665 + >>> np.nanmean(a, axis=0) + array([ 2., 4.]) + >>> np.nanmean(a, axis=1) + array([ 1., 3.5]) + + """ + if not (type(a) is np.ndarray): + try: + mean = a.nanmean + return mean(axis=axis, dtype=dtype, out=out) + except AttributeError: + pass + + return _nanmean(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + + +def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): + """ + Compute the standard deviation along the specified axis, while + ignoring NaNs. + + Returns the standard deviation, a measure of the spread of a distribution, + of the non-NaN array elements. The standard deviation is computed for the + flattened array by default, otherwise over the specified axis. + + Parameters + ---------- + a : array_like + Calculate the standard deviation of the non-NaN values. + axis : int, optional + Axis along which the standard deviation is computed. The default is + to compute the standard deviation of the flattened array. + dtype : dtype, optional + Type to use in computing the standard deviation. For arrays of + integer type the default is float64, for arrays of float types it is + the same as the array type. + out : ndarray, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output but the type (of the calculated + values) will be cast if necessary. + ddof : int, optional + Means Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + By default `ddof` is zero. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. + + Returns + ------- + standard_deviation : ndarray, see dtype parameter above. + If `out` is None, return a new array containing the standard deviation, + otherwise return a reference to the output array. + + See Also + -------- + var, mean, std + nanvar, nanmean + numpy.doc.ufuncs : Section "Output arguments" + + Notes + ----- + The standard deviation is the square root of the average of the squared + deviations from the mean, i.e., ``std = sqrt(mean(abs(x - x.mean())**2))``. + + The average squared deviation is normally calculated as + ``x.sum() / N``, where ``N = len(x)``. If, however, `ddof` is specified, + the divisor ``N - ddof`` is used instead. In standard statistical + practice, ``ddof=1`` provides an unbiased estimator of the variance + of the infinite population. ``ddof=0`` provides a maximum likelihood + estimate of the variance for normally distributed variables. The + standard deviation computed in this function is the square root of + the estimated variance, so even with ``ddof=1``, it will not be an + unbiased estimate of the standard deviation per se. + + Note that, for complex numbers, `std` takes the absolute + value before squaring, so that the result is always real and nonnegative. + + For floating-point input, the *std* is computed using the same + precision the input has. Depending on the input data, this can cause + the results to be inaccurate, especially for float32 (see example below). + Specifying a higher-accuracy accumulator using the `dtype` keyword can + alleviate this issue. + + Examples + -------- + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.nanstd(a) + 1.247219128924647 + >>> np.nanstd(a, axis=0) + array([ 1., 0.]) + >>> np.nanstd(a, axis=1) + array([ 0., 0.5]) + + """ + + if not (type(a) is np.ndarray): + try: + nanstd = a.nanstd + return nanstd(axis=axis, dtype=dtype, out=out, ddof=ddof) + except AttributeError: + pass + + return _nanstd(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims) + + +def nanvar(a, axis=None, dtype=None, out=None, ddof=0, + keepdims=False): + """ + Compute the variance along the specified axis, while ignoring NaNs. + + Returns the variance of the array elements, a measure of the spread of a + distribution. The variance is computed for the flattened array by + default, otherwise over the specified axis. + + Parameters + ---------- + a : array_like + Array containing numbers whose variance is desired. If `a` is not an + array, a conversion is attempted. + axis : int, optional + Axis along which the variance is computed. The default is to compute + the variance of the flattened array. + dtype : data-type, optional + Type to use in computing the variance. For arrays of integer type + the default is `float32`; for arrays of float types it is the same as + the array type. + out : ndarray, optional + Alternate output array in which to place the result. It must have + the same shape as the expected output, but the type is cast if + necessary. + ddof : int, optional + "Delta Degrees of Freedom": the divisor used in the calculation is + ``N - ddof``, where ``N`` represents the number of elements. By + default `ddof` is zero. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. + + Returns + ------- + variance : ndarray, see dtype parameter above + If ``out=None``, returns a new array containing the variance; + otherwise, a reference to the output array is returned. + + See Also + -------- + std : Standard deviation + mean : Average + var : Variance while not ignoring NaNs + nanstd, nanmean + numpy.doc.ufuncs : Section "Output arguments" + + Notes + ----- + The variance is the average of the squared deviations from the mean, + i.e., ``var = mean(abs(x - x.mean())**2)``. + + The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``. + If, however, `ddof` is specified, the divisor ``N - ddof`` is used + instead. In standard statistical practice, ``ddof=1`` provides an + unbiased estimator of the variance of a hypothetical infinite population. + ``ddof=0`` provides a maximum likelihood estimate of the variance for + normally distributed variables. + + Note that for complex numbers, the absolute value is taken before + squaring, so that the result is always real and nonnegative. + + For floating-point input, the variance is computed using the same + precision the input has. Depending on the input data, this can cause + the results to be inaccurate, especially for `float32` (see example + below). Specifying a higher-accuracy accumulator using the ``dtype`` + keyword can alleviate this issue. + + Examples + -------- + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.var(a) + 1.5555555555555554 + >>> np.nanvar(a, axis=0) + array([ 1., 0.]) + >>> np.nanvar(a, axis=1) + array([ 0., 0.25]) + + """ + if not (type(a) is np.ndarray): + try: + nanvar = a.nanvar + return nanvar(axis=axis, dtype=dtype, out=out, ddof=ddof) + except AttributeError: + pass + + return _nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims) diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 45e2489131eb..a3f3512e8dd7 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -3,10 +3,10 @@ import warnings import numpy as np from numpy.testing import ( - run_module_suite, TestCase, assert_, assert_equal, - assert_array_equal, assert_almost_equal, assert_array_almost_equal, - assert_raises, assert_allclose, assert_array_max_ulp, assert_warns - ) + run_module_suite, TestCase, assert_, assert_equal, assert_array_equal, + assert_almost_equal, assert_array_almost_equal, assert_raises, + assert_allclose, assert_array_max_ulp, assert_warns + ) from numpy.random import rand from numpy.lib import * from numpy.compat import long @@ -1103,127 +1103,6 @@ def test_dtype_order(self): assert_(a.dtype == np.float64) -class TestNaNFuncts(TestCase): - def setUp(self): - self.A = np.array([[[ np.nan, 0.01319214, 0.01620964], - [ 0.11704017, np.nan, 0.75157887], - [ 0.28333658, 0.1630199 , np.nan ]], - [[ 0.59541557, np.nan, 0.37910852], - [ np.nan, 0.87964135, np.nan ], - [ 0.70543747, np.nan, 0.34306596]], - [[ 0.72687499, 0.91084584, np.nan ], - [ 0.84386844, 0.38944762, 0.23913896], - [ np.nan, 0.37068164, 0.33850425]]]) - - def test_nansum(self): - assert_almost_equal(nansum(self.A), 8.0664079100000006) - assert_almost_equal(nansum(self.A, 0), - np.array([[ 1.32229056, 0.92403798, 0.39531816], - [ 0.96090861, 1.26908897, 0.99071783], - [ 0.98877405, 0.53370154, 0.68157021]])) - assert_almost_equal(nansum(self.A, 1), - np.array([[ 0.40037675, 0.17621204, 0.76778851], - [ 1.30085304, 0.87964135, 0.72217448], - [ 1.57074343, 1.6709751 , 0.57764321]])) - assert_almost_equal(nansum(self.A, 2), - np.array([[ 0.02940178, 0.86861904, 0.44635648], - [ 0.97452409, 0.87964135, 1.04850343], - [ 1.63772083, 1.47245502, 0.70918589]])) - - def test_nanmin(self): - assert_almost_equal(nanmin(self.A), 0.01319214) - assert_almost_equal(nanmin(self.A, 0), - np.array([[ 0.59541557, 0.01319214, 0.01620964], - [ 0.11704017, 0.38944762, 0.23913896], - [ 0.28333658, 0.1630199 , 0.33850425]])) - assert_almost_equal(nanmin(self.A, 1), - np.array([[ 0.11704017, 0.01319214, 0.01620964], - [ 0.59541557, 0.87964135, 0.34306596], - [ 0.72687499, 0.37068164, 0.23913896]])) - assert_almost_equal(nanmin(self.A, 2), - np.array([[ 0.01319214, 0.11704017, 0.1630199 ], - [ 0.37910852, 0.87964135, 0.34306596], - [ 0.72687499, 0.23913896, 0.33850425]])) - assert_(np.isnan(nanmin([np.nan, np.nan]))) - - def test_nanargmin(self): - assert_almost_equal(nanargmin(self.A), 1) - assert_almost_equal(nanargmin(self.A, 0), - np.array([[1, 0, 0], - [0, 2, 2], - [0, 0, 2]])) - assert_almost_equal(nanargmin(self.A, 1), - np.array([[1, 0, 0], - [0, 1, 2], - [0, 2, 1]])) - assert_almost_equal(nanargmin(self.A, 2), - np.array([[1, 0, 1], - [2, 1, 2], - [0, 2, 2]])) - - def test_nanmax(self): - assert_almost_equal(nanmax(self.A), 0.91084584000000002) - assert_almost_equal(nanmax(self.A, 0), - np.array([[ 0.72687499, 0.91084584, 0.37910852], - [ 0.84386844, 0.87964135, 0.75157887], - [ 0.70543747, 0.37068164, 0.34306596]])) - assert_almost_equal(nanmax(self.A, 1), - np.array([[ 0.28333658, 0.1630199 , 0.75157887], - [ 0.70543747, 0.87964135, 0.37910852], - [ 0.84386844, 0.91084584, 0.33850425]])) - assert_almost_equal(nanmax(self.A, 2), - np.array([[ 0.01620964, 0.75157887, 0.28333658], - [ 0.59541557, 0.87964135, 0.70543747], - [ 0.91084584, 0.84386844, 0.37068164]])) - assert_(np.isnan(nanmax([np.nan, np.nan]))) - - def test_nanmin_allnan_on_axis(self): - assert_array_equal(np.isnan(nanmin([[np.nan] * 2] * 3, axis=1)), - [True, True, True]) - - def test_nanmin_masked(self): - a = np.ma.fix_invalid([[2, 1, 3, np.nan], [5, 2, 3, np.nan]]) - ctrl_mask = a._mask.copy() - test = np.nanmin(a, axis=1) - assert_equal(test, [1, 2]) - assert_equal(a._mask, ctrl_mask) - assert_equal(np.isinf(a), np.zeros((2, 4), dtype=bool)) - - -class TestNanFunctsIntTypes(TestCase): - - int_types = ( - np.int8, np.int16, np.int32, np.int64, np.uint8, - np.uint16, np.uint32, np.uint64) - - def setUp(self, *args, **kwargs): - self.A = np.array([127, 39, 93, 87, 46]) - - def integer_arrays(self): - for dtype in self.int_types: - yield self.A.astype(dtype) - - def test_nanmin(self): - min_value = min(self.A) - for A in self.integer_arrays(): - assert_equal(nanmin(A), min_value) - - def test_nanmax(self): - max_value = max(self.A) - for A in self.integer_arrays(): - assert_equal(nanmax(A), max_value) - - def test_nanargmin(self): - min_arg = np.argmin(self.A) - for A in self.integer_arrays(): - assert_equal(nanargmin(A), min_arg) - - def test_nanargmax(self): - max_arg = np.argmax(self.A) - for A in self.integer_arrays(): - assert_equal(nanargmax(A), max_arg) - - class TestCorrCoef(TestCase): A = np.array([[ 0.15391142, 0.18045767, 0.14197213], [ 0.70461506, 0.96474128, 0.27906989], @@ -1270,7 +1149,7 @@ def test_empty(self): assert_equal(cov(np.array([]).reshape(0, 2)).shape, (0, 2)) -class Test_i0(TestCase): +class Test_I0(TestCase): def test_simple(self): assert_almost_equal(i0(0.5), np.array(1.0634833707413234)) A = np.array([ 0.49842636, 0.6969809 , 0.22011976, 0.0155549]) @@ -1541,7 +1420,5 @@ def test_string_arg(self): assert_raises(TypeError, add_newdoc_ufunc, np.add, 3) - - if __name__ == "__main__": run_module_suite() diff --git a/numpy/lib/tests/test_nanfunctions.py b/numpy/lib/tests/test_nanfunctions.py new file mode 100644 index 000000000000..1d11862e9d85 --- /dev/null +++ b/numpy/lib/tests/test_nanfunctions.py @@ -0,0 +1,240 @@ +from __future__ import division, absolute_import, print_function + +import warnings + +import numpy as np +from numpy.testing import ( + run_module_suite, TestCase, assert_, assert_equal, assert_almost_equal + ) +from numpy.lib import ( + nansum, nanmax, nanargmax, nanargmin, nanmin, nanmean, nanvar, nanstd + ) + +class TestNaNFuncts(TestCase): + def setUp(self): + self.A = np.array([[[ np.nan, 0.01319214, 0.01620964], + [ 0.11704017, np.nan, 0.75157887], + [ 0.28333658, 0.1630199 , np.nan ]], + [[ 0.59541557, np.nan, 0.37910852], + [ np.nan, 0.87964135, np.nan ], + [ 0.70543747, np.nan, 0.34306596]], + [[ 0.72687499, 0.91084584, np.nan ], + [ 0.84386844, 0.38944762, 0.23913896], + [ np.nan, 0.37068164, 0.33850425]]]) + + def test_nansum(self): + assert_almost_equal(nansum(self.A), 8.0664079100000006) + assert_almost_equal(nansum(self.A, 0), + np.array([[ 1.32229056, 0.92403798, 0.39531816], + [ 0.96090861, 1.26908897, 0.99071783], + [ 0.98877405, 0.53370154, 0.68157021]])) + assert_almost_equal(nansum(self.A, 1), + np.array([[ 0.40037675, 0.17621204, 0.76778851], + [ 1.30085304, 0.87964135, 0.72217448], + [ 1.57074343, 1.6709751 , 0.57764321]])) + assert_almost_equal(nansum(self.A, 2), + np.array([[ 0.02940178, 0.86861904, 0.44635648], + [ 0.97452409, 0.87964135, 1.04850343], + [ 1.63772083, 1.47245502, 0.70918589]])) + + def test_nanmin(self): + assert_almost_equal(nanmin(self.A), 0.01319214) + assert_almost_equal(nanmin(self.A, 0), + np.array([[ 0.59541557, 0.01319214, 0.01620964], + [ 0.11704017, 0.38944762, 0.23913896], + [ 0.28333658, 0.1630199 , 0.33850425]])) + assert_almost_equal(nanmin(self.A, 1), + np.array([[ 0.11704017, 0.01319214, 0.01620964], + [ 0.59541557, 0.87964135, 0.34306596], + [ 0.72687499, 0.37068164, 0.23913896]])) + assert_almost_equal(nanmin(self.A, 2), + np.array([[ 0.01319214, 0.11704017, 0.1630199 ], + [ 0.37910852, 0.87964135, 0.34306596], + [ 0.72687499, 0.23913896, 0.33850425]])) + assert_(np.isnan(nanmin([np.nan, np.nan]))) + + def test_nanargmin(self): + assert_almost_equal(nanargmin(self.A), 1) + assert_almost_equal(nanargmin(self.A, 0), + np.array([[1, 0, 0], + [0, 2, 2], + [0, 0, 2]])) + assert_almost_equal(nanargmin(self.A, 1), + np.array([[1, 0, 0], + [0, 1, 2], + [0, 2, 1]])) + assert_almost_equal(nanargmin(self.A, 2), + np.array([[1, 0, 1], + [2, 1, 2], + [0, 2, 2]])) + + def test_nanmax(self): + assert_almost_equal(nanmax(self.A), 0.91084584000000002) + assert_almost_equal(nanmax(self.A, 0), + np.array([[ 0.72687499, 0.91084584, 0.37910852], + [ 0.84386844, 0.87964135, 0.75157887], + [ 0.70543747, 0.37068164, 0.34306596]])) + assert_almost_equal(nanmax(self.A, 1), + np.array([[ 0.28333658, 0.1630199 , 0.75157887], + [ 0.70543747, 0.87964135, 0.37910852], + [ 0.84386844, 0.91084584, 0.33850425]])) + assert_almost_equal(nanmax(self.A, 2), + np.array([[ 0.01620964, 0.75157887, 0.28333658], + [ 0.59541557, 0.87964135, 0.70543747], + [ 0.91084584, 0.84386844, 0.37068164]])) + assert_(np.isnan(nanmax([np.nan, np.nan]))) + + def test_nanmin_allnan_on_axis(self): + assert_equal(np.isnan(nanmin([[np.nan] * 2] * 3, axis=1)), + [True, True, True]) + + def test_nanmin_masked(self): + a = np.ma.fix_invalid([[2, 1, 3, np.nan], [5, 2, 3, np.nan]]) + ctrl_mask = a._mask.copy() + test = np.nanmin(a, axis=1) + assert_equal(test, [1, 2]) + assert_equal(a._mask, ctrl_mask) + assert_equal(np.isinf(a), np.zeros((2, 4), dtype=bool)) + + def test_nanmean(self): + A = [[1, np.nan, np.nan], [np.nan, 4, 5]] + assert_(nanmean(A) == (10.0 / 3)) + assert_(all(nanmean(A,0) == np.array([1, 4, 5]))) + assert_(all(nanmean(A,1) == np.array([1, 4.5]))) + + def test_nanstd(self): + A = [[1, np.nan, np.nan], [np.nan, 4, 5]] + assert_almost_equal(nanstd(A), 1.699673171197595) + assert_almost_equal(nanstd(A,0), np.array([0.0, 0.0, 0.0])) + assert_almost_equal(nanstd(A,1), np.array([0.0, 0.5])) + + def test_nanvar(self): + A = [[1, np.nan, np.nan], [np.nan, 4, 5]] + assert_almost_equal(nanvar(A), 2.88888888889) + assert_almost_equal(nanvar(A,0), np.array([0.0, 0.0, 0.0])) + assert_almost_equal(nanvar(A,1), np.array([0.0, 0.25])) + + +class TestNaNMean(TestCase): + def setUp(self): + self.A = np.array([1, np.nan, -1, np.nan, np.nan, 1, -1]) + self.B = np.array([np.nan, np.nan, np.nan, np.nan]) + self.real_mean = 0 + + def test_basic(self): + assert_almost_equal(nanmean(self.A),self.real_mean) + + def test_mutation(self): + # Because of the "messing around" we do to replace NaNs with zeros + # this is meant to ensure we don't actually replace the NaNs in the + # actual _array. + a_copy = self.A.copy() + b_copy = self.B.copy() + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + a_ret = nanmean(self.A) + assert_equal(self.A, a_copy) + b_ret = nanmean(self.B) + assert_equal(self.B, b_copy) + + def test_allnans(self): + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(np.isnan(nanmean(self.B))) + assert_(w[0].category is RuntimeWarning) + + def test_empty(self): + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(np.isnan(nanmean(np.array([])))) + assert_(w[0].category is RuntimeWarning) + + +class TestNaNStdVar(TestCase): + def setUp(self): + self.A = np.array([np.nan, 1, -1, np.nan, 1, np.nan, -1]) + self.B = np.array([np.nan, np.nan, np.nan, np.nan]) + self.real_var = 1 + + def test_basic(self): + assert_almost_equal(nanvar(self.A),self.real_var) + assert_almost_equal(nanstd(self.A)**2,self.real_var) + + def test_mutation(self): + # Because of the "messing around" we do to replace NaNs with zeros + # this is meant to ensure we don't actually replace the NaNs in the + # actual array. + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + a_copy = self.A.copy() + b_copy = self.B.copy() + a_ret = nanvar(self.A) + assert_equal(self.A, a_copy) + b_ret = nanstd(self.B) + assert_equal(self.B, b_copy) + + def test_ddof1(self): + mask = ~np.isnan(self.A) + assert_almost_equal(nanvar(self.A,ddof=1), + self.real_var*sum(mask)/float(sum(mask) - 1)) + assert_almost_equal(nanstd(self.A,ddof=1)**2, + self.real_var*sum(mask)/float(sum(mask) - 1)) + + def test_ddof2(self): + mask = ~np.isnan(self.A) + assert_almost_equal(nanvar(self.A,ddof=2), + self.real_var*sum(mask)/float(sum(mask) - 2)) + assert_almost_equal(nanstd(self.A,ddof=2)**2, + self.real_var*sum(mask)/float(sum(mask) - 2)) + + def test_allnans(self): + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(np.isnan(nanvar(self.B))) + assert_(np.isnan(nanstd(self.B))) + assert_(w[0].category is RuntimeWarning) + + def test_empty(self): + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(np.isnan(nanvar(np.array([])))) + assert_(np.isnan(nanstd(np.array([])))) + assert_(w[0].category is RuntimeWarning) + + +class TestNanFunctsIntTypes(TestCase): + + int_types = ( + np.int8, np.int16, np.int32, np.int64, np.uint8, + np.uint16, np.uint32, np.uint64) + + def setUp(self, *args, **kwargs): + self.A = np.array([127, 39, 93, 87, 46]) + + def integer_arrays(self): + for dtype in self.int_types: + yield self.A.astype(dtype) + + def test_nanmin(self): + min_value = min(self.A) + for A in self.integer_arrays(): + assert_equal(nanmin(A), min_value) + + def test_nanmax(self): + max_value = max(self.A) + for A in self.integer_arrays(): + assert_equal(nanmax(A), max_value) + + def test_nanargmin(self): + min_arg = np.argmin(self.A) + for A in self.integer_arrays(): + assert_equal(nanargmin(A), min_arg) + + def test_nanargmax(self): + max_arg = np.argmax(self.A) + for A in self.integer_arrays(): + assert_equal(nanargmax(A), max_arg) + + +if __name__ == "__main__": + run_module_suite() From 997a48fda334a24d0024f12d2c76c4a7e4a9309d Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Fri, 21 Jun 2013 10:28:41 -0600 Subject: [PATCH 6/6] MAINT: Clean up core/_methods.py and core/fromnumeric.py Use issubclass instead of issubdtype. Add some blank lines. Remove trailing whitespace. Remove uneeded float casts since true_divide is default. Clean up documentation a bit. --- doc/source/reference/routines.statistics.rst | 5 +- numpy/core/_methods.py | 19 +- numpy/core/fromnumeric.py | 13 +- numpy/lib/nanfunctions.py | 501 ++++++++++++------- 4 files changed, 328 insertions(+), 210 deletions(-) diff --git a/doc/source/reference/routines.statistics.rst b/doc/source/reference/routines.statistics.rst index c420705afb53..64745ff0c582 100644 --- a/doc/source/reference/routines.statistics.rst +++ b/doc/source/reference/routines.statistics.rst @@ -23,11 +23,14 @@ Averages and variances .. autosummary:: :toctree: generated/ + median average mean - median std var + nanmean + nanstd + nanvar Correlating ----------- diff --git a/numpy/core/_methods.py b/numpy/core/_methods.py index 51731b9c221d..935316897dde 100644 --- a/numpy/core/_methods.py +++ b/numpy/core/_methods.py @@ -7,7 +7,7 @@ from numpy.core import multiarray as mu from numpy.core import umath as um -from numpy.core.numeric import asanyarray, isnan, issubdtype +from numpy.core.numeric import asanyarray from numpy.core import numerictypes as nt def _amax(a, axis=None, out=None, keepdims=False): @@ -48,19 +48,20 @@ def _mean(a, axis=None, dtype=None, out=None, keepdims=False): arr = asanyarray(a) # Cast bool, unsigned int, and int to float64 - if dtype is None and (issubdtype(arr.dtype, nt.integer) or - issubdtype(arr.dtype, nt.bool_)): + if dtype is None and issubclass(arr.dtype.type, (nt.integer, nt.bool_)): ret = um.add.reduce(arr, axis=axis, dtype='f8', out=out, keepdims=keepdims) else: ret = um.add.reduce(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + rcount = _count_reduce_items(arr, axis) if isinstance(ret, mu.ndarray): ret = um.true_divide(ret, rcount, out=ret, casting='unsafe', subok=False) else: - ret = ret / float(rcount) + ret = ret / rcount + return ret def _var(a, axis=None, dtype=None, out=None, ddof=0, @@ -68,23 +69,23 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0, arr = asanyarray(a) # First compute the mean, saving 'rcount' for reuse later - if dtype is None and (issubdtype(arr.dtype, nt.integer) or - issubdtype(arr.dtype, nt.bool_)): + if dtype is None and issubclass(arr.dtype.type, (nt.integer, nt.bool_)): arrmean = um.add.reduce(arr, axis=axis, dtype='f8', keepdims=True) else: arrmean = um.add.reduce(arr, axis=axis, dtype=dtype, keepdims=True) + rcount = _count_reduce_items(arr, axis) if isinstance(arrmean, mu.ndarray): arrmean = um.true_divide(arrmean, rcount, out=arrmean, casting='unsafe', subok=False) else: - arrmean = arrmean / float(rcount) + arrmean = arrmean / rcount # arr - arrmean x = arr - arrmean # (arr - arrmean) ** 2 - if issubdtype(arr.dtype, nt.complex_): + if issubclass(arr.dtype.type, nt.complex_): x = um.multiply(x, um.conjugate(x), out=x).real else: x = um.multiply(x, x, out=x) @@ -100,7 +101,7 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0, ret = um.true_divide(ret, rcount, out=ret, casting='unsafe', subok=False) else: - ret = ret / float(rcount) + ret = ret / rcount return ret diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index c4c7d0e7dcf8..4716c81fb660 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -60,9 +60,9 @@ def take(a, indices, axis=None, out=None, mode='raise'): The source array. indices : array_like The indices of the values to extract. - + .. versionadded:: 1.8.0 - + Also allow scalars for indices. axis : int, optional The axis over which to select values. By default, the flattened @@ -2515,8 +2515,7 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=False): See Also -------- average : Weighted average - nanmean : Arithmetic mean while ignoring NaNs - var, nanvar + std, var, nanmean, nanstd, nanvar Notes ----- @@ -2603,8 +2602,7 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): See Also -------- - var, mean - nanmean, nanstd + var, mean, nanmean, nanstd, nanvar numpy.doc.ufuncs : Section "Output arguments" Notes @@ -2707,8 +2705,7 @@ def var(a, axis=None, dtype=None, out=None, ddof=0, See Also -------- - std : Standard deviation - mean : Average + std , mean, nanmean, nanstd, nanvar numpy.doc.ufuncs : Section "Output arguments" Notes diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index f0e635791c7e..75173a45e717 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -3,6 +3,7 @@ """ from __future__ import division, absolute_import, print_function +import warnings import numpy as np __all__ = [ @@ -18,7 +19,7 @@ def _nanmean(a, axis=None, dtype=None, out=None, keepdims=False): mask = np.isnan(arr) # Cast bool, unsigned int, and int to float64 - if np.dtype is None and issubclass(arr.dtype.type, (np.integer, np.bool_)): + if dtype is None and issubclass(arr.dtype.type, (np.integer, np.bool_)): ret = np.add.reduce(arr, axis=axis, dtype='f8', out=out, keepdims=keepdims) else: @@ -89,125 +90,111 @@ def _nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): return ret +def _count_reduce_items(arr, axis): + if axis is None: + axis = tuple(range(arr.ndim)) + elif not isinstance(axis, tuple): + axis = (axis,) + items = 1 + for ax in axis: + items *= arr.shape[ax] + return items + -def _nanop(op, fill, a, axis=None): +def _replace_nan(a, val): """ - General operation on arrays with not-a-number values. + If `a` is of inexact type, make a copy of `a`, replace NaN's with + the `val` value, and return the copy together with a boolean mask + marking the locations where NaN's were present. If `a` is not of + inexact type, do nothing and return `a` together with a mask of None. Parameters ---------- - op : callable - Operation to perform. - fill : float - NaN values are set to fill before doing the operation. a : array-like Input array. - axis : {int, None}, optional - Axis along which the operation is computed. - By default the input is flattened. + val : float + NaN values are set to val before doing the operation. Returns ------- - y : {ndarray, scalar} - Processed data. + y : ndarray + If `a` is of inexact type, return a copy of `a` with the NaN's + replaced by the fill value, otherwise return `a`. + mask: {bool, None} + If `a` is of inexact type, return a boolean mask marking locations of + NaN's, otherwise return None. """ - y = np.array(a, subok=True) - - # We only need to take care of NaN's in floating point arrays - dt = y.dtype - if np.issubdtype(dt, np.integer) or np.issubdtype(dt, np.bool_): - return op(y, axis=axis) + is_new = not isinstance(a, np.ndarray) + if is_new: + a = np.array(a) + if not issubclass(a.dtype.type, np.inexact): + return a, None + if not is_new: + # need copy + a = np.array(a, subok=True) mask = np.isnan(a) - # y[mask] = fill - # We can't use fancy indexing here as it'll mess w/ MaskedArrays - # Instead, let's fill the array directly... - np.copyto(y, fill, where=mask) - res = op(y, axis=axis) - mask_all_along_axis = mask.all(axis=axis) - - # Along some axes, only nan's were encountered. As such, any values - # calculated along that axis should be set to nan. - if mask_all_along_axis.any(): - if np.isscalar(res): - res = np.nan - else: - res[mask_all_along_axis] = np.nan - - return res + np.copyto(a, val, where=mask) + return a, mask -def nansum(a, axis=None): +def _copyto(a, val, mask): """ - Return the sum of array elements over a given axis treating - Not a Numbers (NaNs) as zero. + Replace values in `a` with NaN where `mask` is True. This differs from + copyto in that it will deal with the case that `a` is a numpy scalar. Parameters ---------- - a : array_like - Array containing numbers whose sum is desired. If `a` is not an - array, a conversion is attempted. - axis : int, optional - Axis along which the sum is computed. The default is to compute - the sum of the flattened array. + a : ndarray, numpy scalar + Array or numpy scalar some of whose values are to be replaced + by val. + val : numpy scalar + Value used a replacement. + mask : ndarray, scalar + Boolean array. Where True the corresponding element of `a` is + replaced by NaN. Broadcasts. Returns ------- - y : ndarray - An array with the same shape as a, with the specified axis removed. - If a is a 0-d array, or if axis is None, a scalar is returned with - the same dtype as `a`. - - See Also - -------- - numpy.sum : Sum across array including Not a Numbers. - isnan : Shows which elements are Not a Number (NaN). - isfinite: Shows which elements are not: Not a Number, positive and - negative infinity + res : ndarray, scalar + Array with elements replaced or scalar NaN. - Notes - ----- - Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic - (IEEE 754). This means that Not a Number is not equivalent to infinity. - If positive or negative infinity are present the result is positive or - negative infinity. But if both positive and negative infinity are present, - the result is Not A Number (NaN). - Arithmetic is modular when using integer types (all elements of `a` must - be finite i.e. no elements that are NaNs, positive infinity and negative - infinity because NaNs are floating point types), and no error is raised - on overflow. + """ + if np.any(mask): + if isinstance(a, np.ndarray): + np.copyto(a, np.nan, where=mask, casting='unsafe') + else: + a = a.dtype.type(val) + return a - Examples - -------- - >>> np.nansum(1) - 1 - >>> np.nansum([1]) - 1 - >>> np.nansum([1, np.nan]) - 1.0 - >>> a = np.array([[1, 1], [1, np.nan]]) - >>> np.nansum(a) - 3.0 - >>> np.nansum(a, axis=0) - array([ 2., 1.]) +def _divide_by_count(a, b): + """ + Do true_divide of `a` by `b`, ignoring RuntimeWarnings. Does inplace + divide if `a` is an instance of ndarray. - When positive infinity and negative infinity are present + Parameters + ---------- + a, b : {ndarray, numpy scalar} + Numerator/Dividend respectively. The b variable will have + non-positive values replaced by nan. - >>> np.nansum([1, np.nan, np.inf]) - inf - >>> np.nansum([1, np.nan, np.NINF]) - -inf - >>> np.nansum([1, np.nan, np.inf, np.NINF]) - nan + Returns + ------- + ret : {ndarray, numpy scalar} + The return value is a/b. If `a` was an ndarray the division is done + inplace. If `a` is a numpy scalar, the division preserves its type. """ - return _nanop(np.sum, 0, a, axis) + _copyto(b, np.nan, b <= 0) + if isinstance(a, np.ndarray): + return np.divide(a, b, out=a, casting='unsafe', subok=False) + return np.divide(a, b, dtype=a.dtype, casting='unsafe') -def nanmin(a, axis=None): +def nanmin(a, axis=None, dtype=None, out=None, keepdims=False): """ Return the minimum of an array or minimum along an axis, ignoring any NaNs. @@ -219,6 +206,25 @@ def nanmin(a, axis=None): axis : int, optional Axis along which the minimum is computed. The default is to compute the minimum of the flattened array. + dtype : data-type code, optional + The type used to represent the intermediate results. Defaults + to the data-type of the output array if this is provided, or + the data-type of the input array if no output array is provided. + + .. versionadded:: 1.8.0 + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output, but the type will be cast if necessary. + See `doc.ufuncs` for details. + + .. versionadded:: 1.8.0 + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + + .. versionadded:: 1.8.0 Returns ------- @@ -271,11 +277,7 @@ def nanmin(a, axis=None): -inf """ - a = np.asanyarray(a) - if axis is not None: - return np.fmin.reduce(a, axis) - else: - return np.fmin.reduce(a.flat) + return np.fmin.reduce(a, axis, dtype, out, keepdims) def nanargmin(a, axis=None): @@ -311,10 +313,14 @@ def nanargmin(a, axis=None): array([1, 0]) """ - return _nanop(np.argmin, np.inf, a, axis) + a, mask = _replace_nan(a, np.inf) + ind = np.argmin(a, axis) + if mask is None: + return ind + return _copyto(ind, np.nan, np.all(mask, axis)) -def nanmax(a, axis=None): +def nanmax(a, axis=None, dtype=None, out=None, keepdims=False): """ Return the maximum of an array or maximum along an axis, ignoring any NaNs. @@ -326,6 +332,25 @@ def nanmax(a, axis=None): axis : int, optional Axis along which the maximum is computed. The default is to compute the maximum of the flattened array. + dtype : data-type code, optional + The type used to represent the intermediate results. Defaults + to the data-type of the output array if this is provided, or + the data-type of the input array if no output array is provided. + + .. versionadded:: 1.8.0 + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output, but the type will be cast if necessary. + See `doc.ufuncs` for details. + + .. versionadded:: 1.8.0 + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + + .. versionadded:: 1.8.0 Returns ------- @@ -378,11 +403,7 @@ def nanmax(a, axis=None): inf """ - a = np.asanyarray(a) - if axis is not None: - return np.fmax.reduce(a, axis) - else: - return np.fmax.reduce(a.flat) + return np.fmax.reduce(a, axis, dtype, out, keepdims) def nanargmax(a, axis=None): @@ -418,7 +439,91 @@ def nanargmax(a, axis=None): array([1, 1]) """ - return _nanop(np.argmax, -np.inf, a, axis) + a, mask = _replace_nan(a, -np.inf) + ind = np.argmax(a, axis) + if mask is None: + return ind + return _copyto(ind, np.nan, np.all(mask, axis)) + + +def nansum(a, axis=None, dtype=None, out=None, keepdims=0): + """ + Return the sum of array elements over a given axis treating + Not a Numbers (NaNs) as zero. + + Parameters + ---------- + a : array_like + Array containing numbers whose sum is desired. If `a` is not an + array, a conversion is attempted. + axis : int, optional + Axis along which the sum is computed. The default is to compute + the sum of the flattened array. + + Returns + ------- + y : ndarray + An array with the same shape as a, with the specified axis removed. + If a is a 0-d array, or if axis is None, a scalar is returned with + the same dtype as `a`. + + See Also + -------- + numpy.sum : Sum across array including Not a Numbers. + isnan : Shows which elements are Not a Number (NaN). + isfinite: Shows which elements are not: Not a Number, positive and + negative infinity + + Notes + ----- + Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). This means that Not a Number is not equivalent to infinity. + If positive or negative infinity are present the result is positive or + negative infinity. But if both positive and negative infinity are present, + the result is Not A Number (NaN). + + Arithmetic is modular when using integer types (all elements of `a` must + be finite i.e. no elements that are NaNs, positive infinity and negative + infinity because NaNs are floating point types), and no error is raised + on overflow. + + + Examples + -------- + >>> np.nansum(1) + 1 + >>> np.nansum([1]) + 1 + >>> np.nansum([1, np.nan]) + 1.0 + >>> a = np.array([[1, 1], [1, np.nan]]) + >>> np.nansum(a) + 3.0 + >>> np.nansum(a, axis=0) + array([ 2., 1.]) + + When positive infinity and negative infinity are present + + >>> np.nansum([1, np.nan, np.inf]) + inf + >>> np.nansum([1, np.nan, np.NINF]) + -inf + >>> np.nansum([1, np.nan, np.inf, np.NINF]) + nan + + """ + a, mask = _replace_nan(a, 0) + if mask is None: + return a.sum(axis, dtype, out, keepdims) + + a = a.sum(axis, dtype, out, keepdims) + if not issubclass(a.dtype.type, np.inexact): + msg = "Input is inexact, but either `dtype` or `out` is not." + raise TypeError(msg) + + mask = mask.all(axis, keepdims=keepdims) + a = _copyto(a, np.nan, mask) + return a def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): @@ -429,6 +534,8 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): the flattened array by default, otherwise over the specified axis. `float64` intermediate and return values are used for integer inputs. + .. versionadded:: 1.8.0 + Parameters ---------- a : array_like @@ -484,15 +591,112 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): >>> np.nanmean(a, axis=1) array([ 1., 3.5]) + """ + a, mask = _replace_nan(a, 0) + if mask is None: + return np.mean(a, axis, dtype=dtype, out=out, keepdims=keepdims) + + a = np.sum(a, axis, dtype=dtype, out=out, keepdims=keepdims) + if not issubclass(a.dtype.type, np.inexact): + msg = "Input is inexact, but either `dtype` or `out` is not." + raise TypeError(msg) + + n = np.sum(~mask, axis, dtype=np.float64, keepdims=keepdims) + a = _divide_by_count(a, n) + return a + + +def nanvar(a, axis=None, dtype=None, out=None, ddof=0, + keepdims=False): + """ + Compute the variance along the specified axis, while ignoring NaNs. + + Returns the variance of the array elements, a measure of the spread of a + distribution. The variance is computed for the flattened array by + default, otherwise over the specified axis. + + .. versionadded:: 1.8.0 + + Parameters + ---------- + a : array_like + Array containing numbers whose variance is desired. If `a` is not an + array, a conversion is attempted. + axis : int, optional + Axis along which the variance is computed. The default is to compute + the variance of the flattened array. + dtype : data-type, optional + Type to use in computing the variance. For arrays of integer type + the default is `float32`; for arrays of float types it is the same as + the array type. + out : ndarray, optional + Alternate output array in which to place the result. It must have + the same shape as the expected output, but the type is cast if + necessary. + ddof : int, optional + "Delta Degrees of Freedom": the divisor used in the calculation is + ``N - ddof``, where ``N`` represents the number of elements. By + default `ddof` is zero. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. + + Returns + ------- + variance : ndarray, see dtype parameter above + If ``out=None``, returns a new array containing the variance; + otherwise, a reference to the output array is returned. + + See Also + -------- + std : Standard deviation + mean : Average + var : Variance while not ignoring NaNs + nanstd, nanmean + numpy.doc.ufuncs : Section "Output arguments" + + Notes + ----- + The variance is the average of the squared deviations from the mean, + i.e., ``var = mean(abs(x - x.mean())**2)``. + + The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``. + If, however, `ddof` is specified, the divisor ``N - ddof`` is used + instead. In standard statistical practice, ``ddof=1`` provides an + unbiased estimator of the variance of a hypothetical infinite population. + ``ddof=0`` provides a maximum likelihood estimate of the variance for + normally distributed variables. + + Note that for complex numbers, the absolute value is taken before + squaring, so that the result is always real and nonnegative. + + For floating-point input, the variance is computed using the same + precision the input has. Depending on the input data, this can cause + the results to be inaccurate, especially for `float32` (see example + below). Specifying a higher-accuracy accumulator using the ``dtype`` + keyword can alleviate this issue. + + Examples + -------- + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.var(a) + 1.5555555555555554 + >>> np.nanvar(a, axis=0) + array([ 1., 0.]) + >>> np.nanvar(a, axis=1) + array([ 0., 0.25]) + """ if not (type(a) is np.ndarray): try: - mean = a.nanmean - return mean(axis=axis, dtype=dtype, out=out) + nanvar = a.nanvar + return nanvar(axis=axis, dtype=dtype, out=out, ddof=ddof) except AttributeError: pass - return _nanmean(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + return _nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims) def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): @@ -504,6 +708,8 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): of the non-NaN array elements. The standard deviation is computed for the flattened array by default, otherwise over the specified axis. + .. versionadded:: 1.8.0 + Parameters ---------- a : array_like @@ -587,92 +793,3 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): keepdims=keepdims) -def nanvar(a, axis=None, dtype=None, out=None, ddof=0, - keepdims=False): - """ - Compute the variance along the specified axis, while ignoring NaNs. - - Returns the variance of the array elements, a measure of the spread of a - distribution. The variance is computed for the flattened array by - default, otherwise over the specified axis. - - Parameters - ---------- - a : array_like - Array containing numbers whose variance is desired. If `a` is not an - array, a conversion is attempted. - axis : int, optional - Axis along which the variance is computed. The default is to compute - the variance of the flattened array. - dtype : data-type, optional - Type to use in computing the variance. For arrays of integer type - the default is `float32`; for arrays of float types it is the same as - the array type. - out : ndarray, optional - Alternate output array in which to place the result. It must have - the same shape as the expected output, but the type is cast if - necessary. - ddof : int, optional - "Delta Degrees of Freedom": the divisor used in the calculation is - ``N - ddof``, where ``N`` represents the number of elements. By - default `ddof` is zero. - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `arr`. - - Returns - ------- - variance : ndarray, see dtype parameter above - If ``out=None``, returns a new array containing the variance; - otherwise, a reference to the output array is returned. - - See Also - -------- - std : Standard deviation - mean : Average - var : Variance while not ignoring NaNs - nanstd, nanmean - numpy.doc.ufuncs : Section "Output arguments" - - Notes - ----- - The variance is the average of the squared deviations from the mean, - i.e., ``var = mean(abs(x - x.mean())**2)``. - - The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``. - If, however, `ddof` is specified, the divisor ``N - ddof`` is used - instead. In standard statistical practice, ``ddof=1`` provides an - unbiased estimator of the variance of a hypothetical infinite population. - ``ddof=0`` provides a maximum likelihood estimate of the variance for - normally distributed variables. - - Note that for complex numbers, the absolute value is taken before - squaring, so that the result is always real and nonnegative. - - For floating-point input, the variance is computed using the same - precision the input has. Depending on the input data, this can cause - the results to be inaccurate, especially for `float32` (see example - below). Specifying a higher-accuracy accumulator using the ``dtype`` - keyword can alleviate this issue. - - Examples - -------- - >>> a = np.array([[1, np.nan], [3, 4]]) - >>> np.var(a) - 1.5555555555555554 - >>> np.nanvar(a, axis=0) - array([ 1., 0.]) - >>> np.nanvar(a, axis=1) - array([ 0., 0.25]) - - """ - if not (type(a) is np.ndarray): - try: - nanvar = a.nanvar - return nanvar(axis=axis, dtype=dtype, out=out, ddof=ddof) - except AttributeError: - pass - - return _nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, - keepdims=keepdims)