ENH: added axis param for np.count_nonzero

Closes gh-391.
numpy · shoyer · Aug 5, 2016 · Jan 29, 2016 · Aug 5, 2016 · 0fc9e4520b1d00b58a77f28936da2fec2672de83
commit 0fc9e4520b1d00b58a77f28936da2fec2672de83
diff --git a/benchmarks/benchmarks/bench_core.py b/benchmarks/benchmarks/bench_core.py
@@ -106,3 +106,27 @@ def time_correlate(self, size1, size2, mode):
 
     def time_convolve(self, size1, size2, mode):
         np.convolve(self.x1, self.x2, mode=mode)
+
+
+class CountNonzero(Benchmark):
+    # 'numaxes' is the length of axis 0: the test array is always 2-D.
+    param_names = ['numaxes', 'size', 'dtype']
+    params = [[1, 2, 3], [100, 10000, 1000000], [bool, int, str, object]]
+
+    def setup(self, numaxes, size, dtype):
+        # Deterministic 0/1/2 fill; np.empty would leave the values (and
+        # hence the amount of counting work) up to uninitialised memory.
+        self.x = np.arange(numaxes * size).reshape(numaxes, size)
+        self.x = (self.x % 3).astype(dtype)
+
+    def time_count_nonzero(self, numaxes, size, dtype):
+        np.count_nonzero(self.x)
+
+    def time_count_nonzero_axis(self, numaxes, size, dtype):
+        np.count_nonzero(self.x, axis=self.x.ndim - 1)
+
+    def time_count_nonzero_multi_axis(self, numaxes, size, dtype):
+        # Guard kept for safety; setup() always builds a 2-D array.
+        if self.x.ndim >= 2:
+            np.count_nonzero(self.x, axis=(
+                self.x.ndim - 1, self.x.ndim - 2))
diff --git a/benchmarks/benchmarks/bench_ufunc.py b/benchmarks/benchmarks/bench_ufunc.py
@@ -67,9 +67,6 @@ def setup(self):
     def time_nonzero(self):
         np.nonzero(self.b)
 
-    def time_count_nonzero(self):
-        np.count_nonzero(self.b)
-
     def time_not_bool(self):
         (~self.b)
 

diff --git a/doc/release/1.12.0-notes.rst b/doc/release/1.12.0-notes.rst
@@ -138,6 +138,9 @@ Generalized ``flip``
 axis=1 respectively. The newly added ``flip`` function reverses the elements of
 an array along any given axis.
 
+* ``np.count_nonzero`` now has an ``axis`` parameter, allowing
+  non-zero counts to be computed along one or more axes instead
+  of only over the flattened array.
 
 BLIS support in ``numpy.distutils``
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py
@@ -942,34 +942,6 @@ def luf(lamdaexpr, *args, **kwargs):
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'count_nonzero',
-    """
-    count_nonzero(a)
-
-    Counts the number of non-zero values in the array ``a``.
-
-    Parameters
-    ----------
-    a : array_like
-        The array for which to count non-zeros.
-
-    Returns
-    -------
-    count : int or array of int
-        Number of non-zero values in the array.
-
-    See Also
-    --------
-    nonzero : Return the coordinates of all the non-zero values.
-
-    Examples
-    --------
-    >>> np.count_nonzero(np.eye(4))
-    4
-    >>> np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]])
-    5
-    """)
-
 add_newdoc('numpy.core.multiarray', 'set_typeDict',
     """set_typeDict(dict)
 

diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py
@@ -6,6 +6,7 @@
 import sys
 import warnings
 
+import numpy as np
 from . import multiarray
 from .multiarray import (
     _fastCopyAndTranspose as fastCopyAndTranspose, ALLOW_THREADS,
@@ -376,6 +377,89 @@ def extend_all(module):
             __all__.append(a)

 
+def count_nonzero(a, axis=None):
+    """
+    Counts the number of non-zero values in the array ``a``.
+
+    The word "non-zero" is in reference to the Python 2.x
+    built-in method ``__nonzero__()`` (renamed ``__bool__()``
+    in Python 3.x) of Python objects that tests an object's
+    "truthfulness". For example, any number is considered
+    truthful if it is nonzero, whereas any string is considered
+    truthful if it is not the empty string. Thus, this function
+    (recursively) counts how many elements in ``a`` (and in
+    sub-arrays thereof) have their ``__nonzero__()`` or ``__bool__()``
+    method evaluated to ``True``.
+
+    Parameters
+    ----------
+    a : array_like
+        The array for which to count non-zeros.
+    axis : int or tuple, optional
+        Axis or tuple of axes along which to count non-zeros.
+        Default is None (an empty tuple is treated the same way),
+        meaning non-zeros are counted over a flattened ``a``.
+
+        .. versionadded:: 1.12.0
+
+    Returns
+    -------
+    count : int or array of int
+        Number of non-zero values along ``axis``, or the total number
+        of non-zero values in the array when ``axis`` is None.
+
+    See Also
+    --------
+    nonzero : Return the coordinates of all the non-zero values.
+
+    Examples
+    --------
+    >>> np.count_nonzero(np.eye(4))
+    4
+    >>> np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]])
+    5
+    >>> np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]], axis=0)
+    array([1, 1, 1, 1, 1])
+    >>> np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]], axis=1)
+    array([2, 3])
+
+    """
+    # isinstance guard: ``ndarray == ()`` is elementwise, not a plain bool.
+    if axis is None or (isinstance(axis, tuple) and axis == ()):
+        return multiarray.count_nonzero(a)
+
+    a = asanyarray(a)
+
+    if a.dtype == bool:
+        return a.sum(axis=axis, dtype=np.intp)
+
+    if issubdtype(a.dtype, np.number):
+        return (a != 0).sum(axis=axis, dtype=np.intp)
+
+    if (issubdtype(a.dtype, np.string_) or
+            issubdtype(a.dtype, np.unicode_)):
+        nullstr = a.dtype.type('')
+        return (a != nullstr).sum(axis=axis, dtype=np.intp)
+
+    axis = asarray(_validate_axis(axis, a.ndim, 'axis'))
+    counts = np.apply_along_axis(multiarray.count_nonzero, axis[0], a)
+
+    if axis.size == 1:
+        return counts
+    else:
+        # The first axis has already been reduced away, so in
+        # 'counts' every remaining axis that was larger than
+        # axis[0] has moved down by one position, while smaller
+        # ones are unchanged; (axis[1:] > axis[0]) is exactly
+        # that shift.
+        #
+        # Summing the per-slice integer counts over the remaining
+        # axes is much cheaper than applying 'count_nonzero' to
+        # object-like elements again.
+        return counts.sum(axis=tuple(axis[1:] - (
+            axis[1:] > axis[0])), dtype=np.intp)
+
+
 def asarray(a, dtype=None, order=None):
     """Convert the input to an array.
 
@@ -891,7 +975,7 @@ def correlate(a, v, mode='valid'):
     return multiarray.correlate2(a, v, mode)
 
 
-def convolve(a,v,mode='full'):
+def convolve(a, v, mode='full'):
     """
     Returns the discrete, linear convolution of two one-dimensional sequences.
 
@@ -1752,7 +1836,7 @@ def cross(a, b, axisa=-1, axisb=-1, axisc=-1, axis=None):
     return rollaxis(cp, -1, axisc)
 
 
-#Use numarray's printing function
+# Use numarray's printing function
 from .arrayprint import array2string, get_printoptions, set_printoptions
 
 
@@ -2283,6 +2367,7 @@ def load(file):
 # These are all essentially abbreviations
 # These might wind up in a special abbreviations module
 
+
 def _maketup(descr, val):
     dt = dtype(descr)
     # Place val in all scalar tuples:

diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
@@ -1980,16 +1980,10 @@ array_zeros(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds)
 static PyObject *
 array_count_nonzero(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
 {
-    PyObject *array_in;
     PyArrayObject *array;
     npy_intp count;
 
-    if (!PyArg_ParseTuple(args, "O", &array_in)) {
-        return NULL;
-    }
-
-    array = (PyArrayObject *)PyArray_FromAny(array_in, NULL, 0, 0, 0, NULL);
-    if (array == NULL) {
+    if (!PyArg_ParseTuple(args, "O&", PyArray_Converter, &array)) {
         return NULL;
     }
 

diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
@@ -67,6 +67,13 @@ def test_compress(self):
         out = np.compress([0, 1], arr, axis=0)
         assert_equal(out, tgt)
 
+    def test_count_nonzero(self):
+        # Row-wise non-zero counts of a nested list (axis=1).
+        rows = [[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]]
+        expected = np.array([2, 3])
+        result = np.count_nonzero(rows, axis=1)
+        assert_equal(result, expected)
+
     def test_cumproduct(self):
         A = [[1, 2, 3], [4, 5, 6]]
         assert_(np.all(np.cumproduct(A) == np.array([1, 2, 6, 24, 120, 720])))
@@ -991,9 +998,110 @@ class C(np.ndarray):
                         assert_(type(nzx_i) is np.ndarray)
                         assert_(nzx_i.flags.writeable)
 
-    # Tests that the array method
-    # call works
+    def test_count_nonzero_axis(self):
+        # Basic behaviour: counts per axis, then rejection of invalid axes
+        data = np.array([[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]])
+
+        per_column = np.count_nonzero(data, axis=0)
+        assert_equal(per_column, np.array([1, 1, 1, 1, 1]))
+
+        per_row = np.count_nonzero(data, axis=1)
+        assert_equal(per_row, np.array([2, 3]))
+
+        assert_raises(ValueError, np.count_nonzero, data, axis=(1, 1))
+        assert_raises(TypeError, np.count_nonzero, data, axis='foo')
+        assert_raises(ValueError, np.count_nonzero, data, axis=3)
+        assert_raises(TypeError, np.count_nonzero, data,
+                      axis=np.array([[1], [2]]))
+
+    def test_count_nonzero_axis_all_dtypes(self):
+        # Exercise ``axis`` given as an int, a tuple, None and left out,
+        # for every dtype typecode.  Void is set up separately from the
+        # other dtypes, and datetime64 is built from strings.
+        template = "Mismatch for dtype: %s"
+
+        for code in np.typecodes['All']:
+            err_msg = template % (np.dtype(code).name,)
+
+            if code == 'V':
+                # There are no 'nonzero' objects for np.void, so the testing
+                # setup is slightly different for this dtype
+                arr = np.array([np.void(1)] * 6).reshape((2, 3))
+
+                per_column = np.count_nonzero(arr, axis=0)
+                assert_equal(per_column, np.array([0, 0, 0]),
+                             err_msg=err_msg)
+
+                per_row = np.count_nonzero(arr, axis=1)
+                assert_equal(per_row, np.array([0, 0]),
+                             err_msg=err_msg)
+
+                total = np.array(0)
+                assert_equal(np.count_nonzero(arr, axis=(0, 1)),
+                             total, err_msg=err_msg)
+                assert_equal(np.count_nonzero(arr, axis=None),
+                             total, err_msg=err_msg)
+                assert_equal(np.count_nonzero(arr),
+                             total, err_msg=err_msg)
+
+            else:
+                if code == 'M':  # np.zeros doesn't work for np.datetime64
+                    # start from date strings and cast at the very end
+                    arr = np.array(['1970-01-01'] * 9).reshape((3, 3))
+
+                    arr[0, 0] = '1970-01-12'
+                    arr[1, 0] = '1970-01-12'
+                    arr = arr.astype(code)
+
+                else:
+                    arr = np.zeros((3, 3), dtype=code)
+                    unit = np.ones(1, dtype=code)
+
+                    arr[0, 0] = unit[0]
+                    arr[1, 0] = unit[0]
+
+                per_column = np.count_nonzero(arr, axis=0)
+                assert_equal(per_column, np.array([2, 0, 0]),
+                             err_msg=err_msg)
+
+                per_row = np.count_nonzero(arr, axis=1)
+                assert_equal(per_row, np.array([1, 1, 0]),
+                             err_msg=err_msg)
+
+                total = np.array(2)
+                assert_equal(np.count_nonzero(arr, axis=(0, 1)),
+                             total, err_msg=err_msg)
+                assert_equal(np.count_nonzero(arr, axis=None),
+                             total, err_msg=err_msg)
+                assert_equal(np.count_nonzero(arr),
+                             total, err_msg=err_msg)
+
+    def test_count_nonzero_axis_consistent(self):
+        # Cross-check the object-dtype path against the integer path: the
+        # same values stored both ways must give identical counts for every
+        # ordering of every proper subset of the four axes.  The builtin
+        # ``object`` is used because NumPy 1.24 removed ``np.object``.
+        from itertools import combinations, permutations
+
+        axis = (0, 1, 2, 3)
+        size = (5, 5, 5, 5)
+        msg = "Mismatch for axis: %s"
+
+        rng = np.random.RandomState(1234)
+        m = rng.randint(-100, 100, size=size)
+        n = m.astype(object)
+
+        for length in range(len(axis)):
+            for combo in combinations(axis, length):
+                for perm in permutations(combo):
+                    assert_equal(
+                        np.count_nonzero(m, axis=perm),
+                        np.count_nonzero(n, axis=perm),
+                        err_msg=msg % (perm,))
+
     def test_array_method(self):
+        # Tests that the array method
+        # call to nonzero works
         m = np.array([[1, 0, 0], [4, 0, 6]])
         tgt = [[0, 1, 1], [0, 0, 2]]