ENH: added axis param for np.count_nonzero by gfyoung · Pull Request #7177 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

ENH: added axis param for np.count_nonzero #7177

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 5, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
ENH: added axis param for np.count_nonzero
Closes gh-391.
  • Loading branch information
gfyoung committed Aug 5, 2016
commit 0fc9e4520b1d00b58a77f28936da2fec2672de83
24 changes: 24 additions & 0 deletions benchmarks/benchmarks/bench_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,27 @@ def time_correlate(self, size1, size2, mode):

def time_convolve(self, size1, size2, mode):
np.convolve(self.x1, self.x2, mode=mode)


class CountNonzero(Benchmark):
    """Time ``np.count_nonzero`` on a whole array and along its axes.

    The benchmark is parameterised over the number of rows (``numaxes``),
    the length of each row (``size``) and the element ``dtype``.
    """

    param_names = ['numaxes', 'size', 'dtype']
    params = [
        [1, 2, 3],
        [100, 10000, 1000000],
        [bool, int, str, object]
    ]

    def setup(self, numaxes, size, dtype):
        # Fill the array with a repeating 0, 1, 2 pattern instead of using
        # ``np.empty``: ``np.empty`` returns uninitialised memory, so the
        # data being counted would differ from run to run.  The pattern is
        # built as int8 first to keep the intermediate array small before
        # casting to the requested dtype.
        pattern = (np.arange(numaxes * size) % 3).astype(np.int8)
        self.x = pattern.reshape(numaxes, size).astype(dtype)

    def time_count_nonzero(self, numaxes, size, dtype):
        # Count over the flattened array (no ``axis`` argument).
        np.count_nonzero(self.x)

    def time_count_nonzero_axis(self, numaxes, size, dtype):
        # Count along the last axis only.
        np.count_nonzero(self.x, axis=self.x.ndim - 1)

    def time_count_nonzero_multi_axis(self, numaxes, size, dtype):
        # Count along the last two axes at once.  ``setup`` always builds a
        # 2-D array, so the guard only protects against a future change to
        # the input shape.
        if self.x.ndim >= 2:
            np.count_nonzero(self.x, axis=(
                self.x.ndim - 1, self.x.ndim - 2))
3 changes: 0 additions & 3 deletions benchmarks/benchmarks/bench_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,6 @@ def setup(self):
def time_nonzero(self):
np.nonzero(self.b)

def time_count_nonzero(self):
np.count_nonzero(self.b)

def time_not_bool(self):
(~self.b)

Expand Down
3 changes: 3 additions & 0 deletions doc/release/1.12.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,9 @@ Generalized ``flip``
axis=1 respectively. The newly added ``flip`` function reverses the elements of
an array along any given axis.

* ``np.count_nonzero`` now has an ``axis`` parameter, allowing
non-zero counts to be generated on more than just a flattened
array object.

BLIS support in ``numpy.distutils``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
28 changes: 0 additions & 28 deletions numpy/add_newdocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,34 +942,6 @@ def luf(lamdaexpr, *args, **kwargs):

""")

add_newdoc('numpy.core.multiarray', 'count_nonzero',
"""
count_nonzero(a)

Counts the number of non-zero values in the array ``a``.

Parameters
----------
a : array_like
The array for which to count non-zeros.

Returns
-------
count : int or array of int
Number of non-zero values in the array.

See Also
--------
nonzero : Return the coordinates of all the non-zero values.
Examples
--------
>>> np.count_nonzero(np.eye(4))
4
>>> np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]])
5
""")

add_newdoc('numpy.core.multiarray', 'set_typeDict',
"""set_typeDict(dict)

Expand Down
89 changes: 87 additions & 2 deletions numpy/core/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import sys
import warnings

import numpy as np
from . import multiarray
from .multiarray import (
_fastCopyAndTranspose as fastCopyAndTranspose, ALLOW_THREADS,
Expand Down Expand Up @@ -376,6 +377,89 @@ def extend_all(module):
__all__.append(a)


def count_nonzero(a, axis=None):
"""
Counts the number of non-zero values in the array ``a``.

The word "non-zero" is in reference to the Python 2.x
built-in method ``__nonzero__()`` (renamed ``__bool__()``
in Python 3.x) of Python objects that tests an object's
"truthfulness". For example, any number is considered
truthful if it is nonzero, whereas any string is considered
truthful if it is not the empty string. Thus, this function
(recursively) counts how many elements in ``a`` (and in
sub-arrays thereof) have their ``__nonzero__()`` or ``__bool__()``
method evaluated to ``True``.

Parameters
----------
a : array_like
The array for which to count non-zeros.
axis : int or tuple, optional
Axis or tuple of axes along which to count non-zeros.
Default is None, meaning that non-zeros will be counted
along a flattened version of ``a``.

.. versionadded:: 1.12.0

Returns
-------
count : int or array of int
Number of non-zero values in the array along a given axis.
Otherwise, the total number of non-zero values in the array
is returned.

See Also
--------
nonzero : Return the coordinates of all the non-zero values.

Examples
--------
>>> np.count_nonzero(np.eye(4))
4
>>> np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]])
5
>>> np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]], axis=0)
array([1, 1, 1, 1, 1])
>>> np.count_nonzero([[0,1,7,0,0],[3,0,0,2,19]], axis=1)
array([2, 3])

"""
if axis is None or axis == ():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd consider this a bug, described in #9728.

return multiarray.count_nonzero(a)

a = asanyarray(a)

if a.dtype == bool:
return a.sum(axis=axis, dtype=np.intp)

if issubdtype(a.dtype, np.number):
return (a != 0).sum(axis=axis, dtype=np.intp)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This allocates a new boolean array of the same shape as the original. I thought the whole point was to avoid doing that...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@madphysicist: When was that the whole point?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought wrong apparently. It just seems a bit hacky to do that with a function that is implemented in C exactly to avoid such an operation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hacky, a bit, but it does get the job done without too much sadness.


if (issubdtype(a.dtype, np.string_) or
issubdtype(a.dtype, np.unicode_)):
nullstr = a.dtype.type('')
return (a != nullstr).sum(axis=axis, dtype=np.intp)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This allocates as well...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For future reference: see conversation above


axis = asarray(_validate_axis(axis, a.ndim, 'axis'))
counts = np.apply_along_axis(multiarray.count_nonzero, axis[0], a)

if axis.size == 1:
return counts
else:
# for subsequent axis numbers, that number decreases
# by one in this new 'counts' array if it was larger
# than the first axis upon which 'count_nonzero' was
# applied but remains unchanged if that number was
# smaller than that first axis
#
# this trick enables us to perform counts on object-like
# elements across multiple axes very quickly because integer
# addition is very well optimized
return counts.sum(axis=tuple(axis[1:] - (
axis[1:] > axis[0])), dtype=np.intp)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if this is what you want, but have you considered just applying np.lib.function_base._ureduce on top of np.apply_along_axis above instead of doing sums?

Copy link
Contributor Author
@gfyoung gfyoung Jul 8, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have, and unfortunately, it doesn't quite do what I want (i.e. I get test failures). My other objection is that it would make things difficult for expansion (e.g. add an out or keepdims arg, which is what @juliantaylor had proposed should be added later).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That may be the way to go now that I think about it. Perhaps writing this as a ufunc will be easier since the existing infrastructure will provide all the axis, out and keepdims parameters.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although on second thought a reduction function may not be appropriate as a ufunc...

Copy link
Contributor Author
@gfyoung gfyoung Jul 8, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exactly, though additional ufunc behaviour is best saved for another PR 😄



def asarray(a, dtype=None, order=None):
"""Convert the input to an array.

Expand Down Expand Up @@ -891,7 +975,7 @@ def correlate(a, v, mode='valid'):
return multiarray.correlate2(a, v, mode)


def convolve(a,v,mode='full'):
def convolve(a, v, mode='full'):
"""
Returns the discrete, linear convolution of two one-dimensional sequences.

Expand Down Expand Up @@ -1752,7 +1836,7 @@ def cross(a, b, axisa=-1, axisb=-1, axisc=-1, axis=None):
return rollaxis(cp, -1, axisc)


#Use numarray's printing function
# Use numarray's printing function
from .arrayprint import array2string, get_printoptions, set_printoptions


Expand Down Expand Up @@ -2283,6 +2367,7 @@ def load(file):
# These are all essentially abbreviations
# These might wind up in a special abbreviations module


def _maketup(descr, val):
dt = dtype(descr)
# Place val in all scalar tuples:
Expand Down
8 changes: 1 addition & 7 deletions numpy/core/src/multiarray/multiarraymodule.c
F987
Original file line number Diff line number Diff line change
Expand Up @@ -1980,16 +1980,10 @@ array_zeros(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds)
static PyObject *
array_count_nonzero(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
{
PyObject *array_in;
PyArrayObject *array;
npy_intp count;

if (!PyArg_ParseTuple(args, "O", &array_in)) {
return NULL;
}

array = (PyArrayObject *)PyArray_FromAny(array_in, NULL, 0, 0, 0, NULL);
if (array == NULL) {
if (!PyArg_ParseTuple(args, "O&", PyArray_Converter, &array)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If only you could add an additional axis parameter here...

Copy link
Contributor Author
@gfyoung gfyoung Jul 7, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah...much, much harder said than done. :(

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah. I'm beginning to see that. I will keep trying though. Even if I succeed eventually, your solution should probably be accepted since it provides the correct functionality. A C drop-in replacement implementation should not change the API you propose.

return NULL;
}

Expand Down
112 changes: 110 additions & 2 deletions numpy/core/tests/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@ def test_compress(self):
out = np.compress([0, 1], arr, axis=0)
assert_equal(out, tgt)

def test_count_nonzero(self):
arr = [[0, 1, 7, 0, 0],
[3, 0, 0, 2, 19]]
tgt = np.array([2, 3])
out = np.count_nonzero(arr, axis=1)
assert_equal(out, tgt)

def test_cumproduct(self):
A = [[1, 2, 3], [4, 5, 6]]
assert_(np.all(np.cumproduct(A) == np.array([1, 2, 6, 24, 120, 720])))
Expand Down Expand Up @@ -991,9 +998,110 @@ class C(np.ndarray):
assert_(type(nzx_i) is np.ndarray)
assert_(nzx_i.flags.writeable)

# Tests that the array method
# call works
def test_count_nonzero_axis(self):
# Basic check of functionality
m = np.array([[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]])

expected = np.array([1, 1, 1, 1, 1])
assert_equal(np.count_nonzero(m, axis=0), expected)

expected = np.array([2, 3])
assert_equal(np.count_nonzero(m, axis=1), expected)

assert_raises(ValueError, np.count_nonzero, m, axis=(1, 1))
assert_raises(TypeError, np.count_nonzero, m, axis='foo')
assert_raises(ValueError, np.count_nonzero, m, axis=3)
assert_raises(TypeError, np.count_nonzero,
m, axis=np.array([[1], [2]]))

def test_count_nonzero_axis_all_dtypes(self):
# More thorough test that the axis argument is respected
# for all dtypes and responds correctly when presented with
# either integer or tuple arguments for axis
msg = "Mismatch for dtype: %s"

for dt in np.typecodes['All']:
err_msg = msg % (np.dtype(dt).name,)

if dt != 'V':
if dt != 'M':
m = np.zeros((3, 3), dtype=dt)
n = np.ones(1, dtype=dt)

m[0, 0] = n[0]
m[1, 0] = n[0]

else: # np.zeros doesn't work for np.datetime64
m = np.array(['1970-01-01'] * 9)
m = m.reshape((3, 3))

m[0, 0] = '1970-01-12'
m[1, 0] = '1970-01-12'
m = m.astype(dt)

expected = np.array([2, 0, 0])
assert_equal(np.count_nonzero(m, axis=0),
expected, err_msg=err_msg)

expected = np.array([1, 1, 0])
assert_equal(np.count_nonzero(m, axis=1),
expected, err_msg=err_msg)

expected = np.array(2)
assert_equal(np.count_nonzero(m, axis=(0, 1)),
expected, err_msg=err_msg)
assert_equal(np.count_nonzero(m, axis=None),
expected, err_msg=err_msg)
assert_equal(np.count_nonzero(m),
expected, err_msg=err_msg)

if dt == 'V':
# There are no 'nonzero' objects for np.void, so the testing
# setup is slightly different for this dtype
m = np.array([np.void(1)] * 6).reshape((2, 3))

expected = np.array([0, 0, 0])
assert_equal(np.count_nonzero(m, axis=0),
expected, err_msg=err_msg)

expected = np.array([0, 0])
assert_equal(np.count_nonzero(m, axis=1),
expected, err_msg=err_msg)

expected = np.array(0)
assert_equal(np.count_nonzero(m, axis=(0, 1)),
expected, err_msg=err_msg)
assert_equal(np.count_nonzero(m, axis=None),
expected, err_msg=err_msg)
assert_equal(np.count_nonzero(m),
expected, err_msg=err_msg)

def test_count_nonzero_axis_consistent(self):
# Check that the axis behaviour for valid axes in
# non-special cases is consistent (and therefore
# correct) by checking it against an integer array
# that is then casted to the generic object dtype
from itertools import combinations, permutations

axis = (0, 1, 2, 3)
size = (5, 5, 5, 5)
msg = "Mismatch for axis: %s"

rng = np.random.RandomState(1234)
m = rng.randint(-100, 100, size=size)
n = m.astype(np.object)

for length in range(len(axis)):
for combo in combinations(axis, length):
for perm in permutations(combo):
assert_equal(
np.count_nonzero(m, axis=perm),
np.count_nonzero(n, axis=perm),
err_msg=msg % (perm,))

def test_array_method(self):
# Tests that the array method
# call to nonzero works
m = np.array([[1, 0, 0], [4, 0, 6]])
tgt = [[0, 1, 1], [0, 0, 2]]

Expand Down
0