10000 ENH: Add an "axis" kwarg to numpy.unique by joferkington · Pull Request #3584 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

ENH: Add an "axis" kwarg to numpy.unique #3584

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
ENH: Add an "axis" kwarg to numpy.unique
  • Loading branch information
joferkington committed Sep 9, 2013
commit 4fcf6a8412de167a085e3cfa48d457bd76db45a1
62 changes: 56 additions & 6 deletions numpy/lib/arraysetops.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def ediff1d(ary, to_end=None, to_begin=None):

return ed

def unique(ar, return_index=False, return_inverse=False):
def unique(ar, return_index=False, return_inverse=False, axis=None):
"""
Find the unique elements of an array.

Expand All @@ -102,13 +102,18 @@ def unique(ar, return_index=False, return_inverse=False):
Parameters
----------
ar : array_like
Input array. This will be flattened if it is not already 1-D.
Input array. Unless `axis` is specified, this will be flattened if it
is not already 1-D.
return_index : bool, optional
If True, also return the indices of `ar` that result in the unique
array.
If True, also return the indices of `ar` along the specified axis that
result in the unique array.
return_inverse : bool, optional
If True, also return the indices of the unique array that can be used
to reconstruct `ar`.
If True, also return the indices of the unique array along the
specified axis that can be used to reconstruct `ar`.
axis : int or None, optional
The axis to operate on. If None, `ar` will be flattened beforehand.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can say this more clearly? I am thinking of using "along", or of saying that all the other axes make up the elements. There was some discussion about other names for the argument, but I am not sure there was any better idea. "axis" seems fine to me, though.

Object arrays or structured arrays that contain objects are not
supported if the `axis` kwarg is used.

Returns
-------
Expand All @@ -134,6 +139,12 @@ def unique(ar, return_index=False, return_inverse=False):
>>> np.unique(a)
array([1, 2, 3])

Return the unique rows of a 2D array

>>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
>>> np.unique(a, axis=0)
array([[1, 0, 0], [2, 3, 4]])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like a copy-paste error; the first row of the output should be [1, 0, 0].


Return the indices of the original array that give the unique values:

>>> a = np.array(['a', 'b', 'b', 'c', 'a'])
Expand All @@ -158,6 +169,45 @@ def unique(ar, return_index=False, return_inverse=False):
>>> u[indices]
array([1, 2, 6, 4, 2, 3, 2])

"""
if axis is None or ar.ndim == 1:
return _unique1d(ar, return_index, return_inverse)

ar = np.swapaxes(ar, axis, 0)
orig_shape, orig_dtype = ar.shape, ar.dtype
# Must reshape to a contiguous 2D array for this to work...
ar = ar.reshape(orig_shape[0], -1)
ar = np.ascontiguousarray(ar)

if ar.dtype.char in (np.typecodes['AllInteger'] + 'S'):
# Optimization inspired by <http://stackoverflow.com/a/16973510/325565>
dtype = np.dtype((np.void, ar.dtype.itemsize * ar.shape[1]))
else:
dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if it is worth it, but if the dtype has no fields, you probably could also use [('', ar.dtype, ar.shape[1])]. (Also could write [...] * ar.shape[1] but that is style mostly)


try:
consolidated = ar.view(dtype)
except TypeError:
# There's no good way to do this for object arrays, etc...
msg = 'The axis argument to unique is not supported for dtype {dt}'
raise TypeError(msg.format(dt=ar.dtype))

def reshape_uniq(uniq):
    """Convert the consolidated unique records back to the caller's layout.

    The records are reinterpreted with ``orig_dtype``, reshaped so the
    trailing dimensions match ``orig_shape[1:]``, and finally axis 0 is
    swapped with ``axis``.
    """
    restored = uniq.view(orig_dtype).reshape(-1, *orig_shape[1:])
    return np.swapaxes(restored, 0, axis)

output = _unique1d(consolidated, return_index, return_inverse)
if not (return_index or return_inverse):
return reshape_uniq(output)
else:
uniq = reshape_uniq(output[0])
return tuple([uniq] + list(output[1:]))

def _unique1d(ar, return_index=False, return_inverse=False):
"""
Find the unique elements of an array.
"""
try:
ar = ar.flatten()
Expand Down
62 changes: 62 additions & 0 deletions numpy/lib/tests/test_arraysetops.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,68 @@ def check_all(a, b, i1, i2, dt):
bb = np.array(list(zip(b, b)), dt)
check_all(aa, bb, i1, i2, dt)

def test_unique_axis(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe make this a class, TestUnique? Move the other unique tests into it, and then, if you like, you can split the error checks into their own function (since run_axis_tests would be part of the class). I would like some short tests for invalid axes, too. (Actually, the last point doesn't matter much, I suppose, so do whatever you like.)

def run_axis_tests(dtype):
    """Check ``unique`` with and without ``axis`` for one dtype.

    Builds a small 4x4 array of zeros and ones, casts it to `dtype`, and
    verifies the unique values (no axis), the unique rows (``axis=0``),
    the unique columns (``axis=1``), the unique slices of a stacked 3d
    array (``axis=2``), and that the ``return_index`` /
    ``return_inverse`` outputs round-trip along axes 0 and 1.

    Parameters
    ----------
    dtype : dtype specifier
        Anything accepted by ``ndarray.astype``.

    Raises
    ------
    AssertionError
        If any comparison fails; the message names the failing case.
    """
    data = np.array([[0, 1, 0, 0],
                     [1, 0, 0, 0],
                     [0, 1, 0, 0],
                     [1, 0, 0, 0]]).astype(dtype)

    # No axis is passed here, so the 2d input is flattened first.
    msg = 'Unique with flattened 2d array (axis=None) failed'
    result = np.array([0, 1])
    assert_array_equal(unique(data), result.astype(dtype), msg)

    msg = 'Unique with 2d array and axis=0 failed'
    result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]])
    assert_array_equal(unique(data, axis=0), result.astype(dtype), msg)

    msg = 'Unique with 2d array and axis=1 failed'
    result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]])
    assert_array_equal(unique(data, axis=1), result.astype(dtype), msg)

    # All three stacked layers are identical, so one slice must remain.
    msg = 'Unique with 3d array and axis=2 failed'
    data3d = np.dstack([data] * 3)
    result = data3d[..., :1]
    assert_array_equal(unique(data3d, axis=2), result, msg)

    uniq, idx, inv = unique(data, axis=0, return_index=True,
                            return_inverse=True)
    msg = "Unique's return_index=True failed with axis=0"
    assert_array_equal(data[idx], uniq, msg)
    # Pass the message so a failure identifies which check broke.
    msg = "Unique's return_inverse=True failed with axis=0"
    assert_array_equal(uniq[inv], data, msg)

    uniq, idx, inv = unique(data, axis=1, return_index=True,
                            return_inverse=True)
    msg = "Unique's return_index=True failed with axis=1"
    assert_array_equal(data[:, idx], uniq, msg)
    msg = "Unique's return_inverse=True failed with axis=1"
    assert_array_equal(uniq[:, inv], data, msg)

types = []
types.extend(np.typecodes['AllInteger'])
types.extend(np.typecodes['AllFloat'])
types.append('datetime64[D]')
types.append('timedelta64[D]')
types.append([('a', int), ('b', int)])
types.append([('a', int), ('b', float)])

for dtype in types:
run_axis_tests(dtype)

assert_raises(TypeError, run_axis_tests, object)
assert_raises(TypeError, run_axis_tests, [('a', int), ('b', object)])

msg = 'Non-bitwise-equal booleans test failed'
data = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool)
result = np.array([[False, True], [True, True]], dtype=bool)
assert_array_equal(unique(data, axis=0), result, msg)

msg = 'Negative zero equality test failed'
data = np.array([[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]])
result = np.array([[-0.0, 0.0]])
assert_array_equal(unique(data, axis=0), result, msg)

def test_intersect1d(self):
# unique inputs
a = np.array([5, 7, 1, 2])
Expand Down
0