ENH: Add an "axis" kwarg to numpy.unique by joferkington · Pull Request #3584 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

ENH: Add an "axis" kwarg to numpy.unique #3584

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 58 additions & 6 deletions numpy/lib/arraysetops.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def ediff1d(ary, to_end=None, to_begin=None):

return ed

def unique(ar, return_index=False, return_inverse=False):
def unique(ar, return_index=False, return_inverse=False, axis=None):
"""
Find the unique elements of an array.

Expand All @@ -102,13 +102,18 @@ def unique(ar, return_index=False, return_inverse=False):
Parameters
----------
ar : array_like
Input array. This will be flattened if it is not already 1-D.
Input array. Unless `axis` is specified, this will be flattened if it
is not already 1-D.
return_index : bool, optional
If True, also return the indices of `ar` that result in the unique
array.
If True, also return the indices of `ar` along the specified axis that
result in the unique array.
return_inverse : bool, optional
If True, also return the indices of the unique array that can be used
to reconstruct `ar`.
If True, also return the indices of the unique array along the
specified axis that can be used to reconstruct `ar`.
axis : int or None, optional
The axis to operate on. If None, `ar` will be flattened beforehand.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can say this more clearly? I am thinking about using "along", or saying that all other axes are the elements. There was some discussion about other names for the argument, but I am not sure there was any better idea. `axis` seems fine to me, though.

Object arrays or structured arrays that contain objects are not
supported if the `axis` kwarg is used.

Returns
-------
Expand All @@ -134,6 +139,12 @@ def unique(ar, return_index=False, return_inverse=False):
>>> np.unique(a)
array([1, 2, 3])

Return the unique rows of a 2D array

>>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
>>> np.unique(a, axis=0)
array([[1, 0, 0], [2, 3, 4]])

Return the indices of the original array that give the unique values:

>>> a = np.array(['a', 'b', 'b', 'c', 'a'])
Expand All @@ -158,6 +169,47 @@ def unique(ar, return_index=False, return_inverse=False):
>>> u[indices]
array([1, 2, 6, 4, 2, 3, 2])

"""
if axis is None:
return _unique1d(ar, return_index, return_inverse)
if abs(axis) > ar.ndim:
raise ValueError('Invalid axis kwarg specified for unique')

ar = np.swapaxes(ar, axis, 0)
orig_shape, orig_dtype = ar.shape, ar.dtype
# Must reshape to a contiguous 2D array for this to work...
ar = ar.reshape(orig_shape[0], -1)
ar = np.ascontiguousarray(ar)

if ar.dtype.char in (np.typecodes['AllInteger'] + 'S'):
# Optimization inspired by <http://stackoverflow.com/a/16973510/325565>
dtype = np.dtype((np.void, ar.dtype.itemsize * ar.shape[1]))
else:
dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if it is worth it, but if the dtype has no fields, you could probably also use `[('', ar.dtype, ar.shape[1])]`. (You could also write `[...] * ar.shape[1]`, but that is mostly a matter of style.)


try:
consolidated = ar.view(dtype)
except TypeError:
# There's no good way to do this for object arrays, etc...
msg = 'The axis argument to unique is not supported for dtype {dt}'
raise TypeError(msg.format(dt=ar.dtype))

def reshape_uniq(uniq):
    """Convert consolidated unique records back to the caller's array layout.

    Reinterprets `uniq` with the original element dtype, restores the
    trailing dimensions saved in `orig_shape`, and swaps the leading axis
    back to position `axis`.
    """
    # `orig_dtype`, `orig_shape` and `axis` come from the enclosing scope.
    restored = uniq.view(orig_dtype).reshape(-1, *orig_shape[1:])
    return np.swapaxes(restored, 0, axis)

output = _unique1d(consolidated, return_index, return_inverse)
if not (return_index or return_inverse):
return reshape_uniq(output)
else:
uniq = reshape_uniq(output[0])
return tuple([uniq] + list(output[1:]))

def _unique1d(ar, return_index=False, return_inverse=False):
"""
Find the unique elements of an array.
"""
try:
ar = ar.flatten()
Expand Down
173 changes: 120 additions & 53 deletions numpy/lib/tests/test_arraysetops.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,59 +12,6 @@

class TestSetOps(TestCase):

def test_unique(self):
    """Exercise `unique` on 1-D input across many dtypes.

    For each dtype the unique values, the index output and the inverse
    output are compared against fixed expectations, for every combination
    of the two positional flags.
    """

    def verify(arr, expected, first_idx, inverse_idx, dt):
        # Values only.
        msg = "check values failed for type '%s'" % dt
        assert_array_equal(unique(arr), expected, msg)

        # Values plus the index output (flags passed positionally).
        msg = "check indexes failed for type '%s'" % dt
        values, idx = unique(arr, 1, 0)
        assert_array_equal(values, expected, msg)
        assert_array_equal(idx, first_idx, msg)

        # Values plus the inverse output.
        msg = "check reverse indexes failed for type '%s'" % dt
        values, inv = unique(arr, 0, 1)
        assert_array_equal(values, expected, msg)
        assert_array_equal(inv, inverse_idx, msg)

        # Both optional outputs at once.
        msg = "check with all indexes failed for type '%s'" % dt
        values, idx, inv = unique(arr, 1, 1)
        assert_array_equal(values, expected, msg)
        assert_array_equal(idx, first_idx, msg)
        assert_array_equal(inv, inverse_idx, msg)

    base = [5, 7, 1, 2, 1, 5, 7] * 10
    uniq_vals = [1, 2, 5, 7]
    first_idx = [2, 3, 0, 1]
    inverse_idx = [2, 3, 0, 1, 0, 2, 3] * 10

    # Numeric dtypes: every integer and float typecode plus day-resolution
    # datetime64 / timedelta64.
    numeric_types = list(np.typecodes['AllInteger'])
    numeric_types += list(np.typecodes['AllFloat'])
    numeric_types += ['datetime64[D]', 'timedelta64[D]']
    for dt in numeric_types:
        verify(np.array(base, dt), np.array(uniq_vals, dt),
               first_idx, inverse_idx, dt)

    # Object arrays are allocated empty and filled by slice assignment.
    dt = 'O'
    obj_in = np.empty(len(base), dt)
    obj_in[:] = base
    obj_out = np.empty(len(uniq_vals), dt)
    obj_out[:] = uniq_vals
    verify(obj_in, obj_out, first_idx, inverse_idx, dt)

    # Structured arrays: two unnamed int fields holding the same value.
    dt = [('', 'i'), ('', 'i')]
    struct_in = np.array(list(zip(base, base)), dt)
    struct_out = np.array(list(zip(uniq_vals, uniq_vals)), dt)
    verify(struct_in, struct_out, first_idx, inverse_idx, dt)

def test_intersect1d(self):
# unique inputs
a = np.array([5, 7, 1, 2])
Expand Down Expand Up @@ -252,6 +199,126 @@ def test_manyways(self):
c2 = setdiff1d(aux2, aux1)
assert_array_equal(c1, c2)

class TestUnique(TestCase):
    """Tests for `unique`: the 1-D behaviour and the `axis` keyword."""

    def test_1d_functionality(self):
        """Run the 1-D checks over numeric, object and structured dtypes."""
        a = [5, 7, 1, 2, 1, 5, 7] * 10
        b = [1, 2, 5, 7]
        i1 = [2, 3, 0, 1]
        i2 = [2, 3, 0, 1, 0, 2, 3] * 10

        # test for numeric arrays
        types = []
        types.extend(np.typecodes['AllInteger'])
        types.extend(np.typecodes['AllFloat'])
        types.append('datetime64[D]')
        types.append('timedelta64[D]')
        for dt in types:
            aa = np.array(a, dt)
            bb = np.array(b, dt)
            self._run_1d_tests(aa, bb, i1, i2, dt)

        # test for object arrays
        dt = 'O'
        aa = np.empty(len(a), dt)
        aa[:] = a
        bb = np.empty(len(b), dt)
        bb[:] = b
        self._run_1d_tests(aa, bb, i1, i2, dt)

        # test for structured arrays
        dt = [('', 'i'), ('', 'i')]
        aa = np.array(list(zip(a, a)), dt)
        bb = np.array(list(zip(b, b)), dt)
        self._run_1d_tests(aa, bb, i1, i2, dt)

    def _run_1d_tests(self, a, b, i1, i2, dt):
        """Compare `unique(a)` and its optional outputs with expectations.

        Parameters
        ----------
        a : input array.
        b : expected unique values.
        i1 : expected index output (second positional flag of `unique`).
        i2 : expected inverse output (third positional flag of `unique`).
        dt : dtype label, used only in failure messages.
        """
        msg = "check values failed for type '%s'" % dt
        v = unique(a)
        assert_array_equal(v, b, msg)

        msg = "check indexes failed for type '%s'" % dt
        v, j = unique(a, 1, 0)
        assert_array_equal(v, b, msg)
        assert_array_equal(j, i1, msg)

        msg = "check reverse indexes failed for type '%s'" % dt
        v, j = unique(a, 0, 1)
        assert_array_equal(v, b, msg)
        assert_array_equal(j, i2, msg)

        msg = "check with all indexes failed for type '%s'" % dt
        v, j1, j2 = unique(a, 1, 1)
        assert_array_equal(v, b, msg)
        assert_array_equal(j1, i1, msg)
        assert_array_equal(j2, i2, msg)

    def test_unique_axis_errors(self):
        """Object-containing dtypes and out-of-range axes must raise."""
        assert_raises(TypeError, self._run_axis_tests, object)
        assert_raises(TypeError, self._run_axis_tests,
                      [('a', int), ('b', object)])

        assert_raises(ValueError, unique, np.arange(10), axis=2)
        assert_raises(ValueError, unique, np.arange(10), axis=-2)

    def test_unique_axis(self):
        """Run the axis checks over many dtypes plus two equality corner cases."""
        types = []
        types.extend(np.typecodes['AllInteger'])
        types.extend(np.typecodes['AllFloat'])
        types.append('datetime64[D]')
        types.append('timedelta64[D]')
        types.append([('a', int), ('b', int)])
        types.append([('a', int), ('b', float)])

        for dtype in types:
            self._run_axis_tests(dtype)

        # uint8 values 0..9 viewed as bool: distinct bytes that are all
        # truthy must still compare equal as booleans.
        msg = 'Non-bitwise-equal booleans test failed'
        data = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool)
        result = np.array([[False, True], [True, True]], dtype=bool)
        assert_array_equal(unique(data, axis=0), result, msg)

        # -0.0 and 0.0 have different bit patterns but must be treated as
        # the same value.
        msg = 'Negative zero equality test failed'
        data = np.array([[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]])
        result = np.array([[-0.0, 0.0]])
        assert_array_equal(unique(data, axis=0), result, msg)

    def _run_axis_tests(self, dtype):
        """Check `unique` with and without `axis` on a small 4x4 array of `dtype`."""
        data = np.array([[0, 1, 0, 0],
                         [1, 0, 0, 0],
                         [0, 1, 0, 0],
                         [1, 0, 0, 0]]).astype(dtype)

        # No axis given: the 2-D input is reduced to its unique scalars.
        msg = 'Unique with flattened array (no axis) failed'
        result = np.array([0, 1])
        assert_array_equal(unique(data), result.astype(dtype), msg)

        msg = 'Unique with 2d array and axis=0 failed'
        result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]])
        assert_array_equal(unique(data, axis=0), result.astype(dtype), msg)

        msg = 'Unique with 2d array and axis=1 failed'
        result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]])
        assert_array_equal(unique(data, axis=1), result.astype(dtype), msg)

        # Three identical slices stacked along the last axis collapse to one.
        msg = 'Unique with 3d array and axis=2 failed'
        data3d = np.dstack([data] * 3)
        result = data3d[..., :1]
        assert_array_equal(unique(data3d, axis=2), result, msg)

        uniq, idx, inv = unique(data, axis=0, return_index=True,
                                return_inverse=True)
        msg = "Unique's return_index=True failed with axis=0"
        assert_array_equal(data[idx], uniq, msg)
        msg = "Unique's return_inverse=True failed with axis=0"
        assert_array_equal(uniq[inv], data, msg)

        uniq, idx, inv = unique(data, axis=1, return_index=True,
                                return_inverse=True)
        msg = "Unique's return_index=True failed with axis=1"
        assert_array_equal(data[:, idx], uniq, msg)
        msg = "Unique's return_inverse=True failed with axis=1"
        assert_array_equal(uniq[:, inv], data, msg)

# Script entry point: when this file is executed directly, call
# run_module_suite() (presumably the numpy.testing runner imported at the
# top of the file -- not visible in this view, confirm).
if __name__ == "__main__":
run_module_suite()
0