From 4fcf6a8412de167a085e3cfa48d457bd76db45a1 Mon Sep 17 00:00:00 2001 From: Joe Kington Date: Tue, 6 Aug 2013 22:10:41 -0500 Subject: [PATCH 1/5] ENH: Add an "axis" kwarg to `numpy.unique` --- numpy/lib/arraysetops.py | 62 ++++++++++++++++++++++++++--- numpy/lib/tests/test_arraysetops.py | 62 +++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 6 deletions(-) diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 5cd535703690..e65b6e064e23 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -90,7 +90,7 @@ def ediff1d(ary, to_end=None, to_begin=None): return ed -def unique(ar, return_index=False, return_inverse=False): +def unique(ar, return_index=False, return_inverse=False, axis=None): """ Find the unique elements of an array. @@ -102,13 +102,18 @@ def unique(ar, return_index=False, return_inverse=False): Parameters ---------- ar : array_like - Input array. This will be flattened if it is not already 1-D. + Input array. Unless `axis` is specified, this will be flattened if it + is not already 1-D. return_index : bool, optional - If True, also return the indices of `ar` that result in the unique - array. + If True, also return the indices of `ar` along the specified axis that + result in the unique array. return_inverse : bool, optional - If True, also return the indices of the unique array that can be used - to reconstruct `ar`. + If True, also return the indices of the unique array along the + specified axis that can be used to reconstruct `ar`. + axis : int or None, optional + The axis to operate on. If None, `ar` will be flattened beforehand. + Object arrays or structured arrays that contain objects are not + supported if the `axis` kwarg is used. 
Returns ------- @@ -134,6 +139,12 @@ def unique(ar, return_index=False, return_inverse=False): >>> np.unique(a) array([1, 2, 3]) + Return the unique rows of a 2D array + + >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]]) + >>> np.unique(a, axis=0) + array([[1, 1, 0], [2, 3, 4]]) + Return the indices of the original array that give the unique values: >>> a = np.array(['a', 'b', 'b', 'c', 'a']) @@ -158,6 +169,45 @@ def unique(ar, return_index=False, return_inverse=False): >>> u[indices] array([1, 2, 6, 4, 2, 3, 2]) + """ + if axis is None or ar.ndim == 1: + return _unique1d(ar, return_index, return_inverse) + + ar = np.swapaxes(ar, axis, 0) + orig_shape, orig_dtype = ar.shape, ar.dtype + # Must reshape to a contiguous 2D array for this to work... + ar = ar.reshape(orig_shape[0], -1) + ar = np.ascontiguousarray(ar) + + if ar.dtype.char in (np.typecodes['AllInteger'] + 'S'): + # Optimization inspired by + dtype = np.dtype((np.void, ar.dtype.itemsize * ar.shape[1])) + else: + dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])] + + try: + consolidated = ar.view(dtype) + except TypeError: + # There's no good way to do this for object arrays, etc... + msg = 'The axis argument to unique is not supported for dtype {dt}' + raise TypeError(msg.format(dt=ar.dtype)) + + def reshape_uniq(uniq): + uniq = uniq.view(orig_dtype) + uniq = uniq.reshape(-1, *orig_shape[1:]) + uniq = np.swapaxes(uniq, 0, axis) + return uniq + + output = _unique1d(consolidated, return_index, return_inverse) + if not (return_index or return_inverse): + return reshape_uniq(output) + else: + uniq = reshape_uniq(output[0]) + return tuple([uniq] + list(output[1:])) + +def _unique1d(ar, return_index=False, return_inverse=False): + """ + Find the unique elements of an array. 
""" try: ar = ar.flatten() diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index 5934ca05a08d..360cbe828eed 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -65,6 +65,68 @@ def check_all(a, b, i1, i2, dt): bb = np.array(list(zip(b, b)), dt) check_all(aa, bb, i1, i2, dt) + def test_unique_axis(self): + def run_axis_tests(dtype): + data = np.array([[0, 1, 0, 0], + [1, 0, 0, 0], + [0, 1, 0, 0], + [1, 0, 0, 0]]).astype(dtype) + + msg = 'Unique with 1d array and axis=0 failed' + result = np.array([0,1]) + assert_array_equal(unique(data), result.astype(dtype), msg) + + msg = 'Unique with 2d array and axis=0 failed' + result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]]) + assert_array_equal(unique(data, axis=0), result.astype(dtype), msg) + + msg = 'Unique with 2d array and axis=1 failed' + result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]]) + assert_array_equal(unique(data, axis=1), result.astype(dtype), msg) + + msg = 'Unique with 3d array and axis=2 failed' + data3d = np.dstack([data] * 3) + result = data3d[..., :1] + assert_array_equal(unique(data3d, axis=2), result, msg) + + uniq, idx, inv = unique(data, axis=0, return_index=True, + return_inverse=True) + msg = "Unique's return_index=True failed with axis=0" + assert_array_equal(data[idx], uniq, msg) + msg = "Unique's return_inverse=True failed with axis=0" + assert_array_equal(uniq[inv], data) + + uniq, idx, inv = unique(data, axis=1, return_index=True, + return_inverse=True) + msg = "Unique's return_index=True failed with axis=1" + assert_array_equal(data[:,idx], uniq) + msg = "Unique's return_inverse=True failed with axis=1" + assert_array_equal(uniq[:,inv], data) + + types = [] + types.extend(np.typecodes['AllInteger']) + types.extend(np.typecodes['AllFloat']) + types.append('datetime64[D]') + types.append('timedelta64[D]') + types.append([('a', int), ('b', int)]) + types.append([('a', int), ('b', float)]) + + for dtype in 
types: + run_axis_tests(dtype) + + assert_raises(TypeError, run_axis_tests, object) + assert_raises(TypeError, run_axis_tests, [('a', int), ('b', object)]) + + msg = 'Non-bitwise-equal booleans test failed' + data = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool) + result = np.array([[False, True], [True, True]], dtype=bool) + assert_array_equal(unique(data, axis=0), result, msg) + + msg = 'Negative zero equality test failed' + data = np.array([[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]]) + result = np.array([[-0.0, 0.0]]) + assert_array_equal(unique(data, axis=0), result, msg) + def test_intersect1d(self): # unique inputs a = np.array([5, 7, 1, 2]) From a9f8ece4e5813473d760e62330927d0bcbe9546c Mon Sep 17 00:00:00 2001 From: Joe Kington Date: Sun, 8 Sep 2013 21:34:21 -0500 Subject: [PATCH 2/5] DOC: Fixed typo in docstring examples for unique --- numpy/lib/arraysetops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index e65b6e064e23..44baa0403a7b 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -143,7 +143,7 @@ def unique(ar, return_index=False, return_inverse=False, axis=None): >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]]) >>> np.unique(a, axis=0) - array([[1, 1, 0], [2, 3, 4]]) + array([[1, 0, 0], [2, 3, 4]]) Return the indices of the original array that give the unique values: From fccd7fe3a47f492c71bc5b39792b2a6b4d6e8f3c Mon Sep 17 00:00:00 2001 From: Joe Kington Date: Sun, 8 Sep 2013 21:42:40 -0500 Subject: [PATCH 3/5] BUG: unique should raise a clearer error if an invalid axis kwarg is specified --- numpy/lib/arraysetops.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 44baa0403a7b..b7c71e979203 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -170,8 +170,10 @@ def unique(ar, return_index=False, return_inverse=False, axis=None): array([1, 2, 
6, 4, 2, 3, 2]) """ - if axis is None or ar.ndim == 1: + if axis is None: return _unique1d(ar, return_index, return_inverse) + if abs(axis) > ar.ndim: + raise ValueError('Invalid axis kwarg specified for unique') ar = np.swapaxes(ar, axis, 0) orig_shape, orig_dtype = ar.shape, ar.dtype From 2544df4d80e38496650dbfcf5a8dc42561324582 Mon Sep 17 00:00:00 2001 From: Joe Kington Date: Mon, 9 Sep 2013 06:34:30 -0500 Subject: [PATCH 4/5] STY: Refactored unique tests into their own class --- numpy/lib/tests/test_arraysetops.py | 232 ++++++++++++++-------------- 1 file changed, 117 insertions(+), 115 deletions(-) diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index 360cbe828eed..b114dcb57fd8 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -12,121 +12,6 @@ class TestSetOps(TestCase): - def test_unique(self): - - def check_all(a, b, i1, i2, dt): - msg = "check values failed for type '%s'" % dt - v = unique(a) - assert_array_equal(v, b, msg) - - msg = "check indexes failed for type '%s'" % dt - v, j = unique(a, 1, 0) - assert_array_equal(v, b, msg) - assert_array_equal(j, i1, msg) - - msg = "check reverse indexes failed for type '%s'" % dt - v, j = unique(a, 0, 1) - assert_array_equal(v, b, msg) - assert_array_equal(j, i2, msg) - - msg = "check with all indexes failed for type '%s'" % dt - v, j1, j2 = unique(a, 1, 1) - assert_array_equal(v, b, msg) - assert_array_equal(j1, i1, msg) - assert_array_equal(j2, i2, msg) - - a = [5, 7, 1, 2, 1, 5, 7]*10 - b = [1, 2, 5, 7] - i1 = [2, 3, 0, 1] - i2 = [2, 3, 0, 1, 0, 2, 3]*10 - - # test for numeric arrays - types = [] - types.extend(np.typecodes['AllInteger']) - types.extend(np.typecodes['AllFloat']) - types.append('datetime64[D]') - types.append('timedelta64[D]') - for dt in types: - aa = np.array(a, dt) - bb = np.array(b, dt) - check_all(aa, bb, i1, i2, dt) - - # test for object arrays - dt = 'O' - aa = np.empty(len(a), dt) - aa[:] = a - bb = 
np.empty(len(b), dt) - bb[:] = b - check_all(aa, bb, i1, i2, dt) - - # test for structured arrays - dt = [('', 'i'), ('', 'i')] - aa = np.array(list(zip(a, a)), dt) - bb = np.array(list(zip(b, b)), dt) - check_all(aa, bb, i1, i2, dt) - - def test_unique_axis(self): - def run_axis_tests(dtype): - data = np.array([[0, 1, 0, 0], - [1, 0, 0, 0], - [0, 1, 0, 0], - [1, 0, 0, 0]]).astype(dtype) - - msg = 'Unique with 1d array and axis=0 failed' - result = np.array([0,1]) - assert_array_equal(unique(data), result.astype(dtype), msg) - - msg = 'Unique with 2d array and axis=0 failed' - result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]]) - assert_array_equal(unique(data, axis=0), result.astype(dtype), msg) - - msg = 'Unique with 2d array and axis=1 failed' - result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]]) - assert_array_equal(unique(data, axis=1), result.astype(dtype), msg) - - msg = 'Unique with 3d array and axis=2 failed' - data3d = np.dstack([data] * 3) - result = data3d[..., :1] - assert_array_equal(unique(data3d, axis=2), result, msg) - - uniq, idx, inv = unique(data, axis=0, return_index=True, - return_inverse=True) - msg = "Unique's return_index=True failed with axis=0" - assert_array_equal(data[idx], uniq, msg) - msg = "Unique's return_inverse=True failed with axis=0" - assert_array_equal(uniq[inv], data) - - uniq, idx, inv = unique(data, axis=1, return_index=True, - return_inverse=True) - msg = "Unique's return_index=True failed with axis=1" - assert_array_equal(data[:,idx], uniq) - msg = "Unique's return_inverse=True failed with axis=1" - assert_array_equal(uniq[:,inv], data) - - types = [] - types.extend(np.typecodes['AllInteger']) - types.extend(np.typecodes['AllFloat']) - types.append('datetime64[D]') - types.append('timedelta64[D]') - types.append([('a', int), ('b', int)]) - types.append([('a', int), ('b', float)]) - - for dtype in types: - run_axis_tests(dtype) - - assert_raises(TypeError, run_axis_tests, object) - assert_raises(TypeError, 
run_axis_tests, [('a', int), ('b', object)]) - - msg = 'Non-bitwise-equal booleans test failed' - data = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool) - result = np.array([[False, True], [True, True]], dtype=bool) - assert_array_equal(unique(data, axis=0), result, msg) - - msg = 'Negative zero equality test failed' - data = np.array([[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]]) - result = np.array([[-0.0, 0.0]]) - assert_array_equal(unique(data, axis=0), result, msg) - def test_intersect1d(self): # unique inputs a = np.array([5, 7, 1, 2]) @@ -314,6 +199,123 @@ def test_manyways(self): c2 = setdiff1d(aux2, aux1) assert_array_equal(c1, c2) +class TestUnique(TestCase): + + def test_1d_functionality(self): + a = [5, 7, 1, 2, 1, 5, 7]*10 + b = [1, 2, 5, 7] + i1 = [2, 3, 0, 1] + i2 = [2, 3, 0, 1, 0, 2, 3]*10 + + # test for numeric arrays + types = [] + types.extend(np.typecodes['AllInteger']) + types.extend(np.typecodes['AllFloat']) + types.append('datetime64[D]') + types.append('timedelta64[D]') + for dt in types: + aa = np.array(a, dt) + bb = np.array(b, dt) + self.run_1d_tests(aa, bb, i1, i2, dt) + + # test for object arrays + dt = 'O' + aa = np.empty(len(a), dt) + aa[:] = a + bb = np.empty(len(b), dt) + bb[:] = b + self.run_1d_tests(aa, bb, i1, i2, dt) + + # test for structured arrays + dt = [('', 'i'), ('', 'i')] + aa = np.array(list(zip(a, a)), dt) + bb = np.array(list(zip(b, b)), dt) + self.run_1d_tests(aa, bb, i1, i2, dt) + + def run_1d_tests(self, a, b, i1, i2, dt): + msg = "check values failed for type '%s'" % dt + v = unique(a) + assert_array_equal(v, b, msg) + + msg = "check indexes failed for type '%s'" % dt + v, j = unique(a, 1, 0) + assert_array_equal(v, b, msg) + assert_array_equal(j, i1, msg) + + msg = "check reverse indexes failed for type '%s'" % dt + v, j = unique(a, 0, 1) + assert_array_equal(v, b, msg) + assert_array_equal(j, i2, msg) + + msg = "check with all indexes failed for type '%s'" % dt + v, j1, j2 = unique(a, 1, 1) + 
assert_array_equal(v, b, msg) + assert_array_equal(j1, i1, msg) + assert_array_equal(j2, i2, msg) + + def test_unique_axis_errors(self): + assert_raises(TypeError, self.run_axis_tests, object) + assert_raises(TypeError, self.run_axis_tests, + [('a', int), ('b', object)]) + + def test_unique_axis(self): + types = [] + types.extend(np.typecodes['AllInteger']) + types.extend(np.typecodes['AllFloat']) + types.append('datetime64[D]') + types.append('timedelta64[D]') + types.append([('a', int), ('b', int)]) + types.append([('a', int), ('b', float)]) + + for dtype in types: + self.run_axis_tests(dtype) + + msg = 'Non-bitwise-equal booleans test failed' + data = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool) + result = np.array([[False, True], [True, True]], dtype=bool) + assert_array_equal(unique(data, axis=0), result, msg) + + msg = 'Negative zero equality test failed' + data = np.array([[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]]) + result = np.array([[-0.0, 0.0]]) + assert_array_equal(unique(data, axis=0), result, msg) + + def run_axis_tests(self, dtype): + data = np.array([[0, 1, 0, 0], + [1, 0, 0, 0], + [0, 1, 0, 0], + [1, 0, 0, 0]]).astype(dtype) + + msg = 'Unique with 1d array and axis=0 failed' + result = np.array([0,1]) + assert_array_equal(unique(data), result.astype(dtype), msg) + + msg = 'Unique with 2d array and axis=0 failed' + result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]]) + assert_array_equal(unique(data, axis=0), result.astype(dtype), msg) + + msg = 'Unique with 2d array and axis=1 failed' + result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]]) + assert_array_equal(unique(data, axis=1), result.astype(dtype), msg) + + msg = 'Unique with 3d array and axis=2 failed' + data3d = np.dstack([data] * 3) + result = data3d[..., :1] + assert_array_equal(unique(data3d, axis=2), result, msg) + + uniq, idx, inv = unique(data, axis=0, return_index=True, + return_inverse=True) + msg = "Unique's return_index=True failed with axis=0" + 
assert_array_equal(data[idx], uniq, msg) + msg = "Unique's return_inverse=True failed with axis=0" + assert_array_equal(uniq[inv], data) + + uniq, idx, inv = unique(data, axis=1, return_index=True, + return_inverse=True) + msg = "Unique's return_index=True failed with axis=1" + assert_array_equal(data[:,idx], uniq) + msg = "Unique's return_inverse=True failed with axis=1" + assert_array_equal(uniq[:,inv], data) if __name__ == "__main__": run_module_suite() From d9ea28dabb4701ccd75fcf53fb891402587e42b2 Mon Sep 17 00:00:00 2001 From: Joe Kington Date: Mon, 9 Sep 2013 06:35:33 -0500 Subject: [PATCH 5/5] TST: Added basic tests for an invalid axis kwarg to unique --- numpy/lib/tests/test_arraysetops.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index b114dcb57fd8..aa2a4e92f126 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -216,7 +216,7 @@ def test_1d_functionality(self): for dt in types: aa = np.array(a, dt) bb = np.array(b, dt) - self.run_1d_tests(aa, bb, i1, i2, dt) + self._run_1d_tests(aa, bb, i1, i2, dt) # test for object arrays dt = 'O' @@ -224,15 +224,15 @@ def test_1d_functionality(self): aa[:] = a bb = np.empty(len(b), dt) bb[:] = b - self.run_1d_tests(aa, bb, i1, i2, dt) + self._run_1d_tests(aa, bb, i1, i2, dt) # test for structured arrays dt = [('', 'i'), ('', 'i')] aa = np.array(list(zip(a, a)), dt) bb = np.array(list(zip(b, b)), dt) - self.run_1d_tests(aa, bb, i1, i2, dt) + self._run_1d_tests(aa, bb, i1, i2, dt) - def run_1d_tests(self, a, b, i1, i2, dt): + def _run_1d_tests(self, a, b, i1, i2, dt): msg = "check values failed for type '%s'" % dt v = unique(a) assert_array_equal(v, b, msg) @@ -254,10 +254,13 @@ def run_1d_tests(self, a, b, i1, i2, dt): assert_array_equal(j2, i2, msg) def test_unique_axis_errors(self): - assert_raises(TypeError, self.run_axis_tests, object) - 
assert_raises(TypeError, self.run_axis_tests, + assert_raises(TypeError, self._run_axis_tests, object) + assert_raises(TypeError, self._run_axis_tests, [('a', int), ('b', object)]) + assert_raises(ValueError, unique, np.arange(10), axis=2) + assert_raises(ValueError, unique, np.arange(10), axis=-2) + def test_unique_axis(self): types = [] types.extend(np.typecodes['AllInteger']) @@ -268,7 +271,7 @@ def test_unique_axis(self): types.append([('a', int), ('b', float)]) for dtype in types: - self.run_axis_tests(dtype) + self._run_axis_tests(dtype) msg = 'Non-bitwise-equal booleans test failed' data = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool) @@ -280,7 +283,7 @@ def test_unique_axis(self): result = np.array([[-0.0, 0.0]]) assert_array_equal(unique(data, axis=0), result, msg) - def run_axis_tests(self, dtype): + def _run_axis_tests(self, dtype): data = np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0],