ENH: Add an "axis" kwarg to numpy.unique

numpy · joferkington · Aug 7, 2013 · Sep 9, 2013 · Sep 9, 2013 · Sep 9, 2013
commit 4fcf6a8412de167a085e3cfa48d457bd76db45a1
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
@@ -90,7 +90,7 @@ def ediff1d(ary, to_end=None, to_begin=None):
 
     return ed
 
-def unique(ar, return_index=False, return_inverse=False):
+def unique(ar, return_index=False, return_inverse=False, axis=None):
     """
     Find the unique elements of an array.
 
@@ -102,13 +102,18 @@ def unique(ar, return_index=False, return_inverse=False):
     Parameters
     ----------
     ar : array_like
-        Input array. This will be flattened if it is not already 1-D.
+        Input array. Unless `axis` is specified, this will be flattened if it
+        is not already 1-D.
     return_index : bool, optional
-        If True, also return the indices of `ar` that result in the unique
-        array.
+        If True, also return the indices of `ar` along the specified axis that
+        result in the unique array.
     return_inverse : bool, optional
-        If True, also return the indices of the unique array that can be used
-        to reconstruct `ar`.
+        If True, also return the indices of the unique array along the
+        specified axis that can be used to reconstruct `ar`.
+    axis : int or None, optional
+        The axis to operate on. If None, `ar` will be flattened beforehand.
+        Object arrays or structured arrays that contain objects are not 
+        supported if the `axis` kwarg is used.
 
     Returns
     -------
@@ -134,6 +139,12 @@ def unique(ar, return_index=False, return_inverse=False):
     >>> np.unique(a)
     array([1, 2, 3])
 
+    Return the unique rows of a 2D array:
+
+    >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
+    >>> np.unique(a, axis=0)
+    array([[1, 0, 0], [2, 3, 4]])
+
     Return the indices of the original array that give the unique values:
 
     >>> a = np.array(['a', 'b', 'b', 'c', 'a'])
@@ -158,6 +169,45 @@ def unique(ar, return_index=False, return_inverse=False):
     >>> u[indices]
     array([1, 2, 6, 4, 2, 3, 2])
 
+    """
+    if axis is None or ar.ndim == 1:
+        return _unique1d(ar, return_index, return_inverse)
+
+    ar = np.swapaxes(ar, axis, 0)
+    orig_shape, orig_dtype = ar.shape, ar.dtype
+    # Must reshape to a contiguous 2D array for this to work...
+    ar = ar.reshape(orig_shape[0], -1)
+    ar = np.ascontiguousarray(ar)
+
+    if ar.dtype.char in (np.typecodes['AllInteger'] + 'S'):
+        # Optimization inspired by <http://stackoverflow.com/a/16973510/325565>
+        dtype = np.dtype((np.void, ar.dtype.itemsize * ar.shape[1]))
+    else:
+        dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]
+
+    try:
+        consolidated = ar.view(dtype)
+    except TypeError:
+        # There's no good way to do this for object arrays, etc...
+        msg = 'The axis argument to unique is not supported for dtype {dt}'
+        raise TypeError(msg.format(dt=ar.dtype))
+
+    def reshape_uniq(uniq):
+        uniq = uniq.view(orig_dtype)
+        uniq = uniq.reshape(-1, *orig_shape[1:])
+        uniq = np.swapaxes(uniq, 0, axis)
+        return uniq
+
+    output = _unique1d(consolidated, return_index, return_inverse)
+    if not (return_index or return_inverse):
+        return reshape_uniq(output)
+    else:
+        uniq = reshape_uniq(output[0])
+        return tuple([uniq] + list(output[1:]))
+
+def _unique1d(ar, return_index=False, return_inverse=False):
+    """
+    Find the unique elements of an array.
     """
     try:
         ar = ar.flatten()

diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
@@ -65,6 +65,68 @@ def check_all(a, b, i1, i2, dt):
         bb = np.array(list(zip(b, b)), dt)
         check_all(aa, bb, i1, i2, dt)
 
+    def test_unique_axis(self):
+        """Check `unique` with the `axis` keyword across a range of dtypes."""
+        def run_axis_tests(dtype):
+            # Rows 0/2 and 1/3 are duplicates; columns 2/3 are duplicates.
+            data = np.array([[0, 1, 0, 0],
+                             [1, 0, 0, 0],
+                             [0, 1, 0, 0],
+                             [1, 0, 0, 0]]).astype(dtype)
+
+            # Without `axis` the input is flattened before deduplication.
+            msg = 'Unique with 2d array and axis=None failed'
+            result = np.array([0, 1])
+            assert_array_equal(unique(data), result.astype(dtype), msg)
+
+            msg = 'Unique with 2d array and axis=0 failed'
+            result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]])
+            assert_array_equal(unique(data, axis=0), result.astype(dtype), msg)
+
+            msg = 'Unique with 2d array and axis=1 failed'
+            result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]])
+            assert_array_equal(unique(data, axis=1), result.astype(dtype), msg)
+
+            # Three identical slices stacked along axis 2 collapse to one.
+            msg = 'Unique with 3d array and axis=2 failed'
+            data3d = np.dstack([data] * 3)
+            result = data3d[..., :1]
+            assert_array_equal(unique(data3d, axis=2), result, msg)
+
+            # The index/inverse outputs must round-trip along the chosen axis.
+            uniq, idx, inv = unique(data, axis=0, return_index=True,
+                                    return_inverse=True)
+            msg = "Unique's return_index=True failed with axis=0"
+            assert_array_equal(data[idx], uniq, msg)
+            msg = "Unique's return_inverse=True failed with axis=0"
+            assert_array_equal(uniq[inv], data, msg)
+
+            uniq, idx, inv = unique(data, axis=1, return_index=True,
+                                    return_inverse=True)
+            msg = "Unique's return_index=True failed with axis=1"
+            assert_array_equal(data[:, idx], uniq, msg)
+            msg = "Unique's return_inverse=True failed with axis=1"
+            assert_array_equal(uniq[:, inv], data, msg)
+
+        types = []
+        types.extend(np.typecodes['AllInteger'])
+        types.extend(np.typecodes['AllFloat'])
+        types.append('datetime64[D]')
+        types.append('timedelta64[D]')
+        types.append([('a', int), ('b', int)])
+        types.append([('a', int), ('b', float)])
+
+        for dtype in types:
+            run_axis_tests(dtype)
+
+        # Object dtypes (plain or inside a structured dtype) are expected to
+        # be rejected once `axis` is used.
+        assert_raises(TypeError, run_axis_tests, object)
+        assert_raises(TypeError, run_axis_tests, [('a', int), ('b', object)])
+
+        # Every byte >= 1 is True as a bool, so rows whose raw bytes differ
+        # must still compare equal.
+        msg = 'Non-bitwise-equal booleans test failed'
+        data = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool)
+        result = np.array([[False, True], [True, True]], dtype=bool)
+        assert_array_equal(unique(data, axis=0), result, msg)
+
+        # -0.0 == 0.0, so all four rows are the same row.
+        msg = 'Negative zero equality test failed'
+        data = np.array([[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]])
+        result = np.array([[-0.0, 0.0]])
+        assert_array_equal(unique(data, axis=0), result, msg)
+
+
     def test_intersect1d(self):
         # unique inputs
         a = np.array([5, 7, 1, 2])