ENH: make numpy.lib._arraysetops.intersect1d work on multiple arrays

Intersect1d can be used with multiple arrays now and also returns the indices of all arrays when using `return_indices=True`.
numpy · skuschel · Jan 25, 2024 · Feb 1, 2024 · Jan 25, 2024 · e2c956efd5e1020b0e57ed4a9c8c6348af6dff9d
commit e2c956efd5e1020b0e57ed4a9c8c6348af6dff9d
diff --git a/numpy/lib/_arraysetops_impl.py b/numpy/lib/_arraysetops_impl.py
@@ -571,28 +571,28 @@ def unique_values(x):
 
 
 def _intersect1d_dispatcher(
-        ar1, ar2, assume_unique=None, return_indices=None):
-    return (ar1, ar2)
+        *ars, assume_unique=None, return_indices=None):
+    return ars
 
 
 @array_function_dispatch(_intersect1d_dispatcher)
-def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
+def intersect1d(*ars, assume_unique=False, return_indices=False):
     """
-    Find the intersection of two arrays.
+    Find the intersection of multiple arrays.
 
-    Return the sorted, unique values that are in both of the input arrays.
+    Return the sorted, unique values that are in all of the input arrays.
 
     Parameters
     ----------
-    ar1, ar2 : array_like
-        Input arrays. Will be flattened if not already 1D.
+    *ars : array_like
+        Input arrays. Each will be flattened if not already 1D.
     assume_unique : bool
-        If True, the input arrays are both assumed to be unique, which
-        can speed up the calculation.  If True but ``ar1`` or ``ar2`` are not
-        unique, incorrect results and out-of-bounds indices could result.
+        If True, the input arrays are
8000
 all assumed to be unique, which
+        can speed up the calculation. If True but any of the arrays in ars is
+        not unique, incorrect results and out-of-bounds indices could result.
         Default is False.
     return_indices : bool
-        If True, the indices which correspond to the intersection of the two
+        If True, the indices which correspond to the intersection of all
         arrays are returned. The first instance of a value is used if there are
         multiple. Default is False.
 
@@ -602,23 +602,28 @@ def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
     -------
     intersect1d : ndarray
         Sorted 1D array of common and unique elements.
-    comm1 : ndarray
-        The indices of the first occurrences of the common values in `ar1`.
-        Only provided if `return_indices` is True.
-    comm2 : ndarray
-        The indices of the first occurrences of the common values in `ar2`.
+    *comms : list of ndarray
+        The indices of the first occurrences of the common values in `ars`.
         Only provided if `return_indices` is True.
+        comms[0] contains the indices for ars[0],
+        comms[1] contains the indices for ars[1] and so on
+
+
+    See Also
+    --------
+    numpy.lib.arraysetops : Module with a number of other functions for
+                            performing set operations on arrays.
 
     Examples
     --------
     >>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])
     array([1, 3])
 
-    To intersect more than two arrays, use functools.reduce:
+    To intersect more than two arrays, use:
 
-    >>> from functools import reduce
-    >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
-    array([3])
+    >>> ars = ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2])
+    >>> intersect1d(*ars, return_indices=True)
+    (array([3]), array([1]), array([0]), array([1]))
 
     To return the indices of the values common to the input arrays
     along with the intersected values:
@@ -632,38 +637,43 @@ def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
     (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4]))
 
     """
-    ar1 = np.asanyarray(ar1)
-    ar2 = np.asanyarray(ar2)
+    ars = [np.asanyarray(ar) for ar in ars]
 
     if not assume_unique:
         if return_indices:
-            ar1, ind1 = unique(ar1, return_index=True)
-            ar2, ind2 = unique(ar2, return_index=True)
+            inds = [None] * len(ars)
+            for i, ar in enumerate(ars):
+                ars[i], inds[i] = unique(ar, return_index=True)
         else:
-            ar1 = unique(ar1)
-            ar2 = unique(ar2)
+            for i, ar in enumerate(ars):
+                ars[i] = unique(ar)
     else:
-        ar1 = ar1.ravel()
-        ar2 = ar2.ravel()
+        for i, ar in enumerate(ars):
+            ars[i] = ar.ravel()
 
-    aux = np.concatenate((ar1, ar2))
+    aux = np.concatenate(ars)
     if return_indices:
         aux_sort_indices = np.argsort(aux, kind='mergesort')
         aux = aux[aux_sort_indices]
     else:
         aux.sort()
 
-    mask = aux[1:] == aux[:-1]
-    int1d = aux[:-1][mask]
+    # aux is sorted and each array in ars has only unique elements.
+    # The same element in a distance of len(ars)+1 away means,
+    # that the element must have been in each of the arrays in ars.
+    mask = aux[:-len(ars)+1] == aux[len(ars)-1:]
+    int1d = aux[:-len(ars)+1][mask]
 
     if return_indices:
-        ar1_indices = aux_sort_indices[:-1][mask]
-        ar2_indices = aux_sort_indices[1:][mask] - ar1.size
-        if not assume_unique:
-            ar1_indices = ind1[ar1_indices]
-            ar2_indices = ind2[ar2_indices]
-
-        return int1d, ar1_indices, ar2_indices
+        ret_indizes = [None] * len(ars)
+        offset = 0
+        for i, ar in enumerate(ars):
+            imax = aux_sort_indices.size - len(ars) + i + 1
+            ret_indizes[i] = aux_sort_indices[i:imax][mask] - offset
+            offset += ar.size
+            if not assume_unique:
+                ret_indizes[i] = inds[i][ret_indizes[i]]
+        return int1d, *ret_indizes
     else:
         return int1d