API: make numpy.lib._arraysetops.intersect1d work on multiple arrays by skuschel · Pull Request #25688 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

API: make numpy.lib._arraysetops.intersect1d work on multiple arrays #25688

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
ENH: make numpy.lib._arraysetops.intersect1d work on multiple arrays
Intersect1d can be used with multiple arrays now and also returns the
indices of all arrays when using `return_indices=True`.
  • Loading branch information
skuschel committed Jan 25, 2024
commit e2c956efd5e1020b0e57ed4a9c8c6348af6dff9d
86 changes: 48 additions & 38 deletions numpy/lib/_arraysetops_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,28 +571,28 @@ def unique_values(x):


def _intersect1d_dispatcher(
ar1, ar2, assume_unique=None, return_indices=None):
return (ar1, ar2)
*ars, assume_unique=None, return_indices=None):
return ars


@array_function_dispatch(_intersect1d_dispatcher)
def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
def intersect1d(*ars, assume_unique=False, return_indices=False):
"""
Find the intersection of two arrays.
Find the intersection of multiple arrays.

Return the sorted, unique values that are in both of the input arrays.
Return the sorted, unique values that are in all of the input arrays.

Parameters
----------
ar1, ar2 : array_like
Input arrays. Will be flattened if not already 1D.
*ars : array_like
Input arrays. Each will be flattened if not already 1D.
assume_unique : bool
If True, the input arrays are both assumed to be unique, which
can speed up the calculation. If True but ``ar1`` or ``ar2`` are not
unique, incorrect results and out-of-bounds indices could result.
If True, the input arrays are all assumed to be unique, which
can speed up the calculation. If True but any of the arrays in ars is
not unique, incorrect results and out-of-bounds indices could result.
Default is False.
return_indices : bool
If True, the indices which correspond to the intersection of the two
If True, the indices which correspond to the intersection of all
arrays are returned. The first instance of a value is used if there are
multiple. Default is False.

Expand All @@ -602,23 +602,28 @@ def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
-------
intersect1d : ndarray
Sorted 1D array of common and unique elements.
comm1 : ndarray
The indices of the first occurrences of the common values in `ar1`.
Only provided if `return_indices` is True.
comm2 : ndarray
The indices of the first occurrences of the common values in `ar2`.
*comms : list of ndarray
The indices of the first occurrences of the common values in `ars`.
Only provided if `return_indices` is True.
comms[0] contains the indices for ars[0],
comms[1] contains the indices for ars[1] and so on


See Also
--------
numpy.lib.arraysetops : Module with a number of other functions for
performing set operations on arrays.

Examples
--------
>>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])
array([1, 3])

To intersect more than two arrays, use functools.reduce:
To intersect more than two arrays, use:

>>> from functools import reduce
>>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
array([3])
>>> ars = ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2])
>>> intersect1d(*ars, return_indices=True)
(array([3]), array([1]), array([0]), array([1]))

To return the indices of the values common to the input arrays
along with the intersected values:
Expand All @@ -632,38 +637,43 @@ def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
(array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4]))

"""
ar1 = np.asanyarray(ar1)
ar2 = np.asanyarray(ar2)
ars = [np.asanyarray(ar) for ar in ars]

if not assume_unique:
if return_indices:
ar1, ind1 = unique(ar1, return_index=True)
ar2, ind2 = unique(ar2, return_index=True)
inds = [None] * len(ars)
for i, ar in enumerate(ars):
ars[i], inds[i] = unique(ar, return_index=True)
else:
ar1 = unique(ar1)
ar2 = unique(ar2)
for i, ar in enumerate(ars):
ars[i] = unique(ar)
else:
ar1 = ar1.ravel()
ar2 = ar2.ravel()
for i, ar in enumerate(ars):
ars[i] = ar.ravel()

aux = np.concatenate((ar1, ar2))
aux = np.concatenate(ars)
if return_indices:
aux_sort_indices = np.argsort(aux, kind='mergesort')
aux = aux[aux_sort_indices]
else:
aux.sort()

mask = aux[1:] == aux[:-1]
int1d = aux[:-1][mask]
# aux is sorted and each array in ars has only unique elements.
# Two equal elements at a distance of len(ars)-1 therefore mean
# that the element occurs in every one of the arrays in ars.
mask = aux[:-len(ars)+1] == aux[len(ars)-1:]
int1d = aux[:-len(ars)+1][mask]

if return_indices:
ar1_indices = aux_sort_indices[:-1][mask]
ar2_indices = aux_sort_indices[1:][mask] - ar1.size
if not assume_unique:
ar1_indices = ind1[ar1_indices]
ar2_indices = ind2[ar2_indices]

return int1d, ar1_indices, ar2_indices
ret_indizes = [None] * len(ars)
offset = 0
for i, ar in enumerate(ars):
imax = aux_sort_indices.size - len(ars) + i + 1
ret_indizes[i] = aux_sort_indices[i:imax][mask] - offset
offset += ar.size
if not assume_unique:
ret_indizes[i] = inds[i][ret_indizes[i]]
return int1d, *ret_indizes
else:
return int1d

Expand Down
0