diff --git a/doc/release/upcoming_changes/12094.new_function.rst b/doc/release/upcoming_changes/12094.new_function.rst new file mode 100644 index 000000000000..137cd66ac806 --- /dev/null +++ b/doc/release/upcoming_changes/12094.new_function.rst @@ -0,0 +1,7 @@ +``ndrange`` a multi-dimensional range-like object +------------------------------------------------- +``ndrange`` behaves much like the Python 3 ``range`` object and allows +multi-dimensional slicing, iteration, reversal and efficient membership lookup. +``ndrange`` now supersedes the ``ndindex`` object for generating multi-index +iterators. ``ndindex`` will continue to be maintained for backward +compatibility. diff --git a/doc/source/reference/routines.indexing.rst b/doc/source/reference/routines.indexing.rst index aa545fd2e4f9..58e50bc8d646 100644 --- a/doc/source/reference/routines.indexing.rst +++ b/doc/source/reference/routines.indexing.rst @@ -63,6 +63,7 @@ Iterating over arrays nditer ndenumerate ndindex + ndrange nested_iters flatiter iterable diff --git a/numpy/__init__.py b/numpy/__init__.py index 13c899384842..73bbedf19f90 100644 --- a/numpy/__init__.py +++ b/numpy/__init__.py @@ -231,9 +231,9 @@ savez_compressed, unpackbits, fromregex ) from .lib._index_tricks_impl import ( - diag_indices_from, diag_indices, fill_diagonal, ndindex, ndenumerate, - ix_, c_, r_, s_, ogrid, mgrid, unravel_index, ravel_multi_index, - index_exp + diag_indices_from, diag_indices, fill_diagonal, ndindex, ndrange, + ndenumerate, ix_, c_, r_, s_, ogrid, mgrid, unravel_index, + ravel_multi_index, index_exp ) from . import matrixlib as _mat diff --git a/numpy/lib/_index_tricks_impl.py b/numpy/lib/_index_tricks_impl.py index 3014e46130e8..c9851d66a092 100644 --- a/numpy/lib/_index_tricks_impl.py +++ b/numpy/lib/_index_tricks_impl.py @@ -1,12 +1,14 @@ import functools +import itertools import sys import math import warnings +import collections.abc import numpy as np from .._utils import set_module import numpy._core.numeric as _nx -from numpy._core.numeric import ScalarType, array +from numpy._core.numeric import ScalarType, array, cumprod from numpy._core.numerictypes import issubdtype import numpy.matrixlib as matrixlib @@ -22,8 +24,8 @@ __all__ = [ 'ravel_multi_index', 'unravel_index', 'mgrid', 'ogrid', 'r_', 'c_', - 's_', 'index_exp', 'ix_', 'ndenumerate', 'ndindex', 'fill_diagonal', - 'diag_indices', 'diag_indices_from' + 's_', 'index_exp', 'ix_', 'ndenumerate', 'ndindex', 'ndrange', + 'fill_diagonal', 'diag_indices', 'diag_indices_from' ] @@ -604,7 +606,7 @@ class ndenumerate: See Also -------- - ndindex, flatiter + ndindex, flatiter, ndrange Examples -------- @@ -640,6 +642,403 @@ def __iter__(self): return self +@set_module('numpy') +class ndrange(collections.abc.Sequence): + """ + An N-dimensional range-like object for indexing arrays. + + The ndrange type represents an immutable sequence of tuples of numbers and + can be used for looping over specific multi-dimensional indicies in + ``for`` loops. Can be called as ``ndrange(stop)`` or + ``ndrange(start, stop, [step])``, see parameters below. + + When iterated, ndrange produces a sequence of tuples of integers + `(i_0, i_1, ..., i_Nminus1)` as produced by nested for-loops over ranges. + For instance, for dimension 3 this sequence is identical to that produced + by: + + ((i_0, i_1, i_2) for i_0 in range(start[0], stop[0], step[0]) + for i_1 in range(start[1], stop[1], step[1]) + for i_2 in range(start[2], stop[2], step[2])) + + While the ndrange range iterates as a flat sequence, it can be sliced along + multiple axes. The ndrange object also has attributes ``ndim`` and + ``shape`` corresponding to the number of dimensions and the number of + indexed elements along each dimension respectively. + + The ndrange object is not designed as a substitute for vectorized numeric + code. Whenever possible, vectorized routines should be used for + mathematical operations. ndrange is provided primarily for convenient for + iteration over multi-dimensional collections when no vectorized primitives + exist. + + Like the Python 3 range object, ndrange objects implement the + ``collections.abc.Sequence`` ABC and provide features such as membership + tests, element index lookup, slicing, and support for negative indices. + See the `python class range documentation + `_ for more details. + + Testing ndrange objects for equality with ``==`` and ``!=`` compares them + as sequences. See the examples for more details. + + If ndrange is called with a single positional argument, that argument + is assumed to be ``stop`` and ``start`` defaults to a tuple of zeros. + + Parameters + ---------- + stop: tuple of ints, or a single int + Specifies the stop parameter along each axis. + start: tuple of ints, or a single int, optional + If provided, must be the same length as stop. Specifies the start + parameter along each axis. + step: tuple of ints, or a single int, optional + If provided, must be the same length as stop. Specifies the increment + along each axis. If step is not provided, it defaults to a tuple of + ones. + + When a single position parameter is provided, ndrange assumes it is + the ``stop`` parameter and the ndrange constructor behaves as though it has + the signature ``ndrange(stop)``. In all other cases, ``ndrange`` + behaves as though the signalture is ``ndrange(start, stop[, step])``. + + See Also + -------- + flatiter, ndenumerate, ndindex + + Notes + ----- + .. versionadded:: 2.2.0 + + Examples + -------- + This is likely the most natural way to iterate through the sequence of + ndrange object. Here, the first parameter is the shape of the array + ``shape`` attribute of an array and we slice through the ``ndrange`` + object. + + >>> a = np.zeros((4, 4), dtype='object') + >>> # [...] # operate on ``a`` + >>> for i in np.ndrange(a.shape)[::2, 1:-1]: + ... print(i) + (0, 1) + (0, 2) + (2, 1) + (2, 2) + + >>> list(np.ndrange((2, 3))) + [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)] + + >>> list(np.ndrange((1, 2), (4, 4))) + [(1, 2), (1, 3), (2, 2), (2, 3), (3, 2), (3, 3)] + + Reversed iteration is also possible + + >>> for i in reversed(np.ndrange((3, 2))): + ... print(i) + (2, 1) + (2, 0) + (1, 1) + (1, 0) + (0, 1) + (0, 0) + + Keep in mind that ``ndrange`` is not an iterator. To obtain an iterator, + call ``iter`` first. + + >>> ndrange_iter = iter(np.ndrange((2, 2))) + >>> next(ndrange_iter) + (0, 0) + >>> for i in ndrange_iter: + ... print(i) + (0, 1) + (1, 0) + (1, 1) + + Equality comparison is also possible. Notice that + + >>> np.ndrange( + ... (0, 0), (3, 3), (2, 2)) == np.ndrange((0, 0), (4, 4), (2, 2)) + True + + returns ``True``, since the sequence generated by both those pairs of + ``ndrange`` objects would be the same. + + Testing for membership is also possible + + >>> (1, 3) in np.ndrange((1, 2), (4, 4)) + True + + >>> (0, 3) in np.ndrange((1, 2), (4, 4)) + False + + Fortran iteration ordering is possible with: + + >>> for i in np.ndrange((3, 2)).ravel('F'): + ... print(i) + ... + (0, 0) + (1, 0) + (2, 0) + (0, 1) + (1, 1) + (2, 1) + + # TODO: Remove these two bottom examples if ravel is kept in. + While native support for Fortran iteration ordering isn't supported, it can + be acheived readily through: + + >>> for i in np.ndrange((3, 2)[::-1]): + ... i = i[::-1] + ... print(i) + (0, 0) + (1, 0) + (2, 0) + (0, 1) + (1, 1) + (2, 1) + + or + + >>> for i_f in (i_c[::-1] for i_c in np.ndrange((3, 2)[::-1])): + ... print(i_f) + (0, 0) + (1, 0) + (2, 0) + (0, 1) + (1, 1) + (2, 1) + + An array of the indicies can be obtained by calling ``np.asarray``. + We strongly recommend against this method of array creation since it + is slow. + + >>> np.asarray(np.ndrange((4, 4))[::2, 1:-1]) + array([[(0, 1), (0, 2)], + [(2, 1), (2, 2)]], dtype=object) + + """ + + __slots__ = ['_start', '_stop', '_step', '_ranges'] + + def __init__(self, start=None, stop=None, step=None): + if stop is None: + stop = start + start = None + + if stop is None: + raise ValueError('``stop`` must be provided.') + + if step is not None and start is None: + raise ValueError( + '``step`` can only be provided if both ``start`` and ' + '``stop`` are provided.') + + # Is there a function that deals with this input sanitization for me? + if not isinstance(stop, collections.abc.Iterable): + stop = (stop,) + elif not isinstance(stop, tuple): + stop = tuple(stop) + + if start is None: + start = (0,) * len(stop) + elif not isinstance(start, collections.abc.Iterable): + start = (start,) + elif not isinstance(start, tuple): + start = tuple(start) + + if step is None: + step = (1,) * len(stop) + elif not isinstance(step, collections.abc.Iterable): + step = (step,) + elif not isinstance(step, tuple): + step = tuple(step) + + if len(start) != len(stop): + raise ValueError( + "``start`` and ``stop`` must have the same length.") + + if len(step) != len(stop): + raise ValueError( + "``start``, ``stop``, and ``step`` must have the same length.") + + self._start = start + self._stop = stop + self._step = step + + # Create the range objects here so that in case of invalid parameters + # ndrange fails early + self._ranges = tuple(range(*r) for r in zip(start, stop, step)) + + @property + def start(self): + """ + The value of the start parameter (or ``(0,) * len(stop)`` if the + parameter was not supplied) + """ + return self._start + + @property + def stop(self): + """ + The provided ``stop`` parameter as a tuple. + """ + return self._stop + + @property + def step(self): + """ + Tuple containing the ``step`` parameter along each axis. + """ + return self._step + + def count(self, x): + """ + Return number of occurrences of the provided tuple. + """ + return 1 if x in self else 0 + + def index(self, x, start=None, stop=None): + """ + Return the index of the provided tuple. + + raises ``ValueError`` if the index is not found. + + Parameters + ---------- + x: tuple of ints, or int + Value for which to look for in the ndrange object. + + Returns + ------- + index: tuple of ints + Index of the value x. + + """ + # Note: It was empiracally found that indexing range objects does not + # support the additional ``start`` and ``stop`` parameters even in + # Python 3.7.0. + # A relevant bug report is found https://bugs.python.org/issue34848 + if not isinstance(x, tuple): + x = (x,) + + if len(x) != len(self._ranges): + raise ValueError("{} is not in numpy.range".format(x)) + + try: + return tuple(r.index(i) for r, i in zip(self._ranges, x)) + except ValueError: + raise ValueError("{} is not in numpy.range".format(x)) + + @property + def shape(self): + return tuple(len(r) for r in self._ranges) + + @property + def ndim(self): + return len(self._ranges) + + def __iter__(self): + return itertools.product(*(iter(r) for r in self._ranges)) + + def __reversed__(self): + return itertools.product(*(reversed(r) for r in self._ranges)) + + def __contains__(self, index): + # Should we accept lists/arrays in the containment operation? + if not isinstance(index, tuple): + index = (index,) + + if len(index) != len(self._ranges): + return False + + return all(i in r for i, r in zip(index, self._ranges)) + + def __getitem__(self, sl): + if not isinstance(sl, tuple): + sl = (sl,) + + if len(sl) > len(self._ranges): + raise IndexError('too many indices for numpy.ndrange') + + try: + sl = sl + (slice(None),) * (len(self._ranges) - len(sl)) + except TypeError: + raise TypeError('numpy.ndrange indices must be tuple containing a ' + 'mixture of slices or integers.') + + new_ranges = tuple(r[s] for r, s in zip(self._ranges, sl)) + # If they are all singletons, then we need to return an index. + if all(not isinstance(r, range) for r in new_ranges): + return new_ranges + # If any of them are ranges, we need to cast all the other + # ones to ranges as well + start, stop, step = zip( + *( + (r.start, r.stop, r.step) + if isinstance(r, range) else (r, r+1, 1) + for r in new_ranges + ) + ) + + return ndrange(start, stop, step) + + def __len__(self): + length = 1 + for r in self._ranges: + length = length * len(r) + return length + + def __eq__(self, other): + # If they don't return the same length tuples, then it is always False + if len(self._ranges) != len(other._ranges): + return False + + return all(s == o for s, o in zip(self._ranges, other._ranges)) + + def __repr__(self): + s = "numpy.ndrange({start}, {stop}".format(start=self._start, + stop=self._stop) + if any(step != 1 for step in self._step): + s += ", {step}".format(step=self._step) + s += ")" + return s + + def __hash__(self): + return hash(tuple(hash(r) for r in self._ranges)) + + def __array__(self, dtype=None): + if dtype is not None: + raise ValueError('numpy.ndrange dtype must not be specified.') + + # The simple implementation of + # return asarray(list(self), dtype='object').reshape(self.shape) + # doesn't work as it will coerce tuples of length 1 to become + # singletons. This is not what I think is expected. + + # How do I assert the dtype is object? + arr = _nx.empty(self.shape, dtype='object') + for i, value in zip(ndrange(self.shape), self): + arr[i] = value + return arr + + @property + def flat(self): + # Does flat have to return a flatiter? + # Will flatiter even support lazy arrays like this one? + return iter(self) + + def ravel(self, order='C'): + # Is there a special class I need to return? + # Once again, will that special class require the numpy array to be + # dense? + if order in 'CAK': + return iter(self) + elif order == 'F': + return (i[::-1] for i in itertools.product(*self._ranges[::-1])) + else: + raise ValueError('order must be one of "C", "F", "A", or "K"') + + @set_module('numpy') class ndindex: """ @@ -655,9 +1054,16 @@ class ndindex: The size of each dimension of the array can be passed as individual parameters or as the elements of a tuple. + Notes + ----- + This function continues to be supported for backward compatibility, but you + should prefer ``ndrange``. Unlike ``ndindex``, ``ndrange`` is not an + iterator (but can return an iterator) and allows multi-dimensional slicing + for more flexible iteration. + See Also -------- - ndenumerate, flatiter + ndenumerate, flatiter, ndrange Examples -------- diff --git a/numpy/lib/tests/test_index_tricks.py b/numpy/lib/tests/test_index_tricks.py index fe1cfce2eaf8..4ab540648ffc 100644 --- a/numpy/lib/tests/test_index_tricks.py +++ b/numpy/lib/tests/test_index_tricks.py @@ -7,9 +7,13 @@ ) from numpy.lib._index_tricks_impl import ( mgrid, ogrid, ndenumerate, fill_diagonal, diag_indices, diag_indices_from, - index_exp, ndindex, c_, r_, s_, ix_ + index_exp, ndindex, ndrange, c_, r_, s_, ix_ ) +import collections.abc +import pytest +from random import shuffle + class TestRavelUnravelIndex: def test_basic(self): @@ -551,3 +555,228 @@ def test_ndindex(): # Make sure 0-sized ndindex works correctly x = list(ndindex(*[0])) assert_equal(x, []) + + +def ndrange_tester_helper(expected, not_expected, + start=None, stop=None, step=None, + slices=None): + + kwargs = {} + if start is not None: + kwargs['start'] = start + if stop is not None: + kwargs['stop'] = stop + if step is not None: + kwargs['step'] = step + + r = ndrange(**kwargs) + if isinstance(stop, collections.abc.Iterable): + assert r.ndim == len(stop) + else: + assert r.ndim == 1 + + if slices is not None: + r = r[slices] + + assert_equal(len(expected), len(r)) + + assert stop not in r + assert isinstance(r, collections.abc.Sequence) + + # copy the list + shuffled = list(expected) + shuffle(shuffled) + # Make sure we can assert identity in a random order. + # This is important since if `__contains__` traverses the iterator, + # Then it won't find an element when probed repeatedly. + for e in shuffled: + assert e in r + assert r.count(e) == 1 + + for e in not_expected: + assert e not in r + assert r.count(e) == 0 + assert_raises(ValueError, r.index, e) + + arr = np.asarray(r) + assert_equal(arr.shape, r.shape) + for i, e in enumerate(expected): + indx = np.unravel_index(i, r.shape) + assert_equal(expected[i], r[indx]) + assert_equal(r.index(e), indx) + assert_equal(arr[indx], r[indx]) + + x = list(iter(r)) + assert_array_equal(x, expected) + x_reversed = list(reversed(r)) + e_reversed = expected[::-1] + assert_array_equal(x_reversed, e_reversed) + + for arr_item, list_item in zip(r.flat, x): + assert arr_item == list_item + + for indx, value in enumerate(r.ravel()): + assert expected[indx] == value + + for order in 'CAK': + for indx, value in enumerate(r.ravel(order)): + assert expected[indx] == value + + r_f_order = (i[::-1] for i in ndrange(start=r.start[::-1], + stop=r.stop[::-1], + step=r.step[::-1])) + for raveled, other in zip(r.ravel('F'), r_f_order): + assert raveled == other + + +def test_ndrange(): + expected = [(r,) for r in range(5)] + not_expected = [(-1,), (7,), (1, 2)] + ndrange_tester_helper(expected, not_expected, stop=5) + # 1D + expected = [(0,), (2,)] + ndrange_tester_helper( + expected, not_expected, stop=5, slices=slice(0, 4, 2)) + + # 2D + expected = [(1, 0), (1, 3), (1, 6), + (3, 0), (3, 3), (3, 6)] + ndrange_tester_helper(expected, not_expected, + stop=(4, 9), slices=np.s_[1::2, ::3]) + ndrange_tester_helper(expected, not_expected, + stop=(4, 9), slices=np.s_[1::2, ::3]) + + expected = [(0, 0), (0, 1)] + not_expected = [(-1, 0), (3, 1), (0, 3)] + ndrange_tester_helper(expected, not_expected, + stop=(1, 2)) + ndrange_tester_helper(expected, not_expected, + stop=(1, 2), start=(0, 0)) + + expected = [(0, 0), (1, 0)] + ndrange_tester_helper(expected, not_expected, stop=[2, 1]) + ndrange_tester_helper(expected, not_expected, stop=(2, 1), start=(0, 0)) + + expected = [] + ndrange_tester_helper(expected, not_expected, stop=(0, 0), start=(1, 2)) + ndrange_tester_helper(expected, not_expected, stop=[5, 0], start=(1, 2)) + expected = [(0, 1), (0, 3), + (3, 1), (3, 3)] + not_expected = [(-1, 0), (3, 2), (0, 4)] + ndrange_tester_helper(expected, not_expected, + start=(0, 1), stop=(4, 5), step=(3, 2)) + + +def test_ndrange_failers(): + assert_raises(ValueError, ndrange, (1,), (1, 2)) + assert_raises(ValueError, ndrange, (0, 0), (1, 2), (0,)) + assert_raises(ValueError, ndrange, (0, 0), (1,), (1, 2)) + assert_raises(ValueError, ndrange, (0,), (1, 2), (1, 2)) + + assert_raises(ValueError, ndrange, (0, 0), (1, 2), 0) + assert_raises(ValueError, ndrange, (0, 0), 1, (1, 2)) + assert_raises(ValueError, ndrange, 0, (1, 2), (1, 2)) + + assert_raises(ValueError, ndrange, (0, 0), (1, 2), (0, 1, 2)) + assert_raises(ValueError, ndrange, (0, 0), (1, 2, 3), (1, 2)) + assert_raises(ValueError, ndrange, (0, 1, 2), (1, 2), (1, 2)) + + assert_raises(TypeError, ndrange, (.0, 1, 2), (0, 1, 2), (1, 1, 2)) + + assert_raises(TypeError, ndrange, + (np.float32(0.1), 1, 2), (0, 1, 2), (1, 1, 2)) + assert_raises(TypeError, ndrange, + (np.float64(0.1), 1, 2), (0, 1, 2), (1, 1, 2)) + assert_raises(TypeError, ndrange, (0, 1, 2), (0, .1, 2), (1, 1, 2)) + assert_raises(TypeError, ndrange, (0, 1, 2), (0, 1, 2), (1, .1, 2)) + + assert_raises(ValueError, ndrange, (0, 1, 2), (0, 1, 2), (0, 1, 1)) + assert_raises(ValueError, ndrange, 0, 1, 0) + + assert_raises(ValueError, ndrange((4, 2)).index, (4, 0)) + assert_raises(ValueError, ndrange((4, 2)).index, (0, 2)) + assert_raises(ValueError, ndrange((4, 2)).index, (0)) + assert_raises(ValueError, ndrange((4, 2)).index, (-1, 0)) + + assert_raises(IndexError, ndrange((4, 2)).__getitem__, (0, 0, 0)) + + assert_raises(ValueError, ndrange) + assert_raises(ValueError, ndrange, None) + + # Step can only be provided with both start and stop + assert_raises(ValueError, ndrange, 2, step=1) + assert_raises(ValueError, ndrange, stop=2, step=1) + assert_raises(ValueError, ndrange, start=1, stop=None, step=1) + assert_raises(ValueError, ndrange, start=None, stop=1, step=1) + + +def test_ndrange_properties(): + stop = (1, 2, 3) + r = ndrange(stop) + assert_equal(r.stop, stop) + assert_equal(r.start, (0,) * 3) + assert_equal(r.step, (1,) * 3) + + start = (0, 1, 2) + r = ndrange(start, stop) + assert_equal(r.start, start) + assert_equal(r.stop, stop) + assert_equal(r.step, (1,) * 3) + + step = (-1, 2, 2) + r = ndrange(start, stop, step) + assert_equal(r.start, start) + assert_equal(r.stop, stop) + assert_equal(r.step, step) + + +def test_ndrange_equality(): + r1 = np.ndrange((0, 0), (3, 3), (2, 2)) + r2 = np.ndrange([0, 0], [4, 4], [2, 2]) + r3 = np.ndrange([0, 0], [4, 4], [1, 2]) + assert_equal(r1, r2) + assert r1 != r3 + assert_equal(hash(r1), hash(r2)) + assert hash(r1) != hash(r3) + + +def test_ndrange_negative(): + expected = [(r,) for r in range(4, -1, -1)] + not_expected = [(-1,), (7,), (1, 2)] + ndrange_tester_helper(expected, not_expected, start=4, stop=-1, step=-1) + # 1D + expected = [(2,), (0,)] + ndrange_tester_helper( + expected, not_expected, stop=5, slices=slice(2, None, -2)) + + # 2D + expected = [(1, -3), (1, 0), (1, 3), (1, 6), + (3, -3), (3, 0), (3, 3), (3, 6)] + ndrange_tester_helper(expected, not_expected, + start=(np.uint8(0), np.int16(-3)), + stop=(4, np.uint16(9)), + slices=np.s_[1::2, ::3]) + + expected = [(0, 1), (0, 0)] + not_expected = [(-1, 0), (3, 1), (0, 3)] + ndrange_tester_helper(expected, not_expected, + start=(0, 1), stop=(np.int64(-1), -1), step=(-1, -1)) + + expected = [(0, 0), (-1, 0)] + not_expected = [(0, 10), (3, -1), (0, 3)] + ndrange_tester_helper(expected, not_expected, + start=(0, 0), stop=[-2, -1], step=(-1, -1)) + + expected = [] + ndrange_tester_helper(expected, not_expected, stop=(-1, 0), start=(0, 2)) + ndrange_tester_helper(expected, not_expected, stop=[5, -1], start=(-1, 2)) + + +@pytest.mark.parametrize('s_func', [str, repr]) +def test_ndrange_repr(s_func): + assert s_func(np.ndrange(5)) == 'numpy.ndrange((0,), (5,))' + assert s_func(np.ndrange(0, 5, 2)) == 'numpy.ndrange((0,), (5,), (2,))' + assert s_func(np.ndrange([0, 0], (1, 5), (1, 2))) == \ + 'numpy.ndrange((0, 0), (1, 5), (1, 2))' + assert s_func(np.ndrange((0, 0), [1, 5], (1, 1))) == \ + 'numpy.ndrange((0, 0), (1, 5))'