-
-
Notifications
You must be signed in to change notification settings - Fork 11.1k
ENH: Add index_tricks.as_index_tuple #8276
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
9181402
be959e8
69ed623
431c339
5afeb16
a86cc8c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
Approaches #8275
- Loading branch information
There are no files selected for viewing
8000
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -511,6 +511,98 @@ def test_indexing_array_negative_strides(self): | |
arr[slices] = 10 | ||
assert_array_equal(arr, 10.) | ||
|
||
def test_as_index_tuple(self): | ||
from numpy.core.multiarray import as_index_tuple | ||
|
||
arr = np.array([1]) | ||
sl = np.s_[:] | ||
ell = Ellipsis | ||
obj = object() | ||
|
||
# scalars are wrapped in a 1-tuple | ||
assert_equal(as_index_tuple(1), (1,)) | ||
assert_equal(as_index_tuple(ell), (ell,)) | ||
assert_equal(as_index_tuple(None), (None,)) | ||
assert_equal(as_index_tuple(sl), (sl,)) | ||
assert_equal(as_index_tuple(arr), (arr,)) | ||
assert_equal(as_index_tuple(obj), (obj,)) | ||
|
||
# tuples are untouched | ||
assert_equal(as_index_tuple((1, 2, 3)), (1, 2, 3)) | ||
assert_equal(as_index_tuple((1, 2, ell)), (1, 2, ell)) | ||
assert_equal(as_index_tuple((1, 2, None)), (1, 2, None)) | ||
assert_equal(as_index_tuple((1, 2, sl)), (1, 2, sl)) | ||
assert_equal(as_index_tuple((1, 2, arr)), (1, 2, arr)) | ||
|
||
# sequences of scalars are wrapped | ||
assert_equal(as_index_tuple([1, 2, 3]), ([1, 2, 3],)) | ||
|
||
# sequences containing slice objects or ellipses are tuple-ified | ||
assert_equal(as_index_tuple([1, 2, ell]), (1, 2, ell)) | ||
assert_equal(as_index_tuple([1, 2, None]), (1, 2, None)) | ||
assert_equal(as_index_tuple([1, 2, sl]), (1, 2, sl)) | ||
assert_equal(as_index_tuple([1, 2, arr]), (1, 2, arr)) | ||
|
||
# unless they are >= np.MAXDIMS, in which case they are always wrapped | ||
nd = np.MAXDIMS | ||
assert_equal(as_index_tuple(nd * [1]), (nd * [1],)) | ||
assert_equal(as_index_tuple(nd * [ell]), (nd * [ell],)) | ||
assert_equal(as_index_tuple(nd * [None]), (nd * [None],)) | ||
assert_equal(as_index_tuple(nd * [sl]), (nd * [sl],)) | ||
assert_equal(as_index_tuple(nd * [arr]), (nd * [arr],)) | ||
|
||
def test_as_index_tuple_broken_getitem(self): | ||
from numpy.core.multiarray import as_index_tuple | ||
|
||
# test sequences with a broken __getitem__ | ||
def make_broken_sequence(base, items): | ||
class Broken(base): | ||
def __len__(self): | ||
return len(items) | ||
def __getitem__(self, i): | ||
val = items[i] | ||
if isinstance(val, Exception): | ||
raise val | ||
return val | ||
return Broken() | ||
|
||
# error comes first, so just treat as a scalar | ||
idx = make_broken_sequence(object, [1, ValueError(), None]) | ||
assert_raises(ValueError, operator.getitem, idx, 1) | ||
assert_equal(as_index_tuple(idx), (idx,)) | ||
|
||
# none comes first, so commit to making the tuple | ||
idx = make_broken_sequence(object, [1, None, ValueError()]) | ||
assert_raises(ValueError, operator.getitem, idx, 2) | ||
assert_raises(ValueError, as_index_tuple, idx) | ||
|
||
# tuples subclasses error in both cases | ||
idx = make_broken_sequence(tuple, [1, ValueError(), None]) | ||
assert_raises(ValueError, operator.getitem, idx, 1) | ||
assert_raises(ValueError, as_index_tuple, idx) | ||
|
||
idx = make_broken_sequence(tuple, [1, None, ValueError()]) | ||
assert_raises(ValueError, operator.getitem, idx, 2) | ||
assert_raises(ValueError, as_index_tuple, idx) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This and the test above it is a behaviour change - previously, This is because Do we want to keep the old behaviour, or is it a bug? |
||
|
||
def test_as_index_tuple_broken_len(self): | ||
from numpy.core.multiarray import as_index_tuple | ||
|
||
def make_badlen_sequence(base): | ||
class cls(base): | ||
def __len__(self): raise ValueError | ||
def __getitem__(self, i): raise IndexError | ||
return cls() | ||
|
||
idx = make_badlen_sequence(object) | ||
assert_raises(ValueError, len, idx) | ||
assert_equal(as_index_tuple(idx), (idx,)) | ||
|
||
idx = make_badlen_sequence(tuple) | ||
assert_raises(ValueError, len, idx) | ||
assert_raises(ValueError, as_index_tuple, idx) | ||
|
||
|
||
class TestFieldIndexing(TestCase): | ||
def test_scalar_return_type(self): | ||
# Field access on an array should return an array, even if it | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This makes sense, but it doesn't really explain what it does do. There are lots of types of index normalization beyond ensuring you always get a tuple. It would be valuable to describe the precise type of the return value (e.g., can the result include Ellipsis? or lists? or arrays with boolean dtype?).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Stephan, you are a subclasser ;). Would having more of the normalization/information available be worth it? I am also a bit thinking again about the new indexing attributes idea in combination with subclasses.
Right now, this does not really change the index much at all, but I mean we could give all information that numpy has after its index preperation step without much hassle. Like that indexer object which is like
Dunno, you probably saw this suggestion back in the day, but....
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The question is, is there any intermediate result of
prepare_index
that might in some situations be more useful than the final result? Right now, all I can think of is allowing non-integernp.array
s. Any one able to come up with something else?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess non-int arrays are somewhat thinkable since someone might do something like (texture) interpolation based on floating indices. Other then that, I have my doubts ;). The thing is, you can only go that far and actually maintain numpy compatibility.... It would otherwise quickly become some other kind of indexing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I feel the only reason you'd want this feature in the first place is to implement some custom form of indexing that diverges from a numpy index. In the past, I've considered:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To be clear, I only use duck-array types, no subclasses!
I don't know if this would be directly useful (maybe?) -- it's hard to say without a clear description of exactly what it does! In practice I would guess that most folks who write customized indexing code that works mostly but not exactly like NumPy will roll their own normalization (like I did in xarray).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@shoyer actually, my biggest reason to think about it was making masked array indexing subclassing with oindex/vindex, etc. not a huge pile of odd python code (instead put the pile of odd code to C to make it better reusable ;)).
I understand that the casting part (or replacement of bool arrays, not allowing some odder objects) might not be ideal, but on the other hand, information like "numpy would make the result a copy, a scalar or a view" or expanding the ellipsis might be very nice. The tuple normalization is a rather small thing (it basically checks for a non-array sequence of length <= np.MAXDIMS, and if it contains another sequence, slice or None consider it a tuple).
For classes which do basically the same as numpy, it would seem more useful to actually provide some more information, for other classes I am unsure that providing this hack is actually much useful. Which basically just asks the question whether it is worth putting it in numpy instead of just stealing the normalization code from xarray ;).