MAINT: Overhaul function to try and increase speed · numpy/numpy@5afeb16 · GitHub
[go: up one dir, main page]

Skip to content

Commit 5afeb16

Browse files
committed
MAINT: Overhaul function to try and increase speed
This eliminates all tuple constructors, in favor of working with a C array
1 parent 431c339 commit 5afeb16

File tree

1 file changed

+113
-76
lines changed

1 file changed

+113
-76
lines changed

numpy/core/src/multiarray/mapping.c

Lines changed: 113 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -139,9 +139,10 @@ PyArray_MapIterSwapAxes(PyArrayMapIterObject *mit, PyArrayObject **ret, int getm
139139
*ret = (PyArrayObject *)new;
140140
}
141141

142-
143142
/**
144-
* Prepare an index argument into a tuple
143+
* Prepare an index argument into a c-array of indices.
144+
*
145+
* Returns the number of indices, or -1 on failure
145146
*
146147
* This mainly implements the following section from the advanced indexing docs:
147148
* > In order to remain backward compatible with a common usage in Numeric,
@@ -150,100 +151,137 @@ PyArray_MapIterSwapAxes(PyArrayMapIterObject *mit, PyArrayObject **ret, int getm
150151
* > or the newaxis object, but not for integer arrays or other embedded
151152
* > sequences.
152153
*
153-
* This also promotes scalars to 1-tuples, and downcasts tuple subclasses
154+
* This promotes scalars to 1-tuples.
154155
*
155156
* @param the index object, which may or may not be a tuple
156157
*
157-
* @returns the index converted to a tuple, if possible, else NULL on an error
158-
* It is the caller's responsibility to call Py_DECREF on a non-null
159-
* result, even if it is the same as the input.
158+
* @returns the index converted to null-terminated pyobject array. The caller
159+
* must call PyArray_Free
160160
*/
161-
NPY_NO_EXPORT PyObject *
162-
prepare_index_tuple(PyObject *index)
161+
NPY_NO_EXPORT npy_intp
162+
prepare_index_tuple(PyObject *index, PyObject **result)
163163
{
164-
int i;
165-
npy_intp n;
166-
npy_bool make_tuple = 0;
164+
npy_intp n, i;
165+
npy_bool commit_to_unpack;
167166

168-
PyObject *index_as_tuple = index;
167+
/* fast route for passing a tuple */
168+
if (PyTuple_CheckExact(index)) {
169+
n = PyTuple_GET_SIZE(index);
170+
if (n > NPY_MAXDIMS * 2) {
171+
PyErr_SetString(PyExc_IndexError,
172+
"too many indices for array");
173+
return -1;
174+
}
175+
for (i = 0; i < n; i++) {
176+
result[i] = PyTuple_GET_ITEM(index, i);
177+
}
178+
return n;
179+
}
169180

170-
if (!PyTuple_CheckExact(index)
171-
/* Next three are just to avoid slow checks */
181+
/* Obvious single-entry cases */
182+
if (0
172183
#if !defined(NPY_PY3K)
173-
&& (!PyInt_CheckExact(index))
184+
|| PyInt_CheckExact(index)
174185
#else
175-
&& (!PyLong_CheckExact(index))
186+
|| PyLong_CheckExact(index)
176187
#endif
177-
&& (index != Py_None)
178-
&& (!PySlice_Check(index))
179-
&& (!PyArray_Check(index))
180-
&& (PySequence_Check(index))) {
181-
/*
182-
* Sequences < NPY_MAXDIMS with any slice objects
183-
* or newaxis, Ellipsis or other arrays or sequences
184-
* embedded, are considered equivalent to an indexing
185-
* tuple. (`a[[[1,2], [3,4]]] == a[[1,2], [3,4]]`)
186-
*/
188+
|| index == Py_None
189+
|| PySlice_Check(index)
190+
|| PyArray_Check(index)
191+
|| !PySequence_Check(index)) {
187192

188-
if (PyTuple_Check(index)) {
189-
/* If it is already a tuple, make it an exact tuple anyway */
190-
n = 0;
191-
make_tuple = 1;
192-
}
193-
else {
194-
n = PySequence_Size(index);
193+
result[0] = index;
194+
return 1;
195+
}
196+
197+
/* passing a tuple subclass - needs to handle errors */
198+
if (PyTuple_Check(index)) {
199+
n = PySequence_Size(index);
200+
if (n < 0) {
201+
return -1;
195202
}
196-
if (n < 0 || n >= NPY_MAXDIMS) {
197-
n = 0;
203+
if (n > NPY_MAXDIMS * 2) {
204+
PyErr_SetString(PyExc_IndexError,
205+
"too many indices for array");
206+
return -1;
198207
}
199208
for (i = 0; i < n; i++) {
200-
PyObject *tmp_obj = PySequence_GetItem(index, i);
201-
/* if getitem fails (unusual) treat this as a single index */
209+
result[i] = PySequence_GetItem(index, i);
210+
if (result[i] == NULL) {
211+
return -1;
212+
}
213+
}
214+
return n;
215+
}
216+
217+
/* At this point, we're left with a non-tuple, non-array, sequence:
218+
* typically, a list
219+
*
220+
* Sequences < NPY_MAXDIMS with any slice objects
221+
* or newaxis, Ellipsis or other arrays or sequences
222+
* embedded, are considered equivalent to an indexing
223+
* tuple. (`a[[[1,2], [3,4]]] == a[[1,2], [3,4]]`)
224+
*/
225+
226+
/* if len fails, treat like a scalar */
227+
n = PySequence_Size(index);
228+
if (n < 0) {
229+
PyErr_Clear();
230+
result[0] = index;
231+
return 1;
232+
}
233+
234+
/* for some reason, anything that's long but not too long is turned into
235+
* a single index. The *2 is missing here for backward-compatibility. */
236+
if (n >= NPY_MAXDIMS) {
237+
result[0] = index;
238+
return 1;
239+
}
240+
241+
/* Some other type of short sequence - assume we should unpack it like a
242+
* tuple, until we find something that proves us wrong */
243+
commit_to_unpack = 0;
244+
for (i = 0; i < n; i++) {
245+
PyObject *tmp_obj = result[i] = PySequence_GetItem(index, i);
246+
247+
if (commit_to_unpack) {
248+
/* propagate errors */
249+
if (tmp_obj == NULL) {
250+
return -1;
251+
}
252+
}
253+
else {
254+
/* if getitem fails (unusual) before we've committed, then
255+
* commit to not unpacking */
202256
if (tmp_obj == NULL) {
203257
PyErr_Clear();
204-
make_tuple = 0;
205258
break;
206259
}
207-
if (PyArray_Check(tmp_obj) || PySequence_Check(tmp_obj)
208-
|| PySlice_Check(tmp_obj) || tmp_obj == Py_Ellipsis
260+
261+
/* decide if we should treat this sequence like a tuple */
262+
if (PyArray_Check(tmp_obj)
263+
|| PySequence_Check(tmp_obj)
264+
|| PySlice_Check(tmp_obj)
265+
|| tmp_obj == Py_Ellipsis
209266
|| tmp_obj == Py_None) {
210-
make_tuple = 1;
211-
Py_DECREF(tmp_obj);
212-
break;
267+
commit_to_unpack = 1;
213268
}
214-
Py_DECREF(tmp_obj);
215269
}
216270

217-
if (make_tuple) {
218-
/* We want to interpret it as a tuple, so make it one */
219-
index_as_tuple = PySequence_Tuple(index);
220-
if (index_as_tuple == NULL) {
221-
return NULL;
222-
}
223-
}
271+
Py_DECREF(tmp_obj);
224272
}
225273

226-
/* If the index is not a tuple, convert it into (index,) */
227-
if (!make_tuple && !PyTuple_CheckExact(index)) {
228-
make_tuple = 1;
229-
index_as_tuple = PyTuple_Pack(1, index);
230-
}
231-
/* Otherwise, check if the tuple is too long */
232-
else if (PyTuple_GET_SIZE(index_as_tuple) > NPY_MAXDIMS * 2) {
233-
PyErr_SetString(PyExc_IndexError,
234-
"too many indices for array");
235-
if (make_tuple) {
236-
Py_DECREF(index_as_tuple);
237-
}
238-
return NULL;
274+
/* unpacking was the right thing to do, and we already did it */
275+
if (commit_to_unpack) {
276+
return n;
239277
}
240278

241-
/* if we didn't make a tuple, then we're creating another reference */
242-
if (!make_tuple) {
243-
Py_INCREF(index);
279+
/* got to the end, never found an indication that we should have unpacked */
280+
else {
281+
/* we already filled result, but it doesn't matter */
282+
result[0] = index;
283+
return 1;
244284
}
245-
246-
return index_as_tuple;
247285
}
248286

249287
/**
@@ -286,13 +324,13 @@ prepare_index(PyArrayObject *self, PyObject *index,
286324
int index_type = 0;
287325
int ellipsis_pos = -1;
288326

289-
index = prepare_index_tuple(index);
290-
if (index == NULL) {
327+
PyObject *raw_indices[NPY_MAXDIMS*2];
328+
329+
index_ndim = prepare_index_tuple(index, raw_indices);
330+
if (index_ndim == -1) {
291331
return -1;
292332
}
293333

294-
index_ndim = (int) PyTuple_GET_SIZE(index);
295-
296334
/*
297335
* Parse all indices into the `indices` array of index_info structs
298336
*/
@@ -308,7 +346,8 @@ prepare_index(PyArrayObject *self, PyObject *index,
308346
"too many indices for array");
309347
goto failed_building_indices;
310348
}
311-
obj = PyTuple_GET_ITEM(index, get_idx++);
349+
350+
obj = raw_indices[get_idx++];
312351

313352
/**** Try the cascade of possible indices ****/
314353

@@ -712,15 +751,13 @@ prepare_index(PyArrayObject *self, PyObject *index,
712751
*ndim = new_ndim + fancy_ndim;
713752
*out_fancy_ndim = fancy_ndim;
714753

715-
Py_DECREF(index);
716754

717755
return index_type;
718756

719757
failed_building_indices:
720758
for (i=0; i < curr_idx; i++) {
721759
Py_XDECREF(indices[i].object);
722760
}
723-
Py_DECREF(index);
724761
return -1;
725762
}
726763

0 commit comments

Comments
 (0)
0