From 87e1294dd2c40fb8899e4b3e117320d148cb9cc5 Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Mon, 14 Nov 2016 23:16:11 +0000
Subject: [PATCH 01/11] MAINT: Make the refactor suggested in prepare_index

---
 numpy/core/src/multiarray/mapping.c | 148 ++++++++++++++++------------
 1 file changed, 86 insertions(+), 62 deletions(-)

diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 302b0ca1ca8e..987649f85263 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -141,52 +141,32 @@ PyArray_MapIterSwapAxes(PyArrayMapIterObject *mit, PyArrayObject **ret, int getm
 
 
 /**
- * Prepare an npy_index_object from the python slicing object.
+ * Prepare an index argument into a tuple
  *
- * This function handles all index preparations with the exception
- * of field access. It fills the array of index_info structs correctly.
- * It already handles the boolean array special case for fancy indexing,
- * i.e. if the index type is boolean, it is exactly one matching boolean
- * array. If the index type is fancy, the boolean array is already
- * converted to integer arrays. There is (as before) no checking of the
- * boolean dimension.
+ * This mainly implements the following section from the advanced indexing docs:
+ * > In order to remain backward compatible with a common usage in Numeric,
+ * > basic slicing is also initiated if the selection object is any non-ndarray
+ * > sequence (such as a list) containing slice objects, the Ellipsis object,
+ * > or the newaxis object, but not for integer arrays or other embedded
+ * > sequences.
  *
- * Checks everything but the bounds.
+ * This also promotes scalars to 1-tuples, and downcasts tuple subclasses
  *
- * @param the array being indexed
- * @param the index object
- * @param index info struct being filled (size of NPY_MAXDIMS * 2 + 1)
- * @param number of indices found
- * @param dimension of the indexing result
- * @param dimension of the fancy/advanced indices part
- * @param whether to allow the boolean special case
+ * @param the index object, which may or may not be a tuple
  *
- * @returns the index_type or -1 on failure and fills the number of indices.
+ * @returns the index converted to a tuple, if possible, else NULL on an error
+ *          It is the caller's responsibility to call Py_DECREF on a non-null
+ *          result, even if it is the same as the input.
  */
-NPY_NO_EXPORT int
-prepare_index(PyArrayObject *self, PyObject *index,
-              npy_index_info *indices,
-              int *num, int *ndim, int *out_fancy_ndim, int allow_boolean)
+NPY_NO_EXPORT PyObject *
+prepare_index_tuple(PyObject *index)
 {
-    int new_ndim, fancy_ndim, used_ndim, index_ndim;
-    int curr_idx, get_idx;
-
     int i;
     npy_intp n;
-
     npy_bool make_tuple = 0;
-    PyObject *obj = NULL;
-    PyArrayObject *arr;
 
-    int index_type = 0;
-    int ellipsis_pos = -1;
+    PyObject *index_as_tuple = index;
 
-    /*
-     * The index might be a multi-dimensional index, but not yet a tuple
-     * this makes it a tuple in that case.
-     *
-     * TODO: Refactor into its own function.
-     */
     if (!PyTuple_CheckExact(index)
             /* Next three are just to avoid slow checks */
 #if !defined(NPY_PY3K)
@@ -236,29 +216,86 @@ prepare_index(PyArrayObject *self, PyObject *index,
 
         if (make_tuple) {
             /* We want to interpret it as a tuple, so make it one */
-            index = PySequence_Tuple(index);
-            if (index == NULL) {
-                return -1;
+            index_as_tuple = PySequence_Tuple(index);
+            if (index_as_tuple == NULL) {
+                return NULL;
             }
         }
     }
 
-    /* If the index is not a tuple, handle it the same as (index,) */
-    if (!PyTuple_CheckExact(index)) {
-        obj = index;
-        index_ndim = 1;
+    /* If the index is not a tuple, conver it into (index,) */
+    if (!make_tuple && !PyTuple_CheckExact(index)) {
+        make_tuple = 1;
+        index_as_tuple = Py_BuildValue("(O)", index);
     }
+    /* Otherwise, check if the tuple is too long */
     else {
-        n = PyTuple_GET_SIZE(index);
+        n = PyTuple_GET_SIZE(index_as_tuple);
         if (n > NPY_MAXDIMS * 2) {
             PyErr_SetString(PyExc_IndexError,
                             "too many indices for array");
-            goto fail;
+            if (make_tuple) {
+                Py_DECREF(index_as_tuple);
+            }
+            return NULL;
         }
-        index_ndim = (int)n;
-        obj = NULL;
     }
 
+    /* if we didn't make a tuple, then we're creating another reference */
+    if (!make_tuple) {
+        Py_INCREF(index);
+    }
+
+    return index_as_tuple;
+}
+
+/**
+ * Prepare an npy_index_object from the python slicing object.
+ *
+ * This function handles all index preparations with the exception
+ * of field access. It fills the array of index_info structs correctly.
+ * It already handles the boolean array special case for fancy indexing,
+ * i.e. if the index type is boolean, it is exactly one matching boolean
+ * array. If the index type is fancy, the boolean array is already
+ * converted to integer arrays. There is (as before) no checking of the
+ * boolean dimension.
+ *
+ * Checks everything but the bounds.
+ *
+ * @param the array being indexed
+ * @param the index object
+ * @param index info struct being filled (size of NPY_MAXDIMS * 2 + 1)
+ * @param number of indices found
+ * @param dimension of the indexing result
+ * @param dimension of the fancy/advanced indices part
+ * @param whether to allow the boolean special case
+ *
+ * @returns the index_type or -1 on failure and fills the number of indices.
+ */
+NPY_NO_EXPORT int
+prepare_index(PyArrayObject *self, PyObject *index,
+              npy_index_info *indices,
+              int *num, int *ndim, int *out_fancy_ndim, int allow_boolean)
+{
+    int new_ndim, fancy_ndim, used_ndim, index_ndim;
+    int curr_idx, get_idx;
+
+    int i;
+    npy_intp n;
+
+    PyObject *obj = NULL;
+    PyArrayObject *arr;
+
+    int index_type = 0;
+    int ellipsis_pos = -1;
+
+    index = prepare_index_tuple(index);
+    if (index == NULL) {
+        return -1;
+    }
+
+    index_ndim = (int) PyTuple_GET_SIZE(index);
+
     /*
      * Parse all indices into the `indices` array of index_info structs
      */
@@ -274,15 +311,7 @@ prepare_index(PyArrayObject *self, PyObject *index,
                             "too many indices for array");
             goto failed_building_indices;
         }
-
-        /* Check for single index. obj is already set then. */
-        if ((curr_idx != 0) || (obj == NULL)) {
-            obj = PyTuple_GET_ITEM(index, get_idx++);
-        }
-        else {
-            /* only one loop */
-            get_idx += 1;
-        }
+        obj = PyTuple_GET_ITEM(index, get_idx++);
 
         /**** Try the cascade of possible indices ****/
 
@@ -686,9 +715,7 @@ prepare_index(PyArrayObject *self, PyObject *index,
     *ndim = new_ndim + fancy_ndim;
     *out_fancy_ndim = fancy_ndim;
 
-    if (make_tuple) {
-        Py_DECREF(index);
-    }
+    Py_DECREF(index);
 
     return index_type;
 
@@ -696,10 +723,7 @@ prepare_index(PyArrayObject *self, PyObject *index,
     for (i=0; i < curr_idx; i++) {
         Py_XDECREF(indices[i].object);
     }
-  fail:
-    if (make_tuple) {
-        Py_DECREF(index);
-    }
+    Py_DECREF(index);
     return -1;
 }
 

From bfd70c23b57ab278566f3503d9d93f20b0512281 Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Mon, 5 Dec 2016 14:07:19 +0000
Subject: [PATCH 02/11] MAINT: Fix typo, remove unnecessary nesting

---
 numpy/core/src/multiarray/mapping.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 987649f85263..a37766f64410 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -223,22 +223,19 @@ prepare_index_tuple(PyObject *index)
         }
     }
 
-    /* If the index is not a tuple, conver it into (index,) */
+    /* If the index is not a tuple, convert it into (index,) */
     if (!make_tuple && !PyTuple_CheckExact(index)) {
         make_tuple = 1;
         index_as_tuple = Py_BuildValue("(O)", index);
     }
     /* Otherwise, check if the tuple is too long */
-    else {
-        n = PyTuple_GET_SIZE(index_as_tuple);
-        if (n > NPY_MAXDIMS * 2) {
-            PyErr_SetString(PyExc_IndexError,
-                            "too many indices for array");
-            if (make_tuple) {
-                Py_DECREF(index_as_tuple);
-            }
-            return NULL;
+    else if (PyTuple_GET_SIZE(index_as_tuple) > NPY_MAXDIMS * 2) {
+        PyErr_SetString(PyExc_IndexError,
+                        "too many indices for array");
+        if (make_tuple) {
+            Py_DECREF(index_as_tuple);
         }
+        return NULL;
     }
 
     /* if we didn't make a tuple, then we're creating another reference */

From 37e6e1f6cebacbd3953070f271111caa7d68af8c Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Wed, 21 Dec 2016 11:57:32 +0000
Subject: [PATCH 03/11] MAINT: use PyTuple_Pack, which is probably faster than
 Py_BuildValue

---
 numpy/core/src/multiarray/mapping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index a37766f64410..c424879910db 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -226,7 +226,7 @@ prepare_index_tuple(PyObject *index)
     /* If the index is not a tuple, convert it into (index,) */
     if (!make_tuple && !PyTuple_CheckExact(index)) {
         make_tuple = 1;
-        index_as_tuple = Py_BuildValue("(O)", index);
+        index_as_tuple = PyTuple_Pack(1, index);
     }
     /* Otherwise, check if the tuple is too long */
     else if (PyTuple_GET_SIZE(index_as_tuple) > NPY_MAXDIMS * 2) {

From 3a0ad85ece152120a84283da573a1f2efea3d01b Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Fri, 7 Apr 2017 17:59:25 +0100
Subject: [PATCH 04/11] MAINT: Overhaul function to try and increase speed

This eliminates all tuple constructors, in favor of working with a C array
---
 numpy/core/src/multiarray/mapping.c | 201 +++++++++++++++++-----------
 1 file changed, 122 insertions(+), 79 deletions(-)

diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index c424879910db..8e6014e8335f 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -139,111 +139,155 @@ PyArray_MapIterSwapAxes(PyArrayMapIterObject *mit, PyArrayObject **ret, int getm
     *ret = (PyArrayObject *)new;
 }
 
-
 /**
- * Prepare an index argument into a tuple
+ * Turn an index argument into a c-array of `PyObject *`s, one for each index.
+ *
+ * When a scalar is passed, this is written directly to the buffer. When a
+ * tuple is passed, the tuple elements are unpacked into the buffer.
+ *
+ * When some other sequence is passed, this implements the following section
+ * from the advanced indexing docs to decide whether to unpack or just write
+ * one element:
  *
- * This mainly implements the following section from the advanced indexing docs:
  * > In order to remain backward compatible with a common usage in Numeric,
  * > basic slicing is also initiated if the selection object is any non-ndarray
  * > sequence (such as a list) containing slice objects, the Ellipsis object,
  * > or the newaxis object, but not for integer arrays or other embedded
  * > sequences.
  *
- * This also promotes scalars to 1-tuples, and downcasts tuple subclasses
- *
- * @param the index object, which may or may not be a tuple
+ * @param  index   The index object, which may or may not be a tuple. This is a
+ *                 borrowed reference.
+ * @param  result  An empty buffer of 2*NPY_MAXDIMS PyObject* to write each
+ *                 index component to. The references written are borrowed.
+ *                 This function will in some cases clobber the array beyond
+ *                 the last item unpacked.
  *
- * @returns the index converted to a tuple, if possible, else NULL on an error
- *          It is the caller's responsibility to call Py_DECREF on a non-null
- *          result, even if it is the same as the input.
+ * @returns        The number of items in `result`, or -1 if an error occured.
  */
-NPY_NO_EXPORT PyObject *
-prepare_index_tuple(PyObject *index)
+NPY_NO_EXPORT npy_intp
+unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
 {
-    int i;
-    npy_intp n;
-    npy_bool make_tuple = 0;
+    npy_intp n, i;
+    npy_bool commit_to_unpack;
 
-    PyObject *index_as_tuple = index;
+    /* fast route for passing a tuple */
+    if (PyTuple_CheckExact(index)) {
+        n = PyTuple_GET_SIZE(index);
+        if (n > NPY_MAXDIMS * 2) {
+            PyErr_SetString(PyExc_IndexError,
+                            "too many indices for array");
+            return -1;
+        }
+        for (i = 0; i < n; i++) {
+            result[i] = PyTuple_GET_ITEM(index, i);
+        }
+        return n;
+    }
 
-    if (!PyTuple_CheckExact(index)
-            /* Next three are just to avoid slow checks */
+    /* Obvious single-entry cases */
+    if (0
 #if !defined(NPY_PY3K)
-            && (!PyInt_CheckExact(index))
+            || PyInt_CheckExact(index)
 #else
-            && (!PyLong_CheckExact(index))
+            || PyLong_CheckExact(index)
 #endif
-            && (index != Py_None)
-            && (!PySlice_Check(index))
-            && (!PyArray_Check(index))
-            && (PySequence_Check(index))) {
-        /*
-         * Sequences < NPY_MAXDIMS with any slice objects
-         * or newaxis, Ellipsis or other arrays or sequences
-         * embedded, are considered equivalent to an indexing
-         * tuple. (`a[[[1,2], [3,4]]] == a[[1,2], [3,4]]`)
-         */
+            || index == Py_None
+            || PySlice_Check(index)
+            || PyArray_Check(index)
+            || !PySequence_Check(index)) {
 
-        if (PyTuple_Check(index)) {
-            /* If it is already a tuple, make it an exact tuple anyway */
-            n = 0;
-            make_tuple = 1;
-        }
-        else {
-            n = PySequence_Size(index);
+        result[0] = index;
+        return 1;
+    }
+
+    /* passing a tuple subclass - needs to handle errors */
+    if (PyTuple_Check(index)) {
+        n = PySequence_Size(index);
+        if (n < 0) {
+            return -1;
         }
-        if (n < 0 || n >= NPY_MAXDIMS) {
-            n = 0;
+        if (n > NPY_MAXDIMS * 2) {
+            PyErr_SetString(PyExc_IndexError,
+                            "too many indices for array");
+            return -1;
         }
         for (i = 0; i < n; i++) {
-            PyObject *tmp_obj = PySequence_GetItem(index, i);
-            /* if getitem fails (unusual) treat this as a single index */
+            result[i] = PySequence_GetItem(index, i);
+            if (result[i] == NULL) {
+                return -1;
+            }
+        }
+        return n;
+    }
+
+    /* At this point, we're left with a non-tuple, non-array, sequence:
+     * typically, a list
+     *
+     * Sequences < NPY_MAXDIMS with any slice objects
+     * or newaxis, Ellipsis or other arrays or sequences
+     * embedded, are considered equivalent to an indexing
+     * tuple. (`a[[[1,2], [3,4]]] == a[[1,2], [3,4]]`)
+     */
+
+    /* if len fails, treat like a scalar */
+    n = PySequence_Size(index);
+    if (n < 0) {
+        PyErr_Clear();
+        result[0] = index;
+        return 1;
+    }
+
+    /* for some reason, anything that's long but not too long is turned into
+     * a single index. The *2 is missing here for backward-compatibility. */
+    if (n >= NPY_MAXDIMS) {
+        result[0] = index;
+        return 1;
+    }
+
+    /* Some other type of short sequence - assume we should unpack it like a
+     * tuple, until we find something that proves us wrong */
+    commit_to_unpack = 0;
+    for (i = 0; i < n; i++) {
+        PyObject *tmp_obj = result[i] = PySequence_GetItem(index, i);
+
+        if (commit_to_unpack) {
+            /* propagate errors */
+            if (tmp_obj == NULL) {
+                return -1;
+            }
+        }
+        else {
+            /* if getitem fails (unusual) before we've committed, then
+             * commit to not unpacking */
             if (tmp_obj == NULL) {
                 PyErr_Clear();
-                make_tuple = 0;
                 break;
             }
-            if (PyArray_Check(tmp_obj) || PySequence_Check(tmp_obj)
-                    || PySlice_Check(tmp_obj) || tmp_obj == Py_Ellipsis
+
+            /* decide if we should treat this sequence like a tuple */
+            if (PyArray_Check(tmp_obj)
+                    || PySequence_Check(tmp_obj)
+                    || PySlice_Check(tmp_obj)
+                    || tmp_obj == Py_Ellipsis
                     || tmp_obj == Py_None) {
-                make_tuple = 1;
-                Py_DECREF(tmp_obj);
-                break;
+                commit_to_unpack = 1;
             }
-            Py_DECREF(tmp_obj);
         }
 
-        if (make_tuple) {
-            /* We want to interpret it as a tuple, so make it one */
-            index_as_tuple = PySequence_Tuple(index);
-            if (index_as_tuple == NULL) {
-                return NULL;
-            }
-        }
+        Py_DECREF(tmp_obj);
     }
 
-    /* If the index is not a tuple, convert it into (index,) */
-    if (!make_tuple && !PyTuple_CheckExact(index)) {
-        make_tuple = 1;
-        index_as_tuple = PyTuple_Pack(1, index);
-    }
-    /* Otherwise, check if the tuple is too long */
-    else if (PyTuple_GET_SIZE(index_as_tuple) > NPY_MAXDIMS * 2) {
-        PyErr_SetString(PyExc_IndexError,
-                        "too many indices for array");
-        if (make_tuple) {
-            Py_DECREF(index_as_tuple);
-        }
-        return NULL;
+    /* unpacking was the right thing to do, and we already did it */
+    if (commit_to_unpack) {
+        return n;
     }
 
-    /* if we didn't make a tuple, then we're creating another reference */
-    if (!make_tuple) {
-        Py_INCREF(index);
+    /* got to the end, never found an indication that we should have unpacked */
+    else {
+        /* we already filled result, but it doesn't matter */
+        result[0] = index;
+        return 1;
     }
-
-    return index_as_tuple;
 }
 
 /**
@@ -286,13 +330,13 @@ prepare_index(PyArrayObject *self, PyObject *index,
     int index_type = 0;
     int ellipsis_pos = -1;
 
-    index = prepare_index_tuple(index);
-    if (index == NULL) {
+    PyObject *raw_indices[NPY_MAXDIMS*2];
+
+    index_ndim = unpack_indices(index, raw_indices);
+    if (index_ndim == -1) {
         return -1;
     }
 
-    index_ndim = (int) PyTuple_GET_SIZE(index);
-
     /*
      * Parse all indices into the `indices` array of index_info structs
      */
@@ -308,7 +352,8 @@ prepare_index(PyArrayObject *self, PyObject *index,
                             "too many indices for array");
             goto failed_building_indices;
         }
-        obj = PyTuple_GET_ITEM(index, get_idx++);
+
+        obj = raw_indices[get_idx++];
 
         /**** Try the cascade of possible indices ****/
 
@@ -712,7 +757,6 @@ prepare_index(PyArrayObject *self, PyObject *index,
     *ndim = new_ndim + fancy_ndim;
     *out_fancy_ndim = fancy_ndim;
 
-    Py_DECREF(index);
 
     return index_type;
 
@@ -720,7 +764,6 @@ prepare_index(PyArrayObject *self, PyObject *index,
     for (i=0; i < curr_idx; i++) {
         Py_XDECREF(indices[i].object);
     }
-    Py_DECREF(index);
     return -1;
 }
 

From 8c4e5569ab62527d434e242d6bee57809d74323c Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Sun, 9 Apr 2017 11:53:47 +0100
Subject: [PATCH 05/11] BUG: Fix reference counting

---
 numpy/core/src/multiarray/mapping.c | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 8e6014e8335f..4ab3647f1c33 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -139,6 +139,15 @@ PyArray_MapIterSwapAxes(PyArrayMapIterObject *mit, PyArrayObject **ret, int getm
     *ret = (PyArrayObject *)new;
 }
 
+NPY_NO_EXPORT NPY_INLINE void
+multi_DECREF(PyObject **objects, npy_intp n)
+{
+    npy_intp i;
+    for (i = 0; i < n; i++) {
+        Py_DECREF(objects[i]);
+    }
+}
+
 /**
  * Turn an index argument into a c-array of `PyObject *`s, one for each index.
  *
@@ -158,7 +167,7 @@ PyArray_MapIterSwapAxes(PyArrayMapIterObject *mit, PyArrayObject **ret, int getm
  * @param  index   The index object, which may or may not be a tuple. This is a
  *                 borrowed reference.
  * @param  result  An empty buffer of 2*NPY_MAXDIMS PyObject* to write each
- *                 index component to. The references written are borrowed.
+ *                 index component to. The references written are new..
  *                 This function will in some cases clobber the array beyond
  *                 the last item unpacked.
  *
@@ -180,6 +189,7 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
         }
         for (i = 0; i < n; i++) {
             result[i] = PyTuple_GET_ITEM(index, i);
+            Py_INCREF(result[i]);
         }
         return n;
     }
@@ -196,6 +206,7 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
             || PyArray_Check(index)
             || !PySequence_Check(index)) {
 
+        Py_INCREF(index);
         result[0] = index;
         return 1;
     }
@@ -214,8 +225,10 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
         for (i = 0; i < n; i++) {
             result[i] = PySequence_GetItem(index, i);
             if (result[i] == NULL) {
+                multi_DECREF(result, i);
                 return -1;
             }
+            Py_INCREF(result[i]);
         }
         return n;
     }
@@ -233,6 +246,7 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
     n = PySequence_Size(index);
     if (n < 0) {
         PyErr_Clear();
+        Py_INCREF(index);
         result[0] = index;
         return 1;
     }
@@ -240,6 +254,7 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
     /* for some reason, anything that's long but not too long is turned into
      * a single index. The *2 is missing here for backward-compatibility. */
     if (n >= NPY_MAXDIMS) {
+        Py_INCREF(index);
         result[0] = index;
         return 1;
     }
@@ -253,6 +268,7 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
         if (commit_to_unpack) {
             /* propagate errors */
             if (tmp_obj == NULL) {
+                multi_DECREF(result, i);
                 return -1;
             }
         }
@@ -260,6 +276,7 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
             /* if getitem fails (unusual) before we've committed, then
              * commit to not unpacking */
             if (tmp_obj == NULL) {
+                multi_DECREF(result, i);
                 PyErr_Clear();
                 break;
             }
@@ -273,18 +290,18 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
                 commit_to_unpack = 1;
             }
         }
-
-        Py_DECREF(tmp_obj);
     }
 
     /* unpacking was the right thing to do, and we already did it */
     if (commit_to_unpack) {
         return n;
     }
-
     /* got to the end, never found an indication that we should have unpacked */
     else {
-        /* we already filled result, but it doesn't matter */
+        /* we already filled result, so empty it first */
+        multi_DECREF(result, n);
+
+        Py_INCREF(index);
         result[0] = index;
         return 1;
     }
@@ -757,6 +774,7 @@ prepare_index(PyArrayObject *self, PyObject *index,
     *ndim = new_ndim + fancy_ndim;
     *out_fancy_ndim = fancy_ndim;
 
+    multi_DECREF(raw_indices, index_ndim);
 
     return index_type;
 
@@ -764,6 +782,7 @@ prepare_index(PyArrayObject *self, PyObject *index,
     for (i=0; i < curr_idx; i++) {
         Py_XDECREF(indices[i].object);
     }
+    multi_DECREF(raw_indices, index_ndim);
     return -1;
 }
 

From 5d9315c1ba2175414e64e3a34364c013a8e50aaf Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Sun, 9 Apr 2017 12:10:18 +0100
Subject: [PATCH 06/11] STY: Fix comment capitalization and format

[ci skip]
---
 numpy/core/src/multiarray/mapping.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 4ab3647f1c33..4bbada5ce444 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -179,7 +179,7 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
     npy_intp n, i;
     npy_bool commit_to_unpack;
 
-    /* fast route for passing a tuple */
+    /* Fast route for passing a tuple */
     if (PyTuple_CheckExact(index)) {
         n = PyTuple_GET_SIZE(index);
         if (n > NPY_MAXDIMS * 2) {
@@ -211,7 +211,7 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
         return 1;
     }
 
-    /* passing a tuple subclass - needs to handle errors */
+    /* Passing a tuple subclass - needs to handle errors */
     if (PyTuple_Check(index)) {
         n = PySequence_Size(index);
         if (n < 0) {
@@ -233,7 +233,8 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
         return n;
     }
 
-    /* At this point, we're left with a non-tuple, non-array, sequence:
+    /*
+     * At this point, we're left with a non-tuple, non-array, sequence:
      * typically, a list
      *
      * Sequences < NPY_MAXDIMS with any slice objects
@@ -251,16 +252,20 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
         return 1;
     }
 
-    /* for some reason, anything that's long but not too long is turned into
-     * a single index. The *2 is missing here for backward-compatibility. */
+    /*
+     * For some reason, anything that's long but not too long is turned into
+     * a single index. The *2 is missing here for backward-compatibility.
+     */
     if (n >= NPY_MAXDIMS) {
         Py_INCREF(index);
         result[0] = index;
         return 1;
     }
 
-    /* Some other type of short sequence - assume we should unpack it like a
-     * tuple, until we find something that proves us wrong */
+    /*
+     * Some other type of short sequence - assume we should unpack it like a
+     * tuple, and then decide whether that was actually necessary.
+     */
     commit_to_unpack = 0;
     for (i = 0; i < n; i++) {
         PyObject *tmp_obj = result[i] = PySequence_GetItem(index, i);
@@ -273,8 +278,10 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
             }
         }
         else {
-            /* if getitem fails (unusual) before we've committed, then
-             * commit to not unpacking */
+            /*
+             * if getitem fails (unusual) before we've committed, then stop
+             * unpacking
+             */
             if (tmp_obj == NULL) {
                 multi_DECREF(result, i);
                 PyErr_Clear();

From 966b2c7e00d797d23740034cda691ee433743fef Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Sun, 9 Apr 2017 12:28:59 +0100
Subject: [PATCH 07/11] fixup! BUG: Fix reference counting

---
 numpy/core/src/multiarray/mapping.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 4bbada5ce444..dbd6e538782b 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -228,7 +228,6 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
                 multi_DECREF(result, i);
                 return -1;
             }
-            Py_INCREF(result[i]);
         }
         return n;
     }
@@ -283,7 +282,6 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
              * unpacking
              */
             if (tmp_obj == NULL) {
-                multi_DECREF(result, i);
                 PyErr_Clear();
                 break;
             }
@@ -305,8 +303,8 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
     }
     /* got to the end, never found an indication that we should have unpacked */
     else {
-        /* we already filled result, so empty it first */
-        multi_DECREF(result, n);
+        /* we partially filled result, so empty it first */
+        multi_DECREF(result, i);
 
         Py_INCREF(index);
         result[0] = index;

From 9832f05474dc7d45f79fefd04af7a39c1f62a880 Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Sun, 9 Apr 2017 21:34:33 +0100
Subject: [PATCH 08/11] fixup! MAINT: Overhaul function to try and increase
 speed

Improve comments

[ci skip]
---
 numpy/core/src/multiarray/mapping.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index dbd6e538782b..b34bbb0f1e9b 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -164,14 +164,21 @@ multi_DECREF(PyObject **objects, npy_intp n)
  * > or the newaxis object, but not for integer arrays or other embedded
  * > sequences.
  *
+ * The rationale for only unpacking `2*NPY_MAXDIMS` items is the assumption
+ * that the longest possible index that is allowed to produce a result is
+ * `(0,)*np.MAXDIMS + (None,)*np.MAXDIMS`. This assumption turns out to be
+ * wrong (we could add one more item, an Ellipsis), but we keep it for
+ * compatibility.
+ *
  * @param  index   The index object, which may or may not be a tuple. This is a
  *                 borrowed reference.
  * @param  result  An empty buffer of 2*NPY_MAXDIMS PyObject* to write each
- *                 index component to. The references written are new..
- *                 This function will in some cases clobber the array beyond
- *                 the last item unpacked.
+ *                 index component to. The references written are new.
  *
  * @returns        The number of items in `result`, or -1 if an error occured.
+ *                 The entries in `result` at and beyond this index should be
+ *                 assumed to contain garbage, even if they were initialized
+ *                 to NULL, so are not safe to Py_XDECREF.
  */
 NPY_NO_EXPORT npy_intp
 unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
@@ -234,7 +241,9 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
 
     /*
      * At this point, we're left with a non-tuple, non-array, sequence:
-     * typically, a list
+     * typically, a list. We use some somewhat-arbirary heuristics from here
+     * onwards to decided whether to treat that list as a single index, or a
+     * list of indices. It might be worth deprecating this behaviour (gh-4434).
      *
      * Sequences < NPY_MAXDIMS with any slice objects
      * or newaxis, Ellipsis or other arrays or sequences

From 68bad6a47f7090a0680ef3b5b48a76623de91df3 Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Sun, 16 Jul 2017 15:07:16 +0100
Subject: [PATCH 09/11] MAINT: Improve comments, simplify handling of tuple
 subclasses

---
 numpy/core/src/multiarray/mapping.c | 143 +++++++++++++++-------------
 1 file changed, 78 insertions(+), 65 deletions(-)

diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index b34bbb0f1e9b..bd3464500622 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -148,6 +148,39 @@ multi_DECREF(PyObject **objects, npy_intp n)
     }
 }
 
+/**
+ * Unpack a tuple into an array of new references. Returns the number of objects
+ * unpacked.
+ *
+ * Useful if a tuple is being iterated over multiple times, or for a code path
+ * that doesn't always want the overhead of allocating a tuple.
+ */
+NPY_NO_EXPORT NPY_INLINE npy_intp
+unpack_tuple(PyTupleObject *index, PyObject **result, npy_intp result_n)
+{
+    npy_intp n, i;
+    n = PyTuple_GET_SIZE(index);
+    if (n > result_n) {
+        PyErr_SetString(PyExc_IndexError,
+                        "too many indices for array");
+        return -1;
+    }
+    for (i = 0; i < n; i++) {
+        result[i] = PyTuple_GET_ITEM(index, i);
+        Py_INCREF(result[i]);
+    }
+    return n;
+}
+
+/* Unpack a single scalar index, taking a new reference to match unpack_tuple */
+NPY_NO_EXPORT NPY_INLINE npy_intp
+unpack_scalar(PyObject *index, PyObject **result, npy_intp result_n)
+{
+    Py_INCREF(index);
+    result[0] = index;
+    return 1;
+}
+
 /**
  * Turn an index argument into a c-array of `PyObject *`s, one for each index.
  *
@@ -164,45 +197,34 @@ multi_DECREF(PyObject **objects, npy_intp n)
  * > or the newaxis object, but not for integer arrays or other embedded
  * > sequences.
  *
- * The rationale for only unpacking `2*NPY_MAXDIMS` items is the assumption
- * that the longest possible index that is allowed to produce a result is
- * `(0,)*np.MAXDIMS + (None,)*np.MAXDIMS`. This assumption turns out to be
- * wrong (we could add one more item, an Ellipsis), but we keep it for
- * compatibility.
+ * It might be worth deprecating this behaviour (gh-4434), in which case the
+ * entire function should become a simple check of PyTuple_Check.
  *
- * @param  index   The index object, which may or may not be a tuple. This is a
- *                 borrowed reference.
- * @param  result  An empty buffer of 2*NPY_MAXDIMS PyObject* to write each
- *                 index component to. The references written are new.
+ * @param  index     The index object, which may or may not be a tuple. This is
+ *                   a borrowed reference.
+ * @param  result    An empty buffer of PyObject* to write each index component
+ *                   to. The references written are new.
+ * @param  result_n  The length of the result buffer
  *
- * @returns        The number of items in `result`, or -1 if an error occured.
- *                 The entries in `result` at and beyond this index should be
- *                 assumed to contain garbage, even if they were initialized
- *                 to NULL, so are not safe to Py_XDECREF.
+ * @returns          The number of items in `result`, or -1 if an error occured.
+ *                   The entries in `result` at and beyond this index should be
+ *                   assumed to contain garbage, even if they were initialized
+ *                   to NULL, so are not safe to Py_XDECREF. Use multi_DECREF to
+ *                   dispose of them.
  */
 NPY_NO_EXPORT npy_intp
-unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
+unpack_indices(PyObject *index, PyObject **result, npy_intp result_n)
 {
     npy_intp n, i;
     npy_bool commit_to_unpack;
 
     /* Fast route for passing a tuple */
     if (PyTuple_CheckExact(index)) {
-        n = PyTuple_GET_SIZE(index);
-        if (n > NPY_MAXDIMS * 2) {
-            PyErr_SetString(PyExc_IndexError,
-                            "too many indices for array");
-            return -1;
-        }
-        for (i = 0; i < n; i++) {
-            result[i] = PyTuple_GET_ITEM(index, i);
-            Py_INCREF(result[i]);
-        }
-        return n;
+        return unpack_tuple((PyTupleObject *)index, result, result_n);
     }
 
     /* Obvious single-entry cases */
-    if (0
+    if (0  /* to aid macros below */
 #if !defined(NPY_PY3K)
             || PyInt_CheckExact(index)
 #else
@@ -213,63 +235,51 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
             || PyArray_Check(index)
             || !PySequence_Check(index)) {
 
-        Py_INCREF(index);
-        result[0] = index;
-        return 1;
+        return unpack_scalar(index, result, result_n);
     }
 
-    /* Passing a tuple subclass - needs to handle errors */
+    /*
+     * Passing a tuple subclass - coerce to the base type. This incurs an
+     * allocation, but doesn't need to be a fast path anyway
+     */
     if (PyTuple_Check(index)) {
-        n = PySequence_Size(index);
-        if (n < 0) {
+        PyTupleObject *tup = PySequence_Tuple(index);
+        if (tup == NULL) {
             return -1;
         }
-        if (n > NPY_MAXDIMS * 2) {
-            PyErr_SetString(PyExc_IndexError,
-                            "too many indices for array");
-            return -1;
-        }
-        for (i = 0; i < n; i++) {
-            result[i] = PySequence_GetItem(index, i);
-            if (result[i] == NULL) {
-                multi_DECREF(result, i);
-                return -1;
-            }
-        }
-        return n;
+        return unpack_tuple(tup, result, result_n);
     }
 
     /*
      * At this point, we're left with a non-tuple, non-array, sequence:
-     * typically, a list. We use some somewhat-arbirary heuristics from here
+     * typically, a list. We use some somewhat-arbitrary heuristics from here
      * onwards to decided whether to treat that list as a single index, or a
-     * list of indices. It might be worth deprecating this behaviour (gh-4434).
-     *
-     * Sequences < NPY_MAXDIMS with any slice objects
-     * or newaxis, Ellipsis or other arrays or sequences
-     * embedded, are considered equivalent to an indexing
-     * tuple. (`a[[[1,2], [3,4]]] == a[[1,2], [3,4]]`)
+     * list of indices.
      */
 
     /* if len fails, treat like a scalar */
     n = PySequence_Size(index);
     if (n < 0) {
         PyErr_Clear();
-        Py_INCREF(index);
-        result[0] = index;
-        return 1;
+        return unpack_scalar(index, result, result_n);
     }
 
     /*
-     * For some reason, anything that's long but not too long is turned into
-     * a single index. The *2 is missing here for backward-compatibility.
+     * Backwards compatibility only takes effect for short sequences - otherwise
+     * we treat it like any other scalar.
+     *
+     * Sequences < NPY_MAXDIMS with any slice objects
+     * or newaxis, Ellipsis or other arrays or sequences
+     * embedded, are considered equivalent to an indexing
+     * tuple. (`a[[[1,2], [3,4]]] == a[[1,2], [3,4]]`)
      */
     if (n >= NPY_MAXDIMS) {
-        Py_INCREF(index);
-        result[0] = index;
-        return 1;
+        return unpack_scalar(index, result, result_n);
     }
 
+    /* In case we change result_n elsewhere */
+    assert(n <= result_n);
+
     /*
      * Some other type of short sequence - assume we should unpack it like a
      * tuple, and then decide whether that was actually necessary.
@@ -314,10 +324,7 @@ unpack_indices(PyObject *index, PyObject *result[2*NPY_MAXDIMS])
     else {
         /* we partially filled result, so empty it first */
         multi_DECREF(result, i);
-
-        Py_INCREF(index);
-        result[0] = index;
-        return 1;
+        return unpack_scalar(index, result, result_n);
     }
 }
 
@@ -361,9 +368,15 @@ prepare_index(PyArrayObject *self, PyObject *index,
     int index_type = 0;
     int ellipsis_pos = -1;
 
+    /* 
+     * The choice of only unpacking `2*NPY_MAXDIMS` items is historic.
+     * The longest "reasonable" index that produces a result of <= 32 dimensions
+     * is `(0,)*np.MAXDIMS + (None,)*np.MAXDIMS`. Longer indices can exist, but
+     * are uncommon.
+     */
     PyObject *raw_indices[NPY_MAXDIMS*2];
 
-    index_ndim = unpack_indices(index, raw_indices);
+    index_ndim = unpack_indices(index, raw_indices, NPY_MAXDIMS*2);
     if (index_ndim == -1) {
         return -1;
     }

From c5879638a381380b7b88c00992983fbbf08baab1 Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Sun, 16 Jul 2017 15:28:17 +0100
Subject: [PATCH 10/11] fixup! MAINT: Improve comments, simplify handling of
 tuple subclasses

---
 numpy/core/src/multiarray/mapping.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index bd3464500622..f16dc85f32a3 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -247,7 +247,9 @@ unpack_indices(PyObject *index, PyObject **result, npy_intp result_n)
         if (tup == NULL) {
             return -1;
         }
-        return unpack_tuple(tup, result, result_n);
+        n = unpack_tuple(tup, result, result_n);
+        Py_DECREF(tup);
+        return n;
     }
 
     /*
@@ -368,7 +370,7 @@ prepare_index(PyArrayObject *self, PyObject *index,
     int index_type = 0;
     int ellipsis_pos = -1;
 
-    /* 
+    /*
      * The choice of only unpacking `2*NPY_MAXDIMS` items is historic.
      * The longest "reasonable" index that produces a result of <= 32 dimensions
      * is `(0,)*np.MAXDIMS + (None,)*np.MAXDIMS`. Longer indices can exist, but

From 105e0b4939a02ceda46ceb92f2978383cdc2b377 Mon Sep 17 00:00:00 2001
From: Eric Wieser <wieser.eric@gmail.com>
Date: Sun, 16 Jul 2017 16:34:30 +0100
Subject: [PATCH 11/11] fixup! MAINT: Improve comments, simplify handling of
 tuple subclasses

---
 numpy/core/src/multiarray/mapping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index f16dc85f32a3..779a8d232798 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -243,7 +243,7 @@ unpack_indices(PyObject *index, PyObject **result, npy_intp result_n)
      * allocation, but doesn't need to be a fast path anyway
      */
     if (PyTuple_Check(index)) {
-        PyTupleObject *tup = PySequence_Tuple(index);
+        PyTupleObject *tup = (PyTupleObject *) PySequence_Tuple(index);
         if (tup == NULL) {
             return -1;
         }