From b592672bdbfa3332e035e7622c5ecd3bfa9408a6 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 19 Dec 2024 15:39:01 +0100 Subject: [PATCH 1/5] ENH: Allow an arbitrary number of operands in nditer Internally, the change is very moderate mainly requiring to use `int` in the struct (growing it a bit) and otherwise a temporary work-array in the buffer setup function (one could probably use scratch space on the nditer, but this seemed OK to me). The big and somewhat annoying change is that the Python wrapper needs a lot of per operand space, so setting it up is somewhat annoying. --- doc/release/upcoming_changes/28080.c_api.rst | 1 + .../upcoming_changes/28080.improvement.rst | 2 + numpy/_core/src/multiarray/nditer_constr.c | 34 +- numpy/_core/src/multiarray/nditer_impl.h | 4 +- numpy/_core/src/multiarray/nditer_pywrap.c | 297 ++++++++++-------- numpy/_core/tests/test_nditer.py | 24 +- 6 files changed, 205 insertions(+), 157 deletions(-) create mode 100644 doc/release/upcoming_changes/28080.c_api.rst create mode 100644 doc/release/upcoming_changes/28080.improvement.rst diff --git a/doc/release/upcoming_changes/28080.c_api.rst b/doc/release/upcoming_changes/28080.c_api.rst new file mode 100644 index 000000000000..f72be7ef52fe --- /dev/null +++ b/doc/release/upcoming_changes/28080.c_api.rst @@ -0,0 +1 @@ +* ``NpyIter`` now has no limit on the number of operands it supports. diff --git a/doc/release/upcoming_changes/28080.improvement.rst b/doc/release/upcoming_changes/28080.improvement.rst new file mode 100644 index 000000000000..19b85ae3c96a --- /dev/null +++ b/doc/release/upcoming_changes/28080.improvement.rst @@ -0,0 +1,2 @@ +* ``np.nditer`` now has no limit on the number of supported operands + (C-integer). diff --git a/numpy/_core/src/multiarray/nditer_constr.c b/numpy/_core/src/multiarray/nditer_constr.c index 8719ef561b2d..3ed5cf1a0245 100644 --- a/numpy/_core/src/multiarray/nditer_constr.c +++ b/numpy/_core/src/multiarray/nditer_constr.c @@ -50,7 +50,7 @@ npyiter_prepare_operands(int nop, PyArray_Descr **op_dtype, npy_uint32 flags, npy_uint32 *op_flags, npyiter_opitflags *op_itflags, - npy_int8 *out_maskop); + int *out_maskop); static int npyiter_check_casting(int nop, PyArrayObject **op, PyArray_Descr **op_dtype, @@ -100,7 +100,7 @@ npyiter_get_priority_subtype(int nop, PyArrayObject **op, static int npyiter_allocate_transfer_functions(NpyIter *iter); -static void +static int npyiter_find_buffering_setup(NpyIter *iter, npy_intp buffersize); /*NUMPY_API @@ -158,13 +158,6 @@ NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags, NPY_IT_TIME_POINT(c_start); - if (nop > NPY_MAXARGS) { - PyErr_Format(PyExc_ValueError, - "Cannot construct an iterator with more than %d operands " - "(%d were requested)", NPY_MAXARGS, nop); - return NULL; - } - /* * Before 1.8, if `oa_ndim == 0`, this meant `op_axes != NULL` was an error. 
* With 1.8, `oa_ndim == -1` takes this role, while op_axes in that case @@ -433,7 +426,10 @@ NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags, /* If buffering is set prepare it */ if (itflags & NPY_ITFLAG_BUFFER) { - npyiter_find_buffering_setup(iter, buffersize); + if (npyiter_find_buffering_setup(iter, buffersize) < 0) { + NpyIter_Deallocate(iter); + return NULL; + } /* * Initialize for use in FirstVisit, which may be called before @@ -1168,10 +1164,10 @@ npyiter_prepare_operands(int nop, PyArrayObject **op_in, PyArray_Descr **op_dtype, npy_uint32 flags, npy_uint32 *op_flags, npyiter_opitflags *op_itflags, - npy_int8 *out_maskop) + int *out_maskop) { int iop, i; - npy_int8 maskop = -1; + int maskop = -1; int any_writemasked_ops = 0; /* @@ -2001,7 +1997,7 @@ operand_different_than_broadcast: { * In theory, the reduction could also span multiple axes if other operands * are buffered. We do not try to discover this. */ -static void +static int npyiter_find_buffering_setup(NpyIter *iter, npy_intp buffersize) { int nop = iter->nop; @@ -2009,14 +2005,19 @@ npyiter_find_buffering_setup(NpyIter *iter, npy_intp buffersize) npy_uint32 itflags = iter->itflags; NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); + /* Per operand space; could also reuse an iterator field initialized later */ + NPY_ALLOC_WORKSPACE(dim_scratch_space, int, 10, 2 * nop); + if (dim_scratch_space == NULL) { + return -1; + } /* * We check two things here, first how many operand dimensions can be * iterated using a single stride (all dimensions are consistent), * and second, whether we found a reduce dimension for the operand. * That is an outer dimension a reduce would have to take place on. */ - int op_single_stride_dims[NPY_MAXARGS]; - int op_reduce_outer_dim[NPY_MAXARGS]; + int *op_single_stride_dims = dim_scratch_space; + int *op_reduce_outer_dim = dim_scratch_space + nop; npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); @@ -2312,7 +2313,8 @@ npyiter_find_buffering_setup(NpyIter *iter, npy_intp buffersize) /* Core starts at 0 initially, if needed it is set in goto index. 
*/ NIT_BUFFERDATA(iter)->coreoffset = 0; - return; + npy_free_workspace(dim_scratch_space); + return 0; } diff --git a/numpy/_core/src/multiarray/nditer_impl.h b/numpy/_core/src/multiarray/nditer_impl.h index bae1744f6790..ab3724d67d11 100644 --- a/numpy/_core/src/multiarray/nditer_impl.h +++ b/numpy/_core/src/multiarray/nditer_impl.h @@ -152,8 +152,8 @@ struct NpyIter_InternalOnly { /* Initial fixed position data */ npy_uint32 itflags; - npy_uint8 ndim, nop; - npy_int8 maskop; + npy_uint8 ndim; + int nop, maskop; npy_intp itersize, iterstart, iterend; /* iterindex is only used if RANGED or BUFFERED is set */ npy_intp iterindex; diff --git a/numpy/_core/src/multiarray/nditer_pywrap.c b/numpy/_core/src/multiarray/nditer_pywrap.c index ad20194f308f..9658408a80ae 100644 --- a/numpy/_core/src/multiarray/nditer_pywrap.c +++ b/numpy/_core/src/multiarray/nditer_pywrap.c @@ -42,8 +42,7 @@ struct NewNpyArrayIterObject_tag { PyArray_Descr **dtypes; PyArrayObject **operands; npy_intp *innerstrides, *innerloopsizeptr; - char readflags[NPY_MAXARGS]; - char writeflags[NPY_MAXARGS]; + char *writeflags; /* could inline allocation with variable sized object */ }; static int npyiter_cache_values(NewNpyArrayIterObject *self) @@ -77,8 +76,14 @@ static int npyiter_cache_values(NewNpyArrayIterObject *self) self->innerloopsizeptr = NULL; } - /* The read/write settings */ - NpyIter_GetReadFlags(iter, self->readflags); + if (self->writeflags == NULL) { + self->writeflags = PyMem_Malloc(sizeof(char) * NpyIter_GetNOp(iter)); + if (self->writeflags == NULL) { + PyErr_NoMemory(); + return -1; + } + } + /* The write flags settings (not-readable cannot be signalled to Python) */ NpyIter_GetWriteFlags(iter, self->writeflags); return 0; } @@ -93,6 +98,7 @@ npyiter_new(PyTypeObject *subtype, PyObject *NPY_UNUSED(args), if (self != NULL) { self->iter = NULL; self->nested_child = NULL; + self->writeflags = NULL; } return (PyObject *)self; @@ -576,66 +582,61 @@ npyiter_convert_op_axes(PyObject *op_axes_in, int nop, return 1; } -/* - * Converts the operand array and op_flags array into the form - * NpyIter_AdvancedNew needs. Sets nop, and on success, each - * op[i] owns a reference to an array object. - */ + static int -npyiter_convert_ops(PyObject *op_in, PyObject *op_flags_in, - PyArrayObject **op, npy_uint32 *op_flags, - int *nop_out) +npyiter_prepare_ops(PyObject *op_in, PyObject **out_owner, PyObject ***out_objs) { - int iop, nop; - - /* nop and op */ + /* Take ownership of op_in (either a tuple/list or single element): */ if (PyTuple_Check(op_in) || PyList_Check(op_in)) { - nop = PySequence_Size(op_in); - if (nop == 0) { + PyObject *seq = PySequence_Fast(op_in, "failed accessing item list"); + if (op_in == NULL) { + Py_DECREF(op_in); + return -1; + } + Py_ssize_t length = PySequence_Fast_GET_SIZE(op_in); + if (length == 0) { PyErr_SetString(PyExc_ValueError, "Must provide at least one operand"); - return 0; - } - if (nop > NPY_MAXARGS) { - PyErr_SetString(PyExc_ValueError, "Too many operands"); - return 0; + Py_DECREF(op_in); + return -1; } - - for (iop = 0; iop < nop; ++iop) { - PyObject *item = PySequence_GetItem(op_in, iop); - if (item == NULL) { - npy_intp i; - for (i = 0; i < iop; ++i) { - Py_XDECREF(op[i]); - } - return 0; - } - else if (item == Py_None) { - Py_DECREF(item); - item = NULL; - } - /* This is converted to an array after op flags are retrieved */ - op[iop] = (PyArrayObject *)item; + if (length > NPY_MAX_INT) { + /* NpyIter supports fewer args, but deal with it there. 
*/ + PyErr_Format(PyExc_ValueError, + "Too many operands to nditer, found %zd.", length); + Py_DECREF(op_in); + return -1; } + *out_objs = PySequence_Fast_ITEMS(op_in); + *out_owner = seq; + return (int)length; } else { - nop = 1; - /* Is converted to an array after op flags are retrieved */ Py_INCREF(op_in); - op[0] = (PyArrayObject *)op_in; + *out_owner = op_in; + *out_objs = out_owner; + return 1; } +} - *nop_out = nop; - +/* + * Converts the operand array and op_flags array into the form + * NpyIter_AdvancedNew needs. Sets nop, and on success, each + * op[i] owns a reference to an array object. + */ +static int +npyiter_convert_ops(int nop, PyObject **op_objs, PyObject *op_flags_in, + PyArrayObject **op, npy_uint32 *op_flags) +{ /* op_flags */ if (op_flags_in == NULL || op_flags_in == Py_None) { - for (iop = 0; iop < nop; ++iop) { + for (int iop = 0; iop < nop; ++iop) { /* * By default, make NULL operands writeonly and flagged for * allocation, and everything else readonly. To write * to a provided operand, you must specify the write flag manually. */ - if (op[iop] == NULL) { + if (op_objs[iop] == Py_None) { op_flags[iop] = NPY_ITER_WRITEONLY | NPY_ITER_ALLOCATE; } else { @@ -645,23 +646,19 @@ npyiter_convert_ops(PyObject *op_in, PyObject *op_flags_in, } else if (npyiter_convert_op_flags_array(op_flags_in, op_flags, nop) != 1) { - for (iop = 0; iop < nop; ++iop) { - Py_XDECREF(op[iop]); - } - *nop_out = 0; return 0; } /* Now that we have the flags - convert all the ops to arrays */ - for (iop = 0; iop < nop; ++iop) { - if (op[iop] != NULL) { + for (int iop = 0; iop < nop; ++iop) { + if (op_objs[iop] != Py_None) { PyArrayObject *ao; int fromanyflags = 0; if (op_flags[iop]&(NPY_ITER_READWRITE|NPY_ITER_WRITEONLY)) { fromanyflags |= NPY_ARRAY_WRITEBACKIFCOPY; } - ao = (PyArrayObject *)PyArray_FROM_OF((PyObject *)op[iop], + ao = (PyArrayObject *)PyArray_FROM_OF((PyObject *)op_objs[iop], fromanyflags); if (ao == NULL) { if (PyErr_Occurred() && @@ -671,13 +668,8 @@ npyiter_convert_ops(PyObject *op_in, PyObject *op_flags_in, "but is an object which cannot be written " "back to via WRITEBACKIFCOPY"); } - for (iop = 0; iop < nop; ++iop) { - Py_DECREF(op[iop]); - } - *nop_out = 0; return 0; } - Py_DECREF(op[iop]); op[iop] = ao; } } @@ -696,19 +688,15 @@ npyiter_init(NewNpyArrayIterObject *self, PyObject *args, PyObject *kwds) PyObject *op_in = NULL, *op_flags_in = NULL, *op_dtypes_in = NULL, *op_axes_in = NULL; - int iop, nop = 0; - PyArrayObject *op[NPY_MAXARGS]; npy_uint32 flags = 0; NPY_ORDER order = NPY_KEEPORDER; NPY_CASTING casting = NPY_SAFE_CASTING; - npy_uint32 op_flags[NPY_MAXARGS]; - PyArray_Descr *op_request_dtypes[NPY_MAXARGS]; int oa_ndim = -1; - int op_axes_arrays[NPY_MAXARGS][NPY_MAXDIMS]; - int *op_axes[NPY_MAXARGS]; PyArray_Dims itershape = {NULL, -1}; int buffersize = 0; + int res = -1; + if (self->iter != NULL) { PyErr_SetString(PyExc_ValueError, "Iterator was already initialized"); @@ -729,32 +717,60 @@ npyiter_init(NewNpyArrayIterObject *self, PyObject *args, PyObject *kwds) return -1; } - /* Set the dtypes and ops to all NULL to start */ - memset(op_request_dtypes, 0, sizeof(op_request_dtypes)); + /* Need nop to set up workspaces */ + PyObject **op_objs = NULL; + PyObject *op_in_owned; /* Sequence/object owning op_objs. 
*/ + int nop = npyiter_prepare_ops(op_in, &op_in_owned, &op_objs); + if (nop < 0) { + npy_free_cache_dim_obj(itershape); + return -1; + } + + /* allocate workspace for Python objects (operands and dtypes) */ + NPY_ALLOC_WORKSPACE(op, PyArrayObject *, 2 * 8, 2 * nop); + if (op == NULL) { + npy_free_cache_dim_obj(itershape); + Py_DECREF(op_in_owned); + return -1; + } + memset(op, 0, sizeof(PyObject *) * 2 * nop); + PyArray_Descr **op_request_dtypes = (PyArray_Descr **)(op + nop); + + /* And other workspaces (that do not need to clean up their content) */ + NPY_ALLOC_WORKSPACE(op_flags, npy_uint32, 8, nop); + NPY_ALLOC_WORKSPACE(op_axes_storage, int, 8 * NPY_MAXDIMS, nop * NPY_MAXDIMS); + NPY_ALLOC_WORKSPACE(op_axes, int *, 8, nop); + /* + * Trying to allocate should be OK if one failed, check for error now + * that we can use `goto cleanup` to clean up everything. + * (NPY_ALLOC_WORKSPACE has to be done before a goto fail currently.) + */ + if (op_flags == NULL || op_axes_storage == NULL || op_axes == NULL) { + goto finish; + } /* op and op_flags */ - if (npyiter_convert_ops(op_in, op_flags_in, op, op_flags, &nop) - != 1) { - goto fail; + if (npyiter_convert_ops(nop, op_objs, op_flags_in, op, op_flags) != 1) { + goto finish; } /* op_request_dtypes */ if (op_dtypes_in != NULL && op_dtypes_in != Py_None && npyiter_convert_dtypes(op_dtypes_in, op_request_dtypes, nop) != 1) { - goto fail; + goto finish; } /* op_axes */ if (op_axes_in != NULL && op_axes_in != Py_None) { /* Initialize to point to the op_axes arrays */ - for (iop = 0; iop < nop; ++iop) { - op_axes[iop] = op_axes_arrays[iop]; + for (int iop = 0; iop < nop; ++iop) { + op_axes[iop] = &op_axes_storage[iop * NPY_MAXDIMS]; } if (npyiter_convert_op_axes(op_axes_in, nop, op_axes, &oa_ndim) != 1) { - goto fail; + goto finish; } } @@ -767,7 +783,7 @@ npyiter_init(NewNpyArrayIterObject *self, PyObject *args, PyObject *kwds) PyErr_SetString(PyExc_ValueError, "'op_axes' and 'itershape' must have the same number " "of entries equal to the iterator ndim"); - goto fail; + goto finish; } } @@ -778,12 +794,12 @@ npyiter_init(NewNpyArrayIterObject *self, PyObject *args, PyObject *kwds) buffersize); if (self->iter == NULL) { - goto fail; + goto finish; } /* Cache some values for the member functions to use */ if (npyiter_cache_values(self) < 0) { - goto fail; + goto finish; } if (NpyIter_GetIterSize(self->iter) == 0) { @@ -795,25 +811,24 @@ npyiter_init(NewNpyArrayIterObject *self, PyObject *args, PyObject *kwds) self->finished = 0; } - npy_free_cache_dim_obj(itershape); + res = 0; - /* Release the references we got to the ops and dtypes */ - for (iop = 0; iop < nop; ++iop) { - Py_XDECREF(op[iop]); - Py_XDECREF(op_request_dtypes[iop]); - } - - return 0; + finish: + Py_DECREF(op_in_owned); -fail: npy_free_cache_dim_obj(itershape); - for (iop = 0; iop < nop; ++iop) { + for (int iop = 0; iop < nop; ++iop) { Py_XDECREF(op[iop]); Py_XDECREF(op_request_dtypes[iop]); } - return -1; + npy_free_workspace(op); + npy_free_workspace(op_flags); + npy_free_workspace(op_axes_storage); + npy_free_workspace(op_axes); + return res; } + NPY_NO_EXPORT PyObject * NpyIter_NestedIters(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds) @@ -826,14 +841,11 @@ NpyIter_NestedIters(PyObject *NPY_UNUSED(self), PyObject *op_in = NULL, *axes_in = NULL, *op_flags_in = NULL, *op_dtypes_in = NULL; - int iop, nop = 0, inest, nnest = 0; - PyArrayObject *op[NPY_MAXARGS]; + int iop, inest, nnest = 0; npy_uint32 flags = 0, flags_inner; NPY_ORDER order = NPY_KEEPORDER; NPY_CASTING 
casting = NPY_SAFE_CASTING; - npy_uint32 op_flags[NPY_MAXARGS], op_flags_inner[NPY_MAXARGS]; - PyArray_Descr *op_request_dtypes[NPY_MAXARGS], - *op_request_dtypes_inner[NPY_MAXARGS]; + int op_axes_data[NPY_MAXDIMS]; int *nested_op_axes[NPY_MAXDIMS]; int nested_naxes[NPY_MAXDIMS], iaxes, naxes; @@ -841,7 +853,8 @@ NpyIter_NestedIters(PyObject *NPY_UNUSED(self), char used_axes[NPY_MAXDIMS]; int buffersize = 0; - PyObject *ret = NULL; + PyObject *res = NULL; /* returned */ + PyObject *ret = NULL; /* intermediate object on failure */ if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|O&OOO&O&i", kwlist, &op_in, @@ -921,27 +934,53 @@ NpyIter_NestedIters(PyObject *NPY_UNUSED(self), Py_DECREF(item); } - /* op and op_flags */ - if (npyiter_convert_ops(op_in, op_flags_in, op, op_flags, &nop) - != 1) { + /* Need nop to set up workspaces */ + PyObject **op_objs = NULL; + PyObject *op_in_owned; /* Sequence/object owning op_objs. */ + int nop = npyiter_prepare_ops(op_in, &op_in_owned, &op_objs); + if (nop < 0) { + return NULL; + } + + /* allocate workspace for Python objects (operands and dtypes) */ + NPY_ALLOC_WORKSPACE(op, PyArrayObject *, 3 * 8, 3 * nop); + if (op == NULL) { + Py_DECREF(op_in_owned); return NULL; } + memset(op, 0, sizeof(PyObject *) * 3 * nop); + PyArray_Descr **op_request_dtypes = (PyArray_Descr **)(op + nop); + PyArray_Descr **op_request_dtypes_inner = op_request_dtypes + nop; + + /* And other workspaces (that do not need to clean up their content) */ + NPY_ALLOC_WORKSPACE(op_flags, npy_uint32, 8, nop); + NPY_ALLOC_WORKSPACE(op_flags_inner, npy_uint32, 8, nop); + NPY_ALLOC_WORKSPACE(op_axes_storage, int, 8 * NPY_MAXDIMS, nop * NPY_MAXDIMS); + NPY_ALLOC_WORKSPACE(op_axes, int *, 8, nop); + /* + * Trying to allocate should be OK if one failed, check for error now + * that we can use `goto cleanup` to clean up everything. + * (NPY_ALLOC_WORKSPACE has to be done before a goto fail currently.) + */ + if (op_flags == NULL || op_axes_storage == NULL || op_axes == NULL) { + goto finish; + } - /* Set the dtypes to all NULL to start as well */ - memset(op_request_dtypes, 0, sizeof(op_request_dtypes[0])*nop); - memset(op_request_dtypes_inner, 0, - sizeof(op_request_dtypes_inner[0])*nop); + /* op and op_flags */ + if (npyiter_convert_ops(nop, op_objs, op_flags_in, op, op_flags) != 1) { + goto finish; + } /* op_request_dtypes */ if (op_dtypes_in != NULL && op_dtypes_in != Py_None && npyiter_convert_dtypes(op_dtypes_in, op_request_dtypes, nop) != 1) { - goto fail; + goto finish; } ret = PyTuple_New(nnest); if (ret == NULL) { - goto fail; + goto finish; } /* For broadcasting allocated arrays */ @@ -1023,10 +1062,12 @@ NpyIter_NestedIters(PyObject *NPY_UNUSED(self), /* Allocate the iterator */ iter = (NewNpyArrayIterObject *)npyiter_new(&NpyIter_Type, NULL, NULL); if (iter == NULL) { - Py_DECREF(ret); - goto fail; + goto finish; } + /* Store iter into return tuple (owns the reference). 
*/ + PyTuple_SET_ITEM(ret, inest, (PyObject *)iter); + if (inest < nnest-1) { iter->iter = NpyIter_AdvancedNew(nop, op, flags, order, casting, op_flags, op_request_dtypes, @@ -1044,15 +1085,12 @@ NpyIter_NestedIters(PyObject *NPY_UNUSED(self), } if (iter->iter == NULL) { - Py_DECREF(ret); - Py_DECREF(iter); - goto fail; + goto finish; } /* Cache some values for the member functions to use */ if (npyiter_cache_values(iter) < 0) { - Py_DECREF(ret); - goto fail; + goto finish; } if (NpyIter_GetIterSize(iter->iter) == 0) { @@ -1087,15 +1125,6 @@ NpyIter_NestedIters(PyObject *NPY_UNUSED(self), /* Clear the common dtype flag for the rest of the iterators */ flags &= ~NPY_ITER_COMMON_DTYPE; } - - PyTuple_SET_ITEM(ret, inest, (PyObject *)iter); - } - - /* Release our references to the ops and dtypes */ - for (iop = 0; iop < nop; ++iop) { - Py_XDECREF(op[iop]); - Py_XDECREF(op_request_dtypes[iop]); - Py_XDECREF(op_request_dtypes_inner[iop]); } /* Set up the nested child references */ @@ -1115,20 +1144,29 @@ NpyIter_NestedIters(PyObject *NPY_UNUSED(self), */ if (NpyIter_ResetBasePointers(iter->nested_child->iter, iter->dataptrs, NULL) != NPY_SUCCEED) { - Py_DECREF(ret); - return NULL; + goto finish; } } - return ret; + res = Py_NewRef(ret); + +finish: + Py_DECREF(op_in_owned); + Py_XDECREF(ret); -fail: for (iop = 0; iop < nop; ++iop) { Py_XDECREF(op[iop]); Py_XDECREF(op_request_dtypes[iop]); Py_XDECREF(op_request_dtypes_inner[iop]); } - return NULL; + + npy_free_workspace(op); + npy_free_workspace(op_flags); + npy_free_workspace(op_flags_inner); + npy_free_workspace(op_axes_storage); + npy_free_workspace(op_axes); + + return res; } @@ -2001,21 +2039,6 @@ npyiter_seq_item(NewNpyArrayIterObject *self, Py_ssize_t i) return NULL; } -#if 0 - /* - * This check is disabled because it prevents things like - * np.add(it[0], it[1], it[2]), where it[2] is a write-only - * parameter. When write-only, the value of it[i] is - * likely random junk, as if it were allocated with an - * np.empty(...) call. - */ - if (!self->readflags[i]) { - PyErr_Format(PyExc_RuntimeError, - "Iterator operand %zd is write-only", i); - return NULL; - } -#endif - dataptr = self->dataptrs[i]; dtype = self->dtypes[i]; has_external_loop = NpyIter_HasExternalLoop(self->iter); diff --git a/numpy/_core/tests/test_nditer.py b/numpy/_core/tests/test_nditer.py index b61b87ec69f1..083af756fffe 100644 --- a/numpy/_core/tests/test_nditer.py +++ b/numpy/_core/tests/test_nditer.py @@ -12,6 +12,7 @@ assert_, assert_equal, assert_array_equal, assert_raises, IS_WASM, HAS_REFCOUNT, suppress_warnings, break_cycles, ) +from numpy.testing._private.utils import requires_memory def iter_multi_index(i): ret = [] @@ -719,8 +720,6 @@ def test_iter_flags_errors(): # Not enough operands assert_raises(ValueError, nditer, [], [], []) - # Too many operands - assert_raises(ValueError, nditer, [a] * 100, [], [['readonly']] * 100) # Bad global flag assert_raises(ValueError, nditer, [a], ['bad flag'], [['readonly']]) # Bad op flag @@ -3379,6 +3378,27 @@ def test_partial_iteration_error(in_dtype, buf_dtype): assert count == sys.getrefcount(value) +def test_arbitrary_number_of_ops(): + # 2*16 + 1 is still just a few kiB, so should be fast an easy to deal with + # but larger than any small custom integer. 
+ ops = [np.arange(10) for a in range(2**16 + 1)] + + it = np.nditer(ops) + for i, vals in enumerate(it): + assert all(v == i for v in vals) + + +@pytest.mark.slow +@requires_memory(10 * np.iinfo(np.intc).max) +def test_arbitrary_number_of_ops_error(): + # A different error may happen for more than integer operands, but that + # is too large to test nicely. + a = np.ones(1) + args = [a] * (np.iinfo(np.intc).max + 1) + with pytest.raises(ValueError, match="Too many operands to nditer"): + np.nditer(args) + + def test_debug_print(capfd): """ Matches the expected output of a debug print with the actual output. From a41abb33365695b4012d539e9c0b274569947a43 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 31 Dec 2024 14:06:47 +0100 Subject: [PATCH 2/5] BUG,TST: Test nested_iters and remove missed MAXARGS use --- numpy/_core/src/multiarray/nditer_pywrap.c | 5 ++--- numpy/_core/tests/test_nditer.py | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/numpy/_core/src/multiarray/nditer_pywrap.c b/numpy/_core/src/multiarray/nditer_pywrap.c index 9658408a80ae..c0f2ec8ba056 100644 --- a/numpy/_core/src/multiarray/nditer_pywrap.c +++ b/numpy/_core/src/multiarray/nditer_pywrap.c @@ -956,7 +956,8 @@ NpyIter_NestedIters(PyObject *NPY_UNUSED(self), NPY_ALLOC_WORKSPACE(op_flags, npy_uint32, 8, nop); NPY_ALLOC_WORKSPACE(op_flags_inner, npy_uint32, 8, nop); NPY_ALLOC_WORKSPACE(op_axes_storage, int, 8 * NPY_MAXDIMS, nop * NPY_MAXDIMS); - NPY_ALLOC_WORKSPACE(op_axes, int *, 8, nop); + NPY_ALLOC_WORKSPACE(op_axes, int *, 2 * 8, 2 * nop); + int **op_axes_nop = op_axes + nop; /* * Trying to allocate should be OK if one failed, check for error now * that we can use `goto cleanup` to clean up everything. @@ -1027,8 +1028,6 @@ NpyIter_NestedIters(PyObject *NPY_UNUSED(self), for (inest = 0; inest < nnest; ++inest) { NewNpyArrayIterObject *iter; - int *op_axes_nop[NPY_MAXARGS]; - /* * All the operands' op_axes are the same, except for * allocated outputs. diff --git a/numpy/_core/tests/test_nditer.py b/numpy/_core/tests/test_nditer.py index 083af756fffe..bbf596bea41c 100644 --- a/numpy/_core/tests/test_nditer.py +++ b/numpy/_core/tests/test_nditer.py @@ -3388,8 +3388,18 @@ def test_arbitrary_number_of_ops(): assert all(v == i for v in vals) +def test_arbitrary_number_of_ops_nested(): + # 2*16 + 1 is still just a few kiB, so should be fast an easy to deal with + # but larger than any small custom integer. + ops = [np.arange(10) for a in range(2**16 + 1)] + + it = np.nested_iters(ops, [[0], []]) + for i, vals in enumerate(it): + assert all(v == i for v in vals) + + @pytest.mark.slow -@requires_memory(10 * np.iinfo(np.intc).max) +@requires_memory(9 * np.iinfo(np.intc).max) def test_arbitrary_number_of_ops_error(): # A different error may happen for more than integer operands, but that # is too large to test nicely. @@ -3398,6 +3408,9 @@ def test_arbitrary_number_of_ops_error(): with pytest.raises(ValueError, match="Too many operands to nditer"): np.nditer(args) + with pytest.raises(ValueError, match="Too many operands to nditer"): + np.nested_iters(args, [[0], []]) + def test_debug_print(capfd): """ From a380ff249e5c1841d0f0104079b444ce7c612d16 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 31 Dec 2024 14:11:50 +0100 Subject: [PATCH 3/5] MAINT: Add (unnecessary) error check on npyiter_cache_values The check is unnecessary here (it cannot fail) but it seemed clearer to not use that knowledge when it depends on where we call it. 
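As a quick usage-level illustration of the `np.nested_iters` path exercised by the `test_arbitrary_number_of_ops_nested` test added in PATCH 2/5 above — a minimal sketch, not taken from the patch; the arrays `a` and `b` and the printed values are illustrative only:

```python
import numpy as np

a = np.arange(6).reshape(2, 3)
b = 10 * np.arange(6).reshape(2, 3)

# Axes [[0], [1]]: the outer iterator walks axis 0, the inner one axis 1;
# both operands are iterated together, one element pair at a time.
outer, inner = np.nested_iters([a, b], [[0], [1]])
for _ in outer:
    print([(int(x), int(y)) for x, y in inner])
# [(0, 0), (1, 10), (2, 20)]
# [(3, 30), (4, 40), (5, 50)]
```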
--- numpy/_core/src/multiarray/nditer_pywrap.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/numpy/_core/src/multiarray/nditer_pywrap.c b/numpy/_core/src/multiarray/nditer_pywrap.c index c0f2ec8ba056..b45493ff2f80 100644 --- a/numpy/_core/src/multiarray/nditer_pywrap.c +++ b/numpy/_core/src/multiarray/nditer_pywrap.c @@ -1369,7 +1369,9 @@ npyiter_remove_multi_index( NpyIter_RemoveMultiIndex(self->iter); /* RemoveMultiIndex invalidates cached values */ - npyiter_cache_values(self); + if (npyiter_cache_values(self) < 0) { + return NULL; + } /* RemoveMultiIndex also resets the iterator */ if (NpyIter_GetIterSize(self->iter) == 0) { self->started = 1; @@ -1395,7 +1397,9 @@ npyiter_enable_external_loop( NpyIter_EnableExternalLoop(self->iter); /* EnableExternalLoop invalidates cached values */ - npyiter_cache_values(self); + if (npyiter_cache_values(self) < 0) { + return NULL; + } /* EnableExternalLoop also resets the iterator */ if (NpyIter_GetIterSize(self->iter) == 0) { self->started = 1; From 81f56180dbca93f4f9f46452c1344d235d838cea Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 2 Jan 2025 14:25:02 +0100 Subject: [PATCH 4/5] Address Marten's review comments (mostly, some are slightly less clear if they are a big improvement) --- numpy/_core/src/multiarray/alloc.h | 5 ++++ numpy/_core/src/multiarray/nditer_pywrap.c | 27 ++++++++++++---------- numpy/_core/tests/test_nditer.py | 4 ++-- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/numpy/_core/src/multiarray/alloc.h b/numpy/_core/src/multiarray/alloc.h index f5600c99aaa5..a93402803674 100644 --- a/numpy/_core/src/multiarray/alloc.h +++ b/numpy/_core/src/multiarray/alloc.h @@ -98,6 +98,11 @@ _npy_init_workspace( TYPE *NAME; \ _npy_init_workspace((void **)&NAME, NAME##_static, (fixed_size), sizeof(TYPE), (size)) +/* Same as above, but additionally nulls the workspace */ +#define NPY_CALLOC_WORKSPACE(NAME, TYPE, fixed_size, size) \ + TYPE NAME##_static[fixed_size]; \ + TYPE *NAME; \ + _npy_cinit_workspace((void **)&NAME, NAME##_static, (fixed_size), sizeof(TYPE), (size)); static inline void _npy_free_workspace(void *buf, void *static_buf) diff --git a/numpy/_core/src/multiarray/nditer_pywrap.c b/numpy/_core/src/multiarray/nditer_pywrap.c index b45493ff2f80..5bfd3748ce95 100644 --- a/numpy/_core/src/multiarray/nditer_pywrap.c +++ b/numpy/_core/src/multiarray/nditer_pywrap.c @@ -613,16 +613,16 @@ npyiter_prepare_ops(PyObject *op_in, PyObject **out_owner, PyObject ***out_objs) } else { Py_INCREF(op_in); + *out_objs = out_owner; /* `out_owner` is in caller stack space */ *out_owner = op_in; - *out_objs = out_owner; return 1; } } /* * Converts the operand array and op_flags array into the form - * NpyIter_AdvancedNew needs. Sets nop, and on success, each - * op[i] owns a reference to an array object. + * NpyIter_AdvancedNew needs. On success, each op[i] owns a reference + * to an array object. */ static int npyiter_convert_ops(int nop, PyObject **op_objs, PyObject *op_flags_in, @@ -719,19 +719,16 @@ npyiter_init(NewNpyArrayIterObject *self, PyObject *args, PyObject *kwds) /* Need nop to set up workspaces */ PyObject **op_objs = NULL; - PyObject *op_in_owned; /* Sequence/object owning op_objs. */ + PyObject *op_in_owned = NULL; /* Sequence/object owning op_objs. 
*/ int nop = npyiter_prepare_ops(op_in, &op_in_owned, &op_objs); if (nop < 0) { - npy_free_cache_dim_obj(itershape); - return -1; + goto pre_alloc_fail; } /* allocate workspace for Python objects (operands and dtypes) */ NPY_ALLOC_WORKSPACE(op, PyArrayObject *, 2 * 8, 2 * nop); if (op == NULL) { - npy_free_cache_dim_obj(itershape); - Py_DECREF(op_in_owned); - return -1; + goto pre_alloc_fail; } memset(op, 0, sizeof(PyObject *) * 2 * nop); PyArray_Descr **op_request_dtypes = (PyArray_Descr **)(op + nop); @@ -742,7 +739,7 @@ npyiter_init(NewNpyArrayIterObject *self, PyObject *args, PyObject *kwds) NPY_ALLOC_WORKSPACE(op_axes, int *, 8, nop); /* * Trying to allocate should be OK if one failed, check for error now - * that we can use `goto cleanup` to clean up everything. + * that we can use `goto finish` to clean up everything. * (NPY_ALLOC_WORKSPACE has to be done before a goto fail currently.) */ if (op_flags == NULL || op_axes_storage == NULL || op_axes == NULL) { @@ -826,6 +823,11 @@ npyiter_init(NewNpyArrayIterObject *self, PyObject *args, PyObject *kwds) npy_free_workspace(op_axes_storage); npy_free_workspace(op_axes); return res; + + pre_alloc_fail: + Py_XDECREF(op_in_owned); + npy_free_cache_dim_obj(itershape); + return -1; } @@ -957,15 +959,16 @@ NpyIter_NestedIters(PyObject *NPY_UNUSED(self), NPY_ALLOC_WORKSPACE(op_flags_inner, npy_uint32, 8, nop); NPY_ALLOC_WORKSPACE(op_axes_storage, int, 8 * NPY_MAXDIMS, nop * NPY_MAXDIMS); NPY_ALLOC_WORKSPACE(op_axes, int *, 2 * 8, 2 * nop); - int **op_axes_nop = op_axes + nop; /* * Trying to allocate should be OK if one failed, check for error now - * that we can use `goto cleanup` to clean up everything. + * that we can use `goto finish` to clean up everything. * (NPY_ALLOC_WORKSPACE has to be done before a goto fail currently.) */ if (op_flags == NULL || op_axes_storage == NULL || op_axes == NULL) { goto finish; } + /* Finalize shared workspace: */ + int **op_axes_nop = op_axes + nop; /* op and op_flags */ if (npyiter_convert_ops(nop, op_objs, op_flags_in, op, op_flags) != 1) { diff --git a/numpy/_core/tests/test_nditer.py b/numpy/_core/tests/test_nditer.py index bbf596bea41c..216682100e71 100644 --- a/numpy/_core/tests/test_nditer.py +++ b/numpy/_core/tests/test_nditer.py @@ -3379,7 +3379,7 @@ def test_partial_iteration_error(in_dtype, buf_dtype): def test_arbitrary_number_of_ops(): - # 2*16 + 1 is still just a few kiB, so should be fast an easy to deal with + # 2*16 + 1 is still just a few kiB, so should be fast and easy to deal with # but larger than any small custom integer. ops = [np.arange(10) for a in range(2**16 + 1)] @@ -3389,7 +3389,7 @@ def test_arbitrary_number_of_ops(): def test_arbitrary_number_of_ops_nested(): - # 2*16 + 1 is still just a few kiB, so should be fast an easy to deal with + # 2*16 + 1 is still just a few kiB, so should be fast and easy to deal with # but larger than any small custom integer. 
ops = [np.arange(10) for a in range(2**16 + 1)] From 50970482dc48230049bc6530ba3c80323d4af7ef Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 2 Jan 2025 17:40:46 +0100 Subject: [PATCH 5/5] More smaller fixes from review (and previous commmit) --- numpy/_core/src/multiarray/alloc.h | 5 ----- numpy/_core/src/multiarray/nditer_pywrap.c | 7 ++----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/numpy/_core/src/multiarray/alloc.h b/numpy/_core/src/multiarray/alloc.h index a93402803674..f5600c99aaa5 100644 --- a/numpy/_core/src/multiarray/alloc.h +++ b/numpy/_core/src/multiarray/alloc.h @@ -98,11 +98,6 @@ _npy_init_workspace( TYPE *NAME; \ _npy_init_workspace((void **)&NAME, NAME##_static, (fixed_size), sizeof(TYPE), (size)) -/* Same as above, but additionally nulls the workspace */ -#define NPY_CALLOC_WORKSPACE(NAME, TYPE, fixed_size, size) \ - TYPE NAME##_static[fixed_size]; \ - TYPE *NAME; \ - _npy_cinit_workspace((void **)&NAME, NAME##_static, (fixed_size), sizeof(TYPE), (size)); static inline void _npy_free_workspace(void *buf, void *static_buf) diff --git a/numpy/_core/src/multiarray/nditer_pywrap.c b/numpy/_core/src/multiarray/nditer_pywrap.c index 5bfd3748ce95..27c392db8720 100644 --- a/numpy/_core/src/multiarray/nditer_pywrap.c +++ b/numpy/_core/src/multiarray/nditer_pywrap.c @@ -811,9 +811,6 @@ npyiter_init(NewNpyArrayIterObject *self, PyObject *args, PyObject *kwds) res = 0; finish: - Py_DECREF(op_in_owned); - - npy_free_cache_dim_obj(itershape); for (int iop = 0; iop < nop; ++iop) { Py_XDECREF(op[iop]); Py_XDECREF(op_request_dtypes[iop]); @@ -822,12 +819,11 @@ npyiter_init(NewNpyArrayIterObject *self, PyObject *args, PyObject *kwds) npy_free_workspace(op_flags); npy_free_workspace(op_axes_storage); npy_free_workspace(op_axes); - return res; pre_alloc_fail: Py_XDECREF(op_in_owned); npy_free_cache_dim_obj(itershape); - return -1; + return res; } @@ -1205,6 +1201,7 @@ npyiter_dealloc(NewNpyArrayIterObject *self) self->nested_child = NULL; PyErr_Restore(exc, val, tb); } + PyMem_Free(self->writeflags); Py_TYPE(self)->tp_free((PyObject*)self); }
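For reference, a usage-level sketch of what the series enables, assuming a NumPy build that includes these patches (the operands and values below are illustrative, not from the tests): the check removed from `test_iter_flags_errors` in PATCH 1/5 shows that 100 operands used to exceed `NPY_MAXARGS` and raise `ValueError`, whereas `np.nditer` now simply iterates them.

```python
import numpy as np

# 100 operands exceeded the old NPY_MAXARGS limit and raised
# "Too many operands"; with this series it is a normal iteration.
ops = [k * np.arange(5) for k in range(100)]

sums = []
for values in np.nditer(ops):
    # `values` is a tuple holding one 0-d view per operand.
    sums.append(sum(int(v) for v in values))

print(sums)  # [0, 4950, 9900, 14850, 19800]
```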