diff --git a/doc/release/upcoming_changes/27998.c_api.rst b/doc/release/upcoming_changes/27998.c_api.rst new file mode 100644 index 000000000000..edc6371af1f9 --- /dev/null +++ b/doc/release/upcoming_changes/27998.c_api.rst @@ -0,0 +1,10 @@ +New `NpyIter_GetTransferFlags` and ``NpyIter_IterationNeedsAPI`` change +----------------------------------------------------------------------- +NumPy now has the new `NpyIter_GetTransferFlags` function as a more precise +way of checking iterator/buffering needs. I.e. whether the Python API/GIL is +required or floating point errors may occur. +This function is also faster if you already know your needs without buffering. + +The ``NpyIter_IterationNeedsAPI`` function now performs all the checks that were +previously performed at setup time. While it was never necessary to call it +multiple times, doing so will now have a larger cost. diff --git a/doc/source/reference/c-api/iterator.rst b/doc/source/reference/c-api/iterator.rst index 817bcad7e4a2..5ab1d5a7ea7b 100644 --- a/doc/source/reference/c-api/iterator.rst +++ b/doc/source/reference/c-api/iterator.rst @@ -434,6 +434,9 @@ Construction and destruction is enabled, the caller must be sure to check whether ``NpyIter_IterationNeedsAPI(iter)`` is true, in which case it may not release the GIL during iteration. + If you are working with known dtypes `NpyIter_GetTransferFlags` is + a faster and more precise way to check whether the iterator needs + the API due to buffering. .. c:macro:: NPY_ITER_ZEROSIZE_OK @@ -823,6 +826,20 @@ Construction and destruction Returns ``NPY_SUCCEED`` or ``NPY_FAIL``. +.. c:function:: NPY_ARRAYMETHOD_FLAGS NpyIter_GetTransferFlags(NpyIter *iter) + + .. versionadded:: 2.3 + + Fetches the `NPY_METH_RUNTIME_FLAGS` which provide the information on + whether buffering needs the Python GIL (`NPY_METH_REQUIRES_PYAPI`) or + floating point errors may be set (`NPY_METH_NO_FLOATINGPOINT_ERRORS`). 
+ + Prior to NumPy 2.3, the public function available was + ``NpyIter_IterationNeedsAPI``, which is still available and additionally + checks for object (or similar) dtypes and not exclusively for + buffering/iteration needs itself. + In general, this function should be preferred. + .. c:function:: int NpyIter_Reset(NpyIter* iter, char** errmsg) Resets the iterator back to its initial state, at the beginning diff --git a/numpy/_core/code_generators/cversions.txt b/numpy/_core/code_generators/cversions.txt index 2024083d0ac9..41a432d35c54 100644 --- a/numpy/_core/code_generators/cversions.txt +++ b/numpy/_core/code_generators/cversions.txt @@ -77,5 +77,5 @@ 0x00000012 = 2b8f1f4da822491ff030b2b37dff07e3 # Version 19 (NumPy 2.1.0) Only header additions # Version 19 (NumPy 2.2.0) No change -# Version 19 (NumPy 2.3.0) No change -0x00000013 = 2b8f1f4da822491ff030b2b37dff07e3 +# Version 19 (NumPy 2.3.0) +0x00000013 = e56b74d32a934d085e7c3414cb9999b8, diff --git a/numpy/_core/code_generators/numpy_api.py b/numpy/_core/code_generators/numpy_api.py index ffdd70b6fe00..79a8bec18459 100644 --- a/numpy/_core/code_generators/numpy_api.py +++ b/numpy/_core/code_generators/numpy_api.py @@ -106,7 +106,7 @@ def get_annotations(): '__unused_indices__': ( [1, 4, 40, 41, 66, 67, 68, 81, 82, 83, 103, 115, 117, 122, 163, 164, 171, 173, 197, - 201, 202, 208, 219, 220, 221, 222, 223, 278, + 201, 202, 208, 219, 220, 221, 222, 278, 291, 293, 294, 295, 301] # range/slots reserved DType classes (see _public_dtype_api_table.h): + list(range(320, 361)) + [366, 367, 368] @@ -293,8 +293,8 @@ def get_annotations(): # Unused slot 220, was `PyArray_DatetimeToDatetimeStruct` # Unused slot 221, was `PyArray_TimedeltaToTimedeltaStruct` # Unused slot 222, was `PyArray_DatetimeStructToDatetime` - # Unused slot 223, was `PyArray_TimedeltaStructToTimedelta` # NDIter API + 'NpyIter_GetTransferFlags': (223, MinVersion("2.3")), 'NpyIter_New': (224,), 'NpyIter_MultiNew': (225,), 'NpyIter_AdvancedNew': (226,), 
@@ -407,6 +407,8 @@ def get_annotations(): # `PyDataType_GetArrFuncs` checks for the NumPy runtime version. '_PyDataType_GetArrFuncs': (365,), # End 2.0 API + # NpyIter_GetTransferFlags (slot 223) added. + # End 2.3 API } ufunc_types_api = { diff --git a/numpy/_core/src/multiarray/common.h b/numpy/_core/src/multiarray/common.h index f4ba10d42e18..46fe2a6f572a 100644 --- a/numpy/_core/src/multiarray/common.h +++ b/numpy/_core/src/multiarray/common.h @@ -18,17 +18,6 @@ extern "C" { #define error_converting(x) (((x) == -1) && PyErr_Occurred()) -#ifdef NPY_ALLOW_THREADS -#define NPY_BEGIN_THREADS_NDITER(iter) \ - do { \ - if (!NpyIter_IterationNeedsAPI(iter)) { \ - NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter)); \ - } \ - } while(0) -#else -#define NPY_BEGIN_THREADS_NDITER(iter) -#endif - NPY_NO_EXPORT PyArray_Descr * PyArray_DTypeFromObjectStringDiscovery( diff --git a/numpy/_core/src/multiarray/ctors.c b/numpy/_core/src/multiarray/ctors.c index c9f9ac3941a9..0723e54f3441 100644 --- a/numpy/_core/src/multiarray/ctors.c +++ b/numpy/_core/src/multiarray/ctors.c @@ -2696,7 +2696,6 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order) npy_intp dst_count, src_count, count; npy_intp dst_size, src_size; - int needs_api; NPY_BEGIN_THREADS_DEF; @@ -2757,13 +2756,13 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order) /* Get all the values needed for the inner loop */ dst_iternext = NpyIter_GetIterNext(dst_iter, NULL); dst_dataptr = NpyIter_GetDataPtrArray(dst_iter); - /* Since buffering is disabled, we can cache the stride */ + /* The inner stride is also the fixed stride for the whole iteration. 
*/ dst_stride = NpyIter_GetInnerStrideArray(dst_iter)[0]; dst_countptr = NpyIter_GetInnerLoopSizePtr(dst_iter); src_iternext = NpyIter_GetIterNext(src_iter, NULL); src_dataptr = NpyIter_GetDataPtrArray(src_iter); - /* Since buffering is disabled, we can cache the stride */ + /* The inner stride is also the fixed stride for the whole iteration. */ src_stride = NpyIter_GetInnerStrideArray(src_iter)[0]; src_countptr = NpyIter_GetInnerLoopSizePtr(src_iter); @@ -2773,15 +2772,6 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order) return -1; } - needs_api = NpyIter_IterationNeedsAPI(dst_iter) || - NpyIter_IterationNeedsAPI(src_iter); - - /* - * Because buffering is disabled in the iterator, the inner loop - * strides will be the same throughout the iteration loop. Thus, - * we can pass them to this function to take advantage of - * contiguous strides, etc. - */ NPY_cast_info cast_info; NPY_ARRAYMETHOD_FLAGS flags; if (PyArray_GetDTypeTransferFunction( @@ -2795,7 +2785,8 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order) NpyIter_Deallocate(src_iter); return -1; } - needs_api |= (flags & NPY_METH_REQUIRES_PYAPI) != 0; + /* No need to worry about API use in unbuffered iterator */ + int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { npy_clear_floatstatus_barrier((char *)src_iter); } diff --git a/numpy/_core/src/multiarray/einsum.c.src b/numpy/_core/src/multiarray/einsum.c.src index 81d3f3e1d79b..5b7dcf0ecb29 100644 --- a/numpy/_core/src/multiarray/einsum.c.src +++ b/numpy/_core/src/multiarray/einsum.c.src @@ -520,14 +520,16 @@ unbuffered_loop_nop1_ndim2(NpyIter *iter) return -1; } - /* - * Since the iterator wasn't tracking coordinates, the - * loop provided by the iterator is in Fortran-order. - */ + /* IterationNeedsAPI effectively only checks for object dtype here. 
*/ int needs_api = NpyIter_IterationNeedsAPI(iter); if (!needs_api) { NPY_BEGIN_THREADS_THRESHOLDED(shape[1] * shape[0]); } + + /* + * Since the iterator wasn't tracking coordinates, the + * loop provided by the iterator is in Fortran-order. + */ for (coord = shape[1]; coord > 0; --coord) { sop(1, ptrs[0], strides[0], shape[0]); @@ -581,14 +583,16 @@ unbuffered_loop_nop1_ndim3(NpyIter *iter) return -1; } - /* - * Since the iterator wasn't tracking coordinates, the - * loop provided by the iterator is in Fortran-order. - */ + /* IterationNeedsAPI effectively only checks for object dtype here. */ int needs_api = NpyIter_IterationNeedsAPI(iter); if (!needs_api) { NPY_BEGIN_THREADS_THRESHOLDED(shape[2] * shape[1] * shape[0]); } + + /* + * Since the iterator wasn't tracking coordinates, the + * loop provided by the iterator is in Fortran-order. + */ for (coords[1] = shape[2]; coords[1] > 0; --coords[1]) { for (coords[0] = shape[1]; coords[0] > 0; --coords[0]) { sop(1, ptrs[0], strides[0], shape[0]); @@ -645,14 +649,16 @@ unbuffered_loop_nop2_ndim2(NpyIter *iter) return -1; } - /* - * Since the iterator wasn't tracking coordinates, the - * loop provided by the iterator is in Fortran-order. - */ + /* IterationNeedsAPI effectively only checks for object dtype here. */ int needs_api = NpyIter_IterationNeedsAPI(iter); if (!needs_api) { NPY_BEGIN_THREADS_THRESHOLDED(shape[1] * shape[0]); } + + /* + * Since the iterator wasn't tracking coordinates, the + * loop provided by the iterator is in Fortran-order. + */ for (coord = shape[1]; coord > 0; --coord) { sop(2, ptrs[0], strides[0], shape[0]); @@ -708,14 +714,16 @@ unbuffered_loop_nop2_ndim3(NpyIter *iter) return -1; } - /* - * Since the iterator wasn't tracking coordinates, the - * loop provided by the iterator is in Fortran-order. - */ + /* IterationNeedsAPI effectively only checks for object dtype here. 
*/ int needs_api = NpyIter_IterationNeedsAPI(iter); if (!needs_api) { NPY_BEGIN_THREADS_THRESHOLDED(shape[2] * shape[1] * shape[0]); } + + /* + * Since the iterator wasn't tracking coordinates, the + * loop provided by the iterator is in Fortran-order. + */ for (coords[1] = shape[2]; coords[1] > 0; --coords[1]) { for (coords[0] = shape[1]; coords[0] > 0; --coords[0]) { sop(2, ptrs[0], strides[0], shape[0]); @@ -1120,7 +1128,6 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop, char **dataptr; npy_intp *stride; npy_intp *countptr; - int needs_api; NPY_BEGIN_THREADS_DEF; iternext = NpyIter_GetIterNext(iter, NULL); @@ -1130,9 +1137,12 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop, dataptr = NpyIter_GetDataPtrArray(iter); stride = NpyIter_GetInnerStrideArray(iter); countptr = NpyIter_GetInnerLoopSizePtr(iter); - needs_api = NpyIter_IterationNeedsAPI(iter); + /* IterationNeedsAPI additionally checks for object dtype here. */ + int needs_api = NpyIter_IterationNeedsAPI(iter); + if (!needs_api) { + NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter)); + } - NPY_BEGIN_THREADS_NDITER(iter); NPY_EINSUM_DBG_PRINT("Einsum loop\n"); do { sop(nop, dataptr, stride, *countptr); @@ -1140,7 +1150,7 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop, NPY_END_THREADS; /* If the API was needed, it may have thrown an error */ - if (NpyIter_IterationNeedsAPI(iter) && PyErr_Occurred()) { + if (needs_api && PyErr_Occurred()) { goto fail; } } diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index eadb7cc099d3..a6c92dc00edc 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -2752,6 +2752,7 @@ PyArray_CountNonzero(PyArrayObject *self) if (iter == NULL) { return -1; } + /* IterationNeedsAPI also checks dtype for whether `nonzero` may need it */ needs_api = NpyIter_IterationNeedsAPI(iter); /* Get the pointers for inner loop iteration */ @@ -2761,7 +2762,9 @@ 
PyArray_CountNonzero(PyArrayObject *self) return -1; } - NPY_BEGIN_THREADS_NDITER(iter); + if (!needs_api) { + NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter)); + } dataptr = NpyIter_GetDataPtrArray(iter); strideptr = NpyIter_GetInnerStrideArray(iter); @@ -2982,9 +2985,12 @@ PyArray_Nonzero(PyArrayObject *self) return NULL; } + /* IterationNeedsAPI also checks dtype for whether `nonzero` may need it */ needs_api = NpyIter_IterationNeedsAPI(iter); - NPY_BEGIN_THREADS_NDITER(iter); + if (!needs_api) { + NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter)); + } dataptr = NpyIter_GetDataPtrArray(iter); diff --git a/numpy/_core/src/multiarray/mapping.c b/numpy/_core/src/multiarray/mapping.c index d11fbb7ff870..103e493f0014 100644 --- a/numpy/_core/src/multiarray/mapping.c +++ b/numpy/_core/src/multiarray/mapping.c @@ -976,10 +976,7 @@ array_boolean_subscript(PyArrayObject *self, /* Get a dtype transfer function */ NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); NPY_cast_info cast_info; - /* - * TODO: Ignoring cast flags, since this is only ever a copy. In - * principle that may not be quite right in some future? - */ + NPY_ARRAYMETHOD_FLAGS cast_flags; if (PyArray_GetDTypeTransferFunction( IsUintAligned(self) && IsAligned(self), @@ -992,6 +989,8 @@ array_boolean_subscript(PyArrayObject *self, NpyIter_Deallocate(iter); return NULL; } + cast_flags = PyArrayMethod_COMBINED_FLAGS( + cast_flags, NpyIter_GetTransferFlags(iter)); /* Get the values needed for the inner loop */ iternext = NpyIter_GetIterNext(iter, NULL); @@ -1002,7 +1001,10 @@ array_boolean_subscript(PyArrayObject *self, return NULL; } - NPY_BEGIN_THREADS_NDITER(iter); + /* NOTE: Don't worry about floating point errors as this is a copy. 
*/ + if (!(cast_flags & NPY_METH_REQUIRES_PYAPI)) { + NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter)); + } innerstrides = NpyIter_GetInnerStrideArray(iter); dataptrs = NpyIter_GetDataPtrArray(iter); @@ -1195,8 +1197,11 @@ array_assign_boolean_subscript(PyArrayObject *self, return -1; } + cast_flags = PyArrayMethod_COMBINED_FLAGS( + cast_flags, NpyIter_GetTransferFlags(iter)); + if (!(cast_flags & NPY_METH_REQUIRES_PYAPI)) { - NPY_BEGIN_THREADS_NDITER(iter); + NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter)); } if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { npy_clear_floatstatus_barrier((char *)self); @@ -2662,7 +2667,9 @@ PyArray_MapIterCheckIndices(PyArrayMapIterObject *mit) return -1; } - NPY_BEGIN_THREADS_NDITER(op_iter); + if (!(NpyIter_GetTransferFlags(op_iter) & NPY_METH_REQUIRES_PYAPI)) { + NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(op_iter)); + } iterptr = NpyIter_GetDataPtrArray(op_iter); iterstride = NpyIter_GetInnerStrideArray(op_iter); do { diff --git a/numpy/_core/src/multiarray/nditer_api.c b/numpy/_core/src/multiarray/nditer_api.c index 344b33254980..c4656e9e04e7 100644 --- a/numpy/_core/src/multiarray/nditer_api.c +++ b/numpy/_core/src/multiarray/nditer_api.c @@ -866,18 +866,37 @@ NpyIter_RequiresBuffering(NpyIter *iter) NPY_NO_EXPORT npy_bool NpyIter_IterationNeedsAPI(NpyIter *iter) { - return (NIT_ITFLAGS(iter)&NPY_ITFLAG_NEEDSAPI) != 0; + int nop = NIT_NOP(iter); + /* If any of the buffer filling need the API, flag it as well. */ + if (NpyIter_GetTransferFlags(iter) & NPY_METH_REQUIRES_PYAPI) { + return NPY_TRUE; + } + + for (int iop = 0; iop < nop; ++iop) { + PyArray_Descr *rdt = NIT_DTYPES(iter)[iop]; + if ((rdt->flags & (NPY_ITEM_REFCOUNT | + NPY_ITEM_IS_POINTER | + NPY_NEEDS_PYAPI)) != 0) { + /* Iteration needs API access */ + return NPY_TRUE; + } + } + + return NPY_FALSE; } -/* - * Fetch the ArrayMethod (runtime) flags for all "transfer functions' (i.e. - * copy to buffer/casts). 
+/*NUMPY_API + * Fetch the NPY_ARRAYMETHOD_FLAGS (runtime) flags for all "transfer functions" + * (i.e. copy to buffer/casts). + * + * It is the preferred way to check whether the iteration requires holding the + * GIL or may set floating point errors during buffer copies. * - * TODO: This should be public API, but that only makes sense when the - * ArrayMethod API is made public. + * I.e. use `NpyIter_GetTransferFlags(iter) & NPY_METH_REQUIRES_PYAPI` to check + * if you cannot release the GIL. */ -NPY_NO_EXPORT int +NPY_NO_EXPORT NPY_ARRAYMETHOD_FLAGS NpyIter_GetTransferFlags(NpyIter *iter) { return NIT_ITFLAGS(iter) >> NPY_ITFLAG_TRANSFERFLAGS_SHIFT; @@ -1417,8 +1436,6 @@ NpyIter_DebugPrint(NpyIter *iter) printf("ONEITERATION "); if (itflags&NPY_ITFLAG_DELAYBUF) printf("DELAYBUF "); - if (itflags&NPY_ITFLAG_NEEDSAPI) - printf("NEEDSAPI "); if (itflags&NPY_ITFLAG_REDUCE) printf("REDUCE "); if (itflags&NPY_ITFLAG_REUSE_REDUCE_LOOPS) diff --git a/numpy/_core/src/multiarray/nditer_constr.c b/numpy/_core/src/multiarray/nditer_constr.c index 2a6c26f7c106..241229b7e4de 100644 --- a/numpy/_core/src/multiarray/nditer_constr.c +++ b/numpy/_core/src/multiarray/nditer_constr.c @@ -432,27 +432,6 @@ NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags, } } - /* - * If REFS_OK was specified, check whether there are any - * reference arrays and flag it if so. - * - * NOTE: This really should be unnecessary, but chances are someone relies - * on it. The iterator itself does not require the API here - * as it only does so for casting/buffering. But in almost all - * use-cases the API will be required for whatever operation is done. 
- */ - if (flags & NPY_ITER_REFS_OK) { - for (iop = 0; iop < nop; ++iop) { - PyArray_Descr *rdt = op_dtype[iop]; - if ((rdt->flags & (NPY_ITEM_REFCOUNT | - NPY_ITEM_IS_POINTER | - NPY_NEEDS_PYAPI)) != 0) { - /* Iteration needs API access */ - NIT_ITFLAGS(iter) |= NPY_ITFLAG_NEEDSAPI; - } - } - } - /* If buffering is set prepare it */ if (itflags & NPY_ITFLAG_BUFFER) { npyiter_find_buffering_setup(iter, buffersize); @@ -3566,11 +3545,6 @@ npyiter_allocate_transfer_functions(NpyIter *iter) NIT_ITFLAGS(iter) |= cflags << NPY_ITFLAG_TRANSFERFLAGS_SHIFT; assert(NIT_ITFLAGS(iter) >> NPY_ITFLAG_TRANSFERFLAGS_SHIFT == cflags); - /* If any of the dtype transfer functions needed the API, flag it. */ - if (cflags & NPY_METH_REQUIRES_PYAPI) { - NIT_ITFLAGS(iter) |= NPY_ITFLAG_NEEDSAPI; - } - return 1; fail: diff --git a/numpy/_core/src/multiarray/nditer_impl.h b/numpy/_core/src/multiarray/nditer_impl.h index c8ac9e4fcce4..35629aab2c95 100644 --- a/numpy/_core/src/multiarray/nditer_impl.h +++ b/numpy/_core/src/multiarray/nditer_impl.h @@ -100,12 +100,10 @@ #define NPY_ITFLAG_ONEITERATION (1 << 9) /* Delay buffer allocation until first Reset* call */ #define NPY_ITFLAG_DELAYBUF (1 << 10) -/* Iteration needs API access during iternext */ -#define NPY_ITFLAG_NEEDSAPI (1 << 11) /* Iteration includes one or more operands being reduced */ -#define NPY_ITFLAG_REDUCE (1 << 12) +#define NPY_ITFLAG_REDUCE (1 << 11) /* Reduce iteration doesn't need to recalculate reduce loops next time */ -#define NPY_ITFLAG_REUSE_REDUCE_LOOPS (1 << 13) +#define NPY_ITFLAG_REUSE_REDUCE_LOOPS (1 << 12) /* * Offset of (combined) ArrayMethod flags for all transfer functions. * For now, we use the top 8 bits. @@ -372,12 +370,4 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs); NPY_NO_EXPORT void npyiter_clear_buffers(NpyIter *iter); -/* - * Function to get the ArrayMethod flags of the transfer functions. 
- * TODO: This function should be public and removed from `nditer_impl.h`, but - * this requires making the ArrayMethod flags public API first. - */ -NPY_NO_EXPORT int -NpyIter_GetTransferFlags(NpyIter *iter); - #endif /* NUMPY_CORE_SRC_MULTIARRAY_NDITER_IMPL_H_ */ diff --git a/numpy/_core/src/umath/reduction.c b/numpy/_core/src/umath/reduction.c index 1d3937eee1eb..5e78964b3ebd 100644 --- a/numpy/_core/src/umath/reduction.c +++ b/numpy/_core/src/umath/reduction.c @@ -339,10 +339,25 @@ PyUFunc_ReduceWrapper(PyArrayMethod_Context *context, } PyArrayMethod_StridedLoop *strided_loop; - NPY_ARRAYMETHOD_FLAGS flags = 0; + NPY_ARRAYMETHOD_FLAGS flags; + + npy_intp fixed_strides[3]; + NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); + if (wheremask != NULL) { + if (PyArrayMethod_GetMaskedStridedLoop(context, + 1, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { + goto fail; + } + } + else { + if (context->method->get_strided_loop(context, + 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { + goto fail; + } + } + flags = PyArrayMethod_COMBINED_FLAGS(flags, NpyIter_GetTransferFlags(iter)); int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; - needs_api |= NpyIter_IterationNeedsAPI(iter); if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { /* Start with the floating-point exception flags cleared */ npy_clear_floatstatus_barrier((char*)&iter); @@ -389,25 +404,6 @@ PyUFunc_ReduceWrapper(PyArrayMethod_Context *context, goto fail; } - /* - * Note that we need to ensure that the iterator is reset before getting - * the fixed strides. (The buffer information is uninitialized before.) 
- */ - npy_intp fixed_strides[3]; - NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); - if (wheremask != NULL) { - if (PyArrayMethod_GetMaskedStridedLoop(context, - 1, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { - goto fail; - } - } - else { - if (context->method->get_strided_loop(context, - 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { - goto fail; - } - } - if (!empty_iteration) { NpyIter_IterNextFunc *iternext; char **dataptr; diff --git a/numpy/_core/src/umath/ufunc_object.c b/numpy/_core/src/umath/ufunc_object.c index 1b9e3664525e..5c81a1f24d94 100644 --- a/numpy/_core/src/umath/ufunc_object.c +++ b/numpy/_core/src/umath/ufunc_object.c @@ -66,10 +66,6 @@ #include "npy_static_data.h" #include "multiarraymodule.h" -/* TODO: Only for `NpyIter_GetTransferFlags` until it is public */ -#define NPY_ITERATOR_IMPLEMENTATION_CODE -#include "nditer_impl.h" - /********** PRINTF DEBUG TRACING **************/ #define NPY_UF_DBG_TRACING 0 @@ -1699,7 +1695,6 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc, int i, j, idim, nop; const char *ufunc_name; int retval; - int needs_api = 0; /* Use remapped axes for generalized ufunc */ int broadcast_ndim, iter_ndim; @@ -2096,8 +2091,9 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc, &strided_loop, &auxdata, &flags) < 0) { goto fail; } - needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; - needs_api |= NpyIter_IterationNeedsAPI(iter); + flags = PyArrayMethod_COMBINED_FLAGS(flags, NpyIter_GetTransferFlags(iter)); + int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; + if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { /* Start with the floating-point exception flags cleared */ npy_clear_floatstatus_barrier((char*)&iter); @@ -2130,7 +2126,7 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc, dataptr, inner_dimensions, inner_strides, auxdata); } while (retval == 0 && iternext(iter)); - if (!needs_api && !NpyIter_IterationNeedsAPI(iter)) { + if (!needs_api) { 
NPY_END_THREADS; } } @@ -2578,7 +2574,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, int *op_axes[2] = {op_axes_arrays[0], op_axes_arrays[1]}; npy_uint32 op_flags[2]; int idim, ndim; - int needs_api, need_outer_iterator; + int need_outer_iterator; int res = 0; NPY_cast_info copy_info; @@ -2761,7 +2757,11 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, flags = PyArrayMethod_COMBINED_FLAGS(flags, copy_flags); } - needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; + if (iter != NULL) { + flags = PyArrayMethod_COMBINED_FLAGS(flags, NpyIter_GetTransferFlags(iter)); + } + + int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { /* Start with the floating-point exception flags cleared */ npy_clear_floatstatus_barrier((char*)&iter); @@ -2795,7 +2795,6 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, goto fail; } dataptr = NpyIter_GetDataPtrArray(iter); - needs_api |= NpyIter_IterationNeedsAPI(iter); /* Execute the loop with just the outer iterator */ count_m1 = PyArray_DIM(op[1], axis)-1; @@ -2983,7 +2982,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, op_axes_arrays[2]}; npy_uint32 op_flags[3]; int idim, ndim; - int needs_api, need_outer_iterator = 0; + int need_outer_iterator = 0; int res = 0; @@ -3182,7 +3181,11 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { goto fail; } - needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; + if (iter != NULL) { + flags = PyArrayMethod_COMBINED_FLAGS(flags, NpyIter_GetTransferFlags(iter)); + } + + int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { /* Start with the floating-point exception flags cleared */ npy_clear_floatstatus_barrier((char*)&iter); @@ -3206,7 +3209,6 @@ 
PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, npy_intp stride0_ind = PyArray_STRIDE(op[0], axis); int itemsize = descrs[0]->elsize; - needs_api |= NpyIter_IterationNeedsAPI(iter); /* Get the variables needed for the loop */ iternext = NpyIter_GetIterNext(iter, NULL); @@ -5612,9 +5614,9 @@ ufunc_at__slow_iter(PyUFuncObject *ufunc, NPY_ARRAYMETHOD_FLAGS flags, } return -1; } + flags = PyArrayMethod_COMBINED_FLAGS(flags, NpyIter_GetTransferFlags(iter_buffer)); int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; - needs_api |= NpyIter_IterationNeedsAPI(iter_buffer); if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { /* Start with the floating-point exception flags cleared */ npy_clear_floatstatus_barrier((char*)&iter);