From 20f61d11e2d08e6f3f768cdb2eeed990a4aceffe Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Tue, 7 Jan 2025 13:11:55 -0700 Subject: [PATCH 1/6] BUG: move reduction initialization to ufunc initialization --- numpy/_core/src/multiarray/multiarraymodule.c | 37 ++++---- numpy/_core/src/umath/legacy_array_method.c | 88 +++++++++++++------ 2 files changed, 82 insertions(+), 43 deletions(-) diff --git a/numpy/_core/src/multiarray/multiarraymodule.c b/numpy/_core/src/multiarray/multiarraymodule.c index c9d46d859f60..d337a84e9baf 100644 --- a/numpy/_core/src/multiarray/multiarraymodule.c +++ b/numpy/_core/src/multiarray/multiarraymodule.c @@ -5033,6 +5033,24 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) { goto err; } + /* + * Initialize the default PyDataMem_Handler capsule singleton. + */ + PyDataMem_DefaultHandler = PyCapsule_New( + &default_handler, MEM_HANDLER_CAPSULE_NAME, NULL); + if (PyDataMem_DefaultHandler == NULL) { + goto err; + } + + /* + * Initialize the context-local current handler + * with the default PyDataMem_Handler capsule. + */ + current_handler = PyContextVar_New("current_allocator", PyDataMem_DefaultHandler); + if (current_handler == NULL) { + goto err; + } + if (initumath(m) != 0) { goto err; } @@ -5067,7 +5085,7 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) { * init_string_dtype() but that needs to happen after * the legacy dtypemeta classes are available. */ - + if (npy_cache_import_runtime( "numpy.dtypes", "_add_dtype_helper", &npy_runtime_imports._add_dtype_helper) == -1) { @@ -5081,23 +5099,6 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) { } PyDict_SetItemString(d, "StringDType", (PyObject *)&PyArray_StringDType); - /* - * Initialize the default PyDataMem_Handler capsule singleton. - */ - PyDataMem_DefaultHandler = PyCapsule_New( - &default_handler, MEM_HANDLER_CAPSULE_NAME, NULL); - if (PyDataMem_DefaultHandler == NULL) { - goto err; - } - /* - * Initialize the context-local current handler - * with the default PyDataMem_Handler capsule. - */ - current_handler = PyContextVar_New("current_allocator", PyDataMem_DefaultHandler); - if (current_handler == NULL) { - goto err; - } - // initialize static reference to a zero-like array npy_static_pydata.zero_pyint_like_arr = PyArray_ZEROS( 0, NULL, NPY_DEFAULT_INT, NPY_FALSE); diff --git a/numpy/_core/src/umath/legacy_array_method.c b/numpy/_core/src/umath/legacy_array_method.c index 9592df0e1366..487abb947e20 100644 --- a/numpy/_core/src/umath/legacy_array_method.c +++ b/numpy/_core/src/umath/legacy_array_method.c @@ -294,24 +294,7 @@ get_initial_from_ufunc( Py_DECREF(identity_obj); return 0; } - if (PyTypeNum_ISUNSIGNED(context->descriptors[1]->type_num) - && PyLong_CheckExact(identity_obj)) { - /* - * This is a bit of a hack until we have truly loop specific - * identities. Python -1 cannot be cast to unsigned so convert - * it to a NumPy scalar, but we use -1 for bitwise functions to - * signal all 1s. - * (A builtin identity would not overflow here, although we may - * unnecessary convert 0 and 1.) - */ - Py_SETREF(identity_obj, PyObject_CallFunctionObjArgs( - (PyObject *)&PyLongArrType_Type, identity_obj, NULL)); - if (identity_obj == NULL) { - return -1; - } - } - else if (context->descriptors[0]->type_num == NPY_OBJECT - && !reduction_is_empty) { + if (context->descriptors[0]->type_num == NPY_OBJECT && !reduction_is_empty) { /* Allows `sum([object()])` to work, but use 0 when empty. */ Py_DECREF(identity_obj); return 0; @@ -323,13 +306,6 @@ get_initial_from_ufunc( return -1; } - if (PyTypeNum_ISNUMBER(context->descriptors[0]->type_num)) { - /* For numbers we can cache to avoid going via Python ints */ - memcpy(context->method->legacy_initial, initial, - context->descriptors[0]->elsize); - context->method->get_reduction_initial = ©_cached_initial; - } - /* Reduction can use the initial value */ return 1; } @@ -427,11 +403,73 @@ PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc, }; PyBoundArrayMethodObject *bound_res = PyArrayMethod_FromSpec_int(&spec, 1); + if (bound_res == NULL) { return NULL; } PyArrayMethodObject *res = bound_res->method; + + // set cached initial value for numeric reductions + if (PyTypeNum_ISNUMBER(bound_res->dtypes[0]->type_num)) { + npy_bool reorderable; + PyObject *identity_obj = + PyUFunc_GetDefaultIdentity(ufunc, &reorderable); + + if (identity_obj == NULL) { + return NULL; + } + if (identity_obj == Py_None) { + /* UFunc has no identity, ignore */ + Py_INCREF(res); + Py_DECREF(bound_res); + + Py_DECREF(identity_obj); + return res; + } + if (PyTypeNum_ISUNSIGNED(bound_res->dtypes[1]->type_num) + && PyLong_CheckExact(identity_obj)) { + /* + * This is a bit of a hack until we have truly loop specific + * identities. Python -1 cannot be cast to unsigned so convert + * it to a NumPy scalar, but we use -1 for bitwise functions to + * signal all 1s. + * (A builtin identity would not overflow here, although we may + * unnecessary convert 0 and 1.) + */ + Py_SETREF(identity_obj, PyObject_CallFunctionObjArgs( + (PyObject *)&PyLongArrType_Type, identity_obj, NULL)); + if (identity_obj == NULL) { + return NULL; + } + } + char *initial = + PyMem_Calloc(1, bound_res->dtypes[0]->singleton->elsize); + if (initial == NULL) { + PyErr_NoMemory(); + Py_DECREF(identity_obj); + PyMem_Free(initial); + return NULL; + } + + int pack_res = PyArray_Pack(bound_res->dtypes[0]->singleton, initial, + identity_obj); + Py_DECREF(identity_obj); + if (pack_res < 0) { + PyMem_Free(initial); + return NULL; + }; + + /* For numbers we can cache to avoid going via Python ints */ + memcpy(bound_res->method->legacy_initial, initial, + bound_res->dtypes[0]->singleton->elsize); + bound_res->method->get_reduction_initial = ©_cached_initial; + + PyMem_Free(initial); + } + + Py_INCREF(res); Py_DECREF(bound_res); + return res; } From 507bb98af6dd85818f19503a719e4c62c7ba1ac9 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Wed, 8 Jan 2025 10:33:09 -0700 Subject: [PATCH 2/6] MAINT: refactor to call get_initial_from_ufunc during init --- numpy/_core/src/umath/legacy_array_method.c | 95 ++++++++++++--------- 1 file changed, 53 insertions(+), 42 deletions(-) diff --git a/numpy/_core/src/umath/legacy_array_method.c b/numpy/_core/src/umath/legacy_array_method.c index 487abb947e20..49dfcfded214 100644 --- a/numpy/_core/src/umath/legacy_array_method.c +++ b/numpy/_core/src/umath/legacy_array_method.c @@ -294,7 +294,24 @@ get_initial_from_ufunc( Py_DECREF(identity_obj); return 0; } - if (context->descriptors[0]->type_num == NPY_OBJECT && !reduction_is_empty) { + if (PyTypeNum_ISUNSIGNED(context->descriptors[1]->type_num) + && PyLong_CheckExact(identity_obj)) { + /* + * This is a bit of a hack until we have truly loop specific + * identities. Python -1 cannot be cast to unsigned so convert + * it to a NumPy scalar, but we use -1 for bitwise functions to + * signal all 1s. + * (A builtin identity would not overflow here, although we may + * unnecessary convert 0 and 1.) + */ + Py_SETREF(identity_obj, PyObject_CallFunctionObjArgs( + (PyObject *)&PyLongArrType_Type, identity_obj, NULL)); + if (identity_obj == NULL) { + return -1; + } + } + else if (context->descriptors[0]->type_num == NPY_OBJECT + && !reduction_is_empty) { /* Allows `sum([object()])` to work, but use 0 when empty. */ Py_DECREF(identity_obj); return 0; @@ -409,62 +426,56 @@ PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc, } PyArrayMethodObject *res = bound_res->method; - // set cached initial value for numeric reductions + // set cached initial value for numeric reductions to avoid creating + // a python int in every reduction if (PyTypeNum_ISNUMBER(bound_res->dtypes[0]->type_num)) { - npy_bool reorderable; - PyObject *identity_obj = - PyUFunc_GetDefaultIdentity(ufunc, &reorderable); + char *initial = + PyMem_Calloc(1, bound_res->dtypes[0]->singleton->elsize); - if (identity_obj == NULL) { + if (initial == NULL) { + PyErr_NoMemory(); return NULL; } - if (identity_obj == Py_None) { - /* UFunc has no identity, ignore */ - Py_INCREF(res); - Py_DECREF(bound_res); - Py_DECREF(identity_obj); - return res; - } - if (PyTypeNum_ISUNSIGNED(bound_res->dtypes[1]->type_num) - && PyLong_CheckExact(identity_obj)) { - /* - * This is a bit of a hack until we have truly loop specific - * identities. Python -1 cannot be cast to unsigned so convert - * it to a NumPy scalar, but we use -1 for bitwise functions to - * signal all 1s. - * (A builtin identity would not overflow here, although we may - * unnecessary convert 0 and 1.) - */ - Py_SETREF(identity_obj, PyObject_CallFunctionObjArgs( - (PyObject *)&PyLongArrType_Type, identity_obj, NULL)); - if (identity_obj == NULL) { - return NULL; - } - } - char *initial = - PyMem_Calloc(1, bound_res->dtypes[0]->singleton->elsize); - if (initial == NULL) { + PyArray_Descr **descrs = PyMem_Calloc(ufunc->nin + ufunc->nout, + sizeof(PyArray_Descr *)); + + if (descrs == NULL) { PyErr_NoMemory(); - Py_DECREF(identity_obj); PyMem_Free(initial); return NULL; } - int pack_res = PyArray_Pack(bound_res->dtypes[0]->singleton, initial, - identity_obj); - Py_DECREF(identity_obj); - if (pack_res < 0) { + + for (int i = 0; i < ufunc->nin + ufunc->nout; i++) { + // only dealing with numeric legacy dtypes so this should always be + // valid + descrs[i] = bound_res->dtypes[i]->singleton; + } + + PyArrayMethod_Context context = { + (PyObject *)ufunc, + bound_res->method, + descrs, + }; + + int ret = get_initial_from_ufunc(&context, 0, initial); + + if (ret < 0) { PyMem_Free(initial); + PyMem_Free(descrs); return NULL; - }; + } - /* For numbers we can cache to avoid going via Python ints */ - memcpy(bound_res->method->legacy_initial, initial, - bound_res->dtypes[0]->singleton->elsize); - bound_res->method->get_reduction_initial = ©_cached_initial; + // only use the initial value if it's valid + if (ret > 0) { + memcpy(context.method->legacy_initial, initial, + context.descriptors[0]->elsize); + context.method->get_reduction_initial = ©_cached_initial; + } PyMem_Free(initial); + PyMem_Free(descrs); } From b0358e0fa728917fa3dca875b7aaca8163eeb61a Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Wed, 8 Jan 2025 10:33:20 -0700 Subject: [PATCH 3/6] TST: add test for multithreaded reductions --- numpy/_core/tests/test_multithreading.py | 13 +++++++++++++ numpy/testing/_private/utils.py | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/numpy/_core/tests/test_multithreading.py b/numpy/_core/tests/test_multithreading.py index 754688501c2d..3b19d83fee71 100644 --- a/numpy/_core/tests/test_multithreading.py +++ b/numpy/_core/tests/test_multithreading.py @@ -120,3 +120,16 @@ def legacy_125(): task1.start() task2.start() + +def test_parallel_reduction(): + # gh-28041 + NUM_THREADS = 50 + + b = threading.Barrier(NUM_THREADS) + + x = np.arange(1000) + def closure(): + b.wait() + np.sum(x) + + run_threaded(closure, NUM_THREADS, max_workers=NUM_THREADS) diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py index 7cea2bf8bc40..c0d1736cf242 100644 --- a/numpy/testing/_private/utils.py +++ b/numpy/testing/_private/utils.py @@ -2685,9 +2685,9 @@ def _get_glibc_version(): _glibc_older_than = lambda x: (_glibcver != '0.0' and _glibcver < x) -def run_threaded(func, iters, pass_count=False): +def run_threaded(func, iters, pass_count=False, max_workers=8): """Runs a function many times in parallel""" - with concurrent.futures.ThreadPoolExecutor(max_workers=8) as tpe: + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as tpe: if pass_count: futures = [tpe.submit(func, i) for i in range(iters)] else: From 60e8e37544de83493876f5e17ee3f488799ca129 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Wed, 8 Jan 2025 12:31:18 -0700 Subject: [PATCH 4/6] MAINT: fix linter --- numpy/_core/tests/test_multithreading.py | 1 + 1 file changed, 1 insertion(+) diff --git a/numpy/_core/tests/test_multithreading.py b/numpy/_core/tests/test_multithreading.py index 3b19d83fee71..2512b7c199dc 100644 --- a/numpy/_core/tests/test_multithreading.py +++ b/numpy/_core/tests/test_multithreading.py @@ -128,6 +128,7 @@ def test_parallel_reduction(): b = threading.Barrier(NUM_THREADS) x = np.arange(1000) + def closure(): b.wait() np.sum(x) From d955688e1fb551e41caeada385554d28f7b672bf Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Wed, 8 Jan 2025 15:11:55 -0700 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: Sebastian Berg --- numpy/_core/src/umath/legacy_array_method.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/numpy/_core/src/umath/legacy_array_method.c b/numpy/_core/src/umath/legacy_array_method.c index 49dfcfded214..69aaa6c73c91 100644 --- a/numpy/_core/src/umath/legacy_array_method.c +++ b/numpy/_core/src/umath/legacy_array_method.c @@ -429,14 +429,6 @@ PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc, // set cached initial value for numeric reductions to avoid creating // a python int in every reduction if (PyTypeNum_ISNUMBER(bound_res->dtypes[0]->type_num)) { - char *initial = - PyMem_Calloc(1, bound_res->dtypes[0]->singleton->elsize); - - if (initial == NULL) { - PyErr_NoMemory(); - return NULL; - } - PyArray_Descr **descrs = PyMem_Calloc(ufunc->nin + ufunc->nout, sizeof(PyArray_Descr *)); @@ -459,7 +451,7 @@ PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc, descrs, }; - int ret = get_initial_from_ufunc(&context, 0, initial); + int ret = get_initial_from_ufunc(&context, 0, context.method->legacy_initial); if (ret < 0) { PyMem_Free(initial); @@ -467,14 +459,11 @@ PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc, return NULL; } - // only use the initial value if it's valid + // only use the cached initial value if it's valid if (ret > 0) { - memcpy(context.method->legacy_initial, initial, - context.descriptors[0]->elsize); context.method->get_reduction_initial = ©_cached_initial; } - PyMem_Free(initial); PyMem_Free(descrs); } From d301c0c71dec922223b92aa3556e6a32aef68a96 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Wed, 8 Jan 2025 15:19:41 -0700 Subject: [PATCH 6/6] MAINT: simplify further --- numpy/_core/src/umath/legacy_array_method.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/numpy/_core/src/umath/legacy_array_method.c b/numpy/_core/src/umath/legacy_array_method.c index 69aaa6c73c91..705262fedd38 100644 --- a/numpy/_core/src/umath/legacy_array_method.c +++ b/numpy/_core/src/umath/legacy_array_method.c @@ -428,18 +428,12 @@ PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc, // set cached initial value for numeric reductions to avoid creating // a python int in every reduction - if (PyTypeNum_ISNUMBER(bound_res->dtypes[0]->type_num)) { - PyArray_Descr **descrs = PyMem_Calloc(ufunc->nin + ufunc->nout, - sizeof(PyArray_Descr *)); + if (PyTypeNum_ISNUMBER(bound_res->dtypes[0]->type_num) && + ufunc->nin == 2 && ufunc->nout == 1) { - if (descrs == NULL) { - PyErr_NoMemory(); - PyMem_Free(initial); - return NULL; - } + PyArray_Descr *descrs[3]; - - for (int i = 0; i < ufunc->nin + ufunc->nout; i++) { + for (int i = 0; i < 3; i++) { // only dealing with numeric legacy dtypes so this should always be // valid descrs[i] = bound_res->dtypes[i]->singleton; @@ -454,8 +448,7 @@ PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc, int ret = get_initial_from_ufunc(&context, 0, context.method->legacy_initial); if (ret < 0) { - PyMem_Free(initial); - PyMem_Free(descrs); + Py_DECREF(bound_res); return NULL; } @@ -463,8 +456,6 @@ PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc, if (ret > 0) { context.method->get_reduction_initial = ©_cached_initial; } - - PyMem_Free(descrs); }