diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index e23cbe3c9db2..14c91b56e7c8 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1080,7 +1080,8 @@ PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd, * platforms (debian sparc) malloc does not provide enough alignment for * long double types */ - PyArray_UpdateFlags((PyArrayObject *)fa, NPY_ARRAY_UPDATE_ALL); + PyArray_UpdateFlags((PyArrayObject *)fa, + strides ? NPY_ARRAY_UPDATE_ALL : NPY_ARRAY_ALIGNED); /* * call the __array_finalize__ diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 63ed4f492d2d..db5eaf17ba83 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -2774,9 +2774,16 @@ reduce_type_resolver(PyUFuncObject *ufunc, PyArrayObject *arr, * resolution. */ if (odtype != NULL) { - type_tup = PyTuple_Pack(3, odtype, odtype, Py_None); - if (type_tup == NULL) { - return -1; + if (!(PyTypeNum_ISBOOL(odtype->type_num) && + PyArray_TYPE(arr) == odtype->type_num) && + !(PyTypeNum_ISFLOAT(odtype->type_num) && + PyArray_TYPE(arr) == odtype->type_num && + PyArray_DESCR(arr)->byteorder == odtype->byteorder && + odtype->byteorder == NPY_NATIVE)) { + type_tup = PyTuple_Pack(3, odtype, odtype, Py_None); + if (type_tup == NULL) { + return -1; + } } } @@ -2784,7 +2791,7 @@ reduce_type_resolver(PyUFuncObject *ufunc, PyArrayObject *arr, retcode = ufunc->type_resolver( ufunc, NPY_UNSAFE_CASTING, op, type_tup, dtypes); - Py_DECREF(type_tup); + Py_XDECREF(type_tup); if (retcode == -1) { return -1; } @@ -3954,8 +3961,9 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, * is used for add and multiply reduction to avoid overflow */ int typenum = PyArray_TYPE(mp); - if ((PyTypeNum_ISBOOL(typenum) || PyTypeNum_ISINTEGER(typenum)) - && ((strcmp(ufunc->name,"add") == 0) + if ((PyTypeNum_ISBOOL(typenum) || PyTypeNum_ISINTEGER(typenum)) && + 
(ufunc->name[0] == 'a' || ufunc->name[0] == 'm') && + ((strcmp(ufunc->name,"add") == 0) || (strcmp(ufunc->name,"multiply") == 0))) { if (PyTypeNum_ISBOOL(typenum)) { typenum = NPY_LONG; @@ -4481,6 +4489,7 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data, int unused, const char *signature) { PyUFuncObject *ufunc; + npy_intp type; if (nin + nout > NPY_MAXARGS) { PyErr_Format(PyExc_ValueError, @@ -4525,7 +4534,8 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data, } ufunc->doc = doc; - ufunc->op_flags = PyArray_malloc(sizeof(npy_uint32)*ufunc->nargs); + ufunc->op_flags = PyArray_malloc(sizeof(npy_uint32)*(ufunc->nargs + + NPY_NTYPES)); if (ufunc->op_flags == NULL) { return PyErr_NoMemory(); } @@ -4546,6 +4556,25 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data, return NULL; } } + + /* + * Stuff a jump index to the first entry of each dtype behind the public ABI; + * the table allows faster lookup of inner loops. + */ + for (type = 0; type < NPY_NTYPES; type++) { + npy_uint32 * typeskip = ufunc->op_flags + ufunc->nargs; + npy_intp j; + for (j = 0; j < ufunc->ntypes; j++) { + if (ufunc->types[j * ufunc->nargs] == type) { + typeskip[type] = j; + break; + } + } + /* no type registered, full scan required (can this even happen?) */ + if (j == ufunc->ntypes) { + typeskip[type] = 0; + } + } return (PyObject *)ufunc; } diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c index ce9eec4b61b2..71c6766feebe 100644 --- a/numpy/core/src/umath/ufunc_type_resolution.c +++ b/numpy/core/src/umath/ufunc_type_resolution.c @@ -1283,15 +1283,14 @@ PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc, const char *ufunc_name; PyObject *errmsg; int i, j; - - ufunc_name = ufunc->name ? ufunc->name : "(unknown)"; + npy_uint32 offset; /* * If there are user-loops search them first. 
* TODO: There needs to be a loop selection acceleration structure, * like a hash table. */ - if (ufunc->userloops) { + if (NPY_UNLIKELY(ufunc->userloops)) { switch (find_userloop(ufunc, dtypes, out_innerloop, out_innerloopdata)) { /* Error */ @@ -1303,11 +1302,21 @@ PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc, } } + /* + * op_flags has a jump index to the first entry of dtype[0] stuffed behind + * the public ABI + */ + if (NPY_LIKELY(dtypes[0]->type_num < NPY_NTYPES)) { + offset = ufunc->op_flags[nargs + dtypes[0]->type_num]; + } + else { + offset = 0; + } types = ufunc->types; - for (i = 0; i < ufunc->ntypes; ++i) { + for (i = offset; i < ufunc->ntypes; ++i) { /* Copy the types into an int array for matching */ for (j = 0; j < nargs; ++j) { - if (types[j] != dtypes[j]->type_num) { + if (types[i * nargs + j] != dtypes[j]->type_num) { break; } } @@ -1316,10 +1325,9 @@ PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc, *out_innerloopdata = ufunc->data[i]; return 0; } - - types += nargs; } + ufunc_name = ufunc->name ? ufunc->name : "(unknown)"; errmsg = PyUString_FromFormat("ufunc '%s' did not contain a loop " "with signature matching types ", ufunc_name); for (i = 0; i < nargs; ++i) {