thomasjpfan
diff --git a/numpy/core/src/multiarray/dtypemeta.h b/numpy/core/src/multiarray/dtypemeta.h
Lines changed: 3 additions & 3 deletions
diff --git a/numpy/core/src/umath/dispatching.c b/numpy/core/src/umath/dispatching.c
Lines changed: 4 additions & 0 deletions
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
Lines changed: 86 additions & 33 deletions
diff --git a/numpy/core/tests/test_custom_dtypes.py b/numpy/core/tests/test_custom_dtypes.py
Lines changed: 18 additions & 0 deletions
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
Lines changed: 5 additions & 3 deletions
@@ -74,9 +74,9 @@ typedef struct {
 #define NPY_DTYPE(descr) ((PyArray_DTypeMeta *)Py_TYPE(descr))
 #define NPY_DT_SLOTS(dtype) ((NPY_DType_Slots *)(dtype)->dt_slots)
 
-#define NPY_DT_is_legacy(dtype) ((dtype)->flags & NPY_DT_LEGACY)
-#define NPY_DT_is_abstract(dtype) ((dtype)->flags & NPY_DT_ABSTRACT)
-#define NPY_DT_is_parametric(dtype) ((dtype)->flags & NPY_DT_PARAMETRIC)
+#define NPY_DT_is_legacy(dtype) (((dtype)->flags & NPY_DT_LEGACY) != 0)
+#define NPY_DT_is_abstract(dtype) (((dtype)->flags & NPY_DT_ABSTRACT) != 0)
+#define NPY_DT_is_parametric(dtype) (((dtype)->flags & NPY_DT_PARAMETRIC) != 0)
 
 /*
  * Macros for convenient classmethod calls, since these require
 
@@ -193,6 +193,10 @@ resolve_implementation_info(PyUFuncObject *ufunc,
                 /* Unspecified out always matches (see below for inputs) */
                 continue;
             }
+            if (resolver_dtype == (PyArray_DTypeMeta *)Py_None) {
+                /* A resolver DType of None always matches */
+                continue;
+            }
             if (given_dtype == resolver_dtype) {
                 continue;
             }
 
@@ -5880,15 +5880,13 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
     PyArrayObject *op2_array = NULL;
     PyArrayMapIterObject *iter = NULL;
     PyArrayIterObject *iter2 = NULL;
-    PyArray_Descr *dtypes[3] = {NULL, NULL, NULL};
     PyArrayObject *operands[3] = {NULL, NULL, NULL};
     PyArrayObject *array_operands[3] = {NULL, NULL, NULL};
 
-    int needs_api = 0;
+    PyArray_DTypeMeta *signature[3] = {NULL, NULL, NULL};
+    PyArray_DTypeMeta *operand_DTypes[3] = {NULL, NULL, NULL};
+    PyArray_Descr *operation_descrs[3] = {NULL, NULL, NULL};
 
-    PyUFuncGenericFunction innerloop;
-    void *innerloopdata;
-    npy_intp i;
     int nop;
 
     /* override vars */
@@ -5901,6 +5899,10 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
     int buffersize;
     int errormask = 0;
     char * err_msg = NULL;
+
+    PyArrayMethod_StridedLoop *strided_loop;
+    NpyAuxData *auxdata = NULL;
+
     NPY_BEGIN_THREADS_DEF;
 
     if (ufunc->nin > 2) {
@@ -5988,26 +5990,51 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
 
     /*
      * Create dtypes array for either one or two input operands.
-     * The output operand is set to the first input operand
+     * Compare to the logic in `convert_ufunc_arguments`.
+     * TODO: It may be good to review some of this behaviour, since the
+     *       operand array is special (it is written to), as in reductions.
+     *       Using unsafe casting, as done here, is likely not desirable.
      */
     operands[0] = op1_array;
+    operand_DTypes[0] = NPY_DTYPE(PyArray_DESCR(op1_array));
+    Py_INCREF(operand_DTypes[0]);
+    int force_legacy_promotion = 0;
+    int allow_legacy_promotion = NPY_DT_is_legacy(operand_DTypes[0]);
+
     if (op2_array != NULL) {
         operands[1] = op2_array;
-        operands[2] = op1_array;
+        operand_DTypes[1] = NPY_DTYPE(PyArray_DESCR(op2_array));
+        Py_INCREF(operand_DTypes[1]);
+        allow_legacy_promotion &= NPY_DT_is_legacy(operand_DTypes[1]);
+        operands[2] = operands[0];
+        operand_DTypes[2] = operand_DTypes[0];
+        Py_INCREF(operand_DTypes[2]);
+
         nop = 3;
+        if (allow_legacy_promotion && ((PyArray_NDIM(op1_array) == 0)
+                                       != (PyArray_NDIM(op2_array) == 0))) {
+                /* both legacy, only one is 0-D: may need to force legacy */
+                force_legacy_promotion = should_use_min_scalar(2, operands, 0, NULL);
+            }
     }
     else {
-        operands[1] = op1_array;
+        operands[1] = operands[0];
+        operand_DTypes[1] = operand_DTypes[0];
+        Py_INCREF(operand_DTypes[1]);
         operands[2] = NULL;
         nop = 2;
     }
 
-    if (ufunc->type_resolver(ufunc, NPY_UNSAFE_CASTING,
-                            operands, NULL, dtypes) < 0) {
+    PyArrayMethodObject *ufuncimpl = promote_and_get_ufuncimpl(ufunc,
+            operands, signature, operand_DTypes,
+            force_legacy_promotion, allow_legacy_promotion);
+    if (ufuncimpl == NULL) {
         goto fail;
     }
-    if (ufunc->legacy_inner_loop_selector(ufunc, dtypes,
-        &innerloop, &innerloopdata, &needs_api) < 0) {
+
+    /* Find the correct descriptors for the operation */
+    if (resolve_descriptors(nop, ufunc, ufuncimpl,
+            operands, operation_descrs, signature, NPY_UNSAFE_CASTING) < 0) {
         goto fail;
     }
 
@@ -6068,21 +6095,44 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
                         NPY_ITER_GROWINNER|
                         NPY_ITER_DELAY_BUFALLOC,
                         NPY_KEEPORDER, NPY_UNSAFE_CASTING,
-                        op_flags, dtypes,
+                        op_flags, operation_descrs,
                         -1, NULL, NULL, buffersize);
 
     if (iter_buffer == NULL) {
         goto fail;
     }
 
-    needs_api = needs_api | NpyIter_IterationNeedsAPI(iter_buffer);
-
     iternext = NpyIter_GetIterNext(iter_buffer, NULL);
     if (iternext == NULL) {
         NpyIter_Deallocate(iter_buffer);
         goto fail;
     }
 
+    PyArrayMethod_Context context = {
+            .caller = (PyObject *)ufunc,
+            .method = ufuncimpl,
+            .descriptors = operation_descrs,
+    };
+
+    NPY_ARRAYMETHOD_FLAGS flags;
+    /* Use contiguous strides; if there is such a loop it may be faster */
+    npy_intp strides[3] = {
+            operation_descrs[0]->elsize, operation_descrs[1]->elsize, 0};
+    if (nop == 3) {
+        strides[2] = operation_descrs[2]->elsize;
+    }
+
+    if (ufuncimpl->get_strided_loop(&context, 1, 0, strides,
+            &strided_loop, &auxdata, &flags) < 0) {
+        goto fail;
+    }
+    int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+    needs_api |= NpyIter_IterationNeedsAPI(iter_buffer);
+    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        /* Start with the floating-point exception flags cleared */
+        npy_clear_floatstatus_barrier((char*)&iter);
+    }
+
     if (!needs_api) {
         NPY_BEGIN_THREADS;
     }
@@ -6091,14 +6141,13 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
      * Iterate over first and second operands and call ufunc
      * for each pair of inputs
      */
-    i = iter->size;
-    while (i > 0)
+    int res = 0;
+    for (npy_intp i = iter->size; i > 0; i--)
     {
         char *dataptr[3];
         char **buffer_dataptr;
         /* one element at a time, no stride required but read by innerloop */
-        npy_intp count[3] = {1, 0xDEADBEEF, 0xDEADBEEF};
-        npy_intp stride[3] = {0xDEADBEEF, 0xDEADBEEF, 0xDEADBEEF};
+        npy_intp count = 1;
 
         /*
          * Set up data pointers for either one or two input operands.
@@ -6117,14 +6166,14 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
         /* Reset NpyIter data pointers which will trigger a buffer copy */
         NpyIter_ResetBasePointers(iter_buffer, dataptr, &err_msg);
         if (err_msg) {
+            res = -1;
             break;
         }
 
         buffer_dataptr = NpyIter_GetDataPtrArray(iter_buffer);
 
-        innerloop(buffer_dataptr, count, stride, innerloopdata);
-
-        if (needs_api && PyErr_Occurred()) {
+        res = strided_loop(&context, buffer_dataptr, &count, strides, auxdata);
+        if (res != 0) {
             break;
         }
 
@@ -6138,32 +6187,35 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
         if (iter2 != NULL) {
             PyArray_ITER_NEXT(iter2);
         }
-
-        i--;
     }
 
     NPY_END_THREADS;
 
-    if (err_msg) {
+    if (res != 0 && err_msg) {
         PyErr_SetString(PyExc_ValueError, err_msg);
     }
+    if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+        /* NOTE: We could check float errors even when `res < 0` */
+        res = _check_ufunc_fperr(errormask, NULL, "at");
+    }
 
+    NPY_AUXDATA_FREE(auxdata);
     NpyIter_Deallocate(iter_buffer);
 
     Py_XDECREF(op2_array);
     Py_XDECREF(iter);
     Py_XDECREF(iter2);
-    for (i = 0; i < 3; i++) {
-        Py_XDECREF(dtypes[i]);
+    for (int i = 0; i < 3; i++) {
+        Py_XDECREF(operation_descrs[i]);
         Py_XDECREF(array_operands[i]);
     }
 
     /*
-     * An error should only be possible if needs_api is true, but this is not
-     * strictly correct for old-style ufuncs (e.g. `power` released the GIL
-     * but manually set an Exception).
+     * An error should only be possible if needs_api is true or `res != 0`,
+     * but this is not strictly correct for old-style ufuncs
+     * (e.g. `power` released the GIL but manually set an Exception).
      */
-    if (PyErr_Occurred()) {
+    if (res != 0 || PyErr_Occurred()) {
         return NULL;
     }
     else {
@@ -6178,10 +6230,11 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
     Py_XDECREF(op2_array);
     Py_XDECREF(iter);
     Py_XDECREF(iter2);
-    for (i = 0; i < 3; i++) {
-        Py_XDECREF(dtypes[i]);
+    for (int i = 0; i < 3; i++) {
+        Py_XDECREF(operation_descrs[i]);
         Py_XDECREF(array_operands[i]);
     }
+    NPY_AUXDATA_FREE(auxdata);
 
     return NULL;
 }
 
@@ -117,18 +117,36 @@ def test_possible_and_impossible_reduce(self):
                 match="the resolved dtypes are not compatible"):
             np.multiply.reduce(a)
 
+    def test_basic_ufunc_at(self):
+        float_a = np.array([1., 2., 3.])
+        b = self._get_array(2.)
+
+        float_b = b.view(np.float64).copy()
+        np.multiply.at(float_b, [1, 1, 1], float_a)
+        np.multiply.at(b, [1, 1, 1], float_a)
+
+        assert_array_equal(b.view(np.float64), float_b)
+
     def test_basic_multiply_promotion(self):
         float_a = np.array([1., 2., 3.])
         b = self._get_array(2.)
 
         res1 = float_a * b
         res2 = b * float_a
+
         # one factor is one, so we get the factor of b:
         assert res1.dtype == res2.dtype == b.dtype
         expected_view = float_a * b.view(np.float64)
         assert_array_equal(res1.view(np.float64), expected_view)
         assert_array_equal(res2.view(np.float64), expected_view)
 
+        # Check that promotion works when `out` is used:
+        np.multiply(b, float_a, out=res2)
+        with pytest.raises(TypeError):
+            # The promoter accepts this (maybe it should not), but the SFloat
+            # result cannot be cast to integer:
+            np.multiply(b, float_a, out=np.arange(3))
+
     def test_basic_addition(self):
         a = self._get_array(2.)
         b = self._get_array(4.)
 
@@ -2397,14 +2397,16 @@ def test_reduce_casterrors(offset):
 
 @pytest.mark.parametrize("method",
         [np.add.accumulate, np.add.reduce,
-         pytest.param(lambda x: np.add.reduceat(x, [0]), id="reduceat")])
-def test_reducelike_floaterrors(method):
-    # adding inf and -inf creates an invalid float and should give a warning
+         pytest.param(lambda x: np.add.reduceat(x, [0]), id="reduceat"),
+         pytest.param(lambda x: np.log.at(x, [2]), id="at")])
+def test_ufunc_methods_floaterrors(method):
+    # adding inf and -inf (or log(-inf)) creates an invalid float and warns
     arr = np.array([np.inf, 0, -np.inf])
     with np.errstate(all="warn"):
         with pytest.warns(RuntimeWarning, match="invalid value"):
             method(arr)
 
+    arr = np.array([np.inf, 0, -np.inf])
     with np.errstate(all="raise"):
         with pytest.raises(FloatingPointError):
             method(arr)