8000 ENH: add indexed lower loops and connect them to ufuncs · mattip/numpy@7910e2b · GitHub
[go: up one dir, main page]

Skip to content

Commit 7910e2b

Browse files
committed
ENH: add indexed lower loops and connect them to ufuncs
1 parent 3ba259d commit 7910e2b

File tree

8 files changed

+219
-18
lines changed

8 files changed

+219
-18
lines changed

numpy/core/code_generators/generate_umath.py

Lines changed: 93 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
1+
"""
2+
Generate the code to build all the internal ufuncs. At the base is the defdict:
3+
a dictionary ofUfunc classes. This is fed to make_code to generate
4+
__umath_generated.c
5+
"""
16
import os
27
import re
38
import struct
49
import sys
510
import textwrap
611
import argparse
712

13+
# identity objects
814
Zero = "PyLong_FromLong(0)"
915
One = "PyLong_FromLong(1)"
1016
True_ = "(Py_INCREF(Py_True), Py_True)"
@@ -125,7 +131,7 @@ def check_td_order(tds):
125131
_check_order(signatures[prev_i], sign)
126132

127133

128-
_fdata_map = dict(
134+
_floatformat_map = dict(
129135
e='npy_%sf',
130136
f='npy_%sf',
131137
d='npy_%s',
@@ -136,11 +142,13 @@ def check_td_order(tds):
136142
)
137143

138144
def build_func_data(types, f):
139-
func_data = [_fdata_map.get(t, '%s') % (f,) for t in types]
145+
func_data = [_floatformat_map.get(t, '%s') % (f,) for t in types]
140146
return func_data
141147

142148
def TD(types, f=None, astype=None, in_=None, out=None, cfunc_alias=None,
143149
simd=None, dispatch=None):
150+
"""Generate a TypeDescription instance for each item in types
151+
"""
144152
if f is not None:
145153
if isinstance(f, str):
146154
func_data = build_func_data(types, f)
@@ -188,12 +196,15 @@ class Ufunc:
188196
----------
189197
nin : number of input arguments
190198
nout : number of output arguments
191-
identity : identity element for a two-argument function
199+
identity : identity element for a two-argument function (like Zero)
192200
docstring : docstring for the ufunc
193-
type_descriptions : list of TypeDescription objects
201+
typereso: type resolver function of type PyUFunc_TypeResolutionFunc
202+
type_descriptions : TypeDescription objects
203+
signature: a generalized ufunc signature (like for matmul)
204+
indexed: add indexed loops (ufunc.at) for these type characters
194205
"""
195206
def __init__(self, nin, nout, identity, docstring, typereso,
196-
*type_descriptions, signature=None):
207+
*type_descriptions, signature=None, indexed=''):
197208
self.nin = nin
198209
self.nout = nout
199210
if identity is None:
@@ -203,6 +214,7 @@ def __init__(self, nin, nout, identity, docstring, typereso,
203214
self.typereso = typereso
204215
self.type_descriptions = []
205216
self.signature = signature
217+
self.indexed = indexed
206218
for td in type_descriptions:
207219
self.type_descriptions.extend(td)
208220
for td in self.type_descriptions:
@@ -347,6 +359,7 @@ def english_upper(s):
347359
TypeDescription('M', FullTypeDescr, 'mM', 'M'),
348360
],
349361
TD(O, f='PyNumber_Add'),
362+
indexed=flts + ints
350363
),
351364
'subtract':
352365
Ufunc(2, 1, None, # Zero is only a unit to the right, not the left
@@ -359,6 +372,7 @@ def english_upper(s):
359372
TypeDescription('M', FullTypeDescr, 'MM', 'm'),
360373
],
361374
TD(O, f='PyNumber_Subtract'),
375+
indexed=flts + ints
362376
),
363377
'multiply':
364378
Ufunc(2, 1, One,
@@ -374,6 +388,7 @@ def english_upper(s):
374388
TypeDescription('m', FullTypeDescr, 'dm', 'm'),
375389
],
376390
TD(O, f='PyNumber_Multiply'),
391+
indexed=flts + ints
377392
),
378393
#'true_divide' : aliased to divide in umathmodule.c:initumath
379394
'floor_divide':
@@ -388,6 +403,7 @@ def english_upper(s):
388403
TypeDescription('m', FullTypeDescr, 'mm', 'q'),
389404
],
390405
TD(O, f='PyNumber_FloorDivide'),
406+
indexed=flts + ints
391407
),
392408
'divide':
393409
Ufunc(2, 1, None, # One is only a unit to the right, not the left
@@ -1255,6 +1271,40 @@ def make_ufuncs(funcdict):
12551271
if uf.typereso is not None:
12561272
mlist.append(
12571273
r"((PyUFuncObject *)f)->type_resolver = &%s;" % uf.typereso)
1274+
for c in uf.indexed:
1275+
# Handle indexed loops by getting the underlying ArrayMethodObject
1276+
# from the list in f._loops and setting its field appropriately
1277+
fmt = textwrap.dedent("""
1278+
{{
1279+
PyArray_DTypeMeta *dtype = PyArray_DTypeFromTypeNum({typenum});
1280+
PyObject *info = get_info_no_cast((PyUFuncObject *)f, dtype, {count});
1281+
if (info == NULL) {{
1282+
return -1;
1283+
}}
1284+
if (info == Py_None) {{
1285+
PyErr_SetString(PyExc_RuntimeError,
1286+
"cannot add indexed loop to ufunc {name} with {typenum}");
1287+
return -1;
1288+
}}
1289+
if (!PyObject_TypeCheck(info, &PyArrayMethod_Type)) {{
1290+
PyErr_SetString(PyExc_RuntimeError,
1291+
"Not a PyArrayMethodObject in ufunc {name} with {typenum}");
1292+
}}
1293+
((PyArrayMethodObject*)info)->contiguous_indexed_loop = {funcname};
1294+
/* info is borrowed, no need to decref*/
1295+
}}
1296+
""")
1297+
cname = name
1298+
if cname == "floor_divide":
1299+
# XXX there must be a better way...
1300+
cname = "divide"
1301+
mlist.append(fmt.format(
1302+
typenum=f"NPY_{english_upper(chartoname[c])}",
1303+
count=uf.nin+uf.nout,
1304+
name=name,
1305+
funcname = f"{english_upper(chartoname[c])}_{cname}_indexed",
1306+
))
1307+
12581308
mlist.append(r"""PyDict_SetItemString(dictionary, "%s", f);""" % name)
12591309
mlist.append(r"""Py_DECREF(f);""")
12601310
code3list.append('\n'.join(mlist))
@@ -1277,8 +1327,46 @@ def make_code(funcdict, filename):
12771327
#include "loops.h"
12781328
#include "matmul.h"
12791329
#include "clip.h"
1330+
#include "dtypemeta.h"
12801331
#include "_umath_doc_generated.h"
1332+
12811333
%s
1334+
/*
1335+
* Return the PyArrayMethodObject or PyCapsule that matches a registered
1336+
* tuple of identical dtypes. Return a borrowed ref of the first match.
1337+
*/
1338+
static PyObject *
1339+
get_info_no_cast(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtype, int ndtypes)
1340+
{
1341+
1342+
PyObject *t_dtypes = PyTuple_New(ndtypes);
1343+
if (t_dtypes == NULL) {
1344+
return NULL;
1345+
}
1346+
for (int i=0; i < ndtypes; i++) {
1347+
PyTuple_SetItem(t_dtypes, i, (PyObject *)op_dtype);
1348+
}
1349+
PyObject *loops = ufunc->_loops;
1350+
Py_ssize_t length = PyList_Size(loops);
1351+
for (Py_ssize_t i = 0; i < length; i++) {
1352+
PyObject *item = PyList_GetItem(loops, i);
1353+
PyObject *cur_DType_tuple = PyTuple_GetItem(item, 0);
1354+
int cmp = PyObject_RichCompareBool(cur_DType_tuple, t_dtypes, Py_EQ);
1355+
if (cmp < 0) {
1356+
Py_DECREF(t_dtypes);
1357+
return NULL;
1358+
}
1359+
if (cmp == 0) {
1360+
continue;
1361+
}
1362+
/* Got the match */
1363+
Py_DECREF(t_dtypes);
1364+
return PyTuple_GetItem(item, 1);
1365+
}
1366+
Py_DECREF(t_dtypes);
1367+
Py_RETURN_NONE;
1368+
}
1369+
12821370
12831371
static int
12841372
InitOperators(PyObject *dictionary) {

numpy/core/src/multiarray/argfunc.dispatch.c.src

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ simd_@func@_@sfx@(npyv_lanetype_@sfx@ *ip, npy_intp len)
194194
npyv_@bsfx@ nnan_ab = npyv_and_@bsfx@(nnan_a, nnan_b);
195195
npyv_@bsfx@ nnan_cd = npyv_and_@bsfx@(nnan_c, nnan_d);
196196
npy_uint64 nnan = npyv_tobits_@bsfx@(npyv_and_@bsfx@(nnan_ab, nnan_cd));
197-
if (nnan != ((1LL << vstep) - 1)) {
197+
if ((long long int)nnan != ((1LL << vstep) - 1)) {
198198
npy_uint64 nnan_4[4];
199199
nnan_4[0] = npyv_tobits_@bsfx@(nnan_a);
200200
nnan_4[1] = npyv_tobits_@bsfx@(nnan_b);
@@ -219,7 +219,7 @@ simd_@func@_@sfx@(npyv_lanetype_@sfx@ *ip, npy_intp len)
219219
#if @is_fp@
220220
npyv_@bsfx@ nnan_a = npyv_notnan_@sfx@(a);
221221
npy_uint64 nnan = npyv_tobits_@bsfx@(nnan_a);
222-
if (nnan != ((1LL << vstep) - 1)) {
222+
if ((long long int)nnan != ((1LL << vstep) - 1)) {
223223
for (int vi = 0; vi < vstep; ++vi) {
224224
if (!((nnan >> vi) & 1)) {
225225
return i + vi;

numpy/core/src/multiarray/array_method.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,9 @@ fill_arraymethod_from_slots(
291291
case NPY_METH_get_reduction_initial:
292292
meth->get_reduction_initial = slot->pfunc;
293293
continue;
294+
case NPY_METH_contiguous_indexed_loop:
295+
meth->contiguous_indexed_loop = slot->pfunc;
296+
continue;
294297
default:
295298
break;
296299
}
@@ -891,7 +894,7 @@ generic_masked_strided_loop(PyArrayMethod_Context *context,
891894
/*
892895
* Fetches a strided-loop function that supports a boolean mask as additional
893896
* (last) operand to the strided-loop. It is otherwise largely identical to
894-
* the `get_loop` method which it wraps.
897+
* the `get_strided_loop` method which it wraps.
895898
* This is the core implementation for the ufunc `where=...` keyword argument.
896899
*
897900
* NOTE: This function does not support `move_references` or inner dimensions.

numpy/core/src/multiarray/array_method.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ struct PyArrayMethodObject_tag;
7373
* TODO: Before making this public, we should review which information should
7474
* be stored on the Context/BoundArrayMethod vs. the ArrayMethod.
7575
*/
76-
typedef struct {
76+
typedef struct PyArrayMethod_Context_tag {
7777
PyObject *caller; /* E.g. the original ufunc, may be NULL */
7878
struct PyArrayMethodObject_tag *method;
7979

@@ -223,6 +223,7 @@ typedef struct PyArrayMethodObject_tag {
223223
PyArrayMethod_StridedLoop *contiguous_loop;
224224
PyArrayMethod_StridedLoop *unaligned_strided_loop;
225225
PyArrayMethod_StridedLoop *unaligned_contiguous_loop;
226+
PyArrayMethod_StridedLoop *contiguous_indexed_loop;
226227
/* Chunk only used for wrapping array method defined in umath */
227228
struct PyArrayMethodObject_tag *wrapped_meth;
228229
PyArray_DTypeMeta **wrapped_dtypes;
@@ -266,6 +267,7 @@ extern NPY_NO_EXPORT PyTypeObject PyBoundArrayMethod_Type;
266267
#define NPY_METH_contiguous_loop 5
267268
#define NPY_METH_unaligned_strided_loop 6
268269
#define NPY_METH_unaligned_contiguous_loop 7
270+
#define NPY_METH_contiguous_indexed_loop 8
269271

270272

271273
/*

numpy/core/src/umath/loops.c.src

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -541,15 +541,21 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
541541
}
542542
}
543543

544-
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
545-
@TYPE@_@kind@@isa@_indexed(char const *ip1, npy_intp const *indx, char *value,
546-
npy_intp const *dimensions, npy_intp const *steps) {
544+
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ int
545+
@TYPE@_@kind@@isa@_indexed(PyArrayMethod_Context *NPY_UNUSED(context), char * const*args, npy_intp const *dimensions, npy_intp const *steps, NpyAuxData *NPY_UNUSED(func))
546+
{
547+
char *ip1 = args[0];
548+
npy_intp *indx = (npy_intp *)args[1];
549+
char *value = args[2];
547550
npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];
548551
npy_intp n = dimensions[0];
549552
npy_intp i;
553+
@type@ *indexed;
550554
for(i = 0; i < n; i++, indx += is2, value += os1) {
551-
@type@ op1 = *(@type@ *)(ip1 + is1 * indx[0]) + value[0];
555+
indexed = (@type@ *)(ip1 + is1 * indx[0]);
556+
indexed[0] = indexed[0] @OP@ *(@type@ *)value;
552557
}
558+
return 0;
553559
}
554560

555561
#endif
@@ -1546,6 +1552,23 @@ LONGDOUBLE_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps
15461552
}
15471553
}
15481554
}
1555+
1556+
NPY_NO_EXPORT void
1557+
LONGDOUBLE_@kind@_indexed(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1558+
{
1559+
char *ip1 = args[0];
1560+
npy_intp *indx = (npy_intp *)args[1];
1561+
char *value = args[2];
1562+
npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];
1563+
npy_intp n = dimensions[0];
1564+
npy_intp i;
1565+
npy_longdouble *indexed;
1566+
for(i = 0; i < n; i++, indx += is2, value += os1) {
1567+
indexed = (npy_longdouble *)(ip1 + is1 * indx[0]);
1568+
indexed[0] = indexed[0] @OP@ *(npy_longdouble *)value;
1569+
}
1570+
}
1571+
15491572
/**end repeat**/
15501573

15511574
/**begin repeat
@@ -1651,6 +1674,24 @@ HALF_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void
16511674
}
16521675
}
16531676
}
1677+
1678+
NPY_NO_EXPORT int
1679+
HALF_@kind@_indexed(void *NPY_UNUSED(context), char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1680+
{
1681+
char *ip1 = args[0];
1682+
npy_intp *indx = (npy_intp *)args[1];
1683+
char *value = args[2];
1684+
npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];
1685+
npy_intp n = dimensions[0];
1686+
npy_intp i;
1687+
npy_half *indexed;
1688+
for(i = 0; i < n; i++, indx += is2, value += os1) {
1689+
indexed = (npy_half *)(ip1 + is1 * indx[0]);
1690+
const float v = npy_half_to_float(*(npy_half *)value);
1691+
indexed[0] = npy_float_to_half(npy_half_to_float(indexed[0]) @OP@ v);
1692+
}
1693+
return 0;
1694+
}
16541695
/**end repeat**/
16551696

16561697
#define _HALF_LOGICAL_AND(a,b) (!npy_half_iszero(a) && !npy_half_iszero(b))

numpy/core/src/umath/loops.h.src

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@
3333
// #define BOOL_fmax BOOL_maximum
3434
// #define BOOL_fmin BOOL_minimum
3535

36+
typedef struct PyArrayMethod_Context_tag PyArrayMethod_Context;
37+
typedef struct NpyAuxData_tag NpyAuxData;
38+
3639
#ifndef NPY_DISABLE_OPTIMIZATION
3740
#include "loops_comparison.dispatch.h"
3841
#endif
@@ -81,6 +84,11 @@ BOOL_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void
8184
*/
8285
NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_divide,
8386
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)))
87+
88+
NPY_NO_EXPORT int
89+
@TYPE@_divide_indexed(PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, npy_intp const *dimensions, npy_intp const *steps, NpyAuxData *NPY_UNUSED(func));
90+
91+
/**end repeat3**/
8492
/**end repeat**/
8593

8694
#ifndef NPY_DISABLE_OPTIMIZATION
@@ -163,6 +171,9 @@ NPY_NO_EXPORT void
163171
NPY_NO_EXPORT void
164172
@S@@TYPE@_@kind@@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
165173

174+
NPY_NO_EXPORT int
175+
@S@@TYPE@_@kind@@isa@_indexed(PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, npy_intp const *dimensions, npy_intp const *steps, NpyAuxData *NPY_UNUSED(func));
176+
166177
/**end repeat3**/
167178

168179
/**begin repeat3
@@ -285,10 +296,13 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@,
285296
/**begin repeat1
286297
* Arithmetic
287298
* # kind = add, subtract, multiply, divide#
288-
* # OP = +, -, *, /#
289299
*/
290300
NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@,
291301
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)))
302+
303+
NPY_NO_EXPORT int
304+
@TYPE@_@kind@_indexed(PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, npy_intp const *dimensions, npy_intp const *steps, NpyAuxData *NPY_UNUSED(func));
305+
292306
/**end repeat1**/
293307
/**end repeat**/
294308

@@ -424,6 +438,11 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@, (
424438
*/
425439
NPY_NO_EXPORT void
426440
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
441+
442+
NPY_NO_EXPORT int
443+
@TYPE@_@kind@_indexed(PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, npy_intp const *dimensions, npy_intp const *steps, NpyAuxData *NPY_UNUSED(func));
444+
445+
/**end repeat1**/
427446
/**end repeat1**/
428447

429448
/**begin repeat1

0 commit comments

Comments
 (0)
0