8000 ENH: create and use indexed inner loops by mattip · Pull Request #23136 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

ENH: create and use indexed inner loops #23136

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Feb 7, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
ENH: add indexed lower loops and connect them to ufuncs
  • Loading branch information
mattip committed Jan 30, 2023
commit 7910e2bf14b8bef95c3180fbf667458e29426a0e
98 changes: 93 additions & 5 deletions numpy/core/code_generators/generate_umath.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
"""
Generate the code to build all the internal ufuncs. At the base is the defdict:
a dictionary ofUfunc classes. This is fed to make_code to generate
__umath_generated.c
"""
import os
import re
import struct
import sys
import textwrap
import argparse

# identity objects
Zero = "PyLong_FromLong(0)"
One = "PyLong_FromLong(1)"
True_ = "(Py_INCREF(Py_True), Py_True)"
Expand Down Expand Up @@ -125,7 +131,7 @@ def check_td_order(tds):
_check_order(signatures[prev_i], sign)


_fdata_map = dict(
_floatformat_map = dict(
e='npy_%sf',
f='npy_%sf',
d='npy_%s',
Expand All @@ -136,11 +142,13 @@ def check_td_order(tds):
)

def build_func_data(types, f):
func_data = [_fdata_map.get(t, '%s') % (f,) for t in types]
func_data = [_floatformat_map.get(t, '%s') % (f,) for t in types]
return func_data

def TD(types, f=None, astype=None, in_=None, out=None, cfunc_alias=None,
simd=None, dispatch=None):
"""Generate a TypeDescription instance for each item in types
"""
if f is not None:
if isinstance(f, str):
func_data = build_func_data(types, f)
Expand Down Expand Up @@ -188,12 +196,15 @@ class Ufunc:
----------
nin : number of input arguments
nout : number of output arguments
identity : identity element for a two-argument function
identity : identity element for a two-argument function (like Zero)
docstring : docstring for the ufunc
type_descriptions : list of TypeDescription objects
typereso: type resolver function of type PyUFunc_TypeResolutionFunc
type_descriptions : TypeDescription objects
signature: a generalized ufunc signature (like for matmul)
indexed: add indexed loops (ufunc.at) for these type characters
"""
def __init__(self, nin, nout, identity, docstring, typereso,
*type_descriptions, signature=None):
*type_descriptions, signature=None, indexed=''):
self.nin = nin
self.nout = nout
if identity is None:
Expand All @@ -203,6 +214,7 @@ def __init__(self, nin, nout, identity, docstring, typereso,
self.typereso = typereso
self.type_descriptions = []
self.signature = signature
self.indexed = indexed
for td in type_descriptions:
self.type_descriptions.extend(td)
for td in self.type_descriptions:
Expand Down Expand Up @@ -347,6 +359,7 @@ def english_upper(s):
TypeDescription('M', FullTypeDescr, 'mM', 'M'),
],
TD(O, f='PyNumber_Add'),
indexed=flts + ints
),
'subtract':
Ufunc(2, 1, None, # Zero is only a unit to the right, not the left
Expand All @@ -359,6 +372,7 @@ def english_upper(s):
TypeDescription('M', FullTypeDescr, 'MM', 'm'),
],
TD(O, f='PyNumber_Subtract'),
indexed=flts + ints
),
'multiply':
Ufunc(2, 1, One,
Expand All @@ -374,6 +388,7 @@ def english_upper(s):
TypeDescription('m', FullTypeDescr, 'dm', 'm'),
],
TD(O, f='PyNumber_Multiply'),
indexed=flts + ints
),
#'true_divide' : aliased to divide in umathmodule.c:initumath
'floor_divide':
Expand All @@ -388,6 +403,7 @@ def english_upper(s):
TypeDescription('m', FullTypeDescr, 'mm', 'q'),
],
TD(O, f='PyNumber_FloorDivide'),
indexed=flts + ints
),
'divide':
Ufunc(2, 1, None, # One is only a unit to the right, not the left
Expand Down Expand Up @@ -1255,6 +1271,40 @@ def make_ufuncs(funcdict):
if uf.typereso is not None:
mlist.append(
r"((PyUFuncObject *)f)->type_resolver = &%s;" % uf.typereso)
for c in uf.indexed:
# Handle indexed loops by getting the underlying ArrayMethodObject
# from the list in f._loops and setting its field appropriately
fmt = textwrap.dedent("""
{{
PyArray_DTypeMeta *dtype = PyArray_DTypeFromTypeNum({typenum});
PyObject *info = get_info_no_cast((PyUFuncObject *)f, dtype, {count});
if (info == NULL) {{
return -1;
}}
if (info == Py_None) {{
PyErr_SetString(PyExc_RuntimeError,
"cannot add indexed loop to ufunc {name} with {typenum}");
return -1;
}}
if (!PyObject_TypeCheck(info, &PyArrayMethod_Type)) {{
PyErr_SetString(PyExc_RuntimeError,
"Not a PyArrayMethodObject in ufunc {name} with {typenum}");
}}
((PyArrayMethodObject*)info)->contiguous_indexed_loop = {funcname};
/* info is borrowed, no need to decref*/
}}
""")
cname = name
if cname == "floor_divide":
# XXX there must be a better way...
cname = "divide"
mlist.append(fmt.format(
typenum=f"NPY_{english_upper(chartoname[c])}",
count=uf.nin+uf.nout,
name=name,
funcname = f"{english_upper(chartoname[c])}_{cname}_indexed",
))

mlist.append(r"""PyDict_SetItemString(dictionary, "%s", f);""" % name)
mlist.append(r"""Py_DECREF(f);""")
code3list.append('\n'.join(mlist))
Expand All @@ -1277,8 +1327,46 @@ def make_code(funcdict, filename):
#include "loops.h"
#include "matmul.h"
#include "clip.h"
#include "dtypemeta.h"
#include "_umath_doc_generated.h"

%s
/*
* Return the PyArrayMethodObject or PyCapsule that matches a registered
* tuple of identical dtypes. Return a borrowed ref of the first match.
*/
static PyObject *
get_info_no_cast(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtype, int ndtypes)
{

PyObject *t_dtypes = PyTuple_New(ndtypes);
if (t_dtypes == NULL) {
return NULL;
}
for (int i=0; i < ndtypes; i++) {
PyTuple_SetItem(t_dtypes, i, (PyObject *)op_dtype);
}
PyObject *loops = ufunc->_loops;
Py_ssize_t length = PyList_Size(loops);
for (Py_ssize_t i = 0; i < length; i++) {
PyObject *item = PyList_GetItem(loops, i);
PyObject *cur_DType_tuple = PyTuple_GetItem(item, 0);
int cmp = PyObject_RichCompareBool(cur_DType_tuple, t_dtypes, Py_EQ);
if (cmp < 0) {
Py_DECREF(t_dtypes);
return NULL;
}
if (cmp == 0) {
continue;
}
/* Got the match */
Py_DECREF(t_dtypes);
return PyTuple_GetItem(item, 1);
}
Py_DECREF(t_dtypes);
Py_RETURN_NONE;
}


static int
InitOperators(PyObject *dictionary) {
Expand Down
4 changes: 2 additions & 2 deletions numpy/core/src/multiarray/argfunc.dispatch.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ simd_@func@_@sfx@(npyv_lanetype_@sfx@ *ip, npy_intp len)
npyv_@bsfx@ nnan_ab = npyv_and_@bsfx@(nnan_a, nnan_b);
npyv_@bsfx@ nnan_cd = npyv_and_@bsfx@(nnan_c, nnan_d);
npy_uint64 nnan = npyv_tobits_@bsfx@(npyv_and_@bsfx@(nnan_ab, nnan_cd));
if (nnan != ((1LL << vstep) - 1)) {
if ((long long int)nnan != ((1LL << vstep) - 1)) {
npy_uint64 nnan_4[4];
nnan_4[0] = npyv_tobits_@bsfx@(nnan_a);
nnan_4[1] = npyv_tobits_@bsfx@(nnan_b);
Expand All @@ -219,7 +219,7 @@ simd_@func@_@sfx@(npy F438 v_lanetype_@sfx@ *ip, npy_intp len)
#if @is_fp@
npyv_@bsfx@ nnan_a = npyv_notnan_@sfx@(a);
npy_uint64 nnan = npyv_tobits_@bsfx@(nnan_a);
if (nnan != ((1LL << vstep) - 1)) {
if ((long long int)nnan != ((1LL << vstep) - 1)) {
for (int vi = 0; vi < vstep; ++vi) {
if (!((nnan >> vi) & 1)) {
return i + vi;
Expand Down
5 changes: 4 additions & 1 deletion numpy/core/src/multiarray/array_method.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,9 @@ fill_arraymethod_from_slots(
case NPY_METH_get_reduction_initial:
meth->get_reduction_initial = slot->pfunc;
continue;
case NPY_METH_contiguous_indexed_loop:
meth->contiguous_indexed_loop = slot->pfunc;
continue;
default:
break;
}
Expand Down Expand Up @@ -891,7 +894,7 @@ generic_masked_strided_loop(PyArrayMethod_Context *context,
/*
* Fetches a strided-loop function that supports a boolean mask as additional
* (last) operand to the strided-loop. It is otherwise largely identical to
* the `get_loop` method which it wraps.
* the `get_strided_loop` method which it wraps.
* This is the core implementation for the ufunc `where=...` keyword argument.
*
* NOTE: This function does not support `move_references` or inner dimensions.
Expand Down
4 changes: 3 additions & 1 deletion numpy/core/src/multiarray/array_method.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ struct PyArrayMethodObject_tag;
* TODO: Before making this public, we should review which information should
* be stored on the Context/BoundArrayMethod vs. the ArrayMethod.
*/
typedef struct {
typedef struct PyArrayMethod_Context_t 10000 ag {
PyObject *caller; /* E.g. the original ufunc, may be NULL */
struct PyArrayMethodObject_tag *method;

Expand Down Expand Up @@ -223,6 +223,7 @@ typedef struct PyArrayMethodObject_tag {
PyArrayMethod_StridedLoop *contiguous_loop;
PyArrayMethod_StridedLoop *unaligned_strided_loop;
PyArrayMethod_StridedLoop *unaligned_contiguous_loop;
PyArrayMethod_StridedLoop *contiguous_indexed_loop;
/* Chunk only used for wrapping array method defined in umath */
struct PyArrayMethodObject_tag *wrapped_meth;
PyArray_DTypeMeta **wrapped_dtypes;
Expand Down Expand Up @@ -266,6 +267,7 @@ extern NPY_NO_EXPORT PyTypeObject PyBoundArrayMethod_Type;
#define NPY_METH_contiguous_loop 5
#define NPY_METH_unaligned_strided_loop 6
#define NPY_METH_unaligned_contiguous_loop 7
#define NPY_METH_contiguous_indexed_loop 8


/*
Expand Down
49 changes: 45 additions & 4 deletions numpy/core/src/umath/loops.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -541,15 +541,21 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
}
}

NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
@TYPE@_@kind@@isa@_indexed(char const *ip1, npy_intp const *indx, char *value,
npy_intp const *dimensions, npy_intp const *steps) {
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ int
@TYPE@_@kind@@isa@_indexed(PyArrayMethod_Context *NPY_UNUSED(context), char * const*args, npy_intp const *dimensions, npy_intp const *steps, NpyAuxData *NPY_UNUSED(func))
{
char *ip1 = args[0];
npy_intp *indx = (npy_intp *)args[1];
char *value = args[2];
npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];
npy_intp n = dimensions[0];
npy_intp i;
@type@ *indexed;
for(i = 0; i < n; i++, indx += is2, value += os1) {
@type@ op1 = *(@type@ *)(ip1 + is1 * indx[0]) + value[0];
indexed = (@type@ *)(ip1 + is1 * indx[0]);
indexed[0] = indexed[0] @OP@ *(@type@ *)value;
}
return 0;
}

#endif
Expand Down Expand Up @@ -1546,6 +1552,23 @@ LONGDOUBLE_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps
}
}
}

NPY_NO_EXPORT void
LONGDOUBLE_@kind@_indexed(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
{
char *ip1 = args[0];
npy_intp *indx = (npy_intp *)args[1];
char *value = args[2];
npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];
npy_intp n = dimensions[0];
npy_intp i;
npy_longdouble *indexed;
for(i = 0; i < n; i++, indx += is2, value += os1) {
indexed = (npy_longdouble *)(ip1 + is1 * indx[0]);
indexed[0] = indexed[0] @OP@ *(npy_longdouble *)value;
}
}

/**end repeat**/

/**begin repeat
Expand Down Expand Up @@ -1651,6 +1674,24 @@ HALF_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void
}
}
}

NPY_NO_EXPORT int
HALF_@kind@_indexed(void *NPY_UNUSED(context), char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
{
char *ip1 = args[0];
npy_intp *indx = (npy_intp *)args[1];
char *value = args[2];
npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];
npy_intp n = dimensions[0];
npy_intp i;
npy_half *indexed;
for(i = 0; i < n; i++, indx += is2, value += os1) {
indexed = (npy_half *)(ip1 + is1 * indx[0]);
const float v = npy_half_to_float(*(npy_half *)value);
indexed[0] = npy_float_to_half(npy_half_to_float(indexed[0]) @OP@ v);
}
return 0;
}
/**end repeat**/

#define _HALF_LOGICAL_AND(a,b) (!npy_half_iszero(a) && !npy_half_iszero(b))
Expand Down
21 changes: 20 additions & 1 deletion numpy/core/src/umath/loops.h.src
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
// #define BOOL_fmax BOOL_maximum
// #define BOOL_fmin BOOL_minimum

typedef struct PyArrayMethod_Context_tag PyArrayMethod_Context;
typedef struct NpyAuxData_tag NpyAuxData;

#ifndef NPY_DISABLE_OPTIMIZATION
#include "loops_comparison.dispatch.h"
#endif
Expand Down Expand Up @@ -81,6 +84,11 @@ BOOL_@kind@(char **args 57AE , npy_intp const *dimensions, npy_intp const *steps, void
*/
NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_divide,
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)))

NPY_NO_EXPORT int
@TYPE@_divide_indexed(PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, npy_intp const *dimensions, npy_intp const *steps, NpyAuxData *NPY_UNUSED(func));

/**end repeat3**/
/**end repeat**/

#ifndef NPY_DISABLE_OPTIMIZATION
Expand Down Expand Up @@ -163,6 +171,9 @@ NPY_NO_EXPORT void
NPY_NO_EXPORT void
@S@@TYPE@_@kind@@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));

NPY_NO_EXPORT int
@S@@TYPE@_@kind@@isa@_indexed(PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, npy_intp const *dimensions, npy_intp const *steps, NpyAuxData *NPY_UNUSED(func));

/**end repeat3**/

/**begin repeat3
Expand Down Expand Up @@ -285,10 +296,13 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@,
/**begin repeat1
* Arithmetic
* # kind = add, subtract, multiply, divide#
* # OP = +, -, *, /#
*/
NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@,
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)))

NPY_NO_EXPORT int
@TYPE@_@kind@_indexed(PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, npy_intp const *dimensions, npy_intp const *steps, NpyAuxData *NPY_UNUSED(func));

/**end repeat1**/
/**end repeat**/

Expand Down Expand Up @@ -424,6 +438,11 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@, (
*/
NPY_NO_EXPORT void
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));

NPY_NO_EXPORT int
@TYPE@_@kind@_indexed(PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, npy_intp const *dimensions, npy_intp const *steps, NpyAuxData *NPY_UNUSED(func));

/**end repeat1**/
/**end repeat1**/

/**begin repeat1
Expand Down
Loading
0