-
-
Notifications
You must be signed in to change notification settings - Fork 10.9k
ENH, SIMD: Ditching the old CPU dispatcher(Arithmetic) #17985
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
7bd6de3
d084917
0985a73
81bb563
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -45,14 +45,20 @@ class TypeDescription: | |
astype : dict or None, optional | ||
If astype['x'] is 'y', uses PyUFunc_x_x_As_y_y/PyUFunc_xx_x_As_yy_y | ||
instead of PyUFunc_x_x/PyUFunc_xx_x. | ||
cfunc_alias : str or None, optional | ||
appended to inner loop C function name, e.g. FLOAT_{cfunc_alias} (see make_arrays) | ||
charris marked this conversation as resolved.
Show resolved
Hide resolved
|
||
NOTE: it doesn't support 'astype' | ||
simd: list | ||
Available SIMD ufunc loops, dispatched at runtime in specified order | ||
Currently only supported for simple types (see make_arrays) | ||
dispatch: list | ||
Available SIMD ufunc loops, dispatched at runtime in specified order | ||
Currently only supported for simple types (see make_arrays) | ||
dispatch: str or None, optional | ||
Dispatch-able source name without its extension '.dispatch.c' that | ||
contains the definition of ufunc, dispatched at runtime depending on the | ||
specified targets of the dispatch-able source. | ||
NOTE: it doesn't support 'astype' | ||
seiko2plus marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
def __init__(self, type, f=None, in_=None, out=None, astype=None, simd=None, dispatch=None): | ||
def __init__(self, type, f=None, in_=None, out=None, astype=None, cfunc_alias=None, | ||
simd=None, dispatch=None): | ||
self.type = type | ||
self.func_data = f | ||
if astype is None: | ||
|
@@ -64,6 +70,7 @@ def __init__(self, type, f=None, in_=None, out=None, astype=None, simd=None, dis | |
if out is not None: | ||
out = out.replace('P', type) | ||
self.out = out | ||
self.cfunc_alias = cfunc_alias | ||
self.simd = simd | ||
self.dispatch = dispatch | ||
|
||
|
@@ -90,7 +97,8 @@ def build_func_data(types, f): | |
func_data = [_fdata_map.get(t, '%s') % (f,) for t in types] | ||
return func_data | ||
|
||
def TD(types, f=None, astype=None, in_=None, out=None, simd=None, dispatch=None): | ||
def TD(types, f=None, astype=None, in_=None, out=None, cfunc_alias=None, | ||
simd=None, dispatch=None): | ||
if f is not None: | ||
if isinstance(f, str): | ||
func_data = build_func_data(types, f) | ||
|
@@ -119,13 +127,15 @@ def TD(types, f=None, astype=None, in_=None, out=None, simd=None, dispatch=None) | |
simdt = [k for k, v in simd if t in v] | ||
else: | ||
simdt = [] | ||
|
||
# [(dispatch file name without extension '.dispatch.c*', list of types)] | ||
if dispatch: | ||
dispt = [k for k, v in dispatch if t in v] | ||
dispt = ([k for k, v in dispatch if t in v]+[None])[0] | ||
else: | ||
dispt = [] | ||
dispt = None | ||
tds.append(TypeDescription( | ||
t, f=fd, in_=i, out=o, astype=astype, simd=simdt, dispatch=dispt | ||
t, f=fd, in_=i, out=o, astype=astype, cfunc_alias=cfunc_alias, | ||
simd=simdt, dispatch=dispt | ||
)) | ||
return tds | ||
|
||
|
@@ -280,7 +290,7 @@ def english_upper(s): | |
Ufunc(2, 1, Zero, | ||
docstrings.get('numpy.core.umath.add'), | ||
'PyUFunc_AdditionTypeResolver', | ||
TD(notimes_or_obj, simd=[('avx512f', cmplxvec),('avx2', ints)]), | ||
TD(notimes_or_obj, simd=[('avx2', ints)], dispatch=[('loops_arithm_fp', 'fdFD')]), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to be clear: this adds There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, it dispatches single/double for both real and complex the definitions of these loops moved from
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ahh, my bad, it is in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
AArch64 and Power9 have support for quad precision floats. The astronomy community would like to have them and I think we will see more support coming in modern architectures. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AArch64 and Power9 have scalar support but not SIMD. Power9 users have to build NumPy with CFLAGS |
||
[TypeDescription('M', FullTypeDescr, 'Mm', 'M'), | ||
TypeDescription('m', FullTypeDescr, 'mm', 'm'), | ||
TypeDescription('M', FullTypeDescr, 'mM', 'M'), | ||
|
@@ -291,7 +301,7 @@ def english_upper(s): | |
Ufunc(2, 1, None, # Zero is only a unit to the right, not the left | ||
docstrings.get('numpy.core.umath.subtract'), | ||
'PyUFunc_SubtractionTypeResolver', | ||
TD(ints + inexact, simd=[('avx512f', cmplxvec),('avx2', ints)]), | ||
TD(ints + inexact, simd=[('avx2', ints)], dispatch=[('loops_arithm_fp', 'fdFD')]), | ||
[TypeDescription('M', FullTypeDescr, 'Mm', 'M'), | ||
TypeDescription('m', FullTypeDescr, 'mm', 'm'), | ||
TypeDescription('M', FullTypeDescr, 'MM', 'm'), | ||
|
@@ -302,7 +312,7 @@ def english_upper(s): | |
Ufunc(2, 1, One, | ||
docstrings.get('numpy.core.umath.multiply'), | ||
'PyUFunc_MultiplicationTypeResolver', | ||
TD(notimes_or_obj, simd=[('avx512f', cmplxvec),('avx2', ints)]), | ||
TD(notimes_or_obj, simd=[('avx2', ints)], dispatch=[('loops_arithm_fp', 'fdFD')]), | ||
[TypeDescription('m', FullTypeDescr, 'mq', 'm'), | ||
TypeDescription('m', FullTypeDescr, 'qm', 'm'), | ||
TypeDescription('m', FullTypeDescr, 'md', 'm'), | ||
|
@@ -326,10 +336,10 @@ def english_upper(s): | |
Ufunc(2, 1, None, # One is only a unit to the right, not the left | ||
docstrings.get('numpy.core.umath.true_divide'), | ||
'PyUFunc_TrueDivisionTypeResolver', | ||
TD(flts+cmplx), | ||
[TypeDescription('m', FullTypeDescr, 'mq', 'm'), | ||
TypeDescription('m', FullTypeDescr, 'md', 'm'), | ||
TypeDescription('m', FullTypeDescr, 'mm', 'd'), | ||
TD(flts+cmplx, cfunc_alias='divide', dispatch=[('loops_arithm_fp', 'fd')]), | ||
[TypeDescription('m', FullTypeDescr, 'mq', 'm', cfunc_alias='divide'), | ||
TypeDescription('m', FullTypeDescr, 'md', 'm', cfunc_alias='divide'), | ||
TypeDescription('m', FullTypeDescr, 'mm', 'd', cfunc_alias='divide'), | ||
], | ||
TD(O, f='PyNumber_TrueDivide'), | ||
), | ||
|
@@ -1000,6 +1010,7 @@ def make_arrays(funcdict): | |
# later | ||
code1list = [] | ||
code2list = [] | ||
dispdict = {} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe |
||
names = sorted(funcdict.keys()) | ||
for name in names: | ||
uf = funcdict[name] | ||
|
@@ -1010,44 +1021,33 @@ def make_arrays(funcdict): | |
sub = 0 | ||
|
||
for t in uf.type_descriptions: | ||
cfunc_alias = t.cfunc_alias if t.cfunc_alias else name | ||
cfunc_fname = None | ||
if t.func_data is FullTypeDescr: | ||
tname = english_upper(chartoname[t.type]) | ||
datalist.append('(void *)NULL') | ||
funclist.append( | ||
'%s_%s_%s_%s' % (tname, t.in_, t.out, name)) | ||
cfunc_fname = f"{tname}_{t.in_}_{t.out}_{cfunc_alias}" | ||
elif isinstance(t.func_data, FuncNameSuffix): | ||
datalist.append('(void *)NULL') | ||
tname = english_upper(chartoname[t.type]) | ||
funclist.append( | ||
'%s_%s_%s' % (tname, name, t.func_data.suffix)) | ||
cfunc_fname = f"{tname}_{cfunc_alias}_{t.func_data.suffix}" | ||
elif t.func_data is None: | ||
datalist.append('(void *)NULL') | ||
tname = english_upper(chartoname[t.type]) | ||
funclist.append('%s_%s' % (tname, name)) | ||
cfunc_fname = f"{tname}_{cfunc_alias}" | ||
if t.simd is not None: | ||
for vt in t.simd: | ||
code2list.append(textwrap.dedent("""\ | ||
#ifdef HAVE_ATTRIBUTE_TARGET_{ISA} | ||
if (NPY_CPU_HAVE({ISA})) {{ | ||
{fname}_functions[{idx}] = {type}_{fname}_{isa}; | ||
{fname}_functions[{idx}] = {cname}_{isa}; | ||
}} | ||
#endif | ||
""").format( | ||
ISA=vt.upper(), isa=vt, | ||
fname=name, type=tname, idx=k | ||
)) | ||
if t.dispatch is not None: | ||
for dname in t.dispatch: | ||
code2list.append(textwrap.dedent("""\ | ||
#ifndef NPY_DISABLE_OPTIMIZATION | ||
#include "{dname}.dispatch.h" | ||
#endif | ||
NPY_CPU_DISPATCH_CALL_XB({name}_functions[{k}] = {tname}_{name}); | ||
""").format( | ||
dname=dname, name=name, tname=tname, k=k | ||
fname=name, cname=cfunc_fname, idx=k | ||
)) | ||
else: | ||
funclist.append('NULL') | ||
try: | ||
thedict = arity_lookup[uf.nin, uf.nout] | ||
except KeyError as e: | ||
|
@@ -1077,6 +1077,13 @@ def make_arrays(funcdict): | |
#datalist.append('(void *)%s' % t.func_data) | ||
sub += 1 | ||
|
||
if cfunc_fname: | ||
funclist.append(cfunc_fname) | ||
if t.dispatch: | ||
dispdict.setdefault(t.dispatch, []).append((name, k, cfunc_fname)) | ||
else: | ||
funclist.append('NULL') | ||
|
||
for x in t.in_ + t.out: | ||
siglist.append('NPY_%s' % (english_upper(chartoname[x]),)) | ||
|
||
|
@@ -1091,6 +1098,17 @@ def make_arrays(funcdict): | |
% (name, datanames)) | ||
code1list.append("static char %s_signatures[] = {%s};" | ||
% (name, signames)) | ||
|
||
for dname, funcs in dispdict.items(): | ||
code2list.append(textwrap.dedent(f""" | ||
#ifndef NPY_DISABLE_OPTIMIZATION | ||
#include "{dname}.dispatch.h" | ||
#endif | ||
""")) | ||
for (ufunc_name, func_idx, cfunc_name) in funcs: | ||
code2list.append(textwrap.dedent(f"""\ | ||
NPY_CPU_DISPATCH_CALL_XB({ufunc_name}_functions[{func_idx}] = {cfunc_name}); | ||
""")) | ||
return "\n".join(code1list), "\n".join(code2list) | ||
|
||
def make_ufuncs(funcdict): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it is better in general to use full names like "arithmetic" rather than shortened names. Seven character names are long gone :) This can be changed later though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will consider it a policy to follow in my upcoming patches.