-
-
Notifications
You must be signed in to change notification settings - Fork 10.9k
ENH, SIMD: Ditching the old CPU dispatcher(Arithmetic) #17985
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
7bd6de3
d084917
0985a73
81bb563
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
This is the first patch in a series of pull requests that aims to facilitate the migration to our new SIMD interface (NPYV). The work focuses on getting rid of the main umath SIMD source `simd.inc`, which contains almost all SIMD kernels, by splitting it into several dispatch-able sources without changing the base code; this makes review easier and speeds up progress toward the intended target. In this patch, we have moved the arithmetic operations of real and complex for single/double precision to the new CPU dispatcher. NOTE: previously, the SIMD code of AVX2 and AVX512F for single/double precision was not dispatched at runtime.
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,18 +46,19 @@ class TypeDescription: | |
If astype['x'] is 'y', uses PyUFunc_x_x_As_y_y/PyUFunc_xx_x_As_yy_y | ||
instead of PyUFunc_x_x/PyUFunc_xx_x. | ||
cfunc_alias : str or none, optional | ||
replaces the suffix of C function name instead of using ufunc_name, | ||
e.g. "FLOAT_{cfunc_alias}" instead of "FLOAT_{ufunc_name}" (see make_arrays) | ||
appended to inner loop C function name, e.g. FLOAT_{cfunc_alias} (see make_arrays) | ||
charris marked this conversation as resolved.
Show resolved
Hide resolved
|
||
NOTE: it doesn't support 'astype' | ||
simd: list | ||
Available SIMD ufunc loops, dispatched at runtime in specified order | ||
Currently only supported for simple types (see make_arrays) | ||
dispatch: str or None, optional | ||
Dispatch-able source name without its extension '.dispatch.c' that contains the definition of ufunc, | ||
dispatched at runtime depending on the specified targets of the dispatch-able source. | ||
Dispatch-able source name without its extension '.dispatch.c' that | ||
contains the definition of ufunc, dispatched at runtime depending on the | ||
specified targets of the dispatch-able source. | ||
NOTE: it doesn't support 'astype' | ||
seiko2plus marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
def __init__(self, type, f=None, in_=None, out=None, astype=None, cfunc_alias=None, simd=None, dispatch=None): | ||
def __init__(self, type, f=None, in_=None, out=None, astype=None, cfunc_alias=None, | ||
simd=None, dispatch=None): | ||
self.type = type | ||
self.func_data = f | ||
if astype is None: | ||
|
@@ -96,7 +97,8 @@ def build_func_data(types, f): | |
func_data = [_fdata_map.get(t, '%s') % (f,) for t in types] | ||
return func_data | ||
|
||
def TD(types, f=None, astype=None, in_=None, out=None, cfunc_alias=None, simd=None, dispatch=None): | ||
def TD(types, f=None, astype=None, in_=None, out=None, cfunc_alias=None, | ||
simd=None, dispatch=None): | ||
if f is not None: | ||
if isinstance(f, str): | ||
func_data = build_func_data(types, f) | ||
|
@@ -132,7 +134,8 @@ def TD(types, f=None, astype=None, in_=None, out=None, cfunc_alias=None, simd=No | |
else: | ||
dispt = None | ||
tds.append(TypeDescription( | ||
t, f=fd, in_=i, out=o, astype=astype, cfunc_alias=cfunc_alias, simd=simdt, dispatch=dispt | ||
t, f=fd, in_=i, out=o, astype=astype, cfunc_alias=cfunc_alias, | ||
simd=simdt, dispatch=dispt | ||
)) | ||
return tds | ||
|
||
|
@@ -287,7 +290,7 @@ def english_upper(s): | |
Ufunc(2, 1, Zero, | ||
docstrings.get('numpy.core.umath.add'), | ||
'PyUFunc_AdditionTypeResolver', | ||
TD(notimes_or_obj, simd=[('avx512f', cmplxvec),('avx2', ints)]), | ||
TD(notimes_or_obj, simd=[('avx2', ints)], dispatch=[('loops_arithm_fp', 'fdFD')]), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to be clear: this adds There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, it dispatches single/double for both real and complex; the definitions of these loops moved from
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ahh, my bad, it is in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
AArch64 and Power9 have support for quad precision floats. The astronomy community would like to have them and I think we will see more support coming in modern architectures. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AArch64 and Power9 have scalar support but not SIMD. Power9 users have to build NumPy with CFLAGS |
||
[TypeDescription('M', FullTypeDescr, 'Mm', 'M'), | ||
TypeDescription('m', FullTypeDescr, 'mm', 'm'), | ||
TypeDescription('M', FullTypeDescr, 'mM', 'M'), | ||
|
@@ -298,7 +301,7 @@ def english_upper(s): | |
Ufunc(2, 1, None, # Zero is only a unit to the right, not the left | ||
docstrings.get('numpy.core.umath.subtract'), | ||
'PyUFunc_SubtractionTypeResolver', | ||
TD(ints + inexact, simd=[('avx512f', cmplxvec),('avx2', ints)]), | ||
TD(ints + inexact, simd=[('avx2', ints)], dispatch=[('loops_arithm_fp', 'fdFD')]), | ||
[TypeDescription('M', FullTypeDescr, 'Mm', 'M'), | ||
TypeDescription('m', FullTypeDescr, 'mm', 'm'), | ||
TypeDescription('M', FullTypeDescr, 'MM', 'm'), | ||
|
@@ -309,7 +312,7 @@ def english_upper(s): | |
Ufunc(2, 1, One, | ||
docstrings.get('numpy.core.umath.multiply'), | ||
'PyUFunc_MultiplicationTypeResolver', | ||
TD(notimes_or_obj, simd=[('avx512f', cmplxvec),('avx2', ints)]), | ||
TD(notimes_or_obj, simd=[('avx2', ints)], dispatch=[('loops_arithm_fp', 'fdFD')]), | ||
[TypeDescription('m', FullTypeDescr, 'mq', 'm'), | ||
TypeDescription('m', FullTypeDescr, 'qm', 'm'), | ||
TypeDescription('m', FullTypeDescr, 'md', 'm'), | ||
|
@@ -333,10 +336,10 @@ def english_upper(s): | |
Ufunc(2, 1, None, # One is only a unit to the right, not the left | ||
docstrings.get('numpy.core.umath.true_divide'), | ||
'PyUFunc_TrueDivisionTypeResolver', | ||
TD(flts+cmplx), | ||
[TypeDescription('m', FullTypeDescr, 'mq', 'm'), | ||
TypeDescription('m', FullTypeDescr, 'md', 'm'), | ||
TypeDescription('m', FullTypeDescr, 'mm', 'd'), | ||
TD(flts+cmplx, cfunc_alias='divide', dispatch=[('loops_arithm_fp', 'fd')]), | ||
[TypeDescription('m', FullTypeDescr, 'mq', 'm', cfunc_alias='divide'), | ||
TypeDescription('m', FullTypeDescr, 'md', 'm', cfunc_alias='divide'), | ||
TypeDescription('m', FullTypeDescr, 'mm', 'd', cfunc_alias='divide'), | ||
], | ||
TD(O, f='PyNumber_TrueDivide'), | ||
), | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it is better in general to use full names like "arithmetic" rather than shortened names. Seven character names are long gone :) This can be changed later though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will consider it a policy to follow in my upcoming patches.