Merge pull request #13421 from seiko2plus/core_improve_infa_runtime · numpy/numpy@fed1fb4 · GitHub
[go: up one dir, main page]

Skip to content

Commit fed1fb4

Browse files
authored
Merge pull request #13421 from seiko2plus/core_improve_infa_runtime
ENH: improve runtime detection of CPU features
2 parents b69cf68 + 64f7074 commit fed1fb4

File tree

11 files changed

+651
-114
lines changed

11 files changed

+651
-114
lines changed

doc/release/13421.improvement.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Improve detection of CPU features
2+
=================================
3+
4+
Replace ``npy_cpu_supports``, which was a GCC-specific mechanism to test support
5+
of AVX, with the more general functions ``npy_cpu_init`` and ``npy_cpu_have``, and
6+
expose the results via a ``NPY_CPU_HAVE`` C macro as well as a Python-level
7+
``__cpu_features__`` dictionary.
8+

numpy/core/code_generators/generate_umath.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,7 +1014,7 @@ def make_arrays(funcdict):
10141014
for vt in t.simd:
10151015
code2list.append(textwrap.dedent("""\
10161016
#ifdef HAVE_ATTRIBUTE_TARGET_{ISA}
1017-
if (npy_cpu_supports("{isa}")) {{
1017+
if (NPY_CPU_HAVE({ISA})) {{
10181018
{fname}_functions[{idx}] = {type}_{fname}_{isa};
10191019
}}
10201020
#endif
@@ -1138,7 +1138,6 @@ def make_code(funcdict, filename):
11381138
11391139
Please make changes to the code generator program (%s)
11401140
**/
1141-
#include "cpuid.h"
11421141
#include "ufunc_object.h"
11431142
#include "ufunc_type_resolution.h"
11441143
#include "loops.h"

numpy/core/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,7 @@ def get_mathlib_info(*args):
745745
join('src', 'common', 'ucsnarrow.c'),
746746
join('src', 'common', 'ufunc_override.c'),
747747
join('src', 'common', 'numpyos.c'),
748+
join('src', 'common', 'npy_cpu_features.c.src'),
748749
]
749750

750751
if os.environ.get('NPY_USE_BLAS_ILP64', "0") != "0":
@@ -898,7 +899,6 @@ def generate_umath_c(ext, build_dir):
898899
join('src', 'umath', 'clip.c.src'),
899900
join('src', 'umath', 'ufunc_object.c'),
900901
join('src', 'umath', 'extobj.c'),
901-
join('src', 'umath', 'cpuid.c'),
902902
join('src', 'umath', 'scalarmath.c.src'),
903903
join('src', 'umath', 'ufunc_type_resolution.c'),
904904
join('src', 'umath', 'override.c'),

numpy/core/setup_common.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,11 +132,6 @@ def check_api_version(apiversion, codegen_dir):
132132
("__builtin_bswap64", '5u'),
133133
("__builtin_expect", '5, 0'),
134134
("__builtin_mul_overflow", '5, 5, (int*)5'),
135-
# broken on OSX 10.11, make sure it's not optimized away
136-
("volatile int r = __builtin_cpu_supports", '"sse"',
137-
"stdio.h", "__BUILTIN_CPU_SUPPORTS"),
138-
("volatile int r = __builtin_cpu_supports", '"avx512f"',
139-
"stdio.h", "__BUILTIN_CPU_SUPPORTS_AVX512F"),
140135
# MMX only needed for icc, but some clangs don't have it
141136
("_m_from_int64", '0', "emmintrin.h"),
142137
("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE

numpy/core/src/common/npy_config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define _NPY_NPY_CONFIG_H_
33

44
#include "config.h"
5+
#include "npy_cpu_features.h"
56
#include "numpy/numpyconfig.h"
67
#include "numpy/npy_cpu.h"
78
#include "numpy/npy_os.h"

0 commit comments

Comments
 (0)
0