diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h index 11a988163ad5..bd221547f66d 100644 --- a/numpy/core/include/numpy/npy_common.h +++ b/numpy/core/include/numpy/npy_common.h @@ -29,12 +29,12 @@ #endif /* compile target attributes */ -#ifdef HAVE_ATTRIBUTE_TARGET_AVX +#if defined HAVE_ATTRIBUTE_TARGET_AVX && defined HAVE_LINK_AVX #define NPY_GCC_TARGET_AVX __attribute__((target("avx"))) #else #define NPY_GCC_TARGET_AVX #endif -#ifdef HAVE_ATTRIBUTE_TARGET_AVX2 +#if defined HAVE_ATTRIBUTE_TARGET_AVX2 && defined HAVE_LINK_AVX2 #define NPY_GCC_TARGET_AVX2 __attribute__((target("avx2"))) #else #define NPY_GCC_TARGET_AVX2 diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 0b055dba460d..07c8478cd930 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -152,12 +152,14 @@ def check_funcs(funcs_name): for tup in OPTIONAL_INTRINSICS: headers = None if len(tup) == 2: - f, args = tup + f, args, m = tup[0], tup[1], fname2def(tup[0]) + elif len(tup) == 3: + f, args, headers, m = tup[0], tup[1], [tup[2]], fname2def(tup[0]) else: - f, args, headers = tup[0], tup[1], [tup[2]] + f, args, headers, m = tup[0], tup[1], [tup[2]], fname2def(tup[3]) if config.check_func(f, decl=False, call=True, call_args=args, headers=headers): - moredefs.append((fname2def(f), 1)) + moredefs.append((m, 1)) for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES: if config.check_gcc_function_attribute(dec, fn): diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index cc2047b45bbe..a97b02645827 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -116,7 +116,7 @@ def check_api_version(apiversion, codegen_dir): ] # optional gcc compiler builtins and their call arguments and optional a -# required header +# required header and definition name (HAVE_ prepended) # call arguments are required as the compiler will do strict signature checking OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'), ("__builtin_isinf", '5.'), @@ 
-125,12 +125,19 @@ def check_api_version(apiversion, codegen_dir): ("__builtin_bswap64", '5u'), ("__builtin_expect", '5, 0'), ("__builtin_mul_overflow", '5, 5, (int*)5'), - ("__builtin_cpu_supports", '"sse"'), + # broken on OSX 10.11, make sure it's not optimized away + ("volatile int r = __builtin_cpu_supports", '"sse"', + "stdio.h", "__BUILTIN_CPU_SUPPORTS"), ("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE ("_mm_prefetch", '(float*)0, _MM_HINT_NTA', "xmmintrin.h"), # SSE ("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2 ("__builtin_prefetch", "(float*)0, 0, 3"), + # check that the linker can handle AVX + ("__asm__ volatile", '"vpand %xmm1, %xmm2, %xmm3"', + "stdio.h", "LINK_AVX"), + ("__asm__ volatile", '"vpand %ymm1, %ymm2, %ymm3"', + "stdio.h", "LINK_AVX2"), ] # function attributes