diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c6171571857af6..05f20e12f4653d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -260,7 +260,7 @@ jobs: free-threading: ${{ matrix.free-threading }} os: ${{ matrix.os }} - build-ubuntu-ssltests: + build-ubuntu-ssltests-openssl: name: 'Ubuntu SSL tests with OpenSSL' runs-on: ${{ matrix.os }} timeout-minutes: 60 @@ -322,6 +322,81 @@ jobs: - name: SSL tests run: ./python Lib/test/ssltests.py + build-ubuntu-ssltests-awslc: + name: 'Ubuntu SSL tests with AWS-LC' + runs-on: ${{ matrix.os }} + timeout-minutes: 60 + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + strategy: + fail-fast: false + matrix: + os: [ubuntu-24.04] + awslc_ver: [1.55.0] + env: + AWSLC_VER: ${{ matrix.awslc_ver}} + MULTISSL_DIR: ${{ github.workspace }}/multissl + OPENSSL_DIR: ${{ github.workspace }}/multissl/aws-lc/${{ matrix.awslc_ver }} + LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/aws-lc/${{ matrix.awslc_ver }}/lib + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Runner image version + run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" + - name: Restore config.cache + uses: actions/cache@v4 + with: + path: config.cache + key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} + - name: Register gcc problem matcher + run: echo "::add-matcher::.github/problem-matchers/gcc.json" + - name: Install dependencies + run: sudo ./.github/workflows/posix-deps-apt.sh + - name: Configure SSL lib env vars + run: | + echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" + echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/aws-lc/${AWSLC_VER}" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/aws-lc/${AWSLC_VER}/lib" >> "$GITHUB_ENV" + - name: 'Restore AWS-LC build' + id: cache-aws-lc + uses: actions/cache@v4 + with: + path: ./multissl/aws-lc/${{ 
matrix.awslc_ver }} + key: ${{ matrix.os }}-multissl-aws-lc-${{ matrix.awslc_ver }} + - name: Install AWS-LC + if: steps.cache-aws-lc.outputs.cache-hit != 'true' + run: | + python3 Tools/ssl/multissltests.py \ + --steps=library \ + --base-directory "$MULTISSL_DIR" \ + --awslc ${{ matrix.awslc_ver }} \ + --system Linux + - name: Add ccache to PATH + run: | + echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" + - name: Configure ccache action + uses: hendrikmuhs/ccache-action@v1.2 + with: + save: false + - name: Configure CPython + run: | + ./configure CFLAGS="-fdiagnostics-format=json" \ + --config-cache \ + --enable-slower-safety \ + --with-pydebug \ + --with-openssl="$OPENSSL_DIR" \ + --with-builtin-hashlib-hashes=blake2 \ + --with-ssl-default-suites=openssl + - name: Build CPython + run: make -j + - name: Display build info + run: make pythoninfo + - name: Verify python is linked to AWS-LC + run: ./python -c 'import ssl; print(ssl.OPENSSL_VERSION)' | grep AWS-LC + - name: SSL tests + run: ./python Lib/test/ssltests.py + build-wasi: name: 'WASI' needs: build-context @@ -628,7 +703,8 @@ jobs: - build-windows-msi - build-macos - build-ubuntu - - build-ubuntu-ssltests + - build-ubuntu-ssltests-awslc + - build-ubuntu-ssltests-openssl - build-wasi - test-hypothesis - build-asan @@ -643,7 +719,8 @@ jobs: with: allowed-failures: >- build-windows-msi, - build-ubuntu-ssltests, + build-ubuntu-ssltests-awslc, + build-ubuntu-ssltests-openssl, test-hypothesis, cifuzz, allowed-skips: >- @@ -661,7 +738,8 @@ jobs: check-generated-files, build-macos, build-ubuntu, - build-ubuntu-ssltests, + build-ubuntu-ssltests-awslc, + build-ubuntu-ssltests-openssl, build-wasi, test-hypothesis, build-asan, diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h new file mode 100644 index 00000000000000..059653c844394f --- /dev/null +++ b/Include/internal/pycore_cpuinfo.h @@ -0,0 +1,179 @@ +/* + * @author Bénédikt Tran + * + * Interface for detecting the different CPUID 
flags in an opaque manner. + * See https://en.wikipedia.org/wiki/CPUID for details on the bit values. + * + * If a module requires to support SIMD instructions, it should determine + * the compiler flags and the instruction sets required for the intrinsics + * to work. + * + * For the headers and expected CPUID bits needed by Intel intrinsics, see + * https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html. + */ + +#ifndef Py_INTERNAL_CPUINFO_H +#define Py_INTERNAL_CPUINFO_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "Python.h" +#include "pycore_cpuinfo_cpuid_features.h" +#include "pycore_cpuinfo_xsave_features.h" + +typedef struct _Py_cpuid_features_s { + uint32_t maxleaf; + /* + * Macro to declare a member flag of '_Py_cpuid_features' as a uint8_t. + * Whenever this macro is used, do not forget to update the number of + * fields and the bitsize of the 'ready' member (see structure end). 
+ */ +#define _Py_CPUID_DECL_FLAG(MEMBER_NAME) uint8_t MEMBER_NAME:1 + // --- Streaming SIMD Extensions ------------------------------------------ + _Py_CPUID_DECL_FLAG(sse); + _Py_CPUID_DECL_FLAG(sse2); + _Py_CPUID_DECL_FLAG(sse3); + _Py_CPUID_DECL_FLAG(ssse3); // Supplemental SSE3 instructions + _Py_CPUID_DECL_FLAG(sse41); // SSE4.1 + _Py_CPUID_DECL_FLAG(sse42); // SSE4.2 + + // --- Advanced Vector Extensions ----------------------------------------- + _Py_CPUID_DECL_FLAG(avx); + _Py_CPUID_DECL_FLAG(avx_ifma); + _Py_CPUID_DECL_FLAG(avx_ne_convert); + + _Py_CPUID_DECL_FLAG(avx_vnni); + _Py_CPUID_DECL_FLAG(avx_vnni_int8); + _Py_CPUID_DECL_FLAG(avx_vnni_int16); + + // --- Advanced Vector Extensions 2 --------------------------------------- + _Py_CPUID_DECL_FLAG(avx2); + + // --- Advanced Vector Extensions (512-bit) ------------------------------- + /* + * AVX-512 instruction set are grouped by the processor generation + * that implements them (see https://en.wikipedia.org/wiki/AVX-512). + * + * We do not include GFNI, VPCLMULQDQ and VAES instructions since + * they are not exactly AVX-512 per se, nor do we include BF16 or + * FP16 since they operate on bfloat16 and binary16 (half-float). + * + * See https://en.wikipedia.org/wiki/AVX-512#Instruction_set for + * the suffix meanings (for instance 'f' stands for 'Foundation'). 
+ */ + _Py_CPUID_DECL_FLAG(avx512_f); + _Py_CPUID_DECL_FLAG(avx512_cd); + + _Py_CPUID_DECL_FLAG(avx512_er); + _Py_CPUID_DECL_FLAG(avx512_pf); + + _Py_CPUID_DECL_FLAG(avx512_4fmaps); + _Py_CPUID_DECL_FLAG(avx512_4vnniw); + + _Py_CPUID_DECL_FLAG(avx512_vpopcntdq); + + _Py_CPUID_DECL_FLAG(avx512_vl); + _Py_CPUID_DECL_FLAG(avx512_dq); + _Py_CPUID_DECL_FLAG(avx512_bw); + + _Py_CPUID_DECL_FLAG(avx512_ifma); + _Py_CPUID_DECL_FLAG(avx512_vbmi); + + _Py_CPUID_DECL_FLAG(avx512_vnni); + + _Py_CPUID_DECL_FLAG(avx512_vbmi2); + _Py_CPUID_DECL_FLAG(avx512_bitalg); + + _Py_CPUID_DECL_FLAG(avx512_vp2intersect); + + // --- Instructions ------------------------------------------------------- + _Py_CPUID_DECL_FLAG(cmov); + _Py_CPUID_DECL_FLAG(fma); + _Py_CPUID_DECL_FLAG(popcnt); + _Py_CPUID_DECL_FLAG(pclmulqdq); + + _Py_CPUID_DECL_FLAG(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV + _Py_CPUID_DECL_FLAG(osxsave); // XSAVE is enabled by the OS + + // --- XCR0 register bits ------------------------------------------------- + _Py_CPUID_DECL_FLAG(xcr0_sse); + // On some Intel CPUs, it is possible for the CPU to support AVX2 + // instructions even though the underlying OS does not know about + // AVX. In particular, only (SSE) XMM registers will be saved and + // restored on context-switch, but not (AVX) YMM registers. + _Py_CPUID_DECL_FLAG(xcr0_avx); + _Py_CPUID_DECL_FLAG(xcr0_avx512_opmask); + _Py_CPUID_DECL_FLAG(xcr0_avx512_zmm_hi256); + _Py_CPUID_DECL_FLAG(xcr0_avx512_hi16_zmm); +#undef _Py_CPUID_DECL_FLAG + // Whenever a field is added or removed above, update the + // number of fields (40) and adjust the bitsize of 'ready' + // so that the size of this structure is a multiple of 8. + uint8_t ready; // set if the structure is ready for usage +} _Py_cpuid_features; + +/* + * Explicitly set all members to zero to guarantee that + * we never have a non-initialized attribute at runtime + * which could lead to an illegal instruction error. 
+ * + * This readiness state of 'flags' is ignored and left untouched. + * + * Note: This function does not set any exception and thus never fails. + */ +PyAPI_FUNC(void) +_Py_cpuid_disable_features(_Py_cpuid_features *flags); + +/* + * Check whether the structure is ready and flags are inter-compatible, + * returning 1 on success and 0 otherwise. + * + * The caller should disable all CPUID detected features if the check + * fails to avoid encountering runtime illegal instruction errors. + * + * Note: This function does not set any exception and thus never fails. + */ +PyAPI_FUNC(int) +_Py_cpuid_check_features(const _Py_cpuid_features *flags); + +/* + * Return 1 if all expected flags are set in 'actual', 0 otherwise. + * + * If 'actual' or 'expect' are not ready yet, this also returns 0. + * + * Note: This function does not set any exception and thus never fails. + */ +PyAPI_FUNC(int) +_Py_cpuid_has_features(const _Py_cpuid_features *actual, + const _Py_cpuid_features *expect); + +/* + * Return 1 if 'actual' and 'expect' are identical, 0 otherwise. + * + * If 'actual' or 'expect' are not ready yet, this also returns 0. + * + * Note: This function does not set any exception and thus never fails. + */ +PyAPI_FUNC(int) +_Py_cpuid_match_features(const _Py_cpuid_features *actual, + const _Py_cpuid_features *expect); + +/* + * Detect the available host features, storing the result in 'flags'. + * + * Note: This function does not set any exception and thus never fails. 
+ */ +PyAPI_FUNC(void) +_Py_cpuid_detect_features(_Py_cpuid_features *flags); + +#ifdef __cplusplus +} +#endif + +#endif /* !Py_INTERNAL_CPUINFO_H */ diff --git a/Include/internal/pycore_cpuinfo_cpuid_features.h b/Include/internal/pycore_cpuinfo_cpuid_features.h new file mode 100644 index 00000000000000..8db54e7af37fb1 --- /dev/null +++ b/Include/internal/pycore_cpuinfo_cpuid_features.h @@ -0,0 +1,101 @@ +/** + * @author Bénédikt Tran + * @seealso @file Tools/cpuinfo/libcpuinfo/features/cpuid.py + * + * The enumeration describes masks to apply on CPUID output registers. + * + * Member names are _Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, + * where <> (resp. []) denotes a required (resp. optional) group and: + * + * - REGISTER is EAX, EBX, ECX or EDX, + * - LEAF is the initial value of the EAX register (1 or 7), + * - SUBLEAF is the initial value of the ECX register (omitted if 0), and + * - FEATURE is a SIMD feature (with one or more specialized instructions). + * + * For maintainability, the flags are ordered by registers, leafs, subleafs, + * and bits. See https://en.wikipedia.org/wiki/CPUID for the values. + * + * Note 1: The LEAF is also called the 'page' or the 'level'. + * Note 2: The SUBLEAF is also referred to as the 'count'. + * + * The LEAF value should only be 1 or 7 as other values may have different + * meanings depending on the underlying architecture.
+ */ + +#ifndef Py_INTERNAL_CPUINFO_CPUID_FEATURES_H +#define Py_INTERNAL_CPUINFO_CPUID_FEATURES_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "Python.h" + +/*[python input] +import os, sys +sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools/cpuinfo"))) +from libcpuinfo.features.cpuid import make_cpuid_features_constants +print(make_cpuid_features_constants()) +[python start generated code]*/ +// clang-format off +/** Constants for CPUID features */ +/* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ +#define _Py_CPUID_MASK_ECX_L1_SSE3 0x00000001 // bit = 0 +#define _Py_CPUID_MASK_ECX_L1_PCLMULQDQ 0x00000002 // bit = 1 +#define _Py_CPUID_MASK_ECX_L1_SSSE3 0x00000200 // bit = 9 +#define _Py_CPUID_MASK_ECX_L1_FMA 0x00001000 // bit = 12 +#define _Py_CPUID_MASK_ECX_L1_SSE4_1 0x00080000 // bit = 19 +#define _Py_CPUID_MASK_ECX_L1_SSE4_2 0x00100000 // bit = 20 +#define _Py_CPUID_MASK_ECX_L1_POPCNT 0x00800000 // bit = 23 +#define _Py_CPUID_MASK_ECX_L1_XSAVE 0x04000000 // bit = 26 +#define _Py_CPUID_MASK_ECX_L1_OSXSAVE 0x08000000 // bit = 27 +#define _Py_CPUID_MASK_ECX_L1_AVX 0x10000000 // bit = 28 + +/* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ +#define _Py_CPUID_MASK_EDX_L1_CMOV 0x00008000 // bit = 15 +#define _Py_CPUID_MASK_EDX_L1_SSE 0x02000000 // bit = 25 +#define _Py_CPUID_MASK_EDX_L1_SSE2 0x04000000 // bit = 26 + +/* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ +#define _Py_CPUID_MASK_EBX_L7_AVX2 0x00000020 // bit = 5 +#define _Py_CPUID_MASK_EBX_L7_AVX512_F 0x00010000 // bit = 16 +#define _Py_CPUID_MASK_EBX_L7_AVX512_DQ 0x00020000 // bit = 17 +#define _Py_CPUID_MASK_EBX_L7_AVX512_IFMA 0x00200000 // bit = 21 +#define _Py_CPUID_MASK_EBX_L7_AVX512_PF 0x04000000 // bit = 26 +#define _Py_CPUID_MASK_EBX_L7_AVX512_ER 0x08000000 // bit = 27 +#define _Py_CPUID_MASK_EBX_L7_AVX512_CD 0x10000000 // bit = 28 +#define _Py_CPUID_MASK_EBX_L7_AVX512_BW 0x40000000 // bit = 30 +#define 
_Py_CPUID_MASK_EBX_L7_AVX512_VL 0x80000000 // bit = 31 + +/* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ +#define _Py_CPUID_MASK_ECX_L7_AVX512_VBMI 0x00000002 // bit = 1 +#define _Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 0x00000040 // bit = 6 +#define _Py_CPUID_MASK_ECX_L7_AVX512_VNNI 0x00000800 // bit = 11 +#define _Py_CPUID_MASK_ECX_L7_AVX512_BITALG 0x00001000 // bit = 12 +#define _Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ 0x00004000 // bit = 14 + +/* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ +#define _Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW 0x00000004 // bit = 2 +#define _Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS 0x00000008 // bit = 3 +#define _Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT 0x00000100 // bit = 8 + +/* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ +#define _Py_CPUID_MASK_EAX_L7S1_AVX_VNNI 0x00000010 // bit = 4 +#define _Py_CPUID_MASK_EAX_L7S1_AVX_IFMA 0x00800000 // bit = 23 + +/* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ +#define _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 0x00000010 // bit = 4 +#define _Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT 0x00000020 // bit = 5 +#define _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 0x00000400 // bit = 10 +// clang-format on +/*[python end generated code: output=e9112f064e2effec input=71ec6b4356052ec3]*/ + +#ifdef __cplusplus +} +#endif + +#endif // !Py_INTERNAL_CPUINFO_CPUID_FEATURES_H diff --git a/Include/internal/pycore_cpuinfo_xsave_features.h b/Include/internal/pycore_cpuinfo_xsave_features.h new file mode 100644 index 00000000000000..e8719261b07604 --- /dev/null +++ b/Include/internal/pycore_cpuinfo_xsave_features.h @@ -0,0 +1,43 @@ +/** + * @author Bénédikt Tran + * @seealso @file Tools/cpuinfo/libcpuinfo/features/xsave.py + * + * XSAVE state components (XCR0 control register). + * + * See https://en.wikipedia.org/wiki/Control_register#XCR0_and_XSS. 
+ */ + +#ifndef Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H +#define Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "Python.h" + +/*[python input] +import os, sys +sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools/cpuinfo"))) +from libcpuinfo.features.xsave import make_xsave_features_constants +print(make_xsave_features_constants()) +[python start generated code]*/ +// clang-format off +/** Constants for XSAVE components */ +#define _Py_XSAVE_MASK_XCR0_SSE 0x00000002 // bit = 1 +#define _Py_XSAVE_MASK_XCR0_AVX 0x00000004 // bit = 2 +#define _Py_XSAVE_MASK_XCR0_AVX512_OPMASK 0x00000020 // bit = 5 +#define _Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 0x00000040 // bit = 6 +#define _Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM 0x00000080 // bit = 7 +// clang-format on +/*[python end generated code: output=ac059b802b4317cb input=0a1b0774d3271477]*/ + +#ifdef __cplusplus +} +#endif + +#endif // !Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H diff --git a/Makefile.pre.in b/Makefile.pre.in index 66b34b779f27cb..5c3467a5e33446 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -435,6 +435,7 @@ PYTHON_OBJS= \ Python/codegen.o \ Python/compile.o \ Python/context.o \ + Python/cpuinfo.o \ Python/critical_section.o \ Python/crossinterp.o \ Python/dynamic_annotations.o \ @@ -1294,6 +1295,9 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_complexobject.h \ $(srcdir)/Include/internal/pycore_condvar.h \ $(srcdir)/Include/internal/pycore_context.h \ + $(srcdir)/Include/internal/pycore_cpuinfo.h \ + $(srcdir)/Include/internal/pycore_cpuinfo_cpuid_features.h \ + $(srcdir)/Include/internal/pycore_cpuinfo_xsave_features.h \ $(srcdir)/Include/internal/pycore_critical_section.h \ $(srcdir)/Include/internal/pycore_crossinterp.h \ $(srcdir)/Include/internal/pycore_crossinterp_data_registry.h \ @@ -1934,6 +1938,9 @@ Python/sysmodule.o: $(srcdir)/Python/sysmodule.c 
Makefile $(srcdir)/Include/pydt $(MULTIARCH_CPPFLAGS) \ -o $@ $(srcdir)/Python/sysmodule.c +Python/cpuinfo.o: $(srcdir)/Python/cpuinfo.c Makefile + $(CC) -c $(PY_CORE_CFLAGS) @CORE_CPUINFO_CFLAGS@ -o $@ $(srcdir)/Python/cpuinfo.c + $(IO_OBJS): $(IO_H) .PHONY: regen-pegen-metaparser diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 163f238a4268d0..2f8baea62d77fc 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -16,29 +16,11 @@ #include "Python.h" #include "hashlib.h" -#include "pycore_strhex.h" // _Py_strhex() +#include "pycore_cpuinfo.h" // _Py_cpuid_features +#include "pycore_strhex.h" // _Py_strhex() #include "pycore_typeobject.h" #include "pycore_moduleobject.h" -// QUICK CPU AUTODETECTION -// -// See https://github.com/python/cpython/pull/119316 -- we only enable -// vectorized versions for Intel CPUs, even though HACL*'s "vec128" modules also -// run on ARM NEON. (We could enable them on POWER -- but I don't have access to -// a test machine to see if that speeds anything up.) -// -// Note that configure.ac and the rest of the build are written in such a way -// that if the configure script finds suitable flags to compile HACL's SIMD128 -// (resp. SIMD256) files, then Hacl_Hash_Blake2b_Simd128.c (resp. ...) will be -// pulled into the build automatically, and then only the CPU autodetection will -// need to be updated here. 
- -#if defined(__x86_64__) && defined(__GNUC__) -#include -#elif defined(_M_X64) -#include -#endif - #include // SIMD256 can't be compiled on macOS ARM64, and performance of SIMD128 isn't @@ -129,75 +111,19 @@ _blake2_free(void *module) static void blake2module_init_cpu_features(Blake2State *state) { - /* This must be kept in sync with hmacmodule_init_cpu_features() - * in hmacmodule.c */ - int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; - int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; -#if defined(__x86_64__) && defined(__GNUC__) - __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); - __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); -#elif defined(_M_X64) - int info1[4] = {0}; - __cpuidex(info1, 1, 0); - eax1 = info1[0], ebx1 = info1[1], ecx1 = info1[2], edx1 = info1[3]; - - int info7[4] = {0}; - __cpuidex(info7, 7, 0); - eax7 = info7[0], ebx7 = info7[1], ecx7 = info7[2], edx7 = info7[3]; -#endif - // fmt: off - (void)eax1; (void)ebx1; (void)ecx1; (void)edx1; - (void)eax7; (void)ebx7; (void)ecx7; (void)edx7; - // fmt: on - -#define EBX_AVX2 (1 << 5) -#define ECX_SSE3 (1 << 0) -#define ECX_SSSE3 (1 << 9) -#define ECX_SSE4_1 (1 << 19) -#define ECX_SSE4_2 (1 << 20) -#define ECX_AVX (1 << 28) -#define EDX_SSE (1 << 25) -#define EDX_SSE2 (1 << 26) -#define EDX_CMOV (1 << 15) - - bool avx = (ecx1 & ECX_AVX) != 0; - bool avx2 = (ebx7 & EBX_AVX2) != 0; - - bool sse = (edx1 & EDX_SSE) != 0; - bool sse2 = (edx1 & EDX_SSE2) != 0; - bool cmov = (edx1 & EDX_CMOV) != 0; - - bool sse3 = (ecx1 & ECX_SSE3) != 0; - bool sse41 = (ecx1 & ECX_SSE4_1) != 0; - bool sse42 = (ecx1 & ECX_SSE4_2) != 0; - -#undef EDX_CMOV -#undef EDX_SSE2 -#undef EDX_SSE -#undef ECX_AVX -#undef ECX_SSE4_2 -#undef ECX_SSE4_1 -#undef ECX_SSSE3 -#undef ECX_SSE3 -#undef EBX_AVX2 - + _Py_cpuid_features flags; + _Py_cpuid_detect_features(&flags); #if _Py_HACL_CAN_COMPILE_VEC128 - // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection - state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov; + 
state->can_run_simd128 = flags.sse && flags.sse2 && flags.sse3 + && flags.sse41 && flags.sse42 + && flags.cmov; #else - // fmt: off - (void)sse; (void)sse2; (void)sse3; (void)sse41; (void)sse42; (void)cmov; - // fmt: on state->can_run_simd128 = false; #endif #if _Py_HACL_CAN_COMPILE_VEC256 - // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection - state->can_run_simd256 = state->can_run_simd128 && avx && avx2; + state->can_run_simd256 = flags.avx && flags.avx2; #else - // fmt: off - (void)avx; (void)avx2; - // fmt: on state->can_run_simd256 = false; #endif } diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 95e400231bb65c..064e31fe830deb 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -17,6 +17,7 @@ #endif #include "Python.h" +#include "pycore_cpuinfo.h" // _Py_cpuid_features #include "pycore_hashtable.h" #include "pycore_strhex.h" // _Py_strhex() @@ -1552,73 +1553,19 @@ hmacmodule_init_globals(PyObject *module, hmacmodule_state *state) static void hmacmodule_init_cpu_features(hmacmodule_state *state) { - int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; - int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; -#if defined(__x86_64__) && defined(__GNUC__) - __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); - __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); -#elif defined(_M_X64) - int info1[4] = {0}; - __cpuidex(info1, 1, 0); - eax1 = info1[0], ebx1 = info1[1], ecx1 = info1[2], edx1 = info1[3]; - - int info7[4] = {0}; - __cpuidex(info7, 7, 0); - eax7 = info7[0], ebx7 = info7[1], ecx7 = info7[2], edx7 = info7[3]; -#endif - // fmt: off - (void)eax1; (void)ebx1; (void)ecx1; (void)edx1; - (void)eax7; (void)ebx7; (void)ecx7; (void)edx7; - // fmt: on - -#define EBX_AVX2 (1 << 5) -#define ECX_SSE3 (1 << 0) -#define ECX_SSSE3 (1 << 9) -#define ECX_SSE4_1 (1 << 19) -#define ECX_SSE4_2 (1 << 20) -#define ECX_AVX (1 << 28) -#define EDX_SSE (1 << 25) -#define EDX_SSE2 (1 << 26) -#define EDX_CMOV (1 << 15) - - bool avx = (ecx1 & ECX_AVX) != 0; - bool avx2 = 
(ebx7 & EBX_AVX2) != 0; - - bool sse = (edx1 & EDX_SSE) != 0; - bool sse2 = (edx1 & EDX_SSE2) != 0; - bool cmov = (edx1 & EDX_CMOV) != 0; - - bool sse3 = (ecx1 & ECX_SSE3) != 0; - bool sse41 = (ecx1 & ECX_SSE4_1) != 0; - bool sse42 = (ecx1 & ECX_SSE4_2) != 0; - -#undef EDX_CMOV -#undef EDX_SSE2 -#undef EDX_SSE -#undef ECX_AVX -#undef ECX_SSE4_2 -#undef ECX_SSE4_1 -#undef ECX_SSSE3 -#undef ECX_SSE3 -#undef EBX_AVX2 - + _Py_cpuid_features flags; + _Py_cpuid_detect_features(&flags); #if _Py_HACL_CAN_COMPILE_VEC128 - // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection - state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov; + state->can_run_simd128 = flags.sse && flags.sse2 && flags.sse3 + && flags.sse41 && flags.sse42 + && flags.cmov; #else - // fmt: off - (void)sse; (void)sse2; (void)sse3; (void)sse41; (void)sse42; (void)cmov; - // fmt: on state->can_run_simd128 = false; #endif #if _Py_HACL_CAN_COMPILE_VEC256 - // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection - state->can_run_simd256 = state->can_run_simd128 && avx && avx2; + state->can_run_simd256 = flags.avx && flags.avx2; #else - // fmt: off - (void)avx; (void)avx2; - // fmt: on state->can_run_simd256 = false; #endif } diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index b911c9385634d7..2d843ea3bff576 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -229,6 +229,9 @@ + + + @@ -595,6 +598,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 0e6d42cc959ba5..8afc2010ef93ca 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -600,6 +600,15 @@ Include\internal + + Include\internal + + + Include\internal + + + Include\internal + Include\internal @@ -1349,6 +1358,9 @@ Python + + Source Files + Python diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c new file mode 100644 index 00000000000000..8b7acca3b37e73 --- /dev/null +++ 
b/Python/cpuinfo.c
@@ -0,0 +1,596 @@
+#include "pycore_cpuinfo.h"
+
+/* Evaluate to 1 if all bits of MASK are set in REG, and to 0 otherwise. */
+#define CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 1 : 0)
+#define CPUID_CHECK_REG(REG, FEAT) CHECK_REG(REG, (_Py_CPUID_MASK_ ## FEAT))
+#define XSAVE_CHECK_REG(REG, FEAT) CHECK_REG(REG, (_Py_XSAVE_MASK_ ## FEAT))
+
+// For now, we only try to enable SIMD instructions for x86-64 Intel CPUs.
+// In the future, we should carefully enable support for ARM NEON and POWER
+// as well as AMD. See https://sourceforge.net/p/predef/wiki/Architectures.
+#define HAS_CPUID_SUPPORT
+#if defined(__x86_64__) && defined(__GNUC__)
+#  include <cpuid.h>      // __cpuid_count()
+#  include <immintrin.h>  // _xgetbv()
+#elif defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64)
+#  include <intrin.h>     // __cpuidex()
+#  include <immintrin.h>  // _xgetbv()
+#else
+#  undef HAS_CPUID_SUPPORT
+#endif
+
+// Below, we declare macros for guarding the detection of SSE, AVX/AVX2
+// and AVX-512 instructions. If the compiler does not even recognize the
+// corresponding flags or if we are not on an 64-bit platform we do not
+// even try to inspect the output of CPUID for those specific features.
+#ifdef HAS_CPUID_SUPPORT +#if defined(_Py_CPUINFO_USE_XGETBV_FUNC) \ + || defined(_Py_CPUINFO_USE_XGETBV_OPCODE) +# define HAS_XGETBV_SUPPORT +#endif + +#undef HAS_XGETBV_SUPPORT + +#if defined(_Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ + // macros above should be sorted in alphabetical order +# define SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD +#endif + +#if defined(_Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \ + // macros above should be sorted in alphabetical order +# define SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +#endif + +#if defined(_Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) +# define SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD +#endif + +#if defined(_Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS) \ + || 
defined(_Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \ + // macros above should be sorted in alphabetical order +# define SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD +#endif +#endif // HAS_CPUID_SUPPORT + +// On macOS, checking the XCR0 register is NOT a guaranteed way +// to ensure the usability of AVX-512. As such, we disable the +// entire set of AVX-512 instructions. +// +// See https://stackoverflow.com/a/72523150/9579194. +#if defined(__APPLE__) +# undef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD + // Additionally, AVX2 cannot be compiled on macOS ARM64 (yet it can be + // compiled on x86_64). However, since autoconf incorrectly assumes so + // when compiling a universal2 binary, we disable SIMD on such builds. +# if defined(__aarch64__) || defined(__arm64__) +# undef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +# undef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD +# endif +#endif + +// Below, we declare macros indicating how CPUID can be called at runtime, +// so that we only call CPUID with specific inputs when needed. + +#if defined(SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD) \ + || defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD) +/* Indicate that cpuid should be called once with EAX=1 and ECX=0. */ +# ifndef HAS_CPUID_SUPPORT +# error "HAS_CPUID_SUPPORT must be set" +# endif +# define SHOULD_PARSE_CPUID_L1 +#endif + +#if defined(SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD) \ + || defined(SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD) +/* Indicate that cpuid should be called once with EAX=7 and ECX=0. */ +# ifndef HAS_CPUID_SUPPORT +# error "HAS_CPUID_SUPPORT must be set" +# endif +# define SHOULD_PARSE_CPUID_L7 +# define SHOULD_PARSE_CPUID_L7S0 +#endif + +#if defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD) +/* Indicate that cpuid should be called once with EAX=7 and ECX=1. 
*/ +# ifndef HAS_CPUID_SUPPORT +# error "HAS_CPUID_SUPPORT must be set" +# endif +# define SHOULD_PARSE_CPUID_L7 +# define SHOULD_PARSE_CPUID_L7S1 +#endif + +#if defined(SHOULD_PARSE_CPUID_L7S0) && !defined(SHOULD_PARSE_CPUID_L7) +#error "SHOULD_PARSE_CPUID_L7S0 requires SHOULD_PARSE_CPUID_L7" +#endif +#if defined(SHOULD_PARSE_CPUID_L7S1) && !defined(SHOULD_PARSE_CPUID_L7) +#error "SHOULD_PARSE_CPUID_L7S1 requires SHOULD_PARSE_CPUID_L7" +#endif + +/* + * Call __cpuid_count() or equivalent and get + * its EAX, EBX, ECX and EDX output registers. + * + * If CPUID is not supported, registers are set to 0. + */ +static void +get_cpuid_info(uint32_t level /* input eax */, + uint32_t count /* input ecx */, + uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) +{ + *eax = *ebx = *ecx = *edx = 0; // ensure the output to be initialized +#if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) + uint32_t r_eax = 0, r_ebx = 0, r_ecx = 0, r_edx = 0; + __cpuid_count(level, count, r_eax, r_ebx, r_ecx, r_edx); + *eax = r_eax, *ebx = r_ebx, *ecx = r_ecx, *edx = r_edx; +#elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) + uint32_t info[4] = {0}; + __cpuidex(info, level, count); + *eax = info[0], *ebx = info[1], *ecx = info[2], *edx = info[3]; +#else + (void)level, (void)count; +#endif +} + +#if defined(HAS_XGETBV_SUPPORT) && defined(SHOULD_PARSE_CPUID_L1) +static uint64_t /* should only be used after calling cpuid(1, 0, ...) 
*/ +get_xgetbv(uint32_t index) +{ + assert(index == 0); // only XCR0 is supported for now +#if defined(_Py_CPUINFO_USE_XGETBV_FUNC) + /* directly use the compiler's helper if -mxsave is available */ + return (uint64_t)_xgetbv(index); +#elif defined(__x86_64__) && defined(__GNUC__) + uint32_t eax = 0, edx = 0; + __asm__ volatile( + /* raw opcode for xgetbv for compatibility with older toolchains */ + ".byte 0x0f, 0x01, 0xd0" + : "=a" (eax), "=d" (edx) + : "c" (index) + ); + return ((uint64_t)edx << 32) | eax; +#elif defined(_M_X64) + return (uint64_t)_xgetbv(index); +#else + (void)index; + return 0; +#endif +} +#else +#define get_xgetbv(_INDEX) 0 +#endif + +/* Highest Function Parameter and Manufacturer ID (LEAF=0, SUBLEAF=0). */ +static uint32_t +detect_cpuid_maxleaf(void) +{ + uint32_t maxleaf = 0, _ebx = 0, _ecx = 0, _edx = 0; + get_cpuid_info(0, 0, &maxleaf, &_ebx, &_ecx, &_edx); + return maxleaf; +} + +/* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). */ +#ifdef SHOULD_PARSE_CPUID_L1 +static void /* should only be used after calling cpuid(1, 0, ...) */ +detect_cpuid_features(_Py_cpuid_features *flags, uint32_t ecx, uint32_t edx) +{ + assert(flags->ready == 0); + assert(flags->maxleaf >= 1); + (void)flags, (void)ecx, (void)edx; // silence -Wunused-parameter + // Keep the ordering and newlines as they are declared in the structure. 
+#ifdef SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD +#ifdef _Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS + flags->sse = CPUID_CHECK_REG(edx, EDX_L1_SSE); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS + flags->sse2 = CPUID_CHECK_REG(edx, EDX_L1_SSE2); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS + flags->sse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSE3); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS + flags->ssse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSSE3); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS + flags->sse41 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_1); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS + flags->sse42 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_2); +#endif +#endif // SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD + +#ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +#ifdef _Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS + flags->avx = CPUID_CHECK_REG(ecx, ECX_L1_AVX); +#endif +#endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD + +#ifdef HAS_CPUID_SUPPORT + flags->cmov = CPUID_CHECK_REG(edx, EDX_L1_CMOV); + flags->fma = CPUID_CHECK_REG(ecx, ECX_L1_FMA); + flags->popcnt = CPUID_CHECK_REG(ecx, ECX_L1_POPCNT); + flags->pclmulqdq = CPUID_CHECK_REG(ecx, ECX_L1_PCLMULQDQ); + + flags->xsave = CPUID_CHECK_REG(ecx, ECX_L1_XSAVE); + flags->osxsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); +#endif +} +#endif + +/* Extended Feature Bits (LEAF=7, SUBLEAF=0). */ +#ifdef SHOULD_PARSE_CPUID_L7S0 +static void /* should only be used after calling cpuid(7, 0, ...) */ +detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, + uint32_t ebx, uint32_t ecx, uint32_t edx) +{ + assert(flags->ready == 0); + assert(flags->maxleaf >= 7); + (void)flags, (void)ebx, (void)ecx, (void)edx; + // Keep the ordering and newlines as they are declared in the structure. 
+#ifdef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD +#ifdef _Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS + flags->avx2 = CPUID_CHECK_REG(ebx, EBX_L7_AVX2); +#endif +#endif // SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD + +#ifdef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS + flags->avx512_f = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_F); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS + flags->avx512_cd = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_CD); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS + flags->avx512_er = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_ER); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS + flags->avx512_pf = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_PF); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS + flags->avx512_4fmaps = CPUID_CHECK_REG(edx, EDX_L7_AVX512_4FMAPS); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS + flags->avx512_4vnniw = CPUID_CHECK_REG(edx, EDX_L7_AVX512_4VNNIW); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS + flags->avx512_vpopcntdq = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VPOPCNTDQ); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS + flags->avx512_vl = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_VL); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS + flags->avx512_dq = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_DQ); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS + flags->avx512_bw = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_BW); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS + flags->avx512_ifma = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_IFMA); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS + flags->avx512_vbmi = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS + flags->avx512_vnni = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VNNI); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS + flags->avx512_vbmi2 = 
CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI2); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS + flags->avx512_bitalg = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_BITALG); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS + flags->avx512_vp2intersect = CPUID_CHECK_REG(edx, EDX_L7_AVX512_VP2INTERSECT); +#endif +#endif // SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD +} +#endif + +/* Extended Feature Bits (LEAF=7, SUBLEAF=1). */ +#ifdef SHOULD_PARSE_CPUID_L7S1 +static void /* should only be used after calling cpuid(7, 1, ...) */ +detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, + uint32_t eax, uint32_t edx) +{ + assert(flags->ready == 0); + assert(flags->maxleaf >= 7); + (void)flags, (void)eax, (void)edx; + // Keep the ordering and newlines as they are declared in the structure. +#ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +#ifdef _Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS + flags->avx_ne_convert = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_NE_CONVERT); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS + flags->avx_ifma = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_IFMA); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS + flags->avx_vnni = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_VNNI); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS + flags->avx_vnni_int8 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT8); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS + flags->avx_vnni_int16 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT16); +#endif +#endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +} +#endif + +#ifdef SHOULD_PARSE_CPUID_L1 +static void /* should only be used after calling cpuid(1, 0, ...) */ +detect_cpuid_xsave_state(_Py_cpuid_features *flags) +{ + assert(flags->ready == 0); + assert(flags->maxleaf >= 1); + (void)flags; + // Keep the ordering and newlines as they are declared in the structure. + uint64_t xcr0 = flags->xsave && flags->osxsave ? 
get_xgetbv(0) : 0; + flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); + flags->xcr0_avx = XSAVE_CHECK_REG(xcr0, XCR0_AVX); + flags->xcr0_avx512_opmask = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); + flags->xcr0_avx512_zmm_hi256 = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); + flags->xcr0_avx512_hi16_zmm = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); +} +#endif + +static void +cpuid_features_finalize(_Py_cpuid_features *flags) +{ + assert(flags->ready == 0); + + // Here, any flag that may depend on others should be correctly set + // at runtime to avoid illegal instruction errors. + + flags->ready = 1; +} + +/* + * Check that the detected features are consistent with each other. + * + * Return 1 if 'flags' is ready and no AVX-512 extension is reported + * without AVX-512/F, and 0 otherwise. + */ +int +_Py_cpuid_check_features(const _Py_cpuid_features *flags) +{ + if (flags->ready != 1) { + return 0; + } + + // AVX-512/F is required to support any other AVX-512 instruction set + uint8_t avx512_require_f = ( + // newlines are placed according to processor generations + flags->avx512_cd || + flags->avx512_er || flags->avx512_pf || + flags->avx512_4fmaps || flags->avx512_4vnniw || + flags->avx512_vpopcntdq || + flags->avx512_vl || flags->avx512_dq || flags->avx512_bw || + flags->avx512_ifma || flags->avx512_vbmi || + flags->avx512_vnni || + flags->avx512_vbmi2 || flags->avx512_bitalg || + flags->avx512_vp2intersect + ); + + // Only an AVX-512 extension reported *without* AVX-512/F is invalid; + // having no AVX-512 support at all is a consistent state. + if (!flags->avx512_f && avx512_require_f) { + return 0; + } + + return 1; +} + +/* + * Apply a 1-parameter macro MACRO(FLAG) on all members + * of a '_Py_cpuid_features' object ('ready' is omitted). 
+ */ +#define CPUID_APPLY_MACRO(MACRO) \ + do { \ + MACRO(sse); \ + MACRO(sse2); \ + MACRO(sse3); \ + MACRO(ssse3); \ + MACRO(sse41); \ + MACRO(sse42); \ + \ + MACRO(avx); \ + MACRO(avx_ifma); \ + MACRO(avx_ne_convert); \ + \ + MACRO(avx_vnni); \ + MACRO(avx_vnni_int8); \ + MACRO(avx_vnni_int16); \ + \ + MACRO(avx2); \ + \ + MACRO(avx512_f); \ + MACRO(avx512_cd); \ + \ + MACRO(avx512_er); \ + MACRO(avx512_pf); \ + \ + MACRO(avx512_4fmaps); \ + MACRO(avx512_4vnniw); \ + \ + MACRO(avx512_vpopcntdq); \ + \ + MACRO(avx512_vl); \ + MACRO(avx512_dq); \ + MACRO(avx512_bw); \ + \ + MACRO(avx512_ifma); \ + MACRO(avx512_vbmi); \ + \ + MACRO(avx512_vnni); \ + \ + MACRO(avx512_vbmi2); \ + MACRO(avx512_bitalg); \ + \ + MACRO(avx512_vp2intersect); \ + \ + MACRO(cmov); \ + MACRO(fma); \ + MACRO(popcnt); \ + MACRO(pclmulqdq); \ + \ + MACRO(xsave); \ + MACRO(osxsave); \ + \ + MACRO(xcr0_sse); \ + MACRO(xcr0_avx); \ + MACRO(xcr0_avx512_opmask); \ + MACRO(xcr0_avx512_zmm_hi256); \ + MACRO(xcr0_avx512_hi16_zmm); \ + } while (0) + +void +_Py_cpuid_disable_features(_Py_cpuid_features *flags) +{ + flags->maxleaf = 0; +#define CPUID_DISABLE(FLAG) flags->FLAG = 0 + CPUID_APPLY_MACRO(CPUID_DISABLE); +#undef CPUID_DISABLE +} + +int +_Py_cpuid_has_features(const _Py_cpuid_features *actual, + const _Py_cpuid_features *expect) +{ + if (!actual->ready || !expect->ready) { + return 0; + } + if (actual->maxleaf < expect->maxleaf) { + return 0; + } +#define CPUID_CHECK_FEATURE(FLAG) \ + do { \ + if (expect->FLAG && !actual->FLAG) { \ + return 0; \ + } \ + } while (0) + CPUID_APPLY_MACRO(CPUID_CHECK_FEATURE); +#undef CPUID_CHECK_FEATURE + return 1; +} + +int +_Py_cpuid_match_features(const _Py_cpuid_features *actual, + const _Py_cpuid_features *expect) +{ + if (!actual->ready || !expect->ready) { + return 0; + } + if (actual->maxleaf != expect->maxleaf) { + return 0; + } +#define CPUID_MATCH_FEATURE(FLAG) \ + do { \ + if (expect->FLAG != actual->FLAG) { \ + return 0; \ + } \ + } while (0) + 
CPUID_APPLY_MACRO(CPUID_MATCH_FEATURE); +#undef CPUID_MATCH_FEATURE + return 1; +} + +#undef CPUID_APPLY_MACRO + +#ifdef SHOULD_PARSE_CPUID_L1 +static void +cpuid_detect_l1_features(_Py_cpuid_features *flags) +{ + assert(flags->ready == 0); + if (flags->maxleaf >= 1) { + uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); + detect_cpuid_features(flags, ecx, edx); + detect_cpuid_xsave_state(flags); + } +} +#else +#define cpuid_detect_l1_features(FLAGS) +#endif + +#ifdef SHOULD_PARSE_CPUID_L7S0 +static void +cpuid_detect_l7s0_features(_Py_cpuid_features *flags) +{ + assert(flags->ready == 0); + assert(flags->maxleaf >= 7); + uint32_t _eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(7, 0, &_eax, &ebx, &ecx, &edx); + detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); +} +#else +#define cpuid_detect_l7s0_features(FLAGS) +#endif + +#ifdef SHOULD_PARSE_CPUID_L7S1 +static void +cpuid_detect_l7s1_features(_Py_cpuid_features *flags) +{ + assert(flags->ready == 0); + assert(flags->maxleaf >= 7); + uint32_t eax = 0, _ebx = 0, _ecx = 0, edx = 0; + get_cpuid_info(7, 1, &eax, &_ebx, &_ecx, &edx); + detect_cpuid_extended_features_L7S1(flags, eax, edx); +} +#else +#define cpuid_detect_l7s1_features(FLAGS) +#endif + +#ifdef SHOULD_PARSE_CPUID_L7 +static void +cpuid_detect_l7_features(_Py_cpuid_features *flags) +{ + assert(flags->ready == 0); + if (flags->maxleaf >= 7) { + cpuid_detect_l7s0_features(flags); + cpuid_detect_l7s1_features(flags); + } +} +#else +#define cpuid_detect_l7_features(FLAGS) +#endif + +void +_Py_cpuid_detect_features(_Py_cpuid_features *flags) +{ + if (flags->ready) { + return; + } + _Py_cpuid_disable_features(flags); + flags->maxleaf = detect_cpuid_maxleaf(); + cpuid_detect_l1_features(flags); + cpuid_detect_l7_features(flags); + cpuid_features_finalize(flags); + if (!_Py_cpuid_check_features(flags)) { + _Py_cpuid_disable_features(flags); + } +} diff --git a/Tools/cpuinfo/.ruff.toml 
b/Tools/cpuinfo/.ruff.toml new file mode 100644 index 00000000000000..e49d04c2d4e863 --- /dev/null +++ b/Tools/cpuinfo/.ruff.toml @@ -0,0 +1,16 @@ +# Python 3.12 is required for 'type' statements +target-version = "py312" +line-length = 79 + +[format] +skip-magic-trailing-comma = false + +[lint] +select = [ + "I", # isort + "F841", # unused variable + "RUF100", # Ban unused `# noqa` comments + "PGH004", # Ban blanket `# noqa` comments (only ignore specific error codes) +] + + diff --git a/Tools/cpuinfo/libcpuinfo/__init__.py b/Tools/cpuinfo/libcpuinfo/__init__.py new file mode 100644 index 00000000000000..a935debd4f4bbc --- /dev/null +++ b/Tools/cpuinfo/libcpuinfo/__init__.py @@ -0,0 +1,20 @@ +""" +This package provides functions to generate flags for CPUID and XSAVE. + +The constants are macros generated by Argument Clinic as follows: + + #define <NAME> 0x<MASK> // bit = BIT + ^ ^ + +where ^ indicates a column that is a multiple of 4, <MASK> has +exactly 8 characters and BIT has at most 2 characters. + +A C enumeration is NOT generated as the largest member may not fit +on an 'int', which is forbidden as ISO C restricts enumerator values +to that range. + +.. note:: + + This package must not be used directly and should only be + invoked from an Argument Clinic "[python input]" directive. +""" diff --git a/Tools/cpuinfo/libcpuinfo/features/__init__.py b/Tools/cpuinfo/libcpuinfo/features/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Tools/cpuinfo/libcpuinfo/features/cpuid.py b/Tools/cpuinfo/libcpuinfo/features/cpuid.py new file mode 100644 index 00000000000000..65a37860a2778d --- /dev/null +++ b/Tools/cpuinfo/libcpuinfo/features/cpuid.py @@ -0,0 +1,131 @@ +""" +Generate constants describing masks to apply on CPUID output registers. + +Constants are _Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, +where <> (resp. []) denotes a required (resp. 
optional) group and: + +- REGISTER is EAX, EBX, ECX or EDX, +- LEAF is the initial value of the EAX register (1 or 7), +- SUBLEAF is the initial value of the ECX register (omitted if 0), and +- FEATURE is a SIMD feature (with one or more specialized instructions). + +For maintainability, the flags are ordered by registers, leafs, subleafs, +and bits. See https://en.wikipedia.org/wiki/CPUID for the values. + +Note 1: The LEAF is also called the 'page' or the 'level'. +Note 2: The SUBLEAF is also referred to as the 'count'. + +The LEAF value should only be 1 or 7 as other values may have different +meanings depending on the underlying architecture. + +.. seealso:: :file:`Include/internal/pycore_cpuinfo_cpuid_features.h` +""" + +from __future__ import annotations + +__all__ = ["make_cpuid_features_constants"] + +from typing import TYPE_CHECKING + +import libcpuinfo.util as util +from libcpuinfo.util import DOXYGEN_STYLE + +if TYPE_CHECKING: + from typing import Final + + type Leaf = int + type SubLeaf = int + type Registry = str + type FeatureFamily = tuple[Leaf, SubLeaf, Registry] + + type Feature = str + type BitIndex = int + +CPUID_FEATURES: Final[dict[FeatureFamily, dict[Feature, BitIndex]]] = { + # See https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits. + (1, 0, "ECX"): { + "SSE3": 0, + "PCLMULQDQ": 1, + "SSSE3": 9, + "FMA": 12, + "SSE4_1": 19, + "SSE4_2": 20, + "POPCNT": 23, + "XSAVE": 26, + "OSXSAVE": 27, + "AVX": 28, + }, + (1, 0, "EDX"): { + "CMOV": 15, + "SSE": 25, + "SSE2": 26, + }, + # See https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features. 
+ (7, 0, "EBX"): { + "AVX2": 5, + "AVX512_F": 16, + "AVX512_DQ": 17, + "AVX512_IFMA": 21, + "AVX512_PF": 26, + "AVX512_ER": 27, + "AVX512_CD": 28, + "AVX512_BW": 30, + "AVX512_VL": 31, + }, + (7, 0, "ECX"): { + "AVX512_VBMI": 1, + "AVX512_VBMI2": 6, + "AVX512_VNNI": 11, + "AVX512_BITALG": 12, + "AVX512_VPOPCNTDQ": 14, + }, + (7, 0, "EDX"): { + "AVX512_4VNNIW": 2, + "AVX512_4FMAPS": 3, + "AVX512_VP2INTERSECT": 8, + }, + # See https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=1:_Extended_Features. + (7, 1, "EAX"): { + "AVX_VNNI": 4, + "AVX_IFMA": 23, + }, + (7, 1, "EDX"): { + "AVX_VNNI_INT8": 4, + "AVX_NE_CONVERT": 5, + "AVX_VNNI_INT16": 10, + }, +} + + +def get_constant_name( + leaf: Leaf, subleaf: SubLeaf, registry: Registry, name: Feature +) -> str: + node = f"L{leaf}S{subleaf}" if subleaf else f"L{leaf}" + return f"_Py_CPUID_MASK_{registry}_{node}_{name}" + + +_NAME_MAXSIZE: Final[int] = util.next_block( + max( + len(get_constant_name(*family, name)) + for family, values in CPUID_FEATURES.items() + for name in values + ) +) + + +def make_cpuid_features_constants() -> str: + """Used by :file:`Include/internal/pycore_cpuinfo_cpuid_features.h`.""" + writer = util.CWriter() + writer.comment("Constants for CPUID features", style=DOXYGEN_STYLE) + for family, values in CPUID_FEATURES.items(): + leaf, subleaf, registry = family + writer.comment(f"CPUID (LEAF={leaf}, SUBLEAF={subleaf}) [{registry}]") + for feature_name, bit in values.items(): + if not feature_name: + raise ValueError(f"invalid entry for {family}") + if not 0 <= bit < 32: + raise ValueError(f"invalid bit value for {feature_name!r}") + key = get_constant_name(leaf, subleaf, registry, feature_name) + writer.write(util.make_constant(key, bit, _NAME_MAXSIZE)) + writer.write_blankline() + return writer.build() diff --git a/Tools/cpuinfo/libcpuinfo/features/xsave.py b/Tools/cpuinfo/libcpuinfo/features/xsave.py new file mode 100644 index 00000000000000..474162dfc4463b --- /dev/null +++ 
b/Tools/cpuinfo/libcpuinfo/features/xsave.py @@ -0,0 +1,51 @@ +""" +Generate constants for XSAVE state components (XCR0 control register). + +See https://en.wikipedia.org/wiki/Control_register#XCR0_and_XSS. + +.. seealso:: :file:`Include/internal/pycore_cpuinfo_xsave_features.h` +""" + +from __future__ import annotations + +__all__ = ["make_xsave_features_constants"] + +from typing import TYPE_CHECKING + +import libcpuinfo.util as util +from libcpuinfo.util import DOXYGEN_STYLE + +if TYPE_CHECKING: + from typing import Final + + type Feature = str + type BitIndex = int + +XSAVE_FEATURES: Final[dict[Feature, BitIndex]] = { + "SSE": 1, + "AVX": 2, + "AVX512_OPMASK": 5, + "AVX512_ZMM_HI256": 6, + "AVX512_HI16_ZMM": 7, +} + + +def get_constant_name(feature: Feature) -> str: + return f"_Py_XSAVE_MASK_XCR0_{feature}" + + +_NAME_MAXSIZE: Final[int] = util.next_block( + max(map(len, map(get_constant_name, XSAVE_FEATURES))) +) + + +def make_xsave_features_constants() -> str: + """Used by :file:`Include/internal/pycore_cpuinfo_xsave_features.h`.""" + writer = util.CWriter() + writer.comment("Constants for XSAVE components", style=DOXYGEN_STYLE) + for feature_name, bit in XSAVE_FEATURES.items(): + if not 0 <= bit < 32: + raise ValueError(f"invalid bit value for {feature_name!r}") + key = get_constant_name(feature_name) + writer.write(util.make_constant(key, bit, _NAME_MAXSIZE)) + return writer.build() diff --git a/Tools/cpuinfo/libcpuinfo/util.py b/Tools/cpuinfo/libcpuinfo/util.py new file mode 100644 index 00000000000000..9d478ca686f65b --- /dev/null +++ b/Tools/cpuinfo/libcpuinfo/util.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +__all__ = [ + "next_block", "make_constant", + "Style", "C99_STYLE", "C11_STYLE", "DOXYGEN_STYLE", + "CWriter", +] # fmt: skip + +import contextlib +import enum +from io import StringIO +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Iterator + from typing import Any, Final, Literal + + +def 
next_block(w: int) -> int: + """Compute the smallest multiple of 4 strictly larger than *w*.""" + return ((w + 3) & ~0x03) if (w % 4) else (w + 4) + + +_MASKSIZE: Final[int] = next_block(len("0x00000000")) + + +def make_constant(key: str, bit: int, name_maxsize: int) -> str: + assert bit <= 32, f"{key}: mask does not on an uint32_t" + member_name = key.ljust(name_maxsize) + member_mask = format(1 << bit, "008x") + member_mask = f"0x{member_mask}".ljust(_MASKSIZE) + return f"#define {member_name}{member_mask}// bit = {bit}" + + +class Style(enum.IntEnum): + C99 = enum.auto() + C11 = enum.auto() + DOXYGEN = enum.auto() + + +C99_STYLE: Final[Literal[Style.C99]] = Style.C99 +C11_STYLE: Final[Literal[Style.C11]] = Style.C11 +DOXYGEN_STYLE: Final[Literal[Style.DOXYGEN]] = Style.DOXYGEN + +_COMMENT_INLINE_STYLE: Final[dict[Style, tuple[str, str, str]]] = { + C99_STYLE: ("// ", "", ""), + C11_STYLE: ("/* ", " */", ""), + DOXYGEN_STYLE: ("/** ", " */", ""), +} + +_COMMENT_BLOCK_STYLE: Final[dict[Style, tuple[str, str, str]]] = { + C99_STYLE: ("// ", "", ""), + C11_STYLE: ("/*", " */", " * "), + DOXYGEN_STYLE: ("/**", " */", " * "), +} + + +class CWriter: + def __init__(self, *, indentsize: int = 4) -> None: + self._stream = StringIO() + self._indent = " " * indentsize + self._prefix = "" + + def comment( + self, text: str, *, level: int = 0, style: Style = C11_STYLE + ) -> None: + """Add a C comment, possibly using doxygen style.""" + if len(text) < 72 and "\n" not in text: + prolog, epilog, _ = _COMMENT_INLINE_STYLE[style] + self.write(prolog, text, epilog, sep="", level=level) + else: + prolog, epilog, prefix = _COMMENT_BLOCK_STYLE[style] + self.write(prolog, level=level) + with self.prefixed(prefix): + for line in text.splitlines(): + self.write(line, level=level) + self.write(epilog, level=level) + + @contextlib.contextmanager + def prefixed(self, prefix: str) -> Iterator[None]: + old_prefix = self._prefix + self._prefix = prefix + try: + yield + finally: + self._prefix 
= old_prefix + + def _prefix_at(self, level: int) -> str: + return "".join((self._indent * level, self._prefix)) + + def write( + self, *args: Any, sep: str = " ", end: str = "\n", level: int = 0 + ) -> None: + if prefix := self._prefix_at(level): + self._write(prefix, sep="", end="") + self._write(*args, sep=sep, end=end) + + def write_blankline(self) -> None: + self._write() + + def _write(self, *args: Any, sep: str = " ", end: str = "\n") -> None: + print(*args, sep=sep, end=end, file=self._stream) + + def build(self) -> str: + # inject directives to temporarily disable external C formatters + return "\n".join( + ( + "// clang-format off", + self._stream.getvalue().rstrip(), + "// clang-format on", + ) + ) diff --git a/Tools/cpuinfo/mypy.ini b/Tools/cpuinfo/mypy.ini new file mode 100644 index 00000000000000..914ca082b72189 --- /dev/null +++ b/Tools/cpuinfo/mypy.ini @@ -0,0 +1,9 @@ +[mypy] +files = Tools/cpuinfo/ +pretty = True + +python_version = 3.12 +strict = True +extra_checks = True +enable_error_code = ignore-without-code,redundant-expr,truthy-bool +warn_unreachable = True diff --git a/configure b/configure index 75ae1699a8e451..5656baadb32af5 100755 --- a/configure +++ b/configure @@ -725,6 +725,7 @@ LIBHACL_BLAKE2_SIMD128_OBJS LIBHACL_SIMD128_FLAGS LIBHACL_LDFLAGS LIBHACL_CFLAGS +CORE_CPUINFO_CFLAGS MODULE_UNICODEDATA_FALSE MODULE_UNICODEDATA_TRUE MODULE__MULTIBYTECODEC_FALSE @@ -32538,6 +32539,1790 @@ then : fi + + +# Detection of supported SIMD instruction sets for CPython. Since +# we do not necessarily know which instruction sets will be used, +# we disable SIMD support on some older Android platforms. +# +# See _Py_cpuid_features in pycore_cpuinfo.h for how to order fields +# and where to put blank lines to separate processor generations for +# AVX-512 instructions. +# +# Although AVX support is not guaranteed on Android [1], this is safe +# because we do a runtime CPUID check. 
+# +# [1]: https://developer.android.com/ndk/guides/abis#86-64 +if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ + { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } +then + # SSE + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse" >&5 +printf %s "checking whether C compiler accepts -msse... " >&6; } +if test ${ax_cv_check_cflags__Werror__msse+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -msse" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__msse=yes +else case e in #( + e) ax_cv_check_cflags__Werror__msse=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse" = xyes +then : + ac_cv_can_compile_simd_sse=yes +else case e in #( + e) ac_cv_can_compile_simd_sse=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_sse" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse2" >&5 +printf %s "checking whether C compiler accepts -msse2... " >&6; } +if test ${ax_cv_check_cflags__Werror__msse2+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -msse2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__msse2=yes +else case e in #( + e) ax_cv_check_cflags__Werror__msse2=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse2" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse2" = xyes +then : + ac_cv_can_compile_simd_sse2=yes +else case e in #( + e) ac_cv_can_compile_simd_sse2=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_sse2" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse3" >&5 +printf %s "checking whether C compiler accepts -msse3... " >&6; } +if test ${ax_cv_check_cflags__Werror__msse3+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -msse3" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__msse3=yes +else case e in #( + e) ax_cv_check_cflags__Werror__msse3=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse3" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse3" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse3" = xyes +then : + ac_cv_can_compile_simd_sse3=yes +else case e in #( + e) ac_cv_can_compile_simd_sse3=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_sse3" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mssse3" >&5 +printf %s "checking whether C compiler accepts -mssse3... " >&6; } +if test ${ax_cv_check_cflags__Werror__mssse3+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mssse3" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mssse3=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mssse3=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mssse3" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mssse3" >&6; } +if test "x$ax_cv_check_cflags__Werror__mssse3" = xyes +then : + ac_cv_can_compile_simd_ssse3=yes +else case e in #( + e) ac_cv_can_compile_simd_ssse3=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_ssse3" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.1" >&5 +printf %s "checking whether C compiler accepts -msse4.1... " >&6; } +if test ${ax_cv_check_cflags__Werror__msse4_1+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -msse4.1" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__msse4_1=yes +else case e in #( + e) ax_cv_check_cflags__Werror__msse4_1=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse4_1" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse4_1" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse4_1" = xyes +then : + ac_cv_can_compile_simd_sse4_1=yes +else case e in #( + e) ac_cv_can_compile_simd_sse4_1=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_sse4_1" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.2" >&5 +printf %s "checking whether C compiler accepts -msse4.2... " >&6; } +if test ${ax_cv_check_cflags__Werror__msse4_2+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -msse4.2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__msse4_2=yes +else case e in #( + e) ax_cv_check_cflags__Werror__msse4_2=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse4_2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse4_2" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse4_2" = xyes +then : + ac_cv_can_compile_simd_sse4_2=yes +else case e in #( + e) ac_cv_can_compile_simd_sse4_2=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_sse4_2" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # AVX + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx" >&5 +printf %s "checking whether C compiler accepts -mavx... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx" = xyes +then : + ac_cv_can_compile_simd_avx=yes +else case e in #( + e) ac_cv_can_compile_simd_avx=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxifma" >&5 +printf %s "checking whether C compiler accepts -mavxifma... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavxifma+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavxifma" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavxifma=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavxifma=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxifma" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxifma" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxifma" = xyes +then : + ac_cv_can_compile_simd_avx_ifma=yes +else case e in #( + e) ac_cv_can_compile_simd_avx_ifma=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx_ifma" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxneconvert" >&5 +printf %s "checking whether C compiler accepts -mavxneconvert... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavxneconvert+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavxneconvert" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavxneconvert=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavxneconvert=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxneconvert" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxneconvert" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxneconvert" = xyes +then : + ac_cv_can_compile_simd_avx_ne_convert=yes +else case e in #( + e) ac_cv_can_compile_simd_avx_ne_convert=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx_ne_convert" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnni" >&5 +printf %s "checking whether C compiler accepts -mavxvnni... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavxvnni+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavxvnni" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavxvnni=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavxvnni=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxvnni" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxvnni" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxvnni" = xyes +then : + ac_cv_can_compile_simd_avx_vnni=yes +else case e in #( + e) ac_cv_can_compile_simd_avx_vnni=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx_vnni" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint8" >&5 +printf %s "checking whether C compiler accepts -mavxvnniint8... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavxvnniint8+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavxvnniint8" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavxvnniint8=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavxvnniint8=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxvnniint8" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxvnniint8" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxvnniint8" = xyes +then : + ac_cv_can_compile_simd_avx_vnni_int8=yes +else case e in #( + e) ac_cv_can_compile_simd_avx_vnni_int8=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx_vnni_int8" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint16" >&5 +printf %s "checking whether C compiler accepts -mavxvnniint16... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavxvnniint16+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavxvnniint16" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavxvnniint16=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavxvnniint16=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxvnniint16" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxvnniint16" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxvnniint16" = xyes +then : + ac_cv_can_compile_simd_avx_vnni_int16=yes +else case e in #( + e) ac_cv_can_compile_simd_avx_vnni_int16=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx_vnni_int16" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # AVX-2 + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 +printf %s "checking whether C compiler accepts -mavx2... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx2+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx2=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx2=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx2" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx2" = xyes +then : + ac_cv_can_compile_simd_avx2=yes +else case e in #( + e) ac_cv_can_compile_simd_avx2=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx2" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # AVX-512 + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512f" >&5 +printf %s "checking whether C compiler accepts -mavx512f... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512f+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512f" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512f=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512f=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512f" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512f" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512f" = xyes +then : + ac_cv_can_compile_simd_avx512_f=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_f=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_f" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512cd" >&5 +printf %s "checking whether C compiler accepts -mavx512cd... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512cd+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512cd" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512cd=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512cd=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512cd" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512cd" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512cd" = xyes +then : + ac_cv_can_compile_simd_avx512_cd=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_cd=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_cd" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512er" >&5 +printf %s "checking whether C compiler accepts -mavx512er... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512er+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512er" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512er=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512er=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512er" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512er" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512er" = xyes +then : + ac_cv_can_compile_simd_avx512_er=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_er=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_er" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512pf" >&5 +printf %s "checking whether C compiler accepts -mavx512pf... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512pf+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512pf" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512pf=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512pf=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512pf" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512pf" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512pf" = xyes +then : + ac_cv_can_compile_simd_avx512_pf=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_pf=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_pf" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124fmaps" >&5 +printf %s "checking whether C compiler accepts -mavx5124fmaps... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx5124fmaps+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx5124fmaps" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx5124fmaps=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx5124fmaps=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx5124fmaps" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx5124fmaps" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx5124fmaps" = xyes +then : + ac_cv_can_compile_simd_avx512_4fmaps=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_4fmaps=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_4fmaps" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124vnniw" >&5 +printf %s "checking whether C compiler accepts -mavx5124vnniw... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx5124vnniw+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx5124vnniw" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx5124vnniw=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx5124vnniw=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx5124vnniw" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx5124vnniw" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx5124vnniw" = xyes +then : + ac_cv_can_compile_simd_avx512_4vnniw=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_4vnniw=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_4vnniw" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vpopcntdq" >&5 +printf %s "checking whether C compiler accepts -mavx512vpopcntdq... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512vpopcntdq+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512vpopcntdq" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512vpopcntdq=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512vpopcntdq=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vpopcntdq" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vpopcntdq" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vpopcntdq" = xyes +then : + ac_cv_can_compile_simd_avx512_vpopcntdq=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_vpopcntdq=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_vpopcntdq" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vl" >&5 +printf %s "checking whether C compiler accepts -mavx512vl... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512vl+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512vl" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512vl=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512vl=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vl" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vl" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vl" = xyes +then : + ac_cv_can_compile_simd_avx512_vl=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_vl=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_vl" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512dq" >&5 +printf %s "checking whether C compiler accepts -mavx512dq... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512dq+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512dq" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512dq=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512dq=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512dq" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512dq" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512dq" = xyes +then : + ac_cv_can_compile_simd_avx512_dq=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_dq=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_dq" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bw" >&5 +printf %s "checking whether C compiler accepts -mavx512bw... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512bw+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512bw" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512bw=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512bw=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512bw" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512bw" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512bw" = xyes +then : + ac_cv_can_compile_simd_avx512_bw=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_bw=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_bw" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512ifma" >&5 +printf %s "checking whether C compiler accepts -mavx512ifma... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512ifma+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512ifma" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512ifma=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512ifma=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512ifma" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512ifma" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512ifma" = xyes +then : + ac_cv_can_compile_simd_avx512_ifma=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_ifma=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_ifma" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi" >&5 +printf %s "checking whether C compiler accepts -mavx512vbmi... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512vbmi+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512vbmi" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512vbmi=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512vbmi=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vbmi" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vbmi" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vbmi" = xyes +then : + ac_cv_can_compile_simd_avx512_vbmi=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_vbmi=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_vbmi" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vnni" >&5 +printf %s "checking whether C compiler accepts -mavx512vnni... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512vnni+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512vnni" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512vnni=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512vnni=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vnni" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vnni" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vnni" = xyes +then : + ac_cv_can_compile_simd_avx512_vnni=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_vnni=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_vnni" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi2" >&5 +printf %s "checking whether C compiler accepts -mavx512vbmi2... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512vbmi2+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512vbmi2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512vbmi2=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512vbmi2=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vbmi2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vbmi2" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vbmi2" = xyes +then : + ac_cv_can_compile_simd_avx512_vbmi2=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_vbmi2=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_vbmi2" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bitalg" >&5 +printf %s "checking whether C compiler accepts -mavx512bitalg... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512bitalg+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512bitalg" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512bitalg=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512bitalg=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512bitalg" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512bitalg" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512bitalg" = xyes +then : + ac_cv_can_compile_simd_avx512_bitalg=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_bitalg=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_bitalg" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vp2intersect" >&5 +printf %s "checking whether C compiler accepts -mavx512vp2intersect... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512vp2intersect+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512vp2intersect" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512vp2intersect=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512vp2intersect=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vp2intersect" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vp2intersect" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vp2intersect" = xyes +then : + ac_cv_can_compile_simd_avx512_vp2intersect=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_vp2intersect=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_vp2intersect" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mxsave" >&5 +printf %s "checking whether C compiler accepts -mxsave... " >&6; } +if test ${ax_cv_check_cflags__Werror__mxsave+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mxsave" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mxsave=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mxsave=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mxsave" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mxsave" >&6; } +if test "x$ax_cv_check_cflags__Werror__mxsave" = xyes +then : + CORE_CPUINFO_CFLAGS=-mxsave +else case e in #( + e) CORE_CPUINFO_CFLAGS= ;; +esac +fi + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking _xgetbv(0) is natively supported" >&5 +printf %s "checking _xgetbv(0) is natively supported... " >&6; } +if test ${ac_cv_use_xgetbv_func+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + save_CFLAGS=$CFLAGS +save_CPPFLAGS=$CPPFLAGS +save_LDFLAGS=$LDFLAGS +save_LIBS=$LIBS + + + CFLAGS="$CFLAGS -Werror -mxsave" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ +_xgetbv(0) + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_use_xgetbv_func=yes +else case e in #( + e) ac_cv_use_xgetbv_func=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +CFLAGS=$save_CFLAGS +CPPFLAGS=$save_CPPFLAGS +LDFLAGS=$save_LDFLAGS +LIBS=$save_LIBS + + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_use_xgetbv_func" >&5 +printf "%s\n" "$ac_cv_use_xgetbv_func" >&6; } +if test "$ac_cv_use_xgetbv_func" = "yes" ; then + +printf "%s\n" "#define _Py_CPUINFO_USE_XGETBV_FUNC 1" >>confdefs.h + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking xgetbv(0) opcode is supported" >&5 +printf %s "checking xgetbv(0) opcode is supported... 
" >&6; } +if test ${ac_cv_use_xgetbv_opcode+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + save_CFLAGS=$CFLAGS +save_CPPFLAGS=$CPPFLAGS +save_LDFLAGS=$LDFLAGS +save_LIBS=$LIBS + + + CFLAGS="$CFLAGS -Werror" + if test "$cross_compiling" = yes +then : + ac_cv_use_xgetbv_opcode=no +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ + + int main(void) + { + uint32_t eax = 0, edx = 0; + __asm__ __volatile__( + ".byte 0x0f, 0x01, 0xd0" : "=a" (eax), "=d" (edx) : "c" (0)); + return 0; + } + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO" +then : + ac_cv_use_xgetbv_opcode=yes +else case e in #( + e) ac_cv_use_xgetbv_opcode=no ;; +esac +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi + +CFLAGS=$save_CFLAGS +CPPFLAGS=$save_CPPFLAGS +LDFLAGS=$save_LDFLAGS +LIBS=$save_LIBS + + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_use_xgetbv_opcode" >&5 +printf "%s\n" "$ac_cv_use_xgetbv_opcode" >&6; } +if test "$ac_cv_use_xgetbv_opcode" = "yes" ; then + +printf "%s\n" "#define _Py_CPUINFO_USE_XGETBV_OPCODE 1" >>confdefs.h + +fi + ############################################################################### # HACL* compilation and linking configuration (contact: @picnixz) # @@ -32588,48 +34373,19 @@ else use_hacl_universal2_impl=no fi -# The SIMD files use aligned_alloc, which is not available on older versions of -# Android. -# The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. +# The HACL* SIMD-128 files use aligned_alloc, which is not available +# on older versions of Android. In addition, since the *mmintrin.h +# headers are x86-family-specific, they cannot be used on WASI. 
if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse -msse2 -msse3 -msse4.1 -msse4.2" >&5 -printf %s "checking whether C compiler accepts -msse -msse2 -msse3 -msse4.1 -msse4.2... " >&6; } -if test ${ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -Werror -msse -msse2 -msse3 -msse4.1 -msse4.2" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2=yes -else case e in #( - e) ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2" >&5 -printf "%s\n" "$ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2" >&6; } -if test "x$ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2" = xyes -then : - + # SIMD-128 + if test "$ac_cv_can_compile_simd_sse" = "yes" \ + -a "$ac_cv_can_compile_simd_sse2" = "yes" \ + -a "$ac_cv_can_compile_simd_sse3" = "yes" \ + -a "$ac_cv_can_compile_simd_sse4_1" = "yes" \ + -a "$ac_cv_can_compile_simd_sse4_2" = "yes" + then LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2" @@ -32650,65 +34406,22 @@ printf "%s\n" "universal2" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: standard" >&5 printf "%s\n" "standard" >&6; } fi - - -else case e in #( - e) : ;; -esac -fi - + fi fi -# The SIMD files use aligned_alloc, which is not available on older versions of -# 
Android. -# The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. -# -# Although AVX support is not guaranteed on Android -# (https://developer.android.com/ndk/guides/abis#86-64), this is safe because we do a -# runtime CPUID check. +# The HACL* SIMD-256 files use aligned_alloc, which is not available +# on older versions of Android. In addition, since the *mmintrin.h +# headers are x86-family-specific, they cannot be used on WASI. if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 -printf %s "checking whether C compiler accepts -mavx2... " >&6; } -if test ${ax_cv_check_cflags__Werror__mavx2+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -Werror -mavx2" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ax_cv_check_cflags__Werror__mavx2=yes -else case e in #( - e) ax_cv_check_cflags__Werror__mavx2=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx2" >&5 -printf "%s\n" "$ax_cv_check_cflags__Werror__mavx2" >&6; } -if test "x$ax_cv_check_cflags__Werror__mavx2" = xyes -then : - + if test "$ac_cv_can_compile_simd_avx2" = "yes" + then LIBHACL_SIMD256_FLAGS="-mavx2" + printf "%s\n" "#define _Py_HACL_CAN_COMPILE_VEC256 1" >>confdefs.h @@ -32727,12 +34440,7 @@ printf "%s\n" "universal2" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: standard" >&5 printf "%s\n" "standard" >&6; } fi - -else case e in #( - e) : ;; -esac -fi - + fi fi diff --git a/configure.ac b/configure.ac index 4da1ba78b54b0d..5a37d128d1ce4b 100644 --- a/configure.ac +++ b/configure.ac @@ -7965,6 +7965,137 @@ PY_STDLIB_MOD_SIMPLE([_codecs_tw]) PY_STDLIB_MOD_SIMPLE([_multibytecodec]) PY_STDLIB_MOD_SIMPLE([unicodedata]) +dnl PY_SIMD_DETECT(INSTRUCTION-SET-NAME, COMPILER-FLAG, [NORMALIZED-NAME]) +dnl ---------------------------------------------------------------------- +dnl +dnl Check if the compiler supports a given COMPILER-FLAG and define: +dnl +dnl ac_cv_can_compile_simd_<normalized-name> = yes +dnl #define _Py_CAN_COMPILE_SIMD_<NORMALIZED-NAME>_INSTRUCTIONS 1 +dnl +dnl or +dnl +dnl ac_cv_can_compile_simd_<normalized-name> = no +dnl #undef _Py_CAN_COMPILE_SIMD_<NORMALIZED-NAME>_INSTRUCTIONS +dnl +dnl where <normalized-name> and <NORMALIZED-NAME> are the lowercased and uppercased versions +dnl of NORMALIZED-NAME; by default, the latter is INSTRUCTION-SET-NAME.
+dnl
+AC_DEFUN([PY_SIMD_DETECT], [
+  AS_VAR_PUSHDEF([py_simd_cache_var], [m4_ifnblank([$3],
+    [[ac_cv_can_compile_simd_]m4_tolower([$3])],
+    [[ac_cv_can_compile_simd_]m4_tolower([$1])])])
+  AS_VAR_PUSHDEF([py_simd_cpp_macro], [m4_ifnblank([$3],
+    [[_Py_CAN_COMPILE_SIMD_]m4_toupper([$3])[_INSTRUCTIONS]],
+    [[_Py_CAN_COMPILE_SIMD_]m4_toupper([$1])[_INSTRUCTIONS]])])
+  AX_CHECK_COMPILE_FLAG([$2],
+    [AS_VAR_SET([py_simd_cache_var], [yes])],
+    [AS_VAR_SET([py_simd_cache_var], [no])],
+    [-Werror])
+  AS_VAR_IF([py_simd_cache_var], [yes], [
+    AC_DEFINE([py_simd_cpp_macro], [1], [Define if '$2' is a valid compiler flag.])
+  ])
+  AS_VAR_POPDEF([py_simd_cpp_macro])
+  AS_VAR_POPDEF([py_simd_cache_var])
+])
+
+# Detection of supported SIMD instruction sets for CPython. Since
+# we do not necessarily know which instruction sets will be used,
+# we disable SIMD support on some older Android platforms.
+#
+# See _Py_cpuid_features in pycore_cpuinfo.h for how to order fields
+# and where to put blank lines to separate processor generations for
+# AVX-512 instructions.
+#
+# Although AVX support is not guaranteed on Android [1], this is safe
+# because we do a runtime CPUID check.
+#
+# [1]: https://developer.android.com/ndk/guides/abis#86-64
+if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \
+   { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; }
+then
+  # SSE
+  PY_SIMD_DETECT([SSE], [-msse])
+  PY_SIMD_DETECT([SSE2], [-msse2])
+  PY_SIMD_DETECT([SSE3], [-msse3])
+  PY_SIMD_DETECT([SSSE3], [-mssse3])
+  PY_SIMD_DETECT([SSE4.1], [-msse4.1], [SSE4_1])
+  PY_SIMD_DETECT([SSE4.2], [-msse4.2], [SSE4_2])
+  # AVX
+  PY_SIMD_DETECT([AVX], [-mavx])
+  PY_SIMD_DETECT([AVX_IFMA], [-mavxifma])
+  PY_SIMD_DETECT([AVX_NE_CONVERT], [-mavxneconvert])
+  #
+  PY_SIMD_DETECT([AVX_VNNI], [-mavxvnni])
+  PY_SIMD_DETECT([AVX_VNNI_INT8], [-mavxvnniint8])
+  PY_SIMD_DETECT([AVX_VNNI_INT16], [-mavxvnniint16])
+  # AVX-2
+  PY_SIMD_DETECT([AVX2], [-mavx2])
+  # AVX-512
+  PY_SIMD_DETECT([AVX512_F], [-mavx512f])
+  PY_SIMD_DETECT([AVX512_CD], [-mavx512cd])
+  #
+  PY_SIMD_DETECT([AVX512_ER], [-mavx512er])
+  PY_SIMD_DETECT([AVX512_PF], [-mavx512pf])
+  #
+  PY_SIMD_DETECT([AVX512_4FMAPS], [-mavx5124fmaps])
+  PY_SIMD_DETECT([AVX512_4VNNIW], [-mavx5124vnniw])
+  #
+  PY_SIMD_DETECT([AVX512_VPOPCNTDQ], [-mavx512vpopcntdq])
+  #
+  PY_SIMD_DETECT([AVX512_VL], [-mavx512vl])
+  PY_SIMD_DETECT([AVX512_DQ], [-mavx512dq])
+  PY_SIMD_DETECT([AVX512_BW], [-mavx512bw])
+  #
+  PY_SIMD_DETECT([AVX512_IFMA], [-mavx512ifma])
+  PY_SIMD_DETECT([AVX512_VBMI], [-mavx512vbmi])
+  #
+  PY_SIMD_DETECT([AVX512_VNNI], [-mavx512vnni])
+  #
+  PY_SIMD_DETECT([AVX512_VBMI2], [-mavx512vbmi2])
+  PY_SIMD_DETECT([AVX512_BITALG], [-mavx512bitalg])
+  #
+  PY_SIMD_DETECT([AVX512_VP2INTERSECT], [-mavx512vp2intersect])
+fi
+
+dnl Check whether -mxsave can be used to compile cpuinfo.c, since the
+dnl latter must be built with this option for xgetbv() support.
+AX_CHECK_COMPILE_FLAG([-mxsave],
+  [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [-mxsave])],
+  [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [])],
+  [-Werror])
+AC_SUBST([CORE_CPUINFO_CFLAGS])
+
+AC_CACHE_CHECK([_xgetbv(0) is natively supported], [ac_cv_use_xgetbv_func], [
+  WITH_SAVE_ENV([
+    CFLAGS="$CFLAGS -Werror -mxsave"
+    AC_COMPILE_IFELSE(
+      [AC_LANG_PROGRAM([[@%:@include <immintrin.h>]], [[_xgetbv(0)]])],
+      [ac_cv_use_xgetbv_func=yes],
+      [ac_cv_use_xgetbv_func=no])])])
+if test "$ac_cv_use_xgetbv_func" = "yes" ; then
+  AC_DEFINE([_Py_CPUINFO_USE_XGETBV_FUNC], [1], [_xgetbv() is preferred])
+fi
+
+dnl NOTE: AC_LANG_PROGRAM wraps its body in main(); the body must therefore
+dnl contain statements only. A nested main() definition is a GCC extension
+dnl (rejected by Clang) and would leave the XGETBV opcode unexecuted.
+AC_CACHE_CHECK([xgetbv(0) opcode is supported], [ac_cv_use_xgetbv_opcode], [
+  WITH_SAVE_ENV([
+    CFLAGS="$CFLAGS -Werror"
+    AC_RUN_IFELSE([AC_LANG_PROGRAM([[@%:@include <stdint.h>]], [[
+      uint32_t eax = 0, edx = 0;
+      __asm__ __volatile__(
+        ".byte 0x0f, 0x01, 0xd0" : "=a" (eax), "=d" (edx) : "c" (0));
+      (void)eax; (void)edx;
+    ]])],
+    [ac_cv_use_xgetbv_opcode=yes],
+    [ac_cv_use_xgetbv_opcode=no],
+    [ac_cv_use_xgetbv_opcode=no])])])
+if test "$ac_cv_use_xgetbv_opcode" = "yes" ; then
+  AC_DEFINE([_Py_CPUINFO_USE_XGETBV_OPCODE], [1], [XGETBV opcode is preferred])
+fi
+
 ###############################################################################
 # HACL* compilation and linking configuration (contact: @picnixz)
 #
@@ -8016,14 +8147,19 @@ else
   use_hacl_universal2_impl=no
 fi
 
-# The SIMD files use aligned_alloc, which is not available on older versions of
-# Android.
-# The *mmintrin.h headers are x86-family-specific, so can't be used on WASI.
+# The HACL* SIMD-128 files use aligned_alloc, which is not available
+# on older versions of Android. In addition, since the *mmintrin.h
+# headers are x86-family-specific, they cannot be used on WASI.
 if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \
    { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; }
 then
-  dnl This can be extended here to detect e.g. Power8, which HACL* should also support.
-  AX_CHECK_COMPILE_FLAG([-msse -msse2 -msse3 -msse4.1 -msse4.2],[
+  # SIMD-128: every SSE level listed in LIBHACL_SIMD128_FLAGS must compile.
+  if test "$ac_cv_can_compile_simd_sse" = "yes" \
+     && test "$ac_cv_can_compile_simd_sse2" = "yes" \
+     && test "$ac_cv_can_compile_simd_sse3" = "yes" \
+     && test "$ac_cv_can_compile_simd_sse4_1" = "yes" \
+     && test "$ac_cv_can_compile_simd_sse4_2" = "yes"
+  then
     [LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2"]
 
     AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC128], [1], [
@@ -8040,24 +8176,21 @@ then
       [LIBHACL_BLAKE2_SIMD128_OBJS="Modules/_hacl/Hacl_Hash_Blake2s_Simd128.o"]
       AC_MSG_RESULT([standard])
     fi
-
-  ], [], [-Werror])
+  fi
 fi
 AC_SUBST([LIBHACL_SIMD128_FLAGS])
 AC_SUBST([LIBHACL_BLAKE2_SIMD128_OBJS])
 
-# The SIMD files use aligned_alloc, which is not available on older versions of
-# Android.
-# The *mmintrin.h headers are x86-family-specific, so can't be used on WASI.
-#
-# Although AVX support is not guaranteed on Android
-# (https://developer.android.com/ndk/guides/abis#86-64), this is safe because we do a
-# runtime CPUID check.
+# The HACL* SIMD-256 files use aligned_alloc, which is not available
+# on older versions of Android. In addition, since the *mmintrin.h
+# headers are x86-family-specific, they cannot be used on WASI.
if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } then - AX_CHECK_COMPILE_FLAG([-mavx2],[ + if test "$ac_cv_can_compile_simd_avx2" = "yes" + then [LIBHACL_SIMD256_FLAGS="-mavx2"] + AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC256], [1], [ HACL* library can compile SIMD256 implementations]) @@ -8073,7 +8206,7 @@ then [LIBHACL_BLAKE2_SIMD256_OBJS="Modules/_hacl/Hacl_Hash_Blake2b_Simd256.o"] AC_MSG_RESULT([standard]) fi - ], [], [-Werror]) + fi fi AC_SUBST([LIBHACL_SIMD256_FLAGS]) AC_SUBST([LIBHACL_BLAKE2_SIMD256_OBJS]) diff --git a/pyconfig.h.in b/pyconfig.h.in index d7c496fccc682c..eae7c2d874a3d5 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -2017,6 +2017,99 @@ /* Maximum length in bytes of a thread name */ #undef _PYTHREAD_NAME_MAXLEN +/* Define if '-mavx2' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS + +/* Define if '-mavx5124fmaps' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS + +/* Define if '-mavx5124vnniw' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS + +/* Define if '-mavx512bitalg' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS + +/* Define if '-mavx512bw' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS + +/* Define if '-mavx512cd' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS + +/* Define if '-mavx512dq' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS + +/* Define if '-mavx512er' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS + +/* Define if '-mavx512f' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS + +/* Define if '-mavx512ifma' is a valid compiler flag. 
*/ +#undef _Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS + +/* Define if '-mavx512pf' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS + +/* Define if '-mavx512vbmi2' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS + +/* Define if '-mavx512vbmi' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS + +/* Define if '-mavx512vl' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS + +/* Define if '-mavx512vnni' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS + +/* Define if '-mavx512vp2intersect' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS + +/* Define if '-mavx512vpopcntdq' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS + +/* Define if '-mavxifma' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS + +/* Define if '-mavx' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS + +/* Define if '-mavxneconvert' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS + +/* Define if '-mavxvnni' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS + +/* Define if '-mavxvnniint16' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS + +/* Define if '-mavxvnniint8' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS + +/* Define if '-msse2' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS + +/* Define if '-msse3' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS + +/* Define if '-msse4.1' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS + +/* Define if '-msse4.2' is a valid compiler flag. 
*/ +#undef _Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS + +/* Define if '-msse' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS + +/* Define if '-mssse3' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS + +/* _xgetbv() is preferred */ +#undef _Py_CPUINFO_USE_XGETBV_FUNC + +/* XGETBV opcode is preferred */ +#undef _Py_CPUINFO_USE_XGETBV_OPCODE + /* Defined if _Complex C type can be used with libffi. */ #undef _Py_FFI_SUPPORT_C_COMPLEX