diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 07aa89f573f05f..ae37e2d3383f9b 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -43,12 +43,11 @@ // SIMD256 can't be compiled on macOS ARM64, and performance of SIMD128 isn't // great; but when compiling a universal2 binary, autoconf will set -// HACL_CAN_COMPILE_SIMD128 and HACL_CAN_COMPILE_SIMD256 because they *can* be -// compiled on x86_64. If we're on macOS ARM64, disable these preprocessor -// symbols. +// _Py_HACL_CAN_COMPILE_VEC{128,256} because they *can* be compiled on x86_64. +// If we're on macOS ARM64, we however disable these preprocessor symbols. #if defined(__APPLE__) && defined(__arm64__) -# undef HACL_CAN_COMPILE_SIMD128 -# undef HACL_CAN_COMPILE_SIMD256 +# undef _Py_HACL_CAN_COMPILE_VEC128 +# undef _Py_HACL_CAN_COMPILE_VEC256 #endif // ECX @@ -114,31 +113,32 @@ void detect_cpu_features(cpu_flags *flags) { } } -#ifdef HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 static inline bool has_simd128(cpu_flags *flags) { - // For now this is Intel-only, could conceivably be #ifdef'd to something + // For now this is Intel-only, could conceivably be if'd to something // else. return flags->sse && flags->sse2 && flags->sse3 && flags->sse41 && flags->sse42 && flags->cmov; } #endif -#ifdef HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 static inline bool has_simd256(cpu_flags *flags) { return flags->avx && flags->avx2; } #endif -// Small mismatch between the variable names Python defines as part of configure -// at the ones HACL* expects to be set in order to enable those headers. -#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128 -#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256 +// HACL* expects HACL_CAN_COMPILE_VEC* macros to be set in order to enable +// the corresponding SIMD instructions so we need to "forward" the values +// we just deduced above. +#define HACL_CAN_COMPILE_VEC128 _Py_HACL_CAN_COMPILE_VEC128 +#define HACL_CAN_COMPILE_VEC256 _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_Hash_Blake2b.h" #include "_hacl/Hacl_Hash_Blake2s.h" -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_Hash_Blake2b_Simd256.h" #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 #include "_hacl/Hacl_Hash_Blake2s_Simd128.h" #endif @@ -165,7 +165,7 @@ blake2_get_state(PyObject *module) return (Blake2State *)state; } -#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) +#if defined(_Py_HACL_CAN_COMPILE_VEC128) || defined(_Py_HACL_CAN_COMPILE_VEC256) static inline Blake2State* blake2_get_state_from_type(PyTypeObject *module) { @@ -329,18 +329,18 @@ static inline bool is_blake2s(blake2_impl impl) { } static inline blake2_impl type_to_impl(PyTypeObject *type) { -#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) +#if defined(_Py_HACL_CAN_COMPILE_VEC128) || defined(_Py_HACL_CAN_COMPILE_VEC256) Blake2State* st = blake2_get_state_from_type(type); #endif if (!strcmp(type->tp_name, blake2b_type_spec.name)) { -#ifdef HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 if (has_simd256(&st->flags)) return Blake2b_256; else #endif return Blake2b; } else if (!strcmp(type->tp_name, blake2s_type_spec.name)) { -#ifdef HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 if (has_simd128(&st->flags)) return Blake2s_128; else @@ -356,10 +356,10 @@ typedef struct { union { Hacl_Hash_Blake2s_state_t *blake2s_state; Hacl_Hash_Blake2b_state_t *blake2b_state; -#ifdef HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 Hacl_Hash_Blake2s_Simd128_state_t *blake2s_128_state; #endif -#ifdef HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 Hacl_Hash_Blake2b_Simd256_state_t *blake2b_256_state; #endif }; @@ -425,14 +425,14 @@ static void update(Blake2Object *self, uint8_t *buf, Py_ssize_t len) { switch (self->impl) { - // These need to be ifdef'd out otherwise it's an unresolved symbol at - // link-time. -#ifdef HACL_CAN_COMPILE_SIMD256 + // blake2b_256_state and blake2s_128_state must be if'd since + // otherwise this results in an unresolved symbol at link-time. +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: HACL_UPDATE(Hacl_Hash_Blake2b_Simd256_update,self->blake2b_256_state, buf, len); return; #endif -#ifdef HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: HACL_UPDATE(Hacl_Hash_Blake2s_Simd128_update,self->blake2s_128_state, buf, len); return; @@ -468,12 +468,12 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, // Ensure that the states are NULL-initialized in case of an error. // See: py_blake2_clear() for more details. switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: self->blake2b_256_state = NULL; break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: self->blake2s_128_state = NULL; break; @@ -591,7 +591,7 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, }; switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: { self->blake2b_256_state = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(¶ms, last_node, key->buf); if (self->blake2b_256_state == NULL) { @@ -601,7 +601,7 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, break; } #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: { self->blake2s_128_state = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(¶ms, last_node, key->buf); if (self->blake2s_128_state == NULL) { @@ -733,7 +733,7 @@ blake2_blake2b_copy_locked(Blake2Object *self, Blake2Object *cpy) { assert(cpy != NULL); switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: { cpy->blake2b_256_state = Hacl_Hash_Blake2b_Simd256_copy(self->blake2b_256_state); if (cpy->blake2b_256_state == NULL) { @@ -742,7 +742,7 @@ blake2_blake2b_copy_locked(Blake2Object *self, Blake2Object *cpy) break; } #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: { cpy->blake2s_128_state = Hacl_Hash_Blake2s_Simd128_copy(self->blake2s_128_state); if (cpy->blake2s_128_state == NULL) { @@ -853,12 +853,12 @@ _blake2_blake2b_digest_impl(Blake2Object *self) ENTER_HASHLIB(self); uint8_t digest_length = 0; switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: digest_length = Hacl_Hash_Blake2b_Simd256_digest(self->blake2b_256_state, digest); break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: digest_length = Hacl_Hash_Blake2s_Simd128_digest(self->blake2s_128_state, digest); break; @@ -891,12 +891,12 @@ _blake2_blake2b_hexdigest_impl(Blake2Object *self) ENTER_HASHLIB(self); uint8_t digest_length = 0; switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: digest_length = Hacl_Hash_Blake2b_Simd256_digest(self->blake2b_256_state, digest); break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: digest_length = Hacl_Hash_Blake2s_Simd128_digest(self->blake2s_128_state, digest); break; @@ -947,11 +947,11 @@ py_blake2b_get_digest_size(PyObject *op, void *Py_UNUSED(closure)) { Blake2Object *self = _Blake2Object_CAST(op); switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: return PyLong_FromLong(Hacl_Hash_Blake2b_Simd256_info(self->blake2b_256_state).digest_length); #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: return PyLong_FromLong(Hacl_Hash_Blake2s_Simd128_info(self->blake2s_128_state).digest_length); #endif @@ -982,7 +982,7 @@ py_blake2_clear(PyObject *op) // it. If an error occurs in the constructor, we should only free // states that were allocated (i.e. that are not NULL). switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: if (self->blake2b_256_state != NULL) { Hacl_Hash_Blake2b_Simd256_free(self->blake2b_256_state); @@ -990,7 +990,7 @@ py_blake2_clear(PyObject *op) } break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: if (self->blake2s_128_state != NULL) { Hacl_Hash_Blake2s_Simd128_free(self->blake2s_128_state); diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 059e15ba0d35fd..8cd470f4f80b3a 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -31,14 +31,15 @@ #endif #if defined(__APPLE__) && defined(__arm64__) -# undef HACL_CAN_COMPILE_SIMD128 -# undef HACL_CAN_COMPILE_SIMD256 +# undef _Py_HACL_CAN_COMPILE_VEC128 +# undef _Py_HACL_CAN_COMPILE_VEC256 #endif -// Small mismatch between the variable names Python defines as part of configure -// at the ones HACL* expects to be set in order to enable those headers. -#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128 -#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256 +// HACL* expects HACL_CAN_COMPILE_VEC* macros to be set in order to enable +// the corresponding SIMD instructions so we need to "forward" the values +// we just deduced above. +#define HACL_CAN_COMPILE_VEC128 _Py_HACL_CAN_COMPILE_VEC128 +#define HACL_CAN_COMPILE_VEC256 _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_HMAC.h" #include "_hacl/Hacl_Streaming_HMAC.h" // Hacl_Agile_Hash_* identifiers @@ -464,7 +465,7 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) { switch (kind) { case Py_hmac_kind_hmac_blake2s_32: { -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 if (state->can_run_simd128) { return Py_hmac_kind_hmac_vectorized_blake2s_32; } @@ -472,7 +473,7 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) return kind; } case Py_hmac_kind_hmac_blake2b_32: { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 if (state->can_run_simd256) { return Py_hmac_kind_hmac_vectorized_blake2b_32; } @@ -1761,7 +1762,7 @@ hmacmodule_init_cpu_features(hmacmodule_state *state) #undef ECX_SSE3 #undef EBX_AVX2 -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov; #else @@ -1771,7 +1772,7 @@ hmacmodule_init_cpu_features(hmacmodule_state *state) state->can_run_simd128 = false; #endif -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd256 = state->can_run_simd128 && avx && avx2; #else diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 32a8f2dbad3d5e..b911c9385634d7 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -419,8 +419,12 @@ - HACL_CAN_COMPILE_SIMD128;%(PreprocessorDefinitions) - HACL_CAN_COMPILE_SIMD256;%(PreprocessorDefinitions) + + _Py_HACL_CAN_COMPILE_VEC128;%(PreprocessorDefinitions) + + + _Py_HACL_CAN_COMPILE_VEC256;%(PreprocessorDefinitions) + diff --git a/configure b/configure index 47b6c0ebc88a01..cc2636348ad4cc 100755 --- a/configure +++ b/configure @@ -32625,7 +32625,7 @@ then : LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2" -printf "%s\n" "#define HACL_CAN_COMPILE_SIMD128 1" >>confdefs.h +printf "%s\n" "#define _Py_HACL_CAN_COMPILE_VEC128 1" >>confdefs.h # macOS universal2 builds *support* the -msse etc flags because they're @@ -32701,7 +32701,7 @@ then : LIBHACL_SIMD256_FLAGS="-mavx2" -printf "%s\n" "#define HACL_CAN_COMPILE_SIMD256 1" >>confdefs.h +printf "%s\n" "#define _Py_HACL_CAN_COMPILE_VEC256 1" >>confdefs.h # macOS universal2 builds *support* the -mavx2 compiler flag because it's diff --git a/configure.ac b/configure.ac index a4a747180bfdd1..912f77b9ae2cc5 100644 --- a/configure.ac +++ b/configure.ac @@ -8017,7 +8017,8 @@ then AX_CHECK_COMPILE_FLAG([-msse -msse2 -msse3 -msse4.1 -msse4.2],[ [LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2"] - AC_DEFINE([HACL_CAN_COMPILE_SIMD128], [1], [HACL* library can compile SIMD128 implementations]) + AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC128], [1], [ + HACL* library can compile SIMD128 implementations]) # macOS universal2 builds *support* the -msse etc flags because they're # available on x86_64. However, performance of the HACL SIMD128 implementation @@ -8048,7 +8049,8 @@ if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ then AX_CHECK_COMPILE_FLAG([-mavx2],[ [LIBHACL_SIMD256_FLAGS="-mavx2"] - AC_DEFINE([HACL_CAN_COMPILE_SIMD256], [1], [HACL* library can compile SIMD256 implementations]) + AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC256], [1], [ + HACL* library can compile SIMD256 implementations]) # macOS universal2 builds *support* the -mavx2 compiler flag because it's # available on x86_64; but the HACL SIMD256 build then fails because the diff --git a/pyconfig.h.in b/pyconfig.h.in index d4f1da7fb10776..d7c496fccc682c 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -50,12 +50,6 @@ /* Define if getpgrp() must be called as getpgrp(0). */ #undef GETPGRP_HAVE_ARG -/* HACL* library can compile SIMD128 implementations */ -#undef HACL_CAN_COMPILE_SIMD128 - -/* HACL* library can compile SIMD256 implementations */ -#undef HACL_CAN_COMPILE_SIMD256 - /* Define if you have the 'accept' function. */ #undef HAVE_ACCEPT @@ -2026,6 +2020,12 @@ /* Defined if _Complex C type can be used with libffi. */ #undef _Py_FFI_SUPPORT_C_COMPLEX +/* HACL* library can compile SIMD128 implementations */ +#undef _Py_HACL_CAN_COMPILE_VEC128 + +/* HACL* library can compile SIMD256 implementations */ +#undef _Py_HACL_CAN_COMPILE_VEC256 + /* Define to force use of thread-safe errno, h_errno, and other functions */ #undef _REENTRANT