[3.14] gh-135755: rename undocumented `HACL_CAN_COMPILE_SIMD{128,256}` macros (GH-135847) by picnixz · Pull Request #136045 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

[3.14] gh-135755: rename undocumented HACL_CAN_COMPILE_SIMD{128,256} macros (GH-135847) #136045

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 38 additions & 38 deletions Modules/blake2module.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,11 @@

// SIMD256 can't be compiled on macOS ARM64, and performance of SIMD128 isn't
// great; but when compiling a universal2 binary, autoconf will set
// HACL_CAN_COMPILE_SIMD128 and HACL_CAN_COMPILE_SIMD256 because they *can* be
// compiled on x86_64. If we're on macOS ARM64, disable these preprocessor
// symbols.
// _Py_HACL_CAN_COMPILE_VEC{128,256} because they *can* be compiled on x86_64.
// However, if we're on macOS ARM64, we disable these preprocessor symbols.
#if defined(__APPLE__) && defined(__arm64__)
# undef HACL_CAN_COMPILE_SIMD128
# undef HACL_CAN_COMPILE_SIMD256
# undef _Py_HACL_CAN_COMPILE_VEC128
# undef _Py_HACL_CAN_COMPILE_VEC256
#endif

// ECX
Expand Down Expand Up @@ -114,31 +113,32 @@ void detect_cpu_features(cpu_flags *flags) {
}
}

#ifdef HACL_CAN_COMPILE_SIMD128
#if _Py_HACL_CAN_COMPILE_VEC128
// Return true only when every flag checked below (sse, sse2, sse3, sse41,
// sse42 and cmov) is set in `flags`; a single missing flag yields false.
// NOTE(review): the two near-identical comment lines below are the old and
// new sides of the rendered diff, kept verbatim.
static inline bool has_simd128(cpu_flags *flags) {
// For now this is Intel-only, could conceivably be #ifdef'd to something
// For now this is Intel-only, could conceivably be if'd to something
// else.
return flags->sse && flags->sse2 && flags->sse3 && flags->sse41 && flags->sse42 && flags->cmov;
}
#endif

#ifdef HACL_CAN_COMPILE_SIMD256
#if _Py_HACL_CAN_COMPILE_VEC256
// Tell whether `flags` reports both the avx and the avx2 feature flags.
// Returns true only if the two of them are set.
static inline bool has_simd256(cpu_flags *flags) {
    const bool avx_ok = flags->avx;
    const bool avx2_ok = flags->avx2;
    return avx_ok && avx2_ok;
}
#endif

// Small mismatch between the variable names Python defines as part of configure
// and the ones HACL* expects to be set in order to enable those headers.
#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128
#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256
// HACL* expects HACL_CAN_COMPILE_VEC* macros to be set in order to enable
// the corresponding SIMD instructions so we need to "forward" the values
// we just deduced above.
#define HACL_CAN_COMPILE_VEC128 _Py_HACL_CAN_COMPILE_VEC128
#define HACL_CAN_COMPILE_VEC256 _Py_HACL_CAN_COMPILE_VEC256

#include "_hacl/Hacl_Hash_Blake2b.h"
#include "_hacl/Hacl_Hash_Blake2s.h"
#if HACL_CAN_COMPILE_SIMD256
#if _Py_HACL_CAN_COMPILE_VEC256
#include "_hacl/Hacl_Hash_Blake2b_Simd256.h"
#endif
#if HACL_CAN_COMPILE_SIMD128
#if _Py_HACL_CAN_COMPILE_VEC128
#include "_hacl/Hacl_Hash_Blake2s_Simd128.h"
#endif

Expand All @@ -165,7 +165,7 @@ blake2_get_state(PyObject *module)
return (Blake2State *)state;
}

#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256)
#if defined(_Py_HACL_CAN_COMPILE_VEC128) || defined(_Py_HACL_CAN_COMPILE_VEC256)
static inline Blake2State*
blake2_get_state_from_type(PyTypeObject *module)
{
Expand Down Expand Up @@ -329,18 +329,18 @@ static inline bool is_blake2s(blake2_impl impl) {
}

static inline blake2_impl type_to_impl(PyTypeObject *type) {
#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256)
#if defined(_Py_HACL_CAN_COMPILE_VEC128) || defined(_Py_HACL_CAN_COMPILE_VEC256)
Blake2State* st = blake2_get_state_from_type(type);
#endif
if (!strcmp(type->tp_name, blake2b_type_spec.name)) {
#ifdef HACL_CAN_COMPILE_SIMD256
#if _Py_HACL_CAN_COMPILE_VEC256
if (has_simd256(&st->flags))
return Blake2b_256;
else
#endif
return Blake2b;
} else if (!strcmp(type->tp_name, blake2s_type_spec.name)) {
#ifdef HACL_CAN_COMPILE_SIMD128
#if _Py_HACL_CAN_COMPILE_VEC128
if (has_simd128(&st->flags))
return Blake2s_128;
else
Expand All @@ -356,10 +356,10 @@ typedef struct {
union {
Hacl_Hash_Blake2s_state_t *blake2s_state;
Hacl_Hash_Blake2b_state_t *blake2b_state;
#ifdef HACL_CAN_COMPILE_SIMD128
#if _Py_HACL_CAN_COMPILE_VEC128
Hacl_Hash_Blake2s_Simd128_state_t *blake2s_128_state;
#endif
#ifdef HACL_CAN_COMPILE_SIMD256
#if _Py_HACL_CAN_COMPILE_VEC256
Hacl_Hash_Blake2b_Simd256_state_t *blake2b_256_state;
#endif
};
Expand Down Expand Up @@ -425,14 +425,14 @@ static void
update(Blake2Object *self, uint8_t *buf, Py_ssize_t len)
{
switch (self->impl) {
// These need to be ifdef'd out otherwise it's an unresolved symbol at
// link-time.
#ifdef HACL_CAN_COMPILE_SIMD256
// blake2b_256_state and blake2s_128_state must be guarded by #if since
// otherwise this results in an unresolved symbol at link-time.
#if _Py_HACL_CAN_COMPILE_VEC256
case Blake2b_256:
HACL_UPDATE(Hacl_Hash_Blake2b_Simd256_update,self->blake2b_256_state, buf, len);
return;
#endif
#ifdef HACL_CAN_COMPILE_SIMD128
#if _Py_HACL_CAN_COMPILE_VEC128
case Blake2s_128:
HACL_UPDATE(Hacl_Hash_Blake2s_Simd128_update,self->blake2s_128_state, buf, len);
return;
Expand Down Expand Up @@ -468,12 +468,12 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size,
// Ensure that the states are NULL-initialized in case of an error.
// See: py_blake2_clear() for more details.
switch (self->impl) {
#if HACL_CAN_COMPILE_SIMD256
#if _Py_HACL_CAN_COMPILE_VEC256
case Blake2b_256:
self->blake2b_256_state = NULL;
break;
#endif
#if HACL_CAN_COMPILE_SIMD128
#if _Py_HACL_CAN_COMPILE_VEC128
case Blake2s_128:
self->blake2s_128_state = NULL;
break;
Expand Down Expand Up @@ -591,7 +591,7 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size,
};

switch (self->impl) {
#if HACL_CAN_COMPILE_SIMD256
#if _Py_HACL_CAN_COMPILE_VEC256
case Blake2b_256: {
self->blake2b_256_state = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(&params, last_node, key->buf);
if (self->blake2b_256_state == NULL) {
Expand All @@ -601,7 +601,7 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size,
break;
}
#endif
#if HACL_CAN_COMPILE_SIMD128
#if _Py_HACL_CAN_COMPILE_VEC128
case Blake2s_128: {
self->blake2s_128_state = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(&params, last_node, key->buf);
if (self->blake2s_128_state == NULL) {
Expand Down Expand Up @@ -733,7 +733,7 @@ blake2_blake2b_copy_locked(Blake2Object *self, Blake2Object *cpy)
{
assert(cpy != NULL);
switch (self->impl) {
#if HACL_CAN_COMPILE_SIMD256
#if _Py_HACL_CAN_COMPILE_VEC256
case Blake2b_256: {
cpy->blake2b_256_state = Hacl_Hash_Blake2b_Simd256_copy(self->blake2b_256_state);
if (cpy->blake2b_256_state == NULL) {
Expand All @@ -742,7 +742,7 @@ blake2_blake2b_copy_locked(Blake2Object *self, Blake2Object *cpy)
break;
}
#endif
#if HACL_CAN_COMPILE_SIMD128
#if _Py_HACL_CAN_COMPILE_VEC128
case Blake2s_128: {
cpy->blake2s_128_state = Hacl_Hash_Blake2s_Simd128_copy(self->blake2s_128_state);
if (cpy->blake2s_128_state == NULL) {
Expand Down Expand Up @@ -853,12 +853,12 @@ _blake2_blake2b_digest_impl(Blake2Object *self)
ENTER_HASHLIB(self);
uint8_t digest_length = 0;
switch (self->impl) {
#if HACL_CAN_COMPILE_SIMD256
#if _Py_HACL_CAN_COMPILE_VEC256
case Blake2b_256:
digest_length = Hacl_Hash_Blake2b_Simd256_digest(self->blake2b_256_state, digest);
break;
#endif
#if HACL_CAN_COMPILE_SIMD128
#if _Py_HACL_CAN_COMPILE_VEC128
case Blake2s_128:
digest_length = Hacl_Hash_Blake2s_Simd128_digest(self->blake2s_128_state, digest);
break;
Expand Down Expand Up @@ -891,12 +891,12 @@ _blake2_blake2b_hexdigest_impl(Blake2Object *self)
ENTER_HASHLIB(self);
uint8_t digest_length = 0;
switch (self->impl) {
#if HACL_CAN_COMPILE_SIMD256
#if _Py_HACL_CAN_COMPILE_VEC256
case Blake2b_256:
digest_length = Hacl_Hash_Blake2b_Simd256_digest(self->blake2b_256_state, digest);
break;
#endif
#if HACL_CAN_COMPILE_SIMD128
#if _Py_HACL_CAN_COMPILE_VEC128
case Blake2s_128:
digest_length = Hacl_Hash_Blake2s_Simd128_digest(self->blake2s_128_state, digest);
break;
Expand Down Expand Up @@ -947,11 +947,11 @@ py_blake2b_get_digest_size(PyObject *op, void *Py_UNUSED(closure))
{
Blake2Object *self = _Blake2Object_CAST(op);
switch (self->impl) {
#if HACL_CAN_COMPILE_SIMD256
#if _Py_HACL_CAN_COMPILE_VEC256
case Blake2b_256:
return PyLong_FromLong(Hacl_Hash_Blake2b_Simd256_info(self->blake2b_256_state).digest_length);
#endif
#if HACL_CAN_COMPILE_SIMD128
#if _Py_HACL_CAN_COMPILE_VEC128
case Blake2s_128:
return PyLong_FromLong(Hacl_Hash_Blake2s_Simd128_info(self->blake2s_128_state).digest_length);
#endif
Expand Down Expand Up @@ -982,15 +982,15 @@ py_blake2_clear(PyObject *op)
// it. If an error occurs in the constructor, we should only free
// states that were allocated (i.e. that are not NULL).
switch (self->impl) {
#if HACL_CAN_COMPILE_SIMD256
#if _Py_HACL_CAN_COMPILE_VEC256
case Blake2b_256:
if (self->blake2b_256_state != NULL) {
Hacl_Hash_Blake2b_Simd256_free(self->blake2b_256_state);
self->blake2b_256_state = NULL;
}
break;
#endif
#if HACL_CAN_COMPILE_SIMD128
#if _Py_HACL_CAN_COMPILE_VEC128
case Blake2s_128:
if (self->blake2s_128_state != NULL) {
Hacl_Hash_Blake2s_Simd128_free(self->blake2s_128_state);
Expand Down
21 changes: 11 additions & 10 deletions Modules/hmacmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,15 @@
#endif

#if defined(__APPLE__) && defined(__arm64__)
# undef HACL_CAN_COMPILE_SIMD128
# undef HACL_CAN_COMPILE_SIMD256
# undef _Py_HACL_CAN_COMPILE_VEC128
# undef _Py_HACL_CAN_COMPILE_VEC256
#endif

// Small mismatch between the variable names Python defines as part of configure
// and the ones HACL* expects to be set in order to enable those headers.
#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128
#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256
// HACL* expects HACL_CAN_COMPILE_VEC* macros to be set in order to enable
// the corresponding SIMD instructions so we need to "forward" the values
// we just deduced above.
#define HACL_CAN_COMPILE_VEC128 _Py_HACL_CAN_COMPILE_VEC128
#define HACL_CAN_COMPILE_VEC256 _Py_HACL_CAN_COMPILE_VEC256

#include "_hacl/Hacl_HMAC.h"
#include "_hacl/Hacl_Streaming_HMAC.h" // Hacl_Agile_Hash_* identifiers
Expand Down Expand Up @@ -464,15 +465,15 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind)
{
switch (kind) {
case Py_hmac_kind_hmac_blake2s_32: {
#if HACL_CAN_COMPILE_SIMD128
#if _Py_HACL_CAN_COMPILE_VEC128
if (state->can_run_simd128) {
return Py_hmac_kind_hmac_vectorized_blake2s_32;
}
#endif
return kind;
}
case Py_hmac_kind_hmac_blake2b_32: {
#if HACL_CAN_COMPILE_SIMD256
#if _Py_HACL_CAN_COMPILE_VEC256
if (state->can_run_simd256) {
return Py_hmac_kind_hmac_vectorized_blake2b_32;
}
Expand Down Expand Up @@ -1761,7 +1762,7 @@ hmacmodule_init_cpu_features(hmacmodule_state *state)
#undef ECX_SSE3
#undef EBX_AVX2

#if HACL_CAN_COMPILE_SIMD128
#if _Py_HACL_CAN_COMPILE_VEC128
// TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection
state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov;
#else
Expand All @@ -1771,7 +1772,7 @@ hmacmodule_init_cpu_features(hmacmodule_state *state)
state->can_run_simd128 = false;
#endif

#if HACL_CAN_COMPILE_SIMD256
#if _Py_HACL_CAN_COMPILE_VEC256
// TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection
state->can_run_simd256 = state->can_run_simd128 && avx && avx2;
#else
Expand Down
8 changes: 6 additions & 2 deletions PCbuild/pythoncore.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -419,8 +419,12 @@
<ClCompile Include="..\Modules\_abc.c" />
<ClCompile Include="..\Modules\_bisectmodule.c" />
<ClCompile Include="..\Modules\blake2module.c">
<PreprocessorDefinitions Condition="'$(Platform)' == 'x64'">HACL_CAN_COMPILE_SIMD128;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(Platform)' == 'x64'">HACL_CAN_COMPILE_SIMD256;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(Platform)' == 'x64'">
_Py_HACL_CAN_COMPILE_VEC128;%(PreprocessorDefinitions)
</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(Platform)' == 'x64'">
_Py_HACL_CAN_COMPILE_VEC256;%(PreprocessorDefinitions)
</PreprocessorDefinitions>
</ClCompile>
<ClCompile Include="..\Modules\_codecsmodule.c" />
<ClCompile Include="..\Modules\_collectionsmodule.c" />
Expand Down
4 changes: 2 additions & 2 deletions configure

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -8017,7 +8017,8 @@ then
AX_CHECK_COMPILE_FLAG([-msse -msse2 -msse3 -msse4.1 -msse4.2],[
[LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2"]

AC_DEFINE([HACL_CAN_COMPILE_SIMD128], [1], [HACL* library can compile SIMD128 implementations])
AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC128], [1], [
HACL* library can compile SIMD128 implementations])

# macOS universal2 builds *support* the -msse etc flags because they're
# available on x86_64. However, performance of the HACL SIMD128 implementation
Expand Down Expand Up @@ -8048,7 +8049,8 @@ if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \
then
AX_CHECK_COMPILE_FLAG([-mavx2],[
[LIBHACL_SIMD256_FLAGS="-mavx2"]
AC_DEFINE([HACL_CAN_COMPILE_SIMD256], [1], [HACL* library can compile SIMD256 implementations])
AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC256], [1], [
HACL* library can compile SIMD256 implementations])

# macOS universal2 builds *support* the -mavx2 compiler flag because it's
# available on x86_64; but the HACL SIMD256 build then fails because the
Expand Down
12 changes: 6 additions & 6 deletions pyconfig.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,6 @@
/* Define if getpgrp() must be called as getpgrp(0). */
#undef GETPGRP_HAVE_ARG

/* HACL* library can compile SIMD128 implementations */
#undef HACL_CAN_COMPILE_SIMD128

/* HACL* library can compile SIMD256 implementations */
#undef HACL_CAN_COMPILE_SIMD256

/* Define if you have the 'accept' function. */
#undef HAVE_ACCEPT

Expand Down Expand Up @@ -2026,6 +2020,12 @@
/* Defined if _Complex C type can be used with libffi. */
#undef _Py_FFI_SUPPORT_C_COMPLEX

/* HACL* library can compile SIMD128 implementations */
#undef _Py_HACL_CAN_COMPILE_VEC128

/* HACL* library can compile SIMD256 implementations */
#undef _Py_HACL_CAN_COMPILE_VEC256

/* Define to force use of thread-safe errno, h_errno, and other functions */
#undef _REENTRANT

Expand Down
Loading
0