CI: Replace QEMU armhf with native (32-bit compatibility mode) by seiko2plus · Pull Request #28653 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

CI: Replace QEMU armhf with native (32-bit compatibility mode) #28653

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 6, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
BUG, SIMD: Fix ARMv8 feature detection in 32-bit mode
  Fix detection of `FPHP`, `ASIMDHP`, `ASIMDDP`, `ASIMDFHM` features
  on ARMv8 32-bit mode (aarch32). Fix memory leaks in CPU feature
  detection on Android by adding missing free() calls.
  • Loading branch information
seiko2plus committed Apr 6, 2025
commit 0668e83d4f9a7cdadb6d2e3a1014246af68d6564
41 changes: 20 additions & 21 deletions numpy/_core/src/common/npy_cpu_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -772,34 +772,33 @@ npy__cpu_init_features_linux(void)
#endif
}
#ifdef __arm__
npy__cpu_have[NPY_CPU_FEATURE_NEON] = (hwcap & NPY__HWCAP_NEON) != 0;
if (npy__cpu_have[NPY_CPU_FEATURE_NEON]) {
npy__cpu_have[NPY_CPU_FEATURE_NEON_FP16] = (hwcap & NPY__HWCAP_HALF) != 0;
npy__cpu_have[NPY_CPU_FEATURE_NEON_VFPV4] = (hwcap & NPY__HWCAP_VFPv4) != 0;
}
// Detect Arm8 (aarch32 state)
if ((hwcap2 & NPY__HWCAP2_AES) || (hwcap2 & NPY__HWCAP2_SHA1) ||
(hwcap2 & NPY__HWCAP2_SHA2) || (hwcap2 & NPY__HWCAP2_PMULL) ||
(hwcap2 & NPY__HWCAP2_CRC32))
{
hwcap = hwcap2;
npy__cpu_have[NPY_CPU_FEATURE_ASIMD] = npy__cpu_have[NPY_CPU_FEATURE_NEON];
}
#else
if (1)
{
if (!(hwcap & (NPY__HWCAP_FP | NPY__HWCAP_ASIMD))) {
// Could this happen? Maybe if it is disabled by the kernel.
// BTW this will break the baseline of AARCH64
return 1;
}
#endif
npy__cpu_have[NPY_CPU_FEATURE_FPHP] = (hwcap & NPY__HWCAP_FPHP) != 0;
npy__cpu_have[NPY_CPU_FEATURE_ASIMDHP] = (hwcap & NPY__HWCAP_ASIMDHP) != 0;
npy__cpu_have[NPY_CPU_FEATURE_ASIMDDP] = (hwcap & NPY__HWCAP_ASIMDDP) != 0;
npy__cpu_have[NPY_CPU_FEATURE_ASIMDFHM] = (hwcap & NPY__HWCAP_ASIMDFHM) != 0;
npy__cpu_have[NPY_CPU_FEATURE_SVE] = (hwcap & NPY__HWCAP_SVE) != 0;
npy__cpu_init_features_arm8();
} else {
npy__cpu_have[NPY_CPU_FEATURE_NEON] = (hwcap & NPY__HWCAP_NEON) != 0;
if (npy__cpu_have[NPY_CPU_FEATURE_NEON]) {
npy__cpu_have[NPY_CPU_FEATURE_NEON_FP16] = (hwcap & NPY__HWCAP_HALF) != 0;
npy__cpu_have[NPY_CPU_FEATURE_NEON_VFPV4] = (hwcap & NPY__HWCAP_VFPv4) != 0;
}
if (!(hwcap & (NPY__HWCAP_FP | NPY__HWCAP_ASIMD))) {
// Could this happen? Maybe if it is disabled by the kernel.
// BTW this will break the baseline of AARCH64
return 1;
}
npy__cpu_init_features_arm8();
#endif
npy__cpu_have[NPY_CPU_FEATURE_FPHP] = (hwcap & NPY__HWCAP_FPHP) != 0;
npy__cpu_have[NPY_CPU_FEATURE_ASIMDHP] = (hwcap & NPY__HWCAP_ASIMDHP) != 0;
npy__cpu_have[NPY_CPU_FEATURE_ASIMDDP] = (hwcap & NPY__HWCAP_ASIMDDP) != 0;
npy__cpu_have[NPY_CPU_FEATURE_ASIMDFHM] = (hwcap & NPY__HWCAP_ASIMDFHM) != 0;
#ifndef __arm__
npy__cpu_have[NPY_CPU_FEATURE_SVE] = (hwcap & NPY__HWCAP_SVE) != 0;
#endif
return 1;
}
#endif
Expand Down
113 changes: 69 additions & 44 deletions numpy/_core/src/common/npy_cpuinfo_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,25 +36,43 @@
#define NPY__HWCAP 16
#define NPY__HWCAP2 26

// arch/arm/include/uapi/asm/hwcap.h
#define NPY__HWCAP_HALF (1 << 1)
#define NPY__HWCAP_NEON (1 << 12)
#define NPY__HWCAP_VFPv3 (1 << 13)
#define NPY__HWCAP_VFPv4 (1 << 16)
#define NPY__HWCAP2_AES (1 << 0)
#define NPY__HWCAP2_PMULL (1 << 1)
#define NPY__HWCAP2_SHA1 (1 << 2)
#define NPY__HWCAP2_SHA2 (1 << 3)
#define NPY__HWCAP2_CRC32 (1 << 4)
// arch/arm64/include/uapi/asm/hwcap.h
#define NPY__HWCAP_FP (1 << 0)
#define NPY__HWCAP_ASIMD (1 << 1)
#define NPY__HWCAP_FPHP (1 << 9)
#define NPY__HWCAP_ASIMDHP (1 << 10)
#define NPY__HWCAP_ASIMDDP (1 << 20)
#define NPY__HWCAP_SVE (1 << 22)
#define NPY__HWCAP_ASIMDFHM (1 << 23)
/*
#ifdef __arm__
// arch/arm/include/uapi/asm/hwcap.h
#define NPY__HWCAP_HALF (1 << 1)
#define NPY__HWCAP_NEON (1 << 12)
#define NPY__HWCAP_VFPv3 (1 << 13)
#define NPY__HWCAP_VFPv4 (1 << 16)

#define NPY__HWCAP_FPHP (1 << 22)
#define NPY__HWCAP_ASIMDHP (1 << 23)
#define NPY__HWCAP_ASIMDDP (1 << 24)
#define NPY__HWCAP_ASIMDFHM (1 << 25)

#define NPY__HWCAP2_AES (1 << 0)
#define NPY__HWCAP2_PMULL (1 << 1)
#define NPY__HWCAP2_SHA1 (1 << 2)
#define NPY__HWCAP2_SHA2 (1 << 3)
#define NPY__HWCAP2_CRC32 (1 << 4)
#else
// arch/arm64/include/uapi/asm/hwcap.h
#define NPY__HWCAP_FP (1 << 0)
#define NPY__HWCAP_ASIMD (1 << 1)

#define NPY__HWCAP_FPHP (1 << 9)
#define NPY__HWCAP_ASIMDHP (1 << 10)
#define NPY__HWCAP_ASIMDDP (1 << 20)
#define NPY__HWCAP_ASIMDFHM (1 << 23)

#define NPY__HWCAP_AES (1 << 3)
#define NPY__HWCAP_PMULL (1 << 4)
#define NPY__HWCAP_SHA1 (1 << 5)
#define NPY__HWCAP_SHA2 (1 << 6)
#define NPY__HWCAP_CRC32 (1 << 7)
#define NPY__HWCAP_SVE (1 << 22)
#endif


/*
* Get the size of a file by reading it until the end. This is needed
* because files under /proc do not always return a valid size when
* using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed.
Expand Down Expand Up @@ -87,7 +105,7 @@ get_file_size(const char* pathname)
return result;
}

/*
/*
* Read the content of /proc/cpuinfo into a user-provided buffer.
* Return the length of the data, or -1 on error. Does *not*
* zero-terminate the content. Will not read more
Expand Down Expand Up @@ -123,7 +141,7 @@ read_file(const char* pathname, char* buffer, size_t buffsize)
return count;
}

/*
/*
* Extract the content of the first occurrence of a given field in
* the content of /proc/cpuinfo and return it as a heap-allocated
* string that must be freed by the caller.
Expand Down Expand Up @@ -182,7 +200,7 @@ extract_cpuinfo_field(const char* buffer, int buflen, const char* field)
return result;
}

/*
/*
* Checks that a space-separated list of items contains one given 'item'.
* Returns 1 if found, 0 otherwise.
*/
Expand Down Expand Up @@ -220,44 +238,51 @@ has_list_item(const char* list, const char* item)
return 0;
}

/*
 * Translate feature names into hwcap bits.
 *
 * For each known feature name, checks via has_list_item() whether it is
 * present in the space-separated list 'cpuFeatures' and, if so, ORs the
 * matching NPY__HWCAP_* bit into '*hwcap'. Bits already set in '*hwcap'
 * are never cleared; names that are absent contribute 0.
 *
 * The caller in this file passes the "Features" field extracted from
 * /proc/cpuinfo as 'cpuFeatures'.
 */
static void setHwcap(char* cpuFeatures, unsigned long* hwcap) {
// Names whose bits are listed under arch/arm/include/uapi/asm/hwcap.h
*hwcap |= has_list_item(cpuFeatures, "neon") ? NPY__HWCAP_NEON : 0;
*hwcap |= has_list_item(cpuFeatures, "half") ? NPY__HWCAP_HALF : 0;
*hwcap |= has_list_item(cpuFeatures, "vfpv3") ? NPY__HWCAP_VFPv3 : 0;
*hwcap |= has_list_item(cpuFeatures, "vfpv4") ? NPY__HWCAP_VFPv4 : 0;

// Names whose bits are listed under arch/arm64/include/uapi/asm/hwcap.h
*hwcap |= has_list_item(cpuFeatures, "asimd") ? NPY__HWCAP_ASIMD : 0;
*hwcap |= has_list_item(cpuFeatures, "fp") ? NPY__HWCAP_FP : 0;
*hwcap |= has_list_item(cpuFeatures, "fphp") ? NPY__HWCAP_FPHP : 0;
*hwcap |= has_list_item(cpuFeatures, "asimdhp") ? NPY__HWCAP_ASIMDHP : 0;
*hwcap |= has_list_item(cpuFeatures, "asimddp") ? NPY__HWCAP_ASIMDDP : 0;
*hwcap |= has_list_item(cpuFeatures, "asimdfhm") ? NPY__HWCAP_ASIMDFHM : 0;
}

static int
get_feature_from_proc_cpuinfo(unsigned long *hwcap, unsigned long *hwcap2) {
char* cpuinfo = NULL;
int cpuinfo_len;
cpuinfo_len = get_file_size("/proc/cpuinfo");
*hwcap = 0;
*hwcap2 = 0;

int cpuinfo_len = get_file_size("/proc/cpuinfo");
if (cpuinfo_len < 0) {
return 0;
}
cpuinfo = malloc(cpuinfo_len);
char *cpuinfo = malloc(cpuinfo_len);
if (cpuinfo == NULL) {
return 0;
}

cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len);
char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
if(cpuFeatures == NULL) {
char *cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
if (cpuFeatures == NULL) {
free(cpuinfo);
return 0;
}
setHwcap(cpuFeatures, hwcap);
*hwcap2 |= *hwcap;
*hwcap |= has_list_item(cpuFeatures, "fphp") ? NPY__HWCAP_FPHP : 0;
*hwcap |= has_list_item(cpuFeatures, "asimdhp") ? NPY__HWCAP_ASIMDHP : 0;
*hwcap |= has_list_item(cpuFeatures, "asimddp") ? NPY__HWCAP_ASIMDDP : 0;
*hwcap |= has_list_item(cpuFeatures, "asimdfhm") ? NPY__HWCAP_ASIMDFHM : 0;
#ifdef __arm__
*hwcap |= has_list_item(cpuFeatures, "neon") ? NPY__HWCAP_NEON : 0;
*hwcap |= has_list_item(cpuFeatures, "half") ? NPY__HWCAP_HALF : 0;
*hwcap |= has_list_item(cpuFeatures, "vfpv3") ? NPY__HWCAP_VFPv3 : 0;
*hwcap |= has_list_item(cpuFeatures, "vfpv4") ? NPY__HWCAP_VFPv4 : 0;
*hwcap2 |= has_list_item(cpuFeatures, "aes") ? NPY__HWCAP2_AES : 0;
*hwcap2 |= has_list_item(cpuFeatures, "pmull") ? NPY__HWCAP2_PMULL : 0;
*hwcap2 |= has_list_item(cpuFeatures, "sha1") ? NPY__HWCAP2_SHA1 : 0;
*hwcap2 |= has_list_item(cpuFeatures, "sha2") ? NPY__HWCAP2_SHA2 : 0;
*hwcap2 |= has_list_item(cpuFeatures, "crc32") ? NPY__HWCAP2_CRC32 : 0;
#else
*hwcap |= has_list_item(cpuFeatures, "asimd") ? NPY__HWCAP_ASIMD : 0;
*hwcap |= has_list_item(cpuFeatures, "fp") ? NPY__HWCAP_FP : 0;
*hwcap |= has_list_item(cpuFeatures, "aes") ? NPY__HWCAP_AES : 0;
*hwcap |= has_list_item(cpuFeatures, "pmull") ? NPY__HWCAP_PMULL : 0;
*hwcap |= has_list_item(cpuFeatures, "sha1") ? NPY__HWCAP_SHA1 : 0;
*hwcap |= has_list_item(cpuFeatures, "sha2") ? NPY__HWCAP_SHA2 : 0;
*hwcap |= has_list_item(cpuFeatures, "crc32") ? NPY__HWCAP_CRC32 : 0;
#endif
free(cpuinfo);
free(cpuFeatures);
return 1;
}
#endif /* NUMPY_CORE_SRC_COMMON_NPY_CPUINFO_PARSER_H_ */
7 changes: 5 additions & 2 deletions numpy/_core/tests/test_cpu_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,8 +407,11 @@ class Test_ARM_Features(AbstractTest):
def load_flags(self):
self.load_flags_cpuinfo("Features")
arch = self.get_cpuinfo_item("CPU architecture")
# in case of mounting virtual filesystem of aarch64 kernel
is_rootfs_v8 = int('0' + next(iter(arch))) > 7 if arch else 0
# in case of mounting virtual filesystem of aarch64 kernel without linux32
is_rootfs_v8 = (
not re.match("^armv[0-9]+l$", machine) and
(int('0' + next(iter(arch))) > 7 if arch else 0)
)
if re.match("^(aarch64|AARCH64)", machine) or is_rootfs_v8:
self.features_map = {
"NEON": "ASIMD", "HALF": "ASIMD", "VFPV4": "ASIMD"
Expand Down
Loading
0