BUG, SIMD: Fix ARMv8 feature detection in 32-bit mode · numpy/numpy@37d82ae · GitHub
[go: up one dir, main page]

Skip to content

Commit 37d82ae

Browse files
committed
BUG, SIMD: Fix ARMv8 feature detection in 32-bit mode
Fix detection of `FPHP`, `ASIMDHP`, `ASIMDDP`, `ASIMDFHM` features on ARMv8 32-bit mode (aarch32). Fix memory leaks in CPU feature detection on Android by adding missing free() calls.
1 parent a3828f1 commit 37d82ae

File tree

3 files changed

+114
-87
lines changed

3 files changed

+114
-87
lines changed

numpy/_core/src/common/npy_cpu_features.c

Lines changed: 40 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -689,15 +689,6 @@ npy__cpu_init_features(void)
689689

690690
#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM64)
691691

692-
static inline void
693-
npy__cpu_init_features_arm8(void)
694-
{
695-
npy__cpu_have[NPY_CPU_FEATURE_NEON] =
696-
npy__cpu_have[NPY_CPU_FEATURE_NEON_FP16] =
697-
npy__cpu_have[NPY_CPU_FEATURE_NEON_VFPV4] =
698-
npy__cpu_have[NPY_CPU_FEATURE_ASIMD] = 1;
699-
}
700-
701692
#if defined(__linux__) || defined(__FreeBSD__)
702693
/*
703694
* we aren't sure what kind of kernel or clib we are dealing with
@@ -708,10 +699,9 @@ npy__cpu_init_features_arm8(void)
708699

709700
#if defined(__linux__)
710701
__attribute__((weak)) unsigned long getauxval(unsigned long); // linker should handle it
711-
#endif
712-
#ifdef __FreeBSD__
713-
__attribute__((weak)) int elf_aux_info(int, void *, int); // linker should handle it
714702

703+
#elif defined(__FreeBSD__)
704+
__attribute__((weak)) int elf_aux_info(int, void *, int); // linker should handle it
715705
static unsigned long getauxval(unsigned long k)
716706
{
717707
unsigned long val = 0ul;
@@ -721,11 +711,12 @@ static unsigned long getauxval(unsigned long k)
721711
return val;
722712
}
723713
#endif
714+
724715
static int
725716
npy__cpu_init_features_linux(void)
726717
{
727718
unsigned long hwcap = 0, hwcap2 = 0;
728-
#ifdef __linux__
719+
#ifdef __linux__
729720
if (getauxval != 0) {
730721
hwcap = getauxval(NPY__HWCAP);
731722
#ifdef __arm__
@@ -752,56 +743,61 @@ npy__cpu_init_features_linux(void)
752743
close(fd);
753744
}
754745
}
755-
#else
746+
#else // freebsd
756747
hwcap = getauxval(NPY__HWCAP);
757748
#ifdef __arm__
758749
hwcap2 = getauxval(NPY__HWCAP2);
759750
#endif
760-
#endif
751+
#endif
752+
761753
if (hwcap == 0 && hwcap2 == 0) {
762-
#ifdef __linux__
754+
#ifdef __linux__
763755
/*
764756
* try parsing with /proc/cpuinfo, if sandboxed
765757
* fall back to compiler definitions
766758
*/
767-
if(!get_feature_from_proc_cpuinfo(&hwcap, &hwcap2)) {
759+
if (!get_feature_from_proc_cpuinfo(&hwcap, &hwcap2)) {
768760
return 0;
769761
}
770-
#else
762+
#else // freebsd
771763
return 0;
772-
#endif
764+
#endif
773765
}
766+
774767
#ifdef __arm__
768+
npy__cpu_have[NPY_CPU_FEATURE_NEON] = (hwcap & NPY__HWCAP_NEON) != 0;
769+
if (npy__cpu_have[NPY_CPU_FEATURE_NEON]) {
770+
npy__cpu_have[NPY_CPU_FEATURE_NEON_FP16] = (hwcap & NPY__HWCAP_HALF) != 0;
771+
npy__cpu_have[NPY_CPU_FEATURE_NEON_VFPV4] = (hwcap & NPY__HWCAP_VFPv4) != 0;
772+
}
775773
// Detect Arm8 (aarch32 state)
776774
if ((hwcap2 & NPY__HWCAP2_AES) || (hwcap2 & NPY__HWCAP2_SHA1) ||
777775
(hwcap2 & NPY__HWCAP2_SHA2) || (hwcap2 & NPY__HWCAP2_PMULL) ||
778776
(hwcap2 & NPY__HWCAP2_CRC32))
779777
{
780-
hwcap = hwcap2;
778+
npy__cpu_have[NPY_CPU_FEATURE_ASIMD] = npy__cpu_have[NPY_CPU_FEATURE_NEON];
779+
}
781780
#else
782-
if (1)
783-
{
784-
if (!(hwcap & (NPY__HWCAP_FP | NPY__HWCAP_ASIMD))) {
785-
// Can this happen? Maybe disabled by the kernel
786-
// BTW this will break the baseline of AARCH64
787-
return 1;
788-
}
789-
#endif
790-
npy__cpu_have[NPY_CPU_FEATURE_FPHP] = (hwcap & NPY__HWCAP_FPHP) != 0;
791-
npy__cpu_have[NPY_CPU_FEATURE_ASIMDHP] = (hwcap & NPY__HWCAP_ASIMDHP) != 0;
792-
npy__cpu_have[NPY_CPU_FEATURE_ASIMDDP] = (hwcap & NPY__HWCAP_ASIMDDP) != 0;
793-
npy__cpu_have[NPY_CPU_FEATURE_ASIMDFHM] = (hwcap & NPY__HWCAP_ASIMDFHM) != 0;
794-
npy__cpu_have[NPY_CPU_FEATURE_SVE] = (hwcap & NPY__HWCAP_SVE) != 0;
795-
npy__cpu_init_features_arm8();
796-
} else {
797-
npy__cpu_have[NPY_CPU_FEATURE_NEON] = (hwcap & NPY__HWCAP_NEON) != 0;
798-
if (npy__cpu_have[NPY_CPU_FEATURE_NEON]) {
799-
npy__cpu_have[NPY_CPU_FEATURE_NEON_FP16] = (hwcap & NPY__HWCAP_HALF) != 0;
800-
npy__cpu_have[NPY_CPU_FEATURE_NEON_VFPV4] = (hwcap & NPY__HWCAP_VFPv4) != 0;
801-
}
781+
if (!(hwcap & (NPY__HWCAP_FP | NPY__HWCAP_ASIMD))) {
782+
// Can this happen? Maybe disabled by the kernel
783+
// BTW this will break the baseline of AARCH64
784+
return 1;
802785
}
786+
npy__cpu_have[NPY_CPU_FEATURE_NEON] =
787+
npy__cpu_have[NPY_CPU_FEATURE_NEON_FP16] =
788+
npy__cpu_have[NPY_CPU_FEATURE_NEON_VFPV4] =
789+
npy__cpu_have[NPY_CPU_FEATURE_ASIMD] = 1;
790+
#endif
791+
npy__cpu_have[NPY_CPU_FEATURE_FPHP] = (hwcap & NPY__HWCAP_FPHP) != 0;
792+
npy__cpu_have[NPY_CPU_FEATURE_ASIMDHP] = (hwcap & NPY__HWCAP_ASIMDHP) != 0;
793+
npy__cpu_have[NPY_CPU_FEATURE_ASIMDDP] = (hwcap & NPY__HWCAP_ASIMDDP) != 0;
794+
npy__cpu_have[NPY_CPU_FEATURE_ASIMDFHM] = (hwcap & NPY__HWCAP_ASIMDFHM) != 0;
795+
#ifndef __arm__
796+
npy__cpu_have[NPY_CPU_FEATURE_SVE] = (hwcap & NPY__HWCAP_SVE) != 0;
797+
#endif
803798
return 1;
804799
}
800+
805801
#endif
806802

807803
static void
@@ -829,7 +825,10 @@ npy__cpu_init_features(void)
829825
#if defined(NPY_HAVE_SVE) || defined(__ARM_FEATURE_SVE)
830826
npy__cpu_have[NPY_CPU_FEATURE_SVE] = 1;
831827
#endif
832-
npy__cpu_init_features_arm8();
828+
npy__cpu_have[NPY_CPU_FEATURE_NEON] =
829+
npy__cpu_have[NPY_CPU_FEATURE_NEON_FP16] =
830+
npy__cpu_have[NPY_CPU_FEATURE_NEON_VFPV4] =
831+
npy__cpu_have[NPY_CPU_FEATURE_ASIMD] = 1;
833832
#else
834833
#if defined(NPY_HAVE_NEON) || defined(__ARM_NEON__)
835834
npy__cpu_have[NPY_CPU_FEATURE_NEON] = 1;

numpy/_core/src/common/npy_cpuinfo_parser.h

Lines changed: 69 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -36,25 +36,43 @@
3636
#define NPY__HWCAP 16
3737
#define NPY__HWCAP2 26
3838

39-
// arch/arm/include/uapi/asm/hwcap.h
40-
#define NPY__HWCAP_HALF (1 << 1)
41-
#define NPY__HWCAP_NEON (1 << 12)
42-
#define NPY__HWCAP_VFPv3 (1 << 13)
43-
#define NPY__HWCAP_VFPv4 (1 << 16)
44-
#define NPY__HWCAP2_AES (1 << 0)
45-
#define NPY__HWCAP2_PMULL (1 << 1)
46-
#define NPY__HWCAP2_SHA1 (1 << 2)
47-
#define NPY__HWCAP2_SHA2 (1 << 3)
48-
#define NPY__HWCAP2_CRC32 (1 << 4)
49-
// arch/arm64/include/uapi/asm/hwcap.h
50-
#define NPY__HWCAP_FP (1 << 0)
51-
#define NPY__HWCAP_ASIMD (1 << 1)
52-
#define NPY__HWCAP_FPHP (1 << 9)
53-
#define NPY__HWCAP_ASIMDHP (1 << 10)
54-
#define NPY__HWCAP_ASIMDDP (1 << 20)
55-
#define NPY__HWCAP_SVE (1 << 22)
56-
#define NPY__HWCAP_ASIMDFHM (1 << 23)
57-
/*
39+
#ifdef __arm__
40+
// arch/arm/include/uapi/asm/hwcap.h
41+
#define NPY__HWCAP_HALF (1 << 1)
42+
#define NPY__HWCAP_NEON (1 << 12)
43+
#define NPY__HWCAP_VFPv3 (1 << 13)
44+
#define NPY__HWCAP_VFPv4 (1 << 16)
45+
46+
#define NPY__HWCAP_FPHP (1 << 22)
47+
#define NPY__HWCAP_ASIMDHP (1 << 23)
48+
#define NPY__HWCAP_ASIMDDP (1 << 24)
49+
#define NPY__HWCAP_ASIMDFHM (1 << 25)
50+
51+
#define NPY__HWCAP2_AES (1 << 0)
52+
#define NPY__HWCAP2_PMULL (1 << 1)
53+
#define NPY__HWCAP2_SHA1 (1 << 2)
54+
#define NPY__HWCAP2_SHA2 (1 << 3)
55+
#define NPY__HWCAP2_CRC32 (1 << 4)
56+
#else
57+
// arch/arm64/include/uapi/asm/hwcap.h
58+
#define NPY__HWCAP_FP (1 << 0)
59+
#define NPY__HWCAP_ASIMD (1 << 1)
60+
61+
#define NPY__HWCAP_FPHP (1 << 9)
62+
#define NPY__HWCAP_ASIMDHP (1 << 10)
63+
#define NPY__HWCAP_ASIMDDP (1 << 20)
64+
#define NPY__HWCAP_ASIMDFHM (1 << 23)
65+
66+
#define NPY__HWCAP_AES (1 << 3)
67+
#define NPY__HWCAP_PMULL (1 << 4)
68+
#define NPY__HWCAP_SHA1 (1 << 5)
69+
#define NPY__HWCAP_SHA2 (1 << 6)
70+
#define NPY__HWCAP_CRC32 (1 << 7)
71+
#define NPY__HWCAP_SVE (1 << 22)
72+
#endif
73+
74+
75+
/*
5876
* Get the size of a file by reading it until the end. This is needed
5977
* because files under /proc do not always return a valid size when
6078
* using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed.
@@ -87,7 +105,7 @@ get_file_size(const char* pathname)
87105
return result;
88106
}
89107

90-
/*
108+
/*
91109
* Read the content of /proc/cpuinfo into a user-provided buffer.
92110
* Return the length of the data, or -1 on error. Does *not*
93111
* zero-terminate the content. Will not read more
@@ -123,7 +141,7 @@ read_file(const char* pathname, char* buffer, size_t buffsize)
123141
return count;
124142
}
125143

126-
/*
144+
/*
127145
* Extract the content of the first occurrence of a given field in
128146
* the content of /proc/cpuinfo and return it as a heap-allocated
129147
* string that must be freed by the caller.
@@ -182,7 +200,7 @@ extract_cpuinfo_field(const char* buffer, int buflen, const char* field)
182200
return result;
183201
}
184202

185-
/*
203+
/*
186204
* Checks that a space-separated list of items contains one given 'item'.
187205
* Returns 1 if found, 0 otherwise.
188206
*/
@@ -220,44 +238,51 @@ has_list_item(const char* list, const char* item)
220238
return 0;
221239
}
222240

223-
static void setHwcap(char* cpuFeatures, unsigned long* hwcap) {
224-
*hwcap |= has_list_item(cpuFeatures, "neon") ? NPY__HWCAP_NEON : 0;
225-
*hwcap |= has_list_item(cpuFeatures, "half") ? NPY__HWCAP_HALF : 0;
226-
*hwcap |= has_list_item(cpuFeatures, "vfpv3") ? NPY__HWCAP_VFPv3 : 0;
227-
*hwcap |= has_list_item(cpuFeatures, "vfpv4") ? NPY__HWCAP_VFPv4 : 0;
228-
229-
*hwcap |= has_list_item(cpuFeatures, "asimd") ? NPY__HWCAP_ASIMD : 0;
230-
*hwcap |= has_list_item(cpuFeatures, "fp") ? NPY__HWCAP_FP : 0;
231-
*hwcap |= has_list_item(cpuFeatures, "fphp") ? NPY__HWCAP_FPHP : 0;
232-
*hwcap |= has_list_item(cpuFeatures, "asimdhp") ? NPY__HWCAP_ASIMDHP : 0;
233-
*hwcap |= has_list_item(cpuFeatures, "asimddp") ? NPY__HWCAP_ASIMDDP : 0;
234-
*hwcap |= has_list_item(cpuFeatures, "asimdfhm") ? NPY__HWCAP_ASIMDFHM : 0;
235-
}
236-
237241
static int
238242
get_feature_from_proc_cpuinfo(unsigned long *hwcap, unsigned long *hwcap2) {
239-
char* cpuinfo = NULL;
240-
int cpuinfo_len;
241-
cpuinfo_len = get_file_size("/proc/cpuinfo");
243+
*hwcap = 0;
244+
*hwcap2 = 0;
245+
246+
int cpuinfo_len = get_file_size("/proc/cpuinfo");
242247
if (cpuinfo_len < 0) {
243248
return 0;
244249
}
245-
cpuinfo = malloc(cpuinfo_len);
250+
char *cpuinfo = malloc(cpuinfo_len);
246251
if (cpuinfo == NULL) {
247252
return 0;
248253
}
254+
249255
cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len);
250-
char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
251-
if(cpuFeatures == NULL) {
256+
char *cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
257+
if (cpuFeatures == NULL) {
258+
free(cpuinfo);
252259
return 0;
253260
}
254-
setHwcap(cpuFeatures, hwcap);
255-
*hwcap2 |= *hwcap;
261+
*hwcap |= has_list_item(cpuFeatures, "fphp") ? NPY__HWCAP_FPHP : 0;
262+
*hwcap |= has_list_item(cpuFeatures, "asimdhp") ? NPY__HWCAP_ASIMDHP : 0;
263+
*hwcap |= has_list_item(cpuFeatures, "asimddp") ? NPY__HWCAP_ASIMDDP : 0;
264+
*hwcap |= has_list_item(cpuFeatures, "asimdfhm") ? NPY__HWCAP_ASIMDFHM : 0;
265+
#ifdef __arm__
266+
*hwcap |= has_list_item(cpuFeatures, "neon") ? NPY__HWCAP_NEON : 0;
267+
*hwcap |= has_list_item(cpuFeatures, "half") ? NPY__HWCAP_HALF : 0;
268+
*hwcap |= has_list_item(cpuFeatures, "vfpv3") ? NPY__HWCAP_VFPv3 : 0;
269+
*hwcap |= has_list_item(cpuFeatures, "vfpv4") ? NPY__HWCAP_VFPv4 : 0;
256270
*hwcap2 |= has_list_item(cpuFeatures, "aes") ? NPY__HWCAP2_AES : 0;
257271
*hwcap2 |= has_list_item(cpuFeatures, "pmull") ? NPY__HWCAP2_PMULL : 0;
258272
*hwcap2 |= has_list_item(cpuFeatures, "sha1") ? NPY__HWCAP2_SHA1 : 0;
259273
*hwcap2 |= has_list_item(cpuFeatures, "sha2") ? NPY__HWCAP2_SHA2 : 0;
260274
*hwcap2 |= has_list_item(cpuFeatures, "crc32") ? NPY__HWCAP2_CRC32 : 0;
275+
#else
276+
*hwcap |= has_list_item(cpuFeatures, "asimd") ? NPY__HWCAP_ASIMD : 0;
277+
*hwcap |= has_list_item(cpuFeatures, "fp") ? NPY__HWCAP_FP : 0;
278+
*hwcap |= has_list_item(cpuFeatures, "aes") ? NPY__HWCAP_AES : 0;
279+
*hwcap |= has_list_item(cpuFeatures, "pmull") ? NPY__HWCAP_PMULL : 0;
280+
*hwcap |= has_list_item(cpuFeatures, "sha1") ? NPY__HWCAP_SHA1 : 0;
281+
*hwcap |= has_list_item(cpuFeatures, "sha2") ? NPY__HWCAP_SHA2 : 0;
282+
*hwcap |= has_list_item(cpuFeatures, "crc32") ? NPY__HWCAP_CRC32 : 0;
283+
#endif
284+
free(cpuinfo);
285+
free(cpuFeatures);
261286
return 1;
262287
}
263288
#endif /* NUMPY_CORE_SRC_COMMON_NPY_CPUINFO_PARSER_H_ */

numpy/_core/tests/test_cpu_features.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -407,8 +407,11 @@ class Test_ARM_Features(AbstractTest):
407407
def load_flags(self):
408408
self.load_flags_cpuinfo("Features")
409409
arch = self.get_cpuinfo_item("CPU architecture")
410-
# in case of mounting virtual filesystem of aarch64 kernel
411-
is_rootfs_v8 = int('0' + next(iter(arch))) > 7 if arch else 0
410+
# in case of mounting virtual filesystem of aarch64 kernel without linux32
411+
is_rootfs_v8 = (
412+
not re.match("^armv[0-9]+l$", machine) and
413+
(int('0' + next(iter(arch))) > 7 if arch else 0)
414+
)
412415
if re.match("^(aarch64|AARCH64)", machine) or is_rootfs_v8:
413416
self.features_map = {
414417
"NEON": "ASIMD", "HALF": "ASIMD", "VFPV4": "ASIMD"

0 commit comments

Comments
 (0)
0