CI: Replace QEMU armhf with native (32-bit compatibility mode) (#28653) · charris/numpy@8f4eef7 · GitHub
[go: up one dir, main page]

Skip to content

Commit 8f4eef7

Browse files
seiko2plus authored and charris committed
CI: Replace QEMU armhf with native (32-bit compatibility mode) (numpy#28653)
* CI: Tests NumPy on 32-bit ARM hard-float (armhf) via compatibility mode

* BUG, SIMD: Fix floating-point errors with positive infinity input in sqrt on armhf

  Guards against passing positive infinity to vrsqrteq_f32 in sqrt operation, which would raise invalid floating-point errors on ARMv7 architectures.

* TEST: Mark linspace subnormal test as xfail on ARM32 platforms

  Adds an xfail marker to the linspace subnormal test case for ARMv7 and AArch32 platforms. These platforms seem to flush subnormals to zero (FTZ) even when not explicitly enabled via the FPSCR register, causing the test to fail.

* BUG, SIMD: Fix ARMv8 feature detection in 32-bit mode

  Fix detection of `FPHP`, `ASIMDHP`, `ASIMDDP`, `ASIMDFHM` features on ARMv8 32-bit mode (aarch32). Fix memory leaks in CPU feature detection on Android by adding missing free() calls.

* CI: Remove QEMU-based armhf testing

  Remove QEMU-based armhf testing as we now use native 32-bit compatibility mode running on ARM64 GitHub runners in a separate implementation.
1 parent 3ac41fe commit 8f4eef7

File tree

7 files changed

+156
-89
lines changed

7 files changed

+156
-89
lines changed

.github/workflows/linux.yml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,51 @@ jobs:
155155
env:
156156
PYTHONOPTIMIZE: 2
157157

158+
159+
armhf_test:
160+
# Tests NumPy on 32-bit ARM hard-float (armhf) via compatibility mode
161+
# running on aarch64 (ARM 64-bit) GitHub runners.
162+
needs: [smoke_test]
163+
if: github.repository == 'numpy/numpy'
164+
runs-on: ubuntu-22.04-arm
165+
steps:
166+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
167+
with:
168+
submodules: recursive
169+
fetch-tags: true
170+
persist-credentials: false
171+
172+
- name: Creates new container
173+
run: |
174+
docker run --name the_container --interactive \
175+
-v $(pwd):/numpy arm32v7/ubuntu:22.04 /bin/linux32 /bin/bash -c "
176+
apt update &&
177+
apt install -y ninja-build cmake git python3 python-is-python3 python3-dev python3-pip python3-venv &&
178+
python -m pip install -r /numpy/requirements/build_requirements.txt &&
179+
python -m pip install -r /numpy/requirements/test_requirements.txt
180+
"
181+
docker commit the_container the_container
182+
183+
- name: Meson Build
184+
run: |
185+
docker run --rm -e "TERM=xterm-256color" \
186+
-v $(pwd):/numpy the_container \
187+
/bin/script -e -q -c "/bin/linux32 /bin/bash --noprofile --norc -eo pipefail -c '
188+
cd /numpy && spin build
189+
'"
190+
191+
- name: Meson Log
192+
if: always()
193+
run: 'cat build/meson-logs/meson-log.txt'
194+
195+
- name: Run Tests
196+
run: |
197+
docker run --rm -e "TERM=xterm-256color" \
198+
-v $(pwd):/numpy the_container \
199+
/bin/script -e -q -c "/bin/linux32 /bin/bash --noprofile --norc -eo pipefail -c '
200+
cd /numpy && spin test -m full -- --timeout=600 --durations=10
201+
'"
202+
158203
benchmark:
159204
needs: [smoke_test]
160205
runs-on: ubuntu-latest

.github/workflows/linux_qemu.yml

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,17 +38,6 @@ jobs:
3838
fail-fast: false
3939
matrix:
4040
BUILD_PROP:
41-
- [
42-
"armhf",
43-
"arm-linux-gnueabihf",
44-
"arm32v7/ubuntu:22.04",
45-
"-Dallow-noblas=true",
46-
# test_unary_spurious_fpexception is currently skipped
47-
# FIXME(@seiko2plus): Requires confirmation for the following issue:
48-
# The presence of an FP invalid exception caused by sqrt. Unsure if this is a qemu bug or not.
49-
"(test_kind or test_multiarray or test_simd or test_umath or test_ufunc) and not test_unary_spurious_fpexception",
50-
"arm"
51-
]
5241
- [
5342
"ppc64le",
5443
"powerpc64le-linux-gnu",

numpy/_core/src/common/npy_cpu_features.c

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -749,34 +749,33 @@ npy__cpu_init_features_linux(void)
749749
#endif
750750
}
751751
#ifdef __arm__
752+
npy__cpu_have[NPY_CPU_FEATURE_NEON] = (hwcap & NPY__HWCAP_NEON) != 0;
753+
if (npy__cpu_have[NPY_CPU_FEATURE_NEON]) {
754+
npy__cpu_have[NPY_CPU_FEATURE_NEON_FP16] = (hwcap & NPY__HWCAP_HALF) != 0;
755+
npy__cpu_have[NPY_CPU_FEATURE_NEON_VFPV4] = (hwcap & NPY__HWCAP_VFPv4) != 0;
756+
}
752757
// Detect Arm8 (aarch32 state)
753758
if ((hwcap2 & NPY__HWCAP2_AES) || (hwcap2 & NPY__HWCAP2_SHA1) ||
754759
(hwcap2 & NPY__HWCAP2_SHA2) || (hwcap2 & NPY__HWCAP2_PMULL) ||
755760
(hwcap2 & NPY__HWCAP2_CRC32))
756761
{
757-
hwcap = hwcap2;
762+
npy__cpu_have[NPY_CPU_FEATURE_ASIMD] = npy__cpu_have[NPY_CPU_FEATURE_NEON];
763+
}
758764
#else
759-
if (1)
760-
{
761-
if (!(hwcap & (NPY__HWCAP_FP | NPY__HWCAP_ASIMD))) {
762-
// Is this could happen? maybe disabled by kernel
763-
// BTW this will break the baseline of AARCH64
764-
return 1;
765-
}
766-
#endif
767-
npy__cpu_have[NPY_CPU_FEATURE_FPHP] = (hwcap & NPY__HWCAP_FPHP) != 0;
768-
npy__cpu_have[NPY_CPU_FEATURE_ASIMDHP] = (hwcap & NPY__HWCAP_ASIMDHP) != 0;
769-
npy__cpu_have[NPY_CPU_FEATURE_ASIMDDP] = (hwcap & NPY__HWCAP_ASIMDDP) != 0;
770-
npy__cpu_have[NPY_CPU_FEATURE_ASIMDFHM] = (hwcap & NPY__HWCAP_ASIMDFHM) != 0;
771-
npy__cpu_have[NPY_CPU_FEATURE_SVE] = (hwcap & NPY__HWCAP_SVE) != 0;
772-
npy__cpu_init_features_arm8();
773-
} else {
774-
npy__cpu_have[NPY_CPU_FEATURE_NEON] = (hwcap & NPY__HWCAP_NEON) != 0;
775-
if (npy__cpu_have[NPY_CPU_FEATURE_NEON]) {
776-
npy__cpu_have[NPY_CPU_FEATURE_NEON_FP16] = (hwcap & NPY__HWCAP_HALF) != 0;
777-
npy__cpu_have[NPY_CPU_FEATURE_NEON_VFPV4] = (hwcap & NPY__HWCAP_VFPv4) != 0;
778-
}
765+
if (!(hwcap & (NPY__HWCAP_FP | NPY__HWCAP_ASIMD))) {
766+
// Is this could happen? maybe disabled by kernel
767+
// BTW this will break the baseline of AARCH64
768+
return 1;
779769
}
770+
npy__cpu_init_features_arm8();
771+
#endif
772+
npy__cpu_have[NPY_CPU_FEATURE_FPHP] = (hwcap & NPY__HWCAP_FPHP) != 0;
773+
npy__cpu_have[NPY_CPU_FEATURE_ASIMDHP] = (hwcap & NPY__HWCAP_ASIMDHP) != 0;
774+
npy__cpu_have[NPY_CPU_FEATURE_ASIMDDP] = (hwcap & NPY__HWCAP_ASIMDDP) != 0;
775+
npy__cpu_have[NPY_CPU_FEATURE_ASIMDFHM] = (hwcap & NPY__HWCAP_ASIMDFHM) != 0;
776+
#ifndef __arm__
777+
npy__cpu_have[NPY_CPU_FEATURE_SVE] = (hwcap & NPY__HWCAP_SVE) != 0;
778+
#endif
780779
return 1;
781780
}
782781
#endif

numpy/_core/src/common/npy_cpuinfo_parser.h

Lines changed: 69 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -36,25 +36,43 @@
3636
#define NPY__HWCAP 16
3737
#define NPY__HWCAP2 26
3838

39-
// arch/arm/include/uapi/asm/hwcap.h
40-
#define NPY__HWCAP_HALF (1 << 1)
41-
#define NPY__HWCAP_NEON (1 << 12)
42-
#define NPY__HWCAP_VFPv3 (1 << 13)
43-
#define NPY__HWCAP_VFPv4 (1 << 16)
44-
#define NPY__HWCAP2_AES (1 << 0)
45-
#define NPY__HWCAP2_PMULL (1 << 1)
46-
#define NPY__HWCAP2_SHA1 (1 << 2)
47-
#define NPY__HWCAP2_SHA2 (1 << 3)
48-
#define NPY__HWCAP2_CRC32 (1 << 4)
49-
// arch/arm64/include/uapi/asm/hwcap.h
50-
#define NPY__HWCAP_FP (1 << 0)
51-
#define NPY__HWCAP_ASIMD (1 << 1)
52-
#define NPY__HWCAP_FPHP (1 << 9)
53-
#define NPY__HWCAP_ASIMDHP (1 << 10)
54-
#define NPY__HWCAP_ASIMDDP (1 << 20)
55-
#define NPY__HWCAP_SVE (1 << 22)
56-
#define NPY__HWCAP_ASIMDFHM (1 << 23)
57-
/*
39+
#ifdef __arm__
40+
// arch/arm/include/uapi/asm/hwcap.h
41+
#define NPY__HWCAP_HALF (1 << 1)
42+
#define NPY__HWCAP_NEON (1 << 12)
43+
#define NPY__HWCAP_VFPv3 (1 << 13)
44+
#define NPY__HWCAP_VFPv4 (1 << 16)
45+
46+
#define NPY__HWCAP_FPHP (1 << 22)
47+
#define NPY__HWCAP_ASIMDHP (1 << 23)
48+
#define NPY__HWCAP_ASIMDDP (1 << 24)
49+
#define NPY__HWCAP_ASIMDFHM (1 << 25)
50+
51+
#define NPY__HWCAP2_AES (1 << 0)
52+
#define NPY__HWCAP2_PMULL (1 << 1)
53+
#define NPY__HWCAP2_SHA1 (1 << 2)
54+
#define NPY__HWCAP2_SHA2 (1 << 3)
55+
#define NPY__HWCAP2_CRC32 (1 << 4)
56+
#else
57+
// arch/arm64/include/uapi/asm/hwcap.h
58+
#define NPY__HWCAP_FP (1 << 0)
59+
#define NPY__HWCAP_ASIMD (1 << 1)
60+
61+
#define NPY__HWCAP_FPHP (1 << 9)
62+
#define NPY__HWCAP_ASIMDHP (1 << 10)
63+
#define NPY__HWCAP_ASIMDDP (1 << 20)
64+
#define NPY__HWCAP_ASIMDFHM (1 << 23)
65+
66+
#define NPY__HWCAP_AES (1 << 3)
67+
#define NPY__HWCAP_PMULL (1 << 4)
68+
#define NPY__HWCAP_SHA1 (1 << 5)
69+
#define NPY__HWCAP_SHA2 (1 << 6)
70+
#define NPY__HWCAP_CRC32 (1 << 7)
71+
#define NPY__HWCAP_SVE (1 << 22)
72+
#endif
73+
74+
75+
/*
5876
* Get the size of a file by reading it until the end. This is needed
5977
* because files under /proc do not always return a valid size when
6078
* using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed.
@@ -87,7 +105,7 @@ get_file_size(const char* pathname)
87105
return result;
88106
}
89107

90-
/*
108+
/*
91109
* Read the content of /proc/cpuinfo into a user-provided buffer.
92110
* Return the length of the data, or -1 on error. Does *not*
93111
* zero-terminate the content. Will not read more
@@ -123,7 +141,7 @@ read_file(const char* pathname, char* buffer, size_t buffsize)
123141
return count;
124142
}
125143

126-
/*
144+
/*
127145
* Extract the content of a the first occurrence of a given field in
128146
* the content of /proc/cpuinfo and return it as a heap-allocated
129147
* string that must be freed by the caller.
@@ -182,7 +200,7 @@ extract_cpuinfo_field(const char* buffer, int buflen, const char* field)
182200
return result;
183201
}
184202

185-
/*
203+
/*
186204
* Checks that a space-separated list of items contains one given 'item'.
187205
* Returns 1 if found, 0 otherwise.
188206
*/
@@ -220,44 +238,51 @@ has_list_item(const char* list, const char* item)
220238
return 0;
221239
}
222240

223-
static void setHwcap(char* cpuFeatures, unsigned long* hwcap) {
224-
*hwcap |= has_list_item(cpuFeatures, "neon") ? NPY__HWCAP_NEON : 0;
225-
*hwcap |= has_list_item(cpuFeatures, "half") ? NPY__HWCAP_HALF : 0;
226-
*hwcap |= has_list_item(cpuFeatures, "vfpv3") ? NPY__HWCAP_VFPv3 : 0;
227-
*hwcap |= has_list_item(cpuFeatures, "vfpv4") ? NPY__HWCAP_VFPv4 : 0;
228-
229-
*hwcap |= has_list_item(cpuFeatures, "asimd") ? NPY__HWCAP_ASIMD : 0;
230-
*hwcap |= has_list_item(cpuFeatures, "fp") ? NPY__HWCAP_FP : 0;
231-
*hwcap |= has_list_item(cpuFeatures, "fphp") ? NPY__HWCAP_FPHP : 0;
232-
*hwcap |= has_list_item(cpuFeatures, "asimdhp") ? NPY__HWCAP_ASIMDHP : 0;
233-
*hwcap |= has_list_item(cpuFeatures, "asimddp") ? NPY__HWCAP_ASIMDDP : 0;
234-
*hwcap |= has_list_item(cpuFeatures, "asimdfhm") ? NPY__HWCAP_ASIMDFHM : 0;
235-
}
236-
237241
static int
238242
get_feature_from_proc_cpuinfo(unsigned long *hwcap, unsigned long *hwcap2) {
239-
char* cpuinfo = NULL;
240-
int cpuinfo_len;
241-
cpuinfo_len = get_file_size("/proc/cpuinfo");
243+
*hwcap = 0;
244+
*hwcap2 = 0;
245+
246+
int cpuinfo_len = get_file_size("/proc/cpuinfo");
242247
if (cpuinfo_len < 0) {
243248
return 0;
244249
}
245-
cpuinfo = malloc(cpuinfo_len);
250+
char *cpuinfo = malloc(cpuinfo_len);
246251
if (cpuinfo == NULL) {
247252
return 0;
248253
}
254+
249255
cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len);
250-
char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
251-
if(cpuFeatures == NULL) {
256+
char *cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
257+
if (cpuFeatures == NULL) {
258+
free(cpuinfo);
252259
return 0;
253260
}
254-
setHwcap(cpuFeatures, hwcap);
255-
*hwcap2 |= *hwcap;
261+
*hwcap |= has_list_item(cpuFeatures, "fphp") ? NPY__HWCAP_FPHP : 0;
262+
*hwcap |= has_list_item(cpuFeatures, "asimdhp") ? NPY__HWCAP_ASIMDHP : 0;
263+
*hwcap |= has_list_item(cpuFeatures, "asimddp") ? NPY__HWCAP_ASIMDDP : 0;
264+
*hwcap |= has_list_item(cpuFeatures, "asimdfhm") ? NPY__HWCAP_ASIMDFHM : 0;
265+
#ifdef __arm__
266+
*hwcap |= has_list_item(cpuFeatures, "neon") ? NPY__HWCAP_NEON : 0;
267+
*hwcap |= has_list_item(cpuFeatures, "half") ? NPY__HWCAP_HALF : 0;
268+
*hwcap |= has_list_item(cpuFeatures, "vfpv3") ? NPY__HWCAP_VFPv3 : 0;
269+
*hwcap |= has_list_item(cpuFeatures, "vfpv4") ? NPY__HWCAP_VFPv4 : 0;
256270
*hwcap2 |= has_list_item(cpuFeatures, "aes") ? NPY__HWCAP2_AES : 0;
257271
*hwcap2 |= has_list_item(cpuFeatures, "pmull") ? NPY__HWCAP2_PMULL : 0;
258272
*hwcap2 |= has_list_item(cpuFeatures, "sha1") ? NPY__HWCAP2_SHA1 : 0;
259273
*hwcap2 |= has_list_item(cpuFeatures, "sha2") ? NPY__HWCAP2_SHA2 : 0;
260274
*hwcap2 |= has_list_item(cpuFeatures, "crc32") ? NPY__HWCAP2_CRC32 : 0;
275+
#else
276+
*hwcap |= has_list_item(cpuFeatures, "asimd") ? NPY__HWCAP_ASIMD : 0;
277+
*hwcap |= has_list_item(cpuFeatures, "fp") ? NPY__HWCAP_FP : 0;
278+
*hwcap |= has_list_item(cpuFeatures, "aes") ? NPY__HWCAP_AES : 0;
279+
*hwcap |= has_list_item(cpuFeatures, "pmull") ? NPY__HWCAP_PMULL : 0;
280+
*hwcap |= has_list_item(cpuFeatures, "sha1") ? NPY__HWCAP_SHA1 : 0;
281+
*hwcap |= has_list_item(cpuFeatures, "sha2") ? NPY__HWCAP_SHA2 : 0;
282+
*hwcap |= has_list_item(cpuFeatures, "crc32") ? NPY__HWCAP_CRC32 : 0;
283+
#endif
284+
free(cpuinfo);
285+
free(cpuFeatures);
261286
return 1;
262287
}
263288
#endif /* NUMPY_CORE_SRC_COMMON_NPY_CPUINFO_PARSER_H_ */

numpy/_core/src/common/simd/neon/math.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,13 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a)
2828
// Based on ARM doc, see https://developer.arm.com/documentation/dui0204/j/CIHDIACI
2929
NPY_FINLINE npyv_f32 npyv_sqrt_f32(npyv_f32 a)
3030
{
31+
const npyv_f32 one = vdupq_n_f32(1.0f);
3132
const npyv_f32 zero = vdupq_n_f32(0.0f);
3233
const npyv_u32 pinf = vdupq_n_u32(0x7f800000);
3334
npyv_u32 is_zero = vceqq_f32(a, zero), is_inf = vceqq_u32(vreinterpretq_u32_f32(a), pinf);
34-
// guard against floating-point division-by-zero error
35-
npyv_f32 guard_byz = vbslq_f32(is_zero, vreinterpretq_f32_u32(pinf), a);
35+
npyv_u32 is_special = vorrq_u32(is_zero, is_inf);
36+
// guard against division-by-zero and infinity input to vrsqrte to avoid invalid fp error
37+
npyv_f32 guard_byz = vbslq_f32(is_special, one, a);
3638
// estimate to (1/√a)
3739
npyv_f32 rsqrte = vrsqrteq_f32(guard_byz);
3840
/**
@@ -47,10 +49,8 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a)
4749
rsqrte = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a, rsqrte), rsqrte), rsqrte);
4850
// a * (1/√a)
4951
npyv_f32 sqrt = vmulq_f32(a, rsqrte);
50-
// return zero if the a is zero
51-
// - return zero if a is zero.
52-
// - return positive infinity if a is positive infinity
53-
return vbslq_f32(vorrq_u32(is_zero, is_inf), a, sqrt);
52+
// Handle special cases: return a for zeros and positive infinities
53+
return vbslq_f32(is_special, a, sqrt);
5454
}
5555
#endif // NPY_SIMD_F64
5656

numpy/_core/tests/test_cpu_features.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -401,12 +401,15 @@ class Test_ARM_Features(AbstractTest):
401401
def load_flags(self):
402402
self.load_flags_cpuinfo("Features")
403403
arch = self.get_cpuinfo_item("CPU architecture")
404-
# in case of mounting virtual filesystem of aarch64 kernel
405-
is_rootfs_v8 = int('0'+next(iter(arch))) > 7 if arch else 0
406-
if re.match("^(aarch64|AARCH64)", machine) or is_rootfs_v8:
407-
self.features_map = dict(
408-
NEON="ASIMD", HALF="ASIMD", VFPV4="ASIMD"
409-
)
404+
# in case of mounting virtual filesystem of aarch64 kernel without linux32
405+
is_rootfs_v8 = (
406+
not re.match("^armv[0-9]+l$", machine) and
407+
(int('0' + next(iter(arch))) > 7 if arch else 0)
408+
)
409+
if re.match("^(aarch64|AARCH64)", machine) or is_rootfs_v8:
410+
self.features_map = {
411+
"NEON": "ASIMD", "HALF": "ASIMD", "VFPV4": "ASIMD"
412+
}
410413
else:
411414
self.features_map = dict(
412415
# ELF auxiliary vector and /proc/cpuinfo on Linux kernel(armv8 aarch32)

numpy/_core/tests/test_function_base.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import sys
2-
2+
import platform
33
import pytest
44

55
import numpy as np
@@ -14,6 +14,9 @@
1414
IS_PYPY
1515
)
1616

17+
def _is_armhf():
18+
# Check if the current platform is ARMHF (32-bit ARM architecture)
19+
return platform.machine().startswith('arm') and platform.architecture()[0] == '32bit'
1720

1821
class PhysicalQuantity(float):
1922
def __new__(cls, value):
@@ -415,6 +418,9 @@ def __mul__(self, other):
415418

416419
assert_equal(linspace(one, five), linspace(1, 5))
417420

421+
# even when not explicitly enabled via FPSCR register
422+
@pytest.mark.xfail(_is_armhf(),
423+
reason="ARMHF/AArch32 platforms seem to FTZ subnormals")
418424
def test_denormal_numbers(self):
419425
# Regression test for gh-5437. Will probably fail when compiled
420426
# with ICC, which flushes denormals to zero

0 commit comments

Comments
 (0)
0