charris
diff --git a/‎.github/workflows/linux.yml
Lines changed: 45 additions & 0 deletions b/‎.github/workflows/linux.yml
Lines changed: 45 additions & 0 deletions
diff --git a/‎.github/workflows/linux_qemu.yml
Lines changed: 0 additions & 11 deletions b/‎.github/workflows/linux_qemu.yml
Lines changed: 0 additions & 11 deletions
diff --git a/‎numpy/_core/src/common/npy_cpu_features.c
Lines changed: 20 additions & 21 deletions b/‎numpy/_core/src/common/npy_cpu_features.c
Lines changed: 20 additions & 21 deletions
diff --git a/‎numpy/_core/src/common/npy_cpuinfo_parser.h
Lines changed: 69 additions & 44 deletions b/‎numpy/_core/src/common/npy_cpuinfo_parser.h
Lines changed: 69 additions & 44 deletions
diff --git a/‎numpy/_core/src/common/simd/neon/math.h
Lines changed: 6 additions & 6 deletions b/‎numpy/_core/src/common/simd/neon/math.h
Lines changed: 6 additions & 6 deletions
diff --git a/‎numpy/_core/tests/test_cpu_features.py
Lines changed: 9 additions & 6 deletions b/‎numpy/_core/tests/test_cpu_features.py
Lines changed: 9 additions & 6 deletions
diff --git a/‎numpy/_core/tests/test_function_base.py
Lines changed: 7 additions & 1 deletion b/‎numpy/_core/tests/test_function_base.py
Lines changed: 7 additions & 1 deletion
@@ -155,6 +155,51 @@ jobs:
       env:
         PYTHONOPTIMIZE: 2
 
+
+  armhf_test:
+    # Tests NumPy on 32-bit ARM hard-float (armhf) via compatibility mode
+    # running on aarch64 (ARM 64-bit) GitHub runners.
+    needs: [smoke_test]
+    if: github.repository == 'numpy/numpy'
+    runs-on: ubuntu-22.04-arm
+    steps:
+    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      with:
+        submodules: recursive
+        fetch-tags: true
+        persist-credentials: false
+    
+    - name: Creates new container
+      run: |
+        docker run --name the_container --interactive \
+          -v $(pwd):/numpy arm32v7/ubuntu:22.04 /bin/linux32 /bin/bash -c "
+          apt update &&
+          apt install -y ninja-build cmake git python3 python-is-python3 python3-dev python3-pip python3-venv &&
+          python -m pip install -r /numpy/requirements/build_requirements.txt &&
+          python -m pip install -r /numpy/requirements/test_requirements.txt
+        "
+        docker commit the_container the_container
+
+    - name: Meson Build
+      run: |
+        docker run --rm -e "TERM=xterm-256color" \
+          -v $(pwd):/numpy the_container \
+          /bin/script -e -q -c "/bin/linux32 /bin/bash --noprofile --norc -eo pipefail -c '
+            cd /numpy && spin build 
+          '"
+
+    - name: Meson Log
+      if: always()
+      run: 'cat build/meson-logs/meson-log.txt'
+
+    - name: Run Tests
+      run: |
+        docker run --rm -e "TERM=xterm-256color" \
+          -v $(pwd):/numpy the_container \
+          /bin/script -e -q -c "/bin/linux32 /bin/bash --noprofile --norc -eo pipefail -c '
+            cd /numpy && spin test -m full -- --timeout=600 --durations=10
+          '"
+
   benchmark:
     needs: [smoke_test]
     runs-on: ubuntu-latest
 
@@ -38,17 +38,6 @@ jobs:
       fail-fast: false
       matrix:
         BUILD_PROP:
-          - [
-              "armhf",
-              "arm-linux-gnueabihf",
-              "arm32v7/ubuntu:22.04",
-              "-Dallow-noblas=true",
-              # test_unary_spurious_fpexception is currently skipped
-              # FIXME(@seiko2plus): Requires confirmation for the following issue:
-              # The presence of an FP invalid exception caused by sqrt. Unsure if this is a qemu bug or not.
-              "(test_kind or test_multiarray or test_simd or test_umath or test_ufunc) and not test_unary_spurious_fpexception",
-              "arm"
-            ]
           - [
               "ppc64le",
               "powerpc64le-linux-gnu",
 
@@ -749,34 +749,33 @@ npy__cpu_init_features_linux(void)
     #endif
     }
 #ifdef __arm__
+    npy__cpu_have[NPY_CPU_FEATURE_NEON]       = (hwcap & NPY__HWCAP_NEON)   != 0;
+    if (npy__cpu_have[NPY_CPU_FEATURE_NEON]) {
+        npy__cpu_have[NPY_CPU_FEATURE_NEON_FP16]  = (hwcap & NPY__HWCAP_HALF) != 0;
+        npy__cpu_have[NPY_CPU_FEATURE_NEON_VFPV4] = (hwcap & NPY__HWCAP_VFPv4) != 0;
+    }
     // Detect Arm8 (aarch32 state)
     if ((hwcap2 & NPY__HWCAP2_AES)  || (hwcap2 & NPY__HWCAP2_SHA1)  ||
         (hwcap2 & NPY__HWCAP2_SHA2) || (hwcap2 & NPY__HWCAP2_PMULL) ||
         (hwcap2 & NPY__HWCAP2_CRC32))
     {
-        hwcap = hwcap2;
+        npy__cpu_have[NPY_CPU_FEATURE_ASIMD] = npy__cpu_have[NPY_CPU_FEATURE_NEON];
+    }
 #else
-    if (1)
-    {
-        if (!(hwcap & (NPY__HWCAP_FP | NPY__HWCAP_ASIMD))) {
-            // Is this could happen? maybe disabled by kernel
-            // BTW this will break the baseline of AARCH64
-            return 1;
-        }
-#endif
-        npy__cpu_have[NPY_CPU_FEATURE_FPHP]       = (hwcap & NPY__HWCAP_FPHP)     != 0;
-        npy__cpu_have[NPY_CPU_FEATURE_ASIMDHP]    = (hwcap & NPY__HWCAP_ASIMDHP)  != 0;
-        npy__cpu_have[NPY_CPU_FEATURE_ASIMDDP]    = (hwcap & NPY__HWCAP_ASIMDDP)  != 0;
-        npy__cpu_have[NPY_CPU_FEATURE_ASIMDFHM]   = (hwcap & NPY__HWCAP_ASIMDFHM) != 0;
-        npy__cpu_have[NPY_CPU_FEATURE_SVE]        = (hwcap & NPY__HWCAP_SVE)      != 0;
-        npy__cpu_init_features_arm8();
-    } else {
-        npy__cpu_have[NPY_CPU_FEATURE_NEON]       = (hwcap & NPY__HWCAP_NEON)   != 0;
-        if (npy__cpu_have[NPY_CPU_FEATURE_NEON]) {
-            npy__cpu_have[NPY_CPU_FEATURE_NEON_FP16]  = (hwcap & NPY__HWCAP_HALF) != 0;
-            npy__cpu_have[NPY_CPU_FEATURE_NEON_VFPV4] = (hwcap & NPY__HWCAP_VFPv4) != 0;
-        }
+    if (!(hwcap & (NPY__HWCAP_FP | NPY__HWCAP_ASIMD))) {
+        // Can this happen? Maybe if FP/ASIMD support is disabled by the kernel.
+        // BTW, this would break the baseline of AArch64.
+        return 1;
     }
+    npy__cpu_init_features_arm8();
+#endif
+    npy__cpu_have[NPY_CPU_FEATURE_FPHP]       = (hwcap & NPY__HWCAP_FPHP)     != 0;
+    npy__cpu_have[NPY_CPU_FEATURE_ASIMDHP]    = (hwcap & NPY__HWCAP_ASIMDHP)  != 0;
+    npy__cpu_have[NPY_CPU_FEATURE_ASIMDDP]    = (hwcap & NPY__HWCAP_ASIMDDP)  != 0;
+    npy__cpu_have[NPY_CPU_FEATURE_ASIMDFHM]   = (hwcap & NPY__HWCAP_ASIMDFHM) != 0;
+#ifndef __arm__
+    npy__cpu_have[NPY_CPU_FEATURE_SVE]        = (hwcap & NPY__HWCAP_SVE)      != 0;
+#endif
     return 1;
 }
 #endif
 
@@ -36,25 +36,43 @@
 #define NPY__HWCAP  16
 #define NPY__HWCAP2 26
 
-// arch/arm/include/uapi/asm/hwcap.h
-#define NPY__HWCAP_HALF   (1 << 1)
-#define NPY__HWCAP_NEON   (1 << 12)
-#define NPY__HWCAP_VFPv3  (1 << 13)
-#define NPY__HWCAP_VFPv4  (1 << 16)
-#define NPY__HWCAP2_AES   (1 << 0)
-#define NPY__HWCAP2_PMULL (1 << 1)
-#define NPY__HWCAP2_SHA1  (1 << 2)
-#define NPY__HWCAP2_SHA2  (1 << 3)
-#define NPY__HWCAP2_CRC32 (1 << 4)
-// arch/arm64/include/uapi/asm/hwcap.h
-#define NPY__HWCAP_FP       (1 << 0)
-#define NPY__HWCAP_ASIMD    (1 << 1)
-#define NPY__HWCAP_FPHP     (1 << 9)
-#define NPY__HWCAP_ASIMDHP  (1 << 10)
-#define NPY__HWCAP_ASIMDDP  (1 << 20)
-#define NPY__HWCAP_SVE      (1 << 22)
-#define NPY__HWCAP_ASIMDFHM (1 << 23)
-/* 
+#ifdef __arm__
+    // arch/arm/include/uapi/asm/hwcap.h
+    #define NPY__HWCAP_HALF	    (1 << 1)
+    #define NPY__HWCAP_NEON	    (1 << 12)
+    #define NPY__HWCAP_VFPv3	(1 << 13)
+    #define NPY__HWCAP_VFPv4	(1 << 16)
+
+    #define NPY__HWCAP_FPHP	    (1 << 22)
+    #define NPY__HWCAP_ASIMDHP	(1 << 23)
+    #define NPY__HWCAP_ASIMDDP	(1 << 24)
+    #define NPY__HWCAP_ASIMDFHM	(1 << 25)
+
+    #define NPY__HWCAP2_AES	    (1 << 0)
+    #define NPY__HWCAP2_PMULL	(1 << 1)
+    #define NPY__HWCAP2_SHA1	(1 << 2)
+    #define NPY__HWCAP2_SHA2	(1 << 3)
+    #define NPY__HWCAP2_CRC32	(1 << 4)
+#else
+    // arch/arm64/include/uapi/asm/hwcap.h
+    #define NPY__HWCAP_FP		(1 << 0)
+    #define NPY__HWCAP_ASIMD	(1 << 1)
+
+    #define NPY__HWCAP_FPHP		(1 << 9)
+    #define NPY__HWCAP_ASIMDHP	(1 << 10)
+    #define NPY__HWCAP_ASIMDDP	(1 << 20)
+    #define NPY__HWCAP_ASIMDFHM	(1 << 23)
+
+    #define NPY__HWCAP_AES		(1 << 3)
+    #define NPY__HWCAP_PMULL	(1 << 4)
+    #define NPY__HWCAP_SHA1		(1 << 5)
+    #define NPY__HWCAP_SHA2		(1 << 6)
+    #define NPY__HWCAP_CRC32	(1 << 7)
+    #define NPY__HWCAP_SVE		(1 << 22)
+#endif
+
+
+/*
  * Get the size of a file by reading it until the end. This is needed
  * because files under /proc do not always return a valid size when
  * using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed.
@@ -87,7 +105,7 @@ get_file_size(const char* pathname)
     return result;
 }
 
-/* 
+/*
  * Read the content of /proc/cpuinfo into a user-provided buffer.
  * Return the length of the data, or -1 on error. Does *not*
  * zero-terminate the content. Will not read more
@@ -123,7 +141,7 @@ read_file(const char*  pathname, char*  buffer, size_t  buffsize)
     return count;
 }
 
-/* 
+/*
  * Extract the content of a the first occurrence of a given field in
  * the content of /proc/cpuinfo and return it as a heap-allocated
  * string that must be freed by the caller.
@@ -182,7 +200,7 @@ extract_cpuinfo_field(const char* buffer, int buflen, const char* field)
     return result;
 }
 
-/* 
+/*
  * Checks that a space-separated list of items contains one given 'item'.
  * Returns 1 if found, 0 otherwise.
  */
@@ -220,44 +238,51 @@ has_list_item(const char* list, const char* item)
     return 0;
 }
 
-static void setHwcap(char* cpuFeatures, unsigned long* hwcap) {
-    *hwcap |= has_list_item(cpuFeatures, "neon") ? NPY__HWCAP_NEON : 0;
-    *hwcap |= has_list_item(cpuFeatures, "half") ? NPY__HWCAP_HALF : 0;
-    *hwcap |= has_list_item(cpuFeatures, "vfpv3") ? NPY__HWCAP_VFPv3 : 0;
-    *hwcap |= has_list_item(cpuFeatures, "vfpv4") ? NPY__HWCAP_VFPv4 : 0;
-
-    *hwcap |= has_list_item(cpuFeatures, "asimd") ? NPY__HWCAP_ASIMD : 0;
-    *hwcap |= has_list_item(cpuFeatures, "fp") ? NPY__HWCAP_FP : 0;
-    *hwcap |= has_list_item(cpuFeatures, "fphp") ? NPY__HWCAP_FPHP : 0;
-    *hwcap |= has_list_item(cpuFeatures, "asimdhp") ? NPY__HWCAP_ASIMDHP : 0;
-    *hwcap |= has_list_item(cpuFeatures, "asimddp") ? NPY__HWCAP_ASIMDDP : 0;
-    *hwcap |= has_list_item(cpuFeatures, "asimdfhm") ? NPY__HWCAP_ASIMDFHM : 0;
-}
-
 static int
 get_feature_from_proc_cpuinfo(unsigned long *hwcap, unsigned long *hwcap2) {
-    char* cpuinfo = NULL;
-    int cpuinfo_len;
-    cpuinfo_len = get_file_size("/proc/cpuinfo");
+    *hwcap = 0;
+    *hwcap2 = 0;
+
+    int cpuinfo_len = get_file_size("/proc/cpuinfo");
     if (cpuinfo_len < 0) {
         return 0;
     }
-    cpuinfo = malloc(cpuinfo_len);
+    char *cpuinfo = malloc(cpuinfo_len);
     if (cpuinfo == NULL) {
         return 0;
     }
+
     cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len);
-    char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
-    if(cpuFeatures == NULL) {
+    char *cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
+    if (cpuFeatures == NULL) {
+        free(cpuinfo);
         return 0;
     }
-    setHwcap(cpuFeatures, hwcap);
-    *hwcap2 |= *hwcap;
+    *hwcap |= has_list_item(cpuFeatures, "fphp") ? NPY__HWCAP_FPHP : 0;
+    *hwcap |= has_list_item(cpuFeatures, "asimdhp") ? NPY__HWCAP_ASIMDHP : 0;
+    *hwcap |= has_list_item(cpuFeatures, "asimddp") ? NPY__HWCAP_ASIMDDP : 0;
+    *hwcap |= has_list_item(cpuFeatures, "asimdfhm") ? NPY__HWCAP_ASIMDFHM : 0;
+#ifdef __arm__
+    *hwcap |= has_list_item(cpuFeatures, "neon") ? NPY__HWCAP_NEON : 0;
+    *hwcap |= has_list_item(cpuFeatures, "half") ? NPY__HWCAP_HALF : 0;
+    *hwcap |= has_list_item(cpuFeatures, "vfpv3") ? NPY__HWCAP_VFPv3 : 0;
+    *hwcap |= has_list_item(cpuFeatures, "vfpv4") ? NPY__HWCAP_VFPv4 : 0;
     *hwcap2 |= has_list_item(cpuFeatures, "aes") ? NPY__HWCAP2_AES : 0;
     *hwcap2 |= has_list_item(cpuFeatures, "pmull") ? NPY__HWCAP2_PMULL : 0;
     *hwcap2 |= has_list_item(cpuFeatures, "sha1") ? NPY__HWCAP2_SHA1 : 0;
     *hwcap2 |= has_list_item(cpuFeatures, "sha2") ? NPY__HWCAP2_SHA2 : 0;
     *hwcap2 |= has_list_item(cpuFeatures, "crc32") ? NPY__HWCAP2_CRC32 : 0;
+#else
+    *hwcap |= has_list_item(cpuFeatures, "asimd") ? NPY__HWCAP_ASIMD : 0;
+    *hwcap |= has_list_item(cpuFeatures, "fp") ? NPY__HWCAP_FP : 0;
+    *hwcap |= has_list_item(cpuFeatures, "aes") ? NPY__HWCAP_AES : 0;
+    *hwcap |= has_list_item(cpuFeatures, "pmull") ? NPY__HWCAP_PMULL : 0;
+    *hwcap |= has_list_item(cpuFeatures, "sha1") ? NPY__HWCAP_SHA1 : 0;
+    *hwcap |= has_list_item(cpuFeatures, "sha2") ? NPY__HWCAP_SHA2 : 0;
+    *hwcap |= has_list_item(cpuFeatures, "crc32") ? NPY__HWCAP_CRC32 : 0;
+#endif
+    free(cpuinfo);
+    free(cpuFeatures);
     return 1;
 }
 #endif  /* NUMPY_CORE_SRC_COMMON_NPY_CPUINFO_PARSER_H_ */
@@ -28,11 +28,13 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a)
     // Based on ARM doc, see https://developer.arm.com/documentation/dui0204/j/CIHDIACI
     NPY_FINLINE npyv_f32 npyv_sqrt_f32(npyv_f32 a)
     {
+        const npyv_f32 one = vdupq_n_f32(1.0f);
         const npyv_f32 zero = vdupq_n_f32(0.0f);
         const npyv_u32 pinf = vdupq_n_u32(0x7f800000);
         npyv_u32 is_zero = vceqq_f32(a, zero), is_inf = vceqq_u32(vreinterpretq_u32_f32(a), pinf);
-        // guard against floating-point division-by-zero error
-        npyv_f32 guard_byz = vbslq_f32(is_zero, vreinterpretq_f32_u32(pinf), a);
+        npyv_u32 is_special = vorrq_u32(is_zero, is_inf);
+        // feed 1.0 to vrsqrte for zero/+inf lanes to avoid division-by-zero and invalid FP errors
+        npyv_f32 guard_byz = vbslq_f32(is_special, one, a);
         // estimate to (1/√a)
         npyv_f32 rsqrte = vrsqrteq_f32(guard_byz);
         /**
@@ -47,10 +49,8 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a)
         rsqrte = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a, rsqrte), rsqrte), rsqrte);
         // a * (1/√a)
         npyv_f32 sqrt = vmulq_f32(a, rsqrte);
-        // return zero if the a is zero
-        // - return zero if a is zero.
-        // - return positive infinity if a is positive infinity
-        return vbslq_f32(vorrq_u32(is_zero, is_inf), a, sqrt);
+        // Handle special cases: return a for zeros and positive infinities
+        return vbslq_f32(is_special, a, sqrt);
     }
 #endif // NPY_SIMD_F64
 
 
@@ -401,12 +401,15 @@ class Test_ARM_Features(AbstractTest):
     def load_flags(self):
         self.load_flags_cpuinfo("Features")
         arch = self.get_cpuinfo_item("CPU architecture")
-        # in case of mounting virtual filesystem of aarch64 kernel
-        is_rootfs_v8 = int('0'+next(iter(arch))) > 7 if arch else 0
-        if  re.match("^(aarch64|AARCH64)", machine) or is_rootfs_v8:
-            self.features_map = dict(
-                NEON="ASIMD", HALF="ASIMD", VFPV4="ASIMD"
-            )
+        # handle the virtual filesystem of an aarch64 kernel mounted without linux32
+        is_rootfs_v8 = (
+            not re.match("^armv[0-9]+l$", machine) and
+            (int('0' + next(iter(arch))) > 7 if arch else 0)
+        )
+        if re.match("^(aarch64|AARCH64)", machine) or is_rootfs_v8:
+            self.features_map = {
+                "NEON": "ASIMD", "HALF": "ASIMD", "VFPV4": "ASIMD"
+            }
         else:
             self.features_map = dict(
                 # ELF auxiliary vector and /proc/cpuinfo on Linux kernel(armv8 aarch32)
 
@@ -1,5 +1,5 @@
 import sys
-
+import platform
 import pytest
 
 import numpy as np
@@ -14,6 +14,9 @@
     IS_PYPY
     )
 
+def _is_armhf():
+    # Check if the current platform is ARMHF (32-bit ARM architecture)
+    return platform.machine().startswith('arm') and platform.architecture()[0] == '32bit'
 
 class PhysicalQuantity(float):
     def __new__(cls, value):
@@ -415,6 +418,9 @@ def __mul__(self, other):
 
         assert_equal(linspace(one, five), linspace(1, 5))
 
+    # FTZ seems to be active even when not explicitly enabled via FPSCR
+    @pytest.mark.xfail(_is_armhf(),
+                       reason="ARMHF/AArch32 platforms seem to FTZ subnormals")
     def test_denormal_numbers(self):
         # Regression test for gh-5437. Will probably fail when compiled
         # with ICC, which flushes denormals to zero