8000 MAINT: CPUs that support unaligned access. by Qiyu8 · Pull Request #18065 · numpy/numpy · GitHub

Merged · 4 commits · Jan 5, 2021
8 changes: 0 additions & 8 deletions numpy/core/include/numpy/npy_common.h
@@ -10,14 +10,6 @@
#include <npy_config.h>
#endif

// int*, int64* should be propertly aligned on ARMv7 to avoid bus error
#if !defined(NPY_STRONG_ALIGNMENT) && defined(__arm__) && !(defined(__aarch64__) || defined(_M_ARM64))
#define NPY_STRONG_ALIGNMENT 1
#endif
#if !defined(NPY_STRONG_ALIGNMENT)
#define NPY_STRONG_ALIGNMENT 0
#endif

// compile time environment variables
#ifndef NPY_RELAXED_STRIDES_CHECKING
#define NPY_RELAXED_STRIDES_CHECKING 0
14 changes: 10 additions & 4 deletions numpy/core/include/numpy/npy_cpu.h
@@ -110,10 +110,16 @@
information about your platform (OS, CPU and compiler)
#endif

#if (defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64))
#define NPY_CPU_HAVE_UNALIGNED_ACCESS 1
#else
#define NPY_CPU_HAVE_UNALIGNED_ACCESS 0
/*
 * Except for the following architectures, memory access is limited to the natural
 * alignment of data types; otherwise it may lead to a bus error or performance
 * regression. For more details about unaligned access, see
 * https://www.kernel.org/doc/Documentation/unaligned-memory-access.txt.
 */
#if defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64) || defined(__aarch64__) || defined(__powerpc64__)
Member:
@Qiyu8, this should be a simple comment, not a case study, and the current one is not accurate enough. The comment can be as simple as the following:

// Except for the following architectures, memory access is limited to the natural
// alignment of data types otherwise it may lead to bus error or performance regression.

The new #define NPY_STRONG_ALIGNMENT_REQUIRED is too long; three words are enough: NPY_ALIGNMENT_REQUIRED or NPY_STRONG_ALIGNMENT.

Member @seberg (Jan 3, 2021):
Sorry, I tried to shut down a bit over the break. I agree with Sayed: we don't need a list of how things can go wrong or how specific CPUs behave (it is interesting, though!).

The more important thing is to write a short comment about where it is safe to do unaligned access: i.e. if you put an `if (!NPY_STRONG_ALIGNMENT_REQUIRED) {code}`, what are you allowed to do in `code`? Are there remaining constraints on unaligned access?

EDIT: You could include the URL to this PR so that someone interested can find it!

Member Author (@Qiyu8):
I will choose the name NPY_ALIGNMENT_REQUIRED, which indicates that only aligned memory access is allowed.

Member:
Thanks. What about the auto-vectorization note in the comment? Do I not have to worry about it?

Member Author:
NPY_USE_UNALIGNED_ACCESS is already disabled no matter whether auto-vectorization is enabled or not. I will restructure lowlevel_strided_loops.c.src using universal intrinsics so that NPY_USE_UNALIGNED_ACCESS can be enabled when -O3 is specified, but that's not the point of this PR.

Member:
I realize that it is disabled; my question was just to make sure that the comment doesn't somehow apply to the new code as well, since I am not quite sure how it would differ from the one in lowlevel_strided_loops.c.src.

Btw. don't let it stop you, but that file needs at least two bigger pieces of maintenance (one already open, the other fixing most function signatures). That should be pretty orthogonal to anything you do, though; making sure casts are vectorized definitely seems worth it.

Member:
Anyway, I am happy with this, feel free to merge. I just still don't quite understand the auto-vectorization comment, unless it is specific to using NPY_GCC_UNROLL_LOOPS.

#define NPY_ALIGNMENT_REQUIRED 0
#endif
#ifndef NPY_ALIGNMENT_REQUIRED
#define NPY_ALIGNMENT_REQUIRED 1
#endif

#endif
2 changes: 1 addition & 1 deletion numpy/core/src/multiarray/common.h
@@ -267,7 +267,7 @@ npy_memchr(char * haystack, char needle,
}
else {
/* usually find elements to skip path */
if (NPY_CPU_HAVE_UNALIGNED_ACCESS && needle == 0 && stride == 1) {
if (!NPY_ALIGNMENT_REQUIRED && needle == 0 && stride == 1) {
/* iterate until last multiple of 4 */
char * block_end = haystack + size - (size % sizeof(unsigned int));
while (p < block_end) {
2 changes: 1 addition & 1 deletion numpy/core/src/multiarray/compiled_base.c
@@ -1521,7 +1521,7 @@ pack_inner(const char *inptr,
bb[2] = npyv_tobits_b8(npyv_cmpneq_u8(v2, v_zero));
bb[3] = npyv_tobits_b8(npyv_cmpneq_u8(v3, v_zero));
if(out_stride == 1 &&
(!NPY_STRONG_ALIGNMENT || isAligned)) {
(!NPY_ALIGNMENT_REQUIRED || isAligned)) {
npy_uint64 *ptr64 = (npy_uint64*)outptr;
#if NPY_SIMD_WIDTH == 16
npy_uint64 bcomp = bb[0] | (bb[1] << 16) | (bb[2] << 32) | (bb[3] << 48);
2 changes: 1 addition & 1 deletion numpy/core/src/multiarray/item_selection.c
@@ -2245,7 +2245,7 @@ count_boolean_trues(int ndim, char *data, npy_intp const *ashape, npy_intp const
count += count_nonzero_bytes((const npy_uint8 *)d, stride);
d += stride;
#else
if (NPY_CPU_HAVE_UNALIGNED_ACCESS ||
if (!NPY_ALIGNMENT_REQUIRED ||
npy_is_aligned(d, sizeof(npy_uint64))) {
npy_uintp stride = 6 * sizeof(npy_uint64);
for (; d < e - (shape[0] % stride); d += stride) {
2 changes: 1 addition & 1 deletion numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -29,7 +29,7 @@
* instructions (16 byte).
* So this flag can only be enabled if autovectorization is disabled.
*/
#if NPY_CPU_HAVE_UNALIGNED_ACCESS
#if NPY_ALIGNMENT_REQUIRED
# define NPY_USE_UNALIGNED_ACCESS 0
#else
# define NPY_USE_UNALIGNED_ACCESS 0