diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h index d5a586c56a39..c8495db8e58f 100644 --- a/numpy/core/include/numpy/npy_common.h +++ b/numpy/core/include/numpy/npy_common.h @@ -10,14 +10,6 @@ #include #endif -// int*, int64* should be propertly aligned on ARMv7 to avoid bus error -#if !defined(NPY_STRONG_ALIGNMENT) && defined(__arm__) && !(defined(__aarch64__) || defined(_M_ARM64)) -#define NPY_STRONG_ALIGNMENT 1 -#endif -#if !defined(NPY_STRONG_ALIGNMENT) -#define NPY_STRONG_ALIGNMENT 0 -#endif - // compile time environment variables #ifndef NPY_RELAXED_STRIDES_CHECKING #define NPY_RELAXED_STRIDES_CHECKING 0 diff --git a/numpy/core/include/numpy/npy_cpu.h b/numpy/core/include/numpy/npy_cpu.h index 4dbf9d84e384..065176ac5fb6 100644 --- a/numpy/core/include/numpy/npy_cpu.h +++ b/numpy/core/include/numpy/npy_cpu.h @@ -110,10 +110,16 @@ information about your platform (OS, CPU and compiler) #endif -#if (defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64)) -#define NPY_CPU_HAVE_UNALIGNED_ACCESS 1 -#else -#define NPY_CPU_HAVE_UNALIGNED_ACCESS 0 +/* + * Except on the architectures listed below, memory accesses must honor the natural + * alignment of the data type; otherwise they may cause a bus error or a performance regression. + * For more details about unaligned access, see https://www.kernel.org/doc/Documentation/unaligned-memory-access.txt. 
+*/ +#if defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64) || defined(__aarch64__) || defined(__powerpc64__) + #define NPY_ALIGNMENT_REQUIRED 0 +#endif +#ifndef NPY_ALIGNMENT_REQUIRED + #define NPY_ALIGNMENT_REQUIRED 1 #endif #endif diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h index ef9bc79da325..2f2e7e25bea2 100644 --- a/numpy/core/src/multiarray/common.h +++ b/numpy/core/src/multiarray/common.h @@ -267,7 +267,7 @@ npy_memchr(char * haystack, char needle, } else { /* usually find elements to skip path */ - if (NPY_CPU_HAVE_UNALIGNED_ACCESS && needle == 0 && stride == 1) { + if (!NPY_ALIGNMENT_REQUIRED && needle == 0 && stride == 1) { /* iterate until last multiple of 4 */ char * block_end = haystack + size - (size % sizeof(unsigned int)); while (p < block_end) { diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c index 6ae4dda6bba9..fa5d7db75e88 100644 --- a/numpy/core/src/multiarray/compiled_base.c +++ b/numpy/core/src/multiarray/compiled_base.c @@ -1521,7 +1521,7 @@ pack_inner(const char *inptr, bb[2] = npyv_tobits_b8(npyv_cmpneq_u8(v2, v_zero)); bb[3] = npyv_tobits_b8(npyv_cmpneq_u8(v3, v_zero)); if(out_stride == 1 && - (!NPY_STRONG_ALIGNMENT || isAligned)) { + (!NPY_ALIGNMENT_REQUIRED || isAligned)) { npy_uint64 *ptr64 = (npy_uint64*)outptr; #if NPY_SIMD_WIDTH == 16 npy_uint64 bcomp = bb[0] | (bb[1] << 16) | (bb[2] << 32) | (bb[3] << 48); diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index 77fff5eb47c3..8e4b2ebe120e 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -2245,7 +2245,7 @@ count_boolean_trues(int ndim, char *data, npy_intp const *ashape, npy_intp const count += count_nonzero_bytes((const npy_uint8 *)d, stride); d += stride; #else - if (NPY_CPU_HAVE_UNALIGNED_ACCESS || + if (!NPY_ALIGNMENT_REQUIRED || npy_is_aligned(d, sizeof(npy_uint64))) { npy_uintp 
stride = 6 * sizeof(npy_uint64); for (; d < e - (shape[0] % stride); d += stride) { diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src index 0590558be2f5..b8ebee6ed96b 100644 --- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src +++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src @@ -29,7 +29,7 @@ * instructions (16 byte). * So this flag can only be enabled if autovectorization is disabled. */ -#if NPY_CPU_HAVE_UNALIGNED_ACCESS +#if NPY_ALIGNMENT_REQUIRED # define NPY_USE_UNALIGNED_ACCESS 0 #else # define NPY_USE_UNALIGNED_ACCESS 0