MAINT: CPUs that support unaligned access. (#18065) · numpy/numpy@da887a6 · GitHub
[go: up one dir, main page]

Skip to content

Commit da887a6

Browse files
authored
MAINT: CPUs that support unaligned access. (#18065)
* add CPUs that support unaligned access. * add comments demonstrating the common scenarios of unaligned access.
1 parent 444f696 commit da887a6

File tree

6 files changed

+14
-16
lines changed

6 files changed

+14
-16
lines changed

numpy/core/include/numpy/npy_common.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,6 @@
1010
#include <npy_config.h>
1111
#endif
1212

13-
// int*, int64* should be properly aligned on ARMv7 to avoid bus error
14-
#if !defined(NPY_STRONG_ALIGNMENT) && defined(__arm__) && !(defined(__aarch64__) || defined(_M_ARM64))
15-
#define NPY_STRONG_ALIGNMENT 1
16-
#endif
17-
#if !defined(NPY_STRONG_ALIGNMENT)
18-
#define NPY_STRONG_ALIGNMENT 0
19-
#endif
20-
2113
// compile time environment variables
2214
#ifndef NPY_RELAXED_STRIDES_CHECKING
2315
#define NPY_RELAXED_STRIDES_CHECKING 0

numpy/core/include/numpy/npy_cpu.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,16 @@
110110
information about your platform (OS, CPU and compiler)
111111
#endif
112112

113-
#if (defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64))
114-
#define NPY_CPU_HAVE_UNALIGNED_ACCESS 1
115-
#else
116-
#define NPY_CPU_HAVE_UNALIGNED_ACCESS 0
113+
/*
114+
* Except for the following architectures, memory access is limited to the natural
115+
* alignment of data types; otherwise it may lead to bus errors or performance regressions.
116+
* For more details about unaligned access, see https://www.kernel.org/doc/Documentation/unaligned-memory-access.txt.
117+
*/
118+
#if defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64) || defined(__aarch64__) || defined(__powerpc64__)
119+
#define NPY_ALIGNMENT_REQUIRED 0
120+
#endif
121+
#ifndef NPY_ALIGNMENT_REQUIRED
122+
#define NPY_ALIGNMENT_REQUIRED 1
117123
#endif
118124

119125
#endif

numpy/core/src/multiarray/common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ npy_memchr(char * haystack, char needle,
267267
}
268268
else {
269269
/* usually find elements to skip path */
270-
if (NPY_CPU_HAVE_UNALIGNED_ACCESS && needle == 0 && stride == 1) {
270+
if (!NPY_ALIGNMENT_REQUIRED && needle == 0 && stride == 1) {
271271
/* iterate until last multiple of 4 */
272272
char * block_end = haystack + size - (size % sizeof(unsigned int));
273273
while (p < block_end) {

numpy/core/src/multiarray/compiled_base.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1521,7 +1521,7 @@ pack_inner(const char *inptr,
15211521
bb[2] = npyv_tobits_b8(npyv_cmpneq_u8(v2, v_zero));
15221522
bb[3] = npyv_tobits_b8(npyv_cmpneq_u8(v3, v_zero));
15231523
if(out_stride == 1 &&
1524-
(!NPY_STRONG_ALIGNMENT || isAligned)) {
1524+
(!NPY_ALIGNMENT_REQUIRED || isAligned)) {
15251525
npy_uint64 *ptr64 = (npy_uint64*)outptr;
15261526
#if NPY_SIMD_WIDTH == 16
15271527
npy_uint64 bcomp = bb[0] | (bb[1] << 16) | (bb[2] << 32) | (bb[3] << 48);

numpy/core/src/multiarray/item_selection.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2245,7 +2245,7 @@ count_boolean_trues(int ndim, char *data, npy_intp const *ashape, npy_intp const
22452245
count += count_nonzero_bytes((const npy_uint8 *)d, stride);
22462246
d += stride;
22472247
#else
2248-
if (NPY_CPU_HAVE_UNALIGNED_ACCESS ||
2248+
if (!NPY_ALIGNMENT_REQUIRED ||
22492249
npy_is_aligned(d, sizeof(npy_uint64))) {
22502250
npy_uintp stride = 6 * sizeof(npy_uint64);
22512251
for (; d < e - (shape[0] % stride); d += stride) {

numpy/core/src/multiarray/lowlevel_strided_loops.c.src

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
* instructions (16 byte).
3030
* So this flag can only be enabled if autovectorization is disabled.
3131
*/
32-
#if NPY_CPU_HAVE_UNALIGNED_ACCESS
32+
#if NPY_ALIGNMENT_REQUIRED
3333
# define NPY_USE_UNALIGNED_ACCESS 0
3434
#else
3535
# define NPY_USE_UNALIGNED_ACCESS 0

0 commit comments

Comments
 (0)
0