MAINT: Further fixups to uint alignment checks · numpy/numpy@f54f39f · GitHub
[go: up one dir, main page]

Skip to content

Commit f54f39f

Browse files
committed
MAINT: Further fixups to uint alignment checks
1 parent aef982e commit f54f39f

File tree

5 files changed

+74
-21
lines changed

5 files changed

+74
-21
lines changed

doc/source/reference/alignment.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ Here is how the variables above are used:
9797

9898
Note that the strided-copy and strided-cast code are deeply intertwined and so
9999
any arrays being processed by them must be both uint and true aligned, even
100-
though te copy-code only needs uint alignment and the cast code only true
100+
though the copy-code only needs uint alignment and the cast code only true
101101
alignment. If there is ever a big rewrite of this code it would be good to
102102
allow them to use different alignments.
103103

numpy/core/src/multiarray/array_assign_array.c

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,38 @@
2424

2525
#include "array_assign.h"
2626

27+
/* Check both uint and true alignment */
28+
NPY_NO_EXPORT int
29+
copycast_isaligned(int ndim, npy_intp *shape,
30+
PyArray_Descr *dtype, char *data, npy_intp *strides)
31+
{
32+
int aligned;
33+
int big_aln, small_aln;
34+
35+
int uint_aln = npy_uint_alignment(dtype->elsize);
36+
int true_aln = dtype->alignment;
37+
38+
/* uint alignment can be 0, meaning not uint alignable */
39+
if (uint_aln == 0) {
40+
return 0;
41+
}
42+
43+
if (true_aln >= uint_aln) {
44+
big_aln = true_aln;
45+
small_aln = uint_aln;
46+
}
47+
else {
48+
big_aln = uint_aln;
49+
small_aln = true_aln;
50+
}
51+
52+
aligned = raw_array_is_aligned(ndim, shape, data, strides, big_aln);
53+
if (aligned && big_aln % small_aln != 0) {
54+
aligned = raw_array_is_aligned(ndim, shape, data, strides, small_aln);
55+
}
56+
return aligned;
57+
}
58+
2759
/*
2860
* Assigns the array from 'src' to 'dst'. The strides must already have
2961
* been broadcast.
@@ -48,15 +80,9 @@ raw_array_assign_array(int ndim, npy_intp *shape,
4880

4981
NPY_BEGIN_THREADS_DEF;
5082

51-
/* Check both uint and true alignment */
52-
aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
53-
npy_uint_alignment(dst_dtype->elsize)) &&
54-
raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
55-
dst_dtype->alignment) &&
56-
raw_array_is_aligned(ndim, shape, src_data, src_strides,
57-
npy_uint_alignment(src_dtype->elsize));
58-
raw_array_is_aligned(ndim, shape, src_data, src_strides,
59-
src_dtype->alignment);
83+
aligned =
84+
copycast_isaligned(ndim, shape, dst_dtype, dst_data, dst_strides) &&
85+
copycast_isaligned(ndim, shape, src_dtype, src_data, src_strides);
6086

6187
/* Use raw iteration with no heap allocation */
6288
if (PyArray_PrepareTwoRawArrayIter(
@@ -137,15 +163,9 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp *shape,
137163

138164
NPY_BEGIN_THREADS_DEF;
139165

140-
/* Check both uint and true alignment */
141-
aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
142-
npy_uint_alignment(dst_dtype->elsize)) &&
143-
raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
144-
dst_dtype->alignment) &&
145-
raw_array_is_aligned(ndim, shape, src_data, src_strides,
146-
npy_uint_alignment(src_dtype->elsize));
147-
raw_array_is_aligned(ndim, shape, src_data, src_strides,
148-
src_dtype->alignment);
166+
aligned =
167+
copycast_isaligned(ndim, shape, dst_dtype, dst_data, dst_strides) &&
168+
copycast_isaligned(ndim, shape, src_dtype, src_data, src_strides);
149169

150170
/* Use raw iteration with no heap allocation */
151171
if (PyArray_PrepareThreeRawArrayIter(

numpy/core/src/multiarray/nditer_constr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1132,7 +1132,7 @@ npyiter_prepare_one_operand(PyArrayObject **op,
11321132
/* Check if the operand is aligned */
11331133
if (op_flags & NPY_ITER_ALIGNED) {
11341134
/* Check alignment */
1135-
if (!(IsUintAligned(*op) && IsAligned(*op))) {
1135+
if (!IsAligned(*op)) {
11361136
NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
11371137
"because of NPY_ITER_ALIGNED\n");
11381138
*op_itflags |= NPY_OP_ITFLAG_CAST;

numpy/core/src/umath/ufunc_object.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3066,7 +3066,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
30663066
PyArray_free(remap_axis_memory);
30673067
PyArray_free(remap_axis);
30683068

3069-
NPY_UF_DBG_PRINT1("Returning code %d\n", reval);
3069+
NPY_UF_DBG_PRINT1("Returning code %d\n", retval);
30703070

30713071
return retval;
30723072

numpy/core/tests/test_multiarray.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8020,6 +8020,39 @@ def test_various_alignments(self):
80208020
for shape in [n, (1, 2, 3, n)]:
80218021
self.check(shape, np.dtype(dtype), order, align)
80228022

8023+
def test_strided_loop_alignments(self):
8024+
# particularly test that complex64 and float128 use right alignment
8025+
# code-paths, since these are particularly problematic. It is useful to
8026+
# turn on USE_DEBUG for this test, so lowlevel-loop asserts are run.
8027+
for align in [1, 2, 4, 8, 12, 16, None]:
8028+
xf64 = _aligned_zeros(3, np.float64)
8029+
8030+
xc64 = _aligned_zeros(3, np.complex64, align=align)
8031+
xf128 = _aligned_zeros(3, np.longdouble, align=align)
8032+
8033+
# test casting, both to and from misaligned
8034+
with suppress_warnings() as sup:
8035+
sup.filter(np.ComplexWarning, "Casting complex values")
8036+
xc64.astype('f8')
8037+
xf64.astype(np.complex64)
8038+
test = xc64 + xf64
8039+
8040+
xf128.astype('f8')
8041+
xf64.astype(np.longdouble)
8042+
test = xf128 + xf64
8043+
8044+
test = xf128 + xc64
8045+
8046+
# test copy, both to and from misaligned
8047+
# contig copy
8048+
xf64[:] = xf64.copy()
8049+
xc64[:] = xc64.copy()
8050+
xf128[:] = xf128.copy()
8051+
# strided copy
8052+
xf64[::2] = xf64[::2].copy()
8053+
xc64[::2] = xc64[::2].copy()
8054+
xf128[::2] = xf128[::2].copy()
8055+
80238056
def test_getfield():
80248057
a = np.arange(32, dtype='uint16')
80258058
if sys.byteorder == 'little':

0 commit comments

Comments
 (0)
0