MAINT: Further fixups to uint alignment checks by charris · Pull Request #12706 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

MAINT: Further fixups to uint alignment checks #12706

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/reference/alignment.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ Here is how the variables above are used:

Note that the strided-copy and strided-cast code are deeply intertwined and so
any arrays being processed by them must be both uint and true aligned, even
though te copy-code only needs uint alignment and the cast code only true
though the copy-code only needs uint alignment and the cast code only true
alignment. If there is ever a big rewrite of this code it would be good to
allow them to use different alignments.

Expand Down
56 changes: 38 additions & 18 deletions numpy/core/src/multiarray/array_assign_array.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,38 @@

#include "array_assign.h"

/*
 * Check an array against both alignments relevant to the copy/cast code:
 * the uint alignment derived from the element size and the dtype's own
 * ("true") alignment.
 *
 * Returns 0 when the element size has no uint alignment. Otherwise returns
 * the result of raw_array_is_aligned for the larger alignment, or for the
 * smaller one when that second check turns out to be necessary.
 */
NPY_NO_EXPORT int
copycast_isaligned(int ndim, npy_intp *shape,
        PyArray_Descr *dtype, char *data, npy_intp *strides)
{
    const int uint_align = npy_uint_alignment(dtype->elsize);
    const int true_align = dtype->alignment;
    int larger, smaller;
    int is_aligned;

    /* A uint alignment of 0 means the data is not uint alignable */
    if (uint_align == 0) {
        return 0;
    }

    /* Sort the two alignments; ties keep the true alignment as the larger */
    larger = (true_align >= uint_align) ? true_align : uint_align;
    smaller = (true_align >= uint_align) ? uint_align : true_align;

    is_aligned = raw_array_is_aligned(ndim, shape, data, strides, larger);

    /*
     * The smaller alignment only needs its own check when the larger one
     * passed and is not a multiple of the smaller one.
     */
    if (!is_aligned || larger % smaller == 0) {
        return is_aligned;
    }
    return raw_array_is_aligned(ndim, shape, data, strides, smaller);
}

/*
* Assigns the array from 'src' to 'dst'. The strides must already have
* been broadcast.
Expand All @@ -48,15 +80,9 @@ raw_array_assign_array(int ndim, npy_intp *shape,

NPY_BEGIN_THREADS_DEF;

/* Check both uint and true alignment */
aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
npy_uint_alignment(dst_dtype->elsize)) &&
raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
dst_dtype->alignment) &&
raw_array_is_aligned(ndim, shape, src_data, src_strides,
npy_uint_alignment(src_dtype->elsize));
raw_array_is_aligned(ndim, shape, src_data, src_strides,
src_dtype->alignment);
aligned =
copycast_isaligned(ndim, shape, dst_dtype, dst_data, dst_strides) &&
copycast_isaligned(ndim, shape, src_dtype, src_data, src_strides);

/* Use raw iteration with no heap allocation */
if (PyArray_PrepareTwoRawArrayIter(
Expand Down Expand Up @@ -137,15 +163,9 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp *shape,

NPY_BEGIN_THREADS_DEF;

/* Check both uint and true alignment */
aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
npy_uint_alignment(dst_dtype->elsize)) &&
raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
dst_dtype->alignment) &&
raw_array_is_aligned(ndim, shape, src_data, src_strides,
npy_uint_alignment(src_dtype->elsize));
raw_array_is_aligned(ndim, shape, src_data, src_strides,
src_dtype->alignment);
aligned =
copycast_isaligned(ndim, shape, dst_dtype, dst_data, dst_strides) &&
copycast_isaligned(ndim, shape, src_dtype, src_data, src_strides);

/* Use raw iteration with no heap allocation */
if (PyArray_PrepareThreeRawArrayIter(
Expand Down
2 changes: 1 addition & 1 deletion numpy/core/src/multiarray/nditer_constr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1132,7 +1132,7 @@ npyiter_prepare_one_operand(PyArrayObject **op,
/* Check if the operand is aligned */
if (op_flags & NPY_ITER_ALIGNED) {
/* Check alignment */
if (!(IsUintAligned(*op) && IsAligned(*op))) {
if (!IsAligned(*op)) {
NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
"because of NPY_ITER_ALIGNED\n");
*op_itflags |= NPY_OP_ITFLAG_CAST;
Expand Down
2 changes: 1 addition & 1 deletion numpy/core/src/umath/ufunc_object.c
Original file line number Diff line number Diff line change
Expand Up @@ -3064,7 +3064,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
Py_XDECREF(full_args.in);
Py_XDECREF(full_args.out);

NPY_UF_DBG_PRINT1("Returning code %d\n", reval);
NPY_UF_DBG_PRINT1("Returning code %d\n", retval);

return retval;

Expand Down
33 changes: 33 additions & 0 deletions numpy/core/tests/test_multiarray.py
A20B
Original file line number Diff line number Diff line change
Expand Up @@ -8020,6 +8020,39 @@ def test_various_alignments(self):
for shape in [n, (1, 2, 3, n)]:
self.check(shape, np.dtype(dtype), order, align)

def test_strided_loop_alignments(self):
# Smoke test: nothing is asserted here. The casts, additions and copies
# below only have to run for every requested alignment.
#
# In particular, test that complex64 and float128 use the right alignment
# code-paths, since these are particularly problematic. It is useful to
# turn on USE_DEBUG for this test, so lowlevel-loop asserts are run.
for align in [1, 2, 4, 8, 12, 16, None]:
# float64 operand: created without passing an align argument
xf64 = _aligned_zeros(3, np.float64)

# operands under test: created with the alignment of this iteration
xc64 = _aligned_zeros(3, np.complex64, align=align)
xf128 = _aligned_zeros(3, np.longdouble, align=align)

# test casting, both to and from misaligned
with suppress_warnings() as sup:
# silence the ComplexWarning expected from the complex -> 'f8' cast
sup.filter(np.ComplexWarning, "Casting complex values")
xc64.astype('f8')
xf64.astype(np.complex64)
# `test` is never read; the sums are evaluated only to run the loops
test = xc64 + xf64

xf128.astype('f8')
xf64.astype(np.longdouble)
test = xf128 + xf64

test = xf128 + xc64

# test copy, both to and from misaligned
# contiguous copy: assign a fresh copy back over the whole array
xf64[:] = xf64.copy()
xc64[:] = xc64.copy()
xf128[:] = xf128.copy()
# strided copy: same, but through a step-2 slice on both sides
xf64[::2] = xf64[::2].copy()
xc64[::2] = xc64[::2].copy()
xf128[::2] = xf128[::2].copy()

def test_getfield():
a = np.arange(32, dtype='uint16')
if sys.byteorder == 'little':
Expand Down
0