numpy
diff --git a/‎doc/source/reference/alignment.rst‎
Lines changed: 1 addition & 1 deletion b/‎doc/source/reference/alignment.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎numpy/core/src/multiarray/array_assign_array.c‎
Lines changed: 38 additions & 18 deletions b/‎numpy/core/src/multiarray/array_assign_array.c‎
Lines changed: 38 additions & 18 deletions
diff --git a/‎numpy/core/src/multiarray/nditer_constr.c‎
Lines changed: 1 addition & 1 deletion b/‎numpy/core/src/multiarray/nditer_constr.c‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎numpy/core/src/umath/ufunc_object.c‎
Lines changed: 1 addition & 1 deletion b/‎numpy/core/src/umath/ufunc_object.c‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎numpy/core/tests/test_multiarray.py‎
Lines changed: 33 additions & 0 deletions b/‎numpy/core/tests/test_multiarray.py‎
Lines changed: 33 additions & 0 deletions
@@ -97,7 +97,7 @@ Here is how the variables above are used:
 
 Note that the strided-copy and strided-cast code are deeply intertwined and so
 any arrays being processed by them must be both uint and true aligned, even
-though te copy-code only needs uint alignment and the cast code only true
+though the copy-code only needs uint alignment and the cast code only true
 alignment.  If there is ever a big rewrite of this code it would be good to
 allow them to use different alignments.
 
 
@@ -24,6 +24,38 @@
 
 #include "array_assign.h"
 
+/* Check both uint and true alignment */
+NPY_NO_EXPORT int
+copycast_isaligned(int ndim, npy_intp *shape,
+        PyArray_Descr *dtype, char *data, npy_intp *strides)
+{
+    int aligned;
+    int big_aln, small_aln;
+
+    int uint_aln = npy_uint_alignment(dtype->elsize);
+    int true_aln = dtype->alignment;
+
+    /* uint alignment can be 0, meaning not uint alignable */
+    if (uint_aln == 0) {
+        return 0;
+    }
+
+    if (true_aln >= uint_aln) {
+        big_aln = true_aln;
+        small_aln = uint_aln;
+    }
+    else {
+        big_aln = uint_aln;
+        small_aln = true_aln;
+    }
+
+    aligned = raw_array_is_aligned(ndim, shape, data, strides, big_aln);
+    if (aligned && big_aln % small_aln != 0) {
+        aligned = raw_array_is_aligned(ndim, shape, data, strides, small_aln);
+    }
+    return aligned;
+}
+
 /*
  * Assigns the array from 'src' to 'dst'. The strides must already have
  * been broadcast.
@@ -48,15 +80,9 @@ raw_array_assign_array(int ndim, npy_intp *shape,
 
     NPY_BEGIN_THREADS_DEF;
 
-    /* Check both uint and true alignment */
-    aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
-                                   npy_uint_alignment(dst_dtype->elsize)) &&
-              raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
-                                   dst_dtype->alignment) &&
-              raw_array_is_aligned(ndim, shape, src_data, src_strides,
-                                   npy_uint_alignment(src_dtype->elsize));
-              raw_array_is_aligned(ndim, shape, src_data, src_strides,
-                                   src_dtype->alignment);
+    aligned =
+        copycast_isaligned(ndim, shape, dst_dtype, dst_data, dst_strides) &&
+        copycast_isaligned(ndim, shape, src_dtype, src_data, src_strides);
 
     /* Use raw iteration with no heap allocation */
     if (PyArray_PrepareTwoRawArrayIter(
@@ -137,15 +163,9 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp *shape,
 
     NPY_BEGIN_THREADS_DEF;
 
-    /* Check both uint and true alignment */
-    aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
-                                   npy_uint_alignment(dst_dtype->elsize)) &&
-              raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
-                                   dst_dtype->alignment) &&
-              raw_array_is_aligned(ndim, shape, src_data, src_strides,
-                                   npy_uint_alignment(src_dtype->elsize));
-              raw_array_is_aligned(ndim, shape, src_data, src_strides,
-                                   src_dtype->alignment);
+    aligned =
+        copycast_isaligned(ndim, shape, dst_dtype, dst_data, dst_strides) &&
+        copycast_isaligned(ndim, shape, src_dtype, src_data, src_strides);
 
     /* Use raw iteration with no heap allocation */
     if (PyArray_PrepareThreeRawArrayIter(
 
@@ -1132,7 +1132,7 @@ npyiter_prepare_one_operand(PyArrayObject **op,
         /* Check if the operand is aligned */
         if (op_flags & NPY_ITER_ALIGNED) {
             /* Check alignment */
-            if (!(IsUintAligned(*op) && IsAligned(*op))) {
+            if (!IsAligned(*op)) {
                 NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
                                     "because of NPY_ITER_ALIGNED\n");
                 *op_itflags |= NPY_OP_ITFLAG_CAST;
 
@@ -3066,7 +3066,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     PyArray_free(remap_axis_memory);
     PyArray_free(remap_axis);
 
-    NPY_UF_DBG_PRINT1("Returning code %d\n", reval);
+    NPY_UF_DBG_PRINT1("Returning code %d\n", retval);
 
     return retval;
 
 
@@ -8020,6 +8020,39 @@ def test_various_alignments(self):
                         for shape in [n, (1, 2, 3, n)]:
                             self.check(shape, np.dtype(dtype), order, align)
 
+    def test_strided_loop_alignments(self):
+        # particularly test that complex64 and float128 use right alignment
+        # code-paths, since these are particularly problematic. It is useful to
+        # turn on USE_DEBUG for this test, so lowlevel-loop asserts are run.
+        for align in [1, 2, 4, 8, 12, 16, None]:
+            xf64 = _aligned_zeros(3, np.float64)
+
+            xc64 = _aligned_zeros(3, np.complex64, align=align)
+            xf128 = _aligned_zeros(3, np.longdouble, align=align)
+
+            # test casting, both to and from misaligned
+            with suppress_warnings() as sup:
+                sup.filter(np.ComplexWarning, "Casting complex values")
+                xc64.astype('f8')
+            xf64.astype(np.complex64)
+            test = xc64 + xf64
+
+            xf128.astype('f8')
+            xf64.astype(np.longdouble)
+            test = xf128 + xf64
+
+            test = xf128 + xc64
+
+            # test copy, both to and from misaligned
+            # contig copy
+            xf64[:] = xf64.copy()
+            xc64[:] = xc64.copy()
+            xf128[:] = xf128.copy()
+            # strided copy
+            xf64[::2] = xf64[::2].copy()
+            xc64[::2] = xc64[::2].copy()
+            xf128[::2] = xf128[::2].copy()
+
 def test_getfield():
     a = np.arange(32, dtype='uint16')
     if sys.byteorder == 'little':