From fb0b703b9cfcb8449ab0cc615f877cdd16e14598 Mon Sep 17 00:00:00 2001
From: mattip
Date: Mon, 10 Dec 2018 10:26:55 +0200
Subject: [PATCH 1/2] WIP, BUG: reorder operations for VS2015

---
 numpy/core/src/umath/loops.c.src  | 8 ++++----
 numpy/core/src/umath/simd.inc.src | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index f96e621b85d0..312b759e66fc 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1861,7 +1861,7 @@ NPY_NO_EXPORT void
         if (!run_unary_reduce_simd_@kind@_@TYPE@(args, dimensions, steps)) {
             BINARY_REDUCE_LOOP(@type@) {
                 const @type@ in2 = *(@type@ *)ip2;
-                io1 = (npy_isnan(io1) || io1 @OP@ in2) ? io1 : in2;
+                io1 = (io1 @OP@ in2 || npy_isnan(io1)) ? io1 : in2;
             }
             *((@type@ *)iop1) = io1;
         }
@@ -1870,7 +1870,7 @@ NPY_NO_EXPORT void
         BINARY_LOOP {
             @type@ in1 = *(@type@ *)ip1;
             const @type@ in2 = *(@type@ *)ip2;
-            in1 = (npy_isnan(in1) || in1 @OP@ in2) ? in1 : in2;
+            in1 = (in1 @OP@ in2 || npy_isnan(in1)) ? in1 : in2;
             *((@type@ *)op1) = in1;
         }
     }
@@ -1889,7 +1889,7 @@ NPY_NO_EXPORT void
     if (IS_BINARY_REDUCE) {
         BINARY_REDUCE_LOOP(@type@) {
             const @type@ in2 = *(@type@ *)ip2;
-            io1 = (npy_isnan(in2) || io1 @OP@ in2) ? io1 : in2;
+            io1 = (io1 @OP@ in2 || npy_isnan(in2)) ? io1 : in2;
         }
         *((@type@ *)iop1) = io1;
     }
@@ -1897,7 +1897,7 @@ NPY_NO_EXPORT void
         BINARY_LOOP {
             const @type@ in1 = *(@type@ *)ip1;
             const @type@ in2 = *(@type@ *)ip2;
-            *((@type@ *)op1) = (npy_isnan(in2) || in1 @OP@ in2) ? in1 : in2;
+            *((@type@ *)op1) = (in1 @OP@ in2 || npy_isnan(in2)) ? in1 : in2;
         }
     }
     npy_clear_floatstatus_barrier((char*)dimensions);
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index a3e00b5c1be4..88f2907ef8a2 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -1029,7 +1029,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
 {
     const npy_intp stride = VECTOR_SIZE_BYTES / (npy_intp)sizeof(@type@);
     LOOP_BLOCK_ALIGN_VAR(ip, @type@, VECTOR_SIZE_BYTES) {
-        *op = (npy_isnan(*op) || *op @OP@ ip[i]) ? *op : ip[i];
+        *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i];
     }
     assert(n < (stride) || npy_is_aligned(&ip[i], VECTOR_SIZE_BYTES));
     if (i + 3 * stride <= n) {
@@ -1053,11 +1053,11 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
         }
         else {
             @type@ tmp = sse2_horizontal_@VOP@_@vtype@(c1);
-            *op = (npy_isnan(*op) || *op @OP@ tmp) ? *op : tmp;
+            *op = (*op @OP@ tmp || npy_isnan(*op)) ? *op : tmp;
         }
     }
     LOOP_BLOCKED_END {
-        *op = (npy_isnan(*op) || *op @OP@ ip[i]) ? *op : ip[i];
+        *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i];
     }
     npy_clear_floatstatus_barrier((char*)op);
 }

From 31bd28c5f243479866ed053323d000172583f0fe Mon Sep 17 00:00:00 2001
From: mattip
Date: Wed, 12 Dec 2018 06:22:52 +0200
Subject: [PATCH 2/2] STY: add comment for future maintainers

---
 numpy/core/src/umath/loops.c.src  | 4 ++++
 numpy/core/src/umath/simd.inc.src | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 312b759e66fc..ae3ece77bb71 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1861,6 +1861,7 @@ NPY_NO_EXPORT void
         if (!run_unary_reduce_simd_@kind@_@TYPE@(args, dimensions, steps)) {
             BINARY_REDUCE_LOOP(@type@) {
                 const @type@ in2 = *(@type@ *)ip2;
+                /* Order of operations important for MSVC 2015 */
                 io1 = (io1 @OP@ in2 || npy_isnan(io1)) ? io1 : in2;
             }
             *((@type@ *)iop1) = io1;
@@ -1870,6 +1871,7 @@ NPY_NO_EXPORT void
         BINARY_LOOP {
             @type@ in1 = *(@type@ *)ip1;
             const @type@ in2 = *(@type@ *)ip2;
+            /* Order of operations important for MSVC 2015 */
             in1 = (in1 @OP@ in2 || npy_isnan(in1)) ? in1 : in2;
             *((@type@ *)op1) = in1;
         }
@@ -1889,6 +1891,7 @@ NPY_NO_EXPORT void
     if (IS_BINARY_REDUCE) {
         BINARY_REDUCE_LOOP(@type@) {
             const @type@ in2 = *(@type@ *)ip2;
+            /* Order of operations important for MSVC 2015 */
            io1 = (io1 @OP@ in2 || npy_isnan(in2)) ? io1 : in2;
         }
         *((@type@ *)iop1) = io1;
@@ -1897,6 +1900,7 @@ NPY_NO_EXPORT void
         BINARY_LOOP {
             const @type@ in1 = *(@type@ *)ip1;
             const @type@ in2 = *(@type@ *)ip2;
+            /* Order of operations important for MSVC 2015 */
             *((@type@ *)op1) = (in1 @OP@ in2 || npy_isnan(in2)) ? in1 : in2;
         }
     }
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 88f2907ef8a2..ee7030855db5 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -1029,6 +1029,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
 {
     const npy_intp stride = VECTOR_SIZE_BYTES / (npy_intp)sizeof(@type@);
     LOOP_BLOCK_ALIGN_VAR(ip, @type@, VECTOR_SIZE_BYTES) {
+        /* Order of operations important for MSVC 2015 */
         *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i];
     }
     assert(n < (stride) || npy_is_aligned(&ip[i], VECTOR_SIZE_BYTES));
     if (i + 3 * stride <= n) {
@@ -1053,10 +1054,12 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
         }
         else {
             @type@ tmp = sse2_horizontal_@VOP@_@vtype@(c1);
+            /* Order of operations important for MSVC 2015 */
             *op = (*op @OP@ tmp || npy_isnan(*op)) ? *op : tmp;
         }
     }
     LOOP_BLOCKED_END {
+        /* Order of operations important for MSVC 2015 */
         *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i];
     }
     npy_clear_floatstatus_barrier((char*)op);
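
The reordering in these hunks is semantically a no-op: || yields the same value in either order and npy_isnan() has no side effects, so the swap only changes which test is evaluated first, apparently as a workaround for how MSVC 2015 compiles the original ordering (per the commit subject and the added comments). Below is a minimal standalone C sketch of the scalar maximum case, for illustration only: the helper name scalar_maximum is invented here and >= stands in for the templated @OP@ placeholder; it is not part of the patch or the NumPy sources.

#include <math.h>
#include <stdio.h>

/* Illustrative only: scalar analogue of the reordered maximum expression. */
static double scalar_maximum(double io1, double in2)
{
    /* Same ordering as the patched code: comparison first, NaN check second.
     * Returns io1 when io1 >= in2 or io1 is NaN, otherwise returns in2. */
    return (io1 >= in2 || isnan(io1)) ? io1 : in2;
}

int main(void)
{
    printf("%g\n", scalar_maximum(1.0, 2.0));  /* 2 */
    printf("%g\n", scalar_maximum(NAN, 2.0));  /* nan: isnan(io1) keeps io1 */
    printf("%g\n", scalar_maximum(1.0, NAN));  /* nan: comparison is false, so in2 */
    return 0;
}

For the maximum case sketched here, either ordering propagates NaN from both operands: a NaN io1 is kept by the isnan() test, and a NaN in2 is returned because both the comparison and isnan(io1) are false.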