From 24b2a2d36a7e8356310cd16dbe60abd9d0e682dc Mon Sep 17 00:00:00 2001 From: Jaime Fernandez Date: Mon, 28 Mar 2016 01:23:42 +0200 Subject: [PATCH 1/4] BUG: shift operator cycles, fixes #2449 --- numpy/core/src/umath/loops.c.src | 114 ++++++++++++++++++++++++-- numpy/core/src/umath/scalarmath.c.src | 45 ++++++++-- numpy/core/tests/test_scalarmath.py | 32 ++++++++ 3 files changed, 180 insertions(+), 11 deletions(-) diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 0d9806f5d6ab..9b90b67be38c 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -772,6 +772,7 @@ BOOL__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UN * npy_long, npy_ulong, npy_longlong, npy_ulonglong# * #ftype = npy_float, npy_float, npy_float, npy_float, npy_double, npy_double, * npy_double, npy_double, npy_double, npy_double# + * #issigned = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0# */ #define @TYPE@_floor_divide @TYPE@_divide @@ -824,15 +825,15 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 void /**begin repeat1 * Arithmetic - * #kind = add, subtract, multiply, bitwise_and, bitwise_or, bitwise_xor, - * left_shift, right_shift# - * #OP = +, -,*, &, |, ^, <<, >># + * #kind = add, subtract, multiply, bitwise_and, bitwise_or, bitwise_xor# + * #OP = +, -,*, &, |, ^# */ NPY_NO_EXPORT NPY_GCC_OPT_3 void -@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, + void *NPY_UNUSED(func)) { - if(IS_BINARY_REDUCE) { + if (IS_BINARY_REDUCE) { BINARY_REDUCE_LOOP(@type@) { io1 @OP@= *(@type@ *)ip2; } @@ -845,6 +846,109 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 void /**end repeat1**/ +/* + * Arithmetic bit shift operations. + * + * Intel hardware masks bit shift values, so large shifts wrap around + * and can produce surprising results. The special handling ensures that + * behavior is independent of compiler or hardware. + * TODO: We could implement consistent behavior for negative shifts, + * which is undefined in C. + */ + +#define LEFT_SHIFT_OP \ + do { \ + if (NPY_LIKELY(in2 < sizeof(@type@) * 8)) { \ + *out = in1 << in2; \ + } \ + else { \ + *out = 0; \ + } \ + } while (0) + + +NPY_NO_EXPORT NPY_GCC_OPT_3 void +@TYPE@_left_shift(char **args, npy_intp *dimensions, npy_intp *steps, + void *NPY_UNUSED(func)) +{ + if (IS_BINARY_REDUCE) { + BINARY_REDUCE_LOOP(@type@) { + @type@ ip2_val = *(@type@ *)ip2; + + if (NPY_LIKELY(ip2_val < sizeof(@type@) * 8)) { + io1 <<= ip2_val; + } + else { + io1 = 0; + } + } + *((@type@ *)iop1) = io1; + } + else { + BINARY_LOOP_FAST(@type@, @type@, LEFT_SHIFT_OP); + } +} + +#undef LEFT_SHIFT_OP + +#define RIGHT_SHIFT_OP_SIGNED \ + do { \ + if (NPY_LIKELY(in2 < sizeof(@type@) * 8)) { \ + *out = in1 >> in2; \ + } \ + else if (in1 < 0) { \ + *out = -1; \ + } \ + else { \ + *out = 0; \ + } \ + } while (0) + +#define RIGHT_SHIFT_OP_UNSIGNED \ + do { \ + if (NPY_LIKELY(in2 < sizeof(@type@) * 8)) { \ + *out = in1 >> in2; \ + } \ + else { \ + *out = 0; \ + } \ + } while (0) + +NPY_NO_EXPORT NPY_GCC_OPT_3 void +@TYPE@_right_shift(char **args, npy_intp *dimensions, npy_intp *steps, + void *NPY_UNUSED(func)) +{ + if (IS_BINARY_REDUCE) { + BINARY_REDUCE_LOOP(@type@) { + @type@ ip2_val = *(@type@ *)ip2; + + if (NPY_LIKELY(ip2_val < sizeof(@type@) * 8)) { + io1 >>= ip2_val; + } +#if @issigned@ + else if (io1 < 0) { + io1 = -1; + } +#endif + else { + io1 = 0; + } + } + *((@type@ *)iop1) = io1; + } + else { +#if @issigned@ + BINARY_LOOP_FAST(@type@, @type@, RIGHT_SHIFT_OP_SIGNED); +#else + BINARY_LOOP_FAST(@type@, @type@, RIGHT_SHIFT_OP_UNSIGNED); +#endif + } +} + +#undef RIGHT_SHIFT_OP_SIGNED +#undef RIGHT_SHIFT_OP_UNSIGNED + + /**begin repeat1 * #kind = equal, not_equal, greater, greater_equal, less, less_equal, * logical_and, logical_or# diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src index c651383eb17f..84dc8e0f7ebe 100644 --- a/numpy/core/src/umath/scalarmath.c.src +++ b/numpy/core/src/umath/scalarmath.c.src @@ -245,25 +245,58 @@ static void /**end repeat**/ - -/* QUESTION: Should we check for overflow / underflow in (l,r)shift? */ - /**begin repeat * #name = byte, ubyte, short, ushort, int, uint, * long, ulong, longlong, ulonglong# * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, * npy_long, npy_ulong, npy_longlong, npy_ulonglong# + * #issigned = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0# */ /**begin repeat1 - * #oper = and, xor, or, lshift, rshift# - * #op = &, ^, |, <<, >># + * #oper = and, xor, or# + * #op = &, ^, |# */ #define @name@_ctype_@oper@(arg1, arg2, out) *(out) = (arg1) @op@ (arg2) /**end repeat1**/ +#define @name@_ctype_lshift(arg1, arg2, out) \ + do { \ + if (NPY_LIKELY((arg2) < sizeof(@type@) * 8)) { \ + *(out) = (arg1) << (arg2); \ + } \ + else { \ + *(out) = 0; \ + } \ + } while (0) + +#if @issigned@ + #define @name@_ctype_rshift(arg1, arg2, out) \ + do { \ + if (NPY_LIKELY((arg2) < sizeof(@type@) * 8)) { \ + *(out) = (arg1) >> (arg2); \ + } \ + else if ((arg1) < 0) { \ + *(out) = -1; \ + } \ + else { \ + *(out) = 0; \ + } \ + } while (0) +#else + #define @name@_ctype_rshift(arg1, arg2, out) \ + do { \ + if (NPY_LIKELY((arg2) < sizeof(@type@) * 8)) { \ + *(out) = (arg1) >> (arg2); \ + } \ + else { \ + *(out) = 0; \ + } \ + } while (0) +#endif + /**end repeat**/ /**begin repeat @@ -575,7 +608,7 @@ static void * 1) Convert the types to the common type if both are scalars (0 return) * 2) If both are not scalars use ufunc machinery (-2 return) * 3) If both are scalars but cannot be cast to the right type - * return NotImplmented (-1 return) + * return NotImplemented (-1 return) * * 4) Perform the function on the C-type. * 5) If an error condition occurred, check to see diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py index b8f4388b16b4..52c9d3bc63f6 100644 --- a/numpy/core/tests/test_scalarmath.py +++ b/numpy/core/tests/test_scalarmath.py @@ -525,5 +525,37 @@ def test_numpy_abs(self): self._test_abs_func(np.abs) +class TestBitShifts(TestCase): + + def test_left_shift(self): + # gh-2449 + for dt in np.typecodes['AllInteger']: + arr = np.array([5, -5], dtype=dt) + scl_pos, scl_neg = arr + for shift in np.array([arr.dtype.itemsize * 8], dtype=dt): + res_pos = scl_pos << shift + res_neg = scl_neg << shift + assert_equal(res_pos, 0) + assert_equal(res_neg, 0) + # Result on scalars should be the same as on arrays + assert_array_equal(arr << shift, [res_pos, res_neg]) + + def test_right_shift(self): + # gh-2449 + for dt in np.typecodes['AllInteger']: + arr = np.array([5, -5], dtype=dt) + scl_pos, scl_neg = arr + for shift in np.array([arr.dtype.itemsize * 8], dtype=dt): + res_pos = scl_pos >> shift + res_neg = scl_neg >> shift + assert_equal(res_pos, 0) + if dt in np.typecodes['UnsignedInteger']: + assert_equal(res_neg, 0) + else: + assert_equal(res_neg, -1) + # Result on scalars should be the same as on arrays + assert_array_equal(arr >> shift, [res_pos, res_neg], dt) + + if __name__ == "__main__": run_module_suite() From fca077c7e2fc8f21c639ba479267a34eb16a2810 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sat, 8 Jun 2019 21:13:09 -0700 Subject: [PATCH 2/4] MAINT: Respond to review comments on gh-7473 This: * Inlines the macros in loops.c.src * Replaces 8 with `CHAR_BIT `. The `NPY_SIZEOF_*` macros are not used here because it's too much work to apply them to the signed types, and they expand to the same thing anyway. * Removes the reduce loop specializations which likely no one cares about * Uses pytest.mark.parametrize to shorten the test --- numpy/core/src/umath/loops.c.src | 104 +++++++------------------- numpy/core/src/umath/scalarmath.c.src | 56 +++++++------- numpy/core/tests/test_scalarmath.py | 46 ++++++------ 3 files changed, 77 insertions(+), 129 deletions(-) diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 2cf43d10f1e0..eccb9d82cdf6 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -777,7 +777,7 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void /**begin repeat2 * Arithmetic * #kind = add, subtract, multiply, bitwise_and, bitwise_or, bitwise_xor# - * #OP = +, -,*, &, |, ^# + * #OP = +, -, *, &, |, ^# */ #if @CHK@ @@ -808,98 +808,48 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void * which is undefined in C. */ -#define LEFT_SHIFT_OP \ - do { \ - if (NPY_LIKELY(in2 < sizeof(@type@) * 8)) { \ - *out = in1 << in2; \ - } \ - else { \ - *out = 0; \ - } \ - } while (0) - - NPY_NO_EXPORT NPY_GCC_OPT_3 void @TYPE@_left_shift@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { - if (IS_BINARY_REDUCE) { - BINARY_REDUCE_LOOP(@type@) { - @type@ ip2_val = *(@type@ *)ip2; - - if (NPY_LIKELY(ip2_val < sizeof(@type@) * 8)) { - io1 <<= ip2_val; - } - else { - io1 = 0; - } + BINARY_LOOP_FAST(@type@, @type@, + if (NPY_LIKELY(in2 < sizeof(@type@) * CHAR_BIT)) { + *out = in1 << in2; } - *((@type@ *)iop1) = io1; - } - else { - BINARY_LOOP_FAST(@type@, @type@, LEFT_SHIFT_OP); - } -} - -#undef LEFT_SHIFT_OP - -#define RIGHT_SHIFT_OP_SIGNED \ - do { \ - if (NPY_LIKELY(in2 < sizeof(@type@) * 8)) { \ - *out = in1 >> in2; \ - } \ - else if (in1 < 0) { \ - *out = -1; \ - } \ - else { \ - *out = 0; \ - } \ - } while (0) - -#define RIGHT_SHIFT_OP_UNSIGNED \ - do { \ - if (NPY_LIKELY(in2 < sizeof(@type@) * 8)) { \ - *out = in1 >> in2; \ - } \ - else { \ - *out = 0; \ - } \ - } while (0) + else { + *out = 0; + } + ); +} NPY_NO_EXPORT NPY_GCC_OPT_3 void @TYPE@_right_shift@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { - if (IS_BINARY_REDUCE) { - BINARY_REDUCE_LOOP(@type@) { - @type@ ip2_val = *(@type@ *)ip2; - - if (NPY_LIKELY(ip2_val < sizeof(@type@) * 8)) { - io1 >>= ip2_val; - } #if @SIGNED@ - else if (io1 < 0) { - io1 = -1; - } -#endif - else { - io1 = 0; - } + BINARY_LOOP_FAST(@type@, @type@, { + if (NPY_LIKELY(in2 < sizeof(@type@) * CHAR_BIT)) { + *out = in1 >> in2; } - *((@type@ *)iop1) = io1; - } - else { -#if @SIGNED@ - BINARY_LOOP_FAST(@type@, @type@, RIGHT_SHIFT_OP_SIGNED); + else if (in1 < 0) { + *out = (@type@)-1; /* shift right preserves the sign bit */ + } + else { + *out = 0; + } + }); #else - BINARY_LOOP_FAST(@type@, @type@, RIGHT_SHIFT_OP_UNSIGNED); + BINARY_LOOP_FAST(@type@, @type@, { + if (NPY_LIKELY(in2 < sizeof(@type@) * CHAR_BIT)) { + *out = in1 >> in2; + } + else { + *out = 0; + } + }); #endif - } } -#undef RIGHT_SHIFT_OP_SIGNED -#undef RIGHT_SHIFT_OP_UNSIGNED - /**begin repeat2 * #kind = equal, not_equal, greater, greater_equal, less, less_equal, diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src index 8b586caf6f1f..b691115e7ab6 100644 --- a/numpy/core/src/umath/scalarmath.c.src +++ b/numpy/core/src/umath/scalarmath.c.src @@ -263,38 +263,40 @@ static void /**end repeat1**/ -#define @name@_ctype_lshift(arg1, arg2, out) \ - do { \ - if (NPY_LIKELY((arg2) < sizeof(@type@) * 8)) { \ - *(out) = (arg1) << (arg2); \ - } \ - else { \ - *(out) = 0; \ - } \ +/* Note: these need to be kept in sync with the shift ufuncs */ + +#define @name@_ctype_lshift(arg1, arg2, out) \ + do { \ + if (NPY_LIKELY((arg2) < sizeof(@type@) * CHAR_BIT)) { \ + *(out) = (arg1) << (arg2); \ + } \ + else { \ + *(out) = 0; \ + } \ } while (0) #if @issigned@ - #define @name@_ctype_rshift(arg1, arg2, out) \ - do { \ - if (NPY_LIKELY((arg2) < sizeof(@type@) * 8)) { \ - *(out) = (arg1) >> (arg2); \ - } \ - else if ((arg1) < 0) { \ - *(out) = -1; \ - } \ - else { \ - *(out) = 0; \ - } \ + #define @name@_ctype_rshift(arg1, arg2, out) \ + do { \ + if (NPY_LIKELY((arg2) < sizeof(@type@) * CHAR_BIT)) { \ + *(out) = (arg1) >> (arg2); \ + } \ + else if ((arg1) < 0) { \ + *(out) = -1; \ + } \ + else { \ + *(out) = 0; \ + } \ } while (0) #else - #define @name@_ctype_rshift(arg1, arg2, out) \ - do { \ - if (NPY_LIKELY((arg2) < sizeof(@type@) * 8)) { \ - *(out) = (arg1) >> (arg2); \ - } \ - else { \ - *(out) = 0; \ - } \ + #define @name@_ctype_rshift(arg1, arg2, out) \ + do { \ + if (NPY_LIKELY((arg2) < sizeof(@type@) * CHAR_BIT)) { \ + *(out) = (arg1) >> (arg2); \ + } \ + else { \ + *(out) = 0; \ + } \ } while (0) #endif diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py index 7a1771ed8b4a..854df5590157 100644 --- a/numpy/core/tests/test_scalarmath.py +++ b/numpy/core/tests/test_scalarmath.py @@ -668,31 +668,27 @@ def test_numpy_abs(self): class TestBitShifts(object): - def test_left_shift(self): + @pytest.mark.parametrize('type_code', np.typecodes['AllInteger']) + @pytest.mark.parametrize('op', + [operator.rshift, operator.lshift], ids=['>>', '<<']) + def test_shift_all_bits(self, type_code, op): + """ Shifts where the shift amount is the width of the type or wider """ # gh-2449 - for dt in np.typecodes['AllInteger']: - arr = np.array([5, -5], dtype=dt) - scl_pos, scl_neg = arr - for shift in np.array([arr.dtype.itemsize * 8], dtype=dt): - res_pos = scl_pos << shift - res_neg = scl_neg << shift - assert_equal(res_pos, 0) - assert_equal(res_neg, 0) - # Result on scalars should be the same as on arrays - assert_array_equal(arr << shift, [res_pos, res_neg]) - - def test_right_shift(self): - # gh-2449 - for dt in np.typecodes['AllInteger']: - arr = np.array([5, -5], dtype=dt) - scl_pos, scl_neg = arr - for shift in np.array([arr.dtype.itemsize * 8], dtype=dt): - res_pos = scl_pos >> shift - res_neg = scl_neg >> shift - assert_equal(res_pos, 0) - if dt in np.typecodes['UnsignedInteger']: - assert_equal(res_neg, 0) + dt = np.dtype(type_code) + nbits = dt.itemsize * 8 + for val in [5, -5]: + for shift in [nbits, nbits + 4]: + val_scl = dt.type(val) + shift_scl = dt.type(shift) + res_scl = op(val_scl, shift_scl) + if val_scl < 0 and op is operator.rshift: + # sign bit is preserved + assert_equal(res_scl, -1) else: - assert_equal(res_neg, -1) + assert_equal(res_scl, 0) + # Result on scalars should be the same as on arrays - assert_array_equal(arr >> shift, [res_pos, res_neg], dt) + val_arr = np.array([val]*32, dtype=dt) + shift_arr = np.array([shift]*32, dtype=dt) + res_arr = op(val_arr, shift_arr) + assert_equal(res_arr, res_scl) From 79d7bc276afbe89c746e462d28d4bfbb4fc56148 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Tue, 11 Jun 2019 21:49:15 -0700 Subject: [PATCH 3/4] MAINT: Move shift implementation to npy_math This avoids duplication, as suggested in review. The suffices `l`, `ull` etc are for consistency with `strtoull` and friends. `h` and `uhh` are inspired from the C99 printf specifiers. --- numpy/core/include/numpy/npy_math.h | 22 +++++++++ .../core/src/npymath/npy_math_internal.h.src | 41 +++++++++++++++++ numpy/core/src/umath/loops.c.src | 45 +++++++------------ numpy/core/src/umath/scalarmath.c.src | 40 ++--------------- 4 files changed, 81 insertions(+), 67 deletions(-) diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h index 6a78ff3c2b54..3f3532d3601e 100644 --- a/numpy/core/include/numpy/npy_math.h +++ b/numpy/core/include/numpy/npy_math.h @@ -162,6 +162,28 @@ NPY_INPLACE npy_long npy_lcml(npy_long a, npy_long b); NPY_INPLACE npy_longlong npy_gcdll(npy_longlong a, npy_longlong b); NPY_INPLACE npy_longlong npy_lcmll(npy_longlong a, npy_longlong b); +NPY_INPLACE npy_ubyte npy_rshiftuhh(npy_ubyte a, npy_ubyte b); +NPY_INPLACE npy_ubyte npy_lshiftuhh(npy_ubyte a, npy_ubyte b); +NPY_INPLACE npy_ushort npy_rshiftuh(npy_ushort a, npy_ushort b); +NPY_INPLACE npy_ushort npy_lshiftuh(npy_ushort a, npy_ushort b); +NPY_INPLACE npy_uint npy_rshiftu(npy_uint a, npy_uint b); +NPY_INPLACE npy_uint npy_lshiftu(npy_uint a, npy_uint b); +NPY_INPLACE npy_ulong npy_rshiftul(npy_ulong a, npy_ulong b); +NPY_INPLACE npy_ulong npy_lshiftul(npy_ulong a, npy_ulong b); +NPY_INPLACE npy_ulonglong npy_rshiftull(npy_ulonglong a, npy_ulonglong b); +NPY_INPLACE npy_ulonglong npy_lshiftull(npy_ulonglong a, npy_ulonglong b); + +NPY_INPLACE npy_byte npy_rshifthh(npy_byte a, npy_byte b); +NPY_INPLACE npy_byte npy_lshifthh(npy_byte a, npy_byte b); +NPY_INPLACE npy_short npy_rshifth(npy_short a, npy_short b); +NPY_INPLACE npy_short npy_lshifth(npy_short a, npy_short b); +NPY_INPLACE npy_int npy_rshift(npy_int a, npy_int b); +NPY_INPLACE npy_int npy_lshift(npy_int a, npy_int b); +NPY_INPLACE npy_long npy_rshiftl(npy_long a, npy_long b); +NPY_INPLACE npy_long npy_lshiftl(npy_long a, npy_long b); +NPY_INPLACE npy_longlong npy_rshiftll(npy_longlong a, npy_longlong b); +NPY_INPLACE npy_longlong npy_lshiftll(npy_longlong a, npy_longlong b); + /* * C99 double math funcs */ diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src index fa820baac3b8..18b6d1434ef4 100644 --- a/numpy/core/src/npymath/npy_math_internal.h.src +++ b/numpy/core/src/npymath/npy_math_internal.h.src @@ -716,3 +716,44 @@ npy_@func@@c@(@type@ a, @type@ b) return npy_@func@u@c@(a < 0 ? -a : a, b < 0 ? -b : b); } /**end repeat**/ + +/* Unlike LCM and GCD, we need byte and short variants for the shift operators, + * since the result is dependent on the width of the type + */ +/**begin repeat + * + * #type = byte, short, int, long, longlong# + * #c = hh,h,,l,ll# + */ +/**begin repeat1 + * + * #u = u,# + * #is_signed = 0,1# + */ +NPY_INPLACE npy_@u@@type@ +npy_lshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b) +{ + if (NPY_LIKELY((size_t)b < sizeof(a) * CHAR_BIT)) { + return a << b; + } + else { + return 0; + } +} +NPY_INPLACE npy_@u@@type@ +npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b) +{ + if (NPY_LIKELY((size_t)b < sizeof(a) * CHAR_BIT)) { + return a >> b; + } +#if @is_signed@ + else if (a < 0) { + return (npy_@u@@type@)-1; /* preserve the sign bit */ + } +#endif + else { + return 0; + } +} +/**end repeat1**/ +/**end repeat**/ diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index eccb9d82cdf6..3d0b41318c5b 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -699,6 +699,7 @@ BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED * #ftype = npy_float, npy_float, npy_float, npy_float, npy_double, npy_double, * npy_double, npy_double, npy_double, npy_double# * #SIGNED = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0# + * #c = hh,uhh,h,uh,,u,l,ul,ll,ull# */ #define @TYPE@_floor_divide @TYPE@_divide @@ -808,46 +809,30 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void * which is undefined in C. */ +#define INT_left_shift_needs_clear_floatstatus +#define UINT_left_shift_needs_clear_floatstatus + NPY_NO_EXPORT NPY_GCC_OPT_3 void @TYPE@_left_shift@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { - BINARY_LOOP_FAST(@type@, @type@, - if (NPY_LIKELY(in2 < sizeof(@type@) * CHAR_BIT)) { - *out = in1 << in2; - } - else { - *out = 0; - } - ); + BINARY_LOOP_FAST(@type@, @type@, *out = npy_lshift@c@(in1, in2)); + +#ifdef @TYPE@_left_shift_needs_clear_floatstatus + // For some reason, our macOS CI sets an "invalid" flag here, but only + // for some types. + npy_clear_floatstatus_barrier((char*)dimensions); +#endif } +#undef INT_left_shift_needs_clear_floatstatus +#undef UINT_left_shift_needs_clear_floatstatus + NPY_NO_EXPORT NPY_GCC_OPT_3 void @TYPE@_right_shift@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { -#if @SIGNED@ - BINARY_LOOP_FAST(@type@, @type@, { - if (NPY_LIKELY(in2 < sizeof(@type@) * CHAR_BIT)) { - *out = in1 >> in2; - } - else if (in1 < 0) { - *out = (@type@)-1; /* shift right preserves the sign bit */ - } - else { - *out = 0; - } - }); -#else - BINARY_LOOP_FAST(@type@, @type@, { - if (NPY_LIKELY(in2 < sizeof(@type@) * CHAR_BIT)) { - *out = in1 >> in2; - } - else { - *out = 0; - } - }); -#endif + BINARY_LOOP_FAST(@type@, @type@, *out = npy_rshift@c@(in1, in2)); } diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src index b691115e7ab6..df440e09569d 100644 --- a/numpy/core/src/umath/scalarmath.c.src +++ b/numpy/core/src/umath/scalarmath.c.src @@ -251,7 +251,7 @@ static void * long, ulong, longlong, ulonglong# * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, * npy_long, npy_ulong, npy_longlong, npy_ulonglong# - * #issigned = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0# + * #suffix = hh,uhh,h,uh,,u,l,ul,ll,ull# */ /**begin repeat1 @@ -263,42 +263,8 @@ static void /**end repeat1**/ -/* Note: these need to be kept in sync with the shift ufuncs */ - -#define @name@_ctype_lshift(arg1, arg2, out) \ - do { \ - if (NPY_LIKELY((arg2) < sizeof(@type@) * CHAR_BIT)) { \ - *(out) = (arg1) << (arg2); \ - } \ - else { \ - *(out) = 0; \ - } \ - } while (0) - -#if @issigned@ - #define @name@_ctype_rshift(arg1, arg2, out) \ - do { \ - if (NPY_LIKELY((arg2) < sizeof(@type@) * CHAR_BIT)) { \ - *(out) = (arg1) >> (arg2); \ - } \ - else if ((arg1) < 0) { \ - *(out) = -1; \ - } \ - else { \ - *(out) = 0; \ - } \ - } while (0) -#else - #define @name@_ctype_rshift(arg1, arg2, out) \ - do { \ - if (NPY_LIKELY((arg2) < sizeof(@type@) * CHAR_BIT)) { \ - *(out) = (arg1) >> (arg2); \ - } \ - else { \ - *(out) = 0; \ - } \ - } while (0) -#endif +#define @name@_ctype_lshift(arg1, arg2, out) *(out) = npy_lshift@suffix@(arg1, arg2) +#define @name@_ctype_rshift(arg1, arg2, out) *(out) = npy_rshift@suffix@(arg1, arg2) /**end repeat**/ From 6cf6ece43589670a28b765fd03402cc08ada61f0 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Wed, 12 Jun 2019 01:13:07 -0700 Subject: [PATCH 4/4] BUG: Disable -O3 on right_shift on compilers which emit an internal error Needed to make CI pass --- numpy/core/setup.py | 6 +++++ numpy/core/setup_common.py | 39 ++++++++++++++++++++++++++++++++ numpy/core/src/umath/loops.c.src | 6 ++++- 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 338502791383..e6088a48a805 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -463,6 +463,12 @@ def generate_config_h(ext, build_dir): rep = check_long_double_representation(config_cmd) moredefs.append(('HAVE_LDOUBLE_%s' % rep, 1)) + if check_for_right_shift_internal_compiler_error(config_cmd): + moredefs.append('NPY_DO_NOT_OPTIMIZE_LONG_right_shift') + moredefs.append('NPY_DO_NOT_OPTIMIZE_ULONG_right_shift') + moredefs.append('NPY_DO_NOT_OPTIMIZE_LONGLONG_right_shift') + moredefs.append('NPY_DO_NOT_OPTIMIZE_ULONGLONG_right_shift') + # Py3K check if sys.version_info[0] == 3: moredefs.append(('NPY_PY3K', 1)) diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 6e3109ab5786..6168e3f8deca 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -5,6 +5,7 @@ import warnings import copy import binascii +import textwrap from numpy.distutils.misc_util import mingw32 @@ -414,3 +415,41 @@ def long_double_representation(lines): else: # We never detected the after_sequence raise ValueError("Could not lock sequences (%s)" % saw) + + +def check_for_right_shift_internal_compiler_error(cmd): + """ + On our arm CI, this fails with an internal compilation error + + The failure looks like the following, and can be reproduced on ARM64 GCC 5.4: + + : In function 'right_shift': + :4:20: internal compiler error: in expand_shift_1, at expmed.c:2349 + ip1[i] = ip1[i] >> in2; + ^ + Please submit a full bug report, + with preprocessed source if appropriate. + See for instructions. + Compiler returned: 1 + + This function returns True if this compiler bug is present, and we need to + turn off optimization for the function + """ + cmd._check_compiler() + has_optimize = cmd.try_compile(textwrap.dedent("""\ + __attribute__((optimize("O3"))) void right_shift() {} + """), None, None) + if not has_optimize: + return False + + no_err = cmd.try_compile(textwrap.dedent("""\ + typedef long the_type; /* fails also for unsigned and long long */ + __attribute__((optimize("O3"))) void right_shift(the_type in2, the_type *ip1, int n) { + for (int i = 0; i < n; i++) { + if (in2 < (the_type)sizeof(the_type) * 8) { + ip1[i] = ip1[i] >> in2; + } + } + } + """), None, None) + return not no_err diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 3d0b41318c5b..0c5465761a2f 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -828,7 +828,11 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 void #undef INT_left_shift_needs_clear_floatstatus #undef UINT_left_shift_needs_clear_floatstatus -NPY_NO_EXPORT NPY_GCC_OPT_3 void +NPY_NO_EXPORT +#ifndef NPY_DO_NOT_OPTIMIZE_@TYPE@_right_shift +NPY_GCC_OPT_3 +#endif +void @TYPE@_right_shift@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) {