8000 BUG: optimizing compilers can reorder call to npy_get_floatstatus · r-devulap/numpy@ab354d2 · GitHub
[go: up one dir, main page]

Skip to content

Commit ab354d2

Browse files
committed
BUG: optimizing compilers can reorder call to npy_get_floatstatus
We should find a more generic and explicit way to prevent optimizing compilers from reordering the call to npy_get_floatstatus. To reproduce the problem, clang or gcc-8.1 are required. Confirmed that this fixes the problem using clang-6.0
1 parent 64e4a07 commit ab354d2

File tree

11 files changed

+136
-148
lines changed

11 files changed

+136
-148
lines changed

doc/source/reference/c-api.coremath.rst

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,14 +183,46 @@ Those can be useful for precise floating point comparison.
183183
* NPY_FPE_UNDERFLOW
184184
* NPY_FPE_INVALID
185185
186+
Note that :c:func:`npy_get_floatstatus_barrier` is preferable as it prevents
187+
agressive compiler optimizations reordering the call relative to
188+
the code setting the status, which could lead to incorrect results.
189+
186190
.. versionadded:: 1.9.0
187191
192+
.. c:function:: int npy_get_floatstatus_barrier(char*)
193+
194+
Get floating point status. A pointer to a local variable is passed in to
195+
prevent aggresive compiler optimizations from reodering this function call
196+
relative to the code setting the status, which could lead to incorrect
197+
results.
198+
199+
Returns a bitmask with following possible flags:
200+
201+
* NPY_FPE_DIVIDEBYZERO
202+
* NPY_FPE_OVERFLOW
203+
* NPY_FPE_UNDERFLOW
204+
* NPY_FPE_INVALID
205+
206+
.. versionadded:: 1.15.0
207+
188208
.. c:function:: int npy_clear_floatstatus()
189209
190210
Clears the floating point status. Returns the previous status mask.
191211
212+
Note that :c:func:`npy_clear_floatstatus_barrier` is preferable as it
213+
prevents agressive compiler optimizations reordering the call relative to
214+
the code setting the status, which could lead to incorrect results.
215+
192216
.. versionadded:: 1.9.0
193217
218+
.. c:function:: int npy_clear_floatstatus_barrier(char*)
219+
220+
Clears the floating point status. A pointer to a local variable is passed in to
221+
prevent aggresive compiler optimizations from reodering this function call.
222+
Returns the previous status mask.
223+
224+
.. versionadded:: 1.15.0
225+
n
194226
Complex functions
195227
~~~~~~~~~~~~~~~~~
196228
@@ -237,7 +269,7 @@ of floating point round-off error.
237269

238270
Like for other types, NumPy includes a typedef npy_half for the 16 bit
239271
float. Unlike for most of the other types, you cannot use this as a
240-
normal type in C, since is is a typedef for npy_uint16. For example,
272+
normal type in C, since it is a typedef for npy_uint16. For example,
241273
1.0 looks like 0x3c00 to C, and if you do an equality comparison
242274
between the different signed zeros, you will get -0.0 != 0.0
243275
(0x8000 != 0x0000), which is incorrect.

numpy/core/include/numpy/npy_math.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -524,8 +524,14 @@ npy_clongdouble npy_catanhl(npy_clongdouble z);
524524
#define NPY_FPE_UNDERFLOW 4
525525
#define NPY_FPE_INVALID 8
526526

527-
int npy_get_floatstatus(void);
527+
int npy_clear_floatstatus_barrier(char*);
528+
int npy_get_floatstatus_barrier(char*);
529+
/*
530+
* use caution with these - clang and gcc8.1 are known to reorder calls
531+
* to this form of the function which can defeat the check
532+
*/
528533
int npy_clear_floatstatus(void);
534+
int npy_get_floatstatus(void);
529535
void npy_set_floatstatus_divbyzero(void);
530536
void npy_set_floatstatus_overflow(void);
531537
void npy_set_floatstatus_underflow(void);

numpy/core/src/npymath/ieee754.c.src

Lines changed: 58 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
*/
77
#include "npy_math_common.h"
88
#include "npy_math_private.h"
9+
#include "numpy/utils.h"
910

1011
#ifndef HAVE_COPYSIGN
1112
double npy_copysign(double x, double y)
@@ -557,6 +558,15 @@ npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y)
557558
}
558559
#endif
559560

561+
int npy_clear_floatstatus() {
562+
char x=0;
563+
return npy_clear_floatstatus_barrier(&x);
564+
}
565+
int npy_get_floatstatus() {
566+
char x=0;
567+
return npy_get_floatstatus_barrier(&x);
568+
}
569+
560570
/*
561571
* Functions to set the floating point status word.
562572
* keep in sync with NO_FLOATING_POINT_SUPPORT in ufuncobject.h
@@ -574,18 +584,24 @@ npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y)
574584
defined(__NetBSD__)
575585
#include <ieeefp.h>
576586

577-
int npy_get_floatstatus(void)
587+
int npy_get_floatstatus_barrier(char * param))
578588
{
579589
int fpstatus = fpgetsticky();
590+
/*
591+
* By using a volatile, the compiler cannot reorder this call
592+
*/
593+
if (param != NULL) {
594+
volatile char NPY_UNUSED(c) = *(char*)param;
595+
}
580596
return ((FP_X_DZ & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
581597
((FP_X_OFL & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
582598
((FP_X_UFL & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
583599
((FP_X_INV & fpstatus) ? NPY_FPE_INVALID : 0);
584600
}
585601

586-
int npy_clear_floatstatus(void)
602+
int npy_clear_floatstatus_barrier(char * param)
587603
{
588-
int fpstatus = npy_get_floatstatus();
604+
int fpstatus = npy_get_floatstatus_barrier(param);
589605
fpsetsticky(0);
590606

591607
return fpstatus;
@@ -617,21 +633,27 @@ void npy_set_floatstatus_invalid(void)
617633
(defined(__FreeBSD__) && (__FreeBSD_version >= 502114))
618634
# include <fenv.h>
619635

620-
int npy_get_floatstatus(void)
636+
int npy_get_floatstatus_barrier(char* param)
621637
{
622638
int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW |
623639
FE_UNDERFLOW | FE_INVALID);
640+
/*
641+
* By using a volatile, the compiler cannot reorder this call
642+
*/
643+
if (param != NULL) {
644+
volatile char NPY_UNUSED(c) = *(char*)param;
645+
}
624646

625647
return ((FE_DIVBYZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
626648
((FE_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
627649
((FE_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
628650
((FE_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
629651
}
630652

631-
int npy_clear_floatstatus(void)
653+
int npy_clear_floatstatus_barrier(char * param)
632654
{
633655
/* testing float status is 50-100 times faster than clearing on x86 */
634-
int fpstatus = npy_get_floatstatus();
656+
int fpstatus = npy_get_floatstatus_barrier(param);
635657
if (fpstatus != 0) {
636658
feclearexcept(FE_DIVBYZERO | FE_OVERFLOW |
637659
FE_UNDERFLOW | FE_INVALID);
@@ -665,18 +687,24 @@ void npy_set_floatstatus_invalid(void)
665687
#include <float.h>
666688
#include <fpxcp.h>
667689

668-
int npy_get_floatstatus(void)
690+
int npy_get_floatstatus_barrier(char *param)
669691
{
670692
int fpstatus = fp_read_flag();
693+
/*
694+
* By using a volatile, the compiler cannot reorder this call
695+
*/
696+
if (param != NULL) {
697+
volatile char NPY_UNUSED(c) = *(char*)param;
698+
}
671699
return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
672700
((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
673701
((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
674702
((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
675703
}
676704

677-
int npy_clear_floatstatus(void)
705+
int npy_clear_floatstatus_barrier(char * param)
678706
{
679-
int fpstatus = npy_get_floatstatus();
707+
int fpstatus = npy_get_floatstatus_barrier(param);
680708
fp_swap_flag(0);
681709

682710
return fpstatus;
@@ -710,8 +738,11 @@ void npy_set_floatstatus_invalid(void)
710738
#include <float.h>
711739

712740

713-
int npy_get_floatstatus(void)
741+
int npy_get_floatstatus_barrier(char *param)
714742
{
743+
/*
744+
* By using a volatile, the compiler cannot reorder this call
745+
*/
715746
#if defined(_WIN64)
716747
int fpstatus = _statusfp();
717748
#else
@@ -720,15 +751,18 @@ int npy_get_floatstatus(void)
720751
_statusfp2(&fpstatus, &fpstatus2);
721752
fpstatus |= fpstatus2;
722753
#endif
754+
if (param != NULL) {
755+
volatile char NPY_UNUSED(c) = *(char*)param;
756+
}
723757
return ((SW_ZERODIVIDE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
724758
((SW_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
725759
((SW_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
726760
((SW_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
727761
}
728762

729-
int npy_clear_floatstatus(void)
763+
int npy_clear_floatstatus_barrier(char *param)
730764
{
731-
int fpstatus = npy_get_floatstatus();
765+
int fpstatus = npy_get_floatstatus_barrier(param);
732766
_clearfp();
733767

734768
return fpstatus;
@@ -739,18 +773,24 @@ int npy_clear_floatstatus(void)
739773

740774
#include <machine/fpu.h>
741775

742-
int npy_get_floatstatus(void)
776+
int npy_get_floatstatus_barrier(char *param)
743777
{
744778
unsigned long fpstatus = ieee_get_fp_control();
779+
/*
780+
* By using a volatile, the compiler cannot reorder this call
781+
*/
782+
if (param != NULL) {
783+
volatile char NPY_UNUSED(c) = *(char*)param;
784+
}
745785
return ((IEEE_STATUS_DZE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
746786
((IEEE_STATUS_OVF & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
747787
((IEEE_STATUS_UNF & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
748788
((IEEE_STATUS_INV & fpstatus) ? NPY_FPE_INVALID : 0);
749789
}
750790

751-
int npy_clear_floatstatus(void)
791+
int npy_clear_floatstatus_barrier(char *param)
752792
{
753-
long fpstatus = npy_get_floatstatus();
793+
int fpstatus = npy_get_floatstatus_barrier(param);
754794
/* clear status bits as well as disable exception mode if on */
755795
ieee_set_fp_control(0);
756796

@@ -759,13 +799,14 @@ int npy_clear_floatstatus(void)
759799

760800
#else
761801

762-
int npy_get_floatstatus(void)
802+
int npy_get_floatstatus_barrier(char NPY_UNUSED(*param))
763803
{
764804
return 0;
765805
}
766806

767-
int npy_clear_floatstatus(void)
807+
int npy_clear_floatstatus_barrier(char *param)
768808
{
809+
int fpstatus = npy_get_floatstatus_barrier(param);
769810
return 0;
770811
}
771812

numpy/core/src/umath/extobj.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ _check_ufunc_fperr(int errmask, PyObject *extobj, const char *ufunc_name) {
284284
if (!errmask) {
285285
return 0;
286286
}
287-
fperr = PyUFunc_getfperr();
287+
fperr = npy_get_floatstatus_barrier((char*)extobj);
288288
if (!fperr) {
289289
return 0;
290290
}

numpy/core/src/umath/loops.c.src

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1775,7 +1775,7 @@ NPY_NO_EXPORT void
17751775
*((npy_bool *)op1) = @func@(in1) != 0;
17761776
}
17771777
}
1778-
npy_clear_floatstatus();
1778+
npy_clear_floatstatus_barrier((char*)dimensions);
17791779
}
17801780
/**end repeat1**/
17811781

@@ -1857,7 +1857,7 @@ NPY_NO_EXPORT void
18571857
*((@type@ *)op1) = (in1 @OP@ in2 || npy_isnan(in2)) ? in1 : in2;
18581858
}
18591859
}
1860-
npy_clear_floatstatus();
1860+
npy_clear_floatstatus_barrier((char*)dimensions);
18611861
}
18621862
/**end repeat1**/
18631863

@@ -1947,7 +1947,7 @@ NPY_NO_EXPORT void
19471947
*((@type@ *)op1) = tmp + 0;
19481948
}
19491949
}
1950-
npy_clear_floatstatus();
1950+
npy_clear_floatstatus_barrier((char*)dimensions);
19511951
}
19521952

19531953
NPY_NO_EXPORT void
@@ -2133,7 +2133,7 @@ HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
21332133
const npy_half in1 = *(npy_half *)ip1;
21342134
*((npy_bool *)op1) = @func@(in1) != 0;
21352135
}
2136-
npy_clear_floatstatus();
2136+
npy_clear_floatstatus_barrier((char*)dimensions);
21372137
}
21382138
/**end repeat**/
21392139

@@ -2195,7 +2195,7 @@ HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
21952195
const npy_half in2 = *(npy_half *)ip2;
21962196
*((npy_half *)op1) = (@OP@(in1, in2) || npy_half_isnan(in2)) ? in1 : in2;
21972197
}
2198-
npy_clear_floatstatus();
2198+
npy_clear_floatstatus_barrier((char*)dimensions);
21992199
}
22002200
/**end repeat**/
22012201

@@ -2635,7 +2635,7 @@ NPY_NO_EXPORT void
26352635
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
26362636
*((npy_bool *)op1) = @func@(in1r) @OP@ @func@(in1i);
26372637
}
2638-
npy_clear_floatstatus();
2638+
npy_clear_floatstatus_barrier((char*)dimensions);
26392639
}
26402640
/**end repeat1**/
26412641

@@ -2744,7 +2744,7 @@ NPY_NO_EXPORT void
27442744
((@ftype@ *)op1)[1] = in2i;
27452745
}
27462746
}
2747-
npy_clear_floatstatus();
2747+
npy_clear_floatstatus_barrier((char*)dimensions);
27482748
}
27492749
/**end repeat1**/
27502750

numpy/core/src/umath/reduction.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
560560
}
561561

562562
/* Start with the floating-point exception flags cleared */
563-
PyUFunc_clearfperr();
563+
npy_clear_floatstatus_barrier((char*)&iter);
564564

565565
if (NpyIter_GetIterSize(iter) != 0) {
566566
NpyIter_IterNextFunc *iternext;

0 commit comments

Comments
 (0)
0