Hello! > this patch fixes the asm statements in the gcc.target/i386/sse4_1-round* test > cases. > > They do lots of things that are just absolutely forbidden, like clobbering > registers > that are not mentioned in the clobber list, and creating a hidden data flow. > > The test cases work just by chance, and you can see the asm statements > ripped completely apart by the loop optimizer if you try to do the assembler > part in a loop: > > for (i = 0; i < 10; i++) { > __asm__ ("fld" ASM_SUFFIX " %0" : : "m" (*&f)); > > __asm__ ("fstcw %0" : "=m" (*&saved_cw)); > new_cw = saved_cw & clr_mask; > new_cw |= type; > __asm__ ("fldcw %0" : : "m" (*&new_cw)); > > __asm__ ("frndint\n" > "fstp" ASM_SUFFIX " %0\n" : "=m" (*&ret)); > __asm__ ("fldcw %0" : : "m" (*&saved_cw)); > } > return ret; > > So this patch avoids the hidden data flow, and > adds "st" to the clobber list. > > Bootstrapped and reg-tested on x86_64-pc-linux-gnu. > OK for trunk?
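Uh, no. x87 is a strange beast, and even your patch is wrong. The assembly should be written this way: __asm__ ("fnstcw %0" : "=m" (saved_cw)); new_cw = saved_cw & clr_mask; new_cw |= type; __asm__ ("fldcw %2\n\t" "frndint\n\t" "fldcw %3" : "=t" (ret) : "0" (f), "m" (new_cw), "m" (saved_cw)); Here the value enters and leaves the asm through the top of the x87 stack (the "=t" output with the matching "0" input), so the whole data flow is visible to the compiler and nothing is left on the register stack behind its back. I'm testing the attached patch. Uros.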
Index: gcc.target/i386/sse4_1-roundsd-4.c =================================================================== --- gcc.target/i386/sse4_1-roundsd-4.c (revision 231413) +++ gcc.target/i386/sse4_1-roundsd-4.c (working copy) @@ -36,7 +36,7 @@ init_round (double *src) static double do_round (double f, int type) { - short saved_cw, new_cw, clr_mask; + unsigned short saved_cw, new_cw, clr_mask; double ret; if ((type & 4)) @@ -50,16 +50,15 @@ do_round (double f, int type) clr_mask = ~0x0C3F; } - __asm__ ("fldl %0" : : "m" (*&f)); + __asm__ ("fstcw %0" : "=m" (saved_cw)); - __asm__ ("fstcw %0" : "=m" (*&saved_cw)); new_cw = saved_cw & clr_mask; new_cw |= type; - __asm__ ("fldcw %0" : : "m" (*&new_cw)); - __asm__ ("frndint\n" - "fstpl %0\n" : "=m" (*&ret)); - __asm__ ("fldcw %0" : : "m" (*&saved_cw)); + __asm__ ("fldcw %2\n\t" + "frndint\n\t" + "fldcw %3" : "=t" (ret) + : "0" (f), "m" (new_cw), "m" (saved_cw)); return ret; } Index: gcc.target/i386/sse4_1-roundss-4.c =================================================================== --- gcc.target/i386/sse4_1-roundss-4.c (revision 231413) +++ gcc.target/i386/sse4_1-roundss-4.c (working copy) @@ -36,7 +36,7 @@ init_round (float *src) static float do_round (float f, int type) { - short saved_cw, new_cw, clr_mask; + unsigned short saved_cw, new_cw, clr_mask; float ret; if ((type & 4)) @@ -50,16 +50,15 @@ do_round (float f, int type) clr_mask = ~0x0C3F; } - __asm__ ("flds %0" : : "m" (*&f)); + __asm__ ("fstcw %0" : "=m" (saved_cw)); - __asm__ ("fstcw %0" : "=m" (*&saved_cw)); new_cw = saved_cw & clr_mask; new_cw |= type; - __asm__ ("fldcw %0" : : "m" (*&new_cw)); - __asm__ ("frndint\n" - "fstps %0\n" : "=m" (*&ret)); - __asm__ ("fldcw %0" : : "m" (*&saved_cw)); + __asm__ ("fldcw %2\n\t" + "frndint\n\t" + "fldcw %3" : "=t" (ret) + : "0" (f), "m" (new_cw), "m" (saved_cw)); return ret; }