Hello!
> this patch fixes the asm statements in the gcc.target/i386/sse4_1-round* test
> cases.
>
> They do lots of things that are absolutely forbidden, like clobbering
> registers that are not mentioned in the clobber list and creating a
> hidden data flow.
>
> The test cases work just by chance, and you can see the asm statements
> ripped completely apart by the loop optimizer if you try to do the assembler
> part in a loop:
>
> for (i = 0; i < 10; i++) {
> __asm__ ("fld" ASM_SUFFIX " %0" : : "m" (*&f));
>
> __asm__ ("fstcw %0" : "=m" (*&saved_cw));
> new_cw = saved_cw & clr_mask;
> new_cw |= type;
> __asm__ ("fldcw %0" : : "m" (*&new_cw));
>
> __asm__ ("frndint\n"
> "fstp" ASM_SUFFIX " %0\n" : "=m" (*&ret));
> __asm__ ("fldcw %0" : : "m" (*&saved_cw));
> }
> return ret;
>
> So this patch avoids the hidden data flow, and
> adds "st" to the clobber list.
>
> Bootstrapped and regression-tested on x86_64-pc-linux-gnu.
> OK for trunk?
Uh, no.
x87 is a strange beast, and even your patch is wrong. The assembly
should be written this way, with the value passed in and returned on the
top of the x87 register stack (the "t" constraint):
__asm__ ("fnstcw %0" : "=m" (saved_cw));
new_cw = saved_cw & clr_mask;
new_cw |= type;
__asm__ ("fldcw %2\n\t"
"frndint\n\t"
"fldcw %3" : "=t" (ret)
: "0" (f), "m" (new_cw), "m" (saved_cw));
I'm testing the attached patch.
Uros.
Index: gcc.target/i386/sse4_1-roundsd-4.c
===================================================================
--- gcc.target/i386/sse4_1-roundsd-4.c (revision 231413)
+++ gcc.target/i386/sse4_1-roundsd-4.c (working copy)
@@ -36,7 +36,7 @@ init_round (double *src)
static double
do_round (double f, int type)
{
- short saved_cw, new_cw, clr_mask;
+ unsigned short saved_cw, new_cw, clr_mask;
double ret;
if ((type & 4))
@@ -50,16 +50,15 @@ do_round (double f, int type)
clr_mask = ~0x0C3F;
}
- __asm__ ("fldl %0" : : "m" (*&f));
+ __asm__ ("fstcw %0" : "=m" (saved_cw));
- __asm__ ("fstcw %0" : "=m" (*&saved_cw));
new_cw = saved_cw & clr_mask;
new_cw |= type;
- __asm__ ("fldcw %0" : : "m" (*&new_cw));
- __asm__ ("frndint\n"
- "fstpl %0\n" : "=m" (*&ret));
- __asm__ ("fldcw %0" : : "m" (*&saved_cw));
+ __asm__ ("fldcw %2\n\t"
+ "frndint\n\t"
+ "fldcw %3" : "=t" (ret)
+ : "0" (f), "m" (new_cw), "m" (saved_cw));
return ret;
}
Index: gcc.target/i386/sse4_1-roundss-4.c
===================================================================
--- gcc.target/i386/sse4_1-roundss-4.c (revision 231413)
+++ gcc.target/i386/sse4_1-roundss-4.c (working copy)
@@ -36,7 +36,7 @@ init_round (float *src)
static float
do_round (float f, int type)
{
- short saved_cw, new_cw, clr_mask;
+ unsigned short saved_cw, new_cw, clr_mask;
float ret;
if ((type & 4))
@@ -50,16 +50,15 @@ do_round (float f, int type)
clr_mask = ~0x0C3F;
}
- __asm__ ("flds %0" : : "m" (*&f));
+ __asm__ ("fstcw %0" : "=m" (saved_cw));
- __asm__ ("fstcw %0" : "=m" (*&saved_cw));
new_cw = saved_cw & clr_mask;
new_cw |= type;
- __asm__ ("fldcw %0" : : "m" (*&new_cw));
- __asm__ ("frndint\n"
- "fstps %0\n" : "=m" (*&ret));
- __asm__ ("fldcw %0" : : "m" (*&saved_cw));
+ __asm__ ("fldcw %2\n\t"
+ "frndint\n\t"
+ "fldcw %3" : "=t" (ret)
+ : "0" (f), "m" (new_cw), "m" (saved_cw));
return ret;
}