On Tue, Dec 8, 2015 at 7:10 PM, Uros Bizjak <[email protected]> wrote:
> Hello!
>
>> this patch fixes the asm statements in the gcc.target/i386/sse4_1-round*
>> test cases.
>>
>> They do lots of things that are just absolutely forbidden, like clobber
>> registers
>> that are not mentioned in the clobber list, and create a hidden data flow.
>>
>> The test cases work just by chance, and You can see the asm statements
>> ripped completely apart by the loop optimizer if you try to do the assembler
>> part in a loop:
>>
>> for (i = 0; i < 10; i++) {
>> __asm__ ("fld" ASM_SUFFIX " %0" : : "m" (*&f));
>>
>> __asm__ ("fstcw %0" : "=m" (*&saved_cw));
>> new_cw = saved_cw & clr_mask;
>> new_cw |= type;
>> __asm__ ("fldcw %0" : : "m" (*&new_cw));
>>
>> __asm__ ("frndint\n"
>> "fstp" ASM_SUFFIX " %0\n" : "=m" (*&ret));
>> __asm__ ("fldcw %0" : : "m" (*&saved_cw));
>> }
>> return ret;
>>
>> So this patch avoids the hidden data flow, and
>> adds "st" to the clobber list.
>>
>> Boot-strapped and reg-tested on x86_64-pc-linux-gnu
>> OK for trunk?
>
> Uh, no.
>
> x87 is a strange beast, and even your patch is wrong. The assembly
> should be written in this way:
>
> __asm__ ("fnstcw %0" : "=m" (saved_cw));
>
> new_cw = saved_cw & clr_mask;
> new_cw |= type;
>
> __asm__ ("fldcw %2\n\t"
> "frndint\n\t"
> "fldcw %3" : "=t" (ret)
> : "0" (f), "m" (new_cw), "m" (saved_cw));
>
> I'm testing the attached patch.
Committed slightly updated patch to mainline SVN with the following ChangeLog:
2015-12-08 Uros Bizjak <[email protected]>
* gcc.target/i386/sse4_1-round.h (do_round): Fix inline asm statements.
* gcc.target/i386/sse4_1-roundsd-4.c (do_round): Ditto.
* gcc.target/i386/sse4_1-roundss-4.c (do_round): Ditto.
Tested on x86_64-linux-gnu {,-m32}, committed to mainline SVN, will be
backported to all active branches.
Uros.
Index: gcc.target/i386/sse4_1-round.h
===================================================================
--- gcc.target/i386/sse4_1-round.h (revision 231413)
+++ gcc.target/i386/sse4_1-round.h (working copy)
@@ -28,7 +28,7 @@ init_round (FP_T *src)
static FP_T
do_round (FP_T f, int type)
{
- short saved_cw, new_cw, clr_mask;
+ unsigned short saved_cw, new_cw, clr_mask;
FP_T ret;
if ((type & 4))
@@ -42,16 +42,15 @@ do_round (FP_T f, int type)
clr_mask = ~0x0C3F;
}
- __asm__ ("fld" ASM_SUFFIX " %0" : : "m" (*&f));
+ __asm__ ("fnstcw %0" : "=m" (saved_cw));
- __asm__ ("fstcw %0" : "=m" (*&saved_cw));
new_cw = saved_cw & clr_mask;
new_cw |= type;
- __asm__ ("fldcw %0" : : "m" (*&new_cw));
- __asm__ ("frndint\n"
- "fstp" ASM_SUFFIX " %0\n" : "=m" (*&ret));
- __asm__ ("fldcw %0" : : "m" (*&saved_cw));
+ __asm__ ("fldcw %2\n\t"
+ "frndint\n\t"
+ "fldcw %3" : "=t" (ret)
+ : "0" (f), "m" (new_cw), "m" (saved_cw));
return ret;
}
Index: gcc.target/i386/sse4_1-roundsd-4.c
===================================================================
--- gcc.target/i386/sse4_1-roundsd-4.c (revision 231413)
+++ gcc.target/i386/sse4_1-roundsd-4.c (working copy)
@@ -36,7 +36,7 @@ init_round (double *src)
static double
do_round (double f, int type)
{
- short saved_cw, new_cw, clr_mask;
+ unsigned short saved_cw, new_cw, clr_mask;
double ret;
if ((type & 4))
@@ -50,16 +50,15 @@ do_round (double f, int type)
clr_mask = ~0x0C3F;
}
- __asm__ ("fldl %0" : : "m" (*&f));
+ __asm__ ("fnstcw %0" : "=m" (saved_cw));
- __asm__ ("fstcw %0" : "=m" (*&saved_cw));
new_cw = saved_cw & clr_mask;
new_cw |= type;
- __asm__ ("fldcw %0" : : "m" (*&new_cw));
- __asm__ ("frndint\n"
- "fstpl %0\n" : "=m" (*&ret));
- __asm__ ("fldcw %0" : : "m" (*&saved_cw));
+ __asm__ ("fldcw %2\n\t"
+ "frndint\n\t"
+ "fldcw %3" : "=t" (ret)
+ : "0" (f), "m" (new_cw), "m" (saved_cw));
return ret;
}
Index: gcc.target/i386/sse4_1-roundss-4.c
===================================================================
--- gcc.target/i386/sse4_1-roundss-4.c (revision 231413)
+++ gcc.target/i386/sse4_1-roundss-4.c (working copy)
@@ -36,7 +36,7 @@ init_round (float *src)
static float
do_round (float f, int type)
{
- short saved_cw, new_cw, clr_mask;
+ unsigned short saved_cw, new_cw, clr_mask;
float ret;
if ((type & 4))
@@ -50,16 +50,15 @@ do_round (float f, int type)
clr_mask = ~0x0C3F;
}
- __asm__ ("flds %0" : : "m" (*&f));
+ __asm__ ("fnstcw %0" : "=m" (saved_cw));
- __asm__ ("fstcw %0" : "=m" (*&saved_cw));
new_cw = saved_cw & clr_mask;
new_cw |= type;
- __asm__ ("fldcw %0" : : "m" (*&new_cw));
- __asm__ ("frndint\n"
- "fstps %0\n" : "=m" (*&ret));
- __asm__ ("fldcw %0" : : "m" (*&saved_cw));
+ __asm__ ("fldcw %2\n\t"
+ "frndint\n\t"
+ "fldcw %3" : "=t" (ret)
+ : "0" (f), "m" (new_cw), "m" (saved_cw));
return ret;
}