Hello!

Attached patch fixes PR56788, where _mm_frcz_{ss,sd} intrinsics
ignored their second argument.

As explained in the PR [1], gcc implements two-operand "vector-merge"
form as documented in Microsoft's definition [2]. However, in contrast
to other SSE scalar insns, the instruction itself clears upper bits to
zero.

There were a couple of problems: the builtin was declared as builtin
with two input operands, but the number of input operands didn't
correspond to referred insn pattern, leaving its second operand
uninitialized. The intrinsic was also implemented without necessary
movss/movsd fixup that would merge both its operands in a correct way.

Please also note that the definition in clang is wrong.

I didn't include any testcase in the patch, since I don't have access
to XOP target. Hopefully someone from AMD will provide tests that are
mysteriously missing from XOP testsuite.

2013-11-23  Uros Bizjak  <ubiz...@gmail.com>

    PR target/56788
    * config/i386/i386.c (bdesc_multi_arg) <IX86_BUILTIN_VFRCZSS>:
    Declare as MULTI_ARG_1_SF instruction.
    <IX86_BUILTIN_VFRCZSD>: Decleare as MULTI_ARG_1_DF instruction.
    * config/i386/sse.md (*xop_vmfrcz<mode>2): Rename
    from *xop_vmfrcz_<mode>.
    * config/i386/xopintrin.h (_mm_frcz_ss): Use __builtin_ia32_movss
    to merge scalar result with __A.
    (_mm_frcz_sd): Use __builtin_ia32_movsd to merge scalar
    result with __A.

Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32}.

The patch was committed to mainline SVN and will be committed to other
release branches in a couple of days (hopefully with additional
tests).

[1] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=56788
[2] http://msdn.microsoft.com/en-us/library/vstudio/gg445126%28v=vs.100%29.aspx

Uros.
Index: i386.c
===================================================================
--- i386.c      (revision 205300)
+++ i386.c      (working copy)
@@ -29189,8 +29189,8 @@
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3,         
"__builtin_ia32_vpshlw",      IX86_BUILTIN_VPSHLW,      UNKNOWN,      
(int)MULTI_ARG_2_HI },
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3,        
"__builtin_ia32_vpshlb",      IX86_BUILTIN_VPSHLB,      UNKNOWN,      
(int)MULTI_ARG_2_QI },
 
-  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2,       
"__builtin_ia32_vfrczss",     IX86_BUILTIN_VFRCZSS,     UNKNOWN,      
(int)MULTI_ARG_2_SF },
-  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2,       
"__builtin_ia32_vfrczsd",     IX86_BUILTIN_VFRCZSD,     UNKNOWN,      
(int)MULTI_ARG_2_DF },
+  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2,       
"__builtin_ia32_vfrczss",     IX86_BUILTIN_VFRCZSS,     UNKNOWN,      
(int)MULTI_ARG_1_SF },
+  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2,       
"__builtin_ia32_vfrczsd",     IX86_BUILTIN_VFRCZSD,     UNKNOWN,      
(int)MULTI_ARG_1_DF },
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2,         
"__builtin_ia32_vfrczps",     IX86_BUILTIN_VFRCZPS,     UNKNOWN,      
(int)MULTI_ARG_1_SF },
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2,         
"__builtin_ia32_vfrczpd",     IX86_BUILTIN_VFRCZPD,     UNKNOWN,      
(int)MULTI_ARG_1_DF },
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2,         
"__builtin_ia32_vfrczps256",  IX86_BUILTIN_VFRCZPS256,  UNKNOWN,      
(int)MULTI_ARG_1_SF2 },
Index: sse.md
===================================================================
--- sse.md      (revision 205300)
+++ sse.md      (working copy)
@@ -13193,7 +13193,6 @@
   [(set_attr "type" "ssecvt1")
    (set_attr "mode" "<MODE>")])
 
-;; scalar insns
 (define_expand "xop_vmfrcz<mode>2"
   [(set (match_operand:VF_128 0 "register_operand")
        (vec_merge:VF_128
@@ -13203,11 +13202,9 @@
          (match_dup 3)
          (const_int 1)))]
   "TARGET_XOP"
-{
-  operands[3] = CONST0_RTX (<MODE>mode);
-})
+  "operands[3] = CONST0_RTX (<MODE>mode);")
 
-(define_insn "*xop_vmfrcz_<mode>"
+(define_insn "*xop_vmfrcz<mode>2"
   [(set (match_operand:VF_128 0 "register_operand" "=x")
        (vec_merge:VF_128
          (unspec:VF_128
Index: xopintrin.h
===================================================================
--- xopintrin.h (revision 205300)
+++ xopintrin.h (working copy)
@@ -747,13 +747,17 @@
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_frcz_ss (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_vfrczss ((__v4sf)__A, (__v4sf)__B);
+  return (__m128) __builtin_ia32_movss ((__v4sf)__A,
+                                       (__v4sf)
+                                       __builtin_ia32_vfrczss ((__v4sf)__B));
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_frcz_sd (__m128d __A, __m128d __B)
 {
-  return (__m128d) __builtin_ia32_vfrczsd ((__v2df)__A, (__v2df)__B);
+  return (__m128d) __builtin_ia32_movsd ((__v2df)__A,
+                                        (__v2df)
+                                        __builtin_ia32_vfrczsd ((__v2df)__B));
 }
 
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))

Reply via email to