On Wed, Mar 09, 2016 at 02:06:03PM +0100, Uros Bizjak wrote:
> Let's go with the option 2) and always generate vec_concatv2df, as we
> only need it for [v,m,C] alternative. In the long term, we should
> enhance all patterns with new alternatives, but not in stage-4.

Ok, see patch below.

> Attached (lightly tested) patch that implements option 2) also allows
> us to simplify splitter enable condition a bit.

> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index cb8bcec..ef80d6a 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -4362,9 +4362,8 @@
>         (match_operand:DF 1 "nonimmediate_operand")))]
>    "TARGET_USE_VECTOR_FP_CONVERTS
>     && optimize_insn_for_speed_p ()
> -   && reload_completed && SSE_REG_P (operands[0])
> -   && (!EXT_REX_SSE_REG_P (operands[0])
> -       || TARGET_AVX512VL)"
> +   && reload_completed
> +   && SSE_REG_P (operands[0])"
>     [(set (match_dup 2)
>        (vec_concat:V4SF
>          (float_truncate:V2SF

Unfortunately, this really doesn't seem to work; I get ICEs (internal
compiler errors) on the testcases.  I've tried to allow EXT_REX_SSE_REG_P
for -mavx512f -mno-avx512vl just for MEM_P (operands[1]), but even that
ICEs.  Perhaps there are bugs in other splitters.

I'll bootstrap/regtest this then:

2016-03-04  Jakub Jelinek  <ja...@redhat.com>

        PR target/70086
        * config/i386/i386.md (truncdfsf2 splitter): Use gen_vec_concatv2df
        instead of gen_sse2_loadlpd.
        * config/i386/sse.md (*vec_concatv2df): Rename to...
        (vec_concatv2df): ... this.

        * gcc.target/i386/pr70086-1.c: New test.
        * gcc.target/i386/pr70086-2.c: New test.
        * gcc.target/i386/pr70086-3.c: New test.

--- gcc/config/i386/i386.md.jj  2016-03-08 09:01:50.871475493 +0100
+++ gcc/config/i386/i386.md     2016-03-09 15:40:00.102942847 +0100
@@ -4393,8 +4393,8 @@ (define_split
       emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
     }
   else
-    emit_insn (gen_sse2_loadlpd (operands[4],
-                                CONST0_RTX (V2DFmode), operands[1]));
+    emit_insn (gen_vec_concatv2df (operands[4], operands[1],
+                                  CONST0_RTX (DFmode)));
 })
 
 ;; It's more profitable to split and then extend in the same register.
--- gcc/config/i386/sse.md.jj   2016-03-09 15:08:17.000000000 +0100
+++ gcc/config/i386/sse.md      2016-03-09 15:15:10.346223894 +0100
@@ -8951,7 +8951,7 @@ (define_insn "vec_dupv2df<mask_name>"
    (set_attr "prefix" "orig,maybe_vex,evex")
    (set_attr "mode" "V2DF,DF,DF")])
 
-(define_insn "*vec_concatv2df"
+(define_insn "vec_concatv2df"
   [(set (match_operand:V2DF 0 "register_operand"     "=x,x,v,x,v,x,x,v,x,x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,m,0,0")
--- gcc/testsuite/gcc.target/i386/pr70086-1.c.jj        2016-03-09 15:12:55.177060382 +0100
+++ gcc/testsuite/gcc.target/i386/pr70086-1.c   2016-03-09 15:12:55.177060382 +0100
@@ -0,0 +1,11 @@
+/* PR target/70086 */
+/* { dg-do compile } */
+/* { dg-options "-mtune=barcelona -mavx512vl -ffloat-store" } */
+
+float
+foo (float a, float b, double c, float d, double e, float f)
+{
+  e -= d;
+  d *= e;
+  return e + d;
+}
--- gcc/testsuite/gcc.target/i386/pr70086-2.c.jj        2016-03-09 15:12:55.177060382 +0100
+++ gcc/testsuite/gcc.target/i386/pr70086-2.c   2016-03-09 15:35:52.000000000 +0100
@@ -0,0 +1,21 @@
+/* PR target/70086 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mtune=barcelona -mavx512vl" } */
+
+float
+foo (double *p)
+{
+  register float xmm16 __asm ("xmm16");
+  xmm16 = *p;
+  asm volatile ("" : "+v" (xmm16));
+  return xmm16;
+}
+
+float
+bar (double x)
+{
+  register float xmm16 __asm ("xmm16");
+  xmm16 = x;
+  asm volatile ("" : "+v" (xmm16));
+  return xmm16;
+}
--- gcc/testsuite/gcc.target/i386/pr70086-3.c.jj        2016-03-09 15:36:28.332831118 +0100
+++ gcc/testsuite/gcc.target/i386/pr70086-3.c   2016-03-09 15:35:33.000000000 +0100
@@ -0,0 +1,21 @@
+/* PR target/70086 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mtune=barcelona -mavx512f -mno-avx512vl" } */
+
+float
+foo (double *p)
+{
+  register float xmm16 __asm ("xmm16");
+  xmm16 = *p;
+  asm volatile ("" : "+v" (xmm16));
+  return xmm16;
+}
+
+float
+bar (double x)
+{
+  register float xmm16 __asm ("xmm16");
+  xmm16 = x;
+  asm volatile ("" : "+v" (xmm16));
+  return xmm16;
+}

        Jakub

Reply via email to