On Wed, Mar 09, 2016 at 02:06:03PM +0100, Uros Bizjak wrote:
> Let's go with the option 2) and always generate vec_concatv2df, as we
> only need it for [v,m,C] alternative. In the long term, we should
> enhance all patterns with new alternatives, but not in stage-4.

Ok, see patch below.

> Attached (lightly tested) patch that implements option 2) also allows
> us to simplify splitter enable condition a bit.

> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index cb8bcec..ef80d6a 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -4362,9 +4362,8 @@
>          (match_operand:DF 1 "nonimmediate_operand")))]
>    "TARGET_USE_VECTOR_FP_CONVERTS
>     && optimize_insn_for_speed_p ()
> -   && reload_completed && SSE_REG_P (operands[0])
> -   && (!EXT_REX_SSE_REG_P (operands[0])
> -       || TARGET_AVX512VL)"
> +   && reload_completed
> +   && SSE_REG_P (operands[0])"
>    [(set (match_dup 2)
>          (vec_concat:V4SF
>            (float_truncate:V2SF

Unfortunately, this really doesn't seem to work; I get ICEs on the
testcases.  I've tried to allow EXT_REX_SSE_REG_P for -mavx512f
-mno-avx512vl just for MEM_P (operands[1]), but even that ICEs.  Perhaps
there are bugs in other splitters.

I'll bootstrap/regtest this then:

2016-03-04  Jakub Jelinek  <ja...@redhat.com>

        PR target/70086
        * config/i386/i386.md (truncdfsf2 splitter): Use gen_vec_concatv2df
        instead of gen_sse2_loadlpd.
        * config/i386/sse.md (*vec_concatv2df): Rename to...
        (vec_concatv2df): ... this.

        * gcc.target/i386/pr70086-1.c: New test.
        * gcc.target/i386/pr70086-2.c: New test.
        * gcc.target/i386/pr70086-3.c: New test.

--- gcc/config/i386/i386.md.jj 2016-03-08 09:01:50.871475493 +0100
+++ gcc/config/i386/i386.md 2016-03-09 15:40:00.102942847 +0100
@@ -4393,8 +4393,8 @@ (define_split
       emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
     }
   else
-    emit_insn (gen_sse2_loadlpd (operands[4],
-                                 CONST0_RTX (V2DFmode), operands[1]));
+    emit_insn (gen_vec_concatv2df (operands[4], operands[1],
+                                   CONST0_RTX (DFmode)));
 })
 
 ;; It's more profitable to split and then extend in the same register.
--- gcc/config/i386/sse.md.jj 2016-03-09 15:08:17.000000000 +0100
+++ gcc/config/i386/sse.md 2016-03-09 15:15:10.346223894 +0100
@@ -8951,7 +8951,7 @@ (define_insn "vec_dupv2df<mask_name>"
    (set_attr "prefix" "orig,maybe_vex,evex")
    (set_attr "mode" "V2DF,DF,DF")])
 
-(define_insn "*vec_concatv2df"
+(define_insn "vec_concatv2df"
   [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x,v,x,x")
         (vec_concat:V2DF
           (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,m,0,0")
--- gcc/testsuite/gcc.target/i386/pr70086-1.c.jj 2016-03-09 15:12:55.177060382 +0100
+++ gcc/testsuite/gcc.target/i386/pr70086-1.c 2016-03-09 15:12:55.177060382 +0100
@@ -0,0 +1,11 @@
+/* PR target/70086 */
+/* { dg-do compile } */
+/* { dg-options "-mtune=barcelona -mavx512vl -ffloat-store" } */
+
+float
+foo (float a, float b, double c, float d, double e, float f)
+{
+  e -= d;
+  d *= e;
+  return e + d;
+}
--- gcc/testsuite/gcc.target/i386/pr70086-2.c.jj 2016-03-09 15:12:55.177060382 +0100
+++ gcc/testsuite/gcc.target/i386/pr70086-2.c 2016-03-09 15:35:52.000000000 +0100
@@ -0,0 +1,21 @@
+/* PR target/70086 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mtune=barcelona -mavx512vl" } */
+
+float
+foo (double *p)
+{
+  register float xmm16 __asm ("xmm16");
+  xmm16 = *p;
+  asm volatile ("" : "+v" (xmm16));
+  return xmm16;
+}
+
+float
+bar (double x)
+{
+  register float xmm16 __asm ("xmm16");
+  xmm16 = x;
+  asm volatile ("" : "+v" (xmm16));
+  return xmm16;
+}
--- gcc/testsuite/gcc.target/i386/pr70086-3.c.jj 2016-03-09 15:36:28.332831118 +0100
+++ gcc/testsuite/gcc.target/i386/pr70086-3.c 2016-03-09 15:35:33.000000000 +0100
@@ -0,0 +1,21 @@
+/* PR target/70086 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mtune=barcelona -mavx512f -mno-avx512vl" } */
+
+float
+foo (double *p)
+{
+  register float xmm16 __asm ("xmm16");
+  xmm16 = *p;
+  asm volatile ("" : "+v" (xmm16));
+  return xmm16;
+}
+
+float
+bar (double x)
+{
+  register float xmm16 __asm ("xmm16");
+  xmm16 = x;
+  asm volatile ("" : "+v" (xmm16));
+  return xmm16;
+}

Jakub