Hi Jakub, On 21 Mar 21:10, Jakub Jelinek wrote: > vec_interleave_lowv4sf only supports =x, x, x alternative, not =v, v, v > (which should be supportable for AVX512VL only anyway, but probably > stage1 material), without AVX512VL and with ext sse reg input operand > we have to either due to interleaving or broadcast in the destination, > or disable the splitter. You're right, something is wrong w/ vec_interleave constraints. Need to look closer @ stage1.
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? You patch is OK. Thanks for fixing this! -- K > 2016-03-21 Jakub Jelinek <ja...@redhat.com> > > PR target/70300 > * config/i386/i386.md (cvtsd2ss splitter): Unpack in destination > instead of source if operands[1] is xmm16 and above and > !TARGET_AVX512VL. Use avx512f_vec_dupv16sf_1 instead of > vec_interleave_lowv4sf if we need to unpack xmm16 and above. > > * gcc.target/i386/pr70300.c: New test. > > --- gcc/config/i386/i386.md.jj 2016-03-17 12:53:25.000000000 +0100 > +++ gcc/config/i386/i386.md 2016-03-21 11:50:54.539001151 +0100 > @@ -4229,17 +4229,28 @@ (define_split > { > /* If it is unsafe to overwrite upper half of source, we need > to move to destination and unpack there. */ > - if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER > - || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4) > - && true_regnum (operands[0]) != true_regnum (operands[1])) > + if (((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER > + || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4) > + && true_regnum (operands[0]) != true_regnum (operands[1])) > + || (EXT_REX_SSE_REG_P (operands[1]) > + && !TARGET_AVX512VL)) > { > rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0])); > emit_move_insn (tmp, operands[1]); > } > else > operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); > - emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3], > - operands[3])); > + /* FIXME: vec_interleave_lowv4sf for AVX512VL should allow > + =v, v, then vbroadcastss will be only needed for AVX512F without > + AVX512VL. */ > + if (!EXT_REX_SSE_REGNO_P (true_regnum (operands[3]))) > + emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3], > + operands[3])); > + else > + { > + rtx tmp = simplify_gen_subreg (V16SFmode, operands[3], V4SFmode, 0); > + emit_insn (gen_avx512f_vec_dupv16sf_1 (tmp, tmp)); > + } > } > else > emit_insn (gen_vec_setv4sf_0 (operands[3], > --- gcc/testsuite/gcc.target/i386/pr70300.c.jj 2016-03-21 > 11:56:16.455580855 +0100 > +++ gcc/testsuite/gcc.target/i386/pr70300.c 2016-03-21 11:55:55.000000000 > +0100 > @@ -0,0 +1,25 @@ > +/* PR target/70300 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mtune=amdfam10 -mavx512f" } */ > + > +typedef _Complex A __attribute__ ((mode (SC))); > +typedef _Complex B __attribute__ ((mode (DC))); > +typedef _Complex C __attribute__ ((mode (TC))); > + > +C > +foo (A a, B b, C c, A d, B e, C f) > +{ > + b -= a; > + d += a; > + a += f; > + return a + b + d + e; > +} > + > +__attribute__((target ("avx512vl"))) C > +bar (A a, B b, C c, A d, B e, C f) > +{ > + b -= a; > + d += a; > + a += f; > + return a + b + d + e; > +} > > Jakub