Hi Jakub,
On 21 Mar 21:10, Jakub Jelinek wrote:
> vec_interleave_lowv4sf only supports =x, x, x alternative, not =v, v, v
> (which should be supportable for AVX512VL only anyway, but probably
> stage1 material), without AVX512VL and with ext sse reg input operand
> we have to either do the interleaving or broadcast in the destination,
> or disable the splitter.
You're right, something is wrong with the vec_interleave constraints. I need
to look closer at this in stage1.

> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Your patch is OK. Thanks for fixing this!

--
K

> 2016-03-21  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR target/70300
>       * config/i386/i386.md (cvtsd2ss splitter): Unpack in destination
>       instead of source if operands[1] is xmm16 and above and
>       !TARGET_AVX512VL.  Use avx512f_vec_dupv16sf_1 instead of
>       vec_interleave_lowv4sf if we need to unpack xmm16 and above.
> 
>       * gcc.target/i386/pr70300.c: New test.
> 
> --- gcc/config/i386/i386.md.jj        2016-03-17 12:53:25.000000000 +0100
> +++ gcc/config/i386/i386.md   2016-03-21 11:50:54.539001151 +0100
> @@ -4229,17 +4229,28 @@ (define_split
>      {
>        /* If it is unsafe to overwrite upper half of source, we need
>        to move to destination and unpack there.  */
> -      if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
> -        || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
> -       && true_regnum (operands[0]) != true_regnum (operands[1]))
> +      if (((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
> +         || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
> +        && true_regnum (operands[0]) != true_regnum (operands[1]))
> +       || (EXT_REX_SSE_REG_P (operands[1])
> +           && !TARGET_AVX512VL))
>       {
>         rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0]));
>         emit_move_insn (tmp, operands[1]);
>       }
>        else
>       operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
> -      emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
> -                                                  operands[3]));
> +      /* FIXME: vec_interleave_lowv4sf for AVX512VL should allow
> +      =v, v, then vbroadcastss will be only needed for AVX512F without
> +      AVX512VL.  */
> +      if (!EXT_REX_SSE_REGNO_P (true_regnum (operands[3])))
> +     emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
> +                                            operands[3]));
> +      else
> +     {
> +       rtx tmp = simplify_gen_subreg (V16SFmode, operands[3], V4SFmode, 0);
> +       emit_insn (gen_avx512f_vec_dupv16sf_1 (tmp, tmp));
> +     }
>      }
>    else
>      emit_insn (gen_vec_setv4sf_0 (operands[3],
> --- gcc/testsuite/gcc.target/i386/pr70300.c.jj        2016-03-21 11:56:16.455580855 +0100
> +++ gcc/testsuite/gcc.target/i386/pr70300.c   2016-03-21 11:55:55.000000000 +0100
> @@ -0,0 +1,25 @@
> +/* PR target/70300 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtune=amdfam10 -mavx512f" } */
> +
> +typedef _Complex A __attribute__ ((mode (SC)));
> +typedef _Complex B __attribute__ ((mode (DC)));
> +typedef _Complex C __attribute__ ((mode (TC)));
> +
> +C
> +foo (A a, B b, C c, A d, B e, C f)
> +{
> +  b -= a;
> +  d += a;
> +  a += f;
> +  return a + b + d + e;
> +}
> +
> +__attribute__((target ("avx512vl"))) C
> +bar (A a, B b, C c, A d, B e, C f)
> +{
> +  b -= a;
> +  d += a;
> +  a += f;
> +  return a + b + d + e;
> +}
> 
>       Jakub

Reply via email to