On Tue, Feb 4, 2020 at 2:13 PM Jakub Jelinek <ja...@redhat.com> wrote:
>
> On Tue, Feb 04, 2020 at 01:38:51PM +0100, Uros Bizjak wrote:
> > As Richard advised, let's put this safety stuff back. Usually, in
> > i386.md, these kinds of splitters are implemented as two patterns, one
> > (define_insn_and_split) having "#", and the other (define_insn) with a
> > real insn. My opinion is that this separation avoids confusion as
> > much as possible.
>
> Okay.  So like this if it passes bootstrap/regtest then?

Yes.

> 2020-02-04  Jakub Jelinek  <ja...@redhat.com>
>
>         PR target/92190
>         * config/i386/i386-features.c (ix86_add_reg_usage_to_vzeroupper): Only
>         include sets and not clobbers in the vzeroupper pattern.
>         * config/i386/sse.md (*avx_vzeroupper): Require in insn condition that
>         the parallel has 17 (64-bit) or 9 (32-bit) elts.
>         (*avx_vzeroupper_1): New define_insn_and_split.
>
>         * gcc.target/i386/pr92190.c: New test.

OK.

Thanks,
Uros.

> --- gcc/config/i386/i386-features.c.jj  2020-02-04 13:33:32.713885386 +0100
> +++ gcc/config/i386/i386-features.c     2020-02-04 13:55:44.358058104 +0100
> @@ -1764,29 +1764,32 @@ convert_scalars_to_vector (bool timode_p
>
>       (set (reg:V2DF R) (reg:V2DF R))
>
> -   which preserves the low 128 bits but clobbers the upper bits.
> -   For a dead register we just use:
> -
> -     (clobber (reg:V2DF R))
> -
> -   which invalidates any previous contents of R and stops R from becoming
> -   live across the vzeroupper in future.  */
> +   which preserves the low 128 bits but clobbers the upper bits.  */
>
>  static void
>  ix86_add_reg_usage_to_vzeroupper (rtx_insn *insn, bitmap live_regs)
>  {
>    rtx pattern = PATTERN (insn);
>    unsigned int nregs = TARGET_64BIT ? 16 : 8;
> -  rtvec vec = rtvec_alloc (nregs + 1);
> -  RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
> +  unsigned int npats = nregs;
>    for (unsigned int i = 0; i < nregs; ++i)
>      {
>        unsigned int regno = GET_SSE_REGNO (i);
> +      if (!bitmap_bit_p (live_regs, regno))
> +       npats--;
> +    }
> +  if (npats == 0)
> +    return;
> +  rtvec vec = rtvec_alloc (npats + 1);
> +  RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
> +  for (unsigned int i = 0, j = 0; i < nregs; ++i)
> +    {
> +      unsigned int regno = GET_SSE_REGNO (i);
> +      if (!bitmap_bit_p (live_regs, regno))
> +       continue;
>        rtx reg = gen_rtx_REG (V2DImode, regno);
> -      if (bitmap_bit_p (live_regs, regno))
> -       RTVEC_ELT (vec, i + 1) = gen_rtx_SET (reg, reg);
> -      else
> -       RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
> +      ++j;
> +      RTVEC_ELT (vec, j) = gen_rtx_SET (reg, reg);
>      }
>    XVEC (pattern, 0) = vec;
>    df_insn_rescan (insn);
> --- gcc/config/i386/sse.md.jj   2020-02-04 13:33:32.733885088 +0100
> +++ gcc/config/i386/sse.md      2020-02-04 13:57:38.995349722 +0100
> @@ -19818,11 +19818,49 @@ (define_expand "avx_vzeroupper"
>  (define_insn "*avx_vzeroupper"
>    [(match_parallel 0 "vzeroupper_pattern"
>       [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
> -  "TARGET_AVX"
> +  "TARGET_AVX && XVECLEN (operands[0], 0) == (TARGET_64BIT ? 16 : 8) + 1"
>    "vzeroupper"
>    [(set_attr "type" "sse")
>     (set_attr "modrm" "0")
>     (set_attr "memory" "none")
> +   (set_attr "prefix" "vex")
> +   (set_attr "btver2_decode" "vector")
> +   (set_attr "mode" "OI")])
> +
> +(define_insn_and_split "*avx_vzeroupper_1"
> +  [(match_parallel 0 "vzeroupper_pattern"
> +     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
> +  "TARGET_AVX && XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1"
> +  "#"
> +  "&& epilogue_completed"
> +  [(match_dup 0)]
> +{
> +  /* For IPA-RA purposes, make it clear the instruction clobbers
> +     even XMM registers not mentioned explicitly in the pattern.  */
> +  unsigned int nregs = TARGET_64BIT ? 16 : 8;
> +  unsigned int npats = XVECLEN (operands[0], 0);
> +  rtvec vec = rtvec_alloc (nregs + 1);
> +  RTVEC_ELT (vec, 0) = XVECEXP (operands[0], 0, 0);
> +  for (unsigned int i = 0, j = 1; i < nregs; ++i)
> +    {
> +      unsigned int regno = GET_SSE_REGNO (i);
> +      if (j < npats
> +         && REGNO (SET_DEST (XVECEXP (operands[0], 0, j))) == regno)
> +       {
> +         RTVEC_ELT (vec, i + 1) = XVECEXP (operands[0], 0, j);
> +         j++;
> +       }
> +      else
> +       {
> +         rtx reg = gen_rtx_REG (V2DImode, regno);
> +         RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
> +       }
> +    }
> +  operands[0] = gen_rtx_PARALLEL (VOIDmode, vec);
> +}
> +  [(set_attr "type" "sse")
> +   (set_attr "modrm" "0")
> +   (set_attr "memory" "none")
>     (set_attr "prefix" "vex")
>     (set_attr "btver2_decode" "vector")
>     (set_attr "mode" "OI")])
> --- gcc/testsuite/gcc.target/i386/pr92190.c.jj  2020-02-04 13:55:44.364058015 +0100
> +++ gcc/testsuite/gcc.target/i386/pr92190.c     2020-02-04 13:55:44.364058015 +0100
> @@ -0,0 +1,19 @@
> +/* PR target/92190 */
> +/* { dg-do compile { target { *-*-linux* && lp64 } } } */
> +/* { dg-options "-mabi=ms -O2 -mavx512f" } */
> +
> +typedef char VC __attribute__((vector_size (16)));
> +typedef int VI __attribute__((vector_size (16 * sizeof 0)));
> +VC a;
> +VI b;
> +void bar (VI);
> +void baz (VC);
> +
> +void
> +foo (void)
> +{
> +  VC k = a;
> +  VI n = b;
> +  bar (n);
> +  baz (k);
> +}
>
>
>         Jakub
>

Reply via email to