On Thu, Sep 9, 2021 at 4:00 PM Hongtao Liu <crazy...@gmail.com> wrote:
>
> On Thu, Sep 9, 2021 at 3:54 PM liuhongt <hongtao....@intel.com> wrote:
> >
> > Hi:
> >   As a follow up of [1], the patch removes all scalar mode copysign related
> > post_reload splitters/define_insns and expands copysign directly into the
> > sequence below, using paradoxical subregs.
>
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579057.html
> >
> >   op3 = op1 & ~mask;
> >   op4 = op2 & mask;
> >   dest = op3 | op4;
> >
> > It can sometimes generate better code, as avx512dq-abs-copysign-1.c
> > shows.
> >
> >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> >
Committed.
> > gcc/ChangeLog:
> >
> >         * config/i386/i386-expand.c (ix86_expand_copysign): Expand
> >         right into ANDNOT + AND + IOR, using paradoxical subregs.
> >         (ix86_split_copysign_const): Remove.
> >         (ix86_split_copysign_var): Ditto.
> >         * config/i386/i386-protos.h (ix86_split_copysign_const): Ditto.
> >         (ix86_split_copysign_var): Ditto.
> >         * config/i386/i386.md (@copysign<mode>3_const): Ditto.
> >         (@copysign<mode>3_var): Ditto.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/i386/avx512dq-abs-copysign-1.c: Adjust testcase.
> >         * gcc.target/i386/avx512vl-abs-copysign-1.c: Adjust testcase.
> > ---
> >  gcc/config/i386/i386-expand.c                 | 152 +++---------------
> >  gcc/config/i386/i386-protos.h                 |   2 -
> >  gcc/config/i386/i386.md                       |  44 -----
> >  .../gcc.target/i386/avx512dq-abs-copysign-1.c |   4 +-
> >  .../gcc.target/i386/avx512vl-abs-copysign-1.c |   4 +-
> >  5 files changed, 30 insertions(+), 176 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
> > index badbacc19d8..a0262a8f47d 100644
> > --- a/gcc/config/i386/i386-expand.c
> > +++ b/gcc/config/i386/i386-expand.c
> > @@ -2115,13 +2115,9 @@ void
> >  ix86_expand_copysign (rtx operands[])
> >  {
> >    machine_mode mode, vmode;
> > -  rtx dest, op0, op1, mask;
> > +  rtx dest, op0, op1, mask, op2, op3;
> >
> > -  dest = operands[0];
> > -  op0 = operands[1];
> > -  op1 = operands[2];
> > -
> > -  mode = GET_MODE (dest);
> > +  mode = GET_MODE (operands[0]);
> >
> >    if (mode == SFmode)
> >      vmode = V4SFmode;
> > @@ -2132,136 +2128,40 @@ ix86_expand_copysign (rtx operands[])
> >    else
> >      gcc_unreachable ();
> >
> > -  mask = ix86_build_signbit_mask (vmode, 0, 0);
> > -
> > -  if (CONST_DOUBLE_P (op0))
> > +  if (rtx_equal_p (operands[1], operands[2]))
> >      {
> > -      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
> > -       op0 = simplify_unary_operation (ABS, mode, op0, mode);
> > -
> > -      if (mode == SFmode || mode == DFmode)
> > -       {
> > -         if (op0 == CONST0_RTX (mode))
> > -           op0 = CONST0_RTX (vmode);
> > -         else
> > -           {
> > -             rtx v = ix86_build_const_vector (vmode, false, op0);
> > -
> > -             op0 = force_reg (vmode, v);
> > -           }
> > -       }
> > -      else if (op0 != CONST0_RTX (mode))
> > -       op0 = force_reg (mode, op0);
> > -
> > -      emit_insn (gen_copysign3_const (mode, dest, op0, op1, mask));
> > -    }
> > -  else
> > -    {
> > -      rtx nmask = ix86_build_signbit_mask (vmode, 0, 1);
> > -
> > -      emit_insn (gen_copysign3_var
> > -                (mode, dest, NULL_RTX, op0, op1, nmask, mask));
> > -    }
> > -}
> > -
> > -/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
> > -   be a constant, and so has already been expanded into a vector constant. 
> >  */
> > -
> > -void
> > -ix86_split_copysign_const (rtx operands[])
> > -{
> > -  machine_mode mode, vmode;
> > -  rtx dest, op0, mask, x;
> > -
> > -  dest = operands[0];
> > -  op0 = operands[1];
> > -  mask = operands[3];
> > -
> > -  mode = GET_MODE (dest);
> > -  vmode = GET_MODE (mask);
> > -
> > -  dest = lowpart_subreg (vmode, dest, mode);
> > -  x = gen_rtx_AND (vmode, dest, mask);
> > -  emit_insn (gen_rtx_SET (dest, x));
> > -
> > -  if (op0 != CONST0_RTX (vmode))
> > -    {
> > -      x = gen_rtx_IOR (vmode, dest, op0);
> > -      emit_insn (gen_rtx_SET (dest, x));
> > -    }
> > -}
> > -
> > -/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
> > -   so we have to do two masks.  */
> > -
> > -void
> > -ix86_split_copysign_var (rtx operands[])
> > -{
> > -  machine_mode mode, vmode;
> > -  rtx dest, scratch, op0, op1, mask, nmask, x;
> > -
> > -  dest = operands[0];
> > -  scratch = operands[1];
> > -  op0 = operands[2];
> > -  op1 = operands[3];
> > -  nmask = operands[4];
> > -  mask = operands[5];
> > -
> > -  mode = GET_MODE (dest);
> > -  vmode = GET_MODE (mask);
> > -
> > -  if (rtx_equal_p (op0, op1))
> > -    {
> > -      /* Shouldn't happen often (it's useless, obviously), but when it does
> > -        we'd generate incorrect code if we continue below.  */
> > -      emit_move_insn (dest, op0);
> > +      emit_move_insn (operands[0], operands[1]);
> >        return;
> >      }
> >
> > -  if (REG_P (mask) && REGNO (dest) == REGNO (mask))    /* alternative 0 */
> > -    {
> > -      gcc_assert (REGNO (op1) == REGNO (scratch));
> > -
> > -      x = gen_rtx_AND (vmode, scratch, mask);
> > -      emit_insn (gen_rtx_SET (scratch, x));
> > +  dest = lowpart_subreg (vmode, operands[0], mode);
> > +  op1 = lowpart_subreg (vmode, operands[2], mode);
> > +  mask = ix86_build_signbit_mask (vmode, 0, 0);
> >
> > -      dest = mask;
> > -      op0 = lowpart_subreg (vmode, op0, mode);
> > -      x = gen_rtx_NOT (vmode, dest);
> > -      x = gen_rtx_AND (vmode, x, op0);
> > -      emit_insn (gen_rtx_SET (dest, x));
> > -    }
> > -  else
> > +  if (CONST_DOUBLE_P (operands[1]))
> >      {
> > -      if (REGNO (op1) == REGNO (scratch))              /* alternative 1,3 
> > */
> > -       {
> > -         x = gen_rtx_AND (vmode, scratch, mask);
> > -       }
> > -      else                                             /* alternative 2,4 
> > */
> > +      op0 = simplify_unary_operation (ABS, mode, operands[1], mode);
> > +      /* Optimize for 0, simplify b = copy_signf (0.0f, a) to b = mask & a.  */
> > +      if (op0 == CONST0_RTX (mode))
> >         {
> > -          gcc_assert (REGNO (mask) == REGNO (scratch));
> > -          op1 = lowpart_subreg (vmode, op1, mode);
> > -         x = gen_rtx_AND (vmode, scratch, op1);
> > +         emit_move_insn (dest, gen_rtx_AND (vmode, mask, op1));
> > +         return;
> >         }
> > -      emit_insn (gen_rtx_SET (scratch, x));
> >
> > -      if (REGNO (op0) == REGNO (dest))                 /* alternative 1,2 
> > */
> > -       {
> > -         dest = lowpart_subreg (vmode, op0, mode);
> > -         x = gen_rtx_AND (vmode, dest, nmask);
> > -       }
> > -      else                                             /* alternative 3,4 
> > */
> > -       {
> > -          gcc_assert (REGNO (nmask) == REGNO (dest));
> > -         dest = nmask;
> > -         op0 = lowpart_subreg (vmode, op0, mode);
> > -         x = gen_rtx_AND (vmode, dest, op0);
> > -       }
> > -      emit_insn (gen_rtx_SET (dest, x));
> > +      if (GET_MODE_SIZE (mode) < 16)
> > +       op0 = ix86_build_const_vector (vmode, false, op0);
> > +      op0 = force_reg (vmode, op0);
> >      }
> > -
> > -  x = gen_rtx_IOR (vmode, dest, scratch);
> > -  emit_insn (gen_rtx_SET (dest, x));
> > +  else
> > +    op0 = lowpart_subreg (vmode, operands[1], mode);
> > +
> > +  op2 = gen_reg_rtx (vmode);
> > +  op3 = gen_reg_rtx (vmode);
> > +  emit_move_insn (op2, gen_rtx_AND (vmode,
> > +                                   gen_rtx_NOT (vmode, mask),
> > +                                   op0));
> > +  emit_move_insn (op3, gen_rtx_AND (vmode, mask, op1));
> > +  emit_move_insn (dest, gen_rtx_IOR (vmode, op2, op3));
> >  }
> >
> >  /* Expand an xorsign operation.  */
> > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> > index 72644e33a92..dcae34b915e 100644
> > --- a/gcc/config/i386/i386-protos.h
> > +++ b/gcc/config/i386/i386-protos.h
> > @@ -135,8 +135,6 @@ extern void ix86_expand_fp_absneg_operator (enum 
> > rtx_code, machine_mode,
> >  extern void ix86_split_fp_absneg_operator (enum rtx_code, machine_mode,
> >                                            rtx[]);
> >  extern void ix86_expand_copysign (rtx []);
> > -extern void ix86_split_copysign_const (rtx []);
> > -extern void ix86_split_copysign_var (rtx []);
> >  extern void ix86_expand_xorsign (rtx []);
> >  extern bool ix86_unary_operator_ok (enum rtx_code, machine_mode, rtx[]);
> >  extern bool ix86_match_ccmode (rtx, machine_mode);
> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > index 6b4ceb2bce3..ba0058dad81 100644
> > --- a/gcc/config/i386/i386.md
> > +++ b/gcc/config/i386/i386.md
> > @@ -10861,50 +10861,6 @@ (define_expand "copysign<mode>3"
> >     || (TARGET_SSE && (<MODE>mode == TFmode))"
> >    "ix86_expand_copysign (operands); DONE;")
> >
> > -(define_insn_and_split "@copysign<mode>3_const"
> > -  [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv")
> > -       (unspec:SSEMODEF
> > -         [(match_operand:<ssevecmodef> 1 "nonimm_or_0_operand" "YvmC")
> > -          (match_operand:SSEMODEF 2 "register_operand" "0")
> > -          (match_operand:<ssevecmodef> 3 "nonimmediate_operand" "Yvm")]
> > -         UNSPEC_COPYSIGN))]
> > -  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
> > -   || (TARGET_SSE && (<MODE>mode == TFmode))"
> > -  "#"
> > -  "&& reload_completed"
> > -  [(const_int 0)]
> > -  "ix86_split_copysign_const (operands); DONE;")
> > -
> > -(define_insn "@copysign<mode>3_var"
> > -  [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv,Yv,Yv,Yv,Yv")
> > -       (unspec:SSEMODEF
> > -         [(match_operand:SSEMODEF 2 "register_operand" "Yv,0,0,Yv,Yv")
> > -          (match_operand:SSEMODEF 3 "register_operand" "1,1,Yv,1,Yv")
> > -          (match_operand:<ssevecmodef> 4
> > -            "nonimmediate_operand" "X,Yvm,Yvm,0,0")
> > -          (match_operand:<ssevecmodef> 5
> > -            "nonimmediate_operand" "0,Yvm,1,Yvm,1")]
> > -         UNSPEC_COPYSIGN))
> > -   (clobber (match_scratch:<ssevecmodef> 1 "=Yv,Yv,Yv,Yv,Yv"))]
> > -  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
> > -   || (TARGET_SSE && (<MODE>mode == TFmode))"
> > -  "#")
> > -
> > -(define_split
> > -  [(set (match_operand:SSEMODEF 0 "register_operand")
> > -       (unspec:SSEMODEF
> > -         [(match_operand:SSEMODEF 2 "register_operand")
> > -          (match_operand:SSEMODEF 3 "register_operand")
> > -          (match_operand:<ssevecmodef> 4)
> > -          (match_operand:<ssevecmodef> 5)]
> > -         UNSPEC_COPYSIGN))
> > -   (clobber (match_scratch:<ssevecmodef> 1))]
> > -  "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
> > -    || (TARGET_SSE && (<MODE>mode == TFmode)))
> > -   && reload_completed"
> > -  [(const_int 0)]
> > -  "ix86_split_copysign_var (operands); DONE;")
> > -
> >  (define_expand "xorsign<mode>3"
> >    [(match_operand:MODEF 0 "register_operand")
> >     (match_operand:MODEF 1 "register_operand")
> > diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c 
> > b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c
> > index cb542d09058..0107df7741a 100644
> > --- a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c
> > @@ -64,8 +64,8 @@ f6 (double x)
> >  }
> >
> >  /* { dg-final { scan-assembler "vandps\[^\n\r\]*xmm16" } } */
> > -/* { dg-final { scan-assembler "vorps\[^\n\r\]*xmm16" } } */
> > +/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */
> >  /* { dg-final { scan-assembler "vxorps\[^\n\r\]*xmm16" } } */
> >  /* { dg-final { scan-assembler "vandpd\[^\n\r\]*xmm18" } } */
> > -/* { dg-final { scan-assembler "vorpd\[^\n\r\]*xmm18" } } */
> > +/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */
> >  /* { dg-final { scan-assembler "vxorpd\[^\n\r\]*xmm18" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c 
> > b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c
> > index b375c5fad80..b27335b9d99 100644
> > --- a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c
> > @@ -64,8 +64,8 @@ f6 (double x)
> >  }
> >
> >  /* { dg-final { scan-assembler "vpandd\[^\n\r\]*xmm16" } } */
> > -/* { dg-final { scan-assembler "vpord\[^\n\r\]*xmm16" } } */
> > +/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */
> >  /* { dg-final { scan-assembler "vpxord\[^\n\r\]*xmm16" } } */
> >  /* { dg-final { scan-assembler "vpandq\[^\n\r\]*xmm18" } } */
> > -/* { dg-final { scan-assembler "vporq\[^\n\r\]*xmm18" } } */
> > +/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */
> >  /* { dg-final { scan-assembler "vpxorq\[^\n\r\]*xmm18" } } */
> > --
> > 2.27.0
> >
>
>
> --
> BR,
> Hongtao



-- 
BR,
Hongtao

Reply via email to