On Thu, Sep 9, 2021 at 4:00 PM Hongtao Liu <crazy...@gmail.com> wrote: > > On Thu, Sep 9, 2021 at 3:54 PM liuhongt <hongtao....@intel.com> wrote: > > > > Hi: > > As a follow up of [1], the patch removes all scalar mode copysign related > > post_reload splitter/define_insn and expands copysign directly into below > > using > > paradoxical subregs. > > [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579057.html > > > > op3 = op1 & ~mask; > > op4 = op2 & mask; > > dest = op3 | op4; > > > > It can sometimes generate better code just like avx512dq-abs-copysign-1.c > > shows. > > > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. > > Committed. > > gcc/ChangeLog: > > > > * config/i386/i386-expand.c (ix86_expand_copysign): Expand > > right into ANDNOT + AND + IOR, using paradoxical subregs. > > (ix86_split_copysign_const): Remove. > > (ix86_split_copysign_var): Ditto. > > * config/i386/i386-protos.h (ix86_split_copysign_const): Ditto. > > (ix86_split_copysign_var): Ditto. > > * config/i386/i386.md (@copysign<mode>3_const): Ditto. > > (@copysign<mode>3_var): Ditto. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/i386/avx512dq-abs-copysign-1.c: Adjust testcase. > > * gcc.target/i386/avx512vl-abs-copysign-1.c: Adjust testcase. 
> > --- > > gcc/config/i386/i386-expand.c | 152 +++--------------- > > gcc/config/i386/i386-protos.h | 2 - > > gcc/config/i386/i386.md | 44 ----- > > .../gcc.target/i386/avx512dq-abs-copysign-1.c | 4 +- > > .../gcc.target/i386/avx512vl-abs-copysign-1.c | 4 +- > > 5 files changed, 30 insertions(+), 176 deletions(-) > > > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > > index badbacc19d8..a0262a8f47d 100644 > > --- a/gcc/config/i386/i386-expand.c > > +++ b/gcc/config/i386/i386-expand.c > > @@ -2115,13 +2115,9 @@ void > > ix86_expand_copysign (rtx operands[]) > > { > > machine_mode mode, vmode; > > - rtx dest, op0, op1, mask; > > + rtx dest, op0, op1, mask, op2, op3; > > > > - dest = operands[0]; > > - op0 = operands[1]; > > - op1 = operands[2]; > > - > > - mode = GET_MODE (dest); > > + mode = GET_MODE (operands[0]); > > > > if (mode == SFmode) > > vmode = V4SFmode; > > @@ -2132,136 +2128,40 @@ ix86_expand_copysign (rtx operands[]) > > else > > gcc_unreachable (); > > > > - mask = ix86_build_signbit_mask (vmode, 0, 0); > > - > > - if (CONST_DOUBLE_P (op0)) > > + if (rtx_equal_p (operands[1], operands[2])) > > { > > - if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0))) > > - op0 = simplify_unary_operation (ABS, mode, op0, mode); > > - > > - if (mode == SFmode || mode == DFmode) > > - { > > - if (op0 == CONST0_RTX (mode)) > > - op0 = CONST0_RTX (vmode); > > - else > > - { > > - rtx v = ix86_build_const_vector (vmode, false, op0); > > - > > - op0 = force_reg (vmode, v); > > - } > > - } > > - else if (op0 != CONST0_RTX (mode)) > > - op0 = force_reg (mode, op0); > > - > > - emit_insn (gen_copysign3_const (mode, dest, op0, op1, mask)); > > - } > > - else > > - { > > - rtx nmask = ix86_build_signbit_mask (vmode, 0, 1); > > - > > - emit_insn (gen_copysign3_var > > - (mode, dest, NULL_RTX, op0, op1, nmask, mask)); > > - } > > -} > > - > > -/* Deconstruct a copysign operation into bit masks. 
Operand 0 is known to > > - be a constant, and so has already been expanded into a vector constant. > > */ > > - > > -void > > -ix86_split_copysign_const (rtx operands[]) > > -{ > > - machine_mode mode, vmode; > > - rtx dest, op0, mask, x; > > - > > - dest = operands[0]; > > - op0 = operands[1]; > > - mask = operands[3]; > > - > > - mode = GET_MODE (dest); > > - vmode = GET_MODE (mask); > > - > > - dest = lowpart_subreg (vmode, dest, mode); > > - x = gen_rtx_AND (vmode, dest, mask); > > - emit_insn (gen_rtx_SET (dest, x)); > > - > > - if (op0 != CONST0_RTX (vmode)) > > - { > > - x = gen_rtx_IOR (vmode, dest, op0); > > - emit_insn (gen_rtx_SET (dest, x)); > > - } > > -} > > - > > -/* Deconstruct a copysign operation into bit masks. Operand 0 is variable, > > - so we have to do two masks. */ > > - > > -void > > -ix86_split_copysign_var (rtx operands[]) > > -{ > > - machine_mode mode, vmode; > > - rtx dest, scratch, op0, op1, mask, nmask, x; > > - > > - dest = operands[0]; > > - scratch = operands[1]; > > - op0 = operands[2]; > > - op1 = operands[3]; > > - nmask = operands[4]; > > - mask = operands[5]; > > - > > - mode = GET_MODE (dest); > > - vmode = GET_MODE (mask); > > - > > - if (rtx_equal_p (op0, op1)) > > - { > > - /* Shouldn't happen often (it's useless, obviously), but when it does > > - we'd generate incorrect code if we continue below. 
*/ > > - emit_move_insn (dest, op0); > > + emit_move_insn (operands[0], operands[1]); > > return; > > } > > > > - if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */ > > - { > > - gcc_assert (REGNO (op1) == REGNO (scratch)); > > - > > - x = gen_rtx_AND (vmode, scratch, mask); > > - emit_insn (gen_rtx_SET (scratch, x)); > > + dest = lowpart_subreg (vmode, operands[0], mode); > > + op1 = lowpart_subreg (vmode, operands[2], mode); > > + mask = ix86_build_signbit_mask (vmode, 0, 0); > > > > - dest = mask; > > - op0 = lowpart_subreg (vmode, op0, mode); > > - x = gen_rtx_NOT (vmode, dest); > > - x = gen_rtx_AND (vmode, x, op0); > > - emit_insn (gen_rtx_SET (dest, x)); > > - } > > - else > > + if (CONST_DOUBLE_P (operands[1])) > > { > > - if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 > > */ > > - { > > - x = gen_rtx_AND (vmode, scratch, mask); > > - } > > - else /* alternative 2,4 > > */ > > + op0 = simplify_unary_operation (ABS, mode, operands[1], mode); > > + /* Optimize for 0, simplify b = copy_signf (0.0f, a) to b = mask & > > a. 
*/ > > + if (op0 == CONST0_RTX (mode)) > > { > > - gcc_assert (REGNO (mask) == REGNO (scratch)); > > - op1 = lowpart_subreg (vmode, op1, mode); > > - x = gen_rtx_AND (vmode, scratch, op1); > > + emit_move_insn (dest, gen_rtx_AND (vmode, mask, op1)); > > + return; > > } > > - emit_insn (gen_rtx_SET (scratch, x)); > > > > - if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 > > */ > > - { > > - dest = lowpart_subreg (vmode, op0, mode); > > - x = gen_rtx_AND (vmode, dest, nmask); > > - } > > - else /* alternative 3,4 > > */ > > - { > > - gcc_assert (REGNO (nmask) == REGNO (dest)); > > - dest = nmask; > > - op0 = lowpart_subreg (vmode, op0, mode); > > - x = gen_rtx_AND (vmode, dest, op0); > > - } > > - emit_insn (gen_rtx_SET (dest, x)); > > + if (GET_MODE_SIZE (mode) < 16) > > + op0 = ix86_build_const_vector (vmode, false, op0); > > + op0 = force_reg (vmode, op0); > > } > > - > > - x = gen_rtx_IOR (vmode, dest, scratch); > > - emit_insn (gen_rtx_SET (dest, x)); > > + else > > + op0 = lowpart_subreg (vmode, operands[1], mode); > > + > > + op2 = gen_reg_rtx (vmode); > > + op3 = gen_reg_rtx (vmode); > > + emit_move_insn (op2, gen_rtx_AND (vmode, > > + gen_rtx_NOT (vmode, mask), > > + op0)); > > + emit_move_insn (op3, gen_rtx_AND (vmode, mask, op1)); > > + emit_move_insn (dest, gen_rtx_IOR (vmode, op2, op3)); > > } > > > > /* Expand an xorsign operation. 
*/ > > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h > > index 72644e33a92..dcae34b915e 100644 > > --- a/gcc/config/i386/i386-protos.h > > +++ b/gcc/config/i386/i386-protos.h > > @@ -135,8 +135,6 @@ extern void ix86_expand_fp_absneg_operator (enum > > rtx_code, machine_mode, > > extern void ix86_split_fp_absneg_operator (enum rtx_code, machine_mode, > > rtx[]); > > extern void ix86_expand_copysign (rtx []); > > -extern void ix86_split_copysign_const (rtx []); > > -extern void ix86_split_copysign_var (rtx []); > > extern void ix86_expand_xorsign (rtx []); > > extern bool ix86_unary_operator_ok (enum rtx_code, machine_mode, rtx[]); > > extern bool ix86_match_ccmode (rtx, machine_mode); > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > > index 6b4ceb2bce3..ba0058dad81 100644 > > --- a/gcc/config/i386/i386.md > > +++ b/gcc/config/i386/i386.md > > @@ -10861,50 +10861,6 @@ (define_expand "copysign<mode>3" > > || (TARGET_SSE && (<MODE>mode == TFmode))" > > "ix86_expand_copysign (operands); DONE;") > > > > -(define_insn_and_split "@copysign<mode>3_const" > > - [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv") > > - (unspec:SSEMODEF > > - [(match_operand:<ssevecmodef> 1 "nonimm_or_0_operand" "YvmC") > > - (match_operand:SSEMODEF 2 "register_operand" "0") > > - (match_operand:<ssevecmodef> 3 "nonimmediate_operand" "Yvm")] > > - UNSPEC_COPYSIGN))] > > - "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) > > - || (TARGET_SSE && (<MODE>mode == TFmode))" > > - "#" > > - "&& reload_completed" > > - [(const_int 0)] > > - "ix86_split_copysign_const (operands); DONE;") > > - > > -(define_insn "@copysign<mode>3_var" > > - [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv,Yv,Yv,Yv,Yv") > > - (unspec:SSEMODEF > > - [(match_operand:SSEMODEF 2 "register_operand" "Yv,0,0,Yv,Yv") > > - (match_operand:SSEMODEF 3 "register_operand" "1,1,Yv,1,Yv") > > - (match_operand:<ssevecmodef> 4 > > - "nonimmediate_operand" "X,Yvm,Yvm,0,0") > 
> - (match_operand:<ssevecmodef> 5 > > - "nonimmediate_operand" "0,Yvm,1,Yvm,1")] > > - UNSPEC_COPYSIGN)) > > - (clobber (match_scratch:<ssevecmodef> 1 "=Yv,Yv,Yv,Yv,Yv"))] > > - "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) > > - || (TARGET_SSE && (<MODE>mode == TFmode))" > > - "#") > > - > > -(define_split > > - [(set (match_operand:SSEMODEF 0 "register_operand") > > - (unspec:SSEMODEF > > - [(match_operand:SSEMODEF 2 "register_operand") > > - (match_operand:SSEMODEF 3 "register_operand") > > - (match_operand:<ssevecmodef> 4) > > - (match_operand:<ssevecmodef> 5)] > > - UNSPEC_COPYSIGN)) > > - (clobber (match_scratch:<ssevecmodef> 1))] > > - "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) > > - || (TARGET_SSE && (<MODE>mode == TFmode))) > > - && reload_completed" > > - [(const_int 0)] > > - "ix86_split_copysign_var (operands); DONE;") > > - > > (define_expand "xorsign<mode>3" > > [(match_operand:MODEF 0 "register_operand") > > (match_operand:MODEF 1 "register_operand") > > diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c > > b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c > > index cb542d09058..0107df7741a 100644 > > --- a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c > > +++ b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c > > @@ -64,8 +64,8 @@ f6 (double x) > > } > > > > /* { dg-final { scan-assembler "vandps\[^\n\r\]*xmm16" } } */ > > -/* { dg-final { scan-assembler "vorps\[^\n\r\]*xmm16" } } */ > > +/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */ > > /* { dg-final { scan-assembler "vxorps\[^\n\r\]*xmm16" } } */ > > /* { dg-final { scan-assembler "vandpd\[^\n\r\]*xmm18" } } */ > > -/* { dg-final { scan-assembler "vorpd\[^\n\r\]*xmm18" } } */ > > +/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */ > > /* { dg-final { scan-assembler "vxorpd\[^\n\r\]*xmm18" } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c > > 
b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c > > index b375c5fad80..b27335b9d99 100644 > > --- a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c > > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c > > @@ -64,8 +64,8 @@ f6 (double x) > > } > > > > /* { dg-final { scan-assembler "vpandd\[^\n\r\]*xmm16" } } */ > > -/* { dg-final { scan-assembler "vpord\[^\n\r\]*xmm16" } } */ > > +/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */ > > /* { dg-final { scan-assembler "vpxord\[^\n\r\]*xmm16" } } */ > > /* { dg-final { scan-assembler "vpandq\[^\n\r\]*xmm18" } } */ > > -/* { dg-final { scan-assembler "vporq\[^\n\r\]*xmm18" } } */ > > +/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */ > > /* { dg-final { scan-assembler "vpxorq\[^\n\r\]*xmm18" } } */ > > -- > > 2.27.0 > > > > > -- > BR, > Hongtao
-- BR, Hongtao