On Wed, Sep 8, 2021 at 6:02 PM Jakub Jelinek <ja...@redhat.com> wrote: > > On Wed, Sep 08, 2021 at 06:00:50PM +0800, Hongtao Liu wrote: > > Yes, I think so. > > And I find paradoxical subreg like (subreg:V4SF (reg:SF)) are not > > allowed by validate_subreg until r11-621. > > That's why post_reload splitter is needed here. > > Following seems to work for all the testcases I've found (and in some > generates better code than the post-reload splitter): > > 2021-09-08 Jakub Jelinek <ja...@redhat.com> > liuhongt <hongtao....@intel.com> > > PR target/89984 > * config/i386/i386.md (@xorsign<mode>3_1): Remove. > * config/i386/i386-expand.c (ix86_expand_xorsign): Expand right away > into AND with mask and XOR, using paradoxical subregs. > (ix86_split_xorsign): Remove. Also remove this from i386-protos.h. > > * gcc.target/i386/avx-pr102224.c: Fix up PR number. > * gcc.dg/pr89984.c: New test. > * gcc.target/i386/avx-pr89984.c: New test. > Other LGTM. > --- gcc/config/i386/i386.md.jj 2021-09-08 11:40:55.826534981 +0200 > +++ gcc/config/i386/i386.md 2021-09-08 11:44:08.394828674 +0200 > @@ -10918,20 +10918,6 @@ (define_expand "xorsign<mode>3" > DONE; > }) > > -(define_insn_and_split "@xorsign<mode>3_1" > - [(set (match_operand:MODEF 0 "register_operand" "=&Yv,&Yv,&Yv") > - (unspec:MODEF > - [(match_operand:MODEF 1 "register_operand" "Yv,0,Yv") > - (match_operand:MODEF 2 "register_operand" "0,Yv,Yv") > - (match_operand:<ssevecmode> 3 "nonimmediate_operand" > "Yvm,Yvm,Yvm")] > - UNSPEC_XORSIGN))] > - "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" > - "#" > - "&& reload_completed" > - [(const_int 0)] > - "ix86_split_xorsign (operands); DONE;" > - [(set_attr "isa" "*,avx,avx")]) > - > ;; One complement instructions > > (define_expand "one_cmpl<mode>2" > --- gcc/config/i386/i386-expand.c.jj 2021-09-08 11:40:55.824535010 +0200 > +++ gcc/config/i386/i386-expand.c 2021-09-08 11:51:15.969819626 +0200 > @@ -2270,7 +2270,7 @@ void > ix86_expand_xorsign (rtx operands[]) > { > 
machine_mode mode, vmode; > - rtx dest, op0, op1, mask; > + rtx dest, op0, op1, mask, x, temp; > > dest = operands[0]; > op0 = operands[1]; > @@ -2285,60 +2285,15 @@ ix86_expand_xorsign (rtx operands[]) > else > gcc_unreachable (); > > + temp = gen_reg_rtx (vmode); > mask = ix86_build_signbit_mask (vmode, 0, 0); > > - emit_insn (gen_xorsign3_1 (mode, dest, op0, op1, mask)); > -} > + op1 = lowpart_subreg (vmode, op1, mode); > + x = gen_rtx_AND (vmode, op1, mask); > + emit_insn (gen_rtx_SET (temp, x)); > > -/* Deconstruct an xorsign operation into bit masks. */ > - > -void > -ix86_split_xorsign (rtx operands[]) > -{ > - machine_mode mode, vmode; > - rtx dest, op0, op1, mask, x; > - > - dest = operands[0]; > - op0 = operands[1]; > - op1 = operands[2]; > - mask = operands[3]; > - > - mode = GET_MODE (dest); > - vmode = GET_MODE (mask); > - > - /* The constraints ensure that for non-AVX dest == op1 is > - different from op0, and for AVX that at most two of > - dest, op0 and op1 are the same register but the third one > - is different. 
*/ > - if (rtx_equal_p (op0, op1)) > - { > - gcc_assert (TARGET_AVX && !rtx_equal_p (op0, dest)); > - if (vmode == V4SFmode) > - vmode = V4SImode; > - else > - { > - gcc_assert (vmode == V2DFmode); > - vmode = V2DImode; > - } > - mask = lowpart_subreg (vmode, mask, GET_MODE (mask)); > - if (MEM_P (mask)) > - { > - rtx msk = lowpart_subreg (vmode, dest, mode); > - emit_insn (gen_rtx_SET (msk, mask)); > - mask = msk; > - } > - op0 = lowpart_subreg (vmode, op0, mode); > - x = gen_rtx_AND (vmode, gen_rtx_NOT (vmode, mask), op0); > - } > - else > - { > - op1 = lowpart_subreg (vmode, op1, mode); > - x = gen_rtx_AND (vmode, op1, mask); > - emit_insn (gen_rtx_SET (op1, x)); > - > - op0 = lowpart_subreg (vmode, op0, mode); > - x = gen_rtx_XOR (vmode, op1, op0); > - } > + op0 = lowpart_subreg (vmode, op0, mode); > + x = gen_rtx_XOR (vmode, temp, op0); > > dest = lowpart_subreg (vmode, dest, mode); > emit_insn (gen_rtx_SET (dest, x)); > --- gcc/testsuite/gcc.target/i386/avx-pr102224.c.jj 2021-09-08 > 11:40:55.826534981 +0200 > +++ gcc/testsuite/gcc.target/i386/avx-pr102224.c 2021-09-08 > 11:57:41.741386062 +0200 > @@ -1,4 +1,4 @@ > -/* PR tree-optimization/51581 */ > +/* PR target/102224 */ > /* { dg-do run } */ > /* { dg-options "-O2 -mavx" } */ > /* { dg-require-effective-target avx } */ > --- gcc/testsuite/gcc.dg/pr89984.c.jj 2021-09-08 11:56:33.799343240 +0200 > +++ gcc/testsuite/gcc.dg/pr89984.c 2021-09-08 11:54:36.070001821 +0200 > @@ -0,0 +1,20 @@ > +/* PR target/89984 */ > +/* { dg-do run } */ > +/* { dg-options "-O2" } */ > + > +__attribute__((noipa)) float > +foo (float x, float y) > +{ > + return x * __builtin_copysignf (1.0f, y) + y; > +} > + > +int > +main () > +{ > + if (foo (1.25f, 7.25f) != 1.25f + 7.25f > + || foo (1.75f, -3.25f) != -1.75f + -3.25f > + || foo (-2.25f, 7.5f) != -2.25f + 7.5f > + || foo (-3.0f, -4.0f) != 3.0f + -4.0f) > + __builtin_abort (); > + return 0; > +} > --- gcc/testsuite/gcc.target/i386/avx-pr89984.c.jj 2021-09-08 > 11:57:12.297800869 
+0200 > +++ gcc/testsuite/gcc.target/i386/avx-pr89984.c 2021-09-08 11:57:56.936172001 > +0200 > @@ -0,0 +1,23 @@ > +/* PR target/89984 */ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mavx" } */ > +/* { dg-require-effective-target avx } */ > + > +#ifndef CHECK_H > +#define CHECK_H "avx-check.h" > +#endif > +#ifndef TEST > +#define TEST avx_test > +#endif > + > +#define main main1 > +#include "../../gcc.dg/pr89984.c" > +#undef main > + > +#include CHECK_H > + > +static void > +TEST (void) > +{ > + main1 (); > +} > > > Jakub >
-- BR, Hongtao