On Fri, Sep 16, 2022 at 9:09 AM liuhongt via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > There's a peephole2, submitted in the 1990s, which splits cmp mem, 0 into load mem, > reg + test reg, reg. I don't know the exact reason why GCC does this. > > For the latest x86 processors, ciscization should help the processor frontend > and also code size; for the processor backend, both forms should be the same (they have the same > uops). > > So the patch deletes the peephole2, and also modifies another splitter to > generate more cmp mem, 0 for 32-bit targets. > > It will help instruction fetch. > > For minmax-1.c, minmax-2.c, minmax-10.c and pr96861.c, the tests are supposed to check that there's > no comparison to 1 or -1, so adjust the testcases since under 32-bit > targets we now generate cmp mem, 0 instead of load + test. > > Similar for pr78035.c. > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > No performance impact for SPEC2017 on ICX/Znver3. > > Ok for trunk? > > gcc/ChangeLog: > > * config/i386/i386.md (*<code><mode>3_1): Replace > register_operand with nonimmediate_operand for operand 1. Also > force_reg it when mode is QImode. > (define_peephole2): Delete related peephole2. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/minmax-1.c: Scan-assembler-not for cmp with 1 > or -1; also don't scan-assembler for test on ia32. > * gcc.target/i386/minmax-10.c: Ditto. > * gcc.target/i386/minmax-2.c: Ditto. > * gcc.target/i386/pr96861.c: Ditto. > * gcc.target/i386/pr78035.c: Scan either cmp or test 3 times. 
> --- > gcc/config/i386/i386.md | 18 +++++------------- > gcc/testsuite/gcc.target/i386/minmax-1.c | 4 ++-- > gcc/testsuite/gcc.target/i386/minmax-10.c | 4 ++-- > gcc/testsuite/gcc.target/i386/minmax-2.c | 4 ++-- > gcc/testsuite/gcc.target/i386/pr78035.c | 2 +- > gcc/testsuite/gcc.target/i386/pr96861.c | 4 ++-- > 6 files changed, 14 insertions(+), 22 deletions(-) > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 1be9b669909..93b905beb72 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -21871,7 +21871,7 @@ (define_insn_and_split "*<code><dwi>3_doubleword" > (define_insn_and_split "*<code><mode>3_1" > [(set (match_operand:SWI 0 "register_operand") > (maxmin:SWI > - (match_operand:SWI 1 "register_operand") > + (match_operand:SWI 1 "nonimmediate_operand") > (match_operand:SWI 2 "general_operand"))) > (clobber (reg:CC FLAGS_REG))] > "TARGET_CMOVE > @@ -21886,9 +21886,12 @@ (define_insn_and_split "*<code><mode>3_1" > { > machine_mode mode = <MODE>mode; > rtx cmp_op = operands[2]; > - > operands[2] = force_reg (mode, cmp_op); > > + /* movqicc_noc only support register_operand for op1. */ > + if (mode == QImode) > + operands[1] = force_reg (mode, operands[1]); > + > enum rtx_code code = <maxmin_rel>; > > if (cmp_op == const1_rtx) > @@ -22482,17 +22485,6 @@ (define_peephole2 > [(set (match_dup 2) (match_dup 1)) > (set (match_dup 0) (match_dup 2))]) > > -;; Don't compare memory with zero, load and use a test instead. > -(define_peephole2 > - [(set (match_operand 0 "flags_reg_operand") > - (match_operator 1 "compare_operator" > - [(match_operand:SI 2 "memory_operand") > - (const_int 0)])) > - (match_scratch:SI 3 "r")] > - "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)" > - [(set (match_dup 3) (match_dup 2)) > - (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))]) > - > ;; NOT is not pairable on Pentium, while XOR is, but one byte longer. 
> ;; Don't split NOTs with a displacement operand, because resulting XOR > ;; will not be pairable anyway. > diff --git a/gcc/testsuite/gcc.target/i386/minmax-1.c > b/gcc/testsuite/gcc.target/i386/minmax-1.c > index 0ec35b1c5a1..840b32c5414 100644 > --- a/gcc/testsuite/gcc.target/i386/minmax-1.c > +++ b/gcc/testsuite/gcc.target/i386/minmax-1.c > @@ -1,7 +1,7 @@ > /* { dg-do compile } */ > /* { dg-options "-O2 -march=opteron -mno-stv" } */ > -/* { dg-final { scan-assembler "test" } } */ > -/* { dg-final { scan-assembler-not "cmp" } } */ > +/* { dg-final { scan-assembler "test" { target { ! ia32 } } } } */ > +/* { dg-final { scan-assembler-not {(?n)cmp.*[$]+1} } } */ > #define max(a,b) (((a) > (b))? (a) : (b)) > int > t(int a) > diff --git a/gcc/testsuite/gcc.target/i386/minmax-10.c > b/gcc/testsuite/gcc.target/i386/minmax-10.c > index b044462c5a9..1dd2eedf435 100644 > --- a/gcc/testsuite/gcc.target/i386/minmax-10.c > +++ b/gcc/testsuite/gcc.target/i386/minmax-10.c > @@ -34,5 +34,5 @@ unsigned int umin1(unsigned int x) > return min(x,1); > } > > -/* { dg-final { scan-assembler-times "test" 6 } } */ > -/* { dg-final { scan-assembler-not "cmp" } } */ > +/* { dg-final { scan-assembler-times "test" 6 { target { ! ia32 } } } } */ > +/* { dg-final { scan-assembler-not {(?n)cmp.*1} } } */ > diff --git a/gcc/testsuite/gcc.target/i386/minmax-2.c > b/gcc/testsuite/gcc.target/i386/minmax-2.c > index af9baeaaf7c..2c82f6cecb9 100644 > --- a/gcc/testsuite/gcc.target/i386/minmax-2.c > +++ b/gcc/testsuite/gcc.target/i386/minmax-2.c > @@ -1,7 +1,7 @@ > /* { dg-do compile } */ > /* { dg-options "-O2 -mno-stv" } */ > -/* { dg-final { scan-assembler "test" } } */ > -/* { dg-final { scan-assembler-not "cmp" } } */ > +/* { dg-final { scan-assembler "test" { target { ! ia32 } } } } */ > +/* { dg-final { scan-assembler-not {(?n)cmp.*[$]1} } } */ > #define max(a,b) (((a) > (b))? 
(a) : (b)) > unsigned int > t(unsigned int a) > diff --git a/gcc/testsuite/gcc.target/i386/pr78035.c > b/gcc/testsuite/gcc.target/i386/pr78035.c > index 7d3a983b218..d543d3f1d38 100644 > --- a/gcc/testsuite/gcc.target/i386/pr78035.c > +++ b/gcc/testsuite/gcc.target/i386/pr78035.c > @@ -22,4 +22,4 @@ int bar () > } > > /* We should not optimize away either comparison. */ > -/* { dg-final { scan-assembler-times "cmp" 2 } } */ > +/* { dg-final { scan-assembler-times "(?:cmp|test)" 3 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr96861.c > b/gcc/testsuite/gcc.target/i386/pr96861.c > index 7b7aeccb83c..8c0f0841f7d 100644 > --- a/gcc/testsuite/gcc.target/i386/pr96861.c > +++ b/gcc/testsuite/gcc.target/i386/pr96861.c > @@ -34,5 +34,5 @@ unsigned int umin1(unsigned int x) > return min(x,1); > } > > -/* { dg-final { scan-assembler-times "test" 6 } } */ > -/* { dg-final { scan-assembler-not "cmp" } } */ > +/* { dg-final { scan-assembler-times "test" 6 { target { ! ia32 } } } } */ > +/* { dg-final { scan-assembler-not {(?n)cmp.*[$]+1} } } */ > -- > 2.18.1 >
-- BR, Hongtao