On Mon, Jul 17, 2023 at 8:44 AM Hongtao Liu <crazy...@gmail.com> wrote: > > Ping. > > On Tue, Jul 11, 2023 at 5:16 PM liuhongt via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > Similar to what we did for CMPXCHG, but extended to all > > ix86_comparison_int_operator since CMPCCXADD sets EFLAGS exactly the same > > as CMP. > > > > When the operand order in the CMP insn is the same as that in CMPCCXADD, > > the CMP insn can be eliminated directly. > > > > When the operand order is swapped in the CMP insn, only optimize > > cmpccxadd + cmpl + jcc/setcc to cmpccxadd + jcc/setcc when FLAGS_REG is dead > > after jcc/setcc, and adjust the comparison code of the jcc/setcc accordingly. > > > > gcc/ChangeLog: > > > > PR target/110591 > > * config/i386/sync.md (cmpccxadd_<mode>): Adjust the pattern > > to explicitly set FLAGS_REG like *cmp<mode>_1, also add extra > > 3 define_peephole2 after the pattern. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/i386/pr110591.c: New test. > > * gcc.target/i386/pr110591-2.c: New test.
LGTM. Thanks, Uros. > > --- > > gcc/config/i386/sync.md | 160 ++++++++++++++++++++- > > gcc/testsuite/gcc.target/i386/pr110591-2.c | 90 ++++++++++++ > > gcc/testsuite/gcc.target/i386/pr110591.c | 66 +++++++++ > > 3 files changed, 315 insertions(+), 1 deletion(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr110591-2.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr110591.c > > > > diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md > > index e1fa1504deb..e84226cf895 100644 > > --- a/gcc/config/i386/sync.md > > +++ b/gcc/config/i386/sync.md > > @@ -1093,7 +1093,9 @@ (define_insn "cmpccxadd_<mode>" > > UNSPECV_CMPCCXADD)) > > (set (match_dup 1) > > (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD)) > > - (clobber (reg:CC FLAGS_REG))] > > + (set (reg:CC FLAGS_REG) > > + (compare:CC (match_dup 1) > > + (match_dup 2)))] > > "TARGET_CMPCCXADD && TARGET_64BIT" > > { > > char buf[128]; > > @@ -1105,3 +1107,159 @@ (define_insn "cmpccxadd_<mode>" > > output_asm_insn (buf, operands); > > return ""; > > }) > > + > > +(define_peephole2 > > + [(set (match_operand:SWI48x 0 "register_operand") > > + (match_operand:SWI48x 1 "x86_64_general_operand")) > > + (parallel [(set (match_dup 0) > > + (unspec_volatile:SWI48x > > + [(match_operand:SWI48x 2 "memory_operand") > > + (match_dup 0) > > + (match_operand:SWI48x 3 "register_operand") > > + (match_operand:SI 4 "const_int_operand")] > > + UNSPECV_CMPCCXADD)) > > + (set (match_dup 2) > > + (unspec_volatile:SWI48x [(const_int 0)] > > UNSPECV_CMPCCXADD)) > > + (set (reg:CC FLAGS_REG) > > + (compare:CC (match_dup 2) > > + (match_dup 0)))]) > > + (set (reg FLAGS_REG) > > + (compare (match_operand:SWI48x 5 "register_operand") > > + (match_operand:SWI48x 6 "x86_64_general_operand")))] > > + "TARGET_CMPCCXADD && TARGET_64BIT > > + && rtx_equal_p (operands[0], operands[5]) > > + && rtx_equal_p (operands[1], operands[6])" > > + [(set (match_dup 0) > > + (match_dup 1)) > > + (parallel [(set (match_dup 0) > > + 
(unspec_volatile:SWI48x > > + [(match_dup 2) > > + (match_dup 0) > > + (match_dup 3) > > + (match_dup 4)] > > + UNSPECV_CMPCCXADD)) > > + (set (match_dup 2) > > + (unspec_volatile:SWI48x [(const_int 0)] > > UNSPECV_CMPCCXADD)) > > + (set (reg:CC FLAGS_REG) > > + (compare:CC (match_dup 2) > > + (match_dup 0)))]) > > + (set (match_dup 7) > > + (match_op_dup 8 > > + [(match_dup 9) (const_int 0)]))]) > > + > > +(define_peephole2 > > + [(set (match_operand:SWI48x 0 "register_operand") > > + (match_operand:SWI48x 1 "x86_64_general_operand")) > > + (parallel [(set (match_dup 0) > > + (unspec_volatile:SWI48x > > + [(match_operand:SWI48x 2 "memory_operand") > > + (match_dup 0) > > + (match_operand:SWI48x 3 "register_operand") > > + (match_operand:SI 4 "const_int_operand")] > > + UNSPECV_CMPCCXADD)) > > + (set (match_dup 2) > > + (unspec_volatile:SWI48x [(const_int 0)] > > UNSPECV_CMPCCXADD)) > > + (set (reg:CC FLAGS_REG) > > + (compare:CC (match_dup 2) > > + (match_dup 0)))]) > > + (set (reg FLAGS_REG) > > + (compare (match_operand:SWI48x 5 "register_operand") > > + (match_operand:SWI48x 6 "x86_64_general_operand"))) > > + (set (match_operand:QI 7 "nonimmediate_operand") > > + (match_operator:QI 8 "ix86_comparison_int_operator" > > + [(reg FLAGS_REG) (const_int 0)]))] > > + "TARGET_CMPCCXADD && TARGET_64BIT > > + && rtx_equal_p (operands[0], operands[6]) > > + && rtx_equal_p (operands[1], operands[5]) > > + && peep2_regno_dead_p (4, FLAGS_REG)" > > + [(set (match_dup 0) > > + (match_dup 1)) > > + (parallel [(set (match_dup 0) > > + (unspec_volatile:SWI48x > > + [(match_dup 2) > > + (match_dup 0) > > + (match_dup 3) > > + (match_dup 4)] > > + UNSPECV_CMPCCXADD)) > > + (set (match_dup 2) > > + (unspec_volatile:SWI48x [(const_int 0)] > > UNSPECV_CMPCCXADD)) > > + (set (reg:CC FLAGS_REG) > > + (compare:CC (match_dup 2) > > + (match_dup 0)))]) > > + (set (match_dup 7) > > + (match_op_dup 8 > > + [(match_dup 9) (const_int 0)]))] > > +{ > > + operands[9] = gen_rtx_REG (GET_MODE 
(XEXP (operands[8], 0)), FLAGS_REG); > > + if (swap_condition (GET_CODE (operands[8])) != GET_CODE (operands[8])) > > + { > > + operands[8] = shallow_copy_rtx (operands[8]); > > + enum rtx_code ccode = swap_condition (GET_CODE (operands[8])); > > + PUT_CODE (operands[8], ccode); > > + operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode, > > + operands[6], > > + operands[5]), > > + FLAGS_REG); > > + } > > +}) > > + > > +(define_peephole2 > > + [(set (match_operand:SWI48x 0 "register_operand") > > + (match_operand:SWI48x 1 "x86_64_general_operand")) > > + (parallel [(set (match_dup 0) > > + (unspec_volatile:SWI48x > > + [(match_operand:SWI48x 2 "memory_operand") > > + (match_dup 0) > > + (match_operand:SWI48x 3 "register_operand") > > + (match_operand:SI 4 "const_int_operand")] > > + UNSPECV_CMPCCXADD)) > > + (set (match_dup 2) > > + (unspec_volatile:SWI48x [(const_int 0)] > > UNSPECV_CMPCCXADD)) > > + (set (reg:CC FLAGS_REG) > > + (compare:CC (match_dup 2) > > + (match_dup 0)))]) > > + (set (reg FLAGS_REG) > > + (compare (match_operand:SWI48x 5 "register_operand") > > + (match_operand:SWI48x 6 "x86_64_general_operand"))) > > + (set (pc) > > + (if_then_else (match_operator 7 "ix86_comparison_int_operator" > > + [(reg FLAGS_REG) (const_int 0)]) > > + (label_ref (match_operand 8)) > > + (pc)))] > > + "TARGET_CMPCCXADD && TARGET_64BIT > > + && rtx_equal_p (operands[0], operands[6]) > > + && rtx_equal_p (operands[1], operands[5]) > > + && peep2_regno_dead_p (4, FLAGS_REG)" > > + [(set (match_dup 0) > > + (match_dup 1)) > > + (parallel [(set (match_dup 0) > > + (unspec_volatile:SWI48x > > + [(match_dup 2) > > + (match_dup 0) > > + (match_dup 3) > > + (match_dup 4)] > > + UNSPECV_CMPCCXADD)) > > + (set (match_dup 2) > > + (unspec_volatile:SWI48x [(const_int 0)] > > UNSPECV_CMPCCXADD)) > > + (set (reg:CC FLAGS_REG) > > + (compare:CC (match_dup 2) > > + (match_dup 0)))]) > > + (set (pc) > > + (if_then_else > > + (match_op_dup 7 > > + [(match_dup 9) (const_int 0)]) > > + 
(label_ref (match_dup 8)) > > + (pc)))] > > +{ > > + operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[7], 0)), FLAGS_REG); > > + if (swap_condition (GET_CODE (operands[7])) != GET_CODE (operands[7])) > > + { > > + operands[7] = shallow_copy_rtx (operands[7]); > > + enum rtx_code ccode = swap_condition (GET_CODE (operands[7])); > > + PUT_CODE (operands[7], ccode); > > + operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode, > > + operands[6], > > + operands[5]), > > + FLAGS_REG); > > + } > > +}) > > diff --git a/gcc/testsuite/gcc.target/i386/pr110591-2.c > > b/gcc/testsuite/gcc.target/i386/pr110591-2.c > > new file mode 100644 > > index 00000000000..92ffdb97d62 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr110591-2.c > > @@ -0,0 +1,90 @@ > > +/* { dg-do compile { target { ! ia32 } } } */ > > +/* { dg-options "-mcmpccxadd -O2 -fno-if-conversion -fno-if-conversion2" } > > */ > > +/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */ > > +/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */ > > + > > +#include <immintrin.h> > > + > > +int foo_jg (int *ptr, int v) > > +{ > > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v) > > + return 100; > > + return 200; > > +} > > + > > +int foo_jl (int *ptr, int v) > > +{ > > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v) > > + return 300; > > + return 100; > > +} > > + > > +int foo_je(int *ptr, int v) > > +{ > > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v) > > + return 123; > > + return 134; > > +} > > + > > +int foo_jne(int *ptr, int v) > > +{ > > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v) > > + return 111; > > + return 12; > > +} > > + > > +int foo_jge(int *ptr, int v) > > +{ > > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v) > > + return 413; > > + return 23; > > +} > > + > > +int foo_jle(int *ptr, int v) > > +{ > > + if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v) > > + return 3141; > > + return 341; > > +} > > + > > +int fooq_jg (long long *ptr, long long v) > > +{ > > + if 
(_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v) > > + return 123; > > + return 3; > > +} > > + > > +int fooq_jl (long long *ptr, long long v) > > +{ > > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v) > > + return 313; > > + return 5; > > +} > > + > > +int fooq_je(long long *ptr, long long v) > > +{ > > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v) > > + return 1313; > > + return 13; > > +} > > + > > +int fooq_jne(long long *ptr, long long v) > > +{ > > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v) > > + return 1314; > > + return 132; > > +} > > + > > +int fooq_jge(long long *ptr, long long v) > > +{ > > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v) > > + return 14314; > > + return 434; > > +} > > + > > +int fooq_jle(long long *ptr, long long v) > > +{ > > + if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v) > > + return 14414; > > + return 43; > > +} > > diff --git a/gcc/testsuite/gcc.target/i386/pr110591.c > > b/gcc/testsuite/gcc.target/i386/pr110591.c > > new file mode 100644 > > index 00000000000..32a515b429e > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr110591.c > > @@ -0,0 +1,66 @@ > > +/* { dg-do compile { target { ! 
ia32 } } } */ > > +/* { dg-options "-mcmpccxadd -O2" } */ > > +/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */ > > +/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */ > > + > > +#include <immintrin.h> > > + > > +_Bool foo_setg (int *ptr, int v) > > +{ > > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v; > > +} > > + > > +_Bool foo_setl (int *ptr, int v) > > +{ > > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v; > > +} > > + > > +_Bool foo_sete(int *ptr, int v) > > +{ > > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v; > > +} > > + > > +_Bool foo_setne(int *ptr, int v) > > +{ > > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v; > > +} > > + > > +_Bool foo_setge(int *ptr, int v) > > +{ > > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v; > > +} > > + > > +_Bool foo_setle(int *ptr, int v) > > +{ > > + return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v; > > +} > > + > > +_Bool fooq_setg (long long *ptr, long long v) > > +{ > > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v; > > +} > > + > > +_Bool fooq_setl (long long *ptr, long long v) > > +{ > > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v; > > +} > > + > > +_Bool fooq_sete(long long *ptr, long long v) > > +{ > > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v; > > +} > > + > > +_Bool fooq_setne(long long *ptr, long long v) > > +{ > > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v; > > +} > > + > > +_Bool fooq_setge(long long *ptr, long long v) > > +{ > > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v; > > +} > > + > > +_Bool fooq_setle(long long *ptr, long long v) > > +{ > > + return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v; > > +} > > -- > > 2.39.1.388.g2fc9e9ca3c > > > > > -- > BR, > Hongtao