On Wed, May 15, 2024 at 4:24 PM Hongyu Wang <hongyu.w...@intel.com> wrote: > > APX CCMP feature implements conditional compare which executes compare > when EFLAGS matches certain condition. > > CCMP introduces default flags value (dfv), when conditional compare does > not execute, it will directly set the flags according to dfv. > > The instruction goes like > > ccmpeq {dfv=sf,of,cf,zf} %rax, %r16 > > For this instruction, it will test EFLAGS regs if it matches conditional > code EQ, if yes, compare %rax and %r16 like legacy cmp. If no, the > EFLAGS will be updated according to dfv, which means SF,OF,CF,ZF are > set. PF will be set according to CF in dfv, and AF will always be > cleared. > > The dfv part can be a combination of sf,of,cf,zf, like {dfv=cf,zf} which > sets CF and ZF only and clears others, or {dfv=} which clears all EFLAGS. > > To enable CCMP, we implemented the target hook TARGET_GEN_CCMP_FIRST and > TARGET_GEN_CCMP_NEXT to reuse the current ccmp infrastructure. Also we > extended the cstorem4 optab to support storing different CCmode to fit > current ccmp infrastructure. Ok. > > gcc/ChangeLog: > > * config/i386/i386-expand.cc (ix86_gen_ccmp_first): New function > that tests if the first compare can be generated. > (ix86_gen_ccmp_next): New function to emit a single compare and ccmp > sequence. > * config/i386/i386-opts.h (enum apx_features): Add apx_ccmp. > * config/i386/i386-protos.h (ix86_gen_ccmp_first): New proto > declare. > (ix86_gen_ccmp_next): Likewise. > (ix86_get_flags_cc): Likewise. > * config/i386/i386.cc (ix86_flags_cc): New enum. > (ix86_ccmp_dfv_mapping): New string array to map conditional > code to dfv. > (ix86_print_operand): Handle special dfv flag for CCMP. > (ix86_get_flags_cc): New function to return x86 CC enum. > (TARGET_GEN_CCMP_FIRST): Define. > (TARGET_GEN_CCMP_NEXT): Likewise. > * config/i386/i386.h (TARGET_APX_CCMP): Define. > * config/i386/i386.md (@ccmp<mode>): New define_insn to support > ccmp. 
> (UNSPEC_APX_DFV): New unspec for ccmp dfv. > (ALL_CC): New mode iterator. > (cstorecc4): Change to ... > (cstore<mode>4) ... this, use ALL_CC to loop through all > available CCmodes. > * config/i386/i386.opt (apx_ccmp): Add enum value for ccmp. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/apx-ccmp-1.c: New compile test. > * gcc.target/i386/apx-ccmp-2.c: New runtime test. > --- > gcc/config/i386/i386-expand.cc | 121 +++++++++++++++++++++ > gcc/config/i386/i386-opts.h | 6 +- > gcc/config/i386/i386-protos.h | 5 + > gcc/config/i386/i386.cc | 50 +++++++++ > gcc/config/i386/i386.h | 1 + > gcc/config/i386/i386.md | 35 +++++- > gcc/config/i386/i386.opt | 3 + > gcc/testsuite/gcc.target/i386/apx-ccmp-1.c | 63 +++++++++++ > gcc/testsuite/gcc.target/i386/apx-ccmp-2.c | 57 ++++++++++ > 9 files changed, 337 insertions(+), 4 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/apx-ccmp-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/apx-ccmp-2.c > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc > index 1ab22fe7973..f00525e449f 100644 > --- a/gcc/config/i386/i386-expand.cc > +++ b/gcc/config/i386/i386-expand.cc > @@ -25554,4 +25554,125 @@ ix86_expand_fast_convert_bf_to_sf (rtx val) > return ret; > } > > +rtx > +ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq, > + rtx_code code, tree treeop0, tree treeop1) > +{ > + if (!TARGET_APX_CCMP) > + return NULL_RTX; > + > + rtx op0, op1, res; > + machine_mode op_mode; > + > + start_sequence (); > + expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL); > + > + op_mode = GET_MODE (op0); > + if (op_mode == VOIDmode) > + op_mode = GET_MODE (op1); > + > + if (!(op_mode == DImode || op_mode == SImode || op_mode == HImode > + || op_mode == QImode)) > + { > + end_sequence (); > + return NULL_RTX; > + } > + > + /* Canonicalize the operands according to mode. 
*/ > + if (!nonimmediate_operand (op0, op_mode)) > + op0 = force_reg (op_mode, op0); > + if (!x86_64_general_operand (op1, op_mode)) > + op1 = force_reg (op_mode, op1); > + > + *prep_seq = get_insns (); > + end_sequence (); > + > + start_sequence (); > + > + res = ix86_expand_compare (code, op0, op1); > + > + if (!res) > + { > + end_sequence (); > + return NULL_RTX; > + } > + *gen_seq = get_insns (); > + end_sequence (); > + > + return res; > +} > + > +rtx > +ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, > + rtx_code cmp_code, tree treeop0, tree treeop1, > + rtx_code bit_code) > +{ > + if (!TARGET_APX_CCMP) > + return NULL_RTX; > + > + rtx op0, op1, target; > + machine_mode op_mode, cmp_mode, cc_mode = CCmode; > + int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0)); > + insn_code icode; > + rtx_code prev_code; > + struct expand_operand ops[5]; > + int dfv; > + > + push_to_sequence (*prep_seq); > + expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL); > + > + cmp_mode = op_mode = GET_MODE (op0); > + > + if (!(op_mode == DImode || op_mode == SImode || op_mode == HImode > + || op_mode == QImode)) > + { > + end_sequence (); > + return NULL_RTX; > + } > + > + icode = code_for_ccmp (op_mode); > + > + op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp); > + op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp); > + if (!op0 || !op1) > + { > + end_sequence (); > + return NULL_RTX; > + } > + > + *prep_seq = get_insns (); > + end_sequence (); > + > + target = gen_rtx_REG (cc_mode, FLAGS_REG); > + dfv = ix86_get_flags_cc ((rtx_code) cmp_code); > + > + prev_code = GET_CODE (prev); > + > + if (bit_code != AND) > + prev_code = reverse_condition (prev_code); > + else > + dfv = (int)(dfv ^ 1); > + > + prev = gen_rtx_fmt_ee (prev_code, VOIDmode, XEXP (prev, 0), > + const0_rtx); > + > + create_fixed_operand (&ops[0], target); > + create_fixed_operand (&ops[1], prev); > + create_fixed_operand (&ops[2], op0); > 
+ create_fixed_operand (&ops[3], op1); > + create_fixed_operand (&ops[4], GEN_INT (dfv)); > + > + push_to_sequence (*gen_seq); > + if (!maybe_expand_insn (icode, 5, ops)) > + { > + end_sequence (); > + return NULL_RTX; > + } > + > + *gen_seq = get_insns (); > + end_sequence (); > + > + return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx); > +} > + > #include "gt-i386-expand.h" > diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h > index 60176ce609f..5fcc4927978 100644 > --- a/gcc/config/i386/i386-opts.h > +++ b/gcc/config/i386/i386-opts.h > @@ -140,8 +140,10 @@ enum apx_features { > apx_push2pop2 = 1 << 1, > apx_ndd = 1 << 2, > apx_ppx = 1 << 3, > - apx_nf = 1<< 4, > - apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf, > + apx_nf = 1 << 4, > + apx_ccmp = 1 << 5, > + apx_all = apx_egpr | apx_push2pop2 | apx_ndd > + | apx_ppx | apx_nf | apx_ccmp, > }; > > #endif > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h > index dbc861fb1ea..26e29df7312 100644 > --- a/gcc/config/i386/i386-protos.h > +++ b/gcc/config/i386/i386-protos.h > @@ -242,6 +242,11 @@ extern void ix86_expand_atomic_fetch_op_loop (rtx, rtx, > rtx, enum rtx_code, > extern void ix86_expand_cmpxchg_loop (rtx *, rtx, rtx, rtx, rtx, rtx, > bool, rtx_code_label *); > extern rtx ix86_expand_fast_convert_bf_to_sf (rtx); > +extern rtx ix86_gen_ccmp_first (rtx_insn **, rtx_insn **, enum rtx_code, > + tree, tree); > +extern rtx ix86_gen_ccmp_next (rtx_insn **, rtx_insn **, rtx, > + enum rtx_code, tree, tree, enum rtx_code); > +extern int ix86_get_flags_cc (enum rtx_code); > extern rtx ix86_memtag_untagged_pointer (rtx, rtx); > extern bool ix86_memtag_can_tag_addresses (void); > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index b4838b7939e..2363cab1eae 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -433,6 +433,22 @@ static bool i386_asm_output_addr_const_extra (FILE *, > rtx); > static 
bool ix86_can_inline_p (tree, tree); > static unsigned int ix86_minimum_incoming_stack_boundary (bool); > > +typedef enum ix86_flags_cc > +{ > + X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB, > + X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE, > + X86_CCS, X86_CCNS, X86_CCP, X86_CCNP, > + X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE > +} ix86_cc; > + > +static const char *ix86_ccmp_dfv_mapping[] = > +{ > + "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}", > + "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}", > + "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}", > + "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}" > +}; > + > > /* Whether -mtune= or -march= were specified */ > int ix86_tune_defaulted; > @@ -13690,6 +13706,7 @@ print_reg (rtx x, int code, FILE *file) > M -- print addr32 prefix for TARGET_X32 with VSIB address. > ! -- print NOTRACK prefix for jxx/call/ret instructions if required. > N -- print maskz if it's constant 0 operand. > + G -- print embedded flag for ccmp/ctest. > */ > > void > @@ -14083,6 +14100,14 @@ ix86_print_operand (FILE *file, rtx x, int code) > file); > return; > > + case 'G': > + { > + int dfv = INTVAL (x); > + const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv]; > + fputs (dfv_suffix, file); > + } > + return; > + > case 'H': > if (!offsettable_memref_p (x)) > { > @@ -16466,6 +16491,24 @@ ix86_convert_const_vector_to_integer (rtx op, > machine_mode mode) > return val.to_shwi (); > } > > +int ix86_get_flags_cc (rtx_code code) > +{ > + switch (code) > + { > + case NE: return X86_CCNE; > + case EQ: return X86_CCE; > + case GE: return X86_CCNL; > + case GT: return X86_CCNLE; > + case LE: return X86_CCLE; > + case LT: return X86_CCL; > + case GEU: return X86_CCNB; > + case GTU: return X86_CCNBE; > + case LEU: return X86_CCBE; > + case LTU: return X86_CCB; > + default: return -1; > + } > +} > + > /* Return TRUE or FALSE depending on whether the first SET in INSN > has source and destination with matching CC modes, and that the > CC mode is at least as 
constrained as REQ_MODE. */ > @@ -26765,6 +26808,13 @@ ix86_libgcc_floating_mode_supported_p > #undef TARGET_MEMTAG_TAG_SIZE > #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size > > +#undef TARGET_GEN_CCMP_FIRST > +#define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first > + > +#undef TARGET_GEN_CCMP_NEXT > +#define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next > + > + > static bool > ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED) > { > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index f20ae4726da..5631bc4695a 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -56,6 +56,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. > If not, see > #define TARGET_APX_NDD (ix86_apx_features & apx_ndd) > #define TARGET_APX_PPX (ix86_apx_features & apx_ppx) > #define TARGET_APX_NF (ix86_apx_features & apx_nf) > +#define TARGET_APX_CCMP (ix86_apx_features & apx_ccmp) > > #include "config/vxworks-dummy.h" > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index ddde83e57f5..49978d1f383 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -217,6 +217,10 @@ (define_c_enum "unspec" [ > > ;; For APX PPX support > UNSPEC_APX_PPX > + > + ;; For APX CCMP support > + ;; DFV = default flag value > + UNSPEC_APX_DFV > ]) > > (define_c_enum "unspecv" [ > @@ -1504,6 +1508,25 @@ (define_expand "cstore<mode>4" > DONE; > }) > > +(define_insn "@ccmp<mode>" > + [(set (match_operand:CC 0 "flags_reg_operand") > + (if_then_else:CC > + (match_operator 1 "comparison_operator" > + [(reg:CC FLAGS_REG) (const_int 0)]) > + (compare:CC > + (minus:SWI (match_operand:SWI 2 "nonimmediate_operand" "<r>m,<r>") > + (match_operand:SWI 3 "<general_operand>" > "<r><i>,<r><m>")) > + (const_int 0)) > + (unspec:SI > + [(match_operand:SI 4 "const_0_to_15_operand")] > + UNSPEC_APX_DFV)))] > + "TARGET_APX_CCMP" > + "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}" > + [(set_attr "type" "icmp") > + (set_attr "mode" "<MODE>") > + (set_attr 
"length_immediate" "1") > + (set_attr "prefix" "evex")]) > + > (define_expand "@cmp<mode>_1" > [(set (reg:CC FLAGS_REG) > (compare:CC (match_operand:SWI48 0 "nonimmediate_operand") > @@ -1850,10 +1873,18 @@ (define_expand "cbranchcc4" > DONE; > }) > > -(define_expand "cstorecc4" > +;; For conditonal compare, the middle-end hook will convert > +;; CCmode to sub-CCmode using SELECT_CC_MODE macro and try > +;; to find cstore<submodes> in optab. Add ALL_CC to support > +;; the cstore after ccmp sequence. > + > +(define_mode_iterator ALL_CC > + [CCGC CCGOC CCNO CCGZ CCA CCC CCO CCP CCS CCZ CC]) > + > +(define_expand "cstore<mode>4" > [(set (match_operand:QI 0 "register_operand") > (match_operator 1 "comparison_operator" > - [(match_operand 2 "flags_reg_operand") > + [(match_operand:ALL_CC 2 "flags_reg_operand") > (match_operand 3 "const0_operand")]))] > "" > { > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt > index 66021d59d4e..7e6fe91d1d6 100644 > --- a/gcc/config/i386/i386.opt > +++ b/gcc/config/i386/i386.opt > @@ -1359,6 +1359,9 @@ Enum(apx_features) String(ppx) Value(apx_ppx) Set(5) > EnumValue > Enum(apx_features) String(nf) Value(apx_nf) Set(6) > > +EnumValue > +Enum(apx_features) String(ccmp) Value(apx_ccmp) Set(7) > + > EnumValue > Enum(apx_features) String(all) Value(apx_all) Set(1) > > diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c > b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c > new file mode 100644 > index 00000000000..5a2dad89f1f > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c > @@ -0,0 +1,63 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-options "-O2 -mapx-features=ccmp" } */ > + > +int > +f1 (int a) > +{ > + return a < 17 || a == 32; > +} > + > +int > +f2 (int a) > +{ > + return a > 33 || a == 18; > +} > + > +int > +f3 (int a, int b) > +{ > + return a != 19 && b > 34; > +} > + > +int > +f4 (int a, int b) > +{ > + return a < 35 && b == 20; > +} > + > +int > +f5 (short a) > +{ > + return a == 0 || a == 5; > +} > + > +int > +f6 (long long a) > +{ > + return a == 6 || a == 0; > +} > + > +int > +f7 (char a, char b) > +{ > + return a > 0 && b <= 7; > +} > + > +int > +f8 (int a, int b) > +{ > + return a == 9 && b > 0; > +} > + > +int > +f9 (int a, int b) > +{ > + a += b; > + return a == 3 || a == 0; > +} > + > +/* { dg-final { scan-assembler-times "ccmpg" 2 } } */ > +/* { dg-final { scan-assembler-times "ccmple" 2 } } */ > +/* { dg-final { scan-assembler-times "ccmpne" 4 } } */ > +/* { dg-final { scan-assembler-times "ccmpe" 1 } } */ > + > diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c > b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c > new file mode 100644 > index 00000000000..30a1c216c1b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c > @@ -0,0 +1,57 @@ > +/* { dg-do run { target { ! 
ia32 } } } */ > +/* { dg-require-effective-target apxf } */ > +/* { dg-options "-O3 -mno-apxf" } */ > + > +__attribute__((noinline, noclone, target("apxf"))) > +int foo_apx(int a, int b, int c, int d) > +{ > + int sum = a; > + > + if (a != c) > + { > + c += d; > + a += b; > + sum += a + c; > + if (b != d && sum < c || sum > d) > + { > + b += d; > + sum += b; > + } > + } > + > + return sum; > +} > + > +__attribute__((noinline, noclone, target("no-apxf"))) > +int foo_noapx(int a, int b, int c, int d) > +{ > + int sum = a; > + > + if (a != c) > + { > + c += d; > + a += b; > + sum += a + c; > + if (b != d && sum < c || sum > d) > + { > + b += d; > + sum += b; > + } > + } > + > + return sum; > +} > + > +int main (void) > +{ > + if (!__builtin_cpu_supports ("apxf")) > + return 0; > + > + int val1 = foo_noapx (23, 17, 32, 44); > + int val2 = foo_apx (23, 17, 32, 44); > + > + if (val1 != val2) > + __builtin_abort (); > + > + return 0; > +} > -- > 2.31.1 >
-- BR, Hongtao