On Wed, May 15, 2024 at 4:24 PM Hongyu Wang <hongyu.w...@intel.com> wrote:
>
> The APX CCMP feature implements a conditional compare, which executes
> the compare only when EFLAGS matches a certain condition.
>
> CCMP introduces a default flags value (dfv): when the conditional compare
> does not execute, it directly sets the flags according to dfv.
>
> The instruction goes like
>
> ccmpeq {dfv=sf,of,cf,zf}  %rax, %r16
>
> This instruction tests whether EFLAGS matches condition code EQ. If
> yes, it compares %rax and %r16 like a legacy cmp. If no, EFLAGS is
> updated according to dfv, which means SF,OF,CF,ZF are
> set. PF will be set according to CF in dfv, and AF will always be
> cleared.
>
> The dfv part can be a combination of sf,of,cf,zf, like {dfv=cf,zf} which
> sets CF and ZF only and clears the others, or {dfv=} which clears all EFLAGS.
>
> To enable CCMP, we implemented the target hooks TARGET_GEN_CCMP_FIRST and
> TARGET_GEN_CCMP_NEXT to reuse the current ccmp infrastructure. Also we
> extended the cstorem4 optab to support storing different CCmodes to fit
> the current ccmp infrastructure.
Ok.
>
> gcc/ChangeLog:
>
>         * config/i386/i386-expand.cc (ix86_gen_ccmp_first): New function
>         that tests if the first compare can be generated.
>         (ix86_gen_ccmp_next): New function to emit a single compare and ccmp
>         sequence.
>         * config/i386/i386-opts.h (enum apx_features): Add apx_ccmp.
>         * config/i386/i386-protos.h (ix86_gen_ccmp_first): New proto
>         declare.
>         (ix86_gen_ccmp_next): Likewise.
>         (ix86_get_flags_cc): Likewise.
>         * config/i386/i386.cc (ix86_flags_cc): New enum.
>         (ix86_ccmp_dfv_mapping): New string array to map conditional
>         code to dfv.
>         (ix86_print_operand): Handle special dfv flag for CCMP.
>         (ix86_get_flags_cc): New function to return x86 CC enum.
>         (TARGET_GEN_CCMP_FIRST): Define.
>         (TARGET_GEN_CCMP_NEXT): Likewise.
>         * config/i386/i386.h (TARGET_APX_CCMP): Define.
>         * config/i386/i386.md (@ccmp<mode>): New define_insn to support
>         ccmp.
>         (UNSPEC_APX_DFV): New unspec for ccmp dfv.
>         (ALL_CC): New mode iterator.
>         (cstorecc4): Change to ...
>         (cstore<mode>4) ... this, use ALL_CC to loop through all
>         available CCmodes.
>         * config/i386/i386.opt (apx_ccmp): Add enum value for ccmp.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/apx-ccmp-1.c: New compile test.
>         * gcc.target/i386/apx-ccmp-2.c: New runtime test.
> ---
>  gcc/config/i386/i386-expand.cc             | 121 +++++++++++++++++++++
>  gcc/config/i386/i386-opts.h                |   6 +-
>  gcc/config/i386/i386-protos.h              |   5 +
>  gcc/config/i386/i386.cc                    |  50 +++++++++
>  gcc/config/i386/i386.h                     |   1 +
>  gcc/config/i386/i386.md                    |  35 +++++-
>  gcc/config/i386/i386.opt                   |   3 +
>  gcc/testsuite/gcc.target/i386/apx-ccmp-1.c |  63 +++++++++++
>  gcc/testsuite/gcc.target/i386/apx-ccmp-2.c |  57 ++++++++++
>  9 files changed, 337 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index 1ab22fe7973..f00525e449f 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -25554,4 +25554,125 @@ ix86_expand_fast_convert_bf_to_sf (rtx val)
>    return ret;
>  }
>
> +rtx
> +ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
> +                       rtx_code code, tree treeop0, tree treeop1)
> +{
> +  if (!TARGET_APX_CCMP)
> +    return NULL_RTX;
> +
> +  rtx op0, op1, res;
> +  machine_mode op_mode;
> +
> +  start_sequence ();
> +  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
> +
> +  op_mode = GET_MODE (op0);
> +  if (op_mode == VOIDmode)
> +    op_mode = GET_MODE (op1);
> +
> +  if (!(op_mode == DImode || op_mode == SImode || op_mode == HImode
> +       || op_mode == QImode))
> +    {
> +      end_sequence ();
> +      return NULL_RTX;
> +    }
> +
> +  /* Canonicalize the operands according to mode.  */
> +  if (!nonimmediate_operand (op0, op_mode))
> +    op0 = force_reg (op_mode, op0);
> +  if (!x86_64_general_operand (op1, op_mode))
> +    op1 = force_reg (op_mode, op1);
> +
> +  *prep_seq = get_insns ();
> +  end_sequence ();
> +
> +  start_sequence ();
> +
> +  res = ix86_expand_compare (code, op0, op1);
> +
> +  if (!res)
> +    {
> +      end_sequence ();
> +      return NULL_RTX;
> +    }
> +  *gen_seq = get_insns ();
> +  end_sequence ();
> +
> +  return res;
> +}
> +
> +rtx
> +ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
> +                      rtx_code cmp_code, tree treeop0, tree treeop1,
> +                      rtx_code bit_code)
> +{
> +  if (!TARGET_APX_CCMP)
> +    return NULL_RTX;
> +
> +  rtx op0, op1, target;
> +  machine_mode op_mode, cmp_mode, cc_mode = CCmode;
> +  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
> +  insn_code icode;
> +  rtx_code prev_code;
> +  struct expand_operand ops[5];
> +  int dfv;
> +
> +  push_to_sequence (*prep_seq);
> +  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
> +
> +  cmp_mode = op_mode = GET_MODE (op0);
> +
> +  if (!(op_mode == DImode || op_mode == SImode || op_mode == HImode
> +       || op_mode == QImode))
> +    {
> +      end_sequence ();
> +      return NULL_RTX;
> +    }
> +
> +  icode = code_for_ccmp (op_mode);
> +
> +  op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
> +  op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
> +  if (!op0 || !op1)
> +    {
> +      end_sequence ();
> +      return NULL_RTX;
> +    }
> +
> +  *prep_seq = get_insns ();
> +  end_sequence ();
> +
> +  target = gen_rtx_REG (cc_mode, FLAGS_REG);
> +  dfv = ix86_get_flags_cc ((rtx_code) cmp_code);
> +
> +  prev_code = GET_CODE (prev);
> +
> +  if (bit_code != AND)
> +    prev_code = reverse_condition (prev_code);
> +  else
> +    dfv = (int)(dfv ^ 1);
> +
> +  prev = gen_rtx_fmt_ee (prev_code, VOIDmode, XEXP (prev, 0),
> +                        const0_rtx);
> +
> +  create_fixed_operand (&ops[0], target);
> +  create_fixed_operand (&ops[1], prev);
> +  create_fixed_operand (&ops[2], op0);
> +  create_fixed_operand (&ops[3], op1);
> +  create_fixed_operand (&ops[4], GEN_INT (dfv));
> +
> +  push_to_sequence (*gen_seq);
> +  if (!maybe_expand_insn (icode, 5, ops))
> +    {
> +      end_sequence ();
> +      return NULL_RTX;
> +    }
> +
> +  *gen_seq = get_insns ();
> +  end_sequence ();
> +
> +  return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
> +}
> +
>  #include "gt-i386-expand.h"
> diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
> index 60176ce609f..5fcc4927978 100644
> --- a/gcc/config/i386/i386-opts.h
> +++ b/gcc/config/i386/i386-opts.h
> @@ -140,8 +140,10 @@ enum apx_features {
>    apx_push2pop2 = 1 << 1,
>    apx_ndd = 1 << 2,
>    apx_ppx = 1 << 3,
> -  apx_nf = 1<< 4,
> -  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf,
> +  apx_nf = 1 << 4,
> +  apx_ccmp = 1 << 5,
> +  apx_all = apx_egpr | apx_push2pop2 | apx_ndd
> +           | apx_ppx | apx_nf | apx_ccmp,
>  };
>
>  #endif
> diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> index dbc861fb1ea..26e29df7312 100644
> --- a/gcc/config/i386/i386-protos.h
> +++ b/gcc/config/i386/i386-protos.h
> @@ -242,6 +242,11 @@ extern void ix86_expand_atomic_fetch_op_loop (rtx, rtx, 
> rtx, enum rtx_code,
>  extern void ix86_expand_cmpxchg_loop (rtx *, rtx, rtx, rtx, rtx, rtx,
>                                       bool, rtx_code_label *);
>  extern rtx ix86_expand_fast_convert_bf_to_sf (rtx);
> +extern rtx ix86_gen_ccmp_first (rtx_insn **, rtx_insn **, enum rtx_code,
> +                               tree, tree);
> +extern rtx ix86_gen_ccmp_next (rtx_insn **, rtx_insn **, rtx,
> +                              enum rtx_code, tree, tree, enum rtx_code);
> +extern int ix86_get_flags_cc (enum rtx_code);
>  extern rtx ix86_memtag_untagged_pointer (rtx, rtx);
>  extern bool ix86_memtag_can_tag_addresses (void);
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index b4838b7939e..2363cab1eae 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -433,6 +433,22 @@ static bool i386_asm_output_addr_const_extra (FILE *, 
> rtx);
>  static bool ix86_can_inline_p (tree, tree);
>  static unsigned int ix86_minimum_incoming_stack_boundary (bool);
>
> +typedef enum ix86_flags_cc
> +{
> +  X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB,
> +  X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE,
> +  X86_CCS, X86_CCNS, X86_CCP, X86_CCNP,
> +  X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE
> +} ix86_cc;
> +
> +static const char *ix86_ccmp_dfv_mapping[] =
> +{
> +  "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}",
> +  "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}",
> +  "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}",
> +  "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}"
> +};
> +
>
>  /* Whether -mtune= or -march= were specified */
>  int ix86_tune_defaulted;
> @@ -13690,6 +13706,7 @@ print_reg (rtx x, int code, FILE *file)
>     M -- print addr32 prefix for TARGET_X32 with VSIB address.
>     ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
>     N -- print maskz if it's constant 0 operand.
> +   G -- print embedded flag for ccmp/ctest.
>   */
>
>  void
> @@ -14083,6 +14100,14 @@ ix86_print_operand (FILE *file, rtx x, int code)
>                               file);
>           return;
>
> +       case 'G':
> +         {
> +           int dfv = INTVAL (x);
> +           const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv];
> +           fputs (dfv_suffix, file);
> +         }
> +         return;
> +
>         case 'H':
>           if (!offsettable_memref_p (x))
>             {
> @@ -16466,6 +16491,24 @@ ix86_convert_const_vector_to_integer (rtx op, 
> machine_mode mode)
>    return val.to_shwi ();
>  }
>
> +int ix86_get_flags_cc (rtx_code code)
> +{
> +  switch (code)
> +    {
> +      case NE: return X86_CCNE;
> +      case EQ: return X86_CCE;
> +      case GE: return X86_CCNL;
> +      case GT: return X86_CCNLE;
> +      case LE: return X86_CCLE;
> +      case LT: return X86_CCL;
> +      case GEU: return X86_CCNB;
> +      case GTU: return X86_CCNBE;
> +      case LEU: return X86_CCBE;
> +      case LTU: return X86_CCB;
> +      default: return -1;
> +    }
> +}
> +
>  /* Return TRUE or FALSE depending on whether the first SET in INSN
>     has source and destination with matching CC modes, and that the
>     CC mode is at least as constrained as REQ_MODE.  */
> @@ -26765,6 +26808,13 @@ ix86_libgcc_floating_mode_supported_p
>  #undef TARGET_MEMTAG_TAG_SIZE
>  #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
>
> +#undef TARGET_GEN_CCMP_FIRST
> +#define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first
> +
> +#undef TARGET_GEN_CCMP_NEXT
> +#define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next
> +
> +
>  static bool
>  ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
>  {
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index f20ae4726da..5631bc4695a 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -56,6 +56,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
> If not, see
>  #define TARGET_APX_NDD (ix86_apx_features & apx_ndd)
>  #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
>  #define TARGET_APX_NF (ix86_apx_features & apx_nf)
> +#define TARGET_APX_CCMP (ix86_apx_features & apx_ccmp)
>
>  #include "config/vxworks-dummy.h"
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index ddde83e57f5..49978d1f383 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -217,6 +217,10 @@ (define_c_enum "unspec" [
>
>    ;; For APX PPX support
>    UNSPEC_APX_PPX
> +
> +  ;; For APX CCMP support
> +  ;; DFV = default flag value
> +  UNSPEC_APX_DFV
>  ])
>
>  (define_c_enum "unspecv" [
> @@ -1504,6 +1508,25 @@ (define_expand "cstore<mode>4"
>    DONE;
>  })
>
> +(define_insn "@ccmp<mode>"
> + [(set (match_operand:CC 0 "flags_reg_operand")
> +       (if_then_else:CC
> +        (match_operator 1 "comparison_operator"
> +         [(reg:CC FLAGS_REG) (const_int 0)])
> +       (compare:CC
> +         (minus:SWI (match_operand:SWI 2 "nonimmediate_operand" "<r>m,<r>")
> +                    (match_operand:SWI 3 "<general_operand>" 
> "<r><i>,<r><m>"))
> +         (const_int 0))
> +       (unspec:SI
> +         [(match_operand:SI 4 "const_0_to_15_operand")]
> +         UNSPEC_APX_DFV)))]
> + "TARGET_APX_CCMP"
> + "ccmp%C1{<imodesuffix>}\t%G4 {%3, %2|%2, %3}"
> + [(set_attr "type" "icmp")
> +  (set_attr "mode" "<MODE>")
> +  (set_attr "length_immediate" "1")
> +  (set_attr "prefix" "evex")])
> +
>  (define_expand "@cmp<mode>_1"
>    [(set (reg:CC FLAGS_REG)
>         (compare:CC (match_operand:SWI48 0 "nonimmediate_operand")
> @@ -1850,10 +1873,18 @@ (define_expand "cbranchcc4"
>    DONE;
>  })
>
> -(define_expand "cstorecc4"
> +;; For conditional compare, the middle-end hook will convert
> +;; CCmode to sub-CCmode using SELECT_CC_MODE macro and try
> +;; to find cstore<submodes> in optab. Add ALL_CC to support
> +;; the cstore after ccmp sequence.
> +
> +(define_mode_iterator ALL_CC
> + [CCGC CCGOC CCNO CCGZ CCA CCC CCO CCP CCS CCZ CC])
> +
> +(define_expand "cstore<mode>4"
>    [(set (match_operand:QI 0 "register_operand")
>                (match_operator 1 "comparison_operator"
> -               [(match_operand 2 "flags_reg_operand")
> +               [(match_operand:ALL_CC 2 "flags_reg_operand")
>                  (match_operand 3 "const0_operand")]))]
>    ""
>  {
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index 66021d59d4e..7e6fe91d1d6 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -1359,6 +1359,9 @@ Enum(apx_features) String(ppx) Value(apx_ppx) Set(5)
>  EnumValue
>  Enum(apx_features) String(nf) Value(apx_nf) Set(6)
>
> +EnumValue
> +Enum(apx_features) String(ccmp) Value(apx_ccmp) Set(7)
> +
>  EnumValue
>  Enum(apx_features) String(all) Value(apx_all) Set(1)
>
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c 
> b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
> new file mode 100644
> index 00000000000..5a2dad89f1f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
> @@ -0,0 +1,63 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mapx-features=ccmp" } */
> +
> +int
> +f1 (int a)
> +{
> +  return a < 17 || a == 32;
> +}
> +
> +int
> +f2 (int a)
> +{
> +  return a > 33 || a == 18;
> +}
> +
> +int
> +f3 (int a, int b)
> +{
> +  return a != 19 && b > 34;
> +}
> +
> +int
> +f4 (int a, int b)
> +{
> +  return a < 35 && b == 20;
> +}
> +
> +int
> +f5 (short a)
> +{
> +  return a == 0 || a == 5;
> +}
> +
> +int
> +f6 (long long a)
> +{
> +  return a == 6 || a == 0;
> +}
> +
> +int
> +f7 (char a, char b)
> +{
> +  return a > 0 && b <= 7;
> +}
> +
> +int
> +f8 (int a, int b)
> +{
> +  return a == 9 && b > 0;
> +}
> +
> +int
> +f9 (int a, int b)
> +{
> +  a += b;
> +  return a == 3 || a == 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "ccmpg" 2 } } */
> +/* { dg-final { scan-assembler-times "ccmple" 2 } } */
> +/* { dg-final { scan-assembler-times "ccmpne" 4 } } */
> +/* { dg-final { scan-assembler-times "ccmpe" 1 } } */
> +
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c 
> b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
> new file mode 100644
> index 00000000000..30a1c216c1b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
> @@ -0,0 +1,57 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-require-effective-target apxf } */
> +/* { dg-options "-O3 -mno-apxf" } */
> +
> +__attribute__((noinline, noclone, target("apxf")))
> +int foo_apx(int a, int b, int c, int d)
> +{
> +  int sum = a;
> +
> +  if (a != c)
> +    {
> +      c += d;
> +      a += b;
> +      sum += a + c;
> +      if (b != d && sum < c || sum > d)
> +       {
> +         b += d;
> +         sum += b;
> +       }
> +    }
> +
> +  return sum;
> +}
> +
> +__attribute__((noinline, noclone, target("no-apxf")))
> +int foo_noapx(int a, int b, int c, int d)
> +{
> +  int sum = a;
> +
> +  if (a != c)
> +    {
> +      c += d;
> +      a += b;
> +      sum += a + c;
> +      if (b != d && sum < c || sum > d)
> +       {
> +         b += d;
> +         sum += b;
> +       }
> +    }
> +
> +  return sum;
> +}
> +
> +int main (void)
> +{
> +  if (!__builtin_cpu_supports ("apxf"))
> +    return 0;
> +
> +  int val1 = foo_noapx (23, 17, 32, 44);
> +  int val2 = foo_apx (23, 17, 32, 44);
> +
> +  if (val1 != val2)
> +    __builtin_abort ();
> +
> +  return 0;
> +}
> --
> 2.31.1
>


-- 
BR,
Hongtao

Reply via email to