From: Pan Li <pan2...@intel.com>

Hi Richard & Tamar,

Try the DEF_INTERNAL_INT_EXT_FN as your suggestion.  By mapping
us_plus$a3 to the RTL representation (us_plus:m x y) in optabs.def.
And then expand_US_PLUS in internal-fn.cc.  Not very sure if my
understanding is correct for DEF_INTERNAL_INT_EXT_FN.

I am not sure if we still need DEF_INTERNAL_SIGNED_OPTAB_FN here, given
the RTL representation has (ss_plus:m x y) and (us_plus:m x y) already.

Note this patch is a draft for validation, no test are invovled here.

gcc/ChangeLog:

        * builtins.def (BUILT_IN_US_PLUS): Add builtin def.
        (BUILT_IN_US_PLUSIMAX): Ditto.
        (BUILT_IN_US_PLUSL): Ditto.
        (BUILT_IN_US_PLUSLL): Ditto.
        (BUILT_IN_US_PLUSG): Ditto.
        * config/riscv/riscv-protos.h (riscv_expand_us_plus): Add new
        func decl for expanding us_plus.
        * config/riscv/riscv.cc (riscv_expand_us_plus): Add new func
        impl for expanding us_plus.
        * config/riscv/riscv.md (us_plus<mode>3): Add new pattern impl
        us_plus<mode>3.
        * internal-fn.cc (expand_US_PLUS): Add new func impl to expand
        US_PLUS.
        * internal-fn.def (US_PLUS): Add new INT_EXT_FN.
        * internal-fn.h (expand_US_PLUS): Add new func decl.
        * match.pd: Add new simplify pattern for us_plus.
        * optabs.def (OPTAB_NL): Add new OPTAB_NL to US_PLUS rtl.

Signed-off-by: Pan Li <pan2...@intel.com>
---
 gcc/builtins.def                |  7 +++++
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv.cc       | 46 +++++++++++++++++++++++++++++++++
 gcc/config/riscv/riscv.md       | 11 ++++++++
 gcc/internal-fn.cc              | 26 +++++++++++++++++++
 gcc/internal-fn.def             |  3 +++
 gcc/internal-fn.h               |  1 +
 gcc/match.pd                    | 17 ++++++++++++
 gcc/optabs.def                  |  2 ++
 9 files changed, 114 insertions(+)

diff --git a/gcc/builtins.def b/gcc/builtins.def
index f6f3e104f6a..0777b912cfa 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -1055,6 +1055,13 @@ DEF_GCC_BUILTIN        (BUILT_IN_POPCOUNTIMAX, 
"popcountimax", BT_FN_INT_UINTMAX
 DEF_GCC_BUILTIN        (BUILT_IN_POPCOUNTL, "popcountl", BT_FN_INT_ULONG, 
ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_POPCOUNTLL, "popcountll", 
BT_FN_INT_ULONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_POPCOUNTG, "popcountg", BT_FN_INT_VAR, 
ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
+
+DEF_GCC_BUILTIN        (BUILT_IN_US_PLUS, "us_plus", BT_FN_INT_UINT, 
ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN        (BUILT_IN_US_PLUSIMAX, "us_plusimax", 
BT_FN_INT_UINTMAX, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN        (BUILT_IN_US_PLUSL, "us_plusl", BT_FN_INT_ULONG, 
ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN        (BUILT_IN_US_PLUSLL, "us_plusll", BT_FN_INT_ULONGLONG, 
ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN        (BUILT_IN_US_PLUSG, "us_plusg", BT_FN_INT_VAR, 
ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
+
 DEF_EXT_LIB_BUILTIN    (BUILT_IN_POSIX_MEMALIGN, "posix_memalign", 
BT_FN_INT_PTRPTR_SIZE_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
 DEF_GCC_BUILTIN        (BUILT_IN_PREFETCH, "prefetch", 
BT_FN_VOID_CONST_PTR_VAR, ATTR_NOVOPS_LEAF_LIST)
 DEF_LIB_BUILTIN        (BUILT_IN_REALLOC, "realloc", BT_FN_PTR_PTR_SIZE, 
ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LEAF_LIST)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 80efdf2b7e5..ba6086f1f25 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -132,6 +132,7 @@ extern void riscv_asm_output_external (FILE *, const tree, 
const char *);
 extern bool
 riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
 extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
+extern void riscv_expand_us_plus (rtx, rtx, rtx);
 
 #ifdef RTX_CODE
 extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool 
*invert_ptr = 0);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 4100abc9dd1..23f08974f07 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -10657,6 +10657,52 @@ riscv_vector_mode_supported_any_target_p (machine_mode)
   return true;
 }
 
+/* Emit insn for the saturation addu, aka (x + y) | - ((x + y) < x).  */
+void
+riscv_expand_us_plus (rtx dest, rtx x, rtx y)
+{
+  machine_mode mode = GET_MODE (dest);
+  rtx pmode_sum = gen_reg_rtx (Pmode);
+  rtx pmode_lt = gen_reg_rtx (Pmode);
+  rtx pmode_x = gen_lowpart (Pmode, x);
+  rtx pmode_y = gen_lowpart (Pmode, y);
+  rtx pmode_dest = gen_reg_rtx (Pmode);
+
+  /* Step-1: sum = x + y  */
+  if (mode == SImode && mode != Pmode)
+    { /* Take addw to avoid the sum truncate.  */
+      rtx simode_sum = gen_reg_rtx (SImode);
+      riscv_emit_binary (PLUS, simode_sum, x, y);
+      emit_move_insn (pmode_sum, gen_lowpart (Pmode, simode_sum));
+    }
+  else
+    riscv_emit_binary (PLUS, pmode_sum, pmode_x, pmode_y);
+
+  /* Step-1.1: truncate sum for HI and QI as we have no insn for add QI/HI.  */
+  if (mode == HImode || mode == QImode)
+    {
+      int mode_bits = GET_MODE_BITSIZE (mode).to_constant ();
+      int shift_bits = GET_MODE_BITSIZE (Pmode) - mode_bits;
+
+      gcc_assert (shift_bits > 0);
+
+      riscv_emit_binary (ASHIFT, pmode_sum, pmode_sum, GEN_INT (shift_bits));
+      riscv_emit_binary (LSHIFTRT, pmode_sum, pmode_sum, GEN_INT (shift_bits));
+    }
+
+  /* Step-2: lt = sum < x  */
+  riscv_emit_binary (LTU, pmode_lt, pmode_sum, pmode_x);
+
+  /* Step-3: lt = -lt  */
+  riscv_emit_unary (NEG, pmode_lt, pmode_lt);
+
+  /* Step-4: pmode_dest = sum | lt  */
+  riscv_emit_binary (IOR, pmode_dest, pmode_lt, pmode_sum);
+
+  /* Step-5: dest = pmode_dest */
+  emit_move_insn (dest, gen_lowpart (mode, pmode_dest));
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 3f7a023d941..eaa9867023c 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3841,6 +3841,17 @@ (define_insn "*large_load_address"
   [(set_attr "type" "load")
    (set (attr "length") (const_int 8))])
 
+(define_expand "us_plus<mode>3"
+  [(match_operand:ANYI   0 "register_operand")
+   (match_operand:ANYI   1 "register_operand")
+   (match_operand:ANYI   2 "register_operand")]
+  ""
+  {
+    riscv_expand_us_plus (operands[0], operands[1], operands[2]);
+    DONE;
+  }
+)
+
 (include "bitmanip.md")
 (include "crypto.md")
 (include "sync.md")
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index a07f25f3aee..a7341a57ffa 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -5177,3 +5177,29 @@ expand_POPCOUNT (internal_fn fn, gcall *stmt)
       emit_move_insn (plhs, cmp);
     }
 }
+
+void
+expand_US_PLUS (internal_fn fn, gcall *stmt)
+{
+  tree lhs = gimple_call_lhs (stmt);
+  tree rhs_0 = gimple_call_arg (stmt, 0);
+  tree rhs_1 = gimple_call_arg (stmt, 1);
+
+  do_pending_stack_adjust ();
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  rtx op_0 = expand_normal (rhs_0);
+  rtx op_1 = expand_normal (rhs_1);
+
+  class expand_operand ops[3];
+
+  create_output_operand (&ops[0], target, TYPE_MODE (TREE_TYPE (lhs)));
+  create_output_operand (&ops[1], op_0, TYPE_MODE (TREE_TYPE (rhs_0)));
+  create_output_operand (&ops[2], op_1, TYPE_MODE (TREE_TYPE (rhs_1)));
+
+  insn_code code = optab_handler (us_plus_optab, TYPE_MODE (TREE_TYPE 
(rhs_0)));
+  expand_insn (code, 3, ops);
+
+  if (!rtx_equal_p (target, ops[0].value))
+    emit_move_insn (target, ops[0].value);
+}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index c14d30365c1..b1d7b5a0307 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -447,6 +447,9 @@ DEF_INTERNAL_INT_FN (FFS, ECF_CONST | ECF_NOTHROW, ffs, 
unary)
 DEF_INTERNAL_INT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary)
 DEF_INTERNAL_INT_EXT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary)
 
+/* Binary integer ops.  */
+DEF_INTERNAL_INT_EXT_FN (US_PLUS, ECF_CONST | ECF_NOTHROW, us_plus, binary)
+
 DEF_INTERNAL_FN (GOMP_TARGET_REV, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL)
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index bccee1c3e09..46e404b4a49 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -263,6 +263,7 @@ extern void expand_DIVMODBITINT (internal_fn, gcall *);
 extern void expand_FLOATTOBITINT (internal_fn, gcall *);
 extern void expand_BITINTTOFLOAT (internal_fn, gcall *);
 extern void expand_POPCOUNT (internal_fn, gcall *);
+extern void expand_US_PLUS (internal_fn, gcall *);
 
 extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
 
diff --git a/gcc/match.pd b/gcc/match.pd
index c5b6540f939..f45fd58ad23 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -10265,3 +10265,20 @@ and,
       }
       (if (full_perm_p)
        (vec_perm (op@3 @0 @1) @3 @2))))))
+
+#if GIMPLE
+
+/* Unsigned saturation add, aka:
+   SAT_ADDU = (X + Y) | - ((X + Y) < X) or
+   SAT_ADDU = (X + Y) | - ((X + Y) < Y).  */
+(simplify
+ (bit_ior:c (plus:c@2 @0 @1) (negate (convert (lt @2 @0))))
+   (if (optimize
+       && INTEGRAL_TYPE_P (type)
+       && TYPE_UNSIGNED (TREE_TYPE (@0))
+       && types_match (type, TREE_TYPE (@0))
+       && types_match (type, TREE_TYPE (@1))
+       && direct_internal_fn_supported_p (IFN_US_PLUS, type, 
OPTIMIZE_FOR_BOTH))
+   (IFN_US_PLUS @0 @1)))
+
+#endif
diff --git a/gcc/optabs.def b/gcc/optabs.def
index ad14f9328b9..5855c4e0834 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -179,6 +179,8 @@ OPTAB_NL(clrsb_optab, "clrsb$a2", CLRSB, "clrsb", '2', 
gen_int_libfunc)
 OPTAB_NL(popcount_optab, "popcount$a2", POPCOUNT, "popcount", '2', 
gen_int_libfunc)
 OPTAB_NL(parity_optab, "parity$a2", PARITY, "parity", '2', gen_int_libfunc)
 
+OPTAB_NL(us_plus_optab, "us_plus$a3", US_PLUS, "us_plus", '3', gen_int_libfunc)
+
 /* Comparison libcalls for integers MUST come in pairs, signed/unsigned.  */
 OPTAB_NL(cmp_optab, NULL, UNKNOWN, "cmp", '2', gen_int_fp_fixed_libfunc)
 OPTAB_NL(ucmp_optab, NULL, UNKNOWN, "ucmp", '2', gen_int_libfunc)
-- 
2.34.1

Reply via email to