Hi Pan, > From: Pan Li <pan2...@intel.com> > > Hi Richard & Tamar, > > Try the DEF_INTERNAL_INT_EXT_FN as your suggestion. By mapping > us_plus$a3 to the RTL representation (us_plus:m x y) in optabs.def. > And then expand_US_PLUS in internal-fn.cc. Not very sure if my > understanding is correct for DEF_INTERNAL_INT_EXT_FN. > > I am not sure if we still need DEF_INTERNAL_SIGNED_OPTAB_FN here, given > the RTL representation has (ss_plus:m x y) and (us_plus:m x y) already. >
I think a couple of things are being confused here. So lets break it down: The reason for DEF_INTERNAL_SIGNED_OPTAB_FN is because in GIMPLE we only want one internal function for both signed and unsigned SAT_ADD. with this definition we don't need SAT_UADD and SAT_SADD but instead we will only have SAT_ADD, which will expand to us_plus or ss_plus. Now the downside of this is that this is a direct internal optab. This means that for the representation to be used the target *must* have the optab implemented. This is a bit annoying because it doesn't allow us to generically assume that all targets use SAT_ADD for saturating add and thus only have to write optimization for this representation. This is why Richi said we may need to use a new tree_code because we can override tree code expansions. However the same can be done with the _EXT_FN internal functions. So what I meant was that we want to have a combination of the two. i.e. a DEF_INTERNAL_SIGNED_OPTAB_EXT_FN. If Richi agrees, the below is what I meant. It creates the infrastructure for this and for now only allows a default fallback for unsigned saturating add and makes it easier for us to add the rest later Also, unless I'm wrong (and Richi can correct me here), us_plus and ss_plus are the RTL expression, but the optab for saturation are ssadd and usadd. So you don't need to make new us_plus and ss_plus ones. diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index a07f25f3aee..aaf9f8991b3 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4103,6 +4103,17 @@ direct_internal_fn_supported_p (internal_fn fn, tree_pair types, return direct_##TYPE##_optab_supported_p (which_optab, types, \ opt_type); \ } +#define DEF_INTERNAL_SIGNED_OPTAB_EXT_FN(CODE, FLAGS, SELECTOR, SIGNED_OPTAB, \ + UNSIGNED_OPTAB, TYPE) \ + case IFN_##CODE: \ + { \ + optab which_optab = (TYPE_UNSIGNED (types.SELECTOR) \ + ? UNSIGNED_OPTAB ## _optab \ + : SIGNED_OPTAB ## _optab); \ + return direct_##TYPE##_optab_supported_p (which_optab, types, \ + opt_type) \ + || internal_##CODE##_fn_supported_p (types.SELECTOR, opt_type); \ + } #include "internal-fn.def" case IFN_LAST: @@ -4303,6 +4314,8 @@ set_edom_supported_p (void) optab which_optab = direct_internal_fn_optab (fn, types); \ expand_##TYPE##_optab_fn (fn, stmt, which_optab); \ } +#define DEF_INTERNAL_SIGNED_OPTAB_EXT_FN(CODE, FLAGS, SELECTOR, SIGNED_OPTAB, \ + UNSIGNED_OPTAB, TYPE) #include "internal-fn.def" /* Routines to expand each internal function, indexed by function number. @@ -5177,3 +5190,45 @@ expand_POPCOUNT (internal_fn fn, gcall *stmt) emit_move_insn (plhs, cmp); } } + +void +expand_SAT_ADD (internal_fn fn, gcall *stmt) +{ + /* Check if the target supports the expansion through an IFN. */ + tree_pair types = direct_internal_fn_types (fn, stmt); + optab which_optab = direct_internal_fn_optab (fn, types); + if (direct_binary_optab_supported_p (which_optab, types, + insn_optimization_type ())) + { + expand_binary_optab_fn (fn, stmt, which_optab); + return; + } + + /* Target does not support the optab, but we can de-compose it. */ + /* + ... decompose to a canonical representation ... + if (TYPE_UNSIGNED (types.SELECTOR)) + { + ... + decompose back to (X + Y) | - ((X + Y) < X) + } + else + { + ... + } + */ +} + +bool internal_SAT_ADD_fn_supported_p (tree type, optimization_type /* optype */) +{ + /* For now, don't support decomposing vector ops. */ + if (VECTOR_TYPE_P (type)) + return false; + + /* Signed saturating arithmetic is harder to do since we'll so for now + lets ignore. */ + if (!TYPE_UNSIGNED (type)) + return false; + + return TREE_CODE (type) == INTEGER_TYPE; +} \ No newline at end of file diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index c14d30365c1..5a2491228d5 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -92,6 +92,10 @@ along with GCC; see the file COPYING3. If not see unsigned inputs respectively, both without the trailing "_optab". SELECTOR says which type in the tree_pair determines the signedness. + DEF_INTERNAL_SIGNED_OPTAB_EXT_FN is like DEF_INTERNAL_SIGNED_OPTAB_FN, except + that it has expand_##NAME defined in internal-fn.cc to override the + DEF_INTERNAL_SIGNED_OPTAB_FN expansion behavior. + DEF_INTERNAL_FLT_FN is like DEF_INTERNAL_OPTAB_FN, but in addition, the function implements the computational part of a built-in math function BUILT_IN_<NAME>{F,,L}. Unlike some built-in functions, @@ -153,6 +157,13 @@ along with GCC; see the file COPYING3. If not see DEF_INTERNAL_FN (NAME, FLAGS | ECF_LEAF, NULL) #endif +#ifndef DEF_INTERNAL_SIGNED_OPTAB_EXT_FN +#define DEF_INTERNAL_SIGNED_OPTAB_EXT_FN(NAME, FLAGS, SELECTOR, SIGNED_OPTAB, \ + UNSIGNED_OPTAB, TYPE) \ + DEF_INTERNAL_SIGNED_OPTAB_FN (NAME, FLAGS, SELECTOR, SIGNED_OPTAB, \ + UNSIGNED_OPTAB, TYPE) +#endif + #ifndef DEF_INTERNAL_FLT_FN #define DEF_INTERNAL_FLT_FN(NAME, FLAGS, OPTAB, TYPE) \ DEF_INTERNAL_OPTAB_FN (NAME, FLAGS, OPTAB, TYPE) @@ -274,6 +285,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | ECF_NOTHROW, first, smulhs, umulhs, binary) DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first, smulhrs, umulhrs, binary) +DEF_INTERNAL_SIGNED_OPTAB_EXT_FN (SAT_ADD, ECF_CONST | ECF_NOTHROW, first, + ssadd, usadd, binary) DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary) DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary) @@ -593,5 +606,6 @@ DEF_INTERNAL_FN (BITINTTOFLOAT, ECF_PURE | ECF_LEAF, ". R . ") #undef DEF_INTERNAL_FLT_FN #undef DEF_INTERNAL_FLT_FLOATN_FN #undef DEF_INTERNAL_SIGNED_OPTAB_FN +#undef DEF_INTERNAL_SIGNED_OPTAB_EXT_FN #undef DEF_INTERNAL_OPTAB_FN #undef DEF_INTERNAL_FN diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h index bccee1c3e09..dbdb1e6bad2 100644 --- a/gcc/internal-fn.h +++ b/gcc/internal-fn.h @@ -263,6 +263,8 @@ extern void expand_DIVMODBITINT (internal_fn, gcall *); extern void expand_FLOATTOBITINT (internal_fn, gcall *); extern void expand_BITINTTOFLOAT (internal_fn, gcall *); extern void expand_POPCOUNT (internal_fn, gcall *); +extern void expand_SAT_ADD (internal_fn, gcall *); +extern bool internal_SAT_ADD_fn_supported_p (tree, optimization_type); extern bool vectorized_internal_fn_supported_p (internal_fn, tree); > Note this patch is a draft for validation, no test are invovled here. > > gcc/ChangeLog: > > * builtins.def (BUILT_IN_US_PLUS): Add builtin def. > (BUILT_IN_US_PLUSIMAX): Ditto. > (BUILT_IN_US_PLUSL): Ditto. > (BUILT_IN_US_PLUSLL): Ditto. > (BUILT_IN_US_PLUSG): Ditto. > * config/riscv/riscv-protos.h (riscv_expand_us_plus): Add new > func decl for expanding us_plus. > * config/riscv/riscv.cc (riscv_expand_us_plus): Add new func > impl for expanding us_plus. > * config/riscv/riscv.md (us_plus<mode>3): Add new pattern impl > us_plus<mode>3. > * internal-fn.cc (expand_US_PLUS): Add new func impl to expand > US_PLUS. > * internal-fn.def (US_PLUS): Add new INT_EXT_FN. > * internal-fn.h (expand_US_PLUS): Add new func decl. > * match.pd: Add new simplify pattern for us_plus. > * optabs.def (OPTAB_NL): Add new OPTAB_NL to US_PLUS rtl. > > Signed-off-by: Pan Li <pan2...@intel.com> > --- > gcc/builtins.def | 7 +++++ > gcc/config/riscv/riscv-protos.h | 1 + > gcc/config/riscv/riscv.cc | 46 +++++++++++++++++++++++++++++++++ > gcc/config/riscv/riscv.md | 11 ++++++++ > gcc/internal-fn.cc | 26 +++++++++++++++++++ > gcc/internal-fn.def | 3 +++ > gcc/internal-fn.h | 1 + > gcc/match.pd | 17 ++++++++++++ > gcc/optabs.def | 2 ++ > 9 files changed, 114 insertions(+) > > diff --git a/gcc/builtins.def b/gcc/builtins.def > index f6f3e104f6a..0777b912cfa 100644 > --- a/gcc/builtins.def > +++ b/gcc/builtins.def > @@ -1055,6 +1055,13 @@ DEF_GCC_BUILTIN (BUILT_IN_POPCOUNTIMAX, > "popcountimax", BT_FN_INT_UINTMAX > DEF_GCC_BUILTIN (BUILT_IN_POPCOUNTL, "popcountl", BT_FN_INT_ULONG, > ATTR_CONST_NOTHROW_LEAF_LIST) > DEF_GCC_BUILTIN (BUILT_IN_POPCOUNTLL, "popcountll", > BT_FN_INT_ULONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST) > DEF_GCC_BUILTIN (BUILT_IN_POPCOUNTG, "popcountg", BT_FN_INT_VAR, > ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF) > + > +DEF_GCC_BUILTIN (BUILT_IN_US_PLUS, "us_plus", BT_FN_INT_UINT, > ATTR_CONST_NOTHROW_LEAF_LIST) > +DEF_GCC_BUILTIN (BUILT_IN_US_PLUSIMAX, "us_plusimax", > BT_FN_INT_UINTMAX, ATTR_CONST_NOTHROW_LEAF_LIST) > +DEF_GCC_BUILTIN (BUILT_IN_US_PLUSL, "us_plusl", BT_FN_INT_ULONG, > ATTR_CONST_NOTHROW_LEAF_LIST) > +DEF_GCC_BUILTIN (BUILT_IN_US_PLUSLL, "us_plusll", > BT_FN_INT_ULONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST) > +DEF_GCC_BUILTIN (BUILT_IN_US_PLUSG, "us_plusg", BT_FN_INT_VAR, > ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF) > + > DEF_EXT_LIB_BUILTIN (BUILT_IN_POSIX_MEMALIGN, "posix_memalign", > BT_FN_INT_PTRPTR_SIZE_SIZE, ATTR_NOTHROW_NONNULL_LEAF) > DEF_GCC_BUILTIN (BUILT_IN_PREFETCH, "prefetch", > BT_FN_VOID_CONST_PTR_VAR, ATTR_NOVOPS_LEAF_LIST) > DEF_LIB_BUILTIN (BUILT_IN_REALLOC, "realloc", BT_FN_PTR_PTR_SIZE, > ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LEAF_LIST) > diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h > index 80efdf2b7e5..ba6086f1f25 100644 > --- a/gcc/config/riscv/riscv-protos.h > +++ b/gcc/config/riscv/riscv-protos.h > @@ -132,6 +132,7 @@ extern void riscv_asm_output_external (FILE *, const tree, > const char *); > extern bool > riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int); > extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx); > +extern void riscv_expand_us_plus (rtx, rtx, rtx); > > #ifdef RTX_CODE > extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool > *invert_ptr = > 0); > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc > index 4100abc9dd1..23f08974f07 100644 > --- a/gcc/config/riscv/riscv.cc > +++ b/gcc/config/riscv/riscv.cc > @@ -10657,6 +10657,52 @@ riscv_vector_mode_supported_any_target_p > (machine_mode) > return true; > } > > +/* Emit insn for the saturation addu, aka (x + y) | - ((x + y) < x). */ > +void > +riscv_expand_us_plus (rtx dest, rtx x, rtx y) > +{ > + machine_mode mode = GET_MODE (dest); > + rtx pmode_sum = gen_reg_rtx (Pmode); > + rtx pmode_lt = gen_reg_rtx (Pmode); > + rtx pmode_x = gen_lowpart (Pmode, x); > + rtx pmode_y = gen_lowpart (Pmode, y); > + rtx pmode_dest = gen_reg_rtx (Pmode); > + > + /* Step-1: sum = x + y */ > + if (mode == SImode && mode != Pmode) > + { /* Take addw to avoid the sum truncate. */ > + rtx simode_sum = gen_reg_rtx (SImode); > + riscv_emit_binary (PLUS, simode_sum, x, y); > + emit_move_insn (pmode_sum, gen_lowpart (Pmode, simode_sum)); > + } > + else > + riscv_emit_binary (PLUS, pmode_sum, pmode_x, pmode_y); > + > + /* Step-1.1: truncate sum for HI and QI as we have no insn for add QI/HI. > */ > + if (mode == HImode || mode == QImode) > + { > + int mode_bits = GET_MODE_BITSIZE (mode).to_constant (); > + int shift_bits = GET_MODE_BITSIZE (Pmode) - mode_bits; > + > + gcc_assert (shift_bits > 0); > + > + riscv_emit_binary (ASHIFT, pmode_sum, pmode_sum, GEN_INT (shift_bits)); > + riscv_emit_binary (LSHIFTRT, pmode_sum, pmode_sum, GEN_INT > (shift_bits)); > + } > + > + /* Step-2: lt = sum < x */ > + riscv_emit_binary (LTU, pmode_lt, pmode_sum, pmode_x); > + > + /* Step-3: lt = -lt */ > + riscv_emit_unary (NEG, pmode_lt, pmode_lt); > + > + /* Step-4: pmode_dest = sum | lt */ > + riscv_emit_binary (IOR, pmode_dest, pmode_lt, pmode_sum); > + > + /* Step-5: dest = pmode_dest */ > + emit_move_insn (dest, gen_lowpart (mode, pmode_dest)); > +} > + > /* Initialize the GCC target structure. */ > #undef TARGET_ASM_ALIGNED_HI_OP > #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" > diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md > index 3f7a023d941..eaa9867023c 100644 > --- a/gcc/config/riscv/riscv.md > +++ b/gcc/config/riscv/riscv.md > @@ -3841,6 +3841,17 @@ (define_insn "*large_load_address" > [(set_attr "type" "load") > (set (attr "length") (const_int 8))]) > > +(define_expand "us_plus<mode>3" > + [(match_operand:ANYI 0 "register_operand") > + (match_operand:ANYI 1 "register_operand") > + (match_operand:ANYI 2 "register_operand")] > + "" > + { > + riscv_expand_us_plus (operands[0], operands[1], operands[2]); > + DONE; > + } > +) > + > (include "bitmanip.md") > (include "crypto.md") > (include "sync.md") > diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc > index a07f25f3aee..a7341a57ffa 100644 > --- a/gcc/internal-fn.cc > +++ b/gcc/internal-fn.cc > @@ -5177,3 +5177,29 @@ expand_POPCOUNT (internal_fn fn, gcall *stmt) > emit_move_insn (plhs, cmp); > } > } > + > +void > +expand_US_PLUS (internal_fn fn, gcall *stmt) > +{ > + tree lhs = gimple_call_lhs (stmt); > + tree rhs_0 = gimple_call_arg (stmt, 0); > + tree rhs_1 = gimple_call_arg (stmt, 1); > + > + do_pending_stack_adjust (); > + > + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); > + rtx op_0 = expand_normal (rhs_0); > + rtx op_1 = expand_normal (rhs_1); > + > + class expand_operand ops[3]; > + > + create_output_operand (&ops[0], target, TYPE_MODE (TREE_TYPE (lhs))); > + create_output_operand (&ops[1], op_0, TYPE_MODE (TREE_TYPE (rhs_0))); > + create_output_operand (&ops[2], op_1, TYPE_MODE (TREE_TYPE (rhs_1))); > + > + insn_code code = optab_handler (us_plus_optab, TYPE_MODE (TREE_TYPE > (rhs_0))); > + expand_insn (code, 3, ops); > + > + if (!rtx_equal_p (target, ops[0].value)) > + emit_move_insn (target, ops[0].value); > +} This can be simplified by calling expand_binary_optab_fn instead. See my template > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > index c14d30365c1..b1d7b5a0307 100644 > --- a/gcc/internal-fn.def > +++ b/gcc/internal-fn.def > @@ -447,6 +447,9 @@ DEF_INTERNAL_INT_FN (FFS, ECF_CONST | > ECF_NOTHROW, ffs, unary) > DEF_INTERNAL_INT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary) > DEF_INTERNAL_INT_EXT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, > popcount, unary) > > +/* Binary integer ops. */ > +DEF_INTERNAL_INT_EXT_FN (US_PLUS, ECF_CONST | ECF_NOTHROW, us_plus, > binary) > + > DEF_INTERNAL_FN (GOMP_TARGET_REV, ECF_NOVOPS | ECF_LEAF | > ECF_NOTHROW, NULL) > DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | > ECF_NOTHROW, NULL) > DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL) > diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h > index bccee1c3e09..46e404b4a49 100644 > --- a/gcc/internal-fn.h > +++ b/gcc/internal-fn.h > @@ -263,6 +263,7 @@ extern void expand_DIVMODBITINT (internal_fn, gcall *); > extern void expand_FLOATTOBITINT (internal_fn, gcall *); > extern void expand_BITINTTOFLOAT (internal_fn, gcall *); > extern void expand_POPCOUNT (internal_fn, gcall *); > +extern void expand_US_PLUS (internal_fn, gcall *); > > extern bool vectorized_internal_fn_supported_p (internal_fn, tree); > > diff --git a/gcc/match.pd b/gcc/match.pd > index c5b6540f939..f45fd58ad23 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -10265,3 +10265,20 @@ and, > } > (if (full_perm_p) > (vec_perm (op@3 @0 @1) @3 @2)))))) > + > +#if GIMPLE > + > +/* Unsigned saturation add, aka: > + SAT_ADDU = (X + Y) | - ((X + Y) < X) or > + SAT_ADDU = (X + Y) | - ((X + Y) < Y). */ > +(simplify > + (bit_ior:c (plus:c@2 @0 @1) (negate (convert (lt @2 @0)))) > + (if (optimize > + && INTEGRAL_TYPE_P (type) > + && TYPE_UNSIGNED (TREE_TYPE (@0)) > + && types_match (type, TREE_TYPE (@0)) > + && types_match (type, TREE_TYPE (@1)) > + && direct_internal_fn_supported_p (IFN_US_PLUS, type, > OPTIMIZE_FOR_BOTH)) > + (IFN_US_PLUS @0 @1))) > + > +#endif With the version above you can drop the #if GIMPLE and the > + && direct_internal_fn_supported_p (IFN_US_PLUS, type, Check. Thanks, Tamar > diff --git a/gcc/optabs.def b/gcc/optabs.def > index ad14f9328b9..5855c4e0834 100644 > --- a/gcc/optabs.def > +++ b/gcc/optabs.def > @@ -179,6 +179,8 @@ OPTAB_NL(clrsb_optab, "clrsb$a2", CLRSB, "clrsb", '2', > gen_int_libfunc) > OPTAB_NL(popcount_optab, "popcount$a2", POPCOUNT, "popcount", '2', > gen_int_libfunc) > OPTAB_NL(parity_optab, "parity$a2", PARITY, "parity", '2', gen_int_libfunc) > > +OPTAB_NL(us_plus_optab, "us_plus$a3", US_PLUS, "us_plus", '3', > gen_int_libfunc) > + > /* Comparison libcalls for integers MUST come in pairs, signed/unsigned. */ > OPTAB_NL(cmp_optab, NULL, UNKNOWN, "cmp", '2', gen_int_fp_fixed_libfunc) > OPTAB_NL(ucmp_optab, NULL, UNKNOWN, "ucmp", '2', gen_int_libfunc) > -- > 2.34.1
sat-template.diff
Description: sat-template.diff