Hi Pan,

> From: Pan Li <pan2...@intel.com>
> 
> Hi Richard & Tamar,
> 
> Try the DEF_INTERNAL_INT_EXT_FN as your suggestion.  By mapping
> us_plus$a3 to the RTL representation (us_plus:m x y) in optabs.def.
> And then expand_US_PLUS in internal-fn.cc.  Not very sure if my
> understanding is correct for DEF_INTERNAL_INT_EXT_FN.
> 
> I am not sure if we still need DEF_INTERNAL_SIGNED_OPTAB_FN here, given
> the RTL representation has (ss_plus:m x y) and (us_plus:m x y) already.
> 

I think a couple of things are being confused here.  So lets break it down:

The reason for DEF_INTERNAL_SIGNED_OPTAB_FN is because in GIMPLE
we only want one internal function for both signed and unsigned SAT_ADD.
with this definition we don't need SAT_UADD and SAT_SADD but instead
we will only have SAT_ADD, which will expand to us_plus or ss_plus.

Now the downside of this is that this is a direct internal optab.  This means
that for the representation to be used the target *must* have the optab
implemented.   This is a bit annoying because it doesn't allow us to generically
assume that all targets use SAT_ADD for saturating add and thus only have to
write optimization for this representation.

This is why Richi said we may need to use a new tree_code because we can
override tree code expansions.  However the same can be done with the _EXT_FN
internal functions.

So what I meant was that we want to have a combination of the two. i.e. a
DEF_INTERNAL_SIGNED_OPTAB_EXT_FN.

If Richi agrees, the below is what I meant. It creates the infrastructure for 
this
and for now only allows a default fallback for unsigned saturating add and makes
it easier for us to add the rest later

Also, unless I'm wrong (and Richi can correct me here), us_plus and ss_plus are 
the
RTL expression, but the optab for saturation are ssadd and usadd.  So you don't
need to make new us_plus and ss_plus ones.

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index a07f25f3aee..aaf9f8991b3 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4103,6 +4103,17 @@ direct_internal_fn_supported_p (internal_fn fn, 
tree_pair types,
        return direct_##TYPE##_optab_supported_p (which_optab, types,   \
                                                  opt_type);            \
       }
+#define DEF_INTERNAL_SIGNED_OPTAB_EXT_FN(CODE, FLAGS, SELECTOR, SIGNED_OPTAB, \
+                                        UNSIGNED_OPTAB, TYPE)          \
+    case IFN_##CODE:                                                   \
+      {                                                                        
\
+       optab which_optab = (TYPE_UNSIGNED (types.SELECTOR)             \
+                            ? UNSIGNED_OPTAB ## _optab                 \
+                            : SIGNED_OPTAB ## _optab);                 \
+       return direct_##TYPE##_optab_supported_p (which_optab, types,   \
+                                                 opt_type)             \
+              || internal_##CODE##_fn_supported_p (types.SELECTOR, opt_type); \
+      }
 #include "internal-fn.def"
 
     case IFN_LAST:
@@ -4303,6 +4314,8 @@ set_edom_supported_p (void)
     optab which_optab = direct_internal_fn_optab (fn, types);          \
     expand_##TYPE##_optab_fn (fn, stmt, which_optab);                  \
   }
+#define DEF_INTERNAL_SIGNED_OPTAB_EXT_FN(CODE, FLAGS, SELECTOR, SIGNED_OPTAB, \
+                                        UNSIGNED_OPTAB, TYPE)
 #include "internal-fn.def"
 
 /* Routines to expand each internal function, indexed by function number.
@@ -5177,3 +5190,45 @@ expand_POPCOUNT (internal_fn fn, gcall *stmt)
       emit_move_insn (plhs, cmp);
     }
 }
+
+void
+expand_SAT_ADD (internal_fn fn, gcall *stmt)
+{
+  /* Check if the target supports the expansion through an IFN.  */
+  tree_pair types = direct_internal_fn_types (fn, stmt);
+  optab which_optab = direct_internal_fn_optab (fn, types);
+  if (direct_binary_optab_supported_p (which_optab, types,
+                                      insn_optimization_type ()))
+    {
+      expand_binary_optab_fn (fn, stmt, which_optab);
+      return;
+    }
+
+  /* Target does not support the optab, but we can de-compose it.  */
+  /*
+  ... decompose to a canonical representation ...
+  if (TYPE_UNSIGNED (types.SELECTOR))
+    {
+      ...
+      decompose back to (X + Y) | - ((X + Y) < X)
+    }
+  else
+    {
+      ...
+    }
+  */
+}
+
+bool internal_SAT_ADD_fn_supported_p (tree type, optimization_type /* optype 
*/)
+{
+  /* For now, don't support decomposing vector ops.  */
+  if (VECTOR_TYPE_P (type))
+    return false;
+
+  /* Signed saturating arithmetic is harder to do since we'll so for now
+     lets ignore.  */
+  if (!TYPE_UNSIGNED (type))
+    return false;
+
+  return TREE_CODE (type) == INTEGER_TYPE;
+}
\ No newline at end of file
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index c14d30365c1..5a2491228d5 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -92,6 +92,10 @@ along with GCC; see the file COPYING3.  If not see
    unsigned inputs respectively, both without the trailing "_optab".
    SELECTOR says which type in the tree_pair determines the signedness.
 
+   DEF_INTERNAL_SIGNED_OPTAB_EXT_FN is like DEF_INTERNAL_SIGNED_OPTAB_FN, 
except
+   that it has expand_##NAME defined in internal-fn.cc to override the
+   DEF_INTERNAL_SIGNED_OPTAB_FN expansion behavior.
+
    DEF_INTERNAL_FLT_FN is like DEF_INTERNAL_OPTAB_FN, but in addition,
    the function implements the computational part of a built-in math
    function BUILT_IN_<NAME>{F,,L}.  Unlike some built-in functions,
@@ -153,6 +157,13 @@ along with GCC; see the file COPYING3.  If not see
   DEF_INTERNAL_FN (NAME, FLAGS | ECF_LEAF, NULL)
 #endif
 
+#ifndef DEF_INTERNAL_SIGNED_OPTAB_EXT_FN
+#define DEF_INTERNAL_SIGNED_OPTAB_EXT_FN(NAME, FLAGS, SELECTOR, SIGNED_OPTAB, \
+                                    UNSIGNED_OPTAB, TYPE) \
+  DEF_INTERNAL_SIGNED_OPTAB_FN (NAME, FLAGS, SELECTOR, SIGNED_OPTAB, \
+                               UNSIGNED_OPTAB, TYPE)
+#endif
+
 #ifndef DEF_INTERNAL_FLT_FN
 #define DEF_INTERNAL_FLT_FN(NAME, FLAGS, OPTAB, TYPE) \
   DEF_INTERNAL_OPTAB_FN (NAME, FLAGS, OPTAB, TYPE)
@@ -274,6 +285,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | 
ECF_NOTHROW, first,
                              smulhs, umulhs, binary)
 DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
                              smulhrs, umulhrs, binary)
+DEF_INTERNAL_SIGNED_OPTAB_EXT_FN (SAT_ADD, ECF_CONST | ECF_NOTHROW, first,
+                                 ssadd, usadd, binary)
 
 DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
 DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
@@ -593,5 +606,6 @@ DEF_INTERNAL_FN (BITINTTOFLOAT, ECF_PURE | ECF_LEAF, ". R . 
")
 #undef DEF_INTERNAL_FLT_FN
 #undef DEF_INTERNAL_FLT_FLOATN_FN
 #undef DEF_INTERNAL_SIGNED_OPTAB_FN
+#undef DEF_INTERNAL_SIGNED_OPTAB_EXT_FN
 #undef DEF_INTERNAL_OPTAB_FN
 #undef DEF_INTERNAL_FN
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index bccee1c3e09..dbdb1e6bad2 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -263,6 +263,8 @@ extern void expand_DIVMODBITINT (internal_fn, gcall *);
 extern void expand_FLOATTOBITINT (internal_fn, gcall *);
 extern void expand_BITINTTOFLOAT (internal_fn, gcall *);
 extern void expand_POPCOUNT (internal_fn, gcall *);
+extern void expand_SAT_ADD (internal_fn, gcall *);
+extern bool internal_SAT_ADD_fn_supported_p (tree, optimization_type);
 
 extern bool vectorized_internal_fn_supported_p (internal_fn, tree);

> Note this patch is a draft for validation, no test are invovled here.
> 
> gcc/ChangeLog:
> 
>       * builtins.def (BUILT_IN_US_PLUS): Add builtin def.
>       (BUILT_IN_US_PLUSIMAX): Ditto.
>       (BUILT_IN_US_PLUSL): Ditto.
>       (BUILT_IN_US_PLUSLL): Ditto.
>       (BUILT_IN_US_PLUSG): Ditto.
>       * config/riscv/riscv-protos.h (riscv_expand_us_plus): Add new
>       func decl for expanding us_plus.
>       * config/riscv/riscv.cc (riscv_expand_us_plus): Add new func
>       impl for expanding us_plus.
>       * config/riscv/riscv.md (us_plus<mode>3): Add new pattern impl
>       us_plus<mode>3.
>       * internal-fn.cc (expand_US_PLUS): Add new func impl to expand
>       US_PLUS.
>       * internal-fn.def (US_PLUS): Add new INT_EXT_FN.
>       * internal-fn.h (expand_US_PLUS): Add new func decl.
>       * match.pd: Add new simplify pattern for us_plus.
>       * optabs.def (OPTAB_NL): Add new OPTAB_NL to US_PLUS rtl.
> 
> Signed-off-by: Pan Li <pan2...@intel.com>
> ---
>  gcc/builtins.def                |  7 +++++
>  gcc/config/riscv/riscv-protos.h |  1 +
>  gcc/config/riscv/riscv.cc       | 46 +++++++++++++++++++++++++++++++++
>  gcc/config/riscv/riscv.md       | 11 ++++++++
>  gcc/internal-fn.cc              | 26 +++++++++++++++++++
>  gcc/internal-fn.def             |  3 +++
>  gcc/internal-fn.h               |  1 +
>  gcc/match.pd                    | 17 ++++++++++++
>  gcc/optabs.def                  |  2 ++
>  9 files changed, 114 insertions(+)
> 
> diff --git a/gcc/builtins.def b/gcc/builtins.def
> index f6f3e104f6a..0777b912cfa 100644
> --- a/gcc/builtins.def
> +++ b/gcc/builtins.def
> @@ -1055,6 +1055,13 @@ DEF_GCC_BUILTIN        (BUILT_IN_POPCOUNTIMAX,
> "popcountimax", BT_FN_INT_UINTMAX
>  DEF_GCC_BUILTIN        (BUILT_IN_POPCOUNTL, "popcountl", BT_FN_INT_ULONG,
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_GCC_BUILTIN        (BUILT_IN_POPCOUNTLL, "popcountll",
> BT_FN_INT_ULONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_GCC_BUILTIN        (BUILT_IN_POPCOUNTG, "popcountg", BT_FN_INT_VAR,
> ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
> +
> +DEF_GCC_BUILTIN        (BUILT_IN_US_PLUS, "us_plus", BT_FN_INT_UINT,
> ATTR_CONST_NOTHROW_LEAF_LIST)
> +DEF_GCC_BUILTIN        (BUILT_IN_US_PLUSIMAX, "us_plusimax",
> BT_FN_INT_UINTMAX, ATTR_CONST_NOTHROW_LEAF_LIST)
> +DEF_GCC_BUILTIN        (BUILT_IN_US_PLUSL, "us_plusl", BT_FN_INT_ULONG,
> ATTR_CONST_NOTHROW_LEAF_LIST)
> +DEF_GCC_BUILTIN        (BUILT_IN_US_PLUSLL, "us_plusll",
> BT_FN_INT_ULONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST)
> +DEF_GCC_BUILTIN        (BUILT_IN_US_PLUSG, "us_plusg", BT_FN_INT_VAR,
> ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
> +
>  DEF_EXT_LIB_BUILTIN    (BUILT_IN_POSIX_MEMALIGN, "posix_memalign",
> BT_FN_INT_PTRPTR_SIZE_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
>  DEF_GCC_BUILTIN        (BUILT_IN_PREFETCH, "prefetch",
> BT_FN_VOID_CONST_PTR_VAR, ATTR_NOVOPS_LEAF_LIST)
>  DEF_LIB_BUILTIN        (BUILT_IN_REALLOC, "realloc", BT_FN_PTR_PTR_SIZE,
> ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LEAF_LIST)
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index 80efdf2b7e5..ba6086f1f25 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -132,6 +132,7 @@ extern void riscv_asm_output_external (FILE *, const tree,
> const char *);
>  extern bool
>  riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
>  extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
> +extern void riscv_expand_us_plus (rtx, rtx, rtx);
> 
>  #ifdef RTX_CODE
>  extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool 
> *invert_ptr =
> 0);
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 4100abc9dd1..23f08974f07 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -10657,6 +10657,52 @@ riscv_vector_mode_supported_any_target_p
> (machine_mode)
>    return true;
>  }
> 
> +/* Emit insn for the saturation addu, aka (x + y) | - ((x + y) < x).  */
> +void
> +riscv_expand_us_plus (rtx dest, rtx x, rtx y)
> +{
> +  machine_mode mode = GET_MODE (dest);
> +  rtx pmode_sum = gen_reg_rtx (Pmode);
> +  rtx pmode_lt = gen_reg_rtx (Pmode);
> +  rtx pmode_x = gen_lowpart (Pmode, x);
> +  rtx pmode_y = gen_lowpart (Pmode, y);
> +  rtx pmode_dest = gen_reg_rtx (Pmode);
> +
> +  /* Step-1: sum = x + y  */
> +  if (mode == SImode && mode != Pmode)
> +    { /* Take addw to avoid the sum truncate.  */
> +      rtx simode_sum = gen_reg_rtx (SImode);
> +      riscv_emit_binary (PLUS, simode_sum, x, y);
> +      emit_move_insn (pmode_sum, gen_lowpart (Pmode, simode_sum));
> +    }
> +  else
> +    riscv_emit_binary (PLUS, pmode_sum, pmode_x, pmode_y);
> +
> +  /* Step-1.1: truncate sum for HI and QI as we have no insn for add QI/HI.  
> */
> +  if (mode == HImode || mode == QImode)
> +    {
> +      int mode_bits = GET_MODE_BITSIZE (mode).to_constant ();
> +      int shift_bits = GET_MODE_BITSIZE (Pmode) - mode_bits;
> +
> +      gcc_assert (shift_bits > 0);
> +
> +      riscv_emit_binary (ASHIFT, pmode_sum, pmode_sum, GEN_INT (shift_bits));
> +      riscv_emit_binary (LSHIFTRT, pmode_sum, pmode_sum, GEN_INT
> (shift_bits));
> +    }
> +
> +  /* Step-2: lt = sum < x  */
> +  riscv_emit_binary (LTU, pmode_lt, pmode_sum, pmode_x);
> +
> +  /* Step-3: lt = -lt  */
> +  riscv_emit_unary (NEG, pmode_lt, pmode_lt);
> +
> +  /* Step-4: pmode_dest = sum | lt  */
> +  riscv_emit_binary (IOR, pmode_dest, pmode_lt, pmode_sum);
> +
> +  /* Step-5: dest = pmode_dest */
> +  emit_move_insn (dest, gen_lowpart (mode, pmode_dest));
> +}
> +
>  /* Initialize the GCC target structure.  */
>  #undef TARGET_ASM_ALIGNED_HI_OP
>  #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index 3f7a023d941..eaa9867023c 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -3841,6 +3841,17 @@ (define_insn "*large_load_address"
>    [(set_attr "type" "load")
>     (set (attr "length") (const_int 8))])
> 
> +(define_expand "us_plus<mode>3"
> +  [(match_operand:ANYI   0 "register_operand")
> +   (match_operand:ANYI   1 "register_operand")
> +   (match_operand:ANYI   2 "register_operand")]
> +  ""
> +  {
> +    riscv_expand_us_plus (operands[0], operands[1], operands[2]);
> +    DONE;
> +  }
> +)
> +
>  (include "bitmanip.md")
>  (include "crypto.md")
>  (include "sync.md")
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index a07f25f3aee..a7341a57ffa 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -5177,3 +5177,29 @@ expand_POPCOUNT (internal_fn fn, gcall *stmt)
>        emit_move_insn (plhs, cmp);
>      }
>  }
> +
> +void
> +expand_US_PLUS (internal_fn fn, gcall *stmt)
> +{
> +  tree lhs = gimple_call_lhs (stmt);
> +  tree rhs_0 = gimple_call_arg (stmt, 0);
> +  tree rhs_1 = gimple_call_arg (stmt, 1);
> +
> +  do_pending_stack_adjust ();
> +
> +  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
> +  rtx op_0 = expand_normal (rhs_0);
> +  rtx op_1 = expand_normal (rhs_1);
> +
> +  class expand_operand ops[3];
> +
> +  create_output_operand (&ops[0], target, TYPE_MODE (TREE_TYPE (lhs)));
> +  create_output_operand (&ops[1], op_0, TYPE_MODE (TREE_TYPE (rhs_0)));
> +  create_output_operand (&ops[2], op_1, TYPE_MODE (TREE_TYPE (rhs_1)));
> +
> +  insn_code code = optab_handler (us_plus_optab, TYPE_MODE (TREE_TYPE
> (rhs_0)));
> +  expand_insn (code, 3, ops);
> +
> +  if (!rtx_equal_p (target, ops[0].value))
> +    emit_move_insn (target, ops[0].value);
> +}

This can be simplified by calling expand_binary_optab_fn instead. See my 
template

> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index c14d30365c1..b1d7b5a0307 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -447,6 +447,9 @@ DEF_INTERNAL_INT_FN (FFS, ECF_CONST |
> ECF_NOTHROW, ffs, unary)
>  DEF_INTERNAL_INT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary)
>  DEF_INTERNAL_INT_EXT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW,
> popcount, unary)
> 
> +/* Binary integer ops.  */
> +DEF_INTERNAL_INT_EXT_FN (US_PLUS, ECF_CONST | ECF_NOTHROW, us_plus,
> binary)
> +
>  DEF_INTERNAL_FN (GOMP_TARGET_REV, ECF_NOVOPS | ECF_LEAF |
> ECF_NOTHROW, NULL)
>  DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF |
> ECF_NOTHROW, NULL)
>  DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL)
> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> index bccee1c3e09..46e404b4a49 100644
> --- a/gcc/internal-fn.h
> +++ b/gcc/internal-fn.h
> @@ -263,6 +263,7 @@ extern void expand_DIVMODBITINT (internal_fn, gcall *);
>  extern void expand_FLOATTOBITINT (internal_fn, gcall *);
>  extern void expand_BITINTTOFLOAT (internal_fn, gcall *);
>  extern void expand_POPCOUNT (internal_fn, gcall *);
> +extern void expand_US_PLUS (internal_fn, gcall *);
> 
>  extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index c5b6540f939..f45fd58ad23 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -10265,3 +10265,20 @@ and,
>        }
>        (if (full_perm_p)
>       (vec_perm (op@3 @0 @1) @3 @2))))))
> +
> +#if GIMPLE
> +
> +/* Unsigned saturation add, aka:
> +   SAT_ADDU = (X + Y) | - ((X + Y) < X) or
> +   SAT_ADDU = (X + Y) | - ((X + Y) < Y).  */
> +(simplify
> + (bit_ior:c (plus:c@2 @0 @1) (negate (convert (lt @2 @0))))
> +   (if (optimize
> +     && INTEGRAL_TYPE_P (type)
> +     && TYPE_UNSIGNED (TREE_TYPE (@0))
> +     && types_match (type, TREE_TYPE (@0))
> +     && types_match (type, TREE_TYPE (@1))
> +     && direct_internal_fn_supported_p (IFN_US_PLUS, type,
> OPTIMIZE_FOR_BOTH))
> +   (IFN_US_PLUS @0 @1)))
> +
> +#endif

With the version above you can drop the #if GIMPLE and the 
> +     && direct_internal_fn_supported_p (IFN_US_PLUS, type,

Check.

Thanks,
Tamar

> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index ad14f9328b9..5855c4e0834 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -179,6 +179,8 @@ OPTAB_NL(clrsb_optab, "clrsb$a2", CLRSB, "clrsb", '2',
> gen_int_libfunc)
>  OPTAB_NL(popcount_optab, "popcount$a2", POPCOUNT, "popcount", '2',
> gen_int_libfunc)
>  OPTAB_NL(parity_optab, "parity$a2", PARITY, "parity", '2', gen_int_libfunc)
> 
> +OPTAB_NL(us_plus_optab, "us_plus$a3", US_PLUS, "us_plus", '3',
> gen_int_libfunc)
> +
>  /* Comparison libcalls for integers MUST come in pairs, signed/unsigned.  */
>  OPTAB_NL(cmp_optab, NULL, UNKNOWN, "cmp", '2', gen_int_fp_fixed_libfunc)
>  OPTAB_NL(ucmp_optab, NULL, UNKNOWN, "ucmp", '2', gen_int_libfunc)
> --
> 2.34.1

Attachment: sat-template.diff
Description: sat-template.diff

Reply via email to