RE: [PATCH v1] Internal-fn: Add new internal function SAT_ADDU

Tamar Christina Tue, 27 Feb 2024 03:15:53 -0800

> Am 19.02.24 um 08:36 schrieb Richard Biener:
> > On Sat, Feb 17, 2024 at 11:30 AM <pan2...@intel.com> wrote:
> >>
> >> From: Pan Li <pan2...@intel.com>
> >>
> >> This patch adds a middle-end representation for the unsigned
> >> saturating add, i.e. the result of the add is set to the maximum
> >> value of the type on overflow.  It matches a pattern like the one below.
> >>
> >> SAT_ADDU (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))
> 
> Does this even try to work out the costs?
> 
> For example, with the following example
> 
> 
> #define T __UINT16_TYPE__
> 
> T sat_add1 (T x, T y)
> {
>    return (x + y) | (- (T)((T)(x + y) < x));
> }
> 
> T sat_add2 (T x, T y)
> {
>      T z = x + y;
>      if (z < x)
>          z = (T) -1;
>      return z;
> }
> 
> And then "avr-gcc -S -Os -dp" the code is
> 
> 
> sat_add1:
>       add r22,r24      ;  7   [c=8 l=2]  *addhi3/0
>       adc r23,r25
>       ldi r18,lo8(1)   ;  8   [c=4 l=2]  *movhi/4
>       ldi r19,0
>       cp r22,r24       ;  9   [c=8 l=2]  cmphi3/2
>       cpc r23,r25
>       brlo .L2                 ;  10  [c=16 l=1]  branch
>       ldi r19,0                ;  31  [c=4 l=1]  movqi_insn/0
>       ldi r18,0                ;  32  [c=4 l=1]  movqi_insn/0
> .L2:
>       clr r24  ;  13  [c=12 l=4]  neghi2/1
>       clr r25
>       sub r24,r18
>       sbc r25,r19
>       or r24,r22               ;  29  [c=4 l=1]  iorqi3/0
>       or r25,r23               ;  30  [c=4 l=1]  iorqi3/0
>       ret              ;  35  [c=0 l=1]  return
> 
> sat_add2:
>       add r22,r24      ;  8   [c=8 l=2]  *addhi3/0
>       adc r23,r25
>       cp r22,r24       ;  9   [c=8 l=2]  cmphi3/2
>       cpc r23,r25
>       brsh .L3                 ;  10  [c=16 l=1]  branch
>       ldi r22,lo8(-1)  ;  5   [c=4 l=2]  *movhi/4
>       ldi r23,lo8(-1)
> .L3:
>       mov r25,r23      ;  21  [c=4 l=1]  movqi_insn/0
>       mov r24,r22      ;  22  [c=4 l=1]  movqi_insn/0
>       ret              ;  25  [c=0 l=1]  return
> 
> i.e. the conditional jump is better than overly smart arithmetic
> (smaller and faster code with less register pressure).
> With larger types the difference is even more pronounced.
>


*on AVR. https://godbolt.org/z/7jaExbTa8  shows the branchless code is better.
And the branchy code will vectorize worse, if at all:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112600
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51492

But looking at that output, it just seems like it's your expansion that's
inefficient.

But fair point, perhaps it should be just a normal DEF_INTERNAL_SIGNED_OPTAB_FN
so that we provide the additional optimization only for targets that want it.

Tamar

> >> Take uint8_t as example, we will have:
> >>
> >> * SAT_ADDU (1, 254)   => 255.
> >> * SAT_ADDU (1, 255)   => 255.
> >> * SAT_ADDU (2, 255)   => 255.
> >> * SAT_ADDU (255, 255) => 255.
> >>
> >> The patch also implements SAT_ADDU in the riscv backend as a
> >> sample.  Given the example below:
> >>
> >> uint64_t sat_add_u64 (uint64_t x, uint64_t y)
> >> {
> >>    return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
> >> }
> >>
> >> Before this patch:
> >>
> >> uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> >> {
> >>    long unsigned int _1;
> >>    _Bool _2;
> >>    long unsigned int _3;
> >>    long unsigned int _4;
> >>    uint64_t _7;
> >>    long unsigned int _10;
> >>    __complex__ long unsigned int _11;
> >>
> >> ;;   basic block 2, loop depth 0
> >> ;;    pred:       ENTRY
> >>    _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
> >>    _1 = REALPART_EXPR <_11>;
> >>    _10 = IMAGPART_EXPR <_11>;
> >>    _2 = _10 != 0;
> >>    _3 = (long unsigned int) _2;
> >>    _4 = -_3;
> >>    _7 = _1 | _4;
> >>    return _7;
> >> ;;    succ:       EXIT
> >>
> >> }
> >>
> >> After this patch:
> >>
> >> uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> >> {
> >>    uint64_t _7;
> >>
> >> ;;   basic block 2, loop depth 0
> >> ;;    pred:       ENTRY
> >>    _7 = .SAT_ADDU (x_5(D), y_6(D)); [tail call]
> >>    return _7;
> >> ;;    succ:       EXIT
> >>
> >> }
> >>
> >> Then we will have the middle-end representation like .SAT_ADDU after
> >> this patch.
> >
> > I'll note that on RTL we already have SS_PLUS/US_PLUS and friends and
> > the corresponding ssadd/usadd optabs.  There's not much documentation
> > unfortunately besides the gen_*_fixed_libfunc usage, where the comment
> > suggests this is used for fixed-point operations.  It looks like arm uses
> > fractional/accumulator modes for this but for example bfin has ssaddsi3.
> >
> > So the question is whether the fixed-point case can be distinguished from
> > the integer case based on mode.
> >
> > There's also FIXED_POINT_TYPE on the GENERIC/GIMPLE side and
> > no special tree operator codes for them.  So compared to what appears
> > to be the case on RTL we'd need a way to represent saturating integer
> > operations on GIMPLE.
> >
> > The natural thing is to use direct optab internal functions (that's what you
> > basically did, but you added a new optab, IMO without good reason).
> > More GIMPLE-like would be to let the types involved decide whether
> > it's signed or unsigned saturation.  That's actually what I'd prefer here
> > and if we don't map 1:1 to optabs then instead use tree codes like
> > S_PLUS_EXPR (mimicking RTL here).
> >
> > Any other opinions?  Does anyone know more about fixed-point and RTL/modes?
> >
> > Richard.
> >
> >>          PR target/51492
> >>          PR target/112600
> >>
> >> gcc/ChangeLog:
> >>
> >>          * config/riscv/riscv-protos.h (riscv_expand_saturation_addu):
> >>          New func decl for the SAT_ADDU expand.
> >>          * config/riscv/riscv.cc (riscv_expand_saturation_addu): New func
> >>          impl for the SAT_ADDU expand.
> >>          * config/riscv/riscv.md (sat_addu_<mode>3): New pattern to impl
> >>          the standard name SAT_ADDU.
> >>          * doc/md.texi: Add doc for SAT_ADDU.
> >>          * internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADDU.
> >>          * internal-fn.def (SAT_ADDU): Add SAT_ADDU.
> >>          * match.pd: Add simplify pattern patch for SAT_ADDU.
> >>          * optabs.def (OPTAB_D): Add sat_addu_optab.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >>          * gcc.target/riscv/sat_addu-1.c: New test.
> >>          * gcc.target/riscv/sat_addu-2.c: New test.
> >>          * gcc.target/riscv/sat_addu-3.c: New test.
> >>          * gcc.target/riscv/sat_addu-4.c: New test.
> >>          * gcc.target/riscv/sat_addu-run-1.c: New test.
> >>          * gcc.target/riscv/sat_addu-run-2.c: New test.
> >>          * gcc.target/riscv/sat_addu-run-3.c: New test.
> >>          * gcc.target/riscv/sat_addu-run-4.c: New test.
> >>          * gcc.target/riscv/sat_arith.h: New test.
> >>
> >> Signed-off-by: Pan Li <pan2...@intel.com>
> >> ---
> >>   gcc/config/riscv/riscv-protos.h               |  1 +
> >>   gcc/config/riscv/riscv.cc                     | 46 +++++++++++++++++
> >>   gcc/config/riscv/riscv.md                     | 11 +++++
> >>   gcc/doc/md.texi                               | 11 +++++
> >>   gcc/internal-fn.cc                            |  1 +
> >>   gcc/internal-fn.def                           |  1 +
> >>   gcc/match.pd                                  | 22 +++++++++
> >>   gcc/optabs.def                                |  2 +
> >>   gcc/testsuite/gcc.target/riscv/sat_addu-1.c   | 18 +++++++
> >>   gcc/testsuite/gcc.target/riscv/sat_addu-2.c   | 20 ++++++++
> >>   gcc/testsuite/gcc.target/riscv/sat_addu-3.c   | 17 +++++++
> >>   gcc/testsuite/gcc.target/riscv/sat_addu-4.c   | 16 ++++++
> >>   .../gcc.target/riscv/sat_addu-run-1.c         | 42 ++++++++++++++++
> >>   .../gcc.target/riscv/sat_addu-run-2.c         | 42 ++++++++++++++++
> >>   .../gcc.target/riscv/sat_addu-run-3.c         | 42 ++++++++++++++++
> >>   .../gcc.target/riscv/sat_addu-run-4.c         | 49 +++++++++++++++++++
> >>   gcc/testsuite/gcc.target/riscv/sat_arith.h    | 15 ++++++
> >>   17 files changed, 356 insertions(+)
> >>   create mode 100644 gcc/testsuite/gcc.target/riscv/sat_addu-1.c
> >>   create mode 100644 gcc/testsuite/gcc.target/riscv/sat_addu-2.c
> >>   create mode 100644 gcc/testsuite/gcc.target/riscv/sat_addu-3.c
> >>   create mode 100644 gcc/testsuite/gcc.target/riscv/sat_addu-4.c
> >>   create mode 100644 gcc/testsuite/gcc.target/riscv/sat_addu-run-1.c
> >>   create mode 100644 gcc/testsuite/gcc.target/riscv/sat_addu-run-2.c
> >>   create mode 100644 gcc/testsuite/gcc.target/riscv/sat_addu-run-3.c
> >>   create mode 100644 gcc/testsuite/gcc.target/riscv/sat_addu-run-4.c
> >>   create mode 100644 gcc/testsuite/gcc.target/riscv/sat_arith.h
> >>
> >> diff --git a/gcc/config/riscv/riscv-protos.h 
> >> b/gcc/config/riscv/riscv-protos.h
> >> index ae1685850ac..f201b2384f9 100644
> >> --- a/gcc/config/riscv/riscv-protos.h
> >> +++ b/gcc/config/riscv/riscv-protos.h
> >> @@ -132,6 +132,7 @@ extern void riscv_asm_output_external (FILE *, const
> tree, const char *);
> >>   extern bool
> >>   riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
> >>   extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
> >> +extern void riscv_expand_saturation_addu (rtx, rtx, rtx);
> >>
> >>   #ifdef RTX_CODE
> >>   extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool 
> >> *invert_ptr
> = 0);
> >> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> >> index 799d7919a4a..84e86eb5d49 100644
> >> --- a/gcc/config/riscv/riscv.cc
> >> +++ b/gcc/config/riscv/riscv.cc
> >> @@ -10657,6 +10657,52 @@ riscv_vector_mode_supported_any_target_p
> (machine_mode)
> >>     return true;
> >>   }
> >>
> >> +/* Emit insn for the saturation addu, aka (x + y) | - ((x + y) < x).  */
> >> +void
> >> +riscv_expand_saturation_addu (rtx dest, rtx x, rtx y)
> >> +{
> >> +  machine_mode mode = GET_MODE (dest);
> >> +  rtx pmode_sum = gen_reg_rtx (Pmode);
> >> +  rtx pmode_lt = gen_reg_rtx (Pmode);
> >> +  rtx pmode_x = gen_lowpart (Pmode, x);
> >> +  rtx pmode_y = gen_lowpart (Pmode, y);
> >> +  rtx pmode_dest = gen_reg_rtx (Pmode);
> >> +
> >> +  /* Step-1: sum = x + y  */
> >> +  if (mode == SImode && mode != Pmode)
> >> +    { /* Take addw to avoid the sum truncate.  */
> >> +      rtx simode_sum = gen_reg_rtx (SImode);
> >> +      riscv_emit_binary (PLUS, simode_sum, x, y);
> >> +      emit_move_insn (pmode_sum, gen_lowpart (Pmode, simode_sum));
> >> +    }
> >> +  else
> >> +    riscv_emit_binary (PLUS, pmode_sum, pmode_x, pmode_y);
> >> +
> >> +  /* Step-1.1: truncate sum for HI and QI as we have no insn for add 
> >> QI/HI.  */
> >> +  if (mode == HImode || mode == QImode)
> >> +    {
> >> +      int shift_bits = GET_MODE_BITSIZE (Pmode)
> >> +       - GET_MODE_BITSIZE (mode).to_constant ();
> >> +
> >> +      gcc_assert (shift_bits > 0);
> >> +
> >> +      riscv_emit_binary (ASHIFT, pmode_sum, pmode_sum, GEN_INT
> (shift_bits));
> >> +      riscv_emit_binary (LSHIFTRT, pmode_sum, pmode_sum, GEN_INT
> (shift_bits));
> >> +    }
> >> +
> >> +  /* Step-2: lt = sum < x  */
> >> +  riscv_emit_binary (LTU, pmode_lt, pmode_sum, pmode_x);
> >> +
> >> +  /* Step-3: lt = -lt  */
> >> +  riscv_emit_unary (NEG, pmode_lt, pmode_lt);
> >> +
> >> +  /* Step-4: pmode_dest = sum | lt  */
> >> +  riscv_emit_binary (IOR, pmode_dest, pmode_lt, pmode_sum);
> >> +
> >> +  /* Step-5: dest = pmode_dest */
> >> +  emit_move_insn (dest, gen_lowpart (mode, pmode_dest));
> >> +}
> >> +
> >>   /* Initialize the GCC target structure.  */
> >>   #undef TARGET_ASM_ALIGNED_HI_OP
> >>   #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
> >> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> >> index 39b29795cd6..03cbe5a2ca9 100644
> >> --- a/gcc/config/riscv/riscv.md
> >> +++ b/gcc/config/riscv/riscv.md
> >> @@ -3841,6 +3841,17 @@ (define_insn "*large_load_address"
> >>     [(set_attr "type" "load")
> >>      (set (attr "length") (const_int 8))])
> >>
> >> +(define_expand "sat_addu_<mode>3"
> >> +  [(match_operand:ANYI   0 "register_operand")
> >> +   (match_operand:ANYI   1 "register_operand")
> >> +   (match_operand:ANYI   2 "register_operand")]
> >> +  ""
> >> +  {
> >> +    riscv_expand_saturation_addu (operands[0], operands[1], operands[2]);
> >> +    DONE;
> >> +  }
> >> +)
> >> +
> >>   (include "bitmanip.md")
> >>   (include "crypto.md")
> >>   (include "sync.md")
> >> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> >> index b0c61925120..5867afdb1a0 100644
> >> --- a/gcc/doc/md.texi
> >> +++ b/gcc/doc/md.texi
> >> @@ -6653,6 +6653,17 @@ The operation is only supported for vector modes
> @var{m}.
> >>
> >>   This pattern is not allowed to @code{FAIL}.
> >>
> >> +@cindex @code{sat_addu_@var{m}3} instruction pattern
> >> +@item @samp{sat_addu_@var{m}3}
> >> +Perform the saturation unsigned add for the operand 1 and operand 2 and
> >> +store the result into the operand 0.  All operands have mode @var{m},
> >> +which is a scalar integer mode.
> >> +
> >> +@smallexample
> >> +  typedef unsigned char uint8_t;
> >> +  uint8_t sat_addu (uint8_t x, uint8_t y) => return (x + y) | -((x + y) < 
> >> x);
> >> +@end smallexample
> >> +
> >>   @cindex @code{cmla@var{m}4} instruction pattern
> >>   @item @samp{cmla@var{m}4}
> >>   Perform a vector multiply and accumulate that is semantically the same as
> >> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> >> index a07f25f3aee..dee73dbc614 100644
> >> --- a/gcc/internal-fn.cc
> >> +++ b/gcc/internal-fn.cc
> >> @@ -4159,6 +4159,7 @@ commutative_binary_fn_p (internal_fn fn)
> >>       case IFN_VEC_WIDEN_PLUS_HI:
> >>       case IFN_VEC_WIDEN_PLUS_EVEN:
> >>       case IFN_VEC_WIDEN_PLUS_ODD:
> >> +    case IFN_SAT_ADDU:
> >>         return true;
> >>
> >>       default:
> >> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> >> index c14d30365c1..a04592fc779 100644
> >> --- a/gcc/internal-fn.def
> >> +++ b/gcc/internal-fn.def
> >> @@ -428,6 +428,7 @@ DEF_INTERNAL_WIDENING_OPTAB_FN
> (VEC_WIDEN_ABD,
> >>                                  binary)
> >>   DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub,
> ternary)
> >>   DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd,
> ternary)
> >> +DEF_INTERNAL_OPTAB_FN (SAT_ADDU, ECF_CONST | ECF_NOTHROW,
> sat_addu, binary)
> >>
> >>   /* FP scales.  */
> >>   DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary)
> >> diff --git a/gcc/match.pd b/gcc/match.pd
> >> index 711c3a10c3f..9de1106adcf 100644
> >> --- a/gcc/match.pd
> >> +++ b/gcc/match.pd
> >> @@ -1994,6 +1994,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >>    )
> >>   )
> >>
> >> +#if GIMPLE
> >> +
> >> +/* Saturation add unsigned, aka:
> >> +   SAT_ADDU = (X + Y) | - ((X + Y) < X) or
> >> +   SAT_ADDU = (X + Y) | - ((X + Y) < Y).  */
> >> +(simplify
> >> + (bit_ior:c (plus:c@2 @0 @1) (negate (convert (lt @2 @0))))
> >> +   (if (optimize
> >> +       && INTEGRAL_TYPE_P (type)
> >> +       && TYPE_UNSIGNED (TREE_TYPE (@0))
> >> +       && types_match (type, TREE_TYPE (@0))
> >> +       && types_match (type, TREE_TYPE (@1))
> >> +       && direct_internal_fn_supported_p (IFN_SAT_ADDU, type,
> OPTIMIZE_FOR_BOTH))
> >> +   (IFN_SAT_ADDU @0 @1)))
> >> +
> >> +/* SAT_ADDU (X, 0) = X  */
> >> +(simplify
> >> + (IFN_SAT_ADDU:c @0 integer_zerop)
> >> + @0)
> >> +
> >> +#endif
> >> +
> >>   /* A few cases of fold-const.cc negate_expr_p predicate.  */
> >>   (match negate_expr_p
> >>    INTEGER_CST
> >> diff --git a/gcc/optabs.def b/gcc/optabs.def
> >> index ad14f9328b9..a2c11b7707b 100644
> >> --- a/gcc/optabs.def
> >> +++ b/gcc/optabs.def
> >> @@ -300,6 +300,8 @@ OPTAB_D (usubc5_optab, "usubc$I$a5")
> >>   OPTAB_D (addptr3_optab, "addptr$a3")
> >>   OPTAB_D (spaceship_optab, "spaceship$a3")
> >>
> >> +OPTAB_D (sat_addu_optab, "sat_addu_$a3")
> >> +
> >>   OPTAB_D (smul_highpart_optab, "smul$a3_highpart")
> >>   OPTAB_D (umul_highpart_optab, "umul$a3_highpart")
> >>
> >> diff --git a/gcc/testsuite/gcc.target/riscv/sat_addu-1.c
> b/gcc/testsuite/gcc.target/riscv/sat_addu-1.c
> >> new file mode 100644
> >> index 00000000000..229abef0faa
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/riscv/sat_addu-1.c
> >> @@ -0,0 +1,18 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fno-schedule-insns -fno-
> schedule-insns2" } */
> >> +/* { dg-skip-if "" { *-*-* } { "-flto" } } */
> >> +/* { dg-final { check-function-bodies "**" "" } } */
> >> +
> >> +#include "sat_arith.h"
> >> +
> >> +/*
> >> +** sat_addu_uint8_t:
> >> +** add\s+[atx][0-9]+,\s*a0,\s*a1
> >> +** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
> >> +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
> >> +** neg\s+[atx][0-9]+,\s*[atx][0-9]+
> >> +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
> >> +** andi\s+a0,\s*a0,\s*0xff
> >> +** ret
> >> +*/
> >> +DEF_SAT_ADDU(uint8_t)
> >> diff --git a/gcc/testsuite/gcc.target/riscv/sat_addu-2.c
> b/gcc/testsuite/gcc.target/riscv/sat_addu-2.c
> >> new file mode 100644
> >> index 00000000000..4023b030811
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/riscv/sat_addu-2.c
> >> @@ -0,0 +1,20 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fno-schedule-insns -fno-
> schedule-insns2" } */
> >> +/* { dg-skip-if "" { *-*-* } { "-flto" } } */
> >> +/* { dg-final { check-function-bodies "**" "" } } */
> >> +
> >> +#include "sat_arith.h"
> >> +
> >> +/*
> >> +** sat_addu_uint16_t:
> >> +** add\s+[atx][0-9]+,\s*a0,\s*a1
> >> +** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
> >> +** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
> >> +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
> >> +** neg\s+[atx][0-9]+,\s*[atx][0-9]+
> >> +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
> >> +** slli\s+a0,\s*a0,\s*48
> >> +** srli\s+a0,\s*a0,\s*48
> >> +** ret
> >> +*/
> >> +DEF_SAT_ADDU(uint16_t)
> >> diff --git a/gcc/testsuite/gcc.target/riscv/sat_addu-3.c
> b/gcc/testsuite/gcc.target/riscv/sat_addu-3.c
> >> new file mode 100644
> >> index 00000000000..4d0af97fb67
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/riscv/sat_addu-3.c
> >> @@ -0,0 +1,17 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fno-schedule-insns -fno-
> schedule-insns2" } */
> >> +/* { dg-skip-if "" { *-*-* } { "-flto" } } */
> >> +/* { dg-final { check-function-bodies "**" "" } } */
> >> +
> >> +#include "sat_arith.h"
> >> +
> >> +/*
> >> +** sat_addu_uint32_t:
> >> +** addw\s+[atx][0-9]+,\s*a0,\s*a1
> >> +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
> >> +** neg\s+[atx][0-9]+,\s*[atx][0-9]+
> >> +** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
> >> +** sext.w\s+a0,\s*a0
> >> +** ret
> >> +*/
> >> +DEF_SAT_ADDU(uint32_t)
> >> diff --git a/gcc/testsuite/gcc.target/riscv/sat_addu-4.c
> b/gcc/testsuite/gcc.target/riscv/sat_addu-4.c
> >> new file mode 100644
> >> index 00000000000..926f31266e3
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/riscv/sat_addu-4.c
> >> @@ -0,0 +1,16 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fno-schedule-insns -fno-
> schedule-insns2" } */
> >> +/* { dg-skip-if "" { *-*-* } { "-flto" } } */
> >> +/* { dg-final { check-function-bodies "**" "" } } */
> >> +
> >> +#include "sat_arith.h"
> >> +
> >> +/*
> >> +** sat_addu_uint64_t:
> >> +** add\s+[atx][0-9]+,\s*a0,\s*a1
> >> +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
> >> +** neg\s+[atx][0-9]+,\s*[atx][0-9]+
> >> +** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
> >> +** ret
> >> +*/
> >> +DEF_SAT_ADDU(uint64_t)
> >> diff --git a/gcc/testsuite/gcc.target/riscv/sat_addu-run-1.c
> b/gcc/testsuite/gcc.target/riscv/sat_addu-run-1.c
> >> new file mode 100644
> >> index 00000000000..b19515c39d1
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/riscv/sat_addu-run-1.c
> >> @@ -0,0 +1,42 @@
> >> +/* { dg-do run { target { riscv_v } } } */
> >> +/* { dg-additional-options "-std=c99" } */
> >> +
> >> +#include "sat_arith.h"
> >> +
> >> +DEF_SAT_ADDU(uint8_t)
> >> +
> >> +int
> >> +main ()
> >> +{
> >> +  if (RUN_SAT_ADDU (uint8_t, 0, 0) != 0)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint8_t, 0, 1) != 1)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint8_t, 1, 1) != 2)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint8_t, 0, 254) != 254)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint8_t, 1, 254) != 255)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint8_t, 2, 254) != 255)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint8_t, 0, 255) != 255)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint8_t, 1, 255) != 255)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint8_t, 2, 255) != 255)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint8_t, 255, 255) != 255)
> >> +    __builtin_abort ();
> >> +
> >> +  return 0;
> >> +}
> >> diff --git a/gcc/testsuite/gcc.target/riscv/sat_addu-run-2.c
> b/gcc/testsuite/gcc.target/riscv/sat_addu-run-2.c
> >> new file mode 100644
> >> index 00000000000..90073fbe4ba
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/riscv/sat_addu-run-2.c
> >> @@ -0,0 +1,42 @@
> >> +/* { dg-do run { target { riscv_v } } } */
> >> +/* { dg-additional-options "-std=c99" } */
> >> +
> >> +#include "sat_arith.h"
> >> +
> >> +DEF_SAT_ADDU(uint16_t)
> >> +
> >> +int
> >> +main ()
> >> +{
> >> +  if (RUN_SAT_ADDU (uint16_t, 0, 0) != 0)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint16_t, 0, 1) != 1)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint16_t, 1, 1) != 2)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint16_t, 0, 65534) != 65534)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint16_t, 1, 65534) != 65535)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint16_t, 2, 65534) != 65535)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint16_t, 0, 65535) != 65535)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint16_t, 1, 65535) != 65535)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint16_t, 2, 65535) != 65535)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint16_t, 65535, 65535) != 65535)
> >> +    __builtin_abort ();
> >> +
> >> +  return 0;
> >> +}
> >> diff --git a/gcc/testsuite/gcc.target/riscv/sat_addu-run-3.c
> b/gcc/testsuite/gcc.target/riscv/sat_addu-run-3.c
> >> new file mode 100644
> >> index 00000000000..996dd3de737
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/riscv/sat_addu-run-3.c
> >> @@ -0,0 +1,42 @@
> >> +/* { dg-do run { target { riscv_v } } } */
> >> +/* { dg-additional-options "-std=c99" } */
> >> +
> >> +#include "sat_arith.h"
> >> +
> >> +DEF_SAT_ADDU(uint32_t)
> >> +
> >> +int
> >> +main ()
> >> +{
> >> +  if (RUN_SAT_ADDU (uint32_t, 0, 0) != 0)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint32_t, 0, 1) != 1)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint32_t, 1, 1) != 2)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint32_t, 0, 4294967294) != 4294967294)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint32_t, 1, 4294967294) != 4294967295)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint32_t, 2, 4294967294) != 4294967295)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint32_t, 0, 4294967295) != 4294967295)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint32_t, 1, 4294967295) != 4294967295)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint32_t, 2, 4294967295) != 4294967295)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint32_t, 4294967295, 4294967295) != 4294967295)
> >> +    __builtin_abort ();
> >> +
> >> +  return 0;
> >> +}
> >> diff --git a/gcc/testsuite/gcc.target/riscv/sat_addu-run-4.c
> b/gcc/testsuite/gcc.target/riscv/sat_addu-run-4.c
> >> new file mode 100644
> >> index 00000000000..51a5421577b
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/riscv/sat_addu-run-4.c
> >> @@ -0,0 +1,49 @@
> >> +/* { dg-do run { target { riscv_v } } } */
> >> +/* { dg-additional-options "-std=c99" } */
> >> +
> >> +#include "sat_arith.h"
> >> +
> >> +DEF_SAT_ADDU(uint64_t)
> >> +
> >> +int
> >> +main ()
> >> +{
> >> +  if (RUN_SAT_ADDU (uint64_t, 0, 0) != 0)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint64_t, 0, 1) != 1)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint64_t, 1, 1) != 2)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint64_t, 0, 18446744073709551614u)
> >> +    != 18446744073709551614u)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint64_t, 1, 18446744073709551614u)
> >> +    != 18446744073709551615u)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint64_t, 2, 18446744073709551614u)
> >> +    != 18446744073709551615u)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint64_t, 0, 18446744073709551615u)
> >> +    != 18446744073709551615u)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint64_t, 1, 18446744073709551615u)
> >> +    != 18446744073709551615u)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint64_t, 2, 18446744073709551615u)
> >> +    != 18446744073709551615u)
> >> +    __builtin_abort ();
> >> +
> >> +  if (RUN_SAT_ADDU (uint64_t, 18446744073709551615u,
> 18446744073709551615u)
> >> +    != 18446744073709551615u)
> >> +    __builtin_abort ();
> >> +
> >> +  return 0;
> >> +}
> >> diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h
> b/gcc/testsuite/gcc.target/riscv/sat_arith.h
> >> new file mode 100644
> >> index 00000000000..4c00157685e
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
> >> @@ -0,0 +1,15 @@
> >> +#ifndef HAVE_SAT_ARITH
> >> +#define HAVE_SAT_ARITH
> >> +
> >> +#include <stdint.h>
> >> +
> >> +#define DEF_SAT_ADDU(TYPE)                       \
> >> +TYPE __attribute__((noinline))                   \
> >> +sat_addu_##TYPE (TYPE x, TYPE y)                 \
> >> +{                                                \
> >> +  return (x + y) | (-(TYPE)((TYPE)(x + y) < x)); \
> >> +}
> >> +
> >> +#define RUN_SAT_ADDU(TYPE, x, y) sat_addu_##TYPE(x, y)
> >> +
> >> +#endif
> >> --
> >> 2.34.1
> >>

RE: [PATCH v1] Internal-fn: Add new internal function SAT_ADDU

Reply via email to