On Tue, Sep 3, 2024 at 2:34 PM <pan2...@intel.com> wrote: > > From: Pan Li <pan2...@intel.com> > > This patch would like to support the form 2 of the scalar signed > integer .SAT_ADD. Aka below example: > > Form 2: > #define DEF_SAT_S_ADD_FMT_2(T, UT, MIN, MAX) \ > T __attribute__((noinline)) \ > sat_s_add_##T##_fmt_2 (T x, T y) \ > { \ > T sum = (UT)x + (UT)y; \ > \ > if ((x ^ y) < 0 || (sum ^ x) >= 0) \ > return sum; \ > \ > return x < 0 ? MIN : MAX; \ > } > > DEF_SAT_S_ADD_FMT_2(int8_t, uint8_t, INT8_MIN, INT8_MAX) > > We can tell the difference before and after this patch if backend > implemented the ssadd<m>3 pattern similar as below. > > Before this patch: > 4 │ __attribute__((noinline)) > 5 │ int8_t sat_s_add_int8_t_fmt_2 (int8_t x, int8_t y) > 6 │ { > 7 │ int8_t sum; > 8 │ unsigned char x.0_1; > 9 │ unsigned char y.1_2; > 10 │ unsigned char _3; > 11 │ signed char _4; > 12 │ signed char _5; > 13 │ int8_t _6; > 14 │ _Bool _11; > 15 │ signed char _12; > 16 │ signed char _13; > 17 │ signed char _14; > 18 │ signed char _22; > 19 │ signed char _23; > 20 │ > 21 │ ;; basic block 2, loop depth 0 > 22 │ ;; pred: ENTRY > 23 │ x.0_1 = (unsigned char) x_7(D); > 24 │ y.1_2 = (unsigned char) y_8(D); > 25 │ _3 = x.0_1 + y.1_2; > 26 │ sum_9 = (int8_t) _3; > 27 │ _4 = x_7(D) ^ y_8(D); > 28 │ _5 = x_7(D) ^ sum_9; > 29 │ _23 = ~_4; > 30 │ _22 = _5 & _23; > 31 │ if (_22 >= 0) > 32 │ goto <bb 4>; [42.57%] > 33 │ else > 34 │ goto <bb 3>; [57.43%] > 35 │ ;; succ: 4 > 36 │ ;; 3 > 37 │ > 38 │ ;; basic block 3, loop depth 0 > 39 │ ;; pred: 2 > 40 │ _11 = x_7(D) < 0; > 41 │ _12 = (signed char) _11; > 42 │ _13 = -_12; > 43 │ _14 = _13 ^ 127; > 44 │ ;; succ: 4 > 45 │ > 46 │ ;; basic block 4, loop depth 0 > 47 │ ;; pred: 2 > 48 │ ;; 3 > 49 │ # _6 = PHI <sum_9(2), _14(3)> > 50 │ return _6; > 51 │ ;; succ: EXIT > 52 │ > 53 │ } > > After this patch: > 4 │ __attribute__((noinline)) > 5 │ int8_t sat_s_add_int8_t_fmt_2 (int8_t x, int8_t y) > 6 │ { > 7 │ int8_t _6; > 8 │ > 9 │ ;; basic block 2, loop depth 0 > 10 │ ;; pred: ENTRY > 11 │ _6 = .SAT_ADD (x_7(D), y_8(D)); [tail call] > 12 │ return _6; > 13 │ ;; succ: EXIT > 14 │ > 15 │ } > > The below test suites are passed for this patch. > * The rv64gcv fully regression test. > * The x86 bootstrap test. > * The x86 fully regression test. > > gcc/ChangeLog: > > * match.pd: Add the form 2 of signed .SAT_ADD matching. > > Signed-off-by: Pan Li <pan2...@intel.com> > --- > gcc/match.pd | 15 +++++++++++++++ > 1 file changed, 15 insertions(+) > > diff --git a/gcc/match.pd b/gcc/match.pd > index 4298e89dad6..1372f2ba377 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -3207,6 +3207,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) > && types_match (type, @0, @1)))) > > +/* Signed saturation add, case 2: > + T sum = (T)((UT)X + (UT)Y) > + SAT_S_ADD = (X ^ sum) & !(X ^ Y) >= 0 ? sum : (-(T)(X < 0) ^ MAX); > + > + The T and UT are type pair like T=int8_t, UT=uint8_t. */ > +(match (signed_integer_sat_add @0 @1) > + (cond^ (ge (bit_and:c (bit_xor:c @0 (nop_convert@2 (plus (nop_convert @0) > + (nop_convert @1)))) > + (bit_not (bit_xor:c @0 @1)))
You only need one :c on either bit_xor. > + integer_zerop) > + @2 > + (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)) > + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) > + && types_match (type, @0, @1)))) I think the types_match is redundant as you have the bit_xor combining both. OK with those changes. Richard. > + > /* Unsigned saturation sub, case 1 (branch with gt): > SAT_U_SUB = X > Y ? X - Y : 0 */ > (match (unsigned_integer_sat_sub @0 @1) > -- > 2.43.0 >