RE: [PATCH v1] Match: Support form 1 for scalar signed integer .SAT_ADD

Li, Pan2 Mon, 05 Aug 2024 18:22:00 -0700

Hi Richard,

It looks like the plus will have additional convert to unsigned in int8 and 
int16, see below example in test.c.006t.gimple.
And we need these convert ops in one matching pattern to cover all int scalar 
types.


I am not sure if there is a better way here, given convert in matching pattern 
is not very elegant up to a point.

int16_t
add_i16 (int16_t a, int16_t b)
{
  int16_t sum = a + b;
  return sum;
}

int32_t
add_i32 (int32_t a, int32_t b)
{
  int32_t sum = a + b;
  return sum;
}

------- 006t.gimple -------
int16_t add_i16 (int16_t a, int16_t b)
{
  int16_t D.2815;
  int16_t sum;

  a.0_1 = (unsigned short) a;
  b.1_2 = (unsigned short) b;
  _3 = a.0_1 + b.1_2;
  sum = (int16_t) _3;
  D.2815 = sum;
  return D.2815;
}

int32_t add_i32 (int32_t a, int32_t b)
{
  int32_t D.2817;
  int32_t sum;

  sum = a + b;
  D.2817 = sum;
  return D.2817;
}

Pan

-----Original Message-----
From: Li, Pan2 
Sent: Monday, August 5, 2024 9:52 PM
To: Richard Biener <richard.guent...@gmail.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@gmail.com; 
jeffreya...@gmail.com; rdapp....@gmail.com
Subject: RE: [PATCH v1] Match: Support form 1 for scalar signed integer .SAT_ADD

Thanks Richard for comments.

> The convert looks odd to me given @0 is involved in both & operands.

The convert is introduced as the GIMPLE IL is somehow different for int8_t when 
compares to int32_t or int64_t.
There are some additional ops convert to unsigned for plus, see below line 8-9 
and line 22-23.
But we cannot see similar GIMPLE IL for int32_t and int64_t. To reconcile the 
types from int8_t to int64_t, add the
convert here.

Or may be I have some mistake in the example, let me revisit it and send v2 if 
no surprise.

   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_1 (int8_t x, int8_t y)
   6   │ {
   7   │   int8_t sum;
   8   │   unsigned char x.1_1;
   9   │   unsigned char y.2_2;
  10   │   unsigned char _3;
  11   │   signed char _4;
  12   │   signed char _5;
  13   │   int8_t _6;
  14   │   _Bool _11;
  15   │   signed char _12;
  16   │   signed char _13;
  17   │   signed char _14;
  18   │   signed char _22;
  19   │   signed char _23;
  20   │
  21   │   <bb 2> [local count: 1073741822]:
  22   │   x.1_1 = (unsigned char) x_7(D);
  23   │   y.2_2 = (unsigned char) y_8(D);
  24   │   _3 = x.1_1 + y.2_2;
  25   │   sum_9 = (int8_t) _3;
  26   │   _4 = x_7(D) ^ y_8(D);
  27   │   _5 = x_7(D) ^ sum_9;
  28   │   _23 = ~_4;
  29   │   _22 = _5 & _23;
  30   │   if (_22 < 0)
  31   │     goto <bb 3>; [41.00%]
  32   │   else
  33   │     goto <bb 4>; [59.00%]
  34   │
  35   │   <bb 3> [local count: 259738146]:
  36   │   _11 = x_7(D) < 0;
  37   │   _12 = (signed char) _11;
  38   │   _13 = -_12;
  39   │   _14 = _13 ^ 127;
  40   │
  41   │   <bb 4> [local count: 1073741824]:
  42   │   # _6 = PHI <_14(3), sum_9(2)>
  43   │   return _6;
  44   │
  45   │ }

Pan

-----Original Message-----
From: Richard Biener <richard.guent...@gmail.com> 
Sent: Monday, August 5, 2024 7:16 PM
To: Li, Pan2 <pan2...@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@gmail.com; 
jeffreya...@gmail.com; rdapp....@gmail.com
Subject: Re: [PATCH v1] Match: Support form 1 for scalar signed integer .SAT_ADD

On Mon, Aug 5, 2024 at 9:14 AM <pan2...@intel.com> wrote:
>
> From: Pan Li <pan2...@intel.com>
>
> This patch would like to support the form 1 of the scalar signed
> integer .SAT_ADD.  Aka below example:
>
> Form 1:
>   #define DEF_SAT_S_ADD_FMT_1(T)             \
>   T __attribute__((noinline))                \
>   sat_s_add_##T##_fmt_1 (T x, T y)           \
>   {                                          \
>     T min = (T)1u << (sizeof (T) * 8 - 1);   \
>     T max = min - 1;                         \
>     return (x ^ y) < 0                       \
>       ? (T)(x + y)                           \
>       : ((T)(x + y) ^ x) >= 0                \
>         ? (T)(x + y)                         \
>         : x < 0 ? min : max;                 \
>   }
>
> DEF_SAT_S_ADD_FMT_1 (int64_t)
>
> We can tell the difference before and after this patch if backend
> implemented the ssadd<m>3 pattern similar as below.
>
> Before this patch:
>    4   │ __attribute__((noinline))
>    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
>    6   │ {
>    7   │   long int _1;
>    8   │   long int _2;
>    9   │   long int _3;
>   10   │   int64_t _4;
>   11   │   long int _7;
>   12   │   _Bool _9;
>   13   │   long int _10;
>   14   │   long int _11;
>   15   │   long int _12;
>   16   │   long int _13;
>   17   │
>   18   │ ;;   basic block 2, loop depth 0
>   19   │ ;;    pred:       ENTRY
>   20   │   _1 = x_5(D) ^ y_6(D);
>   21   │   _13 = x_5(D) + y_6(D);
>   22   │   _3 = x_5(D) ^ _13;
>   23   │   _2 = ~_1;
>   24   │   _7 = _2 & _3;
>   25   │   if (_7 >= 0)
>   26   │     goto <bb 4>; [59.00%]
>   27   │   else
>   28   │     goto <bb 3>; [41.00%]
>   29   │ ;;    succ:       4
>   30   │ ;;                3
>   31   │
>   32   │ ;;   basic block 3, loop depth 0
>   33   │ ;;    pred:       2
>   34   │   _9 = x_5(D) < 0;
>   35   │   _10 = (long int) _9;
>   36   │   _11 = -_10;
>   37   │   _12 = _11 ^ 9223372036854775807;
>   38   │ ;;    succ:       4
>   39   │
>   40   │ ;;   basic block 4, loop depth 0
>   41   │ ;;    pred:       2
>   42   │ ;;                3
>   43   │   # _4 = PHI <_13(2), _12(3)>
>   44   │   return _4;
>   45   │ ;;    succ:       EXIT
>   46   │
>   47   │ }
>
> After this patch:
>    4   │ __attribute__((noinline))
>    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
>    6   │ {
>    7   │   int64_t _4;
>    8   │
>    9   │ ;;   basic block 2, loop depth 0
>   10   │ ;;    pred:       ENTRY
>   11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
>   12   │   return _4;
>   13   │ ;;    succ:       EXIT
>   14   │
>   15   │ }
>
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> * The x86 bootstrap test.
> * The x86 fully regression test.
>
> gcc/ChangeLog:
>
>         * match.pd: Add the matching for signed .SAT_ADD.
>         * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
>         matching func decl.
>         (match_unsigned_saturation_add): Try signed .SAT_ADD and rename
>         to ...
>         (match_saturation_add): ... here.
>         (math_opts_dom_walker::after_dom_children): Update the above renamed
>         func from caller.
>
> Signed-off-by: Pan Li <pan2...@intel.com>
> ---
>  gcc/match.pd              | 14 +++++++++++++
>  gcc/tree-ssa-math-opts.cc | 42 ++++++++++++++++++++++++++++++++++-----
>  2 files changed, 51 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index c9c8478d286..0a2ffc733d3 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3311,6 +3311,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    }
>    (if (otype_precision < itype_precision && wi::eq_p (trunc_max, 
> int_cst))))))
>
> +/* Signed saturation add, case 1:
> +   T min = (T)1u << (sizeof (T) * 8 - 1);
> +   T max = min - 1;
> +   SAT_S_ADD = (X ^ Y) < 0
> +     ? (X + Y)
> +     : ((T)(X + Y) ^ X) >= 0 ? (X + Y) : X < 0 ? min : max.  */
> +(match (signed_integer_sat_add @0 @1)
> +  (cond^ (ge (bit_and:c (bit_xor @0 (convert? @2)) (bit_not (bit_xor @0 @1)))

This matches arbitrary Z in (X ^ (T)Z) & ~(X ^ Y) which cannot be intended.
The GIMPLE IL in the comment below suggests Z == X + Y?

The convert looks odd to me given @0 is involved in both & operands.
The comment above has the same logic error.

I believe the bit_xor lack :c

> +   integer_zerop)

Please indent this to line up with the first operand of the 'ge' to make it
better readable.

> +   (convert? (plus@2 (convert1? @0) (convert1? @1)))

Same with the converts.  The plus needs :c I think.  Is this about
common sign-conversions being hoisted from (int)x + (int)y -> (int)(x+y)?

Note all the :c and conditional converts makes this a quite heavy pattern
(all combinations of swaps and converts gets code).

> +   (bit_xor (negate (convert (lt @0 integer_zerop))) max_value))
> + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
> +      && types_match (type, @0, @1))))
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 8d96a4c964b..d5c9b475f72 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, 
> tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
>
> +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
> +
>  static void
>  build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn 
> fn,
>                                     tree lhs, tree op_0, tree op_1)
> @@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator 
> *gsi, gassign *stmt)
>  }
>
>  /*
> - * Try to match saturation unsigned add with PHI.
> + * Try to match saturation add with PHI.
> + * For unsigned integer:
>   *   <bb 2> :
>   *   _1 = x_3(D) + y_4(D);
>   *   if (_1 >= x_3(D))
> @@ -4086,10 +4089,38 @@ match_unsigned_saturation_add (gimple_stmt_iterator 
> *gsi, gassign *stmt)
>   *   # _2 = PHI <255(2), _1(3)>
>   *   =>
>   *   <bb 4> [local count: 1073741824]:
> - *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
> + *   _2 = .SAT_ADD (x_4(D), y_5(D));
> + *
> + * For signed integer:
> + *   _1 = x_5(D) ^ y_6(D);
> + *   _13 = x_5(D) + y_6(D);
> + *   _3 = x_5(D) ^ _13;
> + *   _2 = ~_1;
> + *   _7 = _2 & _3;
> + *   if (_7 >= 0)
> + *     goto <bb 4>; [59.00%]
> + *   else
> + *     goto <bb 3>; [41.00%]
> + *   ;;    succ:       4
> + *   ;;                3
> + *   ;;   basic block 3, loop depth 0
> + *   ;;    pred:       2
> + *   _9 = x_5(D) < 0;
> + *   _10 = (long int) _9;
> + *   _11 = -_10;
> + *   _12 = _11 ^ 9223372036854775807;
> + *   ;;    succ:       4
> + *   ;;   basic block 4, loop depth 0
> + *   ;;    pred:       2
> + *   ;;                3
> + *   # _4 = PHI <_13(2), _12(3)>
> + *   =>
> + *   ;;   basic block 2, loop depth 0
> + *   ;;    pred:       ENTRY
> + *   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]  */
>
>  static void
> -match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> +match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
>  {
>    if (gimple_phi_num_args (phi) != 2)
>      return;
> @@ -4097,7 +4128,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator 
> *gsi, gphi *phi)
>    tree ops[2];
>    tree phi_result = gimple_phi_result (phi);
>
> -  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
> +  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> +      || gimple_signed_integer_sat_add (phi_result, ops, NULL))
>      build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
>                                         ops[0], ops[1]);
>  }
> @@ -6097,7 +6129,7 @@ math_opts_dom_walker::after_dom_children (basic_block 
> bb)
>      gsi_next (&psi))
>      {
>        gimple_stmt_iterator gsi = gsi_after_labels (bb);
> -      match_unsigned_saturation_add (&gsi, psi.phi ());
> +      match_saturation_add (&gsi, psi.phi ());
>        match_unsigned_saturation_sub (&gsi, psi.phi ());
>      }
>
> --
> 2.43.0
>

RE: [PATCH v1] Match: Support form 1 for scalar signed integer .SAT_ADD

Reply via email to