Re: [PATCH v1 1/4] Match: Support form 1 for vector signed integer SAT_SUB

Richard Biener Fri, 11 Oct 2024 02:10:21 -0700

On Fri, Oct 11, 2024 at 8:24 AM <pan2...@intel.com> wrote:
>
> From: Pan Li <pan2...@intel.com>
>
> This patch would like to support the form 1 of the vector signed
> integer SAT_SUB.  Aka below example:
>
> Form 1:
>   #define DEF_VEC_SAT_S_SUB_FMT_1(T, UT, MIN, MAX)                     \
>   void __attribute__((noinline))                                       \
>   vec_sat_s_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
>   {                                                                    \
>     unsigned i;                                                        \
>     for (i = 0; i < limit; i++)                                        \
>       {                                                                \
>         T x = op_1[i];                                                 \
>         T y = op_2[i];                                                 \
>         T minus = (UT)x - (UT)y;                                       \
>         out[i] = (x ^ y) >= 0                                          \
>           ? minus                                                      \
>           : (minus ^ x) >= 0                                           \
>             ? minus                                                    \
>             : x < 0 ? MIN : MAX;                                       \
>       }                                                                \
>   }
>
> DEF_VEC_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)
>
> Before this patch:
>   91   │   _108 = .SELECT_VL (ivtmp_106, POLY_INT_CST [16, 16]);
>   92   │   vect_x_16.11_80 = .MASK_LEN_LOAD (vectp_op_1.9_78, 8B, { -1, ... 
> }, _108, 0);
>   93   │   _69 = vect_x_16.11_80 >> 7;
>   94   │   vect_x.12_81 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned 
> char>(vect_x_16.11_80);
>   95   │   vect_y_18.15_85 = .MASK_LEN_LOAD (vectp_op_2.13_83, 8B, { -1, ... 
> }, _108, 0);
>   96   │   vect__7.21_91 = vect_x_16.11_80 ^ vect_y_18.15_85;
>   97   │   mask__44.22_92 = vect__7.21_91 < { 0, ... };
>   98   │   vect_y.16_86 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned 
> char>(vect_y_18.15_85);
>   99   │   vect__6.17_87 = vect_x.12_81 - vect_y.16_86;
>  100   │   vect_minus_19.18_88 = VIEW_CONVERT_EXPR<vector([16,16]) signed 
> char>(vect__6.17_87);
>  101   │   vect__8.19_89 = vect_x_16.11_80 ^ vect_minus_19.18_88;
>  102   │   mask__42.20_90 = vect__8.19_89 < { 0, ... };
>  103   │   mask__41.23_93 = mask__42.20_90 & mask__44.22_92;
>  104   │   _4 = .COND_XOR (mask__41.23_93, _69, { 127, ... }, 
> vect_minus_19.18_88);
>  105   │   .MASK_LEN_STORE (vectp_out.31_102, 8B, { -1, ... }, _108, 0, _4);
>  106   │   vectp_op_1.9_79 = vectp_op_1.9_78 + _108;
>  107   │   vectp_op_2.13_84 = vectp_op_2.13_83 + _108;
>  108   │   vectp_out.31_103 = vectp_out.31_102 + _108;
>  109   │   ivtmp_107 = ivtmp_106 - _108;
>
> After this patch:
>   81   │   _102 = .SELECT_VL (ivtmp_100, POLY_INT_CST [16, 16]);
>   82   │   vect_x_16.11_89 = .MASK_LEN_LOAD (vectp_op_1.9_87, 8B, { -1, ... 
> }, _102, 0);
>   83   │   vect_y_18.14_93 = .MASK_LEN_LOAD (vectp_op_2.12_91, 8B, { -1, ... 
> }, _102, 0);
>   84   │   vect_patt_38.15_94 = .SAT_SUB (vect_x_16.11_89, vect_y_18.14_93);
>   85   │   .MASK_LEN_STORE (vectp_out.16_96, 8B, { -1, ... }, _102, 0, 
> vect_patt_38.15_94);
>   86   │   vectp_op_1.9_88 = vectp_op_1.9_87 + _102;
>   87   │   vectp_op_2.12_92 = vectp_op_2.12_91 + _102;
>   88   │   vectp_out.16_97 = vectp_out.16_96 + _102;
>   89   │   ivtmp_101 = ivtmp_100 - _102;
>
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> * The x86 bootstrap test.
> * The x86 fully regression test.


OK.

I wonder since we now can match many different variants of writing
signed and unsigned
saturation add and sub whether it makes sense to canonicalize to the "cheapest"
variant when the target doesn't support .SAT_SUB/ADD?  Are there any
"sub-patterns"
not forming the full saturation add/sub that can be
simplified/canonicalized in such
way maybe?

> gcc/ChangeLog:
>
>         * match.pd: Add case 1 matching pattern for vector signed SAT_SUB.
>
> Signed-off-by: Pan Li <pan2...@intel.com>
> ---
>  gcc/match.pd | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 8a7569ce387..a3c298d3a22 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3401,6 +3401,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* Signed saturation sub, case 4:
> +   T minus = (T)((UT)X - (UT)Y);
> +   SAT_S_SUB = (X ^ Y) < 0 & (X ^ minus) < 0 ? (-(T)(X < 0) ^ MAX) : minus;
> +
> +   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
> +(match (signed_integer_sat_sub @0 @1)
> + (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (minus (nop_convert @0)
> +                                                        (nop_convert @1))))
> +                      integer_zerop)
> +                  (lt (bit_xor:c @0 @1) integer_zerop))
> +       (bit_xor:c (nop_convert (negate (nop_convert (convert
> +                                                     (lt @0 
> integer_zerop)))))
> +                  max_value)
> +       @2)
> + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))))
> +
>  /* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT).
>     SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))).  */
>  (match (unsigned_integer_sat_trunc @0)
> --
> 2.43.0
>

Re: [PATCH v1 1/4] Match: Support form 1 for vector signed integer SAT_SUB

Reply via email to