On Mon, May 19, 2025 at 10:42 AM Li Xu <xu...@eswincomputing.com> wrote:
>
> From: xuli <xu...@eswincomputing.com>
>
> This patch would like to support vector SAT_ADD when one of the operands
> is a signed IMM.
>
> void __attribute__((noinline))                                       \
> vec_sat_s_add_imm_##T##_fmt_1##_##INDEX (T *out, T *op_1, unsigned limit) \
> {                                                                    \
>   unsigned i;                                                        \
>   for (i = 0; i < limit; i++)                                        \
>     {                                                                \
>       T x = op_1[i];                                                 \
>       T sum = (UT)x + (UT)IMM;                                       \
>       out[i] = (x ^ IMM) < 0                                         \
>         ? sum                                                        \
>         : (sum ^ x) >= 0                                             \
>           ? sum                                                      \
>           : x < 0 ? MIN : MAX;                                       \
>     }                                                                \
> }
>
> Take the form1 instantiation below as an example:
> DEF_VEC_SAT_S_ADD_IMM_FMT_1(0, int8_t, uint8_t, 9, INT8_MIN, INT8_MAX)
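>
> For reference, that instantiation roughly expands to the function below (a
> sketch, assuming the macro parameters are (INDEX, T, UT, IMM, MIN, MAX),
> which matches the function name in the dumps):
>
> void __attribute__((noinline))
> vec_sat_s_add_imm_int8_t_fmt_1_0 (int8_t *out, int8_t *op_1, unsigned limit)
> {
>   unsigned i;
>   for (i = 0; i < limit; i++)
>     {
>       int8_t x = op_1[i];
>       /* Add in the unsigned type; the narrowing back wraps under GCC.  */
>       int8_t sum = (uint8_t)x + (uint8_t)9;
>       out[i] = (x ^ 9) < 0        /* x and IMM differ in sign: no overflow.  */
>         ? sum
>         : (sum ^ x) >= 0          /* sum kept the sign of x: no overflow.  */
>           ? sum
>           : x < 0 ? INT8_MIN : INT8_MAX;
>     }
> }
>
> For example, x = 120 gives sum = (int8_t)129 = -127; then (x ^ 9) >= 0 and
> (sum ^ x) < 0, so the result saturates to INT8_MAX = 127.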
>
> Before this patch (the saturation is open-coded with compare masks,
> .VCOND_MASK and .COND_ADD):
> __attribute__((noinline))
> void vec_sat_s_add_imm_int8_t_fmt_1_0 (int8_t * restrict out, int8_t * restrict op_1, unsigned int limit)
> {
>   vector([16,16]) signed char * vectp_out.28;
>   vector([16,16]) signed char vect_iftmp.27;
>   vector([16,16]) <signed-boolean:1> mask__28.26;
>   vector([16,16]) <signed-boolean:1> mask__29.25;
>   vector([16,16]) <signed-boolean:1> mask__19.19;
>   vector([16,16]) <signed-boolean:1> mask__31.18;
>   vector([16,16]) signed char vect__6.17;
>   vector([16,16]) signed char vect__5.16;
>   vector([16,16]) signed char vect_sum_15.15;
>   vector([16,16]) unsigned char vect__4.14;
>   vector([16,16]) unsigned char vect_x.13;
>   vector([16,16]) signed char vect_x_14.12;
>   vector([16,16]) signed char * vectp_op_1.10;
>   vector([16,16]) <signed-boolean:1> _78;
>   vector([16,16]) unsigned char _79;
>   vector([16,16]) unsigned char _80;
>   unsigned long _92;
>   unsigned long ivtmp_93;
>   unsigned long ivtmp_94;
>   unsigned long _95;
>
>   <bb 2> [local count: 118111598]:
>   if (limit_12(D) != 0)
>     goto <bb 3>; [89.00%]
>   else
>     goto <bb 5>; [11.00%]
>
>   <bb 3> [local count: 105119322]:
>   _92 = (unsigned long) limit_12(D);
>
>   <bb 4> [local count: 955630226]:
>   # vectp_op_1.10_62 = PHI <vectp_op_1.10_63(4), op_1_13(D)(3)>
>   # vectp_out.28_89 = PHI <vectp_out.28_90(4), out_16(D)(3)>
>   # ivtmp_93 = PHI <ivtmp_94(4), _92(3)>
>   _95 = .SELECT_VL (ivtmp_93, POLY_INT_CST [16, 16]);
>   vect_x_14.12_64 = .MASK_LEN_LOAD (vectp_op_1.10_62, 8B, { -1, ... }, _95, 0);
>   vect_x.13_65 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned char>(vect_x_14.12_64);
>   vect__4.14_67 = vect_x.13_65 + { 9, ... };
>   vect_sum_15.15_68 = VIEW_CONVERT_EXPR<vector([16,16]) signed char>(vect__4.14_67);
>   vect__5.16_70 = vect_x_14.12_64 ^ { 9, ... };
>   vect__6.17_71 = vect_x_14.12_64 ^ vect_sum_15.15_68;
>   mask__31.18_73 = vect__5.16_70 >= { 0, ... };
>   mask__19.19_75 = vect_x_14.12_64 < { 0, ... };
>   mask__29.25_85 = vect__6.17_71 < { 0, ... };
>   mask__28.26_86 = mask__31.18_73 & mask__29.25_85;
>   _78 = ~mask__28.26_86;
>   _79 = .VCOND_MASK (mask__19.19_75, { 128, ... }, { 127, ... });
>   _80 = .COND_ADD (_78, vect_x.13_65, { 9, ... }, _79);
>   vect_iftmp.27_87 = VIEW_CONVERT_EXPR<vector([16,16]) signed char>(_80);
>   .MASK_LEN_STORE (vectp_out.28_89, 8B, { -1, ... }, _95, 0, vect_iftmp.27_87);
>   vectp_op_1.10_63 = vectp_op_1.10_62 + _95;
>   vectp_out.28_90 = vectp_out.28_89 + _95;
>   ivtmp_94 = ivtmp_93 - _95;
>   if (ivtmp_94 != 0)
>     goto <bb 4>; [89.00%]
>   else
>     goto <bb 5>; [11.00%]
>
>   <bb 5> [local count: 118111600]:
>   return;
>
> }
>
> After this patch (the whole expression is recognized as a single .SAT_ADD):
> __attribute__((noinline))
> void vec_sat_s_add_imm_int8_t_fmt_1_0 (int8_t * restrict out, int8_t * restrict op_1, unsigned int limit)
> {
>   vector([16,16]) signed char * vectp_out.12;
>   vector([16,16]) signed char vect_patt_10.11;
>   vector([16,16]) signed char vect_x_14.10;
>   vector([16,16]) signed char D.2852;
>   vector([16,16]) signed char * vectp_op_1.8;
>   vector([16,16]) signed char _73(D);
>   unsigned long _80;
>   unsigned long ivtmp_81;
>   unsigned long ivtmp_82;
>   unsigned long _83;
>
>   <bb 2> [local count: 118111598]:
>   if (limit_12(D) != 0)
>     goto <bb 3>; [89.00%]
>   else
>     goto <bb 5>; [11.00%]
>
>   <bb 3> [local count: 105119322]:
>   _80 = (unsigned long) limit_12(D);
>
>   <bb 4> [local count: 955630226]:
>   # vectp_op_1.8_71 = PHI <vectp_op_1.8_72(4), op_1_13(D)(3)>
>   # vectp_out.12_77 = PHI <vectp_out.12_78(4), out_16(D)(3)>
>   # ivtmp_81 = PHI <ivtmp_82(4), _80(3)>
>   _83 = .SELECT_VL (ivtmp_81, POLY_INT_CST [16, 16]);
>   vect_x_14.10_74 = .MASK_LEN_LOAD (vectp_op_1.8_71, 8B, { -1, ... }, _73(D), _83, 0);
>   vect_patt_10.11_75 = .SAT_ADD (vect_x_14.10_74, { 9, ... });
>   .MASK_LEN_STORE (vectp_out.12_77, 8B, { -1, ... }, _83, 0, vect_patt_10.11_75);
>   vectp_op_1.8_72 = vectp_op_1.8_71 + _83;
>   vectp_out.12_78 = vectp_out.12_77 + _83;
>   ivtmp_82 = ivtmp_81 - _83;
>   if (ivtmp_82 != 0)
>     goto <bb 4>; [89.00%]
>   else
>     goto <bb 5>; [11.00%]
>
>   <bb 5> [local count: 118111600]:
>   return;
>
> }
>
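> Not part of the patch, but for intuition: the single .SAT_ADD above can
> lower to one saturating-add instruction on the target. A rough RVV
> intrinsics equivalent of one strip of the vectorized loop (a sketch only;
> sat_add_imm9_step and the intrinsic usage are my illustration, not taken
> from this patch):
>
>   #include <riscv_vector.h>
>   #include <stdint.h>
>
>   /* Sketch: one strip of the loop -- load, saturating-add 9, store.  */
>   static void sat_add_imm9_step (int8_t *out, const int8_t *op_1, size_t n)
>   {
>     size_t vl = __riscv_vsetvl_e8m1 (n);               /* like .SELECT_VL */
>     vint8m1_t vx = __riscv_vle8_v_i8m1 (op_1, vl);     /* like .MASK_LEN_LOAD */
>     vint8m1_t vs = __riscv_vsadd_vx_i8m1 (vx, 9, vl);  /* signed saturating add */
>     __riscv_vse8_v_i8m1 (out, vs, vl);                 /* like .MASK_LEN_STORE */
>   }
>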
> The following test suites pass for this patch:
> 1. The rv64gcv fully regression tests.
> 2. The x86 bootstrap tests.
> 3. The x86 fully regression tests.
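>
> A note on the (-(T)(X < 0) ^ MAX) idiom in the match.pd comment below: it
> selects the saturation value branchlessly. A minimal sketch (sat_val_i8 is
> a hypothetical helper name, not part of the patch):
>
>   /* x < 0  -> -(int8_t)1 ^ INT8_MAX == -1 ^ 127 == -128 == INT8_MIN
>      x >= 0 -> -(int8_t)0 ^ INT8_MAX ==  0 ^ 127 ==  127 == INT8_MAX  */
>   static inline int8_t sat_val_i8 (int8_t x)
>   {
>     return -(int8_t)(x < 0) ^ INT8_MAX;
>   }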

OK.

Thanks,
Richard.

> Signed-off-by: Li Xu <xu...@eswincomputing.com>
> gcc/ChangeLog:
>
        * match.pd: Add signed vector SAT_ADD IMM form1 matching.
>
> ---
>  gcc/match.pd | 10 ++++++++++
>  1 file changed, 10 insertions(+)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index d83fd696ada..98411af3940 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3369,6 +3369,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>                     (ge (bit_xor:c @0 @1) integer_zerop))
>          (signed_integer_sat_val @0)
>          @2))
> + (match (signed_integer_sat_add @0 @1)
> +  /* T SUM = (T)((UT)X + (UT)IMM)
> +     SAT_S_ADD = (X ^ SUM) < 0 && (X ^ IMM) >= 0 ? (-(T)(X < 0) ^ MAX) : SUM  */
> +  (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (plus (nop_convert @0)
> +                                                        INTEGER_CST@1)))
> +                       integer_zerop)
> +                   (ge (bit_xor:c @0 INTEGER_CST@3) integer_zerop))
> +        (signed_integer_sat_val @0)
> +        @2)
> +  (if (wi::eq_p (wi::to_wide (@1), wi::to_wide (@3)))))
>   (match (signed_integer_sat_add @0 @1)
>     /* SUM = .ADD_OVERFLOW (X, Y)
>        SAT_S_ADD = IMAGPART_EXPR (SUM) != 0 ? (-(T)(X < 0) ^ MAX) : SUM  */
> --
> 2.17.1
>
