On Mon, May 19, 2025 at 10:42 AM Li Xu <xu...@eswincomputing.com> wrote: > > From: xuli <xu...@eswincomputing.com> > > This patch would like to support vector SAT_ADD when one of the op > is singed IMM. > > void __attribute__((noinline)) \ > vec_sat_s_add_imm_##T##_fmt_1##_##INDEX (T *out, T *op_1, unsigned limit) \ > { \ > unsigned i; \ > for (i = 0; i < limit; i++) \ > { \ > T x = op_1[i]; \ > T sum = (UT)x + (UT)IMM; \ > out[i] = (x ^ IMM) < 0 \ > ? sum \ > : (sum ^ x) >= 0 \ > ? sum \ > : x < 0 ? MIN : MAX; \ > } \ > } > > Take below form1 as example: > DEF_VEC_SAT_S_ADD_IMM_FMT_1(0, int8_t, uint8_t, 9, INT8_MIN, INT8_MAX) > > Before this patch: > __attribute__((noinline)) > void vec_sat_s_add_imm_int8_t_fmt_1_0 (int8_t * restrict out, int8_t * > restrict op_1, unsigned int limit) > { > vector([16,16]) signed char * vectp_out.28; > vector([16,16]) signed char vect_iftmp.27; > vector([16,16]) <signed-boolean:1> mask__28.26; > vector([16,16]) <signed-boolean:1> mask__29.25; > vector([16,16]) <signed-boolean:1> mask__19.19; > vector([16,16]) <signed-boolean:1> mask__31.18; > vector([16,16]) signed char vect__6.17; > vector([16,16]) signed char vect__5.16; > vector([16,16]) signed char vect_sum_15.15; > vector([16,16]) unsigned char vect__4.14; > vector([16,16]) unsigned char vect_x.13; > vector([16,16]) signed char vect_x_14.12; > vector([16,16]) signed char * vectp_op_1.10; > vector([16,16]) <signed-boolean:1> _78; > vector([16,16]) unsigned char _79; > vector([16,16]) unsigned char _80; > unsigned long _92; > unsigned long ivtmp_93; > unsigned long ivtmp_94; > unsigned long _95; > > <bb 2> [local count: 118111598]: > if (limit_12(D) != 0) > goto <bb 3>; [89.00%] > else > goto <bb 5>; [11.00%] > > <bb 3> [local count: 105119322]: > _92 = (unsigned long) limit_12(D); > > <bb 4> [local count: 955630226]: > # vectp_op_1.10_62 = PHI <vectp_op_1.10_63(4), op_1_13(D)(3)> > # vectp_out.28_89 = PHI <vectp_out.28_90(4), out_16(D)(3)> > # ivtmp_93 = PHI <ivtmp_94(4), 
_92(3)> > _95 = .SELECT_VL (ivtmp_93, POLY_INT_CST [16, 16]); > vect_x_14.12_64 = .MASK_LEN_LOAD (vectp_op_1.10_62, 8B, { -1, ... }, _95, > 0); > vect_x.13_65 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned > char>(vect_x_14.12_64); > vect__4.14_67 = vect_x.13_65 + { 9, ... }; > vect_sum_15.15_68 = VIEW_CONVERT_EXPR<vector([16,16]) signed > char>(vect__4.14_67); > vect__5.16_70 = vect_x_14.12_64 ^ { 9, ... }; > vect__6.17_71 = vect_x_14.12_64 ^ vect_sum_15.15_68; > mask__31.18_73 = vect__5.16_70 >= { 0, ... }; > mask__19.19_75 = vect_x_14.12_64 < { 0, ... }; > mask__29.25_85 = vect__6.17_71 < { 0, ... }; > mask__28.26_86 = mask__31.18_73 & mask__29.25_85; > _78 = ~mask__28.26_86; > _79 = .VCOND_MASK (mask__19.19_75, { 128, ... }, { 127, ... }); > _80 = .COND_ADD (_78, vect_x.13_65, { 9, ... }, _79); > vect_iftmp.27_87 = VIEW_CONVERT_EXPR<vector([16,16]) signed char>(_80); > .MASK_LEN_STORE (vectp_out.28_89, 8B, { -1, ... }, _95, 0, > vect_iftmp.27_87); > vectp_op_1.10_63 = vectp_op_1.10_62 + _95; > vectp_out.28_90 = vectp_out.28_89 + _95; > ivtmp_94 = ivtmp_93 - _95; > if (ivtmp_94 != 0) > goto <bb 4>; [89.00%] > else > goto <bb 5>; [11.00%] > > <bb 5> [local count: 118111600]: > return; > > } > > After this patch: > __attribute__((noinline)) > void vec_sat_s_add_imm_int8_t_fmt_1_0 (int8_t * restrict out, int8_t * > restrict op_1, unsigned int limit) > { > vector([16,16]) signed char * vectp_out.12; > vector([16,16]) signed char vect_patt_10.11; > vector([16,16]) signed char vect_x_14.10; > vector([16,16]) signed char D.2852; > vector([16,16]) signed char * vectp_op_1.8; > vector([16,16]) signed char _73(D); > unsigned long _80; > unsigned long ivtmp_81; > unsigned long ivtmp_82; > unsigned long _83; > > <bb 2> [local count: 118111598]: > if (limit_12(D) != 0) > goto <bb 3>; [89.00%] > else > goto <bb 5>; [11.00%] > > <bb 3> [local count: 105119322]: > _80 = (unsigned long) limit_12(D); > > <bb 4> [local count: 955630226]: > # vectp_op_1.8_71 = PHI 
<vectp_op_1.8_72(4), op_1_13(D)(3)> > # vectp_out.12_77 = PHI <vectp_out.12_78(4), out_16(D)(3)> > # ivtmp_81 = PHI <ivtmp_82(4), _80(3)> > _83 = .SELECT_VL (ivtmp_81, POLY_INT_CST [16, 16]); > vect_x_14.10_74 = .MASK_LEN_LOAD (vectp_op_1.8_71, 8B, { -1, ... }, _73(D), > _83, 0); > vect_patt_10.11_75 = .SAT_ADD (vect_x_14.10_74, { 9, ... }); > .MASK_LEN_STORE (vectp_out.12_77, 8B, { -1, ... }, _83, 0, > vect_patt_10.11_75); > vectp_op_1.8_72 = vectp_op_1.8_71 + _83; > vectp_out.12_78 = vectp_out.12_77 + _83; > ivtmp_82 = ivtmp_81 - _83; > if (ivtmp_82 != 0) > goto <bb 4>; [89.00%] > else > goto <bb 5>; [11.00%] > > <bb 5> [local count: 118111600]: > return; > > } > > The below test suites are passed for this patch: > 1. The rv64gcv fully regression tests. > 2. The x86 bootstrap tests. > 3. The x86 fully regression tests.
OK. Thanks, Richard. > Signed-off-by: Li Xu <xu...@eswincomputing.com> > gcc/ChangeLog: > > * match.pd: Add signed vector SAT_ADD IMM form1 matching. > > --- > gcc/match.pd | 10 ++++++++++ > 1 file changed, 10 insertions(+) > > diff --git a/gcc/match.pd b/gcc/match.pd > index d83fd696ada..98411af3940 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -3369,6 +3369,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (ge (bit_xor:c @0 @1) integer_zerop)) > (signed_integer_sat_val @0) > @2)) > + (match (signed_integer_sat_add @0 @1) > + /* T SUM = (T)((UT)X + (UT)IMM) > + SAT_S_ADD = (X ^ SUM) < 0 && (X ^ IMM) >= 0 ? (-(T)(X < 0) ^ MAX) : SUM > */ > + (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (plus (nop_convert @0) > + INTEGER_CST@1))) > + integer_zerop) > + (ge (bit_xor:c @0 INTEGER_CST@3) integer_zerop)) > + (signed_integer_sat_val @0) > + @2) > + (if (wi::eq_p (wi::to_wide (@1), wi::to_wide (@3))))) > (match (signed_integer_sat_add @0 @1) > /* SUM = .ADD_OVERFLOW (X, Y) > SAT_S_ADD = IMAGPART_EXPR (SUM) != 0 ? (-(T)(X < 0) ^ MAX) : SUM */ > -- > 2.17.1 >