On Fri, Oct 11, 2024 at 8:24 AM <pan2...@intel.com> wrote: > > From: Pan Li <pan2...@intel.com> > > This patch adds support for form 1 of the vector signed > integer SAT_SUB, as in the example below: > > Form 1: > #define DEF_VEC_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \ > void __attribute__((noinline)) \ > vec_sat_s_sub_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \ > { \ > unsigned i; \ > for (i = 0; i < limit; i++) \ > { \ > T x = op_1[i]; \ > T y = op_2[i]; \ > T minus = (UT)x - (UT)y; \ > out[i] = (x ^ y) >= 0 \ > ? minus \ > : (minus ^ x) >= 0 \ > ? minus \ > : x < 0 ? MIN : MAX; \ > } \ > } > > DEF_VEC_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX) > > Before this patch: > 91 │ _108 = .SELECT_VL (ivtmp_106, POLY_INT_CST [16, 16]); > 92 │ vect_x_16.11_80 = .MASK_LEN_LOAD (vectp_op_1.9_78, 8B, { -1, ... > }, _108, 0); > 93 │ _69 = vect_x_16.11_80 >> 7; > 94 │ vect_x.12_81 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned > char>(vect_x_16.11_80); > 95 │ vect_y_18.15_85 = .MASK_LEN_LOAD (vectp_op_2.13_83, 8B, { -1, ... > }, _108, 0); > 96 │ vect__7.21_91 = vect_x_16.11_80 ^ vect_y_18.15_85; > 97 │ mask__44.22_92 = vect__7.21_91 < { 0, ... }; > 98 │ vect_y.16_86 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned > char>(vect_y_18.15_85); > 99 │ vect__6.17_87 = vect_x.12_81 - vect_y.16_86; > 100 │ vect_minus_19.18_88 = VIEW_CONVERT_EXPR<vector([16,16]) signed > char>(vect__6.17_87); > 101 │ vect__8.19_89 = vect_x_16.11_80 ^ vect_minus_19.18_88; > 102 │ mask__42.20_90 = vect__8.19_89 < { 0, ... }; > 103 │ mask__41.23_93 = mask__42.20_90 & mask__44.22_92; > 104 │ _4 = .COND_XOR (mask__41.23_93, _69, { 127, ... }, > vect_minus_19.18_88); > 105 │ .MASK_LEN_STORE (vectp_out.31_102, 8B, { -1, ... 
}, _108, 0, _4); > 106 │ vectp_op_1.9_79 = vectp_op_1.9_78 + _108; > 107 │ vectp_op_2.13_84 = vectp_op_2.13_83 + _108; > 108 │ vectp_out.31_103 = vectp_out.31_102 + _108; > 109 │ ivtmp_107 = ivtmp_106 - _108; > > After this patch: > 81 │ _102 = .SELECT_VL (ivtmp_100, POLY_INT_CST [16, 16]); > 82 │ vect_x_16.11_89 = .MASK_LEN_LOAD (vectp_op_1.9_87, 8B, { -1, ... > }, _102, 0); > 83 │ vect_y_18.14_93 = .MASK_LEN_LOAD (vectp_op_2.12_91, 8B, { -1, ... > }, _102, 0); > 84 │ vect_patt_38.15_94 = .SAT_SUB (vect_x_16.11_89, vect_y_18.14_93); > 85 │ .MASK_LEN_STORE (vectp_out.16_96, 8B, { -1, ... }, _102, 0, > vect_patt_38.15_94); > 86 │ vectp_op_1.9_88 = vectp_op_1.9_87 + _102; > 87 │ vectp_op_2.12_92 = vectp_op_2.12_91 + _102; > 88 │ vectp_out.16_97 = vectp_out.16_96 + _102; > 89 │ ivtmp_101 = ivtmp_100 - _102; > > The test suites below pass with this patch. > * The rv64gcv full regression test. > * The x86 bootstrap test. > * The x86 full regression test.
OK. Since we can now match many different ways of writing signed and unsigned saturation add and sub, I wonder whether it makes sense to canonicalize to the "cheapest" variant when the target doesn't support .SAT_SUB/.SAT_ADD. Are there perhaps any "sub-patterns" that do not form the full saturation add/sub but could be simplified/canonicalized in the same way? > gcc/ChangeLog: > > * match.pd: Add case 1 matching pattern for vector signed SAT_SUB. > > Signed-off-by: Pan Li <pan2...@intel.com> > --- > gcc/match.pd | 16 ++++++++++++++++ > 1 file changed, 16 insertions(+) > > diff --git a/gcc/match.pd b/gcc/match.pd > index 8a7569ce387..a3c298d3a22 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -3401,6 +3401,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) > && types_match (type, @0, @1)))) > > +/* Signed saturation sub, case 4: > + T minus = (T)((UT)X - (UT)Y); > + SAT_S_SUB = (X ^ Y) < 0 & (X ^ minus) < 0 ? (-(T)(X < 0) ^ MAX) : minus; > + > + The T and UT are type pair like T=int8_t, UT=uint8_t. */ > +(match (signed_integer_sat_sub @0 @1) > + (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (minus (nop_convert @0) > + (nop_convert @1)))) > + integer_zerop) > + (lt (bit_xor:c @0 @1) integer_zerop)) > + (bit_xor:c (nop_convert (negate (nop_convert (convert > + (lt @0 > integer_zerop))))) > + max_value) > + @2) > + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)))) > + > /* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT). > SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))). */ > (match (unsigned_integer_sat_trunc @0) > -- > 2.43.0 >