On Fri, Jul 26, 2024 at 11:20 AM <pan2...@intel.com> wrote: > > From: Pan Li <pan2...@intel.com> > > This patch adds support for .SAT_SUB when one of the operands > is an immediate (IMM), i.e. forms 1-4 below. > > Form 1: > #define DEF_SAT_U_SUB_IMM_FMT_1(T, IMM) \ > T __attribute__((noinline)) \ > sat_u_sub_imm##IMM##_##T##_fmt_1 (T y) \ > { \ > return IMM >= y ? IMM - y : 0; \ > } > > Form 2: > #define DEF_SAT_U_SUB_IMM_FMT_2(T, IMM) \ > T __attribute__((noinline)) \ > sat_u_sub_imm##IMM##_##T##_fmt_2 (T y) \ > { \ > return IMM > y ? IMM - y : 0; \ > } > > Form 3: > #define DEF_SAT_U_SUB_IMM_FMT_3(T, IMM) \ > T __attribute__((noinline)) \ > sat_u_sub_imm##IMM##_##T##_fmt_3 (T x) \ > { \ > return x >= IMM ? x - IMM : 0; \ > } > > Form 4: > #define DEF_SAT_U_SUB_IMM_FMT_4(T, IMM) \ > T __attribute__((noinline)) \ > sat_u_sub_imm##IMM##_##T##_fmt_4 (T x) \ > { \ > return x > IMM ? x - IMM : 0; \ > } > > Take form 1 below as an example: > > DEF_SAT_U_SUB_IMM_FMT_1(uint64_t, 11) > > Before this patch: > 4 │ __attribute__((noinline)) > 5 │ uint64_t sat_u_sub_imm11_uint64_t_fmt_1 (uint64_t y) > 6 │ { > 7 │ uint64_t _1; > 8 │ uint64_t _3; > 9 │ > 10 │ ;; basic block 2, loop depth 0 > 11 │ ;; pred: ENTRY > 12 │ if (y_2(D) <= 11) > 13 │ goto <bb 3>; [50.00%] > 14 │ else > 15 │ goto <bb 4>; [50.00%] > 16 │ ;; succ: 3 > 17 │ ;; 4 > 18 │ > 19 │ ;; basic block 3, loop depth 0 > 20 │ ;; pred: 2 > 21 │ _3 = 11 - y_2(D); > 22 │ ;; succ: 4 > 23 │ > 24 │ ;; basic block 4, loop depth 0 > 25 │ ;; pred: 2 > 26 │ ;; 3 > 27 │ # _1 = PHI <0(2), _3(3)> > 28 │ return _1; > 29 │ ;; succ: EXIT > 30 │ > 31 │ } > > After this patch: > 4 │ __attribute__((noinline)) > 5 │ uint64_t sat_u_sub_imm11_uint64_t_fmt_1 (uint64_t y) > 6 │ { > 7 │ uint64_t _1; > 8 │ > 9 │ ;; basic block 2, loop depth 0 > 10 │ ;; pred: ENTRY > 11 │ _1 = .SAT_SUB (11, y_2(D)); [tail call] > 12 │ return _1; > 13 │ ;; succ: EXIT > 14 │ > 15 │ } > > The following test suites pass with this patch: > 1. The rv64gcv full regression tests. > 2. 
The x86 bootstrap tests. > 3. The x86 full regression tests.
OK. Thanks, Richard. > gcc/ChangeLog: > > * match.pd: Add case 9 and case 10 for .SAT_SUB when one > of the op is IMM. > > Signed-off-by: Pan Li <pan2...@intel.com> > --- > gcc/match.pd | 35 +++++++++++++++++++++++++++++++++++ > 1 file changed, 35 insertions(+) > > diff --git a/gcc/match.pd b/gcc/match.pd > index cf359b0ec0f..b2e7d61790d 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -3234,6 +3234,41 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > && types_match (type, @0, @1)))) > > +/* Unsigned saturation sub with op_0 imm, case 9 (branch with gt): > + SAT_U_SUB = IMM > Y ? (IMM - Y) : 0. > + = IMM >= Y ? (IMM - Y) : 0. */ > +(match (unsigned_integer_sat_sub @0 @1) > + (cond^ (le @1 INTEGER_CST@2) (minus INTEGER_CST@0 @1) integer_zerop) > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > + && types_match (type, @1)) > + (with > + { > + unsigned precision = TYPE_PRECISION (type); > + wide_int max = wi::mask (precision, false, precision); > + wide_int c0 = wi::to_wide (@0); > + wide_int c2 = wi::to_wide (@2); > + wide_int c2_add_1 = wi::add (c2, wi::uhwi (1, precision)); > + bool equal_p = wi::eq_p (c0, c2); > + bool less_than_1_p = !wi::eq_p (c2, max) && wi::eq_p (c2_add_1, c0); > + } > + (if (equal_p || less_than_1_p))))) > + > +/* Unsigned saturation sub with op_1 imm, case 10: > + SAT_U_SUB = X > IMM ? (X - IMM) : 0. > + = X >= IMM ? (X - IMM) : 0. */ > +(match (unsigned_integer_sat_sub @0 @1) > + (plus (max @0 INTEGER_CST@1) INTEGER_CST@2) > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > + && types_match (type, @1)) > + (with > + { > + unsigned precision = TYPE_PRECISION (type); > + wide_int c1 = wi::to_wide (@1); > + wide_int c2 = wi::to_wide (@2); > + wide_int sum = wi::add (c1, c2); > + } > + (if (wi::eq_p (sum, wi::uhwi (0, precision))))))) > + > /* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT). > SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))). 
*/ > (match (unsigned_integer_sat_trunc @0) > -- > 2.34.1 >