On Tue, Oct 29, 2024 at 9:27 AM <pan2...@intel.com> wrote: > > From: Pan Li <pan2...@intel.com> > > There are sorts of forms for the unsigned SAT_ADD. Some of them are > complicated while others are cheap. This patch would like to simplify > the complicated form into the cheap ones. For example as below: > > From the form 4 (branch): > SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). > > To (branchless): > SAT_U_ADD = (X + Y) | - ((X + Y) < X). > > #define T uint8_t > > T sat_add_u_1 (T x, T y) > { > return (T)(x + y) < x ? -1 : (x + y); > } > > Before this patch: > 1 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y) > 2 │ { > 3 │ uint8_t D.2809; > 4 │ > 5 │ _1 = x + y; > 6 │ if (x <= _1) goto <D.2810>; else goto <D.2811>; > 7 │ <D.2810>: > 8 │ D.2809 = x + y; > 9 │ goto <D.2812>; > 10 │ <D.2811>: > 11 │ D.2809 = 255; > 12 │ <D.2812>: > 13 │ return D.2809; > 14 │ } > > After this patch: > 1 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y) > 2 │ { > 3 │ uint8_t D.2809; > 4 │ > 5 │ _1 = x + y; > 6 │ _2 = x + y; > 7 │ _3 = x > _2; > 8 │ _4 = (unsigned char) _3; > 9 │ _5 = -_4; > 10 │ D.2809 = _1 | _5; > 11 │ return D.2809; > 12 │ } > > The below test suites are passed for this patch. > * The rv64gcv fully regression test. > * The x86 bootstrap test. > * The x86 fully regression test. > > gcc/ChangeLog: > > * match.pd: Remove unsigned branch form 4 for SAT_ADD, and > add simplify to branchless instead. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c: New test. > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c: New test. > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c: New test. > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c: New test.
You are testing GENERIC folding, so gcc.dg/ is a better location, not tree-ssa/ I wonder if the simplification is already applied by the frontend and thus .original shows the simplified form or only .gimple? Did you check the simplification applies when writing as if (x + y < x) return -1; else return x + y; ? If it doesn't then removing the match is likely premature. I think phiopt should be able to perform the matching here (it does the COND_EXPR building). Thanks, Richard. > Signed-off-by: Pan Li <pan2...@intel.com> > --- > gcc/match.pd | 11 +++++++---- > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c | 15 +++++++++++++++ > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c | 15 +++++++++++++++ > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c | 15 +++++++++++++++ > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c | 15 +++++++++++++++ > 5 files changed, 67 insertions(+), 4 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index 809c717bc86..4d1143b6ec3 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -3154,10 +3154,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > && types_match (type, @0, @1)) > (bit_ior @2 (negate (convert (lt @2 @0)))))) > > -/* Unsigned saturation add, case 4 (branch with lt): > - SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). */ > -(match (unsigned_integer_sat_add @0 @1) > - (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2)) > +/* Simplify SAT_U_ADD to the cheap form > + From: SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). > + To: SAT_U_ADD = (X + Y) | - ((X + Y) < X). */ > +(simplify (cond (lt (plus:c@2 @0 @1) @0) integer_minus_onep @2) > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > + && types_match (type, @0, @1)) > + (bit_ior @2 (negate (convert (lt @2 @0)))))) > > /* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW): > SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1. */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c > b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c > new file mode 100644 > index 00000000000..6e327f58d46 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > + > +#include <stdint.h> > + > +#define T uint16_t > + > +T sat_add_u_1 (T x, T y) > +{ > + return (T)(x + y) < x ? -1 : (x + y); > +} > + > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c > b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c > new file mode 100644 > index 00000000000..17b09396b69 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > + > +#include <stdint.h> > + > +#define T uint32_t > + > +T sat_add_u_1 (T x, T y) > +{ > + return (T)(x + y) < x ? -1 : (x + y); > +} > + > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c > b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c > new file mode 100644 > index 00000000000..8db9b427a65 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > + > +#include <stdint.h> > + > +#define T uint64_t > + > +T sat_add_u_1 (T x, T y) > +{ > + return (T)(x + y) < x ? -1 : (x + y); > +} > + > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c > b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c > new file mode 100644 > index 00000000000..4e86ef9a5fd > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > + > +#include <stdint.h> > + > +#define T uint8_t > + > +T sat_add_u_1 (T x, T y) > +{ > + return (T)(x + y) < x ? -1 : (x + y); > +} > + > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > -- > 2.43.0 >