From: Pan Li <pan2...@intel.com> There are several forms of the unsigned SAT_ADD. Some of them are complicated while others are cheap. This patch simplifies the complicated forms into the cheap one. For example, as below:
From the form 7 (branch): SAT_U_ADD = x <= (T)(x + y) ? (x + y) : -1. To (branchless): SAT_U_ADD = (X + Y) | - ((X + Y) < X). #define T uint8_t T sat_add_u_1 (T x, T y) { return x <= (T)(x + y) ? (x + y) : -1; } Before this patch: 1 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y) 2 │ { 3 │ uint8_t D.2809; 4 │ 5 │ _1 = x + y; 6 │ if (x <= _1) goto <D.2810>; else goto <D.2811>; 7 │ <D.2810>: 8 │ D.2809 = x + y; 9 │ goto <D.2812>; 10 │ <D.2811>: 11 │ D.2809 = 255; 12 │ <D.2812>: 13 │ return D.2809; 14 │ } After this patch: 1 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y) 2 │ { 3 │ uint8_t D.2809; 4 │ 5 │ _1 = x + y; 6 │ _2 = x + y; 7 │ _3 = x > _2; 8 │ _4 = (unsigned char) _3; 9 │ _5 = -_4; 10 │ D.2809 = _1 | _5; 11 │ return D.2809; 12 │ } The simplification doesn't need to check whether the target supports SAT_ADD, as it is simply an optimization at the GIMPLE level. The following test suites pass with this patch. * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. gcc/ChangeLog: * match.pd: Remove unsigned branch form 7 for SAT_ADD, and add simplify to branchless instead. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/sat_u_add-simplify-3-u16.c: New test. * gcc.dg/tree-ssa/sat_u_add-simplify-3-u32.c: New test. * gcc.dg/tree-ssa/sat_u_add-simplify-3-u64.c: New test. * gcc.dg/tree-ssa/sat_u_add-simplify-3-u8.c: New test. 
Signed-off-by: Pan Li <pan2...@intel.com> --- gcc/match.pd | 13 ++++++++----- .../gcc.dg/tree-ssa/sat_u_add-simplify-3-u16.c | 15 +++++++++++++++ .../gcc.dg/tree-ssa/sat_u_add-simplify-3-u32.c | 15 +++++++++++++++ .../gcc.dg/tree-ssa/sat_u_add-simplify-3-u64.c | 15 +++++++++++++++ .../gcc.dg/tree-ssa/sat_u_add-simplify-3-u8.c | 15 +++++++++++++++ 5 files changed, 68 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-3-u16.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-3-u32.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-3-u64.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-3-u8.c diff --git a/gcc/match.pd b/gcc/match.pd index 4d1143b6ec3..d871fb8c24e 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3154,6 +3154,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && types_match (type, @0, @1)) (bit_ior @2 (negate (convert (lt @2 @0)))))) +/* Simplify SAT_U_ADD to the cheap form + From: SAT_U_ADD = x <= (X + Y) ? (X + Y) : -1. + To: SAT_U_ADD = (X + Y) | - ((X + Y) < X). */ +(simplify (cond (le @0 (plus:c@2 @0 @1)) @2 integer_minus_onep) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0, @1)) + (bit_ior @2 (negate (convert (lt @2 @0)))))) + /* Simplify SAT_U_ADD to the cheap form From: SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). To: SAT_U_ADD = (X + Y) | - ((X + Y) < X). */ @@ -3174,11 +3182,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop) integer_minus_onep (usadd_left_part_2 @0 @1))) -/* Unsigned saturation add, case 7 (branch with le): - SAT_ADD = x <= (X + Y) ? (X + Y) : -1. */ -(match (unsigned_integer_sat_add @0 @1) - (cond^ (le @0 (usadd_left_part_1@2 @0 @1)) @2 integer_minus_onep)) - /* Unsigned saturation add, case 8 (branch with gt): SAT_ADD = x > (X + Y) ? -1 : (X + Y). 
*/ (match (unsigned_integer_sat_add @0 @1) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-3-u16.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-3-u16.c new file mode 100644 index 00000000000..81ebd090a69 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-3-u16.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ + +#include <stdint.h> + +#define T uint16_t + +T sat_add_u_1 (T x, T y) +{ + return x <= (T)(x + y) ? (x + y) : -1; +} + +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-3-u32.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-3-u32.c new file mode 100644 index 00000000000..bb026d28219 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-3-u32.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ + +#include <stdint.h> + +#define T uint32_t + +T sat_add_u_1 (T x, T y) +{ + return x <= (T)(x + y) ? (x + y) : -1; +} + +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-3-u64.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-3-u64.c new file mode 100644 index 00000000000..bb026d28219 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-3-u64.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ + +#include <stdint.h> + +#define T uint64_t + +T sat_add_u_1 (T x, T y) +{ + return x <= (T)(x + y) ? 
(x + y) : -1; +} + +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-3-u8.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-3-u8.c new file mode 100644 index 00000000000..77f1332be57 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-3-u8.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ + +#include <stdint.h> + +#define T uint8_t + +T sat_add_u_1 (T x, T y) +{ + return x <= (T)(x + y) ? (x + y) : -1; +} + +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ -- 2.43.0