From: Pan Li <pan2...@intel.com> There are several forms of the unsigned SAT_ADD. Some of them are complicated while others are cheap. This patch simplifies the complicated forms into the cheap ones. For example:
From the form 6 (branch): SUM = ADD_OVERFLOW (X, Y) SAT_U_ADD = IMAGPART_EXPR (SUM) != 0 ? -1 : REALPART_EXPR (SUM) To (branchless): SAT_U_ADD = (X + Y) | - ((X + Y) < X). #define T uint8_t T sat_add_u_1 (T x, T y) { T ret; return __builtin_add_overflow (x, y, &ret) != 0 ? -1 : ret; } Before this patch in phiopt2: 4 │ uint8_t sat_u_add_uint8_t_13 (uint8_t x, uint8_t y) 5 │ { 6 │ unsigned char _1; 7 │ unsigned char _2; 8 │ uint8_t _3; 9 │ __complex__ unsigned char _6; 10 │ 11 │ <bb 2> [local count: 1073741824]: 12 │ _6 = .ADD_OVERFLOW (x_4(D), y_5(D)); 13 │ _1 = REALPART_EXPR <_6>; 14 │ _2 = IMAGPART_EXPR <_6>; 15 │ if (_2 != 0) 16 │ goto <bb 4>; [35.00%] 17 │ else 18 │ goto <bb 3>; [65.00%] 19 │ 20 │ <bb 3> [local count: 697932184]: 21 │ 22 │ <bb 4> [local count: 1073741824]: 23 │ # _3 = PHI <_1(3), 255(2)> 24 │ return _3; 25 │ 26 │ } After this patch: 14 │ uint8_t sat_u_add_uint8_t_13 (uint8_t x, uint8_t y) 15 │ { 16 │ unsigned char _1; 17 │ __complex__ unsigned char _6; 18 │ unsigned char _8; 19 │ _Bool _9; 20 │ unsigned char _10; 21 │ unsigned char _11; 22 │ unsigned char _12; 23 │ 24 │ <bb 2> [local count: 1073741824]: 25 │ _6 = .ADD_OVERFLOW (x_4(D), y_5(D)); // Dead code 26 │ _1 = REALPART_EXPR <_6>; // Ditto 27 │ _8 = x_4(D) + y_5(D); 28 │ _9 = x_4(D) > _8; 29 │ _10 = (unsigned char) _9; 30 │ _11 = -_10; 31 │ _12 = _8 | _11; 32 │ return _12; 33 │ 34 │ } The test suites below pass with this patch. * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. gcc/ChangeLog: * match.pd: Remove unsigned branch form 6 for SAT_ADD, and add simplify to branchless instead. Signed-off-by: Pan Li <pan2...@intel.com> --- gcc/match.pd | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 8ed08b95bc0..c360e212340 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3165,16 +3165,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) SAT_U_ADD = IMAGPART_EXPR (SUM) == 0 ? 
REALPART_EXPR (SUM) : -1 */ (simplify (cond (eq (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop) (realpart @2) integer_minus_onep) + (if (types_match (type, @0, @1)) + (bit_ior (plus@3 @0 @1) (negate (convert (lt @3 @0)))))) + /* From SUM = ADD_OVERFLOW (X, Y) + SAT_U_ADD = IMAGPART_EXPR (SUM) != 0 ? -1 : REALPART_EXPR (SUM) */ + (simplify (cond (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop) + integer_minus_onep (realpart @2)) (if (types_match (type, @0, @1)) (bit_ior (plus@3 @0 @1) (negate (convert (lt @3 @0))))))) -/* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW): - SUM = ADD_OVERFLOW (X, Y) - SAT_U_ADD = IMAGPART_EXPR (SUM) != 0 ? -1 : REALPART_EXPR (SUM). */ -(match (unsigned_integer_sat_add @0 @1) - (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop) - integer_minus_onep (usadd_left_part_2 @0 @1))) - /* Unsigned saturation add, case 9 (one op is imm): SAT_U_ADD = (X + 3) >= x ? (X + 3) : -1. */ (match (unsigned_integer_sat_add @0 @1) -- 2.43.0