From: Pan Li <pan2...@intel.com>

This patch would like to support the __builtin_add_overflow branch form
for unsigned SAT_ADD.  For example as below:

uint64_t
sat_add (uint64_t x, uint64_t y)
{
  uint64_t ret;
  return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
}

Unlike the branchless version, we leverage the simplify to convert the
branch version of SAT_ADD into the branchless form, if and only if the
backend supports IFN_SAT_ADD.  Thus, the backend is able to choose
between the branch and the branchless implementation of .SAT_ADD.  For
example, some targets can handle branchy code more optimally.
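For reference, the branchless form produced by the rule computes
(x + y) | -(uint64_t)((x + y) < x).  A minimal C sketch of the
equivalent computation follows; the helper name sat_add_branchless is
for illustration only and relies on unsigned wraparound semantics:

#include <stdint.h>

/* Sketch of the branchless form the new simplify emits:
   (bit_ior (plus@3 @0 @1) (negate (convert (lt @3 @0)))).  */
uint64_t
sat_add_branchless (uint64_t x, uint64_t y)
{
  uint64_t sum = x + y;                 /* (plus@3 @0 @1)          */
  uint64_t mask = -(uint64_t)(sum < x); /* all-ones iff overflowed */
  return sum | mask;                    /* saturate to UINT64_MAX  */
}
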
When the target implements IFN_SAT_ADD for unsigned types, before this
patch:

uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  long unsigned int _2;
  uint64_t _3;
  __complex__ long unsigned int _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  _2 = IMAGPART_EXPR <_6>;
  if (_2 != 0)
    goto <bb 4>; [35.00%]
  else
    goto <bb 3>; [65.00%]
;;    succ:       4
;;                3

;;   basic block 3, loop depth 0
;;    pred:       2
  _1 = REALPART_EXPR <_6>;
;;    succ:       4

;;   basic block 4, loop depth 0
;;    pred:       3
;;                2
  # _3 = PHI <_1(3), 18446744073709551615(2)>
  return _3;
;;    succ:       EXIT
}

After this patch:

uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _12;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  return _12;
;;    succ:       EXIT
}

The below test suites are passed for this patch:
* The x86 bootstrap test.
* The x86 full regression test.
* The riscv full regression test.

gcc/ChangeLog:

	* match.pd: Add new simplify to convert branch SAT_ADD into
	branchless, if and only if the backend implements the IFN.

Signed-off-by: Pan Li <pan2...@intel.com>
---
 gcc/match.pd | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index cff67c84498..2dc77a46e67 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3080,6 +3080,17 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_add @0 @1)
  (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
 
+#if GIMPLE
+
+(simplify
+ (cond (ne (imagpart (IFN_ADD_OVERFLOW@2 @0 @1)) integer_zerop)
+  integer_minus_onep (realpart @2))
+ (if (ternary_integer_types_match_p (type, @0, @1) && TYPE_UNSIGNED (type)
+      && direct_internal_fn_supported_p (IFN_SAT_ADD, type, OPTIMIZE_FOR_BOTH))
+  (bit_ior (plus@3 @0 @1) (negate (convert (lt @3 @0))))))
+
+#endif
+
 /* x > y && x != XXX_MIN --> x > y
    x > y && x == XXX_MIN --> false . */
 (for eqne (eq ne)
-- 
2.34.1