From: Dhruv Chawla <dhr...@nvidia.com> This patch folds the following patterns: - max (a, add (a, b)) -> [sum, ovf] = addo (a, b); !ovf ? sum : a - max (a, sub (a, b)) -> [sum, ovf] = subo (a, b); !ovf ? a : sum - min (a, add (a, b)) -> [sum, ovf] = addo (a, b); !ovf ? a : sum - min (a, sub (a, b)) -> [sum, ovf] = addo (a, b); !ovf ? sum : a
Where ovf is the overflow flag, addo and subo are overflowing addition and subtraction, respectively. The folded patterns can normally be implemented as an overflowing operation combined with a conditional move/select instruction. Explanation for the conditions handled in arith_overflow_check_p: Case 1/2: r = a + b; max/min (a, r) or max/min (r, a) lhs (r) if crhs1 (a) and crhs2 (r) => lhs (r) == crhs2 (r) && (rhs1 (a or b) == crhs1 (a) || rhs2 (a or b) == crhs1 (a)) if crhs1 (r) and crhs2 (a) => lhs (r) == crhs1 (r) && (rhs1 (a or b) == crhs2 (a) || rhs2 (a or b) == crhs2 (a)) Both rhs1 and rhs2 are checked in (rhs<n> == crhs<n>) as addition is commutative. Case 3/4: r = a - b; max/min (a, r) or max/min (r, a) lhs (r) if crhs1 (a) and crhs2 (r) => lhs (r) == crhs2 (r) && rhs1 (a) == crhs1 (a) if crhs1 (r) and crhs2 (a) => lhs (r) == crhs1 (r) && rhs1 (a) == crhs2 (a) Bootstrapped and regtested on aarch64-unknown-linux-gnu. Signed-off-by: Dhruv Chawla <dhr...@nvidia.com> gcc/ChangeLog: PR middle-end/116815 * tree-ssa-math-opts.cc (arith_overflow_check_p): Match min/max patterns. (build_minmax_replacement_statements): New function. (match_arith_overflow): Update to handle min/max patterns. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/pr116815-1.c: New test. * gcc.dg/tree-ssa/pr116815-2.c: Likewise. * gcc.dg/tree-ssa/pr116815-3.c: Likewise. --- gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c | 42 ++++++ gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c | 93 +++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c | 43 ++++++ gcc/tree-ssa-math-opts.cc | 151 +++++++++++++++++++-- 4 files changed, 318 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c new file mode 100644 index 00000000000..5d62843d63c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c @@ -0,0 +1,42 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +/* PR middle-end/116815 */ + +/* Single-use tests. */ + +static inline unsigned +max (unsigned a, unsigned b) +{ + return a > b ? a : b; +} + +static inline unsigned +min (unsigned a, unsigned b) +{ + return a < b ? a : b; +} + +#define OPERATION(op, type, N, exp1, exp2) \ + unsigned u##op##type##N (unsigned a, unsigned b) { return op (exp1, exp2); } + +OPERATION (max, add, 1, a, a + b) +OPERATION (max, add, 2, a, b + a) +OPERATION (max, add, 3, a + b, a) +OPERATION (max, add, 4, b + a, a) + +OPERATION (min, add, 1, a, a + b) +OPERATION (min, add, 2, a, b + a) +OPERATION (min, add, 3, a + b, a) +OPERATION (min, add, 4, b + a, a) + +OPERATION (max, sub, 1, a, a - b) +OPERATION (max, sub, 2, a - b, a) + +OPERATION (min, sub, 1, a, a - b) +OPERATION (min, sub, 2, a - b, a) + +/* { dg-final { scan-tree-dump-not "MAX_EXPR" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "MIN_EXPR" "optimized" } } */ +/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 8 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 4 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c new file mode 100644 index 00000000000..56e8038ef82 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c @@ -0,0 +1,93 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +/* PR middle-end/116815 */ + +/* Negative tests. */ + +static inline int +smax (int a, int b) +{ + return a > b ? a : b; +} + +static inline int +smin (int a, int b) +{ + return a < b ? a : b; +} + +static inline unsigned +umax (unsigned a, unsigned b) +{ + return a > b ? a : b; +} + +static inline unsigned +umin (unsigned a, unsigned b) +{ + return a < b ? a : b; +} + +#define ASSUME(cond) if (!(cond)) __builtin_unreachable (); + +/* This transformation does not trigger on signed types. */ + +int +smax_add (int a, int b) +{ + ASSUME (b >= 0); + return smax (a, a + b); +} + +int +smin_add (int a, int b) +{ + ASSUME (b >= 0); + return smin (a, a + b); +} + +int +smax_sub (int a, int b) +{ + ASSUME (b >= 0); + return smax (a, a - b); +} + +int +smin_sub (int a, int b) +{ + ASSUME (b >= 0); + return smin (a, a - b); +} + +/* Invalid patterns. */ + +/* This can potentially be matched, but the RHS gets factored to + (a + b) * b. */ +unsigned +umax_factored (unsigned a, unsigned b) +{ + return umax (a * b, a * b + b * b); +} + +unsigned +umin_mult (unsigned a, unsigned b) +{ + return umin (a, a * b); +} + +unsigned +umax_sub (unsigned a, unsigned b) +{ + return umax (a, b - a); +} + +unsigned +umin_sub (unsigned a, unsigned b) +{ + return umin (a, b - a); +} + +/* { dg-final { scan-tree-dump-not "ADD_OVERFLOW" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "SUB_OVERFLOW" "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c new file mode 100644 index 00000000000..af1fe18d50a --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +/* PR middle-end/116815 */ + +/* Multi-use tests. */ + +static inline unsigned +max (unsigned a, unsigned b) +{ + return a > b ? a : b; +} + +static inline unsigned +min (unsigned a, unsigned b) +{ + return a < b ? a : b; +} + +unsigned +umax_add_umin_add (unsigned a, unsigned b) +{ + return max (a, a + b) + min (a + b, b); +} + +unsigned +umin_add_umax_add (unsigned a, unsigned b) +{ + return min (a, b + a) + max (b + a, b); +} + +unsigned +multiple_paths (unsigned a, unsigned b) +{ + if (a > 5) + return max (a, a + b); + else + return min (a, a + b); +} + +/* { dg-final { scan-tree-dump-not "MAX_EXPR" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "MIN_EXPR" "optimized" } } */ +/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 3 "optimized" } } */ diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index 7e819f37446..f08cac68ca7 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -3981,11 +3981,26 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt, return 1; } - if (TREE_CODE_CLASS (ccode) != tcc_comparison) + if (TREE_CODE_CLASS (ccode) != tcc_comparison + && TREE_CODE_CLASS (ccode) != tcc_binary) return 0; switch (ccode) { + case MAX_EXPR: + case MIN_EXPR: + /* 1. r = a + b; max (a, r) or max (r, a) + 2. r = a + b; min (a, r) or min (r, a) + 3. r = a - b; max (a, r) or max (r, a) + 4. r = a - b; min (a, r) or min (r, a) */ + if ((code == PLUS_EXPR + && ((lhs == crhs1 && (rhs1 == crhs2 || rhs2 == crhs2)) + || (lhs == crhs2 && (rhs1 == crhs1 || rhs2 == crhs2)))) + || (code == MINUS_EXPR + && ((lhs == crhs1 && rhs1 == crhs2) + || (lhs == crhs2 && rhs1 == crhs1)))) + return 1; + break; case GT_EXPR: case LE_EXPR: if (maxval) @@ -4339,6 +4354,73 @@ match_saturation_trunc (gimple_stmt_iterator *gsi, gphi *phi) return true; } +/* Assume that ovf = overflow_flag (add/sub (...)). + The replacement forms are: + max (add) -> ovf ? a : a + b + min (sub) -> ovf ? a : a - b + max (sub) -> ovf ? a - b : a + min (add) -> ovf ? a + b : a. */ + +static tree +build_minmax_replacement_statements (gimple *def_stmt, tree ovf, tree new_lhs, + tree type, gimple *minmax_stmt) +{ + enum tree_code code = gimple_assign_rhs_code (def_stmt); + enum tree_code rhs_code = gimple_assign_rhs_code (minmax_stmt); + gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR); + + tree lhs = gimple_assign_lhs (def_stmt); + tree rhs1 = gimple_assign_rhs1 (def_stmt); + tree rhs2 = gimple_assign_rhs2 (def_stmt); + + tree use_rhs1 = gimple_assign_rhs1 (minmax_stmt); + tree use_rhs2 = gimple_assign_rhs2 (minmax_stmt); + + /* First figure out which variable from def_stmt will be used in the + COND_EXPR. */ + tree minmax_var = NULL_TREE; + /* Either max/min (a, add/sub (a, b)) or + max/min (add/sub (a, b), a). */ + if ((lhs == use_rhs2 && use_rhs1 == rhs1) + || (lhs == use_rhs1 && use_rhs2 == rhs1)) + minmax_var = rhs1; + /* Either max/min (a, add (b, a)) or + max/min (add (b, a), a). */ + else if (code == PLUS_EXPR) + minmax_var = rhs2; + + /* The above should always match rhs1 for MINUS_EXPR. */ + gcc_checking_assert ( + minmax_var != NULL_TREE + && (code == PLUS_EXPR || (use_rhs1 != rhs2 && use_rhs2 != rhs2))); + + /* Figure out if we have to generate: + (ovf != 0 ? new_lhs : minmax_var) or + (ovf != 0 ? minmax_var : new_lhs) i.e. (ovf == 0 ? new_lhs : minmax_var). + The default case is assumed to be the first one. */ + bool flip = false; + if ((rhs_code == MIN_EXPR && code == PLUS_EXPR) + || (rhs_code == MAX_EXPR && code == MINUS_EXPR)) + flip = true; + + /* Generate the actual code. */ + tree minmax = make_ssa_name (type); + tree comparison_result = make_ssa_name (boolean_type_node); + tree comparison_expr = build2 (flip ? EQ_EXPR : NE_EXPR, boolean_type_node, + ovf, build_int_cst (type, 0)); + gimple *comparison_stmt + = gimple_build_assign (comparison_result, comparison_expr); + + tree conditional + = build3 (COND_EXPR, type, comparison_result, minmax_var, new_lhs); + gimple *new_minmax_stmt = gimple_build_assign (minmax, conditional); + gimple_stmt_iterator gsi = gsi_for_stmt (minmax_stmt); + gsi_insert_before (&gsi, comparison_stmt, GSI_NEW_STMT); + gsi_insert_after (&gsi, new_minmax_stmt, GSI_NEW_STMT); + + return minmax; +} + /* Recognize for unsigned x x = y - z; if (x > y) @@ -4391,7 +4473,19 @@ match_saturation_trunc (gimple_stmt_iterator *gsi, gphi *phi) z = IMAGPART_EXPR <_7>; _8 = IMAGPART_EXPR <_7>; _9 = _8 != 0; - iftmp.0_3 = (int) _9; */ + iftmp.0_3 = (int) _9; + + And also recognize: + c = max/min (a, add/sub (a, b)) + and replace it with + _7 = .(ADD|SUB)_OVERFLOW (a, b); + _8 = REALPART_EXPR <_7>; + _9 = IMAGPART_EXPR <_7>; + _10 = _9 != 0; (or _9 == 0) + _11 = _10 ? _8 : a; + c = _11; + This can be optimized to a single conditional select instruction with an + overflowing arithmetic instruction. */ static bool match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, @@ -4425,6 +4519,7 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, tree rhs1 = gimple_assign_rhs1 (stmt); tree rhs2 = gimple_assign_rhs2 (stmt); + bool minmax_use_seen = false; FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) { use_stmt = USE_STMT (use_p); @@ -4445,6 +4540,13 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, return false; cond_stmt = use_stmt; } + if (gimple_code (use_stmt) == GIMPLE_ASSIGN + && gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS) + { + tree_code rhs_code = gimple_assign_rhs_code (use_stmt); + if (rhs_code == MAX_EXPR || rhs_code == MIN_EXPR) + minmax_use_seen = true; + } ovf_use_seen = true; } else @@ -4494,7 +4596,10 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, tree maxval = NULL_TREE; if (!ovf_use_seen - || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen)) + || (code != MULT_EXPR + && (code == BIT_NOT_EXPR + ? use_seen + : !minmax_use_seen && !use_seen)) || (code == PLUS_EXPR && optab_handler (uaddv4_optab, TYPE_MODE (type)) == CODE_FOR_nothing) @@ -4758,6 +4863,7 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, gsi_insert_after (gsi, g2, GSI_NEW_STMT); else gsi_insert_before (gsi, g2, GSI_SAME_STMT); + if (code == MULT_EXPR) mul_stmts.quick_push (g2); @@ -4786,15 +4892,25 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, if (gimple_code (use_stmt) == GIMPLE_COND) { gcond *cond_stmt = as_a <gcond *> (use_stmt); - gimple_cond_set_lhs (cond_stmt, ovf); - gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0)); - gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR); + tree rhs = gimple_cond_rhs (cond_stmt); + if (TREE_CODE (rhs) == MIN_EXPR || TREE_CODE (rhs) == MAX_EXPR) + gimple_cond_set_rhs (cond_stmt, + build_minmax_replacement_statements ( + stmt, ovf, new_lhs, type, use_stmt)); + else + { + gimple_cond_set_lhs (cond_stmt, ovf); + gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0)); + gimple_cond_set_code (cond_stmt, + ovf_use == 1 ? NE_EXPR : EQ_EXPR); + } } else { gcc_checking_assert (is_gimple_assign (use_stmt)); if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS) { + tree_code rhs_code = gimple_assign_rhs_code (use_stmt); if (gimple_assign_rhs_code (use_stmt) == RSHIFT_EXPR) { g2 = gimple_build_assign (make_ssa_name (boolean_type_node), @@ -4843,6 +4959,14 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, gsi_remove (&gsiu, true); continue; } + else if (rhs_code == MIN_EXPR || rhs_code == MAX_EXPR) + { + gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); + gimple_assign_set_rhs_from_tree ( + &gsi, + build_minmax_replacement_statements (stmt, ovf, new_lhs, + type, use_stmt)); + } else { gimple_assign_set_rhs1 (use_stmt, ovf); @@ -4854,11 +4978,16 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, } else { - gcc_checking_assert (gimple_assign_rhs_code (use_stmt) - == COND_EXPR); - tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR, - boolean_type_node, ovf, - build_int_cst (type, 0)); + tree_code rhs_code = gimple_assign_rhs_code (use_stmt); + gcc_checking_assert (rhs_code == COND_EXPR || rhs_code == MAX_EXPR + || rhs_code == MIN_EXPR); + tree cond = NULL_TREE; + if (rhs_code != COND_EXPR) + cond = build_minmax_replacement_statements (stmt, ovf, new_lhs, + type, use_stmt); + else + cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR, + boolean_type_node, ovf, build_int_cst (type, 0)); gimple_assign_set_rhs1 (use_stmt, cond); } } -- 2.44.0