On 30/05/25 13:35, Andrew Pinski wrote:
External email: Use caution opening links or attachments
On Thu, May 29, 2025 at 1:05 AM <dhr...@nvidia.com> wrote:
From: Dhruv Chawla <dhr...@nvidia.com>
This patch folds the following patterns:
- max (a, add (a, b)) -> [sum, ovf] = addo (a, b); !ovf ? sum : a
- max (a, sub (a, b)) -> [sum, ovf] = subo (a, b); !ovf ? a : sum
- min (a, add (a, b)) -> [sum, ovf] = addo (a, b); !ovf ? a : sum
- min (a, sub (a, b)) -> [sum, ovf] = addo (a, b); !ovf ? sum : a
Where ovf is the overflow flag, addo and subo are overflowing addition and
subtraction, respectively. The folded patterns can normally be implemented as
an overflowing operation combined with a conditional move/select instruction.
Explanation for the conditions handled in arith_overflow_check_p:
Case 1/2: r = a + b; max/min (a, r) or max/min (r, a)
lhs (r)
if crhs1 (a) and crhs2 (r)
=> lhs (r) == crhs2 (r) &&
(rhs1 (a or b) == crhs1 (a) || rhs2 (a or b) == crhs1 (a))
if crhs1 (r) and crhs2 (a)
=> lhs (r) == crhs1 (r) &&
(rhs1 (a or b) == crhs2 (a) || rhs2 (a or b) == crhs2 (a))
Both rhs1 and rhs2 are checked in (rhs<n> == crhs<n>) as addition is
commutative.
Case 3/4: r = a - b; max/min (a, r) or max/min (r, a)
lhs (r)
if crhs1 (a) and crhs2 (r)
=> lhs (r) == crhs2 (r) && rhs1 (a) == crhs1 (a)
if crhs1 (r) and crhs2 (a)
=> lhs (r) == crhs1 (r) && rhs1 (a) == crhs2 (a)
Bootstrapped and regtested on aarch64-unknown-linux-gnu.
Signed-off-by: Dhruv Chawla <dhr...@nvidia.com>
gcc/ChangeLog:
PR middle-end/116815
* tree-ssa-math-opts.cc (arith_overflow_check_p): Match min/max
patterns.
(build_minmax_replacement_statements): New function.
(match_arith_overflow): Update to handle min/max patterns.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/pr116815-1.c: New test.
* gcc.dg/tree-ssa/pr116815-2.c: Likewise.
* gcc.dg/tree-ssa/pr116815-3.c: Likewise.
---
gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c | 42 ++++++
gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c | 93 +++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c | 43 ++++++
gcc/tree-ssa-math-opts.cc | 151 +++++++++++++++++++--
4 files changed, 318 insertions(+), 11 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c
new file mode 100644
index 00000000000..5d62843d63c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR middle-end/116815 */
+
+/* Single-use tests. */
+
+static inline unsigned
+max (unsigned a, unsigned b)
+{
+ return a > b ? a : b;
+}
+
+static inline unsigned
+min (unsigned a, unsigned b)
+{
+ return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)
\
+ unsigned u##op##type##N (unsigned a, unsigned b) { return op (exp1, exp2); }
+
+OPERATION (max, add, 1, a, a + b)
+OPERATION (max, add, 2, a, b + a)
+OPERATION (max, add, 3, a + b, a)
+OPERATION (max, add, 4, b + a, a)
+
+OPERATION (min, add, 1, a, a + b)
+OPERATION (min, add, 2, a, b + a)
+OPERATION (min, add, 3, a + b, a)
+OPERATION (min, add, 4, b + a, a)
+
+OPERATION (max, sub, 1, a, a - b)
+OPERATION (max, sub, 2, a - b, a)
+
+OPERATION (min, sub, 1, a, a - b)
+OPERATION (min, sub, 2, a - b, a)
+
+/* { dg-final { scan-tree-dump-not "MAX_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "MIN_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 8 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 4 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c
new file mode 100644
index 00000000000..56e8038ef82
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c
@@ -0,0 +1,93 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR middle-end/116815 */
+
+/* Negative tests. */
+
+static inline int
+smax (int a, int b)
+{
+ return a > b ? a : b;
+}
+
+static inline int
+smin (int a, int b)
+{
+ return a < b ? a : b;
+}
+
+static inline unsigned
+umax (unsigned a, unsigned b)
+{
+ return a > b ? a : b;
+}
+
+static inline unsigned
+umin (unsigned a, unsigned b)
+{
+ return a < b ? a : b;
+}
+
+#define ASSUME(cond) if (!(cond)) __builtin_unreachable ();
+
+/* This transformation does not trigger on signed types. */
+
+int
+smax_add (int a, int b)
+{
+ ASSUME (b >= 0);
+ return smax (a, a + b);
+}
+
+int
+smin_add (int a, int b)
+{
+ ASSUME (b >= 0);
+ return smin (a, a + b);
+}
+
+int
+smax_sub (int a, int b)
+{
+ ASSUME (b >= 0);
+ return smax (a, a - b);
+}
+
+int
+smin_sub (int a, int b)
+{
+ ASSUME (b >= 0);
+ return smin (a, a - b);
+}
+
+/* Invalid patterns. */
+
+/* This can potentially be matched, but the RHS gets factored to
+ (a + b) * b. */
+unsigned
+umax_factored (unsigned a, unsigned b)
+{
+ return umax (a * b, a * b + b * b);
+}
+
+unsigned
+umin_mult (unsigned a, unsigned b)
+{
+ return umin (a, a * b);
+}
+
+unsigned
+umax_sub (unsigned a, unsigned b)
+{
+ return umax (a, b - a);
+}
+
+unsigned
+umin_sub (unsigned a, unsigned b)
+{
+ return umin (a, b - a);
+}
+
+/* { dg-final { scan-tree-dump-not "ADD_OVERFLOW" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "SUB_OVERFLOW" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c
new file mode 100644
index 00000000000..af1fe18d50a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR middle-end/116815 */
+
+/* Multi-use tests. */
+
+static inline unsigned
+max (unsigned a, unsigned b)
+{
+ return a > b ? a : b;
+}
+
+static inline unsigned
+min (unsigned a, unsigned b)
+{
+ return a < b ? a : b;
+}
+
+unsigned
+umax_add_umin_add (unsigned a, unsigned b)
+{
+ return max (a, a + b) + min (a + b, b);
+}
+
+unsigned
+umin_add_umax_add (unsigned a, unsigned b)
+{
+ return min (a, b + a) + max (b + a, b);
+}
+
+unsigned
+multiple_paths (unsigned a, unsigned b)
+{
+ if (a > 5)
+ return max (a, a + b);
+ else
+ return min (a, a + b);
+}
+
+/* { dg-final { scan-tree-dump-not "MAX_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "MIN_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 3 "optimized" } } */
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 7e819f37446..f08cac68ca7 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -3981,11 +3981,26 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt,
gimple *&use_stmt,
return 1;
}
- if (TREE_CODE_CLASS (ccode) != tcc_comparison)
+ if (TREE_CODE_CLASS (ccode) != tcc_comparison
+ && TREE_CODE_CLASS (ccode) != tcc_binary)
return 0;
I think the check on TREE_CODE_CLASS can be removed. This was done originally
I think your comment got cut off? I have changed this to a checking assert, I
feel that is good to have as a sanity check.
switch (ccode)
{
+ case MAX_EXPR:
+ case MIN_EXPR:
+ /* 1. r = a + b; max (a, r) or max (r, a)
+ 2. r = a + b; min (a, r) or min (r, a)
+ 3. r = a - b; max (a, r) or max (r, a)
+ 4. r = a - b; min (a, r) or min (r, a) */
+ if ((code == PLUS_EXPR
+ && ((lhs == crhs1 && (rhs1 == crhs2 || rhs2 == crhs2))
+ || (lhs == crhs2 && (rhs1 == crhs1 || rhs2 == crhs2))))
+ || (code == MINUS_EXPR
+ && ((lhs == crhs1 && rhs1 == crhs2)
+ || (lhs == crhs2 && rhs1 == crhs1))))
+ return 1;
+ break;
I wonder if there is a nicer way of doing this, by treating MIN_EXPR
as `a < b ? a : b` and MAX_EXPR as `a > b ? a : b` when extracting the
ccode.
so ccode becomes GT_EXPR or LT_EXPR.
This works really well, thanks for the suggestion! It turns out that these
patterns are already handled, and it is a matter of choosing the correct
alternative to match against. I did not notice them before, so I missed them
in my patch.
case GT_EXPR:
case LE_EXPR:
if (maxval)
@@ -4339,6 +4354,73 @@ match_saturation_trunc (gimple_stmt_iterator *gsi, gphi
*phi)
return true;
}
+/* Assume that ovf = overflow_flag (add/sub (...)).
+ The replacement forms are:
+ max (add) -> ovf ? a : a + b
+ min (sub) -> ovf ? a : a - b
+ max (sub) -> ovf ? a - b : a
+ min (add) -> ovf ? a + b : a. */
+
+static tree
+build_minmax_replacement_statements (gimple *def_stmt, tree ovf, tree new_lhs,
+ tree type, gimple *minmax_stmt)
+{
+ enum tree_code code = gimple_assign_rhs_code (def_stmt);
+ enum tree_code rhs_code = gimple_assign_rhs_code (minmax_stmt);
+ gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
+
+ tree lhs = gimple_assign_lhs (def_stmt);
+ tree rhs1 = gimple_assign_rhs1 (def_stmt);
+ tree rhs2 = gimple_assign_rhs2 (def_stmt);
+
+ tree use_rhs1 = gimple_assign_rhs1 (minmax_stmt);
+ tree use_rhs2 = gimple_assign_rhs2 (minmax_stmt);
+
+ /* First figure out which variable from def_stmt will be used in the
+ COND_EXPR. */
+ tree minmax_var = NULL_TREE;
+ /* Either max/min (a, add/sub (a, b)) or
+ max/min (add/sub (a, b), a). */
+ if ((lhs == use_rhs2 && use_rhs1 == rhs1)
+ || (lhs == use_rhs1 && use_rhs2 == rhs1))
+ minmax_var = rhs1;
+ /* Either max/min (a, add (b, a)) or
+ max/min (add (b, a), a). */
+ else if (code == PLUS_EXPR)
+ minmax_var = rhs2;
+
+ /* The above should always match rhs1 for MINUS_EXPR. */
+ gcc_checking_assert (
+ minmax_var != NULL_TREE
+ && (code == PLUS_EXPR || (use_rhs1 != rhs2 && use_rhs2 != rhs2)));
+
+ /* Figure out if we have to generate:
+ (ovf != 0 ? new_lhs : minmax_var) or
+ (ovf != 0 ? minmax_var : new_lhs) i.e. (ovf == 0 ? new_lhs :
minmax_var).
+ The default case is assumed to be the first one. */
+ bool flip = false;
+ if ((rhs_code == MIN_EXPR && code == PLUS_EXPR)
+ || (rhs_code == MAX_EXPR && code == MINUS_EXPR))
+ flip = true;
Why not just:
flip = (rhs_code == MIN_EXPR) != (code == MINUS_EXPR);
Since we rhs_code will be either MIN or MAX and code will either be
MINUS or PLUS.
Done, thanks. I feel the if-statement is more readable though.
+
+ /* Generate the actual code. */
+ tree minmax = make_ssa_name (type);
Since you are replacing the statement later on, why not just reuse the
lhs here? See below too ...
+ tree comparison_result = make_ssa_name (boolean_type_node);
+ tree comparison_expr = build2 (flip ? EQ_EXPR : NE_EXPR, boolean_type_node,
+ ovf, build_int_cst (type, 0));
+ gimple *comparison_stmt
+ = gimple_build_assign (comparison_result, comparison_expr);
You don't need to use build2 followed by gimple_build_assign,
just use gimple_builtd_assign:
gimple *comparison_stmt = gimple_build_assign (comparison_result, flip
? EQ_EXPR : NE_EXPR, ovf, build_int_cst (type, 0));
Thanks! Didn't know these overloads existed :)
+
+ tree conditional
+ = build3 (COND_EXPR, type, comparison_result, minmax_var, new_lhs);
+ gimple *new_minmax_stmt = gimple_build_assign (minmax, conditional);
Same thing here:
gimple *new_minmax_stmt = gimple_build_assign (minmax, COND_EXPR,
comparison_result, minmax_var, new_lhs);
+ gimple_stmt_iterator gsi = gsi_for_stmt (minmax_stmt);
+ gsi_insert_before (&gsi, comparison_stmt, GSI_NEW_STMT);
+ gsi_insert_after (&gsi, new_minmax_stmt, GSI_NEW_STMT);
This is confusing, before and then after. Why not just use both before?
gsi_insert_before (&gsi, comparison_stmt, GSI_SAME_STMT);
gsi_insert_before (&gsi, new_minmax_stmt, GSI_SAME_STMT);
And since you are going to reuse the lhs, remove the old statement.
This is so you don't end up with:
_1 = _2;
Which is not useful to have around.
+
+ return minmax;
+}
+
/* Recognize for unsigned x
x = y - z;
if (x > y)
@@ -4391,7 +4473,19 @@ match_saturation_trunc (gimple_stmt_iterator *gsi, gphi
*phi)
z = IMAGPART_EXPR <_7>;
_8 = IMAGPART_EXPR <_7>;
_9 = _8 != 0;
- iftmp.0_3 = (int) _9; */
+ iftmp.0_3 = (int) _9;
+
+ And also recognize:
+ c = max/min (a, add/sub (a, b))
+ and replace it with
+ _7 = .(ADD|SUB)_OVERFLOW (a, b);
+ _8 = REALPART_EXPR <_7>;
+ _9 = IMAGPART_EXPR <_7>;
+ _10 = _9 != 0; (or _9 == 0)
+ _11 = _10 ? _8 : a;
+ c = _11;
+ This can be optimized to a single conditional select instruction with an
+ overflowing arithmetic instruction. */
static bool
match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
@@ -4425,6 +4519,7 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
tree rhs1 = gimple_assign_rhs1 (stmt);
tree rhs2 = gimple_assign_rhs2 (stmt);
+ bool minmax_use_seen = false;
FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
{
use_stmt = USE_STMT (use_p);
@@ -4445,6 +4540,13 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
return false;
cond_stmt = use_stmt;
}
+ if (gimple_code (use_stmt) == GIMPLE_ASSIGN
+ && gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
+ {
+ tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
+ if (rhs_code == MAX_EXPR || rhs_code == MIN_EXPR)
+ minmax_use_seen = true;
+ }
Maybe it is better just to write this case:
if (gassign *ass = dyn_cast <gassign *>(use_stmt))
{
tree_code rhs_code = gimple_assign_rhs_code (ass);
minmax_use_seen |= rhs_code == MAX_EXPR || rhs_code == MIN_EXPR;
}
`gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS` is not
really useful here since you are then comparing the code itself.
And using C++ style checks and getting a static type check for
gimple_assign_rhs_code improves the code slightly for checking case.
ovf_use_seen = true;
}
else
@@ -4494,7 +4596,10 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
tree maxval = NULL_TREE;
if (!ovf_use_seen
- || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
+ || (code != MULT_EXPR
+ && (code == BIT_NOT_EXPR
+ ? use_seen
+ : !minmax_use_seen && !use_seen))
|| (code == PLUS_EXPR
&& optab_handler (uaddv4_optab,
TYPE_MODE (type)) == CODE_FOR_nothing)
@@ -4758,6 +4863,7 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
gsi_insert_after (gsi, g2, GSI_NEW_STMT);
else
gsi_insert_before (gsi, g2, GSI_SAME_STMT);
+
if (code == MULT_EXPR)
mul_stmts.quick_push (g2);
@@ -4786,15 +4892,25 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
if (gimple_code (use_stmt) == GIMPLE_COND)
{
gcond *cond_stmt = as_a <gcond *> (use_stmt);
- gimple_cond_set_lhs (cond_stmt, ovf);
- gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
- gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
+ tree rhs = gimple_cond_rhs (cond_stmt);
+ if (TREE_CODE (rhs) == MIN_EXPR || TREE_CODE (rhs) == MAX_EXPR)
+ gimple_cond_set_rhs (cond_stmt,
+ build_minmax_replacement_statements (
+ stmt, ovf, new_lhs, type, use_stmt));
+ else
+ {
+ gimple_cond_set_lhs (cond_stmt, ovf);
+ gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
+ gimple_cond_set_code (cond_stmt,
+ ovf_use == 1 ? NE_EXPR : EQ_EXPR);
+ }
}
else
{
gcc_checking_assert (is_gimple_assign (use_stmt));
if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
{
+ tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
if (gimple_assign_rhs_code (use_stmt) == RSHIFT_EXPR)
{
g2 = gimple_build_assign (make_ssa_name (boolean_type_node),
@@ -4843,6 +4959,14 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
gsi_remove (&gsiu, true);
continue;
}
+ else if (rhs_code == MIN_EXPR || rhs_code == MAX_EXPR)
+ {
+ gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
+ gimple_assign_set_rhs_from_tree (
+ &gsi,
+ build_minmax_replacement_statements (stmt, ovf, new_lhs,
+ type, use_stmt));
+ }
See above about reusing/removing this statement.
else
{
gimple_assign_set_rhs1 (use_stmt, ovf);
@@ -4854,11 +4978,16 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
}
else
{
- gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
- == COND_EXPR);
- tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
- boolean_type_node, ovf,
- build_int_cst (type, 0));
+ tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
+ gcc_checking_assert (rhs_code == COND_EXPR || rhs_code == MAX_EXPR
+ || rhs_code == MIN_EXPR);
+ tree cond = NULL_TREE;
+ if (rhs_code != COND_EXPR)
+ cond = build_minmax_replacement_statements (stmt, ovf, new_lhs,
+ type, use_stmt);
+ else
+ cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
+ boolean_type_node, ovf, build_int_cst (type, 0));
Note COND_EXPR no longer has the possibility of a condition part of it
(since r13-707-g68e0063397ba82) so this code will be removed soon
(PR120477 records this dead code). So I think my mention about reusing
the lhs and replacing statements applies here too. Also are you sure
this produces the correct resolve for MIN/MAX?
Because I think we would produce:
+ _7 = .(ADD|SUB)_OVERFLOW (a, b);
+ _8 = REALPART_EXPR <_7>;
+ _9 = IMAGPART_EXPR <_7>;
+ _10 = _9 != 0; (or _9 == 0)
+ _11 = _10 ? _8 : a;
_res = MIN/MAX(_11, _8)
Which will give the same value but I suspect you wanted to remove the
MIN/MAX here too.
Yeah it seems I have made a mistake here, I don't think this code would
ever actually execute. This will only trigger if the RHS is a 3-op code,
which only COND_EXPR satisfies. This can never match with MIN/MAX, so I
have removed this code.
I have attached a version of the patch with your comments applied, does
it seem okay? I have also added a testcase where a use of the MIN/MAX
is present.
-- >8 --
[PATCH] widening_mul: Make better use of overflowing operations in codegen of
min/max(a, add/sub(a, b))
This patch folds the following patterns:
- max (a, add (a, b)) -> [sum, ovf] = addo (a, b); !ovf ? sum : a
- max (a, sub (a, b)) -> [sum, ovf] = subo (a, b); !ovf ? a : sum
- min (a, add (a, b)) -> [sum, ovf] = addo (a, b); !ovf ? a : sum
- min (a, sub (a, b)) -> [sum, ovf] = addo (a, b); !ovf ? sum : a
Where ovf is the overflow flag, addo and subo are overflowing addition and
subtraction, respectively. The folded patterns can normally be implemented as
an overflowing operation combined with a conditional move/select instruction.
Explanation for the conditions handled in arith_overflow_check_p:
Case 1/2: r = a + b; max/min (a, r) or max/min (r, a)
lhs (r)
if crhs1 (a) and crhs2 (r)
=> lhs (r) == crhs2 (r) &&
(rhs1 (a or b) == crhs1 (a) || rhs2 (a or b) == crhs1 (a))
if crhs1 (r) and crhs2 (a)
=> lhs (r) == crhs1 (r) &&
(rhs1 (a or b) == crhs2 (a) || rhs2 (a or b) == crhs2 (a))
Both rhs1 and rhs2 are checked in (rhs<n> == crhs<n>) as addition is
commutative.
Case 3/4: r = a - b; max/min (a, r) or max/min (r, a)
lhs (r)
if crhs1 (a) and crhs2 (r)
=> lhs (r) == crhs2 (r) && rhs1 (a) == crhs1 (a)
if crhs1 (r) and crhs2 (a)
=> lhs (r) == crhs1 (r) && rhs1 (a) == crhs2 (a)
Bootstrapped and regtested on aarch64-unknown-linux-gnu.
Signed-off-by: Dhruv Chawla <dhr...@nvidia.com>
gcc/ChangeLog:
PR middle-end/116815
* tree-ssa-math-opts.cc (arith_overflow_check_p): Match min/max
patterns.
(build_minmax_replacement_statements): New function.
(match_arith_overflow): Update to handle min/max patterns.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/pr116815-1.c: New test.
* gcc.dg/tree-ssa/pr116815-2.c: Likewise.
* gcc.dg/tree-ssa/pr116815-3.c: Likewise.
* gcc.dg/tree-ssa/pr116815-4.c: Likewise.
---
gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c | 42 ++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c | 93 +++++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c | 43 ++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr116815-4.c | 50 +++++++++
gcc/tree-ssa-math-opts.cc | 116 +++++++++++++++++++--
5 files changed, 336 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-4.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c
new file mode 100644
index 00000000000..5d62843d63c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR middle-end/116815 */
+
+/* Single-use tests. */
+
+static inline unsigned
+max (unsigned a, unsigned b)
+{
+ return a > b ? a : b;
+}
+
+static inline unsigned
+min (unsigned a, unsigned b)
+{
+ return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)
\
+ unsigned u##op##type##N (unsigned a, unsigned b) { return op (exp1, exp2); }
+
+OPERATION (max, add, 1, a, a + b)
+OPERATION (max, add, 2, a, b + a)
+OPERATION (max, add, 3, a + b, a)
+OPERATION (max, add, 4, b + a, a)
+
+OPERATION (min, add, 1, a, a + b)
+OPERATION (min, add, 2, a, b + a)
+OPERATION (min, add, 3, a + b, a)
+OPERATION (min, add, 4, b + a, a)
+
+OPERATION (max, sub, 1, a, a - b)
+OPERATION (max, sub, 2, a - b, a)
+
+OPERATION (min, sub, 1, a, a - b)
+OPERATION (min, sub, 2, a - b, a)
+
+/* { dg-final { scan-tree-dump-not "MAX_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "MIN_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 8 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 4 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c
new file mode 100644
index 00000000000..56e8038ef82
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c
@@ -0,0 +1,93 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR middle-end/116815 */
+
+/* Negative tests. */
+
+static inline int
+smax (int a, int b)
+{
+ return a > b ? a : b;
+}
+
+static inline int
+smin (int a, int b)
+{
+ return a < b ? a : b;
+}
+
+static inline unsigned
+umax (unsigned a, unsigned b)
+{
+ return a > b ? a : b;
+}
+
+static inline unsigned
+umin (unsigned a, unsigned b)
+{
+ return a < b ? a : b;
+}
+
+#define ASSUME(cond) if (!(cond)) __builtin_unreachable ();
+
+/* This transformation does not trigger on signed types. */
+
+int
+smax_add (int a, int b)
+{
+ ASSUME (b >= 0);
+ return smax (a, a + b);
+}
+
+int
+smin_add (int a, int b)
+{
+ ASSUME (b >= 0);
+ return smin (a, a + b);
+}
+
+int
+smax_sub (int a, int b)
+{
+ ASSUME (b >= 0);
+ return smax (a, a - b);
+}
+
+int
+smin_sub (int a, int b)
+{
+ ASSUME (b >= 0);
+ return smin (a, a - b);
+}
+
+/* Invalid patterns. */
+
+/* This can potentially be matched, but the RHS gets factored to
+ (a + b) * b. */
+unsigned
+umax_factored (unsigned a, unsigned b)
+{
+ return umax (a * b, a * b + b * b);
+}
+
+unsigned
+umin_mult (unsigned a, unsigned b)
+{
+ return umin (a, a * b);
+}
+
+unsigned
+umax_sub (unsigned a, unsigned b)
+{
+ return umax (a, b - a);
+}
+
+unsigned
+umin_sub (unsigned a, unsigned b)
+{
+ return umin (a, b - a);
+}
+
+/* { dg-final { scan-tree-dump-not "ADD_OVERFLOW" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "SUB_OVERFLOW" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c
new file mode 100644
index 00000000000..af1fe18d50a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR middle-end/116815 */
+
+/* Multi-use tests. */
+
+static inline unsigned
+max (unsigned a, unsigned b)
+{
+ return a > b ? a : b;
+}
+
+static inline unsigned
+min (unsigned a, unsigned b)
+{
+ return a < b ? a : b;
+}
+
+unsigned
+umax_add_umin_add (unsigned a, unsigned b)
+{
+ return max (a, a + b) + min (a + b, b);
+}
+
+unsigned
+umin_add_umax_add (unsigned a, unsigned b)
+{
+ return min (a, b + a) + max (b + a, b);
+}
+
+unsigned
+multiple_paths (unsigned a, unsigned b)
+{
+ if (a > 5)
+ return max (a, a + b);
+ else
+ return min (a, a + b);
+}
+
+/* { dg-final { scan-tree-dump-not "MAX_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "MIN_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 3 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-4.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-4.c
new file mode 100644
index 00000000000..0f96a28fb3a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-4.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR middle-end/116815 */
+
+/* Single-use tests with a use of the min-max in an if-condition. */
+
+static inline unsigned
+max (unsigned a, unsigned b)
+{
+ return a > b ? a : b;
+}
+
+static inline unsigned
+min (unsigned a, unsigned b)
+{
+ return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)
\
+ unsigned u##op##type##N (unsigned a, unsigned b, unsigned c, unsigned d,
\
+ unsigned e) \
+ {
\
+ unsigned result = op (exp1, exp2);
\
+ if (result == c || result == c * 2)
\
+ return d;
\
+ else
\
+ return e;
\
+ }
+
+OPERATION (max, add, 1, a, a + b)
+OPERATION (max, add, 2, a, b + a)
+OPERATION (max, add, 3, a + b, a)
+OPERATION (max, add, 4, b + a, a)
+
+OPERATION (min, add, 1, a, a + b)
+OPERATION (min, add, 2, a, b + a)
+OPERATION (min, add, 3, a + b, a)
+OPERATION (min, add, 4, b + a, a)
+
+OPERATION (max, sub, 1, a, a - b)
+OPERATION (max, sub, 2, a - b, a)
+
+OPERATION (min, sub, 1, a, a - b)
+OPERATION (min, sub, 2, a - b, a)
+
+/* { dg-final { scan-tree-dump-not "MAX_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "MIN_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 8 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 4 "optimized" } } */
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 7e819f37446..9007c6aa9f9 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -3902,6 +3902,13 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt,
gimple *&use_stmt,
ccode = gimple_assign_rhs_code (cur_use_stmt);
crhs1 = gimple_assign_rhs1 (cur_use_stmt);
crhs2 = gimple_assign_rhs2 (cur_use_stmt);
+ /* Treat (MIN/MAX)_EXPR (a, b) as (a < b ? a : b) or (a > b ? a : b).
+ Choose the correct arm of the ccode switch based on the form being
+ matched, min/max(r, a) or min/max(a, r). These are independent of
+ whether MIN_EXPR or MAX_EXPR is being performed as this distinction
+ is handled by build_minmax_replacement_statements. */
+ if (ccode == MIN_EXPR || ccode == MAX_EXPR)
+ ccode = (lhs == crhs1) == (code == PLUS_EXPR) ? LT_EXPR : GT_EXPR;
}
else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
{
@@ -3981,8 +3988,8 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt,
gimple *&use_stmt,
return 1;
}
- if (TREE_CODE_CLASS (ccode) != tcc_comparison)
- return 0;
+ gcc_checking_assert (TREE_CODE_CLASS (ccode) == tcc_comparison
+ || TREE_CODE_CLASS (ccode) == tcc_binary);
switch (ccode)
{
@@ -4339,6 +4346,66 @@ match_saturation_trunc (gimple_stmt_iterator *gsi, gphi
*phi)
return true;
}
+/* Assume that ovf = overflow_flag (add/sub (...)).
+ The replacement forms are:
+ max (add) -> ovf ? a : a + b
+ min (sub) -> ovf ? a : a - b
+ max (sub) -> ovf ? a - b : a
+ min (add) -> ovf ? a + b : a. */
+
+static void
+build_minmax_replacement_statements (gimple *def_stmt, tree ovf,
+ tree realpart_lhs, tree type,
+ gimple *minmax_stmt)
+{
+ enum tree_code code = gimple_assign_rhs_code (def_stmt);
+ enum tree_code rhs_code = gimple_assign_rhs_code (minmax_stmt);
+ gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
+
+ tree lhs = gimple_assign_lhs (def_stmt);
+ tree rhs1 = gimple_assign_rhs1 (def_stmt);
+ tree rhs2 = gimple_assign_rhs2 (def_stmt);
+
+ tree use_rhs1 = gimple_assign_rhs1 (minmax_stmt);
+ tree use_rhs2 = gimple_assign_rhs2 (minmax_stmt);
+
+ /* First figure out which variable from def_stmt will be used in the
+ COND_EXPR. */
+ tree minmax_var = NULL_TREE;
+ /* Either max/min (a, add/sub (a, b)) or
+ max/min (add/sub (a, b), a). */
+ if ((lhs == use_rhs2 && use_rhs1 == rhs1)
+ || (lhs == use_rhs1 && use_rhs2 == rhs1))
+ minmax_var = rhs1;
+ /* Either max/min (a, add (b, a)) or
+ max/min (add (b, a), a). */
+ else if (code == PLUS_EXPR)
+ minmax_var = rhs2;
+
+ /* The above should always match rhs1 for MINUS_EXPR. */
+ gcc_checking_assert (
+ minmax_var != NULL_TREE
+ && (code == PLUS_EXPR || (use_rhs1 != rhs2 && use_rhs2 != rhs2)));
+
+ /* Figure out if we have to generate:
+ (ovf != 0 ? new_lhs : minmax_var) or
+ (ovf != 0 ? minmax_var : new_lhs) i.e. (ovf == 0 ? new_lhs :
minmax_var).
+ The default case is assumed to be the first one. */
+ bool flip = (rhs_code == MIN_EXPR) != (code == MINUS_EXPR);
+
+ /* Generate the actual code. */
+ tree comparison_result = make_ssa_name (boolean_type_node);
+ gimple *comparison_stmt
+ = gimple_build_assign (comparison_result, flip ? EQ_EXPR : NE_EXPR, ovf,
+ build_int_cst (type, 0));
+ gimple *new_minmax_stmt
+ = gimple_build_assign (gimple_assign_lhs (minmax_stmt), COND_EXPR,
+ comparison_result, minmax_var, realpart_lhs);
+ gimple_stmt_iterator gsi = gsi_for_stmt (minmax_stmt);
+ gsi_insert_before (&gsi, comparison_stmt, GSI_SAME_STMT);
+ gsi_replace (&gsi, new_minmax_stmt, false);
+}
+
/* Recognize for unsigned x
x = y - z;
if (x > y)
@@ -4391,7 +4458,19 @@ match_saturation_trunc (gimple_stmt_iterator *gsi, gphi
*phi)
z = IMAGPART_EXPR <_7>;
_8 = IMAGPART_EXPR <_7>;
_9 = _8 != 0;
- iftmp.0_3 = (int) _9; */
+ iftmp.0_3 = (int) _9;
+
+ And also recognize:
+ c = max/min (a, add/sub (a, b))
+ and replace it with
+ _7 = .(ADD|SUB)_OVERFLOW (a, b);
+ _8 = REALPART_EXPR <_7>;
+ _9 = IMAGPART_EXPR <_7>;
+ _10 = _9 != 0; (or _9 == 0)
+ _11 = _10 ? _8 : a;
+ c = _11;
+ This can be optimized to a single conditional select instruction with an
+ overflowing arithmetic instruction. */
static bool
match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
@@ -4425,6 +4504,7 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
tree rhs1 = gimple_assign_rhs1 (stmt);
tree rhs2 = gimple_assign_rhs2 (stmt);
+ bool minmax_use_seen = false;
FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
{
use_stmt = USE_STMT (use_p);
@@ -4445,6 +4525,11 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
return false;
cond_stmt = use_stmt;
}
+ if (gassign *assign = dyn_cast<gassign *> (use_stmt))
+ {
+ tree_code rhs_code = gimple_assign_rhs_code (assign);
+ minmax_use_seen |= (rhs_code == MAX_EXPR || rhs_code == MIN_EXPR);
+ }
ovf_use_seen = true;
}
else
@@ -4494,7 +4579,10 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
tree maxval = NULL_TREE;
if (!ovf_use_seen
- || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
+ || (code != MULT_EXPR
+ && (code == BIT_NOT_EXPR
+ ? use_seen
+ : !minmax_use_seen && !use_seen))
|| (code == PLUS_EXPR
&& optab_handler (uaddv4_optab,
TYPE_MODE (type)) == CODE_FOR_nothing)
@@ -4786,16 +4874,25 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
if (gimple_code (use_stmt) == GIMPLE_COND)
{
gcond *cond_stmt = as_a <gcond *> (use_stmt);
- gimple_cond_set_lhs (cond_stmt, ovf);
- gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
- gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
+ tree rhs = gimple_cond_rhs (cond_stmt);
+ if (TREE_CODE (rhs) == MIN_EXPR || TREE_CODE (rhs) == MAX_EXPR)
+ build_minmax_replacement_statements (stmt, ovf, new_lhs, type,
+ use_stmt);
+ else
+ {
+ gimple_cond_set_lhs (cond_stmt, ovf);
+ gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
+ gimple_cond_set_code (cond_stmt,
+ ovf_use == 1 ? NE_EXPR : EQ_EXPR);
+ }
}
else
{
gcc_checking_assert (is_gimple_assign (use_stmt));
if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
{
- if (gimple_assign_rhs_code (use_stmt) == RSHIFT_EXPR)
+ tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
+ if (rhs_code == RSHIFT_EXPR)
{
g2 = gimple_build_assign (make_ssa_name (boolean_type_node),
ovf_use == 1 ? NE_EXPR : EQ_EXPR,
@@ -4843,6 +4940,9 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
gsi_remove (&gsiu, true);
continue;
}
+ else if (rhs_code == MIN_EXPR || rhs_code == MAX_EXPR)
+ build_minmax_replacement_statements (stmt, ovf, new_lhs, type,
+ use_stmt);
else
{
gimple_assign_set_rhs1 (use_stmt, ovf);
--
2.44.0
Thanks,
Andrew
gimple_assign_set_rhs1 (use_stmt, cond);
}
}
--
2.44.0
--
Regards,
Dhruv
From a2b7772bf9a84de27a2886863adf6f3620696621 Mon Sep 17 00:00:00 2001
From: Dhruv Chawla <dhr...@nvidia.com>
Date: Mon, 26 May 2025 09:37:36 -0700
Subject: [PATCH] widening_mul: Make better use of overflowing operations in
codegen of min/max(a, add/sub(a, b))
This patch folds the following patterns:
- max (a, add (a, b)) -> [sum, ovf] = addo (a, b); !ovf ? sum : a
- max (a, sub (a, b)) -> [sum, ovf] = subo (a, b); !ovf ? a : sum
- min (a, add (a, b)) -> [sum, ovf] = addo (a, b); !ovf ? a : sum
- min (a, sub (a, b)) -> [sum, ovf] = addo (a, b); !ovf ? sum : a
Where ovf is the overflow flag, addo and subo are overflowing addition and
subtraction, respectively. The folded patterns can normally be implemented as
an overflowing operation combined with a conditional move/select instruction.
Explanation for the conditions handled in arith_overflow_check_p:
Case 1/2: r = a + b; max/min (a, r) or max/min (r, a)
lhs (r)
if crhs1 (a) and crhs2 (r)
=> lhs (r) == crhs2 (r) &&
(rhs1 (a or b) == crhs1 (a) || rhs2 (a or b) == crhs1 (a))
if crhs1 (r) and crhs2 (a)
=> lhs (r) == crhs1 (r) &&
(rhs1 (a or b) == crhs2 (a) || rhs2 (a or b) == crhs2 (a))
Both rhs1 and rhs2 are checked in (rhs<n> == crhs<n>) as addition is
commutative.
Case 3/4: r = a - b; max/min (a, r) or max/min (r, a)
lhs (r)
if crhs1 (a) and crhs2 (r)
=> lhs (r) == crhs2 (r) && rhs1 (a) == crhs1 (a)
if crhs1 (r) and crhs2 (a)
=> lhs (r) == crhs1 (r) && rhs1 (a) == crhs2 (a)
Bootstrapped and regtested on aarch64-unknown-linux-gnu.
Signed-off-by: Dhruv Chawla <dhr...@nvidia.com>
gcc/ChangeLog:
PR middle-end/116815
* tree-ssa-math-opts.cc (arith_overflow_check_p): Match min/max
patterns.
(build_minmax_replacement_statements): New function.
(match_arith_overflow): Update to handle min/max patterns.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/pr116815-1.c: New test.
* gcc.dg/tree-ssa/pr116815-2.c: Likewise.
* gcc.dg/tree-ssa/pr116815-3.c: Likewise.
* gcc.dg/tree-ssa/pr116815-4.c: Likewise.
---
gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c | 42 ++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c | 93 +++++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c | 43 ++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr116815-4.c | 50 +++++++++
gcc/tree-ssa-math-opts.cc | 116 +++++++++++++++++++--
5 files changed, 336 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-4.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c
new file mode 100644
index 00000000000..5d62843d63c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR middle-end/116815 */
+
+/* Single-use tests. */
+
+static inline unsigned
+max (unsigned a, unsigned b)
+{
+ return a > b ? a : b;
+}
+
+static inline unsigned
+min (unsigned a, unsigned b)
+{
+ return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)
\
+ unsigned u##op##type##N (unsigned a, unsigned b) { return op (exp1, exp2); }
+
+OPERATION (max, add, 1, a, a + b)
+OPERATION (max, add, 2, a, b + a)
+OPERATION (max, add, 3, a + b, a)
+OPERATION (max, add, 4, b + a, a)
+
+OPERATION (min, add, 1, a, a + b)
+OPERATION (min, add, 2, a, b + a)
+OPERATION (min, add, 3, a + b, a)
+OPERATION (min, add, 4, b + a, a)
+
+OPERATION (max, sub, 1, a, a - b)
+OPERATION (max, sub, 2, a - b, a)
+
+OPERATION (min, sub, 1, a, a - b)
+OPERATION (min, sub, 2, a - b, a)
+
+/* { dg-final { scan-tree-dump-not "MAX_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "MIN_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 8 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 4 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c
new file mode 100644
index 00000000000..56e8038ef82
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c
@@ -0,0 +1,93 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR middle-end/116815 */
+
+/* Negative tests. */
+
+static inline int
+smax (int a, int b)
+{
+ return a > b ? a : b;
+}
+
+static inline int
+smin (int a, int b)
+{
+ return a < b ? a : b;
+}
+
+static inline unsigned
+umax (unsigned a, unsigned b)
+{
+ return a > b ? a : b;
+}
+
+static inline unsigned
+umin (unsigned a, unsigned b)
+{
+ return a < b ? a : b;
+}
+
+#define ASSUME(cond) if (!(cond)) __builtin_unreachable ();
+
+/* This transformation does not trigger on signed types. */
+
+int
+smax_add (int a, int b)
+{
+ ASSUME (b >= 0);
+ return smax (a, a + b);
+}
+
+int
+smin_add (int a, int b)
+{
+ ASSUME (b >= 0);
+ return smin (a, a + b);
+}
+
+int
+smax_sub (int a, int b)
+{
+ ASSUME (b >= 0);
+ return smax (a, a - b);
+}
+
+int
+smin_sub (int a, int b)
+{
+ ASSUME (b >= 0);
+ return smin (a, a - b);
+}
+
+/* Invalid patterns. */
+
+/* This can potentially be matched, but the RHS gets factored to
+ (a + b) * b. */
+unsigned
+umax_factored (unsigned a, unsigned b)
+{
+ return umax (a * b, a * b + b * b);
+}
+
+unsigned
+umin_mult (unsigned a, unsigned b)
+{
+ return umin (a, a * b);
+}
+
+unsigned
+umax_sub (unsigned a, unsigned b)
+{
+ return umax (a, b - a);
+}
+
+unsigned
+umin_sub (unsigned a, unsigned b)
+{
+ return umin (a, b - a);
+}
+
+/* { dg-final { scan-tree-dump-not "ADD_OVERFLOW" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "SUB_OVERFLOW" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c
new file mode 100644
index 00000000000..af1fe18d50a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR middle-end/116815 */
+
+/* Multi-use tests. */
+
+static inline unsigned
+max (unsigned a, unsigned b)
+{
+ return a > b ? a : b;
+}
+
+static inline unsigned
+min (unsigned a, unsigned b)
+{
+ return a < b ? a : b;
+}
+
+unsigned
+umax_add_umin_add (unsigned a, unsigned b)
+{
+ return max (a, a + b) + min (a + b, b);
+}
+
+unsigned
+umin_add_umax_add (unsigned a, unsigned b)
+{
+ return min (a, b + a) + max (b + a, b);
+}
+
+unsigned
+multiple_paths (unsigned a, unsigned b)
+{
+ if (a > 5)
+ return max (a, a + b);
+ else
+ return min (a, a + b);
+}
+
+/* { dg-final { scan-tree-dump-not "MAX_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "MIN_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 3 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-4.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-4.c
new file mode 100644
index 00000000000..0f96a28fb3a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-4.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR middle-end/116815 */
+
+/* Single-use tests with a use of the min-max in an if-condition. */
+
+static inline unsigned
+max (unsigned a, unsigned b)
+{
+ return a > b ? a : b;
+}
+
+static inline unsigned
+min (unsigned a, unsigned b)
+{
+ return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)
\
+ unsigned u##op##type##N (unsigned a, unsigned b, unsigned c, unsigned d,
\
+ unsigned e) \
+ {
\
+ unsigned result = op (exp1, exp2);
\
+ if (result == c || result == c * 2)
\
+ return d;
\
+ else
\
+ return e;
\
+ }
+
+OPERATION (max, add, 1, a, a + b)
+OPERATION (max, add, 2, a, b + a)
+OPERATION (max, add, 3, a + b, a)
+OPERATION (max, add, 4, b + a, a)
+
+OPERATION (min, add, 1, a, a + b)
+OPERATION (min, add, 2, a, b + a)
+OPERATION (min, add, 3, a + b, a)
+OPERATION (min, add, 4, b + a, a)
+
+OPERATION (max, sub, 1, a, a - b)
+OPERATION (max, sub, 2, a - b, a)
+
+OPERATION (min, sub, 1, a, a - b)
+OPERATION (min, sub, 2, a - b, a)
+
+/* { dg-final { scan-tree-dump-not "MAX_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "MIN_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 8 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 4 "optimized" } } */
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 7e819f37446..9007c6aa9f9 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -3902,6 +3902,13 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt,
gimple *&use_stmt,
ccode = gimple_assign_rhs_code (cur_use_stmt);
crhs1 = gimple_assign_rhs1 (cur_use_stmt);
crhs2 = gimple_assign_rhs2 (cur_use_stmt);
+ /* Treat (MIN/MAX)_EXPR (a, b) as (a < b ? a : b) or (a > b ? a : b).
+ Choose the correct arm of the ccode switch based on the form being
+ matched, min/max(r, a) or min/max(a, r). These are independent of
+ whether MIN_EXPR or MAX_EXPR is being performed as this distinction
+ is handled by build_minmax_replacement_statements. */
+ if (ccode == MIN_EXPR || ccode == MAX_EXPR)
+ ccode = (lhs == crhs1) == (code == PLUS_EXPR) ? LT_EXPR : GT_EXPR;
}
else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
{
@@ -3981,8 +3988,8 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt,
gimple *&use_stmt,
return 1;
}
- if (TREE_CODE_CLASS (ccode) != tcc_comparison)
- return 0;
+ gcc_checking_assert (TREE_CODE_CLASS (ccode) == tcc_comparison
+ || TREE_CODE_CLASS (ccode) == tcc_binary);
switch (ccode)
{
@@ -4339,6 +4346,66 @@ match_saturation_trunc (gimple_stmt_iterator *gsi, gphi
*phi)
return true;
}
+/* Assume that ovf = overflow_flag (add/sub (...)).
+ The replacement forms are:
+ max (add) -> ovf ? a : a + b
+ min (sub) -> ovf ? a : a - b
+ max (sub) -> ovf ? a - b : a
+ min (add) -> ovf ? a + b : a. */
+
+static void
+build_minmax_replacement_statements (gimple *def_stmt, tree ovf,
+ tree realpart_lhs, tree type,
+ gimple *minmax_stmt)
+{
+ enum tree_code code = gimple_assign_rhs_code (def_stmt);
+ enum tree_code rhs_code = gimple_assign_rhs_code (minmax_stmt);
+ gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
+
+ tree lhs = gimple_assign_lhs (def_stmt);
+ tree rhs1 = gimple_assign_rhs1 (def_stmt);
+ tree rhs2 = gimple_assign_rhs2 (def_stmt);
+
+ tree use_rhs1 = gimple_assign_rhs1 (minmax_stmt);
+ tree use_rhs2 = gimple_assign_rhs2 (minmax_stmt);
+
+ /* First figure out which variable from def_stmt will be used in the
+ COND_EXPR. */
+ tree minmax_var = NULL_TREE;
+ /* Either max/min (a, add/sub (a, b)) or
+ max/min (add/sub (a, b), a). */
+ if ((lhs == use_rhs2 && use_rhs1 == rhs1)
+ || (lhs == use_rhs1 && use_rhs2 == rhs1))
+ minmax_var = rhs1;
+ /* Either max/min (a, add (b, a)) or
+ max/min (add (b, a), a). */
+ else if (code == PLUS_EXPR)
+ minmax_var = rhs2;
+
+ /* The above should always match rhs1 for MINUS_EXPR. */
+ gcc_checking_assert (
+ minmax_var != NULL_TREE
+ && (code == PLUS_EXPR || (use_rhs1 != rhs2 && use_rhs2 != rhs2)));
+
+ /* Figure out if we have to generate:
+ (ovf != 0 ? new_lhs : minmax_var) or
+ (ovf != 0 ? minmax_var : new_lhs) i.e. (ovf == 0 ? new_lhs :
minmax_var).
+ The default case is assumed to be the first one. */
+ bool flip = (rhs_code == MIN_EXPR) != (code == MINUS_EXPR);
+
+ /* Generate the actual code. */
+ tree comparison_result = make_ssa_name (boolean_type_node);
+ gimple *comparison_stmt
+ = gimple_build_assign (comparison_result, flip ? EQ_EXPR : NE_EXPR, ovf,
+ build_int_cst (type, 0));
+ gimple *new_minmax_stmt
+ = gimple_build_assign (gimple_assign_lhs (minmax_stmt), COND_EXPR,
+ comparison_result, minmax_var, realpart_lhs);
+ gimple_stmt_iterator gsi = gsi_for_stmt (minmax_stmt);
+ gsi_insert_before (&gsi, comparison_stmt, GSI_SAME_STMT);
+ gsi_replace (&gsi, new_minmax_stmt, false);
+}
+
/* Recognize for unsigned x
x = y - z;
if (x > y)
@@ -4391,7 +4458,19 @@ match_saturation_trunc (gimple_stmt_iterator *gsi, gphi
*phi)
z = IMAGPART_EXPR <_7>;
_8 = IMAGPART_EXPR <_7>;
_9 = _8 != 0;
- iftmp.0_3 = (int) _9; */
+ iftmp.0_3 = (int) _9;
+
+ And also recognize:
+ c = max/min (a, add/sub (a, b))
+ and replace it with
+ _7 = .(ADD|SUB)_OVERFLOW (a, b);
+ _8 = REALPART_EXPR <_7>;
+ _9 = IMAGPART_EXPR <_7>;
+ _10 = _9 != 0; (or _9 == 0)
+ _11 = _10 ? _8 : a;
+ c = _11;
+ This can be optimized to a single conditional select instruction with an
+ overflowing arithmetic instruction. */
static bool
match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
@@ -4425,6 +4504,7 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
tree rhs1 = gimple_assign_rhs1 (stmt);
tree rhs2 = gimple_assign_rhs2 (stmt);
+ bool minmax_use_seen = false;
FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
{
use_stmt = USE_STMT (use_p);
@@ -4445,6 +4525,11 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
return false;
cond_stmt = use_stmt;
}
+ if (gassign *assign = dyn_cast<gassign *> (use_stmt))
+ {
+ tree_code rhs_code = gimple_assign_rhs_code (assign);
+ minmax_use_seen |= (rhs_code == MAX_EXPR || rhs_code == MIN_EXPR);
+ }
ovf_use_seen = true;
}
else
@@ -4494,7 +4579,10 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
tree maxval = NULL_TREE;
if (!ovf_use_seen
- || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
+ || (code != MULT_EXPR
+ && (code == BIT_NOT_EXPR
+ ? use_seen
+ : !minmax_use_seen && !use_seen))
|| (code == PLUS_EXPR
&& optab_handler (uaddv4_optab,
TYPE_MODE (type)) == CODE_FOR_nothing)
@@ -4786,16 +4874,25 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
if (gimple_code (use_stmt) == GIMPLE_COND)
{
gcond *cond_stmt = as_a <gcond *> (use_stmt);
- gimple_cond_set_lhs (cond_stmt, ovf);
- gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
- gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
+ tree rhs = gimple_cond_rhs (cond_stmt);
+ if (TREE_CODE (rhs) == MIN_EXPR || TREE_CODE (rhs) == MAX_EXPR)
+ build_minmax_replacement_statements (stmt, ovf, new_lhs, type,
+ use_stmt);
+ else
+ {
+ gimple_cond_set_lhs (cond_stmt, ovf);
+ gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
+ gimple_cond_set_code (cond_stmt,
+ ovf_use == 1 ? NE_EXPR : EQ_EXPR);
+ }
}
else
{
gcc_checking_assert (is_gimple_assign (use_stmt));
if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
{
- if (gimple_assign_rhs_code (use_stmt) == RSHIFT_EXPR)
+ tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
+ if (rhs_code == RSHIFT_EXPR)
{
g2 = gimple_build_assign (make_ssa_name (boolean_type_node),
ovf_use == 1 ? NE_EXPR : EQ_EXPR,
@@ -4843,6 +4940,9 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple
*stmt,
gsi_remove (&gsiu, true);
continue;
}
+ else if (rhs_code == MIN_EXPR || rhs_code == MAX_EXPR)
+ build_minmax_replacement_statements (stmt, ovf, new_lhs, type,
+ use_stmt);
else
{
gimple_assign_set_rhs1 (use_stmt, ovf);
--
2.44.0