From: Dhruv Chawla <dhr...@nvidia.com>

This patch folds the following patterns:
- max (a, add (a, b)) -> [sum, ovf] = addo (a, b); !ovf ? sum : a
- max (a, sub (a, b)) -> [sum, ovf] = subo (a, b); !ovf ? a : sum
- min (a, add (a, b)) -> [sum, ovf] = addo (a, b); !ovf ? a : sum
- min (a, sub (a, b)) -> [sum, ovf] = addo (a, b); !ovf ? sum : a

Where ovf is the overflow flag, addo and subo are overflowing addition and
subtraction, respectively. The folded patterns can normally be implemented as
an overflowing operation combined with a conditional move/select instruction.

Explanation for the conditions handled in arith_overflow_check_p:

Case 1/2: r = a + b; max/min (a, r) or max/min (r, a)
  lhs (r)
    if crhs1 (a) and crhs2 (r)
      => lhs (r) == crhs2 (r) &&
         (rhs1 (a or b) == crhs1 (a) || rhs2 (a or b) == crhs1 (a))
    if crhs1 (r) and crhs2 (a)
      => lhs (r) == crhs1 (r) &&
         (rhs1 (a or b) == crhs2 (a) || rhs2 (a or b) == crhs2 (a))

Both rhs1 and rhs2 are checked in (rhs<n> == crhs<n>) as addition is
commutative.

Case 3/4: r = a - b; max/min (a, r) or max/min (r, a)
  lhs (r)
    if crhs1 (a) and crhs2 (r)
      => lhs (r) == crhs2 (r) && rhs1 (a) == crhs1 (a)
    if crhs1 (r) and crhs2 (a)
      => lhs (r) == crhs1 (r) && rhs1 (a) == crhs2 (a)

Bootstrapped and regtested on aarch64-unknown-linux-gnu.

Signed-off-by: Dhruv Chawla <dhr...@nvidia.com>

gcc/ChangeLog:

        PR middle-end/116815
        * tree-ssa-math-opts.cc (arith_overflow_check_p): Match min/max
        patterns.
        (build_minmax_replacement_statements): New function.
        (match_arith_overflow): Update to handle min/max patterns.

gcc/testsuite/ChangeLog:

        * gcc.dg/tree-ssa/pr116815-1.c: New test.
        * gcc.dg/tree-ssa/pr116815-2.c: Likewise.
        * gcc.dg/tree-ssa/pr116815-3.c: Likewise.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c |  42 ++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c |  93 +++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c |  43 ++++++
 gcc/tree-ssa-math-opts.cc                  | 151 +++++++++++++++++++--
 4 files changed, 318 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c
new file mode 100644
index 00000000000..5d62843d63c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-1.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR middle-end/116815 */
+
+/* Single-use tests.  */
+
+static inline unsigned
+max (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned
+min (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)                                     
\
+  unsigned u##op##type##N (unsigned a, unsigned b) { return op (exp1, exp2); }
+
+OPERATION (max, add, 1, a, a + b)
+OPERATION (max, add, 2, a, b + a)
+OPERATION (max, add, 3, a + b, a)
+OPERATION (max, add, 4, b + a, a)
+
+OPERATION (min, add, 1, a, a + b)
+OPERATION (min, add, 2, a, b + a)
+OPERATION (min, add, 3, a + b, a)
+OPERATION (min, add, 4, b + a, a)
+
+OPERATION (max, sub, 1, a, a - b)
+OPERATION (max, sub, 2, a - b, a)
+
+OPERATION (min, sub, 1, a, a - b)
+OPERATION (min, sub, 2, a - b, a)
+
+/* { dg-final { scan-tree-dump-not "MAX_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "MIN_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 8 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 4 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c
new file mode 100644
index 00000000000..56e8038ef82
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-2.c
@@ -0,0 +1,93 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR middle-end/116815 */
+
+/* Negative tests.  */
+
+static inline int
+smax (int a, int b)
+{
+  return a > b ? a : b;
+}
+
+static inline int
+smin (int a, int b)
+{
+  return a < b ? a : b;
+}
+
+static inline unsigned
+umax (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned
+umin (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+#define ASSUME(cond) if (!(cond)) __builtin_unreachable ();
+
+/* This transformation does not trigger on signed types.  */
+
+int
+smax_add (int a, int b)
+{
+  ASSUME (b >= 0);
+  return smax (a, a + b);
+}
+
+int
+smin_add (int a, int b)
+{
+  ASSUME (b >= 0);
+  return smin (a, a + b);
+}
+
+int
+smax_sub (int a, int b)
+{
+  ASSUME (b >= 0);
+  return smax (a, a - b);
+}
+
+int
+smin_sub (int a, int b)
+{
+  ASSUME (b >= 0);
+  return smin (a, a - b);
+}
+
+/* Invalid patterns.  */
+
+/* This can potentially be matched, but the RHS gets factored to
+   (a + b) * b.  */
+unsigned
+umax_factored (unsigned a, unsigned b)
+{
+  return umax (a * b, a * b + b * b);
+}
+
+unsigned
+umin_mult (unsigned a, unsigned b)
+{
+  return umin (a, a * b);
+}
+
+unsigned
+umax_sub (unsigned a, unsigned b)
+{
+  return umax (a, b - a);
+}
+
+unsigned
+umin_sub (unsigned a, unsigned b)
+{
+  return umin (a, b - a);
+}
+
+/* { dg-final { scan-tree-dump-not "ADD_OVERFLOW" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "SUB_OVERFLOW" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c
new file mode 100644
index 00000000000..af1fe18d50a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr116815-3.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* PR middle-end/116815 */
+
+/* Multi-use tests.  */
+
+static inline unsigned
+max (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned
+min (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+unsigned
+umax_add_umin_add (unsigned a, unsigned b)
+{
+  return max (a, a + b) + min (a + b, b);
+}
+
+unsigned
+umin_add_umax_add (unsigned a, unsigned b)
+{
+  return min (a, b + a) + max (b + a, b);
+}
+
+unsigned
+multiple_paths (unsigned a, unsigned b)
+{
+  if (a > 5)
+    return max (a, a + b);
+  else
+    return min (a, a + b);
+}
+
+/* { dg-final { scan-tree-dump-not "MAX_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "MIN_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 3 "optimized" } } */
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 7e819f37446..f08cac68ca7 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -3981,11 +3981,26 @@ arith_overflow_check_p (gimple *stmt, gimple 
*cast_stmt, gimple *&use_stmt,
       return 1;
     }
 
-  if (TREE_CODE_CLASS (ccode) != tcc_comparison)
+  if (TREE_CODE_CLASS (ccode) != tcc_comparison
+      && TREE_CODE_CLASS (ccode) != tcc_binary)
     return 0;
 
   switch (ccode)
     {
+    case MAX_EXPR:
+    case MIN_EXPR:
+      /* 1. r = a + b; max (a, r) or max (r, a)
+        2. r = a + b; min (a, r) or min (r, a)
+        3. r = a - b; max (a, r) or max (r, a)
+        4. r = a - b; min (a, r) or min (r, a)  */
+      if ((code == PLUS_EXPR
+          && ((lhs == crhs1 && (rhs1 == crhs2 || rhs2 == crhs2))
+              || (lhs == crhs2 && (rhs1 == crhs1 || rhs2 == crhs2))))
+         || (code == MINUS_EXPR
+             && ((lhs == crhs1 && rhs1 == crhs2)
+                 || (lhs == crhs2 && rhs1 == crhs1))))
+       return 1;
+      break;
     case GT_EXPR:
     case LE_EXPR:
       if (maxval)
@@ -4339,6 +4354,73 @@ match_saturation_trunc (gimple_stmt_iterator *gsi, gphi 
*phi)
   return true;
 }
 
+/* Assume that ovf = overflow_flag (add/sub (...)).
+   The replacement forms are:
+     max (add) -> ovf ? a : a + b
+     min (sub) -> ovf ? a : a - b
+     max (sub) -> ovf ? a - b : a
+     min (add) -> ovf ? a + b : a.  */
+
+static tree
+build_minmax_replacement_statements (gimple *def_stmt, tree ovf, tree new_lhs,
+                                    tree type, gimple *minmax_stmt)
+{
+  enum tree_code code = gimple_assign_rhs_code (def_stmt);
+  enum tree_code rhs_code = gimple_assign_rhs_code (minmax_stmt);
+  gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
+
+  tree lhs = gimple_assign_lhs (def_stmt);
+  tree rhs1 = gimple_assign_rhs1 (def_stmt);
+  tree rhs2 = gimple_assign_rhs2 (def_stmt);
+
+  tree use_rhs1 = gimple_assign_rhs1 (minmax_stmt);
+  tree use_rhs2 = gimple_assign_rhs2 (minmax_stmt);
+
+  /* First figure out which variable from def_stmt will be used in the
+     COND_EXPR.  */
+  tree minmax_var = NULL_TREE;
+  /* Either max/min (a, add/sub (a, b)) or
+           max/min (add/sub (a, b), a).  */
+  if ((lhs == use_rhs2 && use_rhs1 == rhs1)
+      || (lhs == use_rhs1 && use_rhs2 == rhs1))
+    minmax_var = rhs1;
+  /* Either max/min (a, add (b, a)) or
+           max/min (add (b, a), a).  */
+  else if (code == PLUS_EXPR)
+    minmax_var = rhs2;
+
+  /* The above should always match rhs1 for MINUS_EXPR.  */
+  gcc_checking_assert (
+    minmax_var != NULL_TREE
+    && (code == PLUS_EXPR || (use_rhs1 != rhs2 && use_rhs2 != rhs2)));
+
+  /* Figure out if we have to generate:
+       (ovf != 0 ? new_lhs : minmax_var) or
+       (ovf != 0 ? minmax_var : new_lhs) i.e. (ovf == 0 ? new_lhs : 
minmax_var).
+     The default case is assumed to be the first one.  */
+  bool flip = false;
+  if ((rhs_code == MIN_EXPR && code == PLUS_EXPR)
+      || (rhs_code == MAX_EXPR && code == MINUS_EXPR))
+    flip = true;
+
+  /* Generate the actual code.  */
+  tree minmax = make_ssa_name (type);
+  tree comparison_result = make_ssa_name (boolean_type_node);
+  tree comparison_expr = build2 (flip ? EQ_EXPR : NE_EXPR, boolean_type_node,
+                                ovf, build_int_cst (type, 0));
+  gimple *comparison_stmt
+    = gimple_build_assign (comparison_result, comparison_expr);
+
+  tree conditional
+    = build3 (COND_EXPR, type, comparison_result, minmax_var, new_lhs);
+  gimple *new_minmax_stmt = gimple_build_assign (minmax, conditional);
+  gimple_stmt_iterator gsi = gsi_for_stmt (minmax_stmt);
+  gsi_insert_before (&gsi, comparison_stmt, GSI_NEW_STMT);
+  gsi_insert_after (&gsi, new_minmax_stmt, GSI_NEW_STMT);
+
+  return minmax;
+}
+
 /* Recognize for unsigned x
    x = y - z;
    if (x > y)
@@ -4391,7 +4473,19 @@ match_saturation_trunc (gimple_stmt_iterator *gsi, gphi 
*phi)
    z = IMAGPART_EXPR <_7>;
    _8 = IMAGPART_EXPR <_7>;
    _9 = _8 != 0;
-   iftmp.0_3 = (int) _9;  */
+   iftmp.0_3 = (int) _9;
+
+   And also recognize:
+   c = max/min (a, add/sub (a, b))
+   and replace it with
+   _7 = .(ADD|SUB)_OVERFLOW (a, b);
+   _8 = REALPART_EXPR <_7>;
+   _9 = IMAGPART_EXPR <_7>;
+   _10 = _9 != 0; (or _9 == 0)
+   _11 = _10 ? _8 : a;
+   c = _11;
+   This can be optimized to a single conditional select instruction with an
+   overflowing arithmetic instruction.  */
 
 static bool
 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
@@ -4425,6 +4519,7 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple 
*stmt,
 
   tree rhs1 = gimple_assign_rhs1 (stmt);
   tree rhs2 = gimple_assign_rhs2 (stmt);
+  bool minmax_use_seen = false;
   FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
     {
       use_stmt = USE_STMT (use_p);
@@ -4445,6 +4540,13 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple 
*stmt,
                return false;
              cond_stmt = use_stmt;
            }
+         if (gimple_code (use_stmt) == GIMPLE_ASSIGN
+             && gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
+           {
+             tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
+             if (rhs_code == MAX_EXPR || rhs_code == MIN_EXPR)
+               minmax_use_seen = true;
+           }
          ovf_use_seen = true;
        }
       else
@@ -4494,7 +4596,10 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple 
*stmt,
 
   tree maxval = NULL_TREE;
   if (!ovf_use_seen
-      || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
+      || (code != MULT_EXPR
+         && (code == BIT_NOT_EXPR
+               ? use_seen
+               : !minmax_use_seen && !use_seen))
       || (code == PLUS_EXPR
          && optab_handler (uaddv4_optab,
                            TYPE_MODE (type)) == CODE_FOR_nothing)
@@ -4758,6 +4863,7 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple 
*stmt,
     gsi_insert_after (gsi, g2, GSI_NEW_STMT);
   else
     gsi_insert_before (gsi, g2, GSI_SAME_STMT);
+
   if (code == MULT_EXPR)
     mul_stmts.quick_push (g2);
 
@@ -4786,15 +4892,25 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple 
*stmt,
       if (gimple_code (use_stmt) == GIMPLE_COND)
        {
          gcond *cond_stmt = as_a <gcond *> (use_stmt);
-         gimple_cond_set_lhs (cond_stmt, ovf);
-         gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
-         gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
+         tree rhs = gimple_cond_rhs (cond_stmt);
+         if (TREE_CODE (rhs) == MIN_EXPR || TREE_CODE (rhs) == MAX_EXPR)
+           gimple_cond_set_rhs (cond_stmt,
+                                build_minmax_replacement_statements (
+                                  stmt, ovf, new_lhs, type, use_stmt));
+         else
+           {
+             gimple_cond_set_lhs (cond_stmt, ovf);
+             gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
+             gimple_cond_set_code (cond_stmt,
+                                   ovf_use == 1 ? NE_EXPR : EQ_EXPR);
+           }
        }
       else
        {
          gcc_checking_assert (is_gimple_assign (use_stmt));
          if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
            {
+             tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
              if (gimple_assign_rhs_code (use_stmt) == RSHIFT_EXPR)
                {
                  g2 = gimple_build_assign (make_ssa_name (boolean_type_node),
@@ -4843,6 +4959,14 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple 
*stmt,
                  gsi_remove (&gsiu, true);
                  continue;
                }
+             else if (rhs_code == MIN_EXPR || rhs_code == MAX_EXPR)
+               {
+                 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
+                 gimple_assign_set_rhs_from_tree (
+                   &gsi,
+                   build_minmax_replacement_statements (stmt, ovf, new_lhs,
+                                                        type, use_stmt));
+               }
              else
                {
                  gimple_assign_set_rhs1 (use_stmt, ovf);
@@ -4854,11 +4978,16 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple 
*stmt,
            }
          else
            {
-             gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
-                                  == COND_EXPR);
-             tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
-                                 boolean_type_node, ovf,
-                                 build_int_cst (type, 0));
+             tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
+             gcc_checking_assert (rhs_code == COND_EXPR || rhs_code == MAX_EXPR
+                                  || rhs_code == MIN_EXPR);
+             tree cond = NULL_TREE;
+             if (rhs_code != COND_EXPR)
+               cond = build_minmax_replacement_statements (stmt, ovf, new_lhs,
+                                                           type, use_stmt);
+             else
+               cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
+                              boolean_type_node, ovf, build_int_cst (type, 0));
              gimple_assign_set_rhs1 (use_stmt, cond);
            }
        }
-- 
2.44.0

Reply via email to