On December 12, 2020 9:01:50 AM GMT+01:00, Jakub Jelinek <ja...@redhat.com> 
wrote:
>Hi!
>
>The following patch recognizes another form of hand written
>__builtin_add_overflow (this time _p), in particular when
>the code does unsigned
>if (x > ~0U - y)
>or
>if (x <= ~0U - y)
>it can be optimized (if the subtraction turned into ~y is single use)
>into
>if (__builtin_add_overflow_p (x, y, 0U))
>or
>if (!__builtin_add_overflow_p (x, y, 0U))
>and generate better code, e.g. for the first function in the testcase:
>-      movl    %esi, %eax
>       addl    %edi, %esi
>-      notl    %eax
>-      cmpl    %edi, %eax
>-      movl    $-1, %eax
>-      cmovnb  %esi, %eax
>+      jc      .L3
>+      movl    %esi, %eax
>+      ret
>+.L3:
>+      orl     $-1, %eax
>       ret
>on x86_64.  As for the jumps vs. conditional move case, that is some CE
>issue with complex branch patterns we should fix up no matter what, but
>in this case I'm actually not sure if branchy code isn't better,
>overflow
>is something that isn't that common.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok. 

Richard. 

>2020-12-12  Jakub Jelinek  <ja...@redhat.com>
>
>       PR tree-optimization/96272
>       * tree-ssa-math-opts.c (uaddsub_overflow_check_p): Add OTHER argument.
>       Handle BIT_NOT_EXPR.
>       (match_uaddsub_overflow): Optimize unsigned a > ~b into
>       __imag__ .ADD_OVERFLOW (a, b).
>       (math_opts_dom_walker::after_dom_children): Call
>match_uaddsub_overflow
>       even for BIT_NOT_EXPR.
>
>       * gcc.dg/tree-ssa/pr96272.c: New test.
>
>--- gcc/tree-ssa-math-opts.c.jj        2020-11-22 19:16:25.302440812 +0100
>+++ gcc/tree-ssa-math-opts.c   2020-12-11 16:19:17.314846597 +0100
>@@ -3457,7 +3457,8 @@ convert_mult_to_fma (gimple *mul_stmt, t
>    and 0 otherwise.  */
> 
> static int
>-uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt, tree maxval)
>+uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt, tree maxval,
>+                        tree *other)
> {
>   enum tree_code ccode = ERROR_MARK;
>   tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
>@@ -3520,6 +3521,13 @@ uaddsub_overflow_check_p (gimple *stmt,
>         || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
>             && crhs2 == lhs))
>       return ccode == GT_EXPR ? 1 : -1;
>+      /* r = ~a; b > r or b <= r.  */
>+      if (code == BIT_NOT_EXPR && crhs2 == lhs)
>+      {
>+        if (other)
>+          *other = crhs1;
>+        return ccode == GT_EXPR ? 1 : -1;
>+      }
>       break;
>     case LT_EXPR:
>     case GE_EXPR:
>@@ -3531,6 +3539,13 @@ uaddsub_overflow_check_p (gimple *stmt,
>         || (code == PLUS_EXPR && crhs1 == lhs
>             && (crhs2 == rhs1 || crhs2 == rhs2)))
>       return ccode == LT_EXPR ? 1 : -1;
>+      /* r = ~a; r < b or r >= b.  */
>+      if (code == BIT_NOT_EXPR && crhs1 == lhs)
>+      {
>+        if (other)
>+          *other = crhs2;
>+        return ccode == LT_EXPR ? 1 : -1;
>+      }
>       break;
>     default:
>       break;
>@@ -3560,7 +3575,15 @@ uaddsub_overflow_check_p (gimple *stmt,
>    _9 = REALPART_EXPR <_7>;
>    _8 = IMAGPART_EXPR <_8>;
>    if (_8)
>-   and replace (utype) x with _9.  */
>+   and replace (utype) x with _9.
>+
>+   Also recognize:
>+   x = ~z;
>+   if (y > x)
>+   and replace it with
>+   _7 = ADD_OVERFLOW (y, z);
>+   _8 = IMAGPART_EXPR <_8>;
>+   if (_8)  */
> 
> static bool
> match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
>@@ -3576,34 +3599,49 @@ match_uaddsub_overflow (gimple_stmt_iter
>   gimple *add_stmt = NULL;
>   bool add_first = false;
> 
>-  gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
>+  gcc_checking_assert (code == PLUS_EXPR
>+                     || code == MINUS_EXPR
>+                     || code == BIT_NOT_EXPR);
>   if (!INTEGRAL_TYPE_P (type)
>       || !TYPE_UNSIGNED (type)
>       || has_zero_uses (lhs)
>-      || (code == MINUS_EXPR
>-        && optab_handler (usubv4_optab,
>+      || (code != PLUS_EXPR
>+        && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
>                           TYPE_MODE (type)) == CODE_FOR_nothing))
>     return false;
> 
>+  tree rhs1 = gimple_assign_rhs1 (stmt);
>+  tree rhs2 = gimple_assign_rhs2 (stmt);
>   FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
>     {
>       use_stmt = USE_STMT (use_p);
>       if (is_gimple_debug (use_stmt))
>       continue;
> 
>-      if (uaddsub_overflow_check_p (stmt, use_stmt, NULL_TREE))
>-      ovf_use_seen = true;
>+      tree other = NULL_TREE;
>+      if (uaddsub_overflow_check_p (stmt, use_stmt, NULL_TREE,
>&other))
>+      {
>+        if (code == BIT_NOT_EXPR)
>+          {
>+            gcc_assert (other);
>+            if (TREE_CODE (other) != SSA_NAME)
>+              return false;
>+            if (rhs2 == NULL)
>+              rhs2 = other;
>+            else if (rhs2 != other)
>+              return false;
>+          }
>+        ovf_use_seen = true;
>+      }
>       else
>       use_seen = true;
>       if (ovf_use_seen && use_seen)
>       break;
>     }
> 
>-  tree rhs1 = gimple_assign_rhs1 (stmt);
>-  tree rhs2 = gimple_assign_rhs2 (stmt);
>   tree maxval = NULL_TREE;
>   if (!ovf_use_seen
>-      || !use_seen
>+      || (code == BIT_NOT_EXPR ? use_seen : !use_seen)
>       || (code == PLUS_EXPR
>         && optab_handler (uaddv4_optab,
>                           TYPE_MODE (type)) == CODE_FOR_nothing))
>@@ -3664,7 +3702,7 @@ match_uaddsub_overflow (gimple_stmt_iter
>         if (is_gimple_debug (use_stmt))
>           continue;
> 
>-        if (uaddsub_overflow_check_p (stmt, use_stmt, maxval))
>+        if (uaddsub_overflow_check_p (stmt, use_stmt, maxval, NULL))
>           {
>             ovf_use_seen = true;
>             use_bb = gimple_bb (use_stmt);
>@@ -3781,23 +3819,27 @@ match_uaddsub_overflow (gimple_stmt_iter
>     }
> 
>   tree ctype = build_complex_type (type);
>-  gcall *g = gimple_build_call_internal (code == PLUS_EXPR
>+  gcall *g = gimple_build_call_internal (code != MINUS_EXPR
>                                        ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
>                                        2, rhs1, rhs2);
>   tree ctmp = make_ssa_name (ctype);
>   gimple_call_set_lhs (g, ctmp);
>   gsi_insert_before (gsi, g, GSI_SAME_STMT);
>   tree new_lhs = maxval ? make_ssa_name (type) : lhs;
>-  gassign *g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
>-                                   build1 (REALPART_EXPR, type, ctmp));
>-  if (maxval)
>+  gassign *g2;
>+  if (code != BIT_NOT_EXPR)
>     {
>-      gsi_insert_before (gsi, g2, GSI_SAME_STMT);
>-      if (add_first)
>-      *gsi = gsi_for_stmt (stmt);
>+      g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
>+                              build1 (REALPART_EXPR, type, ctmp));
>+      if (maxval)
>+      {
>+        gsi_insert_before (gsi, g2, GSI_SAME_STMT);
>+        if (add_first)
>+          *gsi = gsi_for_stmt (stmt);
>+      }
>+      else
>+      gsi_replace (gsi, g2, true);
>     }
>-  else
>-    gsi_replace (gsi, g2, true);
>   tree ovf = make_ssa_name (type);
>   g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
>                           build1 (IMAGPART_EXPR, type, ctmp));
>@@ -3808,9 +3850,10 @@ match_uaddsub_overflow (gimple_stmt_iter
>       if (is_gimple_debug (use_stmt))
>       continue;
> 
>-      int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt, maxval);
>+      int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt, maxval,
>NULL);
>       if (ovf_use == 0)
>       {
>+        gcc_assert (code != BIT_NOT_EXPR);
>         if (maxval)
>           {
>             tree use_lhs = gimple_assign_lhs (use_stmt);
>@@ -3863,6 +3906,12 @@ match_uaddsub_overflow (gimple_stmt_iter
>         gsi_replace (&gsi2, g, true);
>       }
>     }
>+  else if (code == BIT_NOT_EXPR)
>+    {
>+      gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
>+      gsi_remove (&gsi2, true);
>+      release_ssa_name (lhs);
>+    }
>   return true;
> }
> 
>@@ -4188,6 +4237,10 @@ math_opts_dom_walker::after_dom_children
>               match_uaddsub_overflow (&gsi, stmt, code);
>             break;
> 
>+          case BIT_NOT_EXPR:
>+            match_uaddsub_overflow (&gsi, stmt, code);
>+            break;
>+
>           case TRUNC_MOD_EXPR:
>             convert_to_divmod (as_a<gassign *> (stmt));
>             break;
>--- gcc/testsuite/gcc.dg/tree-ssa/pr96272.c.jj 2020-12-11
>16:30:18.498463346 +0100
>+++ gcc/testsuite/gcc.dg/tree-ssa/pr96272.c    2020-12-11
>16:30:06.344599075 +0100
>@@ -0,0 +1,37 @@
>+/* PR tree-optimization/96272 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-widening_mul" } */
>+
>+unsigned
>+foo (unsigned a, unsigned b)
>+{
>+  if (a > ~0U - b)
>+    return ~0U;
>+  return a + b;
>+}
>+
>+unsigned
>+bar (unsigned a, unsigned b)
>+{
>+  if (a <= ~0U - b)
>+    return ~0U;
>+  return a + b;
>+}
>+
>+unsigned
>+baz (unsigned a, unsigned b)
>+{
>+  if (~0U - b < a)
>+    return ~0U;
>+  return a + b;
>+}
>+
>+unsigned
>+qux (unsigned a, unsigned b)
>+{
>+  if (~0U - b >= a)
>+    return ~0U;
>+  return a + b;
>+}
>+
>+/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 4 "widening_mul" {
>target { i?86-*-* x86_64-*-* } } } } */
>
>       Jakub

Reply via email to