On December 30, 2020 10:18:52 AM GMT+01:00, Jakub Jelinek <ja...@redhat.com> 
wrote:
>Hi!
>
>The following patch adds an optimization mentioned in PR56719 #c8.
>We already have the x != 0 && y != 0 && z != 0 into (x | y | z) != 0
>and x != -1 && y != -1 && y != -1 into (x & y & z) != -1
>optimizations, this patch just extends that to
>x < C && y < C && z < C for power of two constants C into
>(x | y | z) < C (for unsigned comparisons).
>
>I didn't want to create too many buckets (there can be TYPE_PRECISION
>such
>constants), so the patch instead just uses one buckets for all such
>constants and loops over that bucket up to TYPE_PRECISION times.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok. 

Richard. 

>2020-12-30  Jakub Jelinek  <ja...@redhat.com>
>
>       PR tree-optimization/56719
>       * tree-ssa-reassoc.c (optimize_range_tests_cmp_bitwise): Also optimize
>       x < C && y < C && z < C when C is a power of two constant into
>       (x | y | z) < C.
>
>       * gcc.dg/tree-ssa/pr56719.c: New test.
>
>--- gcc/tree-ssa-reassoc.c.jj  2020-12-01 13:19:12.859127403 +0100
>+++ gcc/tree-ssa-reassoc.c     2020-12-29 12:42:23.432102952 +0100
>@@ -3317,7 +3317,9 @@ optimize_range_tests_to_bit_test (enum t
> }
> 
> /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
>-   and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
> */
>+   and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
>+   Also, handle x < C && y < C && z < C where C is power of two as
>+   (x | y | z) < C.  */
> 
> static bool
>optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int
>length,
>@@ -3333,20 +3335,44 @@ optimize_range_tests_cmp_bitwise (enum t
> 
>   for (i = first; i < length; i++)
>     {
>+      int idx;
>+
>       if (ranges[i].exp == NULL_TREE
>         || TREE_CODE (ranges[i].exp) != SSA_NAME
>         || !ranges[i].in_p
>         || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
>-        || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
>-        || ranges[i].low == NULL_TREE
>-        || ranges[i].low != ranges[i].high)
>+        || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
>       continue;
> 
>-      bool zero_p = integer_zerop (ranges[i].low);
>-      if (!zero_p && !integer_all_onesp (ranges[i].low))
>+      if (ranges[i].low != NULL_TREE
>+        && ranges[i].high != NULL_TREE
>+        && tree_int_cst_equal (ranges[i].low, ranges[i].high))
>+      {
>+        idx = !integer_zerop (ranges[i].low);
>+        if (idx && !integer_all_onesp (ranges[i].low))
>+          continue;
>+      }
>+      else if (ranges[i].high != NULL_TREE
>+             && TREE_CODE (ranges[i].high) == INTEGER_CST)
>+      {
>+        wide_int w = wi::to_wide (ranges[i].high);
>+        int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
>+        int l = wi::clz (w);
>+        idx = 2;
>+        if (l <= 0
>+            || l >= prec
>+            || w != wi::mask (prec - l, false, prec))
>+          continue;
>+        if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
>+               && ranges[i].low == NULL_TREE)
>+              || (ranges[i].low
>+                  && integer_zerop (ranges[i].low))))
>+          continue;
>+      }
>+      else
>       continue;
> 
>-      b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
>+      b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 3 + idx;
>       if (buckets.length () <= b)
>       buckets.safe_grow_cleared (b + 1, true);
>       if (chains.length () <= (unsigned) i)
>@@ -3359,6 +3385,44 @@ optimize_range_tests_cmp_bitwise (enum t
>     if (i && chains[i - 1])
>       {
>       int j, k = i;
>+      if ((b % 3) == 2)
>+        {
>+          /* When ranges[X - 1].high + 1 is a power of two,
>+             we need to process the same bucket up to
>+             precision - 1 times, each time split the entries
>+             with the same high bound into one chain and the
>+             rest into another one to be processed later.  */
>+          int this_prev = i;
>+          int other_prev = 0;
>+          for (j = chains[i - 1]; j; j = chains[j - 1])
>+            {
>+              if (tree_int_cst_equal (ranges[i - 1].high,
>+                                      ranges[j - 1].high))
>+                {
>+                  chains[this_prev - 1] = j;
>+                  this_prev = j;
>+                }
>+              else if (other_prev == 0)
>+                {
>+                  buckets[b] = j;
>+                  other_prev = j;
>+                }
>+              else
>+                {
>+                  chains[other_prev - 1] = j;
>+                  other_prev = j;
>+                }
>+            }
>+          chains[this_prev - 1] = 0;
>+          if (other_prev)
>+            chains[other_prev - 1] = 0;
>+          if (chains[i - 1] == 0)
>+            {
>+              if (other_prev)
>+                b--;
>+              continue;
>+            }
>+        }
>       for (j = chains[i - 1]; j; j = chains[j - 1])
>         {
>           gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
>@@ -3426,8 +3490,8 @@ optimize_range_tests_cmp_bitwise (enum t
>               exp = gimple_assign_lhs (g);
>             }
>           g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
>-                                   (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
>-                                   op, exp);
>+                                   (b % 3) == 1
>+                                   ? BIT_AND_EXPR : BIT_IOR_EXPR, op, exp);
>           gimple_seq_add_stmt_without_update (&seq, g);
>           op = gimple_assign_lhs (g);
>         }
>@@ -3435,10 +3499,13 @@ optimize_range_tests_cmp_bitwise (enum t
>       if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
>                              candidates.length (), opcode, ops, op,
>                              seq, true, ranges[k - 1].low,
>-                             ranges[k - 1].low, strict_overflow_p))
>+                             ranges[k - 1].high, strict_overflow_p))
>         any_changes = true;
>       else
>         gimple_seq_discard (seq);
>+      if ((b % 3) == 2 && buckets[b] != i)
>+        /* There is more work to do for this bucket.  */
>+        b--;
>       }
> 
>   return any_changes;
>--- gcc/testsuite/gcc.dg/tree-ssa/pr56719.c.jj 2020-12-29
>12:58:31.662269428 +0100
>+++ gcc/testsuite/gcc.dg/tree-ssa/pr56719.c    2020-12-29
>12:57:47.367768181 +0100
>@@ -0,0 +1,33 @@
>+/* PR tree-optimization/56719 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-optimized" } */
>+/* { dg-final { scan-tree-dump-times " > 1023;" 1 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times " > 2047;" 1 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times " > 8191;" 1 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times " <= 1023;" 1 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times " <= 4095;" 1 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times " <= 8191;" 1 "optimized" } } */
>+
>+int
>+f1 (int x, int y)
>+{
>+  return x > 0x3ffU || y > 0x3ffU;
>+}
>+
>+int
>+f2 (int x, int y, int z, unsigned w)
>+{
>+  return x > 0x1fffU || z > 0x7ffU || w > 0x7ffU || y > 0x1fffU;
>+}
>+
>+int
>+f3 (int x, int y)
>+{
>+  return x <= 0x3ffU && y <= 0x3ffU;
>+}
>+
>+int
>+f4 (int x, int y, unsigned z, unsigned w)
>+{
>+  return x <= 0x1fffU && z <= 0xfff && w <= 0xfff && y <= 0x1fffU;
>+}
>
>       Jakub

Reply via email to