On December 30, 2020 10:18:52 AM GMT+01:00, Jakub Jelinek <ja...@redhat.com> wrote: >Hi! > >The following patch adds an optimization mentioned in PR56719 #c8. >We already have the x != 0 && y != 0 && z != 0 into (x | y | z) != 0 >and x != -1 && y != -1 && y != -1 into (x & y & z) != -1 >optimizations, this patch just extends that to >x < C && y < C && z < C for power of two constants C into >(x | y | z) < C (for unsigned comparisons). > >I didn't want to create too many buckets (there can be TYPE_PRECISION >such >constants), so the patch instead just uses one buckets for all such >constants and loops over that bucket up to TYPE_PRECISION times. > >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok. Richard. >2020-12-30 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/56719 > * tree-ssa-reassoc.c (optimize_range_tests_cmp_bitwise): Also optimize > x < C && y < C && z < C when C is a power of two constant into > (x | y | z) < C. > > * gcc.dg/tree-ssa/pr56719.c: New test. > >--- gcc/tree-ssa-reassoc.c.jj 2020-12-01 13:19:12.859127403 +0100 >+++ gcc/tree-ssa-reassoc.c 2020-12-29 12:42:23.432102952 +0100 >@@ -3317,7 +3317,9 @@ optimize_range_tests_to_bit_test (enum t > } > > /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0 >- and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1. > */ >+ and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1. >+ Also, handle x < C && y < C && z < C where C is power of two as >+ (x | y | z) < C. */ > > static bool >optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int >length, >@@ -3333,20 +3335,44 @@ optimize_range_tests_cmp_bitwise (enum t > > for (i = first; i < length; i++) > { >+ int idx; >+ > if (ranges[i].exp == NULL_TREE > || TREE_CODE (ranges[i].exp) != SSA_NAME > || !ranges[i].in_p > || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1 >- || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE >- || ranges[i].low == NULL_TREE >- || ranges[i].low != ranges[i].high) >+ || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE) > continue; > >- bool zero_p = integer_zerop (ranges[i].low); >- if (!zero_p && !integer_all_onesp (ranges[i].low)) >+ if (ranges[i].low != NULL_TREE >+ && ranges[i].high != NULL_TREE >+ && tree_int_cst_equal (ranges[i].low, ranges[i].high)) >+ { >+ idx = !integer_zerop (ranges[i].low); >+ if (idx && !integer_all_onesp (ranges[i].low)) >+ continue; >+ } >+ else if (ranges[i].high != NULL_TREE >+ && TREE_CODE (ranges[i].high) == INTEGER_CST) >+ { >+ wide_int w = wi::to_wide (ranges[i].high); >+ int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)); >+ int l = wi::clz (w); >+ idx = 2; >+ if (l <= 0 >+ || l >= prec >+ || w != wi::mask (prec - l, false, prec)) >+ continue; >+ if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp)) >+ && ranges[i].low == NULL_TREE) >+ || (ranges[i].low >+ && integer_zerop (ranges[i].low)))) >+ continue; >+ } >+ else > continue; > >- b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p; >+ b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 3 + idx; > if (buckets.length () <= b) > buckets.safe_grow_cleared (b + 1, true); > if (chains.length () <= (unsigned) i) >@@ -3359,6 +3385,44 @@ optimize_range_tests_cmp_bitwise (enum t > if (i && chains[i - 1]) > { > int j, k = i; >+ if ((b % 3) == 2) >+ { >+ /* When ranges[X - 1].high + 1 is a power of two, >+ we need to process the same bucket up to >+ precision - 1 times, each time split the entries >+ with the same high bound into one chain and the >+ rest into another one to be processed later. */ >+ int this_prev = i; >+ int other_prev = 0; >+ for (j = chains[i - 1]; j; j = chains[j - 1]) >+ { >+ if (tree_int_cst_equal (ranges[i - 1].high, >+ ranges[j - 1].high)) >+ { >+ chains[this_prev - 1] = j; >+ this_prev = j; >+ } >+ else if (other_prev == 0) >+ { >+ buckets[b] = j; >+ other_prev = j; >+ } >+ else >+ { >+ chains[other_prev - 1] = j; >+ other_prev = j; >+ } >+ } >+ chains[this_prev - 1] = 0; >+ if (other_prev) >+ chains[other_prev - 1] = 0; >+ if (chains[i - 1] == 0) >+ { >+ if (other_prev) >+ b--; >+ continue; >+ } >+ } > for (j = chains[i - 1]; j; j = chains[j - 1]) > { > gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp); >@@ -3426,8 +3490,8 @@ optimize_range_tests_cmp_bitwise (enum t > exp = gimple_assign_lhs (g); > } > g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2), >- (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR, >- op, exp); >+ (b % 3) == 1 >+ ? BIT_AND_EXPR : BIT_IOR_EXPR, op, exp); > gimple_seq_add_stmt_without_update (&seq, g); > op = gimple_assign_lhs (g); > } >@@ -3435,10 +3499,13 @@ optimize_range_tests_cmp_bitwise (enum t > if (update_range_test (&ranges[k - 1], NULL, candidates.address (), > candidates.length (), opcode, ops, op, > seq, true, ranges[k - 1].low, >- ranges[k - 1].low, strict_overflow_p)) >+ ranges[k - 1].high, strict_overflow_p)) > any_changes = true; > else > gimple_seq_discard (seq); >+ if ((b % 3) == 2 && buckets[b] != i) >+ /* There is more work to do for this bucket. */ >+ b--; > } > > return any_changes; >--- gcc/testsuite/gcc.dg/tree-ssa/pr56719.c.jj 2020-12-29 >12:58:31.662269428 +0100 >+++ gcc/testsuite/gcc.dg/tree-ssa/pr56719.c 2020-12-29 >12:57:47.367768181 +0100 >@@ -0,0 +1,33 @@ >+/* PR tree-optimization/56719 */ >+/* { dg-do compile } */ >+/* { dg-options "-O2 -fdump-tree-optimized" } */ >+/* { dg-final { scan-tree-dump-times " > 1023;" 1 "optimized" } } */ >+/* { dg-final { scan-tree-dump-times " > 2047;" 1 "optimized" } } */ >+/* { dg-final { scan-tree-dump-times " > 8191;" 1 "optimized" } } */ >+/* { dg-final { scan-tree-dump-times " <= 1023;" 1 "optimized" } } */ >+/* { dg-final { scan-tree-dump-times " <= 4095;" 1 "optimized" } } */ >+/* { dg-final { scan-tree-dump-times " <= 8191;" 1 "optimized" } } */ >+ >+int >+f1 (int x, int y) >+{ >+ return x > 0x3ffU || y > 0x3ffU; >+} >+ >+int >+f2 (int x, int y, int z, unsigned w) >+{ >+ return x > 0x1fffU || z > 0x7ffU || w > 0x7ffU || y > 0x1fffU; >+} >+ >+int >+f3 (int x, int y) >+{ >+ return x <= 0x3ffU && y <= 0x3ffU; >+} >+ >+int >+f4 (int x, int y, unsigned z, unsigned w) >+{ >+ return x <= 0x1fffU && z <= 0xfff && w <= 0xfff && y <= 0x1fffU; >+} > > Jakub