Hello. All right, I come up with a rapid speed up that can allow us to remove the introduced parameter. It contains 2 parts: - BIT TEST: we allow at maximum a range that is smaller GET_MODE_BITSIZE - JT: we spent quite some time in density calculation, we can guess it first and it leads to a fast bail out.
Patch can bootstrap on x86_64-linux-gnu and survives regression tests. Ready to be installed? Thanks, Martin
>From dc4c1d129a50c7f51d28235506479f29d51dae07 Mon Sep 17 00:00:00 2001 From: Martin Liska <mli...@suse.cz> Date: Thu, 24 Sep 2020 13:34:13 +0200 Subject: [PATCH 2/2] switch conversion: make a rapid speed up gcc/ChangeLog: PR tree-optimization/96979 * tree-switch-conversion.c (jump_table_cluster::can_be_handled): Make a fast bail out. (bit_test_cluster::can_be_handled): Likewise here. * tree-switch-conversion.h (get_range): Use wi::to_wide instead of a folding. gcc/testsuite/ChangeLog: PR tree-optimization/96979 * g++.dg/tree-ssa/pr96979.C: New test. --- gcc/testsuite/g++.dg/tree-ssa/pr96979.C | 48 +++++++++++++++++++++++++ gcc/tree-switch-conversion.c | 32 ++++++++++++----- gcc/tree-switch-conversion.h | 7 ++-- 3 files changed, 74 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr96979.C diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C new file mode 100644 index 00000000000..ec0f57a8548 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C @@ -0,0 +1,48 @@ +/* PR tree-optimization/96979 */ +/* { dg-do compile } */ +/* { dg-options "-std=c++17 -O2" } */ + +using u64 = unsigned long long; + +constexpr inline u64 +foo (const char *str) noexcept +{ + u64 value = 0xcbf29ce484222325ULL; + for (u64 i = 0; str[i]; i++) + value = (value ^ u64(str[i])) * 0x100000001b3ULL; + return value; +} + +struct V +{ + enum W + { +#define A(n) n, +#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9) +#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9) +#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9) +#define E D(foo1) D(foo2) D(foo3) + E + last + }; + + constexpr static W + bar (const u64 h) noexcept + { + switch (h) + { +#undef A +#define F(n) #n +#define A(n) case foo (F(n)): return n; + E + } + return last; + } +}; + +int +baz (const char *s) +{ + const u64 h = foo (s); + return V::bar (h); +} diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c index 186411ff3c4..3212e964b84 100644 --- a/gcc/tree-switch-conversion.c +++ b/gcc/tree-switch-conversion.c @@ -1268,6 +1268,15 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters, if (range == 0) return false; + unsigned HOST_WIDE_INT lhs = 100 * range; + if (lhs < range) + return false; + + /* First make quick guess as each cluster + can add at maximum 2 to the comparison_count. */ + if (lhs > 2 * max_ratio * (end - start + 1)) + return false; + unsigned HOST_WIDE_INT comparison_count = 0; for (unsigned i = start; i <= end; i++) { @@ -1275,10 +1284,6 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters, comparison_count += sc->m_range_p ? 2 : 1; } - unsigned HOST_WIDE_INT lhs = 100 * range; - if (lhs < range) - return false; - return lhs <= max_ratio * comparison_count; } @@ -1364,12 +1369,12 @@ bit_test_cluster::can_be_handled (unsigned HOST_WIDE_INT range, { /* Check overflow. */ if (range == 0) - return 0; + return false; if (range >= GET_MODE_BITSIZE (word_mode)) return false; - return uniq <= 3; + return uniq <= m_max_case_bit_tests; } /* Return true when cluster starting at START and ending at END (inclusive) @@ -1379,6 +1384,7 @@ bool bit_test_cluster::can_be_handled (const vec<cluster *> &clusters, unsigned start, unsigned end) { + auto_vec<int> dest_bbs; /* For algorithm correctness, bit test for a single case must return true. We bail out in is_beneficial if it's called just for a single case. */ @@ -1387,15 +1393,23 @@ bit_test_cluster::can_be_handled (const vec<cluster *> &clusters, unsigned HOST_WIDE_INT range = get_range (clusters[start]->get_low (), clusters[end]->get_high ()); - auto_bitmap dest_bbs; + + /* Make a guess first. */ + if (!can_be_handled (range, m_max_case_bit_tests)) + return false; for (unsigned i = start; i <= end; i++) { simple_cluster *sc = static_cast<simple_cluster *> (clusters[i]); - bitmap_set_bit (dest_bbs, sc->m_case_bb->index); + if (!dest_bbs.contains (sc->m_case_bb->index)) + { + dest_bbs.safe_push (sc->m_case_bb->index); + if (dest_bbs.length () > m_max_case_bit_tests) + return false; + } } - return can_be_handled (range, bitmap_count_bits (dest_bbs)); + return true; } /* Return true when COUNT of cases of UNIQ labels is beneficial for bit test diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h index 9ebcf109319..dbfd9eecba2 100644 --- a/gcc/tree-switch-conversion.h +++ b/gcc/tree-switch-conversion.h @@ -84,11 +84,10 @@ public: then return 0. */ static unsigned HOST_WIDE_INT get_range (tree low, tree high) { - tree r = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low); - if (!tree_fits_uhwi_p (r)) + wide_int w = wi::to_wide (high) - wi::to_wide (low); + if (wi::neg_p (w, TYPE_SIGN (TREE_TYPE (low))) || !wi::fits_uhwi_p (w)) return 0; - - return tree_to_uhwi (r) + 1; + return w.to_uhwi () + 1; } /* Case label. */ -- 2.28.0
>From e1956c0019d8df4a022ea17bc76f9e62fac182c7 Mon Sep 17 00:00:00 2001 From: Martin Liska <mli...@suse.cz> Date: Thu, 24 Sep 2020 13:34:58 +0200 Subject: [PATCH 1/2] Revert "switch lowering: limit number of cluster attemps" This reverts commit c6df6039e9180c580945266302ec14047d358364. --- gcc/doc/invoke.texi | 4 -- gcc/params.opt | 4 -- gcc/testsuite/g++.dg/tree-ssa/pr96979.C | 50 ------------------------- gcc/tree-switch-conversion.c | 17 --------- 4 files changed, 75 deletions(-) delete mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr96979.C diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 75203ba2420..08f1102030c 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -13471,10 +13471,6 @@ The smallest number of different values for which it is best to use a jump-table instead of a tree of conditional branches. If the value is 0, use the default for the machine. -@item max-switch-clustering-attempts -The maximum number of clustering attempts used -in bit-test and jump-table switch expansion. - @item jump-table-max-growth-ratio-for-size The maximum code size growth ratio when expanding into a jump table (in percent). The parameter is used when diff --git a/gcc/params.opt b/gcc/params.opt index 5f2e11d6c57..dcf5e020f01 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -82,10 +82,6 @@ The maximum length of a constant string for a builtin string cmp call eligible f Common Joined UInteger Var(param_case_values_threshold) Param Optimization The smallest number of different values for which it is best to use a jump-table instead of a tree of conditional branches, if 0, use the default for the machine. --param=max-switch-clustering-attempts= -Common Joined UInteger Var(param_max_switch_clustering_attempts) Param Optimization Init(10000) -The maximum number of clustering attempts used in bit-test and jump-table switch expansion. - -param=comdat-sharing-probability= Common Joined UInteger Var(param_comdat_sharing_probability) Init(20) Param Optimization Probability that COMDAT function will be shared with different compilation unit. diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C deleted file mode 100644 index 85c703a140d..00000000000 --- a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C +++ /dev/null @@ -1,50 +0,0 @@ -/* PR tree-optimization/96979 */ -/* { dg-do compile } */ -/* { dg-options "-std=c++17 -O2 -fdump-tree-switchlower1" } */ - -using u64 = unsigned long long; - -constexpr inline u64 -foo (const char *str) noexcept -{ - u64 value = 0xcbf29ce484222325ULL; - for (u64 i = 0; str[i]; i++) - value = (value ^ u64(str[i])) * 0x100000001b3ULL; - return value; -} - -struct V -{ - enum W - { -#define A(n) n, -#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9) -#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9) -#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9) -#define E D(foo1) D(foo2) D(foo3) - E - last - }; - - constexpr static W - bar (const u64 h) noexcept - { - switch (h) - { -#undef A -#define F(n) #n -#define A(n) case foo (F(n)): return n; - E - } - return last; - } -}; - -int -baz (const char *s) -{ - const u64 h = foo (s); - return V::bar (h); -} - -/* { dg-final { scan-tree-dump-times ";; Bail out: --param=max-switch-clustering-attempts reached" 2 "switchlower1" } } */ diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c index e6a2c7a6a84..186411ff3c4 100644 --- a/gcc/tree-switch-conversion.c +++ b/gcc/tree-switch-conversion.c @@ -1183,7 +1183,6 @@ jump_table_cluster::find_jump_tables (vec<cluster *> &clusters) min.quick_push (min_cluster_item (0, 0, 0)); - HOST_WIDE_INT attempts = 0; for (unsigned i = 1; i <= l; i++) { /* Set minimal # of clusters with i-th item to infinite. */ @@ -1195,14 +1194,6 @@ jump_table_cluster::find_jump_tables (vec<cluster *> &clusters) if (i - j < case_values_threshold ()) s += i - j; - if (attempts++ == param_max_switch_clustering_attempts) - { - if (dump_file) - fprintf (dump_file, ";; Bail out: " - "--param=max-switch-clustering-attempts reached\n"); - return clusters.copy (); - } - /* Prefer clusters with smaller number of numbers covered. */ if ((min[j].m_count + 1 < min[i].m_count || (min[j].m_count + 1 == min[i].m_count @@ -1317,7 +1308,6 @@ bit_test_cluster::find_bit_tests (vec<cluster *> &clusters) min.quick_push (min_cluster_item (0, 0, 0)); - HOST_WIDE_INT attempts = 0; for (unsigned i = 1; i <= l; i++) { /* Set minimal # of clusters with i-th item to infinite. */ @@ -1325,13 +1315,6 @@ bit_test_cluster::find_bit_tests (vec<cluster *> &clusters) for (unsigned j = 0; j < i; j++) { - if (attempts++ == param_max_switch_clustering_attempts) - { - if (dump_file) - fprintf (dump_file, ";; Bail out: " - "--param=max-switch-clustering-attempts reached\n"); - return clusters.copy (); - } if (min[j].m_count + 1 < min[i].m_count && can_be_handled (clusters, j, i - 1)) min[i] = min_cluster_item (min[j].m_count + 1, j, INT_MAX); -- 2.28.0