On Mon, Oct 28, 2024 at 9:58 PM Andi Kleen <a...@linux.intel.com> wrote: > > From: Andi Kleen <a...@gcc.gnu.org> > > The current switch bit test clustering enumerates all possible case > clusters combinations to find ones that fit the bit test constrains > best. This causes performance problems with very large switches. > > For bit test clustering which happens naturally in word sized chunks > I don't think such an expensive algorithm is really needed. > > This patch implements a simple greedy algorithm that walks > the sorted list and examines word sized windows and tries > to cluster them. > > Surprisingly the new algorithm gives consistly better clusters > for the examples I tried. > > For example from the gcc bootstrap: > > old: 0-15 16-31 96-175 > new: 0-31 96-175 > > I'm not fully sure why that is, probably some bug in the old > algorithm? This shows even up in the test suite where if-to-switch-6 > now can generate a switch, as well as a case in switch-1.c > > I don't have a proof that the new algorithm is always as good or better, > but so far at least I don't see any counter examples. > > It also fixes the excessive compile time in PR117091, > however this was already fixed by an earlier patch > that doesn't run clustering when no targets have multiple > values.
OK if you add a comment (as part of the function comment for example) explaining the idea of the algorithm. Thanks, Richard. > gcc/ChangeLog: > > PR middle-end/117091 > * tree-switch-conversion.cc (bit_test_cluster::find_bit_tests): > Change clustering algorithm to simple greedy. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/if-to-switch-6.c: Allow condition chain. > * gcc.dg/tree-ssa/switch-1.c: Allow more bit tests. > --- > .../gcc.dg/tree-ssa/if-to-switch-6.c | 2 +- > gcc/testsuite/gcc.dg/tree-ssa/switch-1.c | 2 +- > gcc/tree-switch-conversion.cc | 76 ++++++++++--------- > 3 files changed, 42 insertions(+), 38 deletions(-) > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c > b/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c > index b1640673eae1..657af770e438 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c > @@ -39,4 +39,4 @@ int main(int argc, char **argv) > return 0; > } > > -/* { dg-final { scan-tree-dump-not "Condition chain" "iftoswitch" } } */ > +/* { dg-final { scan-tree-dump "Condition chain" "iftoswitch" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c > index 6f70c9de0c19..f1654aba6d99 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c > @@ -107,4 +107,4 @@ int foo5 (int x) > } > } > > -/* { dg-final { scan-tree-dump ";; GIMPLE switch case clusters: BT:10-62 > 600-700 JT:1000-1021 111111" "switchlower1" } } */ > +/* { dg-final { scan-tree-dump ";; GIMPLE switch case clusters: BT:10-62 > 600-700 BT:1000-1021 111111" "switchlower1" } } */ > diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc > index 3436c2a8b98c..b7736a9853d9 100644 > --- a/gcc/tree-switch-conversion.cc > +++ b/gcc/tree-switch-conversion.cc > @@ -1782,55 +1782,59 @@ bit_test_cluster::find_bit_tests (vec<cluster *> > &clusters, int max_c) > return clusters.copy (); > > unsigned l = clusters.length (); > - auto_vec<min_cluster_item> min; > - min.reserve (l + 1); > + vec<cluster *> output; > > - min.quick_push (min_cluster_item (0, 0, 0)); > + output.create (l); > > - for (unsigned i = 1; i <= l; i++) > + unsigned end; > + for (unsigned i = 0; i < l; i += end) > { > - /* Set minimal # of clusters with i-th item to infinite. */ > - min.quick_push (min_cluster_item (INT_MAX, INT_MAX, INT_MAX)); > + HOST_WIDE_INT values = 0; > + hash_set<basic_block> targets; > + cluster *start_cluster = clusters[i]; > > - for (unsigned j = 0; j < i; j++) > + end = 0; > + while (i + end < l) > { > - if (min[j].m_count + 1 < min[i].m_count > - && can_be_handled (clusters, j, i - 1)) > - min[i] = min_cluster_item (min[j].m_count + 1, j, INT_MAX); > + cluster *end_cluster = clusters[i + end]; > + > + /* Does value range fit into the BITS_PER_WORD window? */ > + HOST_WIDE_INT w = cluster::get_range (start_cluster->get_low (), > + end_cluster->get_high ()); > + if (w == 0 || w > BITS_PER_WORD) > + break; > + > + /* Compute # of values tested for new case. */ > + HOST_WIDE_INT r = 1; > + if (!end_cluster->is_single_value_p ()) > + r = cluster::get_range (end_cluster->get_low (), > + end_cluster->get_high ()); > + if (r == 0) > + break; > + > + /* Check for max # of targets. */ > + if (targets.elements() == m_max_case_bit_tests > + && !targets.contains (end_cluster->m_case_bb)) > + break; > + > + targets.add (end_cluster->m_case_bb); > + values += r; > + end++; > } > > - gcc_checking_assert (min[i].m_count != INT_MAX); > - } > - > - /* No result. */ > - if (min[l].m_count == l) > - return clusters.copy (); > - > - vec<cluster *> output; > - output.create (4); > - > - /* Find and build the clusters. */ > - for (unsigned end = l;;) > - { > - int start = min[end].m_start; > - > - if (is_beneficial (clusters, start, end - 1)) > + if (is_beneficial (values, targets.elements ())) > { > - bool entire = start == 0 && end == clusters.length (); > - output.safe_push (new bit_test_cluster (clusters, start, end - 1, > - entire)); > + output.safe_push (new bit_test_cluster (clusters, i, i + end - 1, > + i == 0 && end == l)); > } > else > - for (int i = end - 1; i >= start; i--) > + { > output.safe_push (clusters[i]); > - > - end = start; > - > - if (start <= 0) > - break; > + /* ??? Might be able to skip more. */ > + end = 1; > + } > } > > - output.reverse (); > return output; > } > > -- > 2.46.2 >