On Mon, Oct 28, 2024 at 9:58 PM Andi Kleen <a...@linux.intel.com> wrote:
>
> From: Andi Kleen <a...@gcc.gnu.org>
>
> The current switch bit test clustering enumerates all possible case
> clusters combinations to find ones that fit the bit test constrains
> best.  This causes performance problems with very large switches.
>
> For bit test clustering which happens naturally in word sized chunks
> I don't think such an expensive algorithm is really needed.
>
> This patch implements a simple greedy algorithm that walks
> the sorted list and examines word sized windows and tries
> to cluster them.
>
> Surprisingly the new algorithm gives consistly better clusters
> for the examples I tried.
>
> For example from the gcc bootstrap:
>
> old: 0-15 16-31 96-175
> new: 0-31 96-175
>
> I'm not fully sure why that is, probably some bug in the old
> algorithm? This shows even up in the test suite where if-to-switch-6
> now can generate a switch, as well as a case in switch-1.c
>
> I don't have a proof that the new algorithm is always as good or better,
> but so far at least I don't see any counter examples.
>
> It also fixes the excessive compile time in PR117091,
> however this was already fixed by an earlier patch
> that doesn't run clustering when no targets have multiple
> values.

OK if you add a comment (as part of the function comment for example)
explaining the idea of the algorithm.

Thanks,
Richard.

> gcc/ChangeLog:
>
>         PR middle-end/117091
>         * tree-switch-conversion.cc (bit_test_cluster::find_bit_tests):
>         Change clustering algorithm to simple greedy.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/if-to-switch-6.c: Allow condition chain.
>         * gcc.dg/tree-ssa/switch-1.c: Allow more bit tests.
> ---
>  .../gcc.dg/tree-ssa/if-to-switch-6.c          |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/switch-1.c      |  2 +-
>  gcc/tree-switch-conversion.cc                 | 76 ++++++++++---------
>  3 files changed, 42 insertions(+), 38 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c
> index b1640673eae1..657af770e438 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-6.c
> @@ -39,4 +39,4 @@ int main(int argc, char **argv)
>    return 0;
>  }
>
> -/* { dg-final { scan-tree-dump-not "Condition chain" "iftoswitch" } } */
> +/* { dg-final { scan-tree-dump "Condition chain" "iftoswitch" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c
> index 6f70c9de0c19..f1654aba6d99 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/switch-1.c
> @@ -107,4 +107,4 @@ int foo5 (int x)
>    }
>  }
>
> -/* { dg-final { scan-tree-dump ";; GIMPLE switch case clusters: BT:10-62 
> 600-700 JT:1000-1021 111111" "switchlower1" } } */
> +/* { dg-final { scan-tree-dump ";; GIMPLE switch case clusters: BT:10-62 
> 600-700 BT:1000-1021 111111" "switchlower1" } } */
> diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc
> index 3436c2a8b98c..b7736a9853d9 100644
> --- a/gcc/tree-switch-conversion.cc
> +++ b/gcc/tree-switch-conversion.cc
> @@ -1782,55 +1782,59 @@ bit_test_cluster::find_bit_tests (vec<cluster *> 
> &clusters, int max_c)
>      return clusters.copy ();
>
>    unsigned l = clusters.length ();
> -  auto_vec<min_cluster_item> min;
> -  min.reserve (l + 1);
> +  vec<cluster *> output;
>
> -  min.quick_push (min_cluster_item (0, 0, 0));
> +  output.create (l);
>
> -  for (unsigned i = 1; i <= l; i++)
> +  unsigned end;
> +  for (unsigned i = 0; i < l; i += end)
>      {
> -      /* Set minimal # of clusters with i-th item to infinite.  */
> -      min.quick_push (min_cluster_item (INT_MAX, INT_MAX, INT_MAX));
> +      HOST_WIDE_INT values = 0;
> +      hash_set<basic_block> targets;
> +      cluster *start_cluster = clusters[i];
>
> -      for (unsigned j = 0; j < i; j++)
> +      end = 0;
> +      while (i + end < l)
>         {
> -         if (min[j].m_count + 1 < min[i].m_count
> -             && can_be_handled (clusters, j, i - 1))
> -           min[i] = min_cluster_item (min[j].m_count + 1, j, INT_MAX);
> +         cluster *end_cluster = clusters[i + end];
> +
> +         /* Does value range fit into the BITS_PER_WORD window?  */
> +         HOST_WIDE_INT w = cluster::get_range (start_cluster->get_low (),
> +                                               end_cluster->get_high ());
> +         if (w == 0 || w > BITS_PER_WORD)
> +           break;
> +
> +         /* Compute # of values tested for new case.  */
> +         HOST_WIDE_INT r = 1;
> +         if (!end_cluster->is_single_value_p ())
> +           r = cluster::get_range (end_cluster->get_low (),
> +                                   end_cluster->get_high ());
> +         if (r == 0)
> +           break;
> +
> +         /* Check for max # of targets.  */
> +         if (targets.elements() == m_max_case_bit_tests
> +             && !targets.contains (end_cluster->m_case_bb))
> +           break;
> +
> +         targets.add (end_cluster->m_case_bb);
> +         values += r;
> +         end++;
>         }
>
> -      gcc_checking_assert (min[i].m_count != INT_MAX);
> -    }
> -
> -  /* No result.  */
> -  if (min[l].m_count == l)
> -    return clusters.copy ();
> -
> -  vec<cluster *> output;
> -  output.create (4);
> -
> -  /* Find and build the clusters.  */
> -  for (unsigned end = l;;)
> -    {
> -      int start = min[end].m_start;
> -
> -      if (is_beneficial (clusters, start, end - 1))
> +      if (is_beneficial (values, targets.elements ()))
>         {
> -         bool entire = start == 0 && end == clusters.length ();
> -         output.safe_push (new bit_test_cluster (clusters, start, end - 1,
> -                                                 entire));
> +         output.safe_push (new bit_test_cluster (clusters, i, i + end - 1,
> +                                                 i == 0 && end == l));
>         }
>        else
> -       for (int i = end - 1; i >= start; i--)
> +       {
>           output.safe_push (clusters[i]);
> -
> -      end = start;
> -
> -      if (start <= 0)
> -       break;
> +         /* ??? Might be able to skip more.  */
> +         end = 1;
> +       }
>      }
>
> -  output.reverse ();
>    return output;
>  }
>
> --
> 2.46.2
>

Reply via email to