Hello.

All right, I come up with a rapid speed up that can allow us to remove
the introduced parameter. It contains 2 parts:
- BIT TEST: we allow at maximum a range that is smaller GET_MODE_BITSIZE
- JT: we spent quite some time in density calculation, we can guess it first
  and it leads to a fast bail out.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin
>From dc4c1d129a50c7f51d28235506479f29d51dae07 Mon Sep 17 00:00:00 2001
From: Martin Liska <mli...@suse.cz>
Date: Thu, 24 Sep 2020 13:34:13 +0200
Subject: [PATCH 2/2] switch conversion: make a rapid speed up

gcc/ChangeLog:

	PR tree-optimization/96979
	* tree-switch-conversion.c (jump_table_cluster::can_be_handled):
	Make a fast bail out.
	(bit_test_cluster::can_be_handled): Likewise here.
	* tree-switch-conversion.h (get_range): Use wi::to_wide instead
	of a folding.

gcc/testsuite/ChangeLog:

	PR tree-optimization/96979
	* g++.dg/tree-ssa/pr96979.C: New test.
---
 gcc/testsuite/g++.dg/tree-ssa/pr96979.C | 48 +++++++++++++++++++++++++
 gcc/tree-switch-conversion.c            | 32 ++++++++++++-----
 gcc/tree-switch-conversion.h            |  7 ++--
 3 files changed, 74 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr96979.C

diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
new file mode 100644
index 00000000000..ec0f57a8548
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
@@ -0,0 +1,48 @@
+/* PR tree-optimization/96979 */
+/* { dg-do compile } */
+/* { dg-options "-std=c++17 -O2" } */
+
+using u64 = unsigned long long;
+
+constexpr inline u64
+foo (const char *str) noexcept
+{
+  u64 value = 0xcbf29ce484222325ULL;
+  for (u64 i = 0; str[i]; i++)
+    value = (value ^ u64(str[i])) * 0x100000001b3ULL;
+  return value;
+}
+
+struct V
+{
+  enum W
+  {
+#define A(n) n,
+#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9)
+#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9)
+#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9)
+#define E D(foo1) D(foo2) D(foo3)
+    E
+    last
+  };
+
+  constexpr static W
+  bar (const u64 h) noexcept
+  {
+    switch (h)
+      {
+#undef A
+#define F(n) #n
+#define A(n) case foo (F(n)): return n;
+        E
+      }
+    return last;
+  }
+};
+
+int
+baz (const char *s)
+{
+  const u64 h = foo (s);
+  return V::bar (h);
+}
diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index 186411ff3c4..3212e964b84 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -1268,6 +1268,15 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
   if (range == 0)
     return false;
 
+  unsigned HOST_WIDE_INT lhs = 100 * range;
+  if (lhs < range)
+    return false;
+
+  /* First make quick guess as each cluster
+     can add at maximum 2 to the comparison_count.  */
+  if (lhs > 2 * max_ratio * (end - start + 1))
+    return false;
+
   unsigned HOST_WIDE_INT comparison_count = 0;
   for (unsigned i = start; i <= end; i++)
     {
@@ -1275,10 +1284,6 @@ jump_table_cluster::can_be_handled (const vec<cluster *> &clusters,
       comparison_count += sc->m_range_p ? 2 : 1;
     }
 
-  unsigned HOST_WIDE_INT lhs = 100 * range;
-  if (lhs < range)
-    return false;
-
   return lhs <= max_ratio * comparison_count;
 }
 
@@ -1364,12 +1369,12 @@ bit_test_cluster::can_be_handled (unsigned HOST_WIDE_INT range,
 {
   /* Check overflow.  */
   if (range == 0)
-    return 0;
+    return false;
 
   if (range >= GET_MODE_BITSIZE (word_mode))
     return false;
 
-  return uniq <= 3;
+  return uniq <= m_max_case_bit_tests;
 }
 
 /* Return true when cluster starting at START and ending at END (inclusive)
@@ -1379,6 +1384,7 @@ bool
 bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
 				  unsigned start, unsigned end)
 {
+  auto_vec<int> dest_bbs;
   /* For algorithm correctness, bit test for a single case must return
      true.  We bail out in is_beneficial if it's called just for
      a single case.  */
@@ -1387,15 +1393,23 @@ bit_test_cluster::can_be_handled (const vec<cluster *> &clusters,
 
   unsigned HOST_WIDE_INT range = get_range (clusters[start]->get_low (),
 					    clusters[end]->get_high ());
-  auto_bitmap dest_bbs;
+
+  /* Make a guess first.  */
+  if (!can_be_handled (range, m_max_case_bit_tests))
+    return false;
 
   for (unsigned i = start; i <= end; i++)
     {
       simple_cluster *sc = static_cast<simple_cluster *> (clusters[i]);
-      bitmap_set_bit (dest_bbs, sc->m_case_bb->index);
+      if (!dest_bbs.contains (sc->m_case_bb->index))
+	{
+	  dest_bbs.safe_push (sc->m_case_bb->index);
+	  if (dest_bbs.length () > m_max_case_bit_tests)
+	    return false;
+	}
     }
 
-  return can_be_handled (range, bitmap_count_bits (dest_bbs));
+  return true;
 }
 
 /* Return true when COUNT of cases of UNIQ labels is beneficial for bit test
diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
index 9ebcf109319..dbfd9eecba2 100644
--- a/gcc/tree-switch-conversion.h
+++ b/gcc/tree-switch-conversion.h
@@ -84,11 +84,10 @@ public:
      then return 0.  */
   static unsigned HOST_WIDE_INT get_range (tree low, tree high)
   {
-    tree r = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low);
-    if (!tree_fits_uhwi_p (r))
+    wide_int w = wi::to_wide (high) - wi::to_wide (low);
+    if (wi::neg_p (w, TYPE_SIGN (TREE_TYPE (low))) || !wi::fits_uhwi_p (w))
       return 0;
-
-    return tree_to_uhwi (r) + 1;
+    return w.to_uhwi () + 1;
   }
 
   /* Case label.  */
-- 
2.28.0

>From e1956c0019d8df4a022ea17bc76f9e62fac182c7 Mon Sep 17 00:00:00 2001
From: Martin Liska <mli...@suse.cz>
Date: Thu, 24 Sep 2020 13:34:58 +0200
Subject: [PATCH 1/2] Revert "switch lowering: limit number of cluster attemps"

This reverts commit c6df6039e9180c580945266302ec14047d358364.
---
 gcc/doc/invoke.texi                     |  4 --
 gcc/params.opt                          |  4 --
 gcc/testsuite/g++.dg/tree-ssa/pr96979.C | 50 -------------------------
 gcc/tree-switch-conversion.c            | 17 ---------
 4 files changed, 75 deletions(-)
 delete mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr96979.C

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 75203ba2420..08f1102030c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -13471,10 +13471,6 @@ The smallest number of different values for which it is best to use a
 jump-table instead of a tree of conditional branches.  If the value is
 0, use the default for the machine.
 
-@item max-switch-clustering-attempts
-The maximum number of clustering attempts used
-in bit-test and jump-table switch expansion.
-
 @item jump-table-max-growth-ratio-for-size
 The maximum code size growth ratio when expanding
 into a jump table (in percent).  The parameter is used when
diff --git a/gcc/params.opt b/gcc/params.opt
index 5f2e11d6c57..dcf5e020f01 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -82,10 +82,6 @@ The maximum length of a constant string for a builtin string cmp call eligible f
 Common Joined UInteger Var(param_case_values_threshold) Param Optimization
 The smallest number of different values for which it is best to use a jump-table instead of a tree of conditional branches, if 0, use the default for the machine.
 
--param=max-switch-clustering-attempts=
-Common Joined UInteger Var(param_max_switch_clustering_attempts) Param Optimization Init(10000)
-The maximum number of clustering attempts used in bit-test and jump-table switch expansion.
-
 -param=comdat-sharing-probability=
 Common Joined UInteger Var(param_comdat_sharing_probability) Init(20) Param Optimization
 Probability that COMDAT function will be shared with different compilation unit.
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
deleted file mode 100644
index 85c703a140d..00000000000
--- a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C
+++ /dev/null
@@ -1,50 +0,0 @@
-/* PR tree-optimization/96979 */
-/* { dg-do compile } */
-/* { dg-options "-std=c++17 -O2 -fdump-tree-switchlower1" } */
-
-using u64 = unsigned long long;
-
-constexpr inline u64
-foo (const char *str) noexcept
-{
-  u64 value = 0xcbf29ce484222325ULL;
-  for (u64 i = 0; str[i]; i++)
-    value = (value ^ u64(str[i])) * 0x100000001b3ULL;
-  return value;
-}
-
-struct V
-{
-  enum W
-  {
-#define A(n) n,
-#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9)
-#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9)
-#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9)
-#define E D(foo1) D(foo2) D(foo3)
-    E
-    last
-  };
-
-  constexpr static W
-  bar (const u64 h) noexcept
-  {
-    switch (h)
-      {
-#undef A
-#define F(n) #n
-#define A(n) case foo (F(n)): return n;
-        E
-      }
-    return last;
-  }
-};
-
-int
-baz (const char *s)
-{
-  const u64 h = foo (s);
-  return V::bar (h);
-}
-
-/* { dg-final { scan-tree-dump-times ";; Bail out: --param=max-switch-clustering-attempts reached" 2 "switchlower1" } } */
diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index e6a2c7a6a84..186411ff3c4 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -1183,7 +1183,6 @@ jump_table_cluster::find_jump_tables (vec<cluster *> &clusters)
 
   min.quick_push (min_cluster_item (0, 0, 0));
 
-  HOST_WIDE_INT attempts = 0;
   for (unsigned i = 1; i <= l; i++)
     {
       /* Set minimal # of clusters with i-th item to infinite.  */
@@ -1195,14 +1194,6 @@ jump_table_cluster::find_jump_tables (vec<cluster *> &clusters)
 	  if (i - j < case_values_threshold ())
 	    s += i - j;
 
-	  if (attempts++ == param_max_switch_clustering_attempts)
-	    {
-	      if (dump_file)
-		fprintf (dump_file, ";; Bail out: "
-			 "--param=max-switch-clustering-attempts reached\n");
-	      return clusters.copy ();
-	    }
-
 	  /* Prefer clusters with smaller number of numbers covered.  */
 	  if ((min[j].m_count + 1 < min[i].m_count
 	       || (min[j].m_count + 1 == min[i].m_count
@@ -1317,7 +1308,6 @@ bit_test_cluster::find_bit_tests (vec<cluster *> &clusters)
 
   min.quick_push (min_cluster_item (0, 0, 0));
 
-  HOST_WIDE_INT attempts = 0;
   for (unsigned i = 1; i <= l; i++)
     {
       /* Set minimal # of clusters with i-th item to infinite.  */
@@ -1325,13 +1315,6 @@ bit_test_cluster::find_bit_tests (vec<cluster *> &clusters)
 
       for (unsigned j = 0; j < i; j++)
 	{
-	  if (attempts++ == param_max_switch_clustering_attempts)
-	    {
-	      if (dump_file)
-		fprintf (dump_file, ";; Bail out: "
-			 "--param=max-switch-clustering-attempts reached\n");
-	      return clusters.copy ();
-	    }
 	  if (min[j].m_count + 1 < min[i].m_count
 	      && can_be_handled (clusters, j, i - 1))
 	    min[i] = min_cluster_item (min[j].m_count + 1, j, INT_MAX);
-- 
2.28.0

Reply via email to