This pair of patches provides a sparse representation of the on-entry cache for ranger.  When the number of basic blocks exceed -param=evrp-sparse-threshold=  (default to 800), ranger moves to a sparse representation rather allocating a full vector for each ssa-name.

This is based on an extension of the sparse bitmap code which enables us to use multiple bits instead of single bits. We'll cache up to 15 unique ranges for any ssa_name in this implementation.

Bootstrapped on x86_64-pc-linux-gnu with no new regressions. Pushed.

I will also port this to gcc11 in a couple of days assuming no unforeseen issues show up.

Andrew

commit 508e291d0aa1b2ec6d2287b324d044ac4ddf82e6
Author: Andrew MacLeod <amacl...@redhat.com>
Date:   Mon Jun 7 13:12:01 2021 -0400

    Implement multi-bit aligned accessors for sparse bitmap.
    
    Provide set/get routines to allow sparse bitmaps to be treated as an array
    of multiple bit values. Only chunk sizes that are powers of 2 are supported.
    
            * bitmap.c (bitmap_set_aligned_chunk): New.
            (bitmap_get_aligned_chunk): New.
            (test_aligned_chunk): New.
            (bitmap_c_tests): Call test_aligned_chunk.
            * bitmap.h (bitmap_set_aligned_chunk, bitmap_get_aligned_chunk): New.

diff --git a/gcc/bitmap.c b/gcc/bitmap.c
index 5a650cdfc1d..b915fdfbb54 100644
--- a/gcc/bitmap.c
+++ b/gcc/bitmap.c
@@ -1004,6 +1004,83 @@ bitmap_bit_p (const_bitmap head, int bit)
   return (ptr->bits[word_num] >> bit_num) & 1;
 }
 
+/* Set CHUNK_SIZE bits at a time in bitmap HEAD.
+   Store CHUNK_VALUE starting at bits CHUNK * chunk_size.
+   This is the set routine for viewing bitmap as a multi-bit sparse array.  */
+
+void
+bitmap_set_aligned_chunk (bitmap head, unsigned int chunk,
+			  unsigned int chunk_size, BITMAP_WORD chunk_value)
+{
+  // Ensure chunk size is a power of 2 and fits in BITMAP_WORD.
+  gcc_checking_assert (pow2p_hwi (chunk_size));
+  gcc_checking_assert (chunk_size < (sizeof (BITMAP_WORD) * CHAR_BIT));
+
+  // Ensure chunk_value is within range of chunk_size bits.
+  BITMAP_WORD max_value = (1 << chunk_size) - 1;
+  gcc_checking_assert (chunk_value <= max_value);
+
+  unsigned bit = chunk * chunk_size;
+  unsigned indx = bit / BITMAP_ELEMENT_ALL_BITS;
+  bitmap_element *ptr;
+  if (!head->tree_form)
+    ptr = bitmap_list_find_element (head, indx);
+  else
+    ptr = bitmap_tree_find_element (head, indx);
+  unsigned word_num = bit / BITMAP_WORD_BITS % BITMAP_ELEMENT_WORDS;
+  unsigned bit_num  = bit % BITMAP_WORD_BITS;
+  BITMAP_WORD bit_val = chunk_value << bit_num;
+  BITMAP_WORD mask = ~(max_value << bit_num);
+
+  if (ptr != 0)
+    {
+      ptr->bits[word_num] &= mask;
+      ptr->bits[word_num] |= bit_val;
+      return;
+    }
+
+  ptr = bitmap_element_allocate (head);
+  ptr->indx = bit / BITMAP_ELEMENT_ALL_BITS;
+  ptr->bits[word_num] = bit_val;
+  if (!head->tree_form)
+    bitmap_list_link_element (head, ptr);
+  else
+    bitmap_tree_link_element (head, ptr);
+}
+
+/* This is the get routine for viewing bitmap as a multi-bit sparse array.
+   Return a set of CHUNK_SIZE consecutive bits from HEAD, starting at bit
+   CHUNK * chunk_size.   */
+
+BITMAP_WORD
+bitmap_get_aligned_chunk (const_bitmap head, unsigned int chunk,
+			  unsigned int chunk_size)
+{
+  // Ensure chunk size is a power of 2, fits in BITMAP_WORD and is in range.
+  gcc_checking_assert (pow2p_hwi (chunk_size));
+  gcc_checking_assert (chunk_size < (sizeof (BITMAP_WORD) * CHAR_BIT));
+
+  BITMAP_WORD max_value = (1 << chunk_size) - 1;
+  unsigned bit = chunk * chunk_size;
+  unsigned int indx = bit / BITMAP_ELEMENT_ALL_BITS;
+  const bitmap_element *ptr;
+  unsigned bit_num;
+  unsigned word_num;
+
+  if (!head->tree_form)
+    ptr = bitmap_list_find_element (const_cast<bitmap> (head), indx);
+  else
+    ptr = bitmap_tree_find_element (const_cast<bitmap> (head), indx);
+  if (ptr == 0)
+    return 0;
+
+  bit_num = bit % BITMAP_WORD_BITS;
+  word_num = bit / BITMAP_WORD_BITS % BITMAP_ELEMENT_WORDS;
+
+  // Return 4 bits.
+  return (ptr->bits[word_num] >> bit_num) & max_value;
+}
+
 #if GCC_VERSION < 3400
 /* Table of number of set bits in a character, indexed by value of char.  */
 static const unsigned char popcount_table[] =
@@ -2857,6 +2934,33 @@ test_bitmap_single_bit_set_p ()
   ASSERT_EQ (1066, bitmap_first_set_bit (b));
 }
 
+/* Verify accessing aligned bit chunks works as expected.  */
+
+static void
+test_aligned_chunk (unsigned num_bits)
+{
+  bitmap b = bitmap_gc_alloc ();
+  int limit = 2 ^ num_bits;
+
+  int index = 3;
+  for (int x = 0; x < limit; x++)
+    {
+      bitmap_set_aligned_chunk (b, index, num_bits, (BITMAP_WORD) x);
+      ASSERT_TRUE ((int) bitmap_get_aligned_chunk (b, index, num_bits) == x);
+      ASSERT_TRUE ((int) bitmap_get_aligned_chunk (b, index + 1,
+						   num_bits) == 0);
+      ASSERT_TRUE ((int) bitmap_get_aligned_chunk (b, index - 1,
+						   num_bits) == 0);
+      index += 3;
+    }
+  index = 3;
+  for (int x = 0; x < limit ; x++)
+    {
+      ASSERT_TRUE ((int) bitmap_get_aligned_chunk (b, index, num_bits) == x);
+      index += 3;
+    }
+}
+
 /* Run all of the selftests within this file.  */
 
 void
@@ -2867,6 +2971,10 @@ bitmap_c_tests ()
   test_clear_bit_in_middle ();
   test_copying ();
   test_bitmap_single_bit_set_p ();
+  /* Test 2, 4 and 8 bit aligned chunks.  */
+  test_aligned_chunk (2);
+  test_aligned_chunk (4);
+  test_aligned_chunk (8);
 }
 
 } // namespace selftest
diff --git a/gcc/bitmap.h b/gcc/bitmap.h
index 26138556b2a..0846f79665d 100644
--- a/gcc/bitmap.h
+++ b/gcc/bitmap.h
@@ -438,6 +438,13 @@ extern bool bitmap_set_bit (bitmap, int);
 /* Return true if a bit is set in a bitmap.  */
 extern int bitmap_bit_p (const_bitmap, int);
 
+/* Set and get multiple bit values in a sparse bitmap.  This allows a bitmap to
+   function as a sparse array of bit patterns where the patterns are
+   multiples of power of 2. This is more efficient than performing this as
+   multiple individual operations.  */
+void bitmap_set_aligned_chunk (bitmap, unsigned int, unsigned int, BITMAP_WORD);
+BITMAP_WORD bitmap_get_aligned_chunk (const_bitmap, unsigned int, unsigned int);
+
 /* Debug functions to print a bitmap.  */
 extern void debug_bitmap (const_bitmap);
 extern void debug_bitmap_file (FILE *, const_bitmap);
commit 5ebec2f9218a39ec582a5fc53104cfe8f9bd8a65
Author: Andrew MacLeod <amacl...@redhat.com>
Date:   Mon Jun 7 13:18:55 2021 -0400

    Implement a sparse bitmap representation for Rangers on-entry cache.
    
    Use a sparse representation for the on entry cache, and utilize it when
    the number of basic blocks in the function exceeds param_evrp_sparse_threshold.
    
            PR tree-optimization/PR100299
            * gimple-range-cache.cc (class sbr_sparse_bitmap): New.
            (sbr_sparse_bitmap::sbr_sparse_bitmap): New.
            (sbr_sparse_bitmap::bitmap_set_quad): New.
            (sbr_sparse_bitmap::bitmap_get_quad): New.
            (sbr_sparse_bitmap::set_bb_range): New.
            (sbr_sparse_bitmap::get_bb_range): New.
            (sbr_sparse_bitmap::bb_range_p): New.
            (block_range_cache::block_range_cache): initialize bitmap obstack.
            (block_range_cache::~block_range_cache): Destruct obstack.
            (block_range_cache::set_bb_range): Decide when to utilze the
            sparse on entry cache.
            * gimple-range-cache.h (block_range_cache): Add bitmap obstack.
            * params.opt (-param=evrp-sparse-threshold): New.

diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc
index c58acf48dfb..249515fbaf5 100644
--- a/gcc/gimple-range-cache.cc
+++ b/gcc/gimple-range-cache.cc
@@ -235,12 +235,140 @@ sbr_vector::bb_range_p (const basic_block bb)
   return m_tab[bb->index] != NULL;
 }
 
+// This class implements the on entry cache via a sparse bitmap.
+// It uses the quad bit routines to access 4 bits at a time.
+// A value of 0 (the default) means there is no entry, and a value of
+// 1 thru SBR_NUM represents an element in the m_range vector.
+// Varying is given the first value (1) and pre-cached.
+// SBR_NUM + 1 represents the value of UNDEFINED, and is never stored.
+// SBR_NUM is the number of values that can be cached.
+// Indexes are 1..SBR_NUM and are stored locally at m_range[0..SBR_NUM-1]
+
+#define SBR_NUM		14
+#define SBR_UNDEF	SBR_NUM + 1
+#define SBR_VARYING	1
+
+class sbr_sparse_bitmap : public ssa_block_ranges
+{
+public:
+  sbr_sparse_bitmap (tree t, irange_allocator *allocator, bitmap_obstack *bm);
+  virtual void set_bb_range (const basic_block bb, const irange &r) OVERRIDE;
+  virtual bool get_bb_range (irange &r, const basic_block bb) OVERRIDE;
+  virtual bool bb_range_p (const basic_block bb) OVERRIDE;
+private:
+  void bitmap_set_quad (bitmap head, int quad, int quad_value);
+  int bitmap_get_quad (const_bitmap head, int quad);
+  irange_allocator *m_irange_allocator;
+  irange *m_range[SBR_NUM];
+  bitmap bitvec;
+  tree m_type;
+};
+
+// Initialize a block cache for an ssa_name of type T.
+
+sbr_sparse_bitmap::sbr_sparse_bitmap (tree t, irange_allocator *allocator,
+				bitmap_obstack *bm)
+{
+  gcc_checking_assert (TYPE_P (t));
+  m_type = t;
+  bitvec = BITMAP_ALLOC (bm);
+  m_irange_allocator = allocator;
+  // Pre-cache varying.
+  m_range[0] = m_irange_allocator->allocate (2);
+  m_range[0]->set_varying (t);
+  // Pre-cache zero and non-zero values for pointers.
+  if (POINTER_TYPE_P (t))
+    {
+      m_range[1] = m_irange_allocator->allocate (2);
+      m_range[1]->set_nonzero (t);
+      m_range[2] = m_irange_allocator->allocate (2);
+      m_range[2]->set_zero (t);
+    }
+  else
+    m_range[1] = m_range[2] = NULL;
+  // Clear SBR_NUM entries.
+  for (int x = 3; x < SBR_NUM; x++)
+    m_range[x] = 0;
+}
+
+// Set 4 bit values in a sparse bitmap. This allows a bitmap to
+// function as a sparse array of 4 bit values.
+// QUAD is the index, QUAD_VALUE is the 4 bit value to set.
+
+inline void
+sbr_sparse_bitmap::bitmap_set_quad (bitmap head, int quad, int quad_value)
+{
+  bitmap_set_aligned_chunk (head, quad, 4, (BITMAP_WORD) quad_value);
+}
+
+// Get a 4 bit value from a sparse bitmap. This allows a bitmap to
+// function as a sparse array of 4 bit values.
+// QUAD is the index.
+inline int
+sbr_sparse_bitmap::bitmap_get_quad (const_bitmap head, int quad)
+{
+  return (int) bitmap_get_aligned_chunk (head, quad, 4);
+}
+
+// Set the range on entry to basic block BB to R.
+
+void
+sbr_sparse_bitmap::set_bb_range (const basic_block bb, const irange &r)
+{
+  if (r.undefined_p ())
+    {
+      bitmap_set_quad (bitvec, bb->index, SBR_UNDEF);
+      return;
+    }
+
+  // Loop thru the values to see if R is already present.
+  for (int x = 0; x < SBR_NUM; x++)
+    if (!m_range[x] || r == *(m_range[x]))
+      {
+	if (!m_range[x])
+	  m_range[x] = m_irange_allocator->allocate (r);
+	bitmap_set_quad (bitvec, bb->index, x + 1);
+	return;
+      }
+  // All values are taken, default to VARYING.
+  bitmap_set_quad (bitvec, bb->index, SBR_VARYING);
+  return;
+}
+
+// Return the range associated with block BB in R.  Return false if
+// there is no range.
+
+bool
+sbr_sparse_bitmap::get_bb_range (irange &r, const basic_block bb)
+{
+  int value = bitmap_get_quad (bitvec, bb->index);
+
+  if (!value)
+    return false;
+
+  gcc_checking_assert (value <= SBR_UNDEF);
+  if (value == SBR_UNDEF)
+    r.set_undefined ();
+  else
+    r = *(m_range[value - 1]);
+  return true;
+}
+
+// Return true if a range is present.
+
+bool
+sbr_sparse_bitmap::bb_range_p (const basic_block bb)
+{
+  return (bitmap_get_quad (bitvec, bb->index) != 0);
+}
+
 // -------------------------------------------------------------------------
 
 // Initialize the block cache.
 
 block_range_cache::block_range_cache ()
 {
+  bitmap_obstack_initialize (&m_bitmaps);
   m_ssa_ranges.create (0);
   m_ssa_ranges.safe_grow_cleared (num_ssa_names);
   m_irange_allocator = new irange_allocator;
@@ -253,6 +381,7 @@ block_range_cache::~block_range_cache ()
   delete m_irange_allocator;
   // Release the vector itself.
   m_ssa_ranges.release ();
+  bitmap_obstack_release (&m_bitmaps);
 }
 
 // Set the range for NAME on entry to block BB to R.
@@ -268,9 +397,21 @@ block_range_cache::set_bb_range (tree name, const basic_block bb,
 
   if (!m_ssa_ranges[v])
     {
-      void *r = m_irange_allocator->get_memory (sizeof (sbr_vector));
-      m_ssa_ranges[v] = new (r) sbr_vector (TREE_TYPE (name),
-					    m_irange_allocator);
+      // Use sparse representation if there are too many basic blocks.
+      if (last_basic_block_for_fn (cfun) > param_evrp_sparse_threshold)
+	{
+	  void *r = m_irange_allocator->get_memory (sizeof (sbr_sparse_bitmap));
+	  m_ssa_ranges[v] = new (r) sbr_sparse_bitmap (TREE_TYPE (name),
+						       m_irange_allocator,
+						       &m_bitmaps);
+	}
+      else
+	{
+	  // Otherwise use the default vector implemntation.
+	  void *r = m_irange_allocator->get_memory (sizeof (sbr_vector));
+	  m_ssa_ranges[v] = new (r) sbr_vector (TREE_TYPE (name),
+						m_irange_allocator);
+	}
     }
   m_ssa_ranges[v]->set_bb_range (bb, r);
 }
diff --git a/gcc/gimple-range-cache.h b/gcc/gimple-range-cache.h
index 4af461d2aa3..ce4449a08db 100644
--- a/gcc/gimple-range-cache.h
+++ b/gcc/gimple-range-cache.h
@@ -61,6 +61,7 @@ private:
   ssa_block_ranges &get_block_ranges (tree name);
   ssa_block_ranges *query_block_ranges (tree name);
   irange_allocator *m_irange_allocator;
+  bitmap_obstack m_bitmaps;
 };
 
 // This global cache is used with the range engine as markers for what
diff --git a/gcc/params.opt b/gcc/params.opt
index 0d0dcd216f6..18e6036c4f4 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -126,6 +126,10 @@ Maximum size (in bytes) of objects tracked bytewise by dead store elimination.
 Common Joined UInteger Var(param_early_inlining_insns) Init(6) Optimization Param
 Maximal estimated growth of function body caused by early inlining of single call.
 
+-param=evrp-sparse-threshold=
+Common Joined UInteger Var(param_evrp_sparse_threshold) Init(800) Optimization Param
+Maximum number of basic blocks before EVRP uses a sparse cache.
+
 -param=evrp-mode=
 Common Joined Var(param_evrp_mode) Enum(evrp_mode) Init(EVRP_MODE_EVRP_FIRST) Param Optimization
 --param=evrp-mode=[legacy|ranger|legacy-first|ranger-first|ranger-trace|ranger-debug|trace|debug] Specifies the mode Early VRP should operate in.

Reply via email to