I've played with different BITMAP_WORD and BITMAP_ELEMENT_WORDS
values and found the code using CTZ and friends a bit unwieldly
to adjust.  So I came up with a set of macros wrapping them
(_maybe_ inline overloads would be a cleaner solution, but ...).

Specifically it looks like for PR91257 making bitmap_element
cache-line aligned and 32byte in size by using an unsigned
int BITMAP_WORD and BITMAP_ELEMENT_WORDS 3 (thus reducing
the load factor from 0.4 to 0.375) is a slight win (in both
memory use and compile-time).  Enlarging to 64 bytes and keeping
unsigned long (and the padding) is worst, even using only
a single unsigned long (but then aligning to 32bytes again)
is faster than the current state.  Surprisingly the division/modulo
by 3 or the larger number of loads/stores didn't matter or were
at least offsetted by reliably touching only a single cache-line
per bitmap_element.

I would guess the optimal setting should depend on the host.

Bootstrap & regtest running on x86_64-unknown-linux-gnu.

Any objection to the macro-ification below?

Richard.

2019-07-30  Richard Biener  <rguent...@suse.de>

        * bitmap.h (BITMAP_WORD_CTZ): New define.
        (BITMAP_WORD_CLZ): Likewise.
        (BITMAP_WORD_POPCOUNT): Likewise.
        (bmp_iter_next_bit): Use BITMAP_WORD_CTZ, remove assert.
        * bitmap.c (bitmap_count_bits_in_word): use BITMAP_WORD_POPCOUNT.
        (bitmap_single_bit_set_p): likewise.
        (bitmap_first_set_bit): Use BITMAP_WORD_CTZ, remove assert.
        (bitmap_last_set_bit): Use BITMAP_WORD_CLZ, remove assert.

Index: gcc/bitmap.c
===================================================================
--- gcc/bitmap.c        (revision 273795)
+++ gcc/bitmap.c        (working copy)
@@ -1016,7 +1020,7 @@ bitmap_count_bits_in_word (const BITMAP_
 #if GCC_VERSION >= 3400
       /* Note that popcountl matches BITMAP_WORD in type, so the actual size
         of BITMAP_WORD is not material.  */
-      count += __builtin_popcountl (bits[ix]);
+      count += BITMAP_WORD_POPCOUNT (bits[ix]);
 #else
       count += bitmap_popcount (bits[ix]);
 #endif
@@ -1101,7 +1105,7 @@ bitmap_single_bit_set_p (const_bitmap a)
 #if GCC_VERSION >= 3400
       /* Note that popcountl matches BITMAP_WORD in type, so the actual size
         of BITMAP_WORD is not material.  */
-      count += __builtin_popcountl (elt->bits[ix]);
+      count += BITMAP_WORD_POPCOUNT (elt->bits[ix]);
 #else
       count += bitmap_popcount (elt->bits[ix]);
 #endif
@@ -1142,8 +1146,7 @@ bitmap_first_set_bit (const_bitmap a)
   bit_no += ix * BITMAP_WORD_BITS;
 
 #if GCC_VERSION >= 3004
-  gcc_assert (sizeof (long) == sizeof (word));
-  bit_no += __builtin_ctzl (word);
+  bit_no += BITMAP_WORD_CTZ (word);
 #else
   /* Binary search for the first set bit.  */
 #if BITMAP_WORD_BITS > 64
@@ -1200,8 +1203,7 @@ bitmap_last_set_bit (const_bitmap a)
  found_bit:
   bit_no += ix * BITMAP_WORD_BITS;
 #if GCC_VERSION >= 3004
-  gcc_assert (sizeof (long) == sizeof (word));
-  bit_no += BITMAP_WORD_BITS - __builtin_clzl (word) - 1;
+  bit_no += BITMAP_WORD_BITS - BITMAP_WORD_CLZ (word) - 1;
 #else
   /* Hopefully this is a twos-complement host...  */
   BITMAP_WORD x = word;
Index: gcc/bitmap.h
===================================================================
--- gcc/bitmap.h        (revision 273795)
+++ gcc/bitmap.h        (working copy)
@@ -272,6 +272,10 @@ extern mem_alloc_description<bitmap_usag
 /* Fundamental storage type for bitmap.  */
 
 typedef unsigned long BITMAP_WORD;
+/* Word primitives.  */
+#define BITMAP_WORD_CTZ __builtin_ctzl
+#define BITMAP_WORD_CLZ __builtin_clzl
+#define BITMAP_WORD_POPCOUNT __builtin_popcountl
 /* BITMAP_WORD_BITS needs to be unsigned, but cannot contain casts as
    it is used in preprocessor directives -- hence the 1u.  */
 #define BITMAP_WORD_BITS (CHAR_BIT * SIZEOF_LONG * 1u)
@@ -683,8 +688,7 @@ bmp_iter_next_bit (bitmap_iterator * bi,
 {
 #if (GCC_VERSION >= 3004)
   {
-    unsigned int n = __builtin_ctzl (bi->bits);
-    gcc_assert (sizeof (unsigned long) == sizeof (BITMAP_WORD));
+    unsigned int n = BITMAP_WORD_CTZ (bi->bits);
     bi->bits >>= n;
     *bit_no += n;
   }

Reply via email to