Re: Speedup and cleanup hash-table.h

Richard Biener Fri, 19 Dec 2014 01:14:05 -0800

On December 19, 2014 12:48:22 AM CET, Jan Hubicka <hubi...@ucw.cz> wrote:
>Hi,
>this patch started as experiment moving hash_table_mod1 inline because
>it shows
>high in streaming profiles and it represents a branch-less code that is
>good
>to schedule to surrounding instructions.
>While looking at hash-tab.h I however noticed that it was not updated
>very
>well while it was moved from libiberty.h.  It still assumes lack of
>64bit
>integers to be possible and also it uses abort() calls instead of
>gcc_asserts.
>Fixed thus.


I think for optimal result you want to use GCC_unreachable in the place of the 
aborts instead. (To get value information for non-checking builds)

Richard.

>
>Bootstrapped/regtested x86_64-linux, also checked that cc1plus binary
>shirnks by about 15kb despite the extra inlining. Will commit it
>shortly.
>
>Honza
>
>       * hash-table.h (struct pointer_hash): Fix formating.
>       (hash_table_higher_prime_index): Declare pure.
>       (hash_table_mod2, hash_table_mod1, mul_mod): Move inline;
>       assume that uint64_t always exists.
>       (hash_table<Descriptor, Allocator, false>): Use gcc_checking_assert.
>       (hash_table<Descriptor, Allocator, false>::expand ()): Fix formating.
>       (hash_table<Descriptor, Allocator, false>::clear_slot (value_type
>**slot)):
>       Use checking assert.
>       * hash-table.c: Remvoe #if 0 code.
>       (hash_table_higher_prime_index): Use gcc_assert.
>       (mul_mod, hash-table_mod1, hash_table_mod2): move to hash-table.h
>
>Index: hash-table.h
>===================================================================
>--- hash-table.h       (revision 218795)
>+++ hash-table.h       (working copy)
>@@ -282,7 +282,8 @@ struct pointer_hash : typed_noop_remove
> 
>   static inline hashval_t hash (const value_type &);
> 
>-  static inline bool equal (const value_type &existing, const
>compare_type &candidate);
>+  static inline bool equal (const value_type &existing,
>+                          const compare_type &candidate);
> };
> 
> template <typename Type>
>@@ -388,9 +389,54 @@ extern struct prime_ent const prime_tab[
> 
> /* Functions for computing hash table indexes.  */
> 
>-extern unsigned int hash_table_higher_prime_index (unsigned long n);
>-extern hashval_t hash_table_mod1 (hashval_t hash, unsigned int index);
>-extern hashval_t hash_table_mod2 (hashval_t hash, unsigned int index);
>+extern unsigned int hash_table_higher_prime_index (unsigned long n)
>+   ATTRIBUTE_PURE;
>+
>+/* Return X % Y using multiplicative inverse values INV and SHIFT.
>+
>+   The multiplicative inverses computed above are for 32-bit types,
>+   and requires that we be able to compute a highpart multiply.
>+
>+   FIX: I am not at all convinced that
>+     3 loads, 2 multiplications, 3 shifts, and 3 additions
>+   will be faster than
>+     1 load and 1 modulus
>+   on modern systems running a compiler.  */
>+
>+inline hashval_t
>+mul_mod (hashval_t x, hashval_t y, hashval_t inv, int shift)
>+{
>+   hashval_t t1, t2, t3, t4, q, r;
>+
>+   t1 = ((uint64_t)x * inv) >> 32;
>+   t2 = x - t1;
>+   t3 = t2 >> 1;
>+   t4 = t1 + t3;
>+   q  = t4 >> shift;
>+   r  = x - (q * y);
>+
>+   return r;
>+}
>+
>+/* Compute the primary table index for HASH given current prime index.
> */
>+
>+inline hashval_t
>+hash_table_mod1 (hashval_t hash, unsigned int index)
>+{
>+  const struct prime_ent *p = &prime_tab[index];
>+  gcc_checking_assert (sizeof (hashval_t) * CHAR_BIT <= 32);
>+    return mul_mod (hash, p->prime, p->inv, p->shift);
>+}
>+
>+/* Compute the secondary table index for HASH given current prime
>index.  */
>+
>+inline hashval_t
>+hash_table_mod2 (hashval_t hash, unsigned int index)
>+{
>+  const struct prime_ent *p = &prime_tab[index];
>+  gcc_checking_assert (sizeof (hashval_t) * CHAR_BIT <= 32);
>+  return 1 + mul_mod (hash, p->prime - 2, p->inv_m2, p->shift);
>+}
> 
>/* The below is some template meta programming to decide if we should
>use the
>hash table partial specialization that directly stores value_type
>instead of
>@@ -748,8 +794,7 @@ hash_table<Descriptor, Allocator, false>
> 
>   if (*slot == HTAB_EMPTY_ENTRY)
>     return slot;
>-  else if (*slot == HTAB_DELETED_ENTRY)
>-    abort ();
>+  gcc_checking_assert (*slot != HTAB_DELETED_ENTRY);
> 
>   hash2 = hash_table_mod2 (hash, m_size_prime_index);
>   for (;;)
>@@ -761,8 +806,7 @@ hash_table<Descriptor, Allocator, false>
>       slot = m_entries + index;
>       if (*slot == HTAB_EMPTY_ENTRY)
>         return slot;
>-      else if (*slot == HTAB_DELETED_ENTRY)
>-        abort ();
>+      gcc_checking_assert (*slot != HTAB_DELETED_ENTRY);
>     }
> }
> 
>@@ -773,7 +817,7 @@ hash_table<Descriptor, Allocator, false>
>   table entries is changed.  If memory allocation fails, this function
>    will abort.  */
> 
>-        template<typename Descriptor, template<typename Type> class
>Allocator>
>+template<typename Descriptor, template<typename Type> class Allocator>
> void
> hash_table<Descriptor, Allocator, false>::expand ()
> {
>@@ -862,9 +906,9 @@ template<typename Descriptor, template<t
> void
>hash_table<Descriptor, Allocator, false>::clear_slot (value_type
>**slot)
> {
>-  if (slot < m_entries || slot >= m_entries + size ()
>-      || *slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY)
>-    abort ();
>+  gcc_checking_assert (!(slot < m_entries || slot >= m_entries + size
>()
>+                       || *slot == HTAB_EMPTY_ENTRY
>+                       || *slot == HTAB_DELETED_ENTRY));
> 
>   Descriptor::remove (*slot);
> 
>@@ -1317,8 +1361,9 @@ hash_table<Descriptor, Allocator, true>
> 
>   if (is_empty (*slot))
>     return slot;
>-  else if (is_deleted (*slot))
>-    abort ();
>+#ifdef ENABLE_CHECKING
>+  gcc_checking_assert (!is_deleted (*slot));
>+#endif
> 
>   hash2 = hash_table_mod2 (hash, m_size_prime_index);
>   for (;;)
>@@ -1330,8 +1375,9 @@ hash_table<Descriptor, Allocator, true>
>       slot = m_entries + index;
>       if (is_empty (*slot))
>         return slot;
>-      else if (is_deleted (*slot))
>-        abort ();
>+#ifdef ENABLE_CHECKING
>+      gcc_checking_assert (!is_deleted (*slot));
>+#endif
>     }
> }
> 
>@@ -1437,9 +1483,8 @@ template<typename Descriptor, template<t
> void
> hash_table<Descriptor, Allocator, true>::clear_slot (value_type *slot)
> {
>-  if (slot < m_entries || slot >= m_entries + size ()
>-      || is_empty (*slot) || is_deleted (*slot))
>-    abort ();
>+  gcc_checking_assert (!(slot < m_entries || slot >= m_entries + size
>()
>+                       || is_empty (*slot) || is_deleted (*slot)));
> 
>   Descriptor::remove (*slot);
> 
>Index: hash-table.c
>===================================================================
>--- hash-table.c       (revision 218795)
>+++ hash-table.c       (working copy)
>@@ -41,39 +41,6 @@ along with GCC; see the file COPYING3.
>   For the record, here's the function that computed the table; it's a 
>vastly simplified version of the function of the same name from gcc. 
>*/
> 
>-#if 0
>-unsigned int
>-ceil_log2 (unsigned int x)
>-{
>-  int i;
>-  for (i = 31; i >= 0 ; --i)
>-    if (x > (1u << i))
>-      return i+1;
>-  abort ();
>-}
>-
>-unsigned int
>-choose_multiplier (unsigned int d, unsigned int *mlp, unsigned char
>*shiftp)
>-{
>-  unsigned long long mhigh;
>-  double nx;
>-  int lgup, post_shift;
>-  int pow, pow2;
>-  int n = 32, precision = 32;
>-
>-  lgup = ceil_log2 (d);
>-  pow = n + lgup;
>-  pow2 = n + lgup - precision;
>-
>-  nx = ldexp (1.0, pow) + ldexp (1.0, pow2);
>-  mhigh = nx / d;
>-
>-  *shiftp = lgup - 1;
>-  *mlp = mhigh;
>-  return mhigh >> 32;
>-}
>-#endif
>-
> struct prime_ent const prime_tab[] = {
>   {          7, 0x24924925, 0x9999999b, 2 },
>   {         13, 0x3b13b13c, 0x745d1747, 3 },
>@@ -127,67 +94,8 @@ hash_table_higher_prime_index (unsigned
>     }
> 
>   /* If we've run out of primes, abort.  */
>-  if (n > prime_tab[low].prime)
>-    {
>-      fprintf (stderr, "Cannot find prime bigger than %lu\n", n);
>-      abort ();
>-    }
>+  gcc_assert (n <= prime_tab[low].prime);
> 
>   return low;
> }
> 
>-/* Return X % Y using multiplicative inverse values INV and SHIFT.
>-
>-   The multiplicative inverses computed above are for 32-bit types,
>-   and requires that we be able to compute a highpart multiply.
>-
>-   FIX: I am not at all convinced that
>-     3 loads, 2 multiplications, 3 shifts, and 3 additions
>-   will be faster than
>-     1 load and 1 modulus
>-   on modern systems running a compiler.  */
>-
>-#ifdef UNSIGNED_64BIT_TYPE
>-static inline hashval_t
>-mul_mod (hashval_t x, hashval_t y, hashval_t inv, int shift)
>-{
>-  __extension__ typedef UNSIGNED_64BIT_TYPE ull;
>-   hashval_t t1, t2, t3, t4, q, r;
>-
>-   t1 = ((ull)x * inv) >> 32;
>-   t2 = x - t1;
>-   t3 = t2 >> 1;
>-   t4 = t1 + t3;
>-   q  = t4 >> shift;
>-   r  = x - (q * y);
>-
>-   return r;
>-}
>-#endif
>-
>-/* Compute the primary table index for HASH given current prime index.
> */
>-
>-hashval_t
>-hash_table_mod1 (hashval_t hash, unsigned int index)
>-{
>-  const struct prime_ent *p = &prime_tab[index];
>-#ifdef UNSIGNED_64BIT_TYPE
>-  if (sizeof (hashval_t) * CHAR_BIT <= 32)
>-    return mul_mod (hash, p->prime, p->inv, p->shift);
>-#endif
>-  return hash % p->prime;
>-}
>-
>-
>-/* Compute the secondary table index for HASH given current prime
>index.  */
>-
>-hashval_t
>-hash_table_mod2 (hashval_t hash, unsigned int index)
>-{
>-  const struct prime_ent *p = &prime_tab[index];
>-#ifdef UNSIGNED_64BIT_TYPE
>-  if (sizeof (hashval_t) * CHAR_BIT <= 32)
>-    return 1 + mul_mod (hash, p->prime - 2, p->inv_m2, p->shift);
>-#endif
>-  return 1 + hash % (p->prime - 2);
>-}

Re: Speedup and cleanup hash-table.h

Reply via email to