This is an implementation of PR 68303.
I have tried to apply this idea as widely as possible in order to avoid
computing hash codes.
Note that the performance tests do not show any gain. I suspect the hash
computation would have to be quite expensive for this to pay off, so I am
only activating the optimization when the hash code is not cached and/or
when its computation is not fast.
PR libstdc++/68303
* include/bits/hashtable_policy.h
(_Small_size_threshold<_Hash>): New.
(_Hashtable_traits<>): Add _Small_size_threshold std::size_t template
parameter, default to 0.
(_Hashtable_traits<>::__small_size_threshold): New.
(_Hash_code_base<>::_M_hash_code(const __node_type*)): New.
(_Equal_helper<>::_S_node_equals): New.
* include/bits/hashtable.h:
(__small_size_threshold_default<>): New template alias.
(_Hashtable<>::find): Add linear lookup when size is less than or equal
to _Small_size_threshold.
(_Hashtable<>::_M_emplace<_Args>(true_type, _Args&&...)): Add linear
lookup when size is less than or equal to _Small_size_threshold.
(_Hashtable<>::_M_insert<>(_Arg&&, const _NodeGenerator&, true_type,
size_type)): Likewise.
(_Hashtable<>::_M_compute_hash_code(const_iterator, const key_type&)):
New.
(_Hashtable<>::_M_emplace<_Args>(false_type, _Args&&...)): Use the latter.
(_Hashtable<>::_M_insert(const_iterator, _Arg&&, const _NodeGenerator&,
false_type)): Use the latter.
(_Hashtable<>::_M_find_before_node(const key_type&)): New.
(_Hashtable<>::_M_erase(true_type, const key_type&)): Use the latter if
size is less than or equal to _Small_size_threshold.
(_Hashtable<>::_M_erase(false_type, const key_type&)): Likewise.
* include/bits/unordered_map.h (__umap_traits): Adapt using small size
threshold set to 20.
(__ummap_traits): Likewise.
* include/bits/unordered_set.h (__uset_traits, __umset_traits):
Likewise.
* src/c++11/hashtable_c++0x.cc: Add <bits/functional_hash.h> include.
Tested under Linux x86_64.
François
diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h
index 9dadc62e328..460f25affe4 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -48,6 +48,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Mandatory to have erase not throwing.
__is_nothrow_invocable<const _Hash&, const _Tp&>>>;
+ template<bool __cache, typename _Hash>
+ using __small_size_threshold_default
+ = typename conditional<__cache,
+ // No small size optimization if hash code is cached...
+ integral_constant<int, 0>,
+ _Small_size_threshold<_Hash>>::type;
/**
* Primary class template _Hashtable.
*
@@ -743,6 +749,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__node_base*
_M_find_before_node(size_type, const key_type&, __hash_code) const;
+ __node_base*
+ _M_find_before_node(const key_type&);
+
__node_type*
_M_find_node(size_type __bkt, const key_type& __key,
__hash_code __c) const
@@ -766,6 +775,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__node_base*
_M_get_previous_node(size_type __bkt, __node_base* __n);
+ pair<const_iterator, __hash_code>
+ _M_compute_hash_code(const_iterator __hint, const key_type& __k) const;
+
// Insert node __n with hash code __code, in bucket __bkt if no
// rehash (assumes no element with same key already present).
// Takes ownership of __n if insertion succeeds, throws otherwise.
@@ -1490,6 +1502,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
find(const key_type& __k)
-> iterator
{
+ if (size() <= __traits_type::__small_size_threshold::value)
+ {
+ for (auto __it = begin(); __it != end(); ++__it)
+ if (this->_M_key_equals(__k, __it._M_cur))
+ return __it;
+ return end();
+ }
+
__hash_code __code = this->_M_hash_code(__k);
std::size_t __bkt = _M_bucket_index(__code);
return iterator(_M_find_node(__bkt, __k, __code));
@@ -1504,6 +1524,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
find(const key_type& __k) const
-> const_iterator
{
+ if (size() <= __traits_type::__small_size_threshold::value)
+ {
+ for (auto __it = begin(); __it != end(); ++__it)
+ if (this->_M_key_equals(__k, __it._M_cur))
+ return __it;
+ return end();
+ }
+
__hash_code __code = this->_M_hash_code(__k);
std::size_t __bkt = _M_bucket_index(__code);
return const_iterator(_M_find_node(__bkt, __k, __code));
@@ -1619,6 +1647,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return nullptr;
}
+ // Find the node before the one whose key compares equal to k.
+ // Return nullptr if no node is found.
+ template<typename _Key, typename _Value,
+ typename _Alloc, typename _ExtractKey, typename _Equal,
+ typename _Hash, typename _RehashPolicy, typename _Traits>
+ auto
+ _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
+ _Hash, _RehashPolicy, _Traits>::
+ _M_find_before_node(const key_type& __k)
+ -> __node_base*
+ {
+ __node_base* __prev_p = &_M_before_begin;
+ if (!__prev_p->_M_nxt)
+ return nullptr;
+
+ for (__node_type* __p = static_cast<__node_type*>(__prev_p->_M_nxt);
+ __p != nullptr;
+ __p = __p->_M_next())
+ {
+ if (this->_M_key_equals(__k, __p))
+ return __prev_p;
+
+ __prev_p = __p;
+ }
+
+ return nullptr;
+ }
+
template<typename _Key, typename _Value,
typename _Alloc, typename _ExtractKey, typename _Equal,
typename _Hash, typename _RehashPolicy, typename _Traits>
@@ -1702,11 +1758,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// First build the node to get access to the hash code
_Scoped_node __node { this, std::forward<_Args>(__args)... };
const key_type& __k = this->_M_extract()(__node._M_node->_M_v());
+ if (size() <= __traits_type::__small_size_threshold::value)
+ {
+ for (auto __it = begin(); __it != end(); ++__it)
+ if (this->_M_key_equals(__k, __it._M_cur))
+ // There is already an equivalent node, no insertion
+ return { __it, false };
+ }
+
__hash_code __code = this->_M_hash_code(__k);
size_type __bkt = _M_bucket_index(__code);
- if (__node_type* __p = _M_find_node(__bkt, __k, __code))
- // There is already an equivalent node, no insertion
- return std::make_pair(iterator(__p), false);
+ if (size() > __traits_type::__small_size_threshold::value)
+ if (__node_type* __p = _M_find_node(__bkt, __k, __code))
+ // There is already an equivalent node, no insertion
+ return { iterator(__p), false };
// Insert the node
auto __pos = _M_insert_node(__uks, __bkt, __code, __node._M_node);
@@ -1728,13 +1793,40 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_Scoped_node __node { this, std::forward<_Args>(__args)... };
const key_type& __k = this->_M_extract()(__node._M_node->_M_v());
- __hash_code __code = this->_M_hash_code(__k);
+ auto __res = this->_M_compute_hash_code(__hint, __k);
auto __pos
- = _M_insert_node(__mks, __hint._M_cur, __code, __node._M_node);
+ = _M_insert_node(__mks, __res.first._M_cur, __res.second,
+ __node._M_node);
__node._M_node = nullptr;
return __pos;
}
+ template<typename _Key, typename _Value,
+ typename _Alloc, typename _ExtractKey, typename _Equal,
+ typename _Hash, typename _RehashPolicy, typename _Traits>
+ auto
+ _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
+ _Hash, _RehashPolicy, _Traits>::
+ _M_compute_hash_code(const_iterator __hint, const key_type& __k) const
+ -> pair<const_iterator, __hash_code>
+ {
+ if (size() <= __traits_type::__small_size_threshold::value)
+ {
+ if (__hint != cend())
+ {
+ for (auto __it = __hint; __it != cend(); ++__it)
+ if (this->_M_key_equals(__k, __it._M_cur))
+ return { __it, this->_M_hash_code(__it._M_cur) };
+ }
+
+ for (auto __it = cbegin(); __it != __hint; ++__it)
+ if (this->_M_key_equals(__k, __it._M_cur))
+ return { __it, this->_M_hash_code(__it._M_cur) };
+ }
+
+ return { __hint, this->_M_hash_code(__k) };
+ }
+
template<typename _Key, typename _Value,
typename _Alloc, typename _ExtractKey, typename _Equal,
typename _Hash, typename _RehashPolicy, typename _Traits>
@@ -1830,11 +1922,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
-> pair<iterator, bool>
{
const key_type& __k = this->_M_extract()(__v);
+
+ if (size() <= __traits_type::__small_size_threshold::value)
+ for (auto __it = begin(); __it != end(); ++__it)
+ if (this->_M_key_equals(__k, __it._M_cur))
+ return { __it, false };
+
__hash_code __code = this->_M_hash_code(__k);
size_type __bkt = _M_bucket_index(__code);
- if (__node_type* __node = _M_find_node(__bkt, __k, __code))
- return { iterator(__node), false };
+ if (size() > __traits_type::__small_size_threshold::value)
+ if (__node_type* __node = _M_find_node(__bkt, __k, __code))
+ return { iterator(__node), false };
_Scoped_node __node{ __node_gen(std::forward<_Arg>(__v)), this };
auto __pos = _M_insert_node(__uks, __bkt, __code, __node._M_node);
@@ -1856,13 +1955,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
// First compute the hash code so that we don't do anything if it
// throws.
- __hash_code __code = this->_M_hash_code(this->_M_extract()(__v));
+ auto __res
+ = this->_M_compute_hash_code(__hint, this->_M_extract()(__v));
// Second allocate new node so that we don't rehash if it throws.
_Scoped_node __node{ __node_gen(std::forward<_Arg>(__v)), this };
- const key_type& __k = this->_M_extract()(__node._M_node->_M_v());
auto __pos
- = _M_insert_node(__mks, __hint._M_cur, __code, __node._M_node);
+ = _M_insert_node(__mks, __res.first._M_cur, __res.second,
+ __node._M_node);
__node._M_node = nullptr;
return __pos;
}
@@ -1922,16 +2022,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_erase(__unique_keys_t, const key_type& __k)
-> size_type
{
- __hash_code __code = this->_M_hash_code(__k);
- std::size_t __bkt = _M_bucket_index(__code);
+ __node_base* __prev_n;
+ __node_type* __n;
+ std::size_t __bkt;
+ if (size() <= __traits_type::__small_size_threshold::value)
+ {
+ __prev_n = _M_find_before_node(__k);
+ if (!__prev_n)
+ return 0;
- // Look for the node before the first matching node.
- __node_base* __prev_n = _M_find_before_node(__bkt, __k, __code);
- if (!__prev_n)
- return 0;
+ // We found a matching node, erase it.
+ __n = static_cast<__node_type*>(__prev_n->_M_nxt);
+ __bkt = _M_bucket_index(__n);
+ }
+ else
+ {
+ __hash_code __code = this->_M_hash_code(__k);
+ __bkt = _M_bucket_index(__code);
+
+ // Look for the node before the first matching node.
+ __prev_n = _M_find_before_node(__bkt, __k, __code);
+ if (!__prev_n)
+ return 0;
+
+ // We found a matching node, erase it.
+ __n = static_cast<__node_type*>(__prev_n->_M_nxt);
+ }
- // We found a matching node, erase it.
- __node_type* __n = static_cast<__node_type*>(__prev_n->_M_nxt);
_M_erase(__bkt, __prev_n, __n);
return 1;
}
@@ -1945,13 +2062,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_erase(__multi_keys_t, const key_type& __k)
-> size_type
{
- __hash_code __code = this->_M_hash_code(__k);
- std::size_t __bkt = _M_bucket_index(__code);
+ std::size_t __bkt;
+ __node_base* __prev_n;
+ __node_type* __n;
+ if (size() <= __traits_type::__small_size_threshold::value)
+ {
+ __prev_n = _M_find_before_node(__k);
+ if (!__prev_n)
+ return 0;
- // Look for the node before the first matching node.
- __node_base* __prev_n = _M_find_before_node(__bkt, __k, __code);
- if (!__prev_n)
- return 0;
+ // We found a matching node, erase it.
+ __n = static_cast<__node_type*>(__prev_n->_M_nxt);
+ __bkt = _M_bucket_index(__n);
+ }
+ else
+ {
+ __hash_code __code = this->_M_hash_code(__k);
+ __bkt = _M_bucket_index(__code);
+
+ // Look for the node before the first matching node.
+ __prev_n = _M_find_before_node(__bkt, __k, __code);
+ if (!__prev_n)
+ return 0;
+
+ __n = static_cast<__node_type*>(__prev_n->_M_nxt);
+ }
// _GLIBCXX_RESOLVE_LIB_DEFECTS
// 526. Is it undefined if a function in the standard changes
@@ -1959,7 +2094,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// We use one loop to find all matching nodes and another to deallocate
// them so that the key stays valid during the first loop. It might be
// invalidated indirectly when destroying nodes.
- __node_type* __n = static_cast<__node_type*>(__prev_n->_M_nxt);
__node_type* __n_last = __n->_M_next();
while (__n_last && this->_M_node_equals(__n, __n_last))
__n_last = __n_last->_M_next();
diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
index 11ea47b322e..6f329c82335 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -45,6 +45,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
typename _Traits>
class _Hashtable;
+ /**
+ * struct _Small_size_threshold
+ *
+ * Traits like type giving the threshold below which the hashtable will be
+ * considered as small.
+ *
+ * @tparam _Hash The hash functor.
+ */
+ template<typename _Hash>
+ struct _Small_size_threshold
+ : public std::integral_constant<int, __is_fast_hash<_Hash>::value ? 0 : 20>
+ { };
+
namespace __detail
{
/**
@@ -203,13 +216,22 @@ namespace __detail
* be an arbitrary number. This is true for unordered_set and
* unordered_map, false for unordered_multiset and
* unordered_multimap.
+ *
+ * @tparam _Small_size_threshold Integer value. Threshold below which
+ * hashtable will be considered as small enough to perform linear
+ * lookup.
*/
- template<bool _Cache_hash_code, bool _Constant_iterators, bool _Unique_keys>
+ template<bool _Cache_hash_code,
+ bool _Constant_iterators,
+ bool _Unique_keys,
+ int _Small_size_threshold = 0>
struct _Hashtable_traits
{
using __hash_cached = __bool_constant<_Cache_hash_code>;
using __constant_iterators = __bool_constant<_Constant_iterators>;
using __unique_keys = __bool_constant<_Unique_keys>;
+ using __small_size_threshold
+ = integral_constant<int, _Small_size_threshold>;
};
/**
@@ -1039,9 +1061,11 @@ namespace __detail
struct _Rehash_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
_Hash, _RehashPolicy, _Traits, true_type>
{
+ private:
using __hashtable = _Hashtable<_Key, _Value, _Alloc, _ExtractKey,
_Equal, _Hash, _RehashPolicy, _Traits>;
+ public:
float
max_load_factor() const noexcept
{
@@ -1189,6 +1213,10 @@ namespace __detail
return _M_hash()(__k);
}
+ __hash_code
+ _M_hash_code(const __node_type* __n) const
+ { return _M_hash_code(_M_extract()(__n->_M_v())); }
+
std::size_t
_M_bucket_index(__hash_code __c, std::size_t __bkt_count) const
{ return _RangedHash{}(__c, __bkt_count); }
@@ -1198,9 +1226,7 @@ namespace __detail
noexcept( noexcept(declval<const _Hash&>()(declval<const _Key&>()))
&& noexcept(declval<const _RangedHash&>()((__hash_code)0,
(std::size_t)0)) )
- {
- return _RangedHash{}(_M_hash()(_M_extract()(__p->_M_v())), __bkt_count);
- }
+ { return _RangedHash{}(_M_hash_code(__p), __bkt_count); }
void
_M_store_code(__node_type*, __hash_code) const
@@ -1268,6 +1294,10 @@ namespace __detail
return _M_hash()(__k);
}
+ __hash_code
+ _M_hash_code(const __node_type* __n) const
+ { return __n->_M_hash_code; }
+
std::size_t
_M_bucket_index(__hash_code __c, std::size_t __bkt_count) const
{ return _RangedHash{}(__c, __bkt_count); }
@@ -1669,21 +1699,26 @@ namespace __detail
{ }
bool
- _M_equals(const _Key& __k, __hash_code __c, const __node_type* __n) const
+ _M_key_equals(const _Key& __k, const __node_type* __n) const
{
static_assert(__is_invocable<const _Equal&, const _Key&, const _Key&>{},
"key equality predicate must be invocable with two arguments of "
"key type");
+ return _M_eq()(__k, this->_M_extract()(__n->_M_v()));
+ }
+
+ bool
+ _M_equals(const _Key& __k, __hash_code __c, const __node_type* __n) const
+ {
return _Equal_hash_code<__node_type>::_S_equals(__c, *__n)
- && _M_eq()(__k, this->_M_extract()(__n->_M_v()));
+ && _M_key_equals(__k, __n);
}
bool
_M_node_equals(const __node_type* __lhn, const __node_type* __rhn) const
{
return _Equal_hash_code<__node_type>::_S_node_equals(*__lhn, *__rhn)
- && _M_eq()(this->_M_extract()(__lhn->_M_v()),
- this->_M_extract()(__rhn->_M_v()));
+ && _M_key_equals(this->_M_extract()(__lhn->_M_v()), __rhn);
}
void
diff --git a/libstdc++-v3/include/bits/unordered_map.h b/libstdc++-v3/include/bits/unordered_map.h
index 310cfd39d79..5265020f608 100644
--- a/libstdc++-v3/include/bits/unordered_map.h
+++ b/libstdc++-v3/include/bits/unordered_map.h
@@ -36,30 +36,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_BEGIN_NAMESPACE_CONTAINER
/// Base types for unordered_map.
- template<bool _Cache>
- using __umap_traits = __detail::_Hashtable_traits<_Cache, false, true>;
+ template<bool _Cache, typename _Hash>
+ using __umap_traits
+ = __detail::_Hashtable_traits<_Cache, false, true,
+ __small_size_threshold_default<_Cache, _Hash>::value>;
template<typename _Key,
typename _Tp,
typename _Hash = hash<_Key>,
typename _Pred = std::equal_to<_Key>,
typename _Alloc = std::allocator<std::pair<const _Key, _Tp> >,
- typename _Tr = __umap_traits<__cache_default<_Key, _Hash>::value>>
+ typename _Tr = __umap_traits<__cache_default<_Key, _Hash>::value,
+ _Hash>>
using __umap_hashtable = _Hashtable<_Key, std::pair<const _Key, _Tp>,
_Alloc, __detail::_Select1st,
_Pred, _Hash,
__detail::_Prime_rehash_policy, _Tr>;
/// Base types for unordered_multimap.
- template<bool _Cache>
- using __ummap_traits = __detail::_Hashtable_traits<_Cache, false, false>;
+ template<bool _Cache, typename _Hash>
+ using __ummap_traits
+ = __detail::_Hashtable_traits<_Cache, false, false,
+ __small_size_threshold_default<_Cache, _Hash>::value>;
template<typename _Key,
typename _Tp,
typename _Hash = hash<_Key>,
typename _Pred = std::equal_to<_Key>,
typename _Alloc = std::allocator<std::pair<const _Key, _Tp> >,
- typename _Tr = __ummap_traits<__cache_default<_Key, _Hash>::value>>
+ typename _Tr = __ummap_traits<__cache_default<_Key, _Hash>::value,
+ _Hash>>
using __ummap_hashtable = _Hashtable<_Key, std::pair<const _Key, _Tp>,
_Alloc, __detail::_Select1st,
_Pred, _Hash,
diff --git a/libstdc++-v3/include/bits/unordered_set.h b/libstdc++-v3/include/bits/unordered_set.h
index 4319495f18b..9bfa8639faf 100644
--- a/libstdc++-v3/include/bits/unordered_set.h
+++ b/libstdc++-v3/include/bits/unordered_set.h
@@ -36,27 +36,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_BEGIN_NAMESPACE_CONTAINER
/// Base types for unordered_set.
- template<bool _Cache>
- using __uset_traits = __detail::_Hashtable_traits<_Cache, true, true>;
+ template<bool _Cache, typename _Hash>
+ using __uset_traits
+ = __detail::_Hashtable_traits<_Cache, true, true,
+ __small_size_threshold_default<_Cache, _Hash>::value>;
template<typename _Value,
typename _Hash = hash<_Value>,
typename _Pred = std::equal_to<_Value>,
typename _Alloc = std::allocator<_Value>,
- typename _Tr = __uset_traits<__cache_default<_Value, _Hash>::value>>
+ typename _Tr = __uset_traits<__cache_default<_Value, _Hash>::value,
+ _Hash>>
using __uset_hashtable = _Hashtable<_Value, _Value, _Alloc,
__detail::_Identity, _Pred, _Hash,
__detail::_Prime_rehash_policy, _Tr>;
/// Base types for unordered_multiset.
- template<bool _Cache>
- using __umset_traits = __detail::_Hashtable_traits<_Cache, true, false>;
+ template<bool _Cache, typename _Hash>
+ using __umset_traits
+ = __detail::_Hashtable_traits<_Cache, true, false,
+ __small_size_threshold_default<_Cache, _Hash>::value>;
template<typename _Value,
typename _Hash = hash<_Value>,
typename _Pred = std::equal_to<_Value>,
typename _Alloc = std::allocator<_Value>,
- typename _Tr = __umset_traits<__cache_default<_Value, _Hash>::value>>
+ typename _Tr = __umset_traits<__cache_default<_Value, _Hash>::value,
+ _Hash>>
using __umset_hashtable = _Hashtable<_Value, _Value, _Alloc,
__detail::_Identity,
_Pred, _Hash,
diff --git a/libstdc++-v3/src/c++11/hashtable_c++0x.cc b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
index de437d00b56..dcf8a81fc5e 100644
--- a/libstdc++-v3/src/c++11/hashtable_c++0x.cc
+++ b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
@@ -30,6 +30,7 @@
#include <tuple>
#include <ext/aligned_buffer.h>
#include <ext/alloc_traits.h>
+#include <bits/functional_hash.h>
#include <bits/hashtable_policy.h>
namespace std _GLIBCXX_VISIBILITY(default)