On 25/09/2015 15:28, Jonathan Wakely wrote:
> @@ -501,6 +503,129 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>      mutable std::size_t _M_next_resize;
>>    };
>>
>> +  /// Range hashing function considering that second args is a power
>> of 2.
>
> Does this mean "assuming" not "considering"?
I assume yes.

>
>> +  struct _Mask_range_hashing
>> +  {
>> +    typedef std::size_t first_argument_type;
>> +    typedef std::size_t second_argument_type;
>> +    typedef std::size_t result_type;
>> +
>> +    result_type
>> +    operator()(first_argument_type __num,
>> +               second_argument_type __den) const noexcept
>> +    { return __num & (__den - 1); }
>> +  };
>> +
>> +
>> +  /// Helper type to compute next power of 2.
>> +  template<std::size_t _N>
>> +    struct _NextPower2
>> +    {
>> +      static std::size_t
>> +      _Get(std::size_t __n)
>> +      {
>> +        std::size_t __next = _NextPower2<(_N >> 1)>::_Get(__n);
>> +        return __next |= __next >> _N;
>> +      }
>> +    };
>> +
>> +  template<>
>> +    struct _NextPower2<1>
>> +    {
>> +      static std::size_t
>> +      _Get(std::size_t __n)
>> +      { return __n |= __n >> 1; }
>> +    };
>
> This doesn't seem to return the next power of 2, it returns one less.
>
> _NextPower2<32>::_Get(2) returns 3, but 2 is already a power of 2.
> _NextPower2<32>::_Get(3) returns 3, but the next power of 2 is 4.

Yes, the name is bad; it only covers part of the algorithm you pasted
below. I have reworked the implementation so that _NextPower2 now does
the whole algorithm.

>
>
> I don't think this needs to be a recursive template, it can simply be
> a function, can't it?

I wanted the code to adapt to any sizeof(std::size_t) without relying on
preprocessor checks. As you pointed out, an additional >> 32 on 32-bit
targets (or >> 64 on 64-bit ones) wouldn't hurt, but the recursive
template just makes sure we don't do any useless operations.

>
>
>> +  /// Rehash policy providing power of 2 bucket numbers. Ease modulo
>> +  /// operations.
>> +  struct _Power2_rehash_policy
>> +  {
>> +    using __has_load_factor = std::true_type;
>> +
>> +    _Power2_rehash_policy(float __z = 1.0) noexcept
>> +    : _M_max_load_factor(__z), _M_next_resize(0) { }
>> +
>> +    float
>> +    max_load_factor() const noexcept
>> +    { return _M_max_load_factor; }
>> +
>> +    // Return a bucket size no smaller than n (as long as n is not
>> above the
>> +    // highest power of 2).
>
> This says "no smaller than n" but it actually seems to guarantee
> "greater than n" because _NextPower2<>::_Get(n)+1 is 2n when n is a
> power of two.

Yes, but this function calls _NextPower2<>::_Get(n - 1) + 1; it is the
minus one that makes the comment valid, as shown by the newly introduced
test.

>
>> +    std::size_t
>> +    _M_next_bkt(std::size_t __n) const
>> +    {
>> +      constexpr auto __max_bkt
>> +        = (std::size_t(1) << (sizeof(std::size_t) * 8 - 1));
>> +
>> +      std::size_t __res
>> +        = _NextPower2<((sizeof(std::size_t) * 8) >> 1)>::_Get(--__n) + 1;
>
> You wouldn't need to add one to the result if the template actually
> returned a power of two!
>
>> +      if (__res == 0)
>> +        __res = __max_bkt;
>> +
>> +      if (__res == __max_bkt)
>> +        // Set next resize to the max value so that we never try to
>> rehash again
>> +        // as we already reach the biggest possible bucket number.
>> +        // Note that it might result in max_load_factor not being
>> respected.
>> +        _M_next_resize = std::size_t(0) - 1;
>> +      else
>> +        _M_next_resize
>> +          = __builtin_floor(__res * (long double)_M_max_load_factor);
>> +
>> +      return __res;
>> +    }
>
> What are the requirements for this function, "no smaller than n" or
> "greater than n"?

'No smaller than n', as stated in the comment. However, for big n it is
not possible, even in the prime-number-based implementation. So I played
with _M_next_resize to make sure that _M_next_bkt won't be called again
once the maximum bucket number has been reached.
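To make the intent concrete, here is a small standalone check of the two
points above; it is only an illustration, not code from the patch
(round_up_pow2 and the test driver are made-up names). It shows that
smearing the bits of n - 1 and adding 1 gives the smallest power of 2 that
is no smaller than n, and that masking with (bkt - 1) matches modulo when
bkt is a power of 2, which is all _Mask_range_hashing relies on:

#include <cassert>
#include <cstddef>
#include <initializer_list>

std::size_t
round_up_pow2(std::size_t n)
{
  // Smear the highest set bit of (n - 1) into every lower position, then
  // add 1: an exact power of 2 maps to itself, anything else is rounded up.
  --n;
  for (std::size_t s = 1; s < sizeof(std::size_t) * 8; s <<= 1)
    n |= n >> s;
  return n + 1;
}

int main()
{
  assert( round_up_pow2(2) == 2 );  // already a power of 2, unchanged
  assert( round_up_pow2(3) == 4 );
  assert( round_up_pow2(5) == 8 );

  // Mask-based range hashing agrees with modulo for a power-of-2 bucket count.
  const std::size_t bkt = round_up_pow2(33);  // 64
  for (std::size_t h : { 0u, 1u, 63u, 64u, 1000u })
    assert( (h & (bkt - 1)) == h % bkt );
  return 0;
}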
> > If "no smaller than n" is correct then the algorithm you want is > "round up to nearest power of 2", which you can find here (I wrote > this earlier this year for some reason I can't remember now): > > https://gitlab.com/redistd/redistd/blob/master/include/redi/bits.h > > The non-recursive version is only a valid constexpr function in C++14, > but since you don't need a constexpr function you could just that, > extended to handle 64-bit: > > std::size_t > clp2(std::size_t n) > { > std::uint_least64_t x = n; > // Algorithm from Hacker's Delight, Figure 3-3. > x = x - 1; > x = x | (x >> 1); > x = x | (x >> 2); > x = x | (x >> 4); > x = x | (x >> 8); > x = x | (x >>16); > x = x | (x >>32); > return x + 1; > } > > We could avoid the last shift when sizeof(size_t) == 32, I don't know > if the optimisers will take care of that anyway. This is indeed the algo I found by myself and that I adapted to work with any sizeof(size_t). Do you prefer the new version or do you want to stick a more explicit version like the one you propose above. In this case is the last 32 bits shift enough ? No 128 bits platform yet ? François
diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
index a9ad7dd..bf8b644 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -457,6 +457,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   /// smallest prime that keeps the load factor small enough.
   struct _Prime_rehash_policy
   {
+    using __has_load_factor = std::true_type;
+
     _Prime_rehash_policy(float __z = 1.0) noexcept
     : _M_max_load_factor(__z), _M_next_resize(0) { }
 
@@ -501,6 +503,136 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     mutable std::size_t _M_next_resize;
   };
 
+  /// Range hashing function assuming that second args is a power of 2.
+  struct _Mask_range_hashing
+  {
+    typedef std::size_t first_argument_type;
+    typedef std::size_t second_argument_type;
+    typedef std::size_t result_type;
+
+    result_type
+    operator()(first_argument_type __num,
+               second_argument_type __den) const noexcept
+    { return __num & (__den - 1); }
+  };
+
+
+  /// Helper type to compute next power of 2.
+  template<typename _SizeT,
+           std::size_t _N = sizeof(_SizeT) << 2, bool _Increment = true>
+    struct _NextPower2
+    {
+      static _SizeT
+      _Get(_SizeT __n)
+      {
+        _SizeT __next = _NextPower2<_SizeT, (_N >> 1), false>::_Get(__n);
+        __next |= __next >> _N;
+        if (_Increment)
+          ++__next;
+
+        return __next;
+      }
+    };
+
+  template<typename _SizeT>
+    struct _NextPower2<_SizeT, 1, false>
+    {
+      static _SizeT
+      _Get(_SizeT __n)
+      {
+        --__n;
+        return __n |= __n >> 1;
+      }
+    };
+
+  /// Rehash policy providing power of 2 bucket numbers. Ease modulo
+  /// operations.
+  struct _Power2_rehash_policy
+  {
+    using __has_load_factor = std::true_type;
+
+    _Power2_rehash_policy(float __z = 1.0) noexcept
+    : _M_max_load_factor(__z), _M_next_resize(0) { }
+
+    float
+    max_load_factor() const noexcept
+    { return _M_max_load_factor; }
+
+    // Return a bucket size no smaller than n (as long as n is not above the
+    // highest power of 2).
+    std::size_t
+    _M_next_bkt(std::size_t __n) const
+    {
+      constexpr auto __max_bkt
+        = std::size_t(1) << ((sizeof(std::size_t) << 3) - 1);
+
+      std::size_t __res = _NextPower2<std::size_t>::_Get(__n);
+
+      if (__res == 0)
+        __res = __max_bkt;
+
+      if (__res == __max_bkt)
+        // Set next resize to the max value so that we never try to rehash again
+        // as we already reach the biggest possible bucket number.
+        // Note that it might result in max_load_factor not being respected.
+        _M_next_resize = std::size_t(0) - 1;
+      else
+        _M_next_resize
+          = __builtin_floor(__res * (long double)_M_max_load_factor);
+
+      return __res;
+    }
+
+    // Return a bucket count appropriate for n elements
+    std::size_t
+    _M_bkt_for_elements(std::size_t __n) const
+    { return __builtin_ceil(__n / (long double)_M_max_load_factor); }
+
+    // __n_bkt is current bucket count, __n_elt is current element count,
+    // and __n_ins is number of elements to be inserted.  Do we need to
+    // increase bucket count?  If so, return make_pair(true, n), where n
+    // is the new bucket count.  If not, return make_pair(false, 0).
+    std::pair<bool, std::size_t>
+    _M_need_rehash(std::size_t __n_bkt, std::size_t __n_elt,
+                   std::size_t __n_ins) const
+    {
+      if (__n_elt + __n_ins >= _M_next_resize)
+        {
+          long double __min_bkts = (__n_elt + __n_ins)
+            / (long double)_M_max_load_factor;
+          if (__min_bkts >= __n_bkt)
+            return std::make_pair(true,
+              _M_next_bkt(std::max<std::size_t>(__builtin_floor(__min_bkts) + 1,
+                                                __n_bkt * _S_growth_factor)));
+
+          _M_next_resize
+            = __builtin_floor(__n_bkt * (long double)_M_max_load_factor);
+          return std::make_pair(false, 0);
+        }
+      else
+        return std::make_pair(false, 0);
+    }
+
+    typedef std::size_t _State;
+
+    _State
+    _M_state() const
+    { return _M_next_resize; }
+
+    void
+    _M_reset() noexcept
+    { _M_next_resize = 0; }
+
+    void
+    _M_reset(_State __state)
+    { _M_next_resize = __state; }
+
+    static const std::size_t _S_growth_factor = 2;
+
+    float _M_max_load_factor;
+    mutable std::size_t _M_next_resize;
+  };
+
   // Base classes for std::_Hashtable.  We define these base classes
   // because in some cases we want to do different things depending on
   // the value of a policy class.  In some cases the policy class
@@ -775,8 +907,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
            typename _ExtractKey, typename _Equal,
            typename _H1, typename _H2, typename _Hash,
            typename _RehashPolicy, typename _Traits,
-           bool _Constant_iterators = _Traits::__constant_iterators::value,
-           bool _Unique_keys = _Traits::__unique_keys::value>
+           bool _Constant_iterators = _Traits::__constant_iterators::value>
     struct _Insert;
 
   /// Specialization.
@@ -785,65 +916,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
            typename _H1, typename _H2, typename _Hash,
            typename _RehashPolicy, typename _Traits>
     struct _Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash,
-                   _RehashPolicy, _Traits, true, true>
+                   _RehashPolicy, _Traits, true>
     : public _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
                           _H1, _H2, _Hash, _RehashPolicy, _Traits>
     {
       using __base_type = _Insert_base<_Key, _Value, _Alloc, _ExtractKey,
                                        _Equal, _H1, _H2, _Hash,
                                        _RehashPolicy, _Traits>;
 
-      using value_type = typename __base_type::value_type;
-      using iterator = typename __base_type::iterator;
-      using const_iterator = typename __base_type::const_iterator;
-
-      using __unique_keys = typename __base_type::__unique_keys;
-      using __hashtable = typename __base_type::__hashtable;
-      using __node_gen_type = typename __base_type::__node_gen_type;
-
-      using __base_type::insert;
-      std::pair<iterator, bool>
-      insert(value_type&& __v)
-      {
-        __hashtable& __h = this->_M_conjure_hashtable();
-        __node_gen_type __node_gen(__h);
-        return __h._M_insert(std::move(__v), __node_gen, __unique_keys());
-      }
-
-      iterator
-      insert(const_iterator __hint, value_type&& __v)
-      {
-        __hashtable& __h = this->_M_conjure_hashtable();
-        __node_gen_type __node_gen(__h);
-        return __h._M_insert(__hint, std::move(__v), __node_gen,
-                             __unique_keys());
-      }
-    };
+      using __hashtable_base = _Hashtable_base<_Key, _Value, _ExtractKey,
                                               _Equal, _H1, _H2, _Hash,
                                               _Traits>;
 
-  /// Specialization.
-  template<typename _Key, typename _Value, typename _Alloc,
-           typename _ExtractKey, typename _Equal,
-           typename _H1, typename _H2, typename _Hash,
-           typename _RehashPolicy, typename _Traits>
-    struct _Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash,
-                   _RehashPolicy, _Traits, true, false>
-    : public _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
-                          _H1, _H2, _Hash, _RehashPolicy, _Traits>
-    {
-      using __base_type = _Insert_base<_Key, _Value, _Alloc, _ExtractKey,
-                                       _Equal, _H1, _H2, _Hash,
-                                       _RehashPolicy, _Traits>;
       using value_type = typename __base_type::value_type;
       using iterator = typename __base_type::iterator;
       using const_iterator = typename __base_type::const_iterator;
 
       using __unique_keys = typename __base_type::__unique_keys;
+      using __ireturn_type = typename __hashtable_base::__ireturn_type;
       using __hashtable = typename __base_type::__hashtable;
       using __node_gen_type = typename __base_type::__node_gen_type;
 
       using __base_type::insert;
 
-      iterator
+      __ireturn_type
      insert(value_type&& __v)
      {
        __hashtable& __h = this->_M_conjure_hashtable();
@@ -865,9 +961,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Key, typename _Value, typename _Alloc,
            typename _ExtractKey, typename _Equal,
            typename _H1, typename _H2, typename _Hash,
-           typename _RehashPolicy, typename _Traits, bool _Unique_keys>
+           typename _RehashPolicy, typename _Traits>
     struct _Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash,
-                   _RehashPolicy, _Traits, false, _Unique_keys>
+                   _RehashPolicy, _Traits, false>
     : public _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
                           _H1, _H2, _Hash, _RehashPolicy, _Traits>
     {
@@ -911,28 +1007,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      }
    };
 
+  template<typename _Policy>
+    using __has_load_factor = typename _Policy::__has_load_factor;
+
   /**
    *  Primary class template _Rehash_base.
    *
    *  Give hashtable the max_load_factor functions and reserve iff the
-   *  rehash policy is _Prime_rehash_policy.
+   *  rehash policy supports it.
    */
   template<typename _Key, typename _Value, typename _Alloc,
            typename _ExtractKey, typename _Equal,
            typename _H1, typename _H2, typename _Hash,
-           typename _RehashPolicy, typename _Traits>
+           typename _RehashPolicy, typename _Traits,
+           typename =
+             __detected_or_t<std::false_type, __has_load_factor, _RehashPolicy>>
     struct _Rehash_base;
 
-  /// Specialization.
+  /// Specialization when rehash policy doesn't provide load factor management.
   template<typename _Key, typename _Value, typename _Alloc,
            typename _ExtractKey, typename _Equal,
-           typename _H1, typename _H2, typename _Hash, typename _Traits>
+           typename _H1, typename _H2, typename _Hash,
+           typename _RehashPolicy, typename _Traits>
     struct _Rehash_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
-                        _H1, _H2, _Hash, _Prime_rehash_policy, _Traits>
+                        _H1, _H2, _Hash, _RehashPolicy, _Traits,
+                        std::false_type>
+    {
+    };
+
+  /// Specialization when rehash policy provide load factor management.
+  template<typename _Key, typename _Value, typename _Alloc,
+           typename _ExtractKey, typename _Equal,
+           typename _H1, typename _H2, typename _Hash,
+           typename _RehashPolicy, typename _Traits>
+    struct _Rehash_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
+                        _H1, _H2, _Hash, _RehashPolicy, _Traits,
+                        std::true_type>
    {
      using __hashtable = _Hashtable<_Key, _Value, _Alloc, _ExtractKey,
                                     _Equal, _H1, _H2, _Hash,
-                                    _Prime_rehash_policy, _Traits>;
+                                    _RehashPolicy, _Traits>;
 
      float
      max_load_factor() const noexcept
@@ -945,7 +1059,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      max_load_factor(float __z)
      {
        __hashtable* __this = static_cast<__hashtable*>(this);
-       __this->__rehash_policy(_Prime_rehash_policy(__z));
+       __this->__rehash_policy(_RehashPolicy(__z));
      }
 
      void
diff --git a/libstdc++-v3/src/c++11/hashtable_c++0x.cc b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
index 69f999f..dd2afe3 100644
--- a/libstdc++-v3/src/c++11/hashtable_c++0x.cc
+++ b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
@@ -60,8 +60,16 @@ namespace __detail
      = sizeof(__prime_list) / sizeof(unsigned long) - 1;
    const unsigned long* __next_bkt =
      std::lower_bound(__prime_list + 5, __prime_list + __n_primes, __n);
-    _M_next_resize =
-      __builtin_ceil(*__next_bkt * (long double)_M_max_load_factor);
+
+    if (__next_bkt == __prime_list + __n_primes)
+      // Set next resize to the max value so that we never try to rehash again
+      // as we already reach the biggest possible bucket number.
+      // Note that it might result in max_load_factor not being respected.
+      _M_next_resize = std::size_t(0) - 1;
+    else
+      _M_next_resize =
+        __builtin_ceil(*__next_bkt * (long double)_M_max_load_factor);
+
    return *__next_bkt;
  }
 
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/hash_policy/power2_rehash.cc b/libstdc++-v3/testsuite/23_containers/unordered_set/hash_policy/power2_rehash.cc
new file mode 100644
index 0000000..502794b
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/unordered_set/hash_policy/power2_rehash.cc
@@ -0,0 +1,42 @@
+// Copyright (C) 2015 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+//
+// { dg-options "-std=gnu++11" }
+
+#include <unordered_set>
+
+#include <testsuite_hooks.h>
+
+void test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  std::__detail::_Power2_rehash_policy policy;
+  VERIFY( policy._M_next_bkt(1) == 1 );
+  VERIFY( policy._M_next_bkt(2) == 2 );
+  VERIFY( policy._M_next_bkt(3) == 4 );
+  VERIFY( policy._M_next_bkt(5) == 8 );
+  VERIFY( policy._M_next_bkt(33) == 64 );
+  VERIFY( policy._M_next_bkt((std::size_t(1) << (sizeof(std::size_t) * 8 - 2)) + 1)
+          == (std::size_t(1) << (sizeof(std::size_t) * 8 - 1)) );
+}
+
+int main()
+{
+  test01();
+  return 0;
+}
diff --git a/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc b/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc
index ef956a0..a07d552 100644
--- a/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc
+++ b/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc
@@ -127,7 +127,27 @@ template<bool cache>
   using __umset = std::__umset_hashtable<Foo, HashFunction,
                                          std::equal_to<Foo>,
                                          std::allocator<Foo>,
-                                         std::__uset_traits<cache>>;
+                                         std::__umset_traits<cache>>;
+
+template<bool cache>
+  using __uset2 =
+    std::_Hashtable<Foo, Foo, std::allocator<Foo>,
+                    std::__detail::_Identity,
+                    std::equal_to<Foo>, HashFunction,
+                    std::__detail::_Mask_range_hashing,
+                    std::__detail::_Default_ranged_hash,
+                    std::__detail::_Power2_rehash_policy,
+                    std::__uset_traits<cache>>;
+
+template<bool cache>
+  using __umset2 =
+    std::_Hashtable<Foo, Foo, std::allocator<Foo>,
+                    std::__detail::_Identity,
+                    std::equal_to<Foo>, HashFunction,
+                    std::__detail::_Mask_range_hashing,
+                    std::__detail::_Default_ranged_hash,
+                    std::__detail::_Power2_rehash_policy,
+                    std::__umset_traits<cache>>;
 
 int main()
 {
@@ -181,6 +201,19 @@ int main()
   stop_counters(time, resource);
   report_performance(__FILE__, "std benches", time, resource);
 
+  start_counters(time, resource);
+  bench<__uset2<false>>(
+    "std::unordered_set2 without hash code cached ", foos);
+  bench<__uset2<true>>(
+    "std::unordered_set2 with hash code cached ", foos);
+  bench<__umset2<false>>(
+    "std::unordered_multiset2 without hash code cached ", foos);
+  bench<__umset2<true>>(
+    "std::unordered_multiset2 with hash code cached ", foos);
+
+  stop_counters(time, resource);
+  report_performance(__FILE__, "std2 benches", time, resource);
+
   bench<std::unordered_set<Foo, HashFunction>>(
     "std::unordered_set default cache ", foos);
   bench<std::unordered_multiset<Foo, HashFunction>>(
diff --git a/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc b/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc
index 9846104..4b29fde 100644
--- a/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc
+++ b/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc
@@ -177,6 +177,16 @@ template<bool cache>
                  cache>;
 
 template<bool cache>
+  using __uset2 =
+    std::_Hashtable<int, int, std::allocator<int>,
+                    std::__detail::_Identity,
+                    std::equal_to<int>, std::hash<int>,
+                    std::__detail::_Mask_range_hashing,
+                    std::__detail::_Default_ranged_hash,
+                    std::__detail::_Power2_rehash_policy,
+                    std::__uset_traits<cache>>;
+
+template<bool cache>
   using __str_uset =
     std::__uset_hashtable<std::string, std::hash<std::string>,
                           std::equal_to<std::string>,
@@ -190,6 +200,16 @@ template<bool cache>
                          std::allocator<std::string>,
                          cache>;
 
+template<bool cache>
+  using __str_uset2 =
+    std::_Hashtable<std::string, std::string, std::allocator<std::string>,
+                    std::__detail::_Identity,
+                    std::equal_to<std::string>, std::hash<std::string>,
+                    std::__detail::_Mask_range_hashing,
+                    std::__detail::_Default_ranged_hash,
+                    std::__detail::_Power2_rehash_policy,
+                    std::__uset_traits<cache>>;
+
 int main()
 {
   bench<__tr1_uset<false>>(
@@ -202,6 +222,10 @@ int main()
     "std::unordered_set<int> with hash code cached");
   bench<std::unordered_set<int>>(
     "std::unordered_set<int> default cache");
+  bench<__uset2<false>>(
+    "std::unordered_set2<int> without hash code cached");
+  bench<__uset2<true>>(
+    "std::unordered_set2<int> with hash code cached");
   bench_str<__tr1_str_uset<false>>(
     "std::tr1::unordered_set<string> without hash code cached");
   bench_str<__tr1_str_uset<true>>(
@@ -210,7 +234,11 @@ int main()
     "std::unordered_set<string> without hash code cached");
   bench_str<__str_uset<true>>(
     "std::unordered_set<string> with hash code cached");
-  bench_str<std::unordered_set<std::string>>(
+  bench_str<std::unordered_set<std::string>>(
     "std::unordered_set<string> default cache");
+  bench_str<__str_uset2<false>>(
+    "std::unordered_set2<string> without hash code cached");
+  bench_str<__str_uset2<true>>(
+    "std::unordered_set2<string> with hash code cached");
   return 0;
 }