scott.smith created this revision. Utilize atomics to update linked lists without requiring locks. Reduces the number of calls to compute the hash by not having to compute it once to determine the bucket and then again inside a separate hashtable implementation. Use xxhash for better performance, especially with longer strings. Eliminates calls to hash when just updating the value by being able to use atomics instead of locks.
Repository: rL LLVM https://reviews.llvm.org/D32832 Files: source/Utility/ConstString.cpp
Index: source/Utility/ConstString.cpp =================================================================== --- source/Utility/ConstString.cpp +++ source/Utility/ConstString.cpp @@ -18,6 +18,7 @@ #include "llvm/Support/FormatProviders.h" // for format_provider #include "llvm/Support/RWMutex.h" #include "llvm/Support/Threading.h" +#include "llvm/Support/xxhash.h" #include <algorithm> // for min #include <array> @@ -31,47 +32,58 @@ class Pool { public: - typedef const char *StringPoolValueType; - typedef llvm::StringMap<StringPoolValueType, llvm::BumpPtrAllocator> - StringPool; - typedef llvm::StringMapEntry<StringPoolValueType> StringPoolEntryType; + struct Entry { + uint64_t hash; + size_t key_len; + std::atomic<const char *> value; + std::atomic<Entry *> next; + // string follows + void setValue(const char * val) { + value.store(val, std::memory_order_release); + } + const char * cstr() const { + return reinterpret_cast<const char *>(this + 1); + } + llvm::StringRef str() const { + return llvm::StringRef(cstr(), key_len); + } + }; + + Pool() { + for (auto & a : m_string_pools) + a.store(nullptr, std::memory_order_release); + } + + typedef std::atomic<Entry *> StringPool; + typedef Entry StringPoolEntryType; static StringPoolEntryType & GetStringMapEntryFromKeyData(const char *keyData) { - return StringPoolEntryType::GetStringMapEntryFromKeyData(keyData); + StringPoolEntryType * p = reinterpret_cast<StringPoolEntryType *>(const_cast<char *>(keyData)); + return *(p - 1); } static size_t GetConstCStringLength(const char *ccstr) { if (ccstr != nullptr) { // Since the entry is read only, and we derive the entry entirely from the // pointer, we don't need the lock. const StringPoolEntryType &entry = GetStringMapEntryFromKeyData(ccstr); - return entry.getKey().size(); + return entry.key_len; } return 0; } - StringPoolValueType GetMangledCounterpart(const char *ccstr) const { + const char * GetMangledCounterpart(const char *ccstr) const { if (ccstr != nullptr) { - const uint8_t h = hash(llvm::StringRef(ccstr)); - llvm::sys::SmartScopedReader<false> rlock(m_string_pools[h].m_mutex); - return GetStringMapEntryFromKeyData(ccstr).getValue(); + return GetStringMapEntryFromKeyData(ccstr).value; } return nullptr; } bool SetMangledCounterparts(const char *key_ccstr, const char *value_ccstr) { if (key_ccstr != nullptr && value_ccstr != nullptr) { - { - const uint8_t h = hash(llvm::StringRef(key_ccstr)); - llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex); - GetStringMapEntryFromKeyData(key_ccstr).setValue(value_ccstr); - } - { - const uint8_t h = hash(llvm::StringRef(value_ccstr)); - llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex); - GetStringMapEntryFromKeyData(value_ccstr).setValue(key_ccstr); - } + GetStringMapEntryFromKeyData(key_ccstr).setValue(value_ccstr); + GetStringMapEntryFromKeyData(value_ccstr).setValue(key_ccstr); return true; } return false; @@ -91,53 +103,50 @@ const char *GetConstCStringWithStringRef(const llvm::StringRef &string_ref) { if (string_ref.data()) { - const uint8_t h = hash(string_ref); - - { - llvm::sys::SmartScopedReader<false> rlock(m_string_pools[h].m_mutex); - auto it = m_string_pools[h].m_string_map.find(string_ref); - if (it != m_string_pools[h].m_string_map.end()) - return it->getKeyData(); + const uint64_t h = hash(string_ref); + while (true) { + auto * b = &bucket(h); + Entry * bref; + while ((bref = b->load())) { + if (bref->hash > h) + break; + if (bref->hash == h) { + // Hash collision, do a secondary sort on string + llvm::StringRef bstringref = bref->str(); + if (string_ref == bstringref) + return bref->cstr(); + if (bstringref > string_ref) + break; + } + b = &bref->next; + } + Entry * new_entry = reinterpret_cast<Entry *>( + malloc(sizeof(Entry) + string_ref.size() + 1)); + new_entry->hash = h; + new_entry->key_len = string_ref.size(); + new_entry->value.store(nullptr, std::memory_order_relaxed); + new_entry->next.store(bref, std::memory_order_relaxed); + char * dest = const_cast<char *>(new_entry->cstr()); + memcpy(dest, string_ref.data(), string_ref.size()); + dest[string_ref.size()] = 0; + if (b->compare_exchange_strong(bref, new_entry)) { + return dest; + } + free(new_entry); } - - llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex); - StringPoolEntryType &entry = - *m_string_pools[h] - .m_string_map.insert(std::make_pair(string_ref, nullptr)) - .first; - return entry.getKeyData(); } return nullptr; } const char * GetConstCStringAndSetMangledCounterPart(const char *demangled_cstr, const char *mangled_ccstr) { if (demangled_cstr != nullptr) { - const char *demangled_ccstr = nullptr; - - { - llvm::StringRef string_ref(demangled_cstr); - const uint8_t h = hash(string_ref); - llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex); + const char *demangled_ccstr = GetConstCStringWithStringRef(demangled_cstr); - // Make string pool entry with the mangled counterpart already set - StringPoolEntryType &entry = - *m_string_pools[h] - .m_string_map.insert(std::make_pair(string_ref, mangled_ccstr)) - .first; - - // Extract the const version of the demangled_cstr - demangled_ccstr = entry.getKeyData(); - } - - { - // Now assign the demangled const string as the counterpart of the - // mangled const string... - const uint8_t h = hash(llvm::StringRef(mangled_ccstr)); - llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex); - GetStringMapEntryFromKeyData(mangled_ccstr).setValue(demangled_ccstr); - } + // Now assign the demangled const string as the counterpart of the + // mangled const string... + GetStringMapEntryFromKeyData(mangled_ccstr).setValue(demangled_ccstr); // Return the constant demangled C string return demangled_ccstr; @@ -161,26 +170,27 @@ //------------------------------------------------------------------ size_t MemorySize() const { size_t mem_size = sizeof(Pool); + /* for (const auto &pool : m_string_pools) { llvm::sys::SmartScopedReader<false> rlock(pool.m_mutex); for (const auto &entry : pool.m_string_map) mem_size += sizeof(StringPoolEntryType) + entry.getKey().size(); } + */ return mem_size; } protected: - uint8_t hash(const llvm::StringRef &s) const { - uint32_t h = llvm::HashString(s); - return ((h >> 24) ^ (h >> 16) ^ (h >> 8) ^ h) & 0xff; + static const unsigned hash_bits = 20; + uint64_t hash(const llvm::StringRef &s) const { + return llvm::xxHash64(s); } - struct PoolEntry { - mutable llvm::sys::SmartRWMutex<false> m_mutex; - StringPool m_string_map; - }; + std::atomic<Entry *> & bucket(uint64_t hash) { + return m_string_pools[hash >> (64 - hash_bits)]; + } - std::array<PoolEntry, 256> m_string_pools; + std::atomic<Entry *> m_string_pools[1 << hash_bits]; }; //----------------------------------------------------------------------
_______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits