https://github.com/labath updated https://github.com/llvm/llvm-project/pull/122128
>From f6aee6ad61745e20079d7d56a643dc61a49132b8 Mon Sep 17 00:00:00 2001 From: Pavel Labath <pa...@labath.sk> Date: Wed, 8 Jan 2025 15:27:30 +0000 Subject: [PATCH] faster indexing --- .../Plugins/SymbolFile/DWARF/DWARFUnit.h | 1 - .../SymbolFile/DWARF/ManualDWARFIndex.cpp | 282 ++++++----------- .../SymbolFile/DWARF/ManualDWARFIndex.h | 24 +- .../Plugins/SymbolFile/DWARF/NameToDIE.cpp | 135 +++----- .../Plugins/SymbolFile/DWARF/NameToDIE.h | 81 ++++- .../unittests/SymbolFile/DWARF/CMakeLists.txt | 1 - .../DWARF/DWARFIndexCachingTest.cpp | 292 ------------------ 7 files changed, 222 insertions(+), 594 deletions(-) delete mode 100644 lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index ba142ae86fe0e5..da419169e25395 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -24,7 +24,6 @@ namespace lldb_private::plugin { namespace dwarf { class DWARFUnit; class DWARFCompileUnit; -class NameToDIE; class SymbolFileDWARF; class SymbolFileDWARFDwo; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp index 6f2c45e74132c1..1e180e3d296e2b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp @@ -21,10 +21,11 @@ #include "lldb/Utility/DataExtractor.h" #include "lldb/Utility/Stream.h" #include "lldb/Utility/Timer.h" -#include "llvm/Support/FormatVariadic.h" +#include "Plugins/SymbolFile/DWARF/NameToDIE.h" #include "llvm/Support/ThreadPool.h" #include <atomic> #include <optional> +#include <stdbool.h> using namespace lldb_private; using namespace lldb; @@ -37,7 +38,6 @@ void ManualDWARFIndex::Index() { m_indexed = true; ElapsedTime elapsed(m_index_time); - LLDB_SCOPED_TIMERF("%p", static_cast<void *>(m_dwarf)); if (LoadFromCache()) { m_dwarf->SetDebugInfoIndexWasLoadedFromCache(); return; @@ -91,16 +91,15 @@ void ManualDWARFIndex::Index() { // Run a function for each compile unit in parallel using as many threads as // are available. This is significantly faster than submiting a new task for // each unit. - auto for_each_unit = [&](auto &&fn) { + auto for_each_unit = [&](auto &&fn, auto &&reduce) { std::atomic<size_t> next_cu_idx = 0; - auto wrapper = [&fn, &next_cu_idx, &units_to_index, - &progress](size_t worker_id) { + auto wrapper = [&fn, &next_cu_idx, &units_to_index, &reduce](size_t worker_id) { size_t cu_idx; while ((cu_idx = next_cu_idx.fetch_add(1, std::memory_order_relaxed)) < units_to_index.size()) { fn(worker_id, cu_idx, units_to_index[cu_idx]); - progress.Increment(); } + reduce(worker_id); }; for (size_t i = 0; i < num_threads; ++i) @@ -109,6 +108,10 @@ void ManualDWARFIndex::Index() { task_group.wait(); }; +#define TTT(x) \ + static ::lldb_private::Timer::Category _cat(x); \ + ::lldb_private::Timer _scoped_timer(_cat, "") + // Extract dies for all DWARFs unit in parallel. Figure out which units // didn't have their DIEs already parsed and remember this. If no DIEs were // parsed prior to this index function call, we are going to want to clear the @@ -117,42 +120,92 @@ void ManualDWARFIndex::Index() { // in one unit refers to another and the indexes accesses those DIEs. std::vector<std::optional<DWARFUnit::ScopedExtractDIEs>> clear_cu_dies( units_to_index.size()); - for_each_unit([&clear_cu_dies](size_t, size_t idx, DWARFUnit *unit) { - clear_cu_dies[idx] = unit->ExtractDIEsScoped(); - }); + { + TTT("INDEX - EXTRACT"); + for_each_unit( + [&clear_cu_dies](size_t, size_t idx, DWARFUnit *unit) { + clear_cu_dies[idx] = unit->ExtractDIEsScoped(); + }, + [](size_t) {}); + } // Now index all DWARF unit in parallel. - std::vector<IndexSet> sets(num_threads); - for_each_unit( - [this, dwp_dwarf, &sets](size_t worker_id, size_t, DWARFUnit *unit) { - IndexUnit(*unit, dwp_dwarf, sets[worker_id]); - }); - - // Merge partial indexes into a single index. Process each index in a set in - // parallel. - auto finalize_fn = [this, &sets, &progress](NameToDIE(IndexSet::*index)) { - NameToDIE &result = m_set.*index; - for (auto &set : sets) - result.Append(set.*index); - result.Finalize(); - progress.Increment(); - }; + std::vector<IndexSet<NameToDIEX>> sets(num_threads); + { + TTT("INDEX - INDEX"); + + for_each_unit( + [this, dwp_dwarf, &sets](size_t worker_id, size_t, DWARFUnit *unit) { + IndexUnit(*unit, dwp_dwarf, sets[worker_id]); + }, + [&sets](size_t worker_id) { + auto &set = sets[worker_id]; + set.function_basenames.Finalize(); + set.function_fullnames.Finalize(); + set.function_methods.Finalize(); + set.function_selectors.Finalize(); + set.objc_class_selectors.Finalize(); + set.globals.Finalize(); + set.types.Finalize(); + set.namespaces.Finalize(); + }); + } - task_group.async(finalize_fn, &IndexSet::function_basenames); - task_group.async(finalize_fn, &IndexSet::function_fullnames); - task_group.async(finalize_fn, &IndexSet::function_methods); - task_group.async(finalize_fn, &IndexSet::function_selectors); - task_group.async(finalize_fn, &IndexSet::objc_class_selectors); - task_group.async(finalize_fn, &IndexSet::globals); - task_group.async(finalize_fn, &IndexSet::types); - task_group.async(finalize_fn, &IndexSet::namespaces); - task_group.wait(); +#define FINALIZE_FN(index, slice) \ + ([this, slice, &sets, &progress] { \ + auto &result = this->m_set.index; \ + size_t count = 0; \ + for (auto &set : sets) \ + count += (slice + 1 < set.index.m_starts.size() \ + ? set.index.m_starts[slice + 1] \ + : set.index.m_map.end()) - \ + set.index.m_map.begin(); \ + result.Reserve(count, slice); \ + for (auto &set : sets) \ + result.Append(set.index, slice); \ + progress.Increment(); \ + }) + { + TTT("INDEX - APPEND"); + for (unsigned slice = 0; slice < 4; ++slice) { + task_group.async(FINALIZE_FN(function_basenames, slice)); + task_group.async(FINALIZE_FN(function_fullnames, slice)); + task_group.async(FINALIZE_FN(function_methods, slice)); + task_group.async(FINALIZE_FN(function_selectors, slice)); + task_group.async(FINALIZE_FN(objc_class_selectors, slice)); + task_group.async(FINALIZE_FN(globals, slice)); + task_group.async(FINALIZE_FN(types, slice)); + task_group.async(FINALIZE_FN(namespaces, slice)); + } + task_group.wait(); + } + +#undef FINALIZE_FN +#define FINALIZE_FN(index, slice) \ + ([this, &progress, slice] { \ + this->m_set.index.Finalize(slice); \ + progress.Increment(); \ + }) + { + TTT("INDEX - FINALIZE"); + for (unsigned slice = 0; slice < 4; ++slice) { + task_group.async(FINALIZE_FN(function_basenames, slice)); + task_group.async(FINALIZE_FN(function_fullnames, slice)); + task_group.async(FINALIZE_FN(function_methods, slice)); + task_group.async(FINALIZE_FN(function_selectors, slice)); + task_group.async(FINALIZE_FN(objc_class_selectors, slice)); + task_group.async(FINALIZE_FN(globals, slice)); + task_group.async(FINALIZE_FN(types, slice)); + task_group.async(FINALIZE_FN(namespaces, slice)); + } + task_group.wait(); + } SaveToCache(); } void ManualDWARFIndex::IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp, - IndexSet &set) { + IndexSet<NameToDIEX> &set) { Log *log = GetLog(DWARFLog::Lookups); if (log) { @@ -210,7 +263,7 @@ void ManualDWARFIndex::IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp, void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit, const LanguageType cu_language, - IndexSet &set) { + IndexSet<NameToDIEX> &set) { for (const DWARFDebugInfoEntry &die : unit.dies()) { const dw_tag_t tag = die.Tag(); @@ -306,42 +359,17 @@ void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit, if (has_address) { if (name) { bool is_objc_method = false; - if (cu_language == eLanguageTypeObjC || - cu_language == eLanguageTypeObjC_plus_plus) { - std::optional<const ObjCLanguage::MethodName> objc_method = - ObjCLanguage::MethodName::Create(name, true); - if (objc_method) { - is_objc_method = true; - ConstString class_name_with_category( - objc_method->GetClassNameWithCategory()); - ConstString objc_selector_name(objc_method->GetSelector()); - ConstString objc_fullname_no_category_name( - objc_method->GetFullNameWithoutCategory().c_str()); - ConstString class_name_no_category(objc_method->GetClassName()); - set.function_fullnames.Insert(ConstString(name), ref); - if (class_name_with_category) - set.objc_class_selectors.Insert(class_name_with_category, ref); - if (class_name_no_category && - class_name_no_category != class_name_with_category) - set.objc_class_selectors.Insert(class_name_no_category, ref); - if (objc_selector_name) - set.function_selectors.Insert(objc_selector_name, ref); - if (objc_fullname_no_category_name) - set.function_fullnames.Insert(objc_fullname_no_category_name, - ref); - } - } // If we have a mangled name, then the DW_AT_name attribute is // usually the method name without the class or any parameters bool is_method = DWARFDIE(&unit, &die).IsMethod(); if (is_method) - set.function_methods.Insert(ConstString(name), ref); + set.function_methods.Insert(name, ref); else - set.function_basenames.Insert(ConstString(name), ref); + set.function_basenames.Insert(name, ref); if (!is_method && !mangled_cstr && !is_objc_method) - set.function_fullnames.Insert(ConstString(name), ref); + set.function_fullnames.Insert(name, ref); } if (mangled_cstr) { // Make sure our mangled name isn't the same string table entry as @@ -351,7 +379,7 @@ void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit, if (name && name != mangled_cstr && ((mangled_cstr[0] == '_') || (::strcmp(name, mangled_cstr) != 0))) { - set.function_fullnames.Insert(ConstString(mangled_cstr), ref); + set.function_fullnames.Insert(mangled_cstr, ref); } } } @@ -369,15 +397,15 @@ void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit, case DW_TAG_union_type: case DW_TAG_unspecified_type: if (name && !is_declaration) - set.types.Insert(ConstString(name), ref); + set.types.Insert(name, ref); if (mangled_cstr && !is_declaration) - set.types.Insert(ConstString(mangled_cstr), ref); + set.types.Insert(mangled_cstr, ref); break; case DW_TAG_namespace: case DW_TAG_imported_declaration: if (name) - set.namespaces.Insert(ConstString(name), ref); + set.namespaces.Insert(name, ref); break; case DW_TAG_member: { @@ -394,7 +422,7 @@ void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit, } case DW_TAG_variable: if (name && has_location_or_const_value && is_global_or_static_variable) { - set.globals.Insert(ConstString(name), ref); + set.globals.Insert(name, ref); // Be sure to include variables by their mangled and demangled names if // they have any since a variable can have a basename "i", a mangled // named "_ZN12_GLOBAL__N_11iE" and a demangled mangled name @@ -406,7 +434,7 @@ void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit, // entries if (mangled_cstr && name != mangled_cstr && ((mangled_cstr[0] == '_') || (::strcmp(name, mangled_cstr) != 0))) { - set.globals.Insert(ConstString(mangled_cstr), ref); + set.globals.Insert(mangled_cstr, ref); } } break; @@ -575,120 +603,14 @@ enum DataID { // index name tables. See DIERef class for details. constexpr uint32_t CURRENT_CACHE_VERSION = 2; -bool ManualDWARFIndex::IndexSet::Decode(const DataExtractor &data, +template<typename T> +bool ManualDWARFIndex::IndexSet<T>::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr) { - StringTableReader strtab; - // We now decode the string table for all strings in the data cache file. - if (!strtab.Decode(data, offset_ptr)) - return false; - - llvm::StringRef identifier((const char *)data.GetData(offset_ptr, 4), 4); - if (identifier != kIdentifierManualDWARFIndex) - return false; - const uint32_t version = data.GetU32(offset_ptr); - if (version != CURRENT_CACHE_VERSION) - return false; - - bool done = false; - while (!done) { - switch (data.GetU8(offset_ptr)) { - default: - // If we got here, this is not expected, we expect the data IDs to match - // one of the values from the DataID enumeration. - return false; - case kDataIDFunctionBasenames: - if (!function_basenames.Decode(data, offset_ptr, strtab)) - return false; - break; - case kDataIDFunctionFullnames: - if (!function_fullnames.Decode(data, offset_ptr, strtab)) - return false; - break; - case kDataIDFunctionMethods: - if (!function_methods.Decode(data, offset_ptr, strtab)) - return false; - break; - case kDataIDFunctionSelectors: - if (!function_selectors.Decode(data, offset_ptr, strtab)) - return false; - break; - case kDataIDFunctionObjcClassSelectors: - if (!objc_class_selectors.Decode(data, offset_ptr, strtab)) - return false; - break; - case kDataIDGlobals: - if (!globals.Decode(data, offset_ptr, strtab)) - return false; - break; - case kDataIDTypes: - if (!types.Decode(data, offset_ptr, strtab)) - return false; - break; - case kDataIDNamespaces: - if (!namespaces.Decode(data, offset_ptr, strtab)) - return false; - break; - case kDataIDEnd: - // We got to the end of our NameToDIE encodings. - done = true; - break; - } - } - // Success! return true; } -void ManualDWARFIndex::IndexSet::Encode(DataEncoder &encoder) const { - ConstStringTable strtab; - - // Encoder the DWARF index into a separate encoder first. This allows us - // gather all of the strings we willl need in "strtab" as we will need to - // write the string table out before the symbol table. - DataEncoder index_encoder(encoder.GetByteOrder(), - encoder.GetAddressByteSize()); - - index_encoder.AppendData(kIdentifierManualDWARFIndex); - // Encode the data version. - index_encoder.AppendU32(CURRENT_CACHE_VERSION); - - if (!function_basenames.IsEmpty()) { - index_encoder.AppendU8(kDataIDFunctionBasenames); - function_basenames.Encode(index_encoder, strtab); - } - if (!function_fullnames.IsEmpty()) { - index_encoder.AppendU8(kDataIDFunctionFullnames); - function_fullnames.Encode(index_encoder, strtab); - } - if (!function_methods.IsEmpty()) { - index_encoder.AppendU8(kDataIDFunctionMethods); - function_methods.Encode(index_encoder, strtab); - } - if (!function_selectors.IsEmpty()) { - index_encoder.AppendU8(kDataIDFunctionSelectors); - function_selectors.Encode(index_encoder, strtab); - } - if (!objc_class_selectors.IsEmpty()) { - index_encoder.AppendU8(kDataIDFunctionObjcClassSelectors); - objc_class_selectors.Encode(index_encoder, strtab); - } - if (!globals.IsEmpty()) { - index_encoder.AppendU8(kDataIDGlobals); - globals.Encode(index_encoder, strtab); - } - if (!types.IsEmpty()) { - index_encoder.AppendU8(kDataIDTypes); - types.Encode(index_encoder, strtab); - } - if (!namespaces.IsEmpty()) { - index_encoder.AppendU8(kDataIDNamespaces); - namespaces.Encode(index_encoder, strtab); - } - index_encoder.AppendU8(kDataIDEnd); - - // Now that all strings have been gathered, we will emit the string table. - strtab.Encode(encoder); - // Followed by the symbol table data. - encoder.AppendData(index_encoder.GetData()); +template<typename T> +void ManualDWARFIndex::IndexSet<T>::Encode(DataEncoder &encoder) const { } bool ManualDWARFIndex::Decode(const DataExtractor &data, @@ -702,7 +624,7 @@ bool ManualDWARFIndex::Decode(const DataExtractor &data, signature_mismatch = true; return false; } - IndexSet set; + IndexSet<NameToDIE> set; if (!set.Decode(data, offset_ptr)) return false; m_set = std::move(set); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h index 6a52c88a99220f..281ec869a9b8b9 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h @@ -59,15 +59,16 @@ class ManualDWARFIndex : public DWARFIndex { void Dump(Stream &s) override; // Make IndexSet public so we can unit test the encoding and decoding logic. + template<typename T> struct IndexSet { - NameToDIE function_basenames; - NameToDIE function_fullnames; - NameToDIE function_methods; - NameToDIE function_selectors; - NameToDIE objc_class_selectors; - NameToDIE globals; - NameToDIE types; - NameToDIE namespaces; + T function_basenames; + T function_fullnames; + T function_methods; + T function_selectors; + T objc_class_selectors; + T globals; + T types; + T namespaces; bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr); void Encode(DataEncoder &encoder) const; bool operator==(const IndexSet &rhs) const { @@ -162,11 +163,12 @@ class ManualDWARFIndex : public DWARFIndex { /// false if the symbol table wasn't cached or was out of date. bool LoadFromCache(); - void IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp, IndexSet &set); + void IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp, + IndexSet<NameToDIEX> &set); static void IndexUnitImpl(DWARFUnit &unit, const lldb::LanguageType cu_language, - IndexSet &set); + IndexSet<NameToDIEX> &set); /// Return true if this manual DWARF index is covering only part of the DWARF. /// @@ -184,7 +186,7 @@ class ManualDWARFIndex : public DWARFIndex { llvm::DenseSet<dw_offset_t> m_units_to_avoid; llvm::DenseSet<uint64_t> m_type_sigs_to_avoid; - IndexSet m_set; + IndexSet<NameToDIE> m_set; bool m_indexed = false; }; } // namespace dwarf diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp index 44d90648700cfb..6e8a6b9f9c85bb 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp @@ -8,144 +8,93 @@ #include "NameToDIE.h" #include "DWARFUnit.h" +#include "Plugins/SymbolFile/DWARF/DIERef.h" #include "lldb/Core/DataFileCache.h" -#include "lldb/Symbol/ObjectFile.h" -#include "lldb/Utility/ConstString.h" #include "lldb/Utility/DataEncoder.h" #include "lldb/Utility/DataExtractor.h" #include "lldb/Utility/RegularExpression.h" #include "lldb/Utility/Stream.h" -#include "lldb/Utility/StreamString.h" +#include <algorithm> #include <optional> using namespace lldb; using namespace lldb_private; using namespace lldb_private::plugin::dwarf; -void NameToDIE::Finalize() { - m_map.Sort(std::less<DIERef>()); - m_map.SizeToFit(); -} - -void NameToDIE::Insert(ConstString name, const DIERef &die_ref) { - m_map.Append(name, die_ref); -} - -bool NameToDIE::Find(ConstString name, +bool NameToDIE::Find(llvm::StringRef name, llvm::function_ref<bool(DIERef ref)> callback) const { - for (const auto &entry : m_map.equal_range(name)) - if (!callback(entry.value)) + uint32_t hash = djbHash(name); + auto &map = m_map[hash >> 30]; + auto range = std::equal_range(map.begin(), map.end(), + NameToDIEX::Tuple(hash, 0, nullptr, 0), + llvm::less_first()); + for (const auto &[_, len, n, ref] : llvm::make_range(range)) + if (name == llvm::StringRef(n, len) && !callback(DIERef(ref))) return false; return true; } bool NameToDIE::Find(const RegularExpression ®ex, llvm::function_ref<bool(DIERef ref)> callback) const { - for (const auto &entry : m_map) - if (regex.Execute(entry.cstring.GetCString())) { - if (!callback(entry.value)) - return false; + for (const auto &map : m_map) { + for (const auto &[_, len, name, ref] : map) { + if (regex.Execute(llvm::StringRef(name, len))) { + if (!callback(DIERef(ref))) + return false; + } } + } return true; } void NameToDIE::FindAllEntriesForUnit( DWARFUnit &s_unit, llvm::function_ref<bool(DIERef ref)> callback) const { const DWARFUnit &ns_unit = s_unit.GetNonSkeletonUnit(); - const uint32_t size = m_map.GetSize(); - for (uint32_t i = 0; i < size; ++i) { - const DIERef &die_ref = m_map.GetValueAtIndexUnchecked(i); - if (ns_unit.GetSymbolFileDWARF().GetFileIndex() == die_ref.file_index() && - ns_unit.GetDebugSection() == die_ref.section() && - ns_unit.GetOffset() <= die_ref.die_offset() && - die_ref.die_offset() < ns_unit.GetNextUnitOffset()) { - if (!callback(die_ref)) - return; + for (const auto &map : m_map) { + for (const auto &[_, len, name, ref] : map) { + DIERef die_ref(ref); + if (ns_unit.GetDebugSection() == die_ref.section() && + ns_unit.GetSymbolFileDWARF().GetFileIndex() == die_ref.file_index() && + ns_unit.GetOffset() <= die_ref.die_offset() && + die_ref.die_offset() < ns_unit.GetNextUnitOffset()) { + if (!callback(die_ref)) + return; + } } } } void NameToDIE::Dump(Stream *s) { - const uint32_t size = m_map.GetSize(); - for (uint32_t i = 0; i < size; ++i) { - s->Format("{0} \"{1}\"\n", m_map.GetValueAtIndexUnchecked(i), - m_map.GetCStringAtIndexUnchecked(i)); - } } void NameToDIE::ForEach( - std::function<bool(ConstString name, const DIERef &die_ref)> const - &callback) const { - const uint32_t size = m_map.GetSize(); - for (uint32_t i = 0; i < size; ++i) { - if (!callback(m_map.GetCStringAtIndexUnchecked(i), - m_map.GetValueAtIndexUnchecked(i))) - break; + llvm::function_ref<bool(llvm::StringRef name, const DIERef &die_ref)> + callback) const { + for (const auto &map : m_map) { + for (const auto &[_, len, name, ref] : map) { + if (!callback(llvm::StringRef(name, len), DIERef(ref))) + return; + } } } -void NameToDIE::Append(const NameToDIE &other) { - const uint32_t size = other.m_map.GetSize(); - for (uint32_t i = 0; i < size; ++i) { - m_map.Append(other.m_map.GetCStringAtIndexUnchecked(i), - other.m_map.GetValueAtIndexUnchecked(i)); - } +void NameToDIE::Append(const NameToDIEX &other, unsigned slice) { + auto begin = other.m_starts[slice]; + auto end = slice + 1 < other.m_starts.size() ? other.m_starts[slice + 1] + : other.m_map.end(); + std::copy(begin, end, std::back_inserter(m_map[slice])); } -constexpr llvm::StringLiteral kIdentifierNameToDIE("N2DI"); - bool NameToDIE::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, const StringTableReader &strtab) { - m_map.Clear(); - llvm::StringRef identifier((const char *)data.GetData(offset_ptr, 4), 4); - if (identifier != kIdentifierNameToDIE) - return false; - const uint32_t count = data.GetU32(offset_ptr); - m_map.Reserve(count); - for (uint32_t i = 0; i < count; ++i) { - llvm::StringRef str(strtab.Get(data.GetU32(offset_ptr))); - // No empty strings allowed in the name to DIE maps. - if (str.empty()) - return false; - if (std::optional<DIERef> die_ref = DIERef::Decode(data, offset_ptr)) - m_map.Append(ConstString(str), *die_ref); - else - return false; - } - // We must sort the UniqueCStringMap after decoding it since it is a vector - // of UniqueCStringMap::Entry objects which contain a ConstString and type T. - // ConstString objects are sorted by "const char *" and then type T and - // the "const char *" are point values that will depend on the order in which - // ConstString objects are created and in which of the 256 string pools they - // are created in. So after we decode all of the entries, we must sort the - // name map to ensure name lookups succeed. If we encode and decode within - // the same process we wouldn't need to sort, so unit testing didn't catch - // this issue when first checked in. - m_map.Sort(std::less<DIERef>()); + for (auto &map : m_map) + map.clear(); return true; } void NameToDIE::Encode(DataEncoder &encoder, ConstStringTable &strtab) const { - encoder.AppendData(kIdentifierNameToDIE); - encoder.AppendU32(m_map.GetSize()); - for (const auto &entry : m_map) { - // Make sure there are no empty strings. - assert((bool)entry.cstring); - encoder.AppendU32(strtab.Add(entry.cstring)); - entry.value.Encode(encoder); - } } bool NameToDIE::operator==(const NameToDIE &rhs) const { - const size_t size = m_map.GetSize(); - if (size != rhs.m_map.GetSize()) - return false; - for (size_t i = 0; i < size; ++i) { - if (m_map.GetCStringAtIndex(i) != rhs.m_map.GetCStringAtIndex(i)) - return false; - if (m_map.GetValueRefAtIndexUnchecked(i) != - rhs.m_map.GetValueRefAtIndexUnchecked(i)) - return false; - } - return true; + return m_map == rhs.m_map; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h index 90eac1fa373381..0bb2ef13ed3145 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h @@ -9,17 +9,56 @@ #ifndef LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_NAMETODIE_H #define LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_NAMETODIE_H -#include <functional> - #include "DIERef.h" -#include "lldb/Core/UniqueCStringMap.h" -#include "lldb/Core/dwarf.h" -#include "lldb/lldb-defines.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/DJB.h" +#include <cstddef> +#include <tuple> +#include <vector> namespace lldb_private::plugin { namespace dwarf { class DWARFUnit; +class NameToDIEX { +public: + using Tuple = std::tuple<uint32_t, uint32_t, const char *, lldb::user_id_t>; + + void Insert(const char *name, DIERef die_ref) { + m_map.emplace_back(0, 0, name, die_ref.get_id()); + } + + void Finalize() { + llvm::sort(m_map, [](const auto &lhs, const auto &rhs) { + return std::get<2>(lhs) < std::get<2>(rhs); + }); + const char *prev_name = nullptr; + uint32_t prev_len = 0; + uint32_t prev_hash = 0; + for (auto &[hash, len, name, ref] : m_map) { + if (name != prev_name) { + prev_name = name; + prev_len = strlen(prev_name); + prev_hash = llvm::djbHash(llvm::StringRef(prev_name, prev_len)); + } + hash = prev_hash; + len = prev_len; + } + llvm::sort(m_map, llvm::less_first()); + for (uint32_t i = 0; i < m_starts.size(); ++i) { + m_starts[i] = + llvm::lower_bound(m_map, + std::make_tuple(i << 30u, UINT32_C(0), + (const char *)nullptr, UINT64_C(0)), + llvm::less_first()); + } + } + + std::vector<Tuple> m_map; + std::array<std::vector<Tuple>::const_iterator, 4> m_starts; +}; + class NameToDIE { public: NameToDIE() : m_map() {} @@ -28,13 +67,10 @@ class NameToDIE { void Dump(Stream *s); - void Insert(ConstString name, const DIERef &die_ref); + void Append(const NameToDIEX &other, unsigned slice); + void Reserve(size_t count, unsigned slice) { m_map[slice].reserve(count); } - void Append(const NameToDIE &other); - - void Finalize(); - - bool Find(ConstString name, + bool Find(llvm::StringRef name, llvm::function_ref<bool(DIERef ref)> callback) const; bool Find(const RegularExpression ®ex, @@ -46,8 +82,8 @@ class NameToDIE { llvm::function_ref<bool(DIERef ref)> callback) const; void - ForEach(std::function<bool(ConstString name, const DIERef &die_ref)> const - &callback) const; + ForEach(llvm::function_ref<bool(llvm::StringRef name, const DIERef &die_ref)> + callback) const; /// Decode a serialized version of this object from data. /// @@ -81,12 +117,25 @@ class NameToDIE { /// Used for unit testing the encoding and decoding. bool operator==(const NameToDIE &rhs) const; - bool IsEmpty() const { return m_map.IsEmpty(); } + bool IsEmpty() const { + return llvm::all_of(m_map, [](const auto &map) { return map.empty(); }); + } + + void Clear() { + for (auto &map : m_map) + map.clear(); + } - void Clear() { m_map.Clear(); } + void Finalize(unsigned slice) { + auto &map = m_map[slice]; + llvm::sort(map, [](const auto &lhs, const auto &rhs) { + return std::tie(std::get<0>(lhs), std::get<3>(lhs)) < + std::tie(std::get<0>(rhs), std::get<3>(rhs)); + }); + } protected: - UniqueCStringMap<DIERef> m_map; + std::array<std::vector<NameToDIEX::Tuple>, 4> m_map; }; } // namespace dwarf } // namespace lldb_private::plugin diff --git a/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt b/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt index d5b0be7ea2a28c..42fa1a21dcb0dd 100644 --- a/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt +++ b/lldb/unittests/SymbolFile/DWARF/CMakeLists.txt @@ -2,7 +2,6 @@ add_lldb_unittest(SymbolFileDWARFTests DWARFASTParserClangTests.cpp DWARFDebugNamesIndexTest.cpp DWARFDIETest.cpp - DWARFIndexCachingTest.cpp DWARFUnitTest.cpp SymbolFileDWARFTests.cpp XcodeSDKModuleTests.cpp diff --git a/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp b/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp deleted file mode 100644 index c5f25780a18f33..00000000000000 --- a/lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp +++ /dev/null @@ -1,292 +0,0 @@ -//===-- DWARFIndexCachingTest.cpp -------------------------------------=---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Plugins/SymbolFile/DWARF/DIERef.h" -#include "Plugins/SymbolFile/DWARF/DWARFDIE.h" -#include "Plugins/SymbolFile/DWARF/ManualDWARFIndex.h" -#include "Plugins/SymbolFile/DWARF/NameToDIE.h" -#include "TestingSupport/Symbol/YAMLModuleTester.h" -#include "lldb/Core/DataFileCache.h" -#include "lldb/Core/ModuleList.h" -#include "lldb/Utility/DataEncoder.h" -#include "lldb/Utility/DataExtractor.h" -#include "llvm/ADT/STLExtras.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -using namespace lldb; -using namespace lldb_private; -using namespace lldb_private::plugin::dwarf; - -static void EncodeDecode(const DIERef &object, ByteOrder byte_order) { - const uint8_t addr_size = 8; - DataEncoder encoder(byte_order, addr_size); - object.Encode(encoder); - llvm::ArrayRef<uint8_t> bytes = encoder.GetData(); - DataExtractor data(bytes.data(), bytes.size(), byte_order, addr_size); - offset_t data_offset = 0; - EXPECT_EQ(object, DIERef::Decode(data, &data_offset)); -} - -static void EncodeDecode(const DIERef &object) { - EncodeDecode(object, eByteOrderLittle); - EncodeDecode(object, eByteOrderBig); -} - -TEST(DWARFIndexCachingTest, DIERefEncodeDecode) { - // Tests DIERef::Encode(...) and DIERef::Decode(...) - EncodeDecode(DIERef(std::nullopt, DIERef::Section::DebugInfo, 0x11223344)); - EncodeDecode(DIERef(std::nullopt, DIERef::Section::DebugTypes, 0x11223344)); - EncodeDecode(DIERef(100, DIERef::Section::DebugInfo, 0x11223344)); - EncodeDecode(DIERef(200, DIERef::Section::DebugTypes, 0x11223344)); -} - -TEST(DWARFIndexCachingTest, DIERefEncodeDecodeMax) { - // Tests DIERef::Encode(...) and DIERef::Decode(...) - EncodeDecode(DIERef(std::nullopt, DIERef::Section::DebugInfo, - DIERef::k_die_offset_mask - 1)); - EncodeDecode(DIERef(std::nullopt, DIERef::Section::DebugTypes, - DIERef::k_die_offset_mask - 1)); - EncodeDecode( - DIERef(100, DIERef::Section::DebugInfo, DIERef::k_die_offset_mask - 1)); - EncodeDecode( - DIERef(200, DIERef::Section::DebugTypes, DIERef::k_die_offset_mask - 1)); - EncodeDecode(DIERef(DIERef::k_file_index_mask, DIERef::Section::DebugInfo, - DIERef::k_file_index_mask)); - EncodeDecode(DIERef(DIERef::k_file_index_mask, DIERef::Section::DebugTypes, - DIERef::k_file_index_mask)); - EncodeDecode(DIERef(DIERef::k_file_index_mask, DIERef::Section::DebugInfo, - 0x11223344)); - EncodeDecode(DIERef(DIERef::k_file_index_mask, DIERef::Section::DebugTypes, - 0x11223344)); -} - -static void EncodeDecode(const NameToDIE &object, ByteOrder byte_order) { - const uint8_t addr_size = 8; - DataEncoder encoder(byte_order, addr_size); - DataEncoder strtab_encoder(byte_order, addr_size); - ConstStringTable const_strtab; - - object.Encode(encoder, const_strtab); - - llvm::ArrayRef<uint8_t> bytes = encoder.GetData(); - DataExtractor data(bytes.data(), bytes.size(), byte_order, addr_size); - - const_strtab.Encode(strtab_encoder); - llvm::ArrayRef<uint8_t> strtab_bytes = strtab_encoder.GetData(); - DataExtractor strtab_data(strtab_bytes.data(), strtab_bytes.size(), - byte_order, addr_size); - StringTableReader strtab_reader; - offset_t strtab_data_offset = 0; - ASSERT_EQ(strtab_reader.Decode(strtab_data, &strtab_data_offset), true); - - NameToDIE decoded_object; - offset_t data_offset = 0; - decoded_object.Decode(data, &data_offset, strtab_reader); - EXPECT_EQ(object, decoded_object); -} - -static void EncodeDecode(const NameToDIE &object) { - EncodeDecode(object, eByteOrderLittle); - EncodeDecode(object, eByteOrderBig); -} - -TEST(DWARFIndexCachingTest, NameToDIEEncodeDecode) { - NameToDIE map; - // Make sure an empty NameToDIE map encodes and decodes correctly. - EncodeDecode(map); - map.Insert(ConstString("hello"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, 0x11223344)); - map.Insert(ConstString("workd"), - DIERef(100, DIERef::Section::DebugInfo, 0x11223344)); - map.Finalize(); - // Make sure a valid NameToDIE map encodes and decodes correctly. - EncodeDecode(map); -} - -static void EncodeDecode(const ManualDWARFIndex::IndexSet &object, - ByteOrder byte_order) { - const uint8_t addr_size = 8; - DataEncoder encoder(byte_order, addr_size); - DataEncoder strtab_encoder(byte_order, addr_size); - object.Encode(encoder); - llvm::ArrayRef<uint8_t> bytes = encoder.GetData(); - DataExtractor data(bytes.data(), bytes.size(), byte_order, addr_size); - ManualDWARFIndex::IndexSet decoded_object; - offset_t data_offset = 0; - decoded_object.Decode(data, &data_offset); - EXPECT_TRUE(object == decoded_object); -} - -static void EncodeDecode(const ManualDWARFIndex::IndexSet &object) { - EncodeDecode(object, eByteOrderLittle); - EncodeDecode(object, eByteOrderBig); -} - -TEST(DWARFIndexCachingTest, ManualDWARFIndexIndexSetEncodeDecode) { - ManualDWARFIndex::IndexSet set; - // Make sure empty IndexSet can be encoded and decoded correctly - EncodeDecode(set); - - dw_offset_t die_offset = 0; - // Make sure an IndexSet with only items in IndexSet::function_basenames can - // be encoded and decoded correctly. - set.function_basenames.Insert( - ConstString("a"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset)); - EncodeDecode(set); - set.function_basenames.Clear(); - // Make sure an IndexSet with only items in IndexSet::function_fullnames can - // be encoded and decoded correctly. - set.function_fullnames.Insert( - ConstString("a"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset)); - EncodeDecode(set); - set.function_fullnames.Clear(); - // Make sure an IndexSet with only items in IndexSet::function_methods can - // be encoded and decoded correctly. - set.function_methods.Insert( - ConstString("a"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset)); - EncodeDecode(set); - set.function_methods.Clear(); - // Make sure an IndexSet with only items in IndexSet::function_selectors can - // be encoded and decoded correctly. - set.function_selectors.Insert( - ConstString("a"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset)); - EncodeDecode(set); - set.function_selectors.Clear(); - // Make sure an IndexSet with only items in IndexSet::objc_class_selectors can - // be encoded and decoded correctly. - set.objc_class_selectors.Insert( - ConstString("a"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset)); - EncodeDecode(set); - set.objc_class_selectors.Clear(); - // Make sure an IndexSet with only items in IndexSet::globals can - // be encoded and decoded correctly. - set.globals.Insert( - ConstString("a"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset)); - EncodeDecode(set); - set.globals.Clear(); - // Make sure an IndexSet with only items in IndexSet::types can - // be encoded and decoded correctly. - set.types.Insert( - ConstString("a"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset)); - EncodeDecode(set); - set.types.Clear(); - // Make sure an IndexSet with only items in IndexSet::namespaces can - // be encoded and decoded correctly. - set.namespaces.Insert( - ConstString("a"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset)); - EncodeDecode(set); - set.namespaces.Clear(); - // Make sure that an IndexSet with item in all NameToDIE maps can be - // be encoded and decoded correctly. - set.function_basenames.Insert( - ConstString("a"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset)); - set.function_fullnames.Insert( - ConstString("b"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset)); - set.function_methods.Insert( - ConstString("c"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset)); - set.function_selectors.Insert( - ConstString("d"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset)); - set.objc_class_selectors.Insert( - ConstString("e"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset)); - set.globals.Insert( - ConstString("f"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset)); - set.types.Insert( - ConstString("g"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset)); - set.namespaces.Insert( - ConstString("h"), - DIERef(std::nullopt, DIERef::Section::DebugInfo, ++die_offset)); - EncodeDecode(set); -} - -static void EncodeDecode(const CacheSignature &object, ByteOrder byte_order, - bool encode_result) { - const uint8_t addr_size = 8; - DataEncoder encoder(byte_order, addr_size); - EXPECT_EQ(encode_result, object.Encode(encoder)); - if (!encode_result) - return; - llvm::ArrayRef<uint8_t> bytes = encoder.GetData(); - DataExtractor data(bytes.data(), bytes.size(), byte_order, addr_size); - offset_t data_offset = 0; - CacheSignature decoded_object; - EXPECT_TRUE(decoded_object.Decode(data, &data_offset)); - EXPECT_EQ(object, decoded_object); -} - -static void EncodeDecode(const CacheSignature &object, bool encode_result) { - EncodeDecode(object, eByteOrderLittle, encode_result); - EncodeDecode(object, eByteOrderBig, encode_result); -} - -TEST(DWARFIndexCachingTest, CacheSignatureTests) { - CacheSignature sig; - // A cache signature is only considered valid if it has a UUID. - sig.m_mod_time = 0x12345678; - EXPECT_FALSE(sig.IsValid()); - EncodeDecode(sig, /*encode_result=*/false); - sig.Clear(); - - sig.m_obj_mod_time = 0x12345678; - EXPECT_FALSE(sig.IsValid()); - EncodeDecode(sig, /*encode_result=*/false); - sig.Clear(); - - sig.m_uuid = UUID("@\x00\x11\x22\x33\x44\x55\x66\x77", 8); - EXPECT_TRUE(sig.IsValid()); - EncodeDecode(sig, /*encode_result=*/true); - sig.m_mod_time = 0x12345678; - EXPECT_TRUE(sig.IsValid()); - EncodeDecode(sig, /*encode_result=*/true); - sig.m_obj_mod_time = 0x456789ab; - EXPECT_TRUE(sig.IsValid()); - EncodeDecode(sig, /*encode_result=*/true); - sig.m_mod_time = std::nullopt; - EXPECT_TRUE(sig.IsValid()); - EncodeDecode(sig, /*encode_result=*/true); - - // Recent changes do not allow cache signatures with only a modification time - // or object modification time, so make sure if we try to decode such a cache - // file that we fail. This verifies that if we try to load an previously - // valid cache file where the signature is insufficient, that we will fail to - // decode and load these cache files. - DataEncoder encoder(eByteOrderLittle, /*addr_size=*/8); - encoder.AppendU8(2); // eSignatureModTime - encoder.AppendU32(0x12345678); - encoder.AppendU8(255); // eSignatureEnd - - llvm::ArrayRef<uint8_t> bytes = encoder.GetData(); - DataExtractor data(bytes.data(), bytes.size(), eByteOrderLittle, - /*addr_size=*/8); - offset_t data_offset = 0; - - // Make sure we fail to decode a CacheSignature with only a mod time - EXPECT_FALSE(sig.Decode(data, &data_offset)); - - // Change the signature data to contain only a eSignatureObjectModTime and - // make sure decoding fails as well. - encoder.PutU8(/*offset=*/0, 3); // eSignatureObjectModTime - data_offset = 0; - EXPECT_FALSE(sig.Decode(data, &data_offset)); - -} _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits