Author: Greg Clayton Date: 2021-06-02T10:31:37-07:00 New Revision: b0572abf72fd4aafbb56bc41350e41bdfd96cdde
URL: https://github.com/llvm/llvm-project/commit/b0572abf72fd4aafbb56bc41350e41bdfd96cdde DIFF: https://github.com/llvm/llvm-project/commit/b0572abf72fd4aafbb56bc41350e41bdfd96cdde.diff LOG: Improve performance when parsing symbol tables in mach-o files. Some larger projects were loading quite slowly with the current LLDB on macOS and macOS simulator builds. I did some instrument traces and found 3 main culprits: - a LLDB timer that was put into a function that was called too often - a std::set that was keeping track of the address of symbols that were already added - a unnamed function generator in ObjectFile that was going slow due to allocations In order to see this in action I ran the latest LLDB on a large application with many frameworks using the following method: (lldb) script import time; start_time = time.perf_counter() (lldb) file Large.app (lldb) script print(time.perf_counter() - start_time) I first range "sudo purge" to clear the system file caches to simulate a cold startup of the debugger, followed by two iterations with warm file caches. Prior to this fix I was seeing the following timings: 17.68 (cold) 14.56 (warm 1) 14.52 (warm 2) After this fix I was seeing: 11.32 (cold) 8.43 (warm 1) 8.49 (warm 2) Differential Revision: https://reviews.llvm.org/D103504 Added: Modified: lldb/source/Core/Module.cpp lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp lldb/source/Symbol/ObjectFile.cpp Removed: ################################################################################ diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index 117b0bfd5aae3..64193b57b095f 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -440,8 +440,6 @@ CompUnitSP Module::GetCompileUnitAtIndex(size_t index) { bool Module::ResolveFileAddress(lldb::addr_t vm_addr, Address &so_addr) { std::lock_guard<std::recursive_mutex> guard(m_mutex); - LLDB_SCOPED_TIMERF("Module::ResolveFileAddress (vm_addr = 0x%" PRIx64 ")", - vm_addr); SectionList *section_list = GetSectionList(); if (section_list) return so_addr.ResolveAddressUsingFileSections(vm_addr, section_list); diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index f346f5c01c2f4..0285e336df804 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -44,6 +44,7 @@ #include "lldb/Host/SafeMachO.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MemoryBuffer.h" @@ -1895,9 +1896,9 @@ class MachSymtabSectionInfo { filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath(); Host::SystemLog(Host::eSystemLogError, - "error: unable to find section %d for a symbol in %s, corrupt file?\n", - n_sect, - filename.c_str()); + "error: unable to find section %d for a symbol in " + "%s, corrupt file?\n", + n_sect, filename.c_str()); } } if (m_section_infos[n_sect].vm_range.Contains(file_addr)) { @@ -2183,7 +2184,16 @@ size_t ObjectFileMachO::ParseSymtab() { // We add symbols to the table in the order of most information (nlist // records) to least (function starts), and avoid duplicating symbols // via this set. - std::set<addr_t> symbols_added; + llvm::DenseSet<addr_t> symbols_added; + + // We are using a llvm::DenseSet for "symbols_added" so we must be sure we + // do not add the tombstone or empty keys to the set. + auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) { + // Don't add the tombstone or empty keys. + if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1) + return; + symbols_added.insert(file_addr); + }; FunctionStarts function_starts; lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic); uint32_t i; @@ -3640,9 +3650,9 @@ size_t ObjectFileMachO::ParseSymtab() { symbol_section); sym[GSYM_sym_idx].GetAddressRef().SetOffset( symbol_value); - symbols_added.insert(sym[GSYM_sym_idx] - .GetAddress() - .GetFileAddress()); + add_symbol_addr(sym[GSYM_sym_idx] + .GetAddress() + .GetFileAddress()); // We just need the flags from the linker // symbol, so put these flags // into the N_GSYM flags to avoid duplicate @@ -3662,7 +3672,7 @@ size_t ObjectFileMachO::ParseSymtab() { if (set_value) { sym[sym_idx].GetAddressRef().SetSection(symbol_section); sym[sym_idx].GetAddressRef().SetOffset(symbol_value); - symbols_added.insert( + add_symbol_addr( sym[sym_idx].GetAddress().GetFileAddress()); } sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); @@ -4475,7 +4485,7 @@ size_t ObjectFileMachO::ParseSymtab() { // invalid address of zero when the global is a common symbol. sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section); sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value); - symbols_added.insert( + add_symbol_addr( sym[GSYM_sym_idx].GetAddress().GetFileAddress()); // We just need the flags from the linker symbol, so put these // flags into the N_GSYM flags to avoid duplicate symbols in @@ -4494,7 +4504,8 @@ size_t ObjectFileMachO::ParseSymtab() { if (set_value) { sym[sym_idx].GetAddressRef().SetSection(symbol_section); sym[sym_idx].GetAddressRef().SetOffset(symbol_value); - symbols_added.insert(sym[sym_idx].GetAddress().GetFileAddress()); + if (symbol_section) + add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress()); } sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); if (nlist.n_desc & N_WEAK_REF) @@ -4590,7 +4601,7 @@ size_t ObjectFileMachO::ParseSymtab() { sym[sym_idx].SetIsSynthetic(true); sym[sym_idx].SetExternal(true); sym[sym_idx].GetAddressRef() = symbol_addr; - symbols_added.insert(symbol_addr.GetFileAddress()); + add_symbol_addr(symbol_addr.GetFileAddress()); if (e.entry.flags & TRIE_SYMBOL_IS_THUMB) sym[sym_idx].SetFlags(MACHO_NLIST_ARM_SYMBOL_IS_THUMB); ++sym_idx; @@ -4645,7 +4656,7 @@ size_t ObjectFileMachO::ParseSymtab() { sym[sym_idx].SetType(eSymbolTypeCode); sym[sym_idx].SetIsSynthetic(true); sym[sym_idx].GetAddressRef() = symbol_addr; - symbols_added.insert(symbol_addr.GetFileAddress()); + add_symbol_addr(symbol_addr.GetFileAddress()); if (symbol_flags) sym[sym_idx].SetFlags(symbol_flags); if (symbol_byte_size) @@ -4746,7 +4757,7 @@ size_t ObjectFileMachO::ParseSymtab() { sym[sym_idx].SetType(eSymbolTypeResolver); sym[sym_idx].SetIsSynthetic(true); sym[sym_idx].GetAddressRef() = so_addr; - symbols_added.insert(so_addr.GetFileAddress()); + add_symbol_addr(so_addr.GetFileAddress()); sym[sym_idx].SetByteSize(symbol_stub_byte_size); ++sym_idx; } diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp index f5dcbc5467f72..b0fdd50b3c0f1 100644 --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -617,11 +617,13 @@ ObjectFile::GetSymbolTypeFromName(llvm::StringRef name, } ConstString ObjectFile::GetNextSyntheticSymbolName() { - StreamString ss; + llvm::SmallString<256> name; + llvm::raw_svector_ostream os(name); ConstString file_name = GetModule()->GetFileSpec().GetFilename(); - ss.Printf("___lldb_unnamed_symbol%u$$%s", ++m_synthetic_symbol_idx, - file_name.GetCString()); - return ConstString(ss.GetString()); + ++m_synthetic_symbol_idx; + os << "___lldb_unnamed_symbol" << m_synthetic_symbol_idx << "$$" + << file_name.GetStringRef(); + return ConstString(os.str()); } std::vector<ObjectFile::LoadableData> _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits