https://github.com/zhyty created https://github.com/llvm/llvm-project/pull/130912
This patch improves the time performance of LLDB launch on Linux machines, particularly for executables with a lot of shared library dependencies (or modules). The commits have some context on their specific changes as well -- hopefully this helps the review. # Testing >From fbf25b1cd4f6d527944fb85fc4d2d03498755a05 Mon Sep 17 00:00:00 2001 From: Tom Yang <toy...@fb.com> Date: Mon, 10 Mar 2025 17:58:17 -0700 Subject: [PATCH 1/3] Add option for enabling/disabling parallel load Add a setting to gate the new parallel dynamic library loading feature in the next diff. I followed the example of `lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp` and `lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp` to create a scoped setting for the new feature. NOTE: this setting defaults to FALSE. Users should be able to enable the new feature as easily as adding a line to their .lldbinit file, or manually setting it in their session. ``` (lldb) apropos "POSIX dynamic" No commands found pertaining to 'POSIX dynamic'. Try 'help' to see a complete list of debugger commands. The following settings variables may relate to 'POSIX dynamic': plugin.dynamic-loader.posix-dyld.parallel-module-load -- Enable experimental loading of modules in parallel for the POSIX dynamic loader. (lldb) settings show plugin.dynamic-loader.posix-dyld.parallel-module-load plugin.dynamic-loader.posix-dyld.parallel-module-load (boolean) = false (lldb) settings set plugin.dynamic-loader.posix-dyld.parallel-module-load true (lldb) settings show plugin.dynamic-loader.posix-dyld.parallel-module-load plugin.dynamic-loader.posix-dyld.parallel-module-load (boolean) = true ``` --- .../DynamicLoader/POSIX-DYLD/CMakeLists.txt | 12 +++++ .../POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp | 49 ++++++++++++++++++- .../POSIX-DYLD/DynamicLoaderPOSIXDYLD.h | 2 + .../DynamicLoaderPOSIXDYLDProperties.td | 8 +++ 4 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLDProperties.td diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/CMakeLists.txt b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/CMakeLists.txt index c1e00b2dd444f..532bfb20ea0f1 100644 --- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/CMakeLists.txt +++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/CMakeLists.txt @@ -1,3 +1,11 @@ +lldb_tablegen(DynamicLoaderPOSIXDYLDProperties.inc -gen-lldb-property-defs + SOURCE DynamicLoaderPOSIXDYLDProperties.td + TARGET LLDBPluginDynamicLoaderPOSIXDYLDPropertiesGen) + +lldb_tablegen(DynamicLoaderPOSIXDYLDPropertiesEnum.inc -gen-lldb-property-enum-defs + SOURCE DynamicLoaderPOSIXDYLDProperties.td + TARGET LLDBPluginDynamicLoaderPOSIXDYLDPropertiesEnumGen) + add_lldb_library(lldbPluginDynamicLoaderPosixDYLD PLUGIN DYLDRendezvous.cpp DynamicLoaderPOSIXDYLD.cpp @@ -13,3 +21,7 @@ add_lldb_library(lldbPluginDynamicLoaderPosixDYLD PLUGIN LINK_COMPONENTS Support ) + +add_dependencies(lldbPluginDynamicLoaderPosixDYLD + LLDBPluginDynamicLoaderPOSIXDYLDPropertiesGen + LLDBPluginDynamicLoaderPOSIXDYLDPropertiesEnumGen) diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp index 53ba11ac21bd3..c89d16649922d 100644 --- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp +++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp @@ -34,9 +34,56 @@ using namespace lldb_private; LLDB_PLUGIN_DEFINE_ADV(DynamicLoaderPOSIXDYLD, DynamicLoaderPosixDYLD) +namespace { + +#define LLDB_PROPERTIES_dynamicloaderposixdyld +#include "DynamicLoaderPOSIXDYLDProperties.inc" + +enum { +#define LLDB_PROPERTIES_dynamicloaderposixdyld +#include "DynamicLoaderPOSIXDYLDPropertiesEnum.inc" +}; + +class PluginProperties : public Properties { +public: + static llvm::StringRef GetSettingName() { + return DynamicLoaderPOSIXDYLD::GetPluginNameStatic(); + } + + PluginProperties() : Properties() { + m_collection_sp = std::make_shared<OptionValueProperties>(GetSettingName()); + m_collection_sp->Initialize(g_dynamicloaderposixdyld_properties); + } + + ~PluginProperties() override = default; + + bool GetParallelModuleLoad() const { + const uint32_t idx = ePropertyParallelModuleLoad; + return GetPropertyAtIndexAs<bool>(idx, true); + } +}; + +} // namespace + +static PluginProperties &GetGlobalPluginProperties() { + static PluginProperties g_settings; + return g_settings; +} + void DynamicLoaderPOSIXDYLD::Initialize() { PluginManager::RegisterPlugin(GetPluginNameStatic(), - GetPluginDescriptionStatic(), CreateInstance); + GetPluginDescriptionStatic(), CreateInstance, + &DebuggerInitialize); +} + +void DynamicLoaderPOSIXDYLD::DebuggerInitialize(Debugger &debugger) { + if (!PluginManager::GetSettingForDynamicLoaderPlugin( + debugger, PluginProperties::GetSettingName())) { + const bool is_global_setting = true; + PluginManager::CreateSettingForDynamicLoaderPlugin( + debugger, GetGlobalPluginProperties().GetValueProperties(), + "Properties for the POSIX dynamic loader plug-in.", is_global_setting); + } } void DynamicLoaderPOSIXDYLD::Terminate() {} diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.h b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.h index bde334aaca40b..5bb1e00dd7df0 100644 --- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.h +++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.h @@ -28,6 +28,8 @@ class DynamicLoaderPOSIXDYLD : public lldb_private::DynamicLoader { static void Initialize(); + static void DebuggerInitialize(lldb_private::Debugger &debugger); + static void Terminate(); static llvm::StringRef GetPluginNameStatic() { return "posix-dyld"; } diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLDProperties.td b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLDProperties.td new file mode 100644 index 0000000000000..68e3448196cec --- /dev/null +++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLDProperties.td @@ -0,0 +1,8 @@ + +include "../../../../include/lldb/Core/PropertiesBase.td" + +let Definition = "dynamicloaderposixdyld" in { + def ParallelModuleLoad: Property<"parallel-module-load", "Boolean">, + DefaultFalse, + Desc<"Enable experimental loading of modules in parallel for the POSIX dynamic loader.">; +} >From 4dbb16492f9f44201b1539a3b4775a2073983afd Mon Sep 17 00:00:00 2001 From: Tom Yang <toy...@fb.com> Date: Tue, 11 Mar 2025 10:17:23 -0700 Subject: [PATCH 2/3] parallelize POSIX dyld RefreshModules This diff parallelizes `DynamicLoaderPOSIXDYLD::RefreshModules`, which speeds up module loading on Linux. The major benefit of this is we can speed up symbol table indexing and parsing, which is the biggest bottleneck for targets which dynamically link many shared libraries. This speedup is only noticeable when **preloading** symbols. This is when `target.preload-symbols` is `true`, which is the default Meta. The symbol preload option tells the debugger to fully load all of the symbol tables when modules are loaded, as opposed to lazily loading when symbols are requested. Initially, I discovered the specific bottleneck by using the Linux `perf` tool as suggested by @jeffreytan. I saw that ~93% of samples were in `RefreshModules`, and mainly in `LoadModuleAtAddress` and `PreloadSymbols`. `LoadModuleAtAddress` appears independent and parallelize-able at first. The main issue is `DynamicLoaderPOSIXDYLD` maintains a map of loaded modules to their link addresses via `m_loaded_modules`. Modifying and reading to this map isn't thread-safe, so this diff also includes accessor methods that protect the map in the multithreaded context. Luckily, the critical section of modifying or reading from the map isn't super costly, so the contention doesn't appear to negatively impact performance. * Parallelize `DynamicLoaderPOSIXDYLD::LoadAllCurrentModules` to speed up attach scenario. I tested with some larger internal targets with up to 15000 modules, and found significant performance improvements. Typically, I was seeing 2-3X launch speed increases, where "launch" is starting the binary and reaching `main`. I manually ran `ninja check-lldb` several times, and compared with the baseline. At this point, we're not seeing any new failures or new unresolved tests. --- .../POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp | 121 +++++++++++++----- .../POSIX-DYLD/DynamicLoaderPOSIXDYLD.h | 17 ++- 2 files changed, 101 insertions(+), 37 deletions(-) diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp index c89d16649922d..c452547585de8 100644 --- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp +++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp @@ -10,6 +10,7 @@ #include "DynamicLoaderPOSIXDYLD.h" #include "lldb/Breakpoint/BreakpointLocation.h" +#include "lldb/Core/Debugger.h" #include "lldb/Core/Module.h" #include "lldb/Core/ModuleSpec.h" #include "lldb/Core/PluginManager.h" @@ -25,6 +26,7 @@ #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/ProcessInfo.h" +#include "llvm/Support/ThreadPool.h" #include <memory> #include <optional> @@ -231,16 +233,37 @@ void DynamicLoaderPOSIXDYLD::DidLaunch() { Status DynamicLoaderPOSIXDYLD::CanLoadImage() { return Status(); } +void DynamicLoaderPOSIXDYLD::SetLoadedModule(const ModuleSP &module_sp, + addr_t link_map_addr) { + std::unique_lock<std::shared_mutex> lock(m_loaded_modules_rw_mutex); + m_loaded_modules[module_sp] = link_map_addr; +} + +void DynamicLoaderPOSIXDYLD::UnloadModule(const ModuleSP &module_sp) { + std::unique_lock<std::shared_mutex> lock(m_loaded_modules_rw_mutex); + m_loaded_modules.erase(module_sp); +} + +std::optional<lldb::addr_t> DynamicLoaderPOSIXDYLD::GetLoadedModuleLinkAddr( + const ModuleSP &module_sp) { + std::shared_lock<std::shared_mutex> lock(m_loaded_modules_rw_mutex); + auto it = m_loaded_modules.find(module_sp); + if (it != m_loaded_modules.end()) + return it->second; + return std::nullopt; +} + void DynamicLoaderPOSIXDYLD::UpdateLoadedSections(ModuleSP module, addr_t link_map_addr, addr_t base_addr, bool base_addr_is_offset) { - m_loaded_modules[module] = link_map_addr; + SetLoadedModule(module, link_map_addr); + UpdateLoadedSectionsCommon(module, base_addr, base_addr_is_offset); } void DynamicLoaderPOSIXDYLD::UnloadSections(const ModuleSP module) { - m_loaded_modules.erase(module); + UnloadModule(module); UnloadSectionsCommon(module); } @@ -448,7 +471,7 @@ void DynamicLoaderPOSIXDYLD::RefreshModules() { // The rendezvous class doesn't enumerate the main module, so track that // ourselves here. ModuleSP executable = GetTargetExecutable(); - m_loaded_modules[executable] = m_rendezvous.GetLinkMapAddress(); + SetLoadedModule(executable, m_rendezvous.GetLinkMapAddress()); DYLDRendezvous::iterator I; DYLDRendezvous::iterator E; @@ -470,34 +493,66 @@ void DynamicLoaderPOSIXDYLD::RefreshModules() { E = m_rendezvous.end(); m_initial_modules_added = true; } - for (; I != E; ++I) { - // Don't load a duplicate copy of ld.so if we have already loaded it - // earlier in LoadInterpreterModule. If we instead loaded then unloaded it - // later, the section information for ld.so would be removed. That - // information is required for placing breakpoints on Arm/Thumb systems. - if ((m_interpreter_module.lock() != nullptr) && - (I->base_addr == m_interpreter_base)) - continue; - - ModuleSP module_sp = - LoadModuleAtAddress(I->file_spec, I->link_addr, I->base_addr, true); - if (!module_sp.get()) - continue; - - if (module_sp->GetObjectFile()->GetBaseAddress().GetLoadAddress( - &m_process->GetTarget()) == m_interpreter_base) { - ModuleSP interpreter_sp = m_interpreter_module.lock(); - if (m_interpreter_module.lock() == nullptr) { - m_interpreter_module = module_sp; - } else if (module_sp == interpreter_sp) { - // Module already loaded. - continue; - } - } - loaded_modules.AppendIfNeeded(module_sp); - new_modules.Append(module_sp); + std::mutex interpreter_module_mutex; + // We should be able to take SOEntry as reference since the data + // exists for the duration of this call in `m_rendezvous`. + auto load_module_fn = + [this, &loaded_modules, &new_modules, + &interpreter_module_mutex](const DYLDRendezvous::SOEntry &so_entry) { + // Don't load a duplicate copy of ld.so if we have already loaded it + // earlier in LoadInterpreterModule. If we instead loaded then + // unloaded it later, the section information for ld.so would be + // removed. That information is required for placing breakpoints on + // Arm/Thumb systems. + { + // `m_interpreter_module` may be modified by another thread at the + // same time, so we guard the access here. + std::lock_guard<std::mutex> lock(interpreter_module_mutex); + if ((m_interpreter_module.lock() != nullptr) && + (so_entry.base_addr == m_interpreter_base)) + return; + } + + ModuleSP module_sp = LoadModuleAtAddress( + so_entry.file_spec, so_entry.link_addr, so_entry.base_addr, true); + if (!module_sp.get()) + return; + + { + // `m_interpreter_module` may be modified by another thread at the + // same time, so we guard the access here. + std::lock_guard<std::mutex> lock(interpreter_module_mutex); + // Set the interpreter module, if this is the interpreter. + if (module_sp->GetObjectFile()->GetBaseAddress().GetLoadAddress( + &m_process->GetTarget()) == m_interpreter_base) { + ModuleSP interpreter_sp = m_interpreter_module.lock(); + if (m_interpreter_module.lock() == nullptr) { + m_interpreter_module = module_sp; + } else if (module_sp == interpreter_sp) { + // Module already loaded. + return; + } + } + } + + loaded_modules.AppendIfNeeded(module_sp); + new_modules.Append(module_sp); + }; + + // Loading modules in parallel tends to be faster, but is still unstable. + // Once it's stable, we can remove this setting and remove the serial + // approach. + if (GetGlobalPluginProperties().GetParallelModuleLoad()) { + llvm::ThreadPoolTaskGroup task_group(Debugger::GetThreadPool()); + for (; I != E; ++I) + task_group.async(load_module_fn, *I); + task_group.wait(); + } else { + for (; I != E; ++I) + load_module_fn(*I); } + m_process->GetTarget().ModulesDidLoad(new_modules); } @@ -683,7 +738,7 @@ void DynamicLoaderPOSIXDYLD::LoadAllCurrentModules() { // The rendezvous class doesn't enumerate the main module, so track that // ourselves here. ModuleSP executable = GetTargetExecutable(); - m_loaded_modules[executable] = m_rendezvous.GetLinkMapAddress(); + SetLoadedModule(executable, m_rendezvous.GetLinkMapAddress()); std::vector<FileSpec> module_names; for (I = m_rendezvous.begin(), E = m_rendezvous.end(); I != E; ++I) @@ -775,15 +830,15 @@ DynamicLoaderPOSIXDYLD::GetThreadLocalData(const lldb::ModuleSP module_sp, const lldb::ThreadSP thread, lldb::addr_t tls_file_addr) { Log *log = GetLog(LLDBLog::DynamicLoader); - auto it = m_loaded_modules.find(module_sp); - if (it == m_loaded_modules.end()) { + std::optional<addr_t> link_map_addr_opt = GetLoadedModuleLinkAddr(module_sp); + if (!link_map_addr_opt.has_value()) { LLDB_LOGF( log, "GetThreadLocalData error: module(%s) not found in loaded modules", module_sp->GetObjectName().AsCString()); return LLDB_INVALID_ADDRESS; } - addr_t link_map = it->second; + addr_t link_map = link_map_addr_opt.value(); if (link_map == LLDB_INVALID_ADDRESS || link_map == 0) { LLDB_LOGF(log, "GetThreadLocalData error: invalid link map address=0x%" PRIx64, diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.h b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.h index 5bb1e00dd7df0..183a157654ba2 100644 --- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.h +++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.h @@ -95,10 +95,6 @@ class DynamicLoaderPOSIXDYLD : public lldb_private::DynamicLoader { /// Contains the pointer to the interpret module, if loaded. std::weak_ptr<lldb_private::Module> m_interpreter_module; - /// Loaded module list. (link map for each module) - std::map<lldb::ModuleWP, lldb::addr_t, std::owner_less<lldb::ModuleWP>> - m_loaded_modules; - /// Returns true if the process is for a core file. bool IsCoreFile() const; @@ -182,6 +178,19 @@ class DynamicLoaderPOSIXDYLD : public lldb_private::DynamicLoader { DynamicLoaderPOSIXDYLD(const DynamicLoaderPOSIXDYLD &) = delete; const DynamicLoaderPOSIXDYLD & operator=(const DynamicLoaderPOSIXDYLD &) = delete; + + /// Loaded module list. (link map for each module) + /// This may be accessed in a multi-threaded context. Use the accessor methods + /// to access `m_loaded_modules` safely. + std::map<lldb::ModuleWP, lldb::addr_t, std::owner_less<lldb::ModuleWP>> + m_loaded_modules; + std::shared_mutex m_loaded_modules_rw_mutex; + + void SetLoadedModule(const lldb::ModuleSP &module_sp, + lldb::addr_t link_map_addr); + void UnloadModule(const lldb::ModuleSP &module_sp); + std::optional<lldb::addr_t> + GetLoadedModuleLinkAddr(const lldb::ModuleSP &module_sp); }; #endif // LLDB_SOURCE_PLUGINS_DYNAMICLOADER_POSIX_DYLD_DYNAMICLOADERPOSIXDYLD_H >From b7ddeb253a4dcc5212583650c8c6bfdc491bcaf6 Mon Sep 17 00:00:00 2001 From: Tom Yang <toy...@fb.com> Date: Tue, 11 Mar 2025 10:35:02 -0700 Subject: [PATCH 3/3] parallelize dyld for attach scenario --- .../POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp index c452547585de8..cfac8e03e73f5 100644 --- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp +++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp @@ -746,19 +746,31 @@ void DynamicLoaderPOSIXDYLD::LoadAllCurrentModules() { m_process->PrefetchModuleSpecs( module_names, m_process->GetTarget().GetArchitecture().GetTriple()); - for (I = m_rendezvous.begin(), E = m_rendezvous.end(); I != E; ++I) { - ModuleSP module_sp = - LoadModuleAtAddress(I->file_spec, I->link_addr, I->base_addr, true); + auto load_module_fn = [this, &module_list, + &log](const DYLDRendezvous::SOEntry &so_entry) { + ModuleSP module_sp = LoadModuleAtAddress( + so_entry.file_spec, so_entry.link_addr, so_entry.base_addr, true); if (module_sp.get()) { LLDB_LOG(log, "LoadAllCurrentModules loading module: {0}", - I->file_spec.GetFilename()); + so_entry.file_spec.GetFilename()); module_list.Append(module_sp); } else { Log *log = GetLog(LLDBLog::DynamicLoader); LLDB_LOGF( log, "DynamicLoaderPOSIXDYLD::%s failed loading module %s at 0x%" PRIx64, - __FUNCTION__, I->file_spec.GetPath().c_str(), I->base_addr); + __FUNCTION__, so_entry.file_spec.GetPath().c_str(), + so_entry.base_addr); + } + }; + if (GetGlobalPluginProperties().GetParallelModuleLoad()) { + llvm::ThreadPoolTaskGroup task_group(Debugger::GetThreadPool()); + for (I = m_rendezvous.begin(), E = m_rendezvous.end(); I != E; ++I) + task_group.async(load_module_fn, *I); + task_group.wait(); + } else { + for (I = m_rendezvous.begin(), E = m_rendezvous.end(); I != E; ++I) { + load_module_fn(*I); } } _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits