jasonmolenda created this revision. jasonmolenda added reviewers: clayborg, JDevlieghere, aprantl. jasonmolenda added a project: LLDB. Herald added a subscriber: mgrang.
In addition to the traditional nlist records in Mach-O binaries, the dynamic linker dyld has a trie structure that it uses to record symbols with external linkage. It uses this structure when resolving symbols at runtime; the nlist records can be completely stripped from a binary without detriment. lldb has read the special class of reexport symbols out of the dyld trie until now. This patch has four parts - 1. Remove checks for an empty string table / nlist table as meaning there is no symbol table. 2. Changes ParseTrieEntries to recognize the externally-visible symbols and add them to a second array of TrieEntries. 3. After populating the symbol table from the nlist records, looks for any matching symbol addresses that we read from the trie, marks them as already seen so we don't add duplicated symbols in the table. 4. Adds the trie entries that hadn't already been seen, and marks any function starts with those addresses as already-added. There is a test case that has a variety of text and data symbols with different linkage visibility and a test case that checks that we don't have duplicate symbol table entries, and that we can still find the externally visible symbols after stripping the binary. The patch at this point is pretty straightforward. It's easy to make mistakes in ObjectFileMachO::ParseSymtab, and in the process of writing this I think I made all of them. But I'm open to any feedback about how things might be done more clearly. Adrian, I wasn't sure how well I'm conforming to best practices on the testsuite Makefile, where I'm compiling my main.cpp has a dylib, then making a stripped copy. This works, but if you have a chance to look at it and provide feedback, I would appreciate it. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D76758 Files: lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp lldb/test/API/macosx/dyld-trie-symbols/Makefile lldb/test/API/macosx/dyld-trie-symbols/TestDyldTrieSymbols.py lldb/test/API/macosx/dyld-trie-symbols/main.cpp
Index: lldb/test/API/macosx/dyld-trie-symbols/main.cpp =================================================================== --- /dev/null +++ lldb/test/API/macosx/dyld-trie-symbols/main.cpp @@ -0,0 +1,29 @@ +int patval; // external symbol, will not be completely stripped +int pat(int in) { // external symbol, will not be completely stripped + if (patval == 0) + patval = in; + return patval; +} + +static int fooval; // static symbol, stripped +int foo() { // external symbol, will not be completely stripped + if (fooval == 0) + fooval = 5; + return fooval; +} + +int bazval = 10; // external symbol, will not be completely stripped +int baz () { // external symbol, will not be completely stripped + return foo() + bazval; +} + +static int barval = 15; // static symbol, stripped +static int bar () { // static symbol, stripped; __lldb_unnamed_symbol from func starts + return baz() + barval; +} + +int calculate () // external symbol, will not be completely stripped +{ + return bar(); +} + Index: lldb/test/API/macosx/dyld-trie-symbols/TestDyldTrieSymbols.py =================================================================== --- /dev/null +++ lldb/test/API/macosx/dyld-trie-symbols/TestDyldTrieSymbols.py @@ -0,0 +1,87 @@ +""" +Test that we read the exported symbols from the dyld trie +""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class DyldTrieSymbolsTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + NO_DEBUG_INFO_TESTCASE = True + + @skipIfRemote + @skipUnlessDarwin + + def test_dyld_trie_symbols(self): + """Test that we make create symbol table entries from the dyld trie data structure.""" + self.build() + unstripped_exe = self.getBuildArtifact("a.out") + stripped_exe = self.getBuildArtifact("a.out-stripped") + + unstripped_target = self.dbg.CreateTarget(unstripped_exe) + self.assertTrue(unstripped_target.IsValid(), "Got a vaid stripped target.") + + # Verify that the expected symbols are present in an unstripped + # binary, and that we didn't duplicate the entries in the symbol + # table. + unstripped_bazval_symbols = unstripped_target.FindSymbols("bazval") + self.assertEqual(unstripped_bazval_symbols.GetSize(), 1) + unstripped_patval_symbols = unstripped_target.FindSymbols("patval") + self.assertEqual(unstripped_patval_symbols.GetSize(), 1) + unstripped_Z3foo_symbols = unstripped_target.FindSymbols("_Z3foov") + self.assertEqual(unstripped_Z3foo_symbols.GetSize(), 1) + unstripped_foo_symbols = unstripped_target.FindSymbols("foo") + self.assertEqual(unstripped_foo_symbols.GetSize(), 1) + + # make sure we can look up the mangled name, demangled base name, + # demangled name with argument. + unstripped_Z3pat_symbols = unstripped_target.FindSymbols("_Z3pati") + self.assertEqual(unstripped_Z3pat_symbols.GetSize(), 1) + unstripped_pat_symbols = unstripped_target.FindSymbols("pat") + self.assertEqual(unstripped_pat_symbols.GetSize(), 1) + unstripped_patint_symbols = unstripped_target.FindSymbols("pat(int)") + self.assertEqual(unstripped_patint_symbols.GetSize(), 1) + + unstripped_bar_symbols = unstripped_target.FindSymbols("bar") + self.assertEqual(unstripped_bar_symbols.GetSize(), 1) + + + + # Verify that we can retrieve all the symbols with external + # linkage after the binary has been stripped; they should not + # exist in the nlist records at this point and can only be + # retrieved from the dyld trie structure. + + stripped_target = self.dbg.CreateTarget(stripped_exe) + self.assertTrue(stripped_target.IsValid(), "Got a vaid stripped target.") + + # Check that we're able to still retrieve all the symbols after + # the binary has been stripped. Check that one we know will be + # removed is absent. + stripped_bazval_symbols = stripped_target.FindSymbols("bazval") + self.assertEqual(stripped_bazval_symbols.GetSize(), 1) + stripped_patval_symbols = stripped_target.FindSymbols("patval") + self.assertEqual(stripped_patval_symbols.GetSize(), 1) + stripped_Z3foo_symbols = stripped_target.FindSymbols("_Z3foov") + self.assertEqual(stripped_Z3foo_symbols.GetSize(), 1) + stripped_foo_symbols = stripped_target.FindSymbols("foo") + self.assertEqual(stripped_foo_symbols.GetSize(), 1) + + # make sure we can look up the mangled name, demangled base name, + # demangled name with argument. + stripped_Z3pat_symbols = stripped_target.FindSymbols("_Z3pati") + self.assertEqual(stripped_Z3pat_symbols.GetSize(), 1) + stripped_pat_symbols = stripped_target.FindSymbols("pat") + self.assertEqual(stripped_pat_symbols.GetSize(), 1) + stripped_patint_symbols = stripped_target.FindSymbols("pat(int)") + self.assertEqual(stripped_patint_symbols.GetSize(), 1) + + # bar should have been strippped. We should not find it, or the + # stripping went wrong. + stripped_bar_symbols = stripped_target.FindSymbols("bar") + self.assertEqual(stripped_bar_symbols.GetSize(), 0) + Index: lldb/test/API/macosx/dyld-trie-symbols/Makefile =================================================================== --- /dev/null +++ lldb/test/API/macosx/dyld-trie-symbols/Makefile @@ -0,0 +1,17 @@ +CXX_SOURCES := main.cpp +EXE := a.out +MAKE_DSYM := NO + +include Makefile.rules + +all: a.out a.out-stripped + +a.out: main.o + $(CC) $(CFLAGS_NO_DEBUG) -dynamiclib -o a.out main.o + +a.out-stripped: + cp a.out a.out-stripped + strip a.out-stripped + +main.o: main.cpp + $(CC) $(CFLAGS_NO_DEBUG) -c $< -o $@ Index: lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp =================================================================== --- lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -1918,6 +1918,16 @@ else printf("\n"); } + // If this is an external symbol trie entry, set the flags + // to indicate that this symbol has already been added to the + // symbol table, and this entry should not be added. + void SymbolAlreadyPresent() { flags = LLDB_INVALID_ADDRESS; } + bool IsSymbolAlreadyPresent() { + if (flags == LLDB_INVALID_ADDRESS) + return true; + else + return false; + } ConstString name; uint64_t address = LLDB_INVALID_ADDRESS; uint64_t flags = 0; @@ -1946,10 +1956,13 @@ const bool is_arm, std::vector<llvm::StringRef> &nameSlices, std::set<lldb::addr_t> &resolver_addresses, - std::vector<TrieEntryWithOffset> &output) { + std::vector<TrieEntryWithOffset> &reexports, + std::vector<TrieEntryWithOffset> &ext_symbols) { if (!data.ValidOffset(offset)) return true; + // Terminal node -- end of a branch, possibly add this to + // the symbol table or resoler table. const uint64_t terminalSize = data.GetULEB128(&offset); lldb::offset_t children_offset = offset + terminalSize; if (terminalSize != 0) { @@ -1971,9 +1984,18 @@ } else e.entry.other = 0; } - // Only add symbols that are reexport symbols with a valid import name + bool add_this_entry = false; if (EXPORT_SYMBOL_FLAGS_REEXPORT & e.entry.flags && import_name && import_name[0]) { + // add symbols that are reexport symbols with a valid import name + add_this_entry = true; + } else if (e.entry.flags == 0 && + (import_name == nullptr || import_name[0] == '\0')) { + // add externally visible symbols, in case the nlist record has + // been stripped/omitted. + add_this_entry = true; + } + if (add_this_entry) { std::string name; if (!nameSlices.empty()) { for (auto name_slice : nameSlices) @@ -1987,7 +2009,11 @@ // Skip the leading '_' e.entry.import_name.SetCString(import_name + 1); } - output.push_back(e); + if (EXPORT_SYMBOL_FLAGS_REEXPORT & e.entry.flags) { + reexports.push_back(e); + } else { + ext_symbols.push_back(e); + } } } @@ -2001,7 +2027,7 @@ lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset); if (childNodeOffset) { if (!ParseTrieEntries(data, childNodeOffset, is_arm, nameSlices, - resolver_addresses, output)) { + resolver_addresses, reexports, ext_symbols)) { return false; } } @@ -2091,29 +2117,6 @@ if (m_data.GetU32(&offset, &symtab_load_command.symoff, 4) == nullptr) // fill in symoff, nsyms, stroff, strsize fields return 0; - if (symtab_load_command.symoff == 0) { - if (log) - module_sp->LogMessage(log, "LC_SYMTAB.symoff == 0"); - return 0; - } - - if (symtab_load_command.stroff == 0) { - if (log) - module_sp->LogMessage(log, "LC_SYMTAB.stroff == 0"); - return 0; - } - - if (symtab_load_command.nsyms == 0) { - if (log) - module_sp->LogMessage(log, "LC_SYMTAB.nsyms == 0"); - return 0; - } - - if (symtab_load_command.strsize == 0) { - if (log) - module_sp->LogMessage(log, "LC_SYMTAB.strsize == 0"); - return 0; - } break; case LC_DYLD_INFO: @@ -2354,27 +2357,7 @@ } } - if (nlist_data.GetByteSize() == 0 && - memory_module_load_level == eMemoryModuleLoadLevelComplete) { - if (log) - module_sp->LogMessage(log, "failed to read nlist data"); - return 0; - } - const bool have_strtab_data = strtab_data.GetByteSize() > 0; - if (!have_strtab_data) { - if (process) { - if (strtab_addr == LLDB_INVALID_ADDRESS) { - if (log) - module_sp->LogMessage(log, "failed to locate the strtab in memory"); - return 0; - } - } else { - if (log) - module_sp->LogMessage(log, "failed to read strtab data"); - return 0; - } - } ConstString g_segment_name_TEXT = GetSegmentNameTEXT(); ConstString g_segment_name_DATA = GetSegmentNameDATA(); @@ -2508,13 +2491,14 @@ std::string memory_symbol_name; uint32_t unmapped_local_symbols_found = 0; - std::vector<TrieEntryWithOffset> trie_entries; + std::vector<TrieEntryWithOffset> reexport_trie_entries; + std::vector<TrieEntryWithOffset> external_sym_trie_entries; std::set<lldb::addr_t> resolver_addresses; if (dyld_trie_data.GetByteSize() > 0) { std::vector<llvm::StringRef> nameSlices; ParseTrieEntries(dyld_trie_data, 0, is_arm, nameSlices, resolver_addresses, - trie_entries); + reexport_trie_entries, external_sym_trie_entries); ConstString text_segment_name("__TEXT"); SectionSP text_segment_sp = @@ -2523,7 +2507,9 @@ const lldb::addr_t text_segment_file_addr = text_segment_sp->GetFileAddress(); if (text_segment_file_addr != LLDB_INVALID_ADDRESS) { - for (auto &e : trie_entries) + for (auto &e : reexport_trie_entries) + e.entry.address += text_segment_file_addr; + for (auto &e : external_sym_trie_entries) e.entry.address += text_segment_file_addr; } } @@ -4501,8 +4487,112 @@ } } + // Look through the current symbol table, if any symbols match + // an entry in the external_sym_trie_entries vector, mark it + // as already-present. + if (external_sym_trie_entries.size() > 0) { + auto sort_by_address = [](const TrieEntryWithOffset &a, + const TrieEntryWithOffset &b) -> bool { + return b.entry.address < a.entry.address; + }; + std::sort(external_sym_trie_entries.begin(), + external_sym_trie_entries.end(), sort_by_address); + for (uint32_t i = 0; i < sym_idx; i++) { + TrieEntryWithOffset ent(0); + addr_t symbol_lookup_file_addr = sym[i].GetAddress().GetFileAddress(); + ent.entry.address = symbol_lookup_file_addr; + const auto it = std::lower_bound(external_sym_trie_entries.begin(), + external_sym_trie_entries.end(), ent, + sort_by_address); + if (it != external_sym_trie_entries.end() && + it->entry.address == symbol_lookup_file_addr) { + it->entry.SymbolAlreadyPresent(); + } + if (is_arm && sym[i].GetType() == eSymbolTypeCode && + (sym[i].GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)) { + ent.entry.address = symbol_lookup_file_addr + 1; + const auto it = std::lower_bound(external_sym_trie_entries.begin(), + external_sym_trie_entries.end(), ent, + sort_by_address); + if (it != external_sym_trie_entries.end() && + it->entry.address == symbol_lookup_file_addr) { + it->entry.SymbolAlreadyPresent(); + } + } + } + } + + // Count how many trie symbols we'll add to the symbol table + int trie_symbol_table_augment_count = 0; + for (auto &e : external_sym_trie_entries) { + // Skip entries that don't have the info we need to add a symbol + if (e.entry.IsSymbolAlreadyPresent() || e.entry.name.IsEmpty()) + continue; + trie_symbol_table_augment_count++; + } + + if (num_syms < sym_idx + trie_symbol_table_augment_count) { + num_syms = sym_idx + trie_symbol_table_augment_count; + sym = symtab->Resize(num_syms); + } uint32_t synthetic_sym_id = symtab_load_command.nsyms; + // Add symbols from the trie to the symbol table + for (auto &e : external_sym_trie_entries) { + if (e.entry.IsSymbolAlreadyPresent() || e.entry.name.IsEmpty()) + continue; + + bool symbol_is_thumb = false; + if (is_arm && e.entry.address & 1) { + symbol_is_thumb = true; + e.entry.address &= THUMB_ADDRESS_BIT_MASK; + } + + // Mark any matching function_starts entry as already seen. + if (function_starts_count > 0) { + FunctionStarts::Entry *func_start_entry = + function_starts.FindEntry(e.entry.address, !is_arm); + if (func_start_entry) { + if (symbol_is_thumb == false && + func_start_entry->addr == e.entry.address) { + func_start_entry->data = true; + } + if (symbol_is_thumb && func_start_entry->addr == e.entry.address + 1) { + func_start_entry->data = true; + } + } + } + + Address symbol_addr; + if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) { + SectionSP symbol_section(symbol_addr.GetSection()); + if (symbol_section) { + sym[sym_idx].SetID(synthetic_sym_id++); + sym[sym_idx].GetMangled().SetMangledName(e.entry.name); + switch (symbol_section->GetType()) { + case eSectionTypeCode: + sym[sym_idx].SetType(eSymbolTypeCode); + break; + case eSectionTypeOther: + case eSectionTypeData: + case eSectionTypeZeroFill: + sym[sym_idx].SetType(eSymbolTypeData); + break; + default: + break; + } + sym[sym_idx].SetIsSynthetic(false); + sym[sym_idx].SetExternal(true); + sym[sym_idx].GetAddressRef() = symbol_addr; + uint32_t symbol_flags = 0; + if (symbol_is_thumb) + symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB; + sym[sym_idx].SetFlags(symbol_flags); + ++sym_idx; + } + } + } + if (function_starts_count > 0) { uint32_t num_synthetic_function_symbols = 0; for (i = 0; i < function_starts_count; ++i) { @@ -4669,8 +4759,8 @@ } } - if (!trie_entries.empty()) { - for (const auto &e : trie_entries) { + if (!reexport_trie_entries.empty()) { + for (const auto &e : reexport_trie_entries) { if (e.entry.import_name) { // Only add indirect symbols from the Trie entries if we didn't have // a N_INDR nlist entry for this already
_______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits