This revision was automatically updated to reflect the committed changes.
Closed by commit rGf0a5af906b41: Merge in symbols from Mach-O dyld trie to the 
symbol table (authored by jasonmolenda).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76758/new/

https://reviews.llvm.org/D76758

Files:
  lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
  lldb/test/API/macosx/dyld-trie-symbols/Makefile
  lldb/test/API/macosx/dyld-trie-symbols/TestDyldTrieSymbols.py
  lldb/test/API/macosx/dyld-trie-symbols/main.cpp

Index: lldb/test/API/macosx/dyld-trie-symbols/main.cpp
===================================================================
--- /dev/null
+++ lldb/test/API/macosx/dyld-trie-symbols/main.cpp
@@ -0,0 +1,29 @@
+int patval;          // external symbol, will not be completely stripped
+int pat(int in) {    // external symbol, will not be completely stripped
+  if (patval == 0)
+    patval = in;
+  return patval;
+}
+
+static int fooval;  // static symbol, stripped
+int foo() {         // external symbol, will not be completely stripped
+  if (fooval == 0)
+    fooval = 5;
+  return fooval;
+}
+
+int bazval = 10;   // external symbol, will not be completely stripped
+int baz () {       // external symbol, will not be completely stripped
+  return foo() + bazval;
+}
+
+static int barval = 15; // static symbol, stripped
+static int bar () {     // static symbol, stripped; __lldb_unnamed_symbol from func starts
+  return baz() + barval;
+}
+
+int calculate ()   // external symbol, will not be completely stripped
+{
+  return bar();
+}
+
Index: lldb/test/API/macosx/dyld-trie-symbols/TestDyldTrieSymbols.py
===================================================================
--- /dev/null
+++ lldb/test/API/macosx/dyld-trie-symbols/TestDyldTrieSymbols.py
@@ -0,0 +1,87 @@
+"""
+Test that we read the exported symbols from the dyld trie
+"""
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+class DyldTrieSymbolsTestCase(TestBase):
+
+    mydir = TestBase.compute_mydir(__file__)
+
+    NO_DEBUG_INFO_TESTCASE = True
+
+    @skipIfRemote
+    @skipUnlessDarwin
+    
+    def test_dyld_trie_symbols(self):
+        """Test that we make create symbol table entries from the dyld trie data structure."""
+        self.build()
+        unstripped_exe = self.getBuildArtifact("a.out")
+        stripped_exe = self.getBuildArtifact("a.out-stripped")
+
+        unstripped_target = self.dbg.CreateTarget(unstripped_exe)
+        self.assertTrue(unstripped_target.IsValid(), "Got a vaid stripped target.")
+
+        # Verify that the expected symbols are present in an unstripped
+        # binary, and that we didn't duplicate the entries in the symbol
+        # table.
+        unstripped_bazval_symbols = unstripped_target.FindSymbols("bazval")
+        self.assertEqual(unstripped_bazval_symbols.GetSize(), 1)
+        unstripped_patval_symbols = unstripped_target.FindSymbols("patval")
+        self.assertEqual(unstripped_patval_symbols.GetSize(), 1)
+        unstripped_Z3foo_symbols = unstripped_target.FindSymbols("_Z3foov")
+        self.assertEqual(unstripped_Z3foo_symbols.GetSize(), 1)
+        unstripped_foo_symbols = unstripped_target.FindSymbols("foo")
+        self.assertEqual(unstripped_foo_symbols.GetSize(), 1)
+
+        # make sure we can look up the mangled name, demangled base name,
+        # demangled name with argument.
+        unstripped_Z3pat_symbols = unstripped_target.FindSymbols("_Z3pati")
+        self.assertEqual(unstripped_Z3pat_symbols.GetSize(), 1)
+        unstripped_pat_symbols = unstripped_target.FindSymbols("pat")
+        self.assertEqual(unstripped_pat_symbols.GetSize(), 1)
+        unstripped_patint_symbols = unstripped_target.FindSymbols("pat(int)")
+        self.assertEqual(unstripped_patint_symbols.GetSize(), 1)
+
+        unstripped_bar_symbols = unstripped_target.FindSymbols("bar")
+        self.assertEqual(unstripped_bar_symbols.GetSize(), 1)
+
+
+
+        # Verify that we can retrieve all the symbols with external
+        # linkage after the binary has been stripped; they should not
+        # exist in the nlist records at this point and can only be
+        # retrieved from the dyld trie structure.
+
+        stripped_target = self.dbg.CreateTarget(stripped_exe)
+        self.assertTrue(stripped_target.IsValid(), "Got a vaid stripped target.")
+
+        # Check that we're able to still retrieve all the symbols after
+        # the binary has been stripped. Check that one we know will be
+        # removed is absent.
+        stripped_bazval_symbols = stripped_target.FindSymbols("bazval")
+        self.assertEqual(stripped_bazval_symbols.GetSize(), 1)
+        stripped_patval_symbols = stripped_target.FindSymbols("patval")
+        self.assertEqual(stripped_patval_symbols.GetSize(), 1)
+        stripped_Z3foo_symbols = stripped_target.FindSymbols("_Z3foov")
+        self.assertEqual(stripped_Z3foo_symbols.GetSize(), 1)
+        stripped_foo_symbols = stripped_target.FindSymbols("foo")
+        self.assertEqual(stripped_foo_symbols.GetSize(), 1)
+
+        # make sure we can look up the mangled name, demangled base name,
+        # demangled name with argument.
+        stripped_Z3pat_symbols = stripped_target.FindSymbols("_Z3pati")
+        self.assertEqual(stripped_Z3pat_symbols.GetSize(), 1)
+        stripped_pat_symbols = stripped_target.FindSymbols("pat")
+        self.assertEqual(stripped_pat_symbols.GetSize(), 1)
+        stripped_patint_symbols = stripped_target.FindSymbols("pat(int)")
+        self.assertEqual(stripped_patint_symbols.GetSize(), 1)
+
+        # bar should have been strippped.  We should not find it, or the
+        # stripping went wrong.
+        stripped_bar_symbols = stripped_target.FindSymbols("bar")
+        self.assertEqual(stripped_bar_symbols.GetSize(), 0)
+
Index: lldb/test/API/macosx/dyld-trie-symbols/Makefile
===================================================================
--- /dev/null
+++ lldb/test/API/macosx/dyld-trie-symbols/Makefile
@@ -0,0 +1,13 @@
+CXX_SOURCES := main.cpp
+EXE := a.out
+MAKE_DSYM := NO
+LD_EXTRAS = -dynamiclib -image_base 0x8000
+CFLAGS = $(CFLAGS_NO_DEBUG)
+
+include Makefile.rules
+
+all: a.out a.out-stripped
+
+a.out-stripped:
+	cp a.out a.out-stripped
+	strip a.out-stripped
Index: lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
===================================================================
--- lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -1907,6 +1907,7 @@
   std::vector<SectionInfo> m_section_infos;
 };
 
+#define TRIE_SYMBOL_IS_THUMB (1ULL << 63)
 struct TrieEntry {
   void Dump() const {
     printf("0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"",
@@ -1920,7 +1921,9 @@
   }
   ConstString name;
   uint64_t address = LLDB_INVALID_ADDRESS;
-  uint64_t flags = 0;
+  uint64_t flags =
+      0; // EXPORT_SYMBOL_FLAGS_REEXPORT, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER,
+         // TRIE_SYMBOL_IS_THUMB
   uint64_t other = 0;
   ConstString import_name;
 };
@@ -1943,13 +1946,16 @@
 };
 
 static bool ParseTrieEntries(DataExtractor &data, lldb::offset_t offset,
-                             const bool is_arm,
+                             const bool is_arm, addr_t text_seg_base_addr,
                              std::vector<llvm::StringRef> &nameSlices,
                              std::set<lldb::addr_t> &resolver_addresses,
-                             std::vector<TrieEntryWithOffset> &output) {
+                             std::vector<TrieEntryWithOffset> &reexports,
+                             std::vector<TrieEntryWithOffset> &ext_symbols) {
   if (!data.ValidOffset(offset))
     return true;
 
+  // Terminal node -- end of a branch, possibly add this to
+  // the symbol table or resolver table.
   const uint64_t terminalSize = data.GetULEB128(&offset);
   lldb::offset_t children_offset = offset + terminalSize;
   if (terminalSize != 0) {
@@ -1962,6 +1968,8 @@
       import_name = data.GetCStr(&offset);
     } else {
       e.entry.address = data.GetULEB128(&offset);
+      if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
+        e.entry.address += text_seg_base_addr;
       if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
         e.entry.other = data.GetULEB128(&offset);
         uint64_t resolver_addr = e.entry.other;
@@ -1971,9 +1979,18 @@
       } else
         e.entry.other = 0;
     }
-    // Only add symbols that are reexport symbols with a valid import name
-    if (EXPORT_SYMBOL_FLAGS_REEXPORT & e.entry.flags && import_name &&
-        import_name[0]) {
+    bool add_this_entry = false;
+    if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT) &&
+        import_name && import_name[0]) {
+      // add symbols that are reexport symbols with a valid import name.
+      add_this_entry = true;
+    } else if (e.entry.flags == 0 &&
+               (import_name == nullptr || import_name[0] == '\0')) {
+      // add externally visible symbols, in case the nlist record has
+      // been stripped/omitted.
+      add_this_entry = true;
+    }
+    if (add_this_entry) {
       std::string name;
       if (!nameSlices.empty()) {
         for (auto name_slice : nameSlices)
@@ -1987,7 +2004,15 @@
         // Skip the leading '_'
         e.entry.import_name.SetCString(import_name + 1);
       }
-      output.push_back(e);
+      if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT)) {
+        reexports.push_back(e);
+      } else {
+        if (is_arm && (e.entry.address & 1)) {
+          e.entry.flags |= TRIE_SYMBOL_IS_THUMB;
+          e.entry.address &= THUMB_ADDRESS_BIT_MASK;
+        }
+        ext_symbols.push_back(e);
+      }
     }
   }
 
@@ -2000,8 +2025,9 @@
       return false; // Corrupt data
     lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset);
     if (childNodeOffset) {
-      if (!ParseTrieEntries(data, childNodeOffset, is_arm, nameSlices,
-                            resolver_addresses, output)) {
+      if (!ParseTrieEntries(data, childNodeOffset, is_arm, text_seg_base_addr,
+                            nameSlices, resolver_addresses, reexports,
+                            ext_symbols)) {
         return false;
       }
     }
@@ -2066,7 +2092,15 @@
   struct symtab_command symtab_load_command = {0, 0, 0, 0, 0, 0};
   struct linkedit_data_command function_starts_load_command = {0, 0, 0, 0};
   struct dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  // The data element of type bool indicates that this entry is thumb
+  // code.
   typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts;
+
+  // Record the address of every function/data that we add to the symtab.
+  // We add symbols to the table in the order of most information (nlist
+  // records) to least (function starts), and avoid duplicating symbols
+  // via this set.
+  std::set<addr_t> symbols_added;
   FunctionStarts function_starts;
   lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
   uint32_t i;
@@ -2091,29 +2125,6 @@
       if (m_data.GetU32(&offset, &symtab_load_command.symoff, 4) ==
           nullptr) // fill in symoff, nsyms, stroff, strsize fields
         return 0;
-      if (symtab_load_command.symoff == 0) {
-        if (log)
-          module_sp->LogMessage(log, "LC_SYMTAB.symoff == 0");
-        return 0;
-      }
-
-      if (symtab_load_command.stroff == 0) {
-        if (log)
-          module_sp->LogMessage(log, "LC_SYMTAB.stroff == 0");
-        return 0;
-      }
-
-      if (symtab_load_command.nsyms == 0) {
-        if (log)
-          module_sp->LogMessage(log, "LC_SYMTAB.nsyms == 0");
-        return 0;
-      }
-
-      if (symtab_load_command.strsize == 0) {
-        if (log)
-          module_sp->LogMessage(log, "LC_SYMTAB.strsize == 0");
-        return 0;
-      }
       break;
 
     case LC_DYLD_INFO:
@@ -2354,27 +2365,7 @@
     }
   }
 
-  if (nlist_data.GetByteSize() == 0 &&
-      memory_module_load_level == eMemoryModuleLoadLevelComplete) {
-    if (log)
-      module_sp->LogMessage(log, "failed to read nlist data");
-    return 0;
-  }
-
   const bool have_strtab_data = strtab_data.GetByteSize() > 0;
-  if (!have_strtab_data) {
-    if (process) {
-      if (strtab_addr == LLDB_INVALID_ADDRESS) {
-        if (log)
-          module_sp->LogMessage(log, "failed to locate the strtab in memory");
-        return 0;
-      }
-    } else {
-      if (log)
-        module_sp->LogMessage(log, "failed to read strtab data");
-      return 0;
-    }
-  }
 
   ConstString g_segment_name_TEXT = GetSegmentNameTEXT();
   ConstString g_segment_name_DATA = GetSegmentNameDATA();
@@ -2401,6 +2392,7 @@
         section_list->FindSectionByName(g_section_name_eh_frame);
 
   const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM);
+  const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions();
 
   // lldb works best if it knows the start address of all functions in a
   // module. Linker symbols or debug info are normally the best source of
@@ -2426,6 +2418,14 @@
            0) {
       // Now append the current entry
       function_start_entry.addr += delta;
+      if (is_arm) {
+        if (function_start_entry.addr & 1) {
+          function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
+          function_start_entry.data = true;
+        } else if (always_thumb) {
+          function_start_entry.data = true;
+        }
+      }
       function_starts.Append(function_start_entry);
     }
   } else {
@@ -2448,6 +2448,14 @@
         if (func) {
           FunctionStarts::Entry function_start_entry;
           function_start_entry.addr = func->base - text_base_addr;
+          if (is_arm) {
+            if (function_start_entry.addr & 1) {
+              function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
+              function_start_entry.data = true;
+            } else if (always_thumb) {
+              function_start_entry.data = true;
+            }
+          }
           function_starts.Append(function_start_entry);
         }
       }
@@ -2508,25 +2516,21 @@
   std::string memory_symbol_name;
   uint32_t unmapped_local_symbols_found = 0;
 
-  std::vector<TrieEntryWithOffset> trie_entries;
+  std::vector<TrieEntryWithOffset> reexport_trie_entries;
+  std::vector<TrieEntryWithOffset> external_sym_trie_entries;
   std::set<lldb::addr_t> resolver_addresses;
 
   if (dyld_trie_data.GetByteSize() > 0) {
-    std::vector<llvm::StringRef> nameSlices;
-    ParseTrieEntries(dyld_trie_data, 0, is_arm, nameSlices, resolver_addresses,
-                     trie_entries);
-
     ConstString text_segment_name("__TEXT");
     SectionSP text_segment_sp =
         GetSectionList()->FindSectionByName(text_segment_name);
-    if (text_segment_sp) {
-      const lldb::addr_t text_segment_file_addr =
-          text_segment_sp->GetFileAddress();
-      if (text_segment_file_addr != LLDB_INVALID_ADDRESS) {
-        for (auto &e : trie_entries)
-          e.entry.address += text_segment_file_addr;
-      }
-    }
+    lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS;
+    if (text_segment_sp)
+      text_segment_file_addr = text_segment_sp->GetFileAddress();
+    std::vector<llvm::StringRef> nameSlices;
+    ParseTrieEntries(dyld_trie_data, 0, is_arm, text_segment_file_addr,
+                     nameSlices, resolver_addresses, reexport_trie_entries,
+                     external_sym_trie_entries);
   }
 
   typedef std::set<ConstString> IndirectSymbols;
@@ -3598,6 +3602,9 @@
                                     symbol_section);
                                 sym[GSYM_sym_idx].GetAddressRef().SetOffset(
                                     symbol_value);
+                                symbols_added.insert(sym[GSYM_sym_idx]
+                                                         .GetAddress()
+                                                         .GetFileAddress());
                                 // We just need the flags from the linker
                                 // symbol, so put these flags
                                 // into the N_GSYM flags to avoid duplicate
@@ -3617,6 +3624,8 @@
                       if (set_value) {
                         sym[sym_idx].GetAddressRef().SetSection(symbol_section);
                         sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
+                        symbols_added.insert(
+                            sym[sym_idx].GetAddress().GetFileAddress());
                       }
                       sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
 
@@ -4428,6 +4437,8 @@
                 // invalid address of zero when the global is a common symbol.
                 sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section);
                 sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value);
+                symbols_added.insert(
+                    sym[GSYM_sym_idx].GetAddress().GetFileAddress());
                 // We just need the flags from the linker symbol, so put these
                 // flags into the N_GSYM flags to avoid duplicate symbols in
                 // the symbol table.
@@ -4445,6 +4456,7 @@
       if (set_value) {
         sym[sym_idx].GetAddressRef().SetSection(symbol_section);
         sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
+        symbols_added.insert(sym[sym_idx].GetAddress().GetFileAddress());
       }
       sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
       if (nlist.n_desc & N_WEAK_REF)
@@ -4501,12 +4513,60 @@
     }
   }
 
+  // Count how many trie symbols we'll add to the symbol table
+  int trie_symbol_table_augment_count = 0;
+  for (auto &e : external_sym_trie_entries) {
+    if (symbols_added.find(e.entry.address) == symbols_added.end())
+      trie_symbol_table_augment_count++;
+  }
+
+  if (num_syms < sym_idx + trie_symbol_table_augment_count) {
+    num_syms = sym_idx + trie_symbol_table_augment_count;
+    sym = symtab->Resize(num_syms);
+  }
   uint32_t synthetic_sym_id = symtab_load_command.nsyms;
 
+  // Add symbols from the trie to the symbol table.
+  for (auto &e : external_sym_trie_entries) {
+    if (symbols_added.find(e.entry.address) != symbols_added.end())
+      continue;
+
+    // Find the section that this trie address is in, use that to annotate
+    // symbol type as we add the trie address and name to the symbol table.
+    Address symbol_addr;
+    if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) {
+      SectionSP symbol_section(symbol_addr.GetSection());
+      if (symbol_section) {
+        sym[sym_idx].SetID(synthetic_sym_id++);
+        sym[sym_idx].GetMangled().SetMangledName(e.entry.name);
+        switch (symbol_section->GetType()) {
+        case eSectionTypeCode:
+          sym[sym_idx].SetType(eSymbolTypeCode);
+          break;
+        case eSectionTypeOther:
+        case eSectionTypeData:
+        case eSectionTypeZeroFill:
+          sym[sym_idx].SetType(eSymbolTypeData);
+          break;
+        default:
+          break;
+        }
+        sym[sym_idx].SetIsSynthetic(false);
+        sym[sym_idx].SetExternal(true);
+        sym[sym_idx].GetAddressRef() = symbol_addr;
+        symbols_added.insert(symbol_addr.GetFileAddress());
+        if (e.entry.flags & TRIE_SYMBOL_IS_THUMB)
+          sym[sym_idx].SetFlags(MACHO_NLIST_ARM_SYMBOL_IS_THUMB);
+        ++sym_idx;
+      }
+    }
+  }
+
   if (function_starts_count > 0) {
     uint32_t num_synthetic_function_symbols = 0;
     for (i = 0; i < function_starts_count; ++i) {
-      if (!function_starts.GetEntryRef(i).data)
+      if (symbols_added.find(function_starts.GetEntryRef(i).addr) ==
+          symbols_added.end())
         ++num_synthetic_function_symbols;
     }
 
@@ -4518,14 +4578,11 @@
       for (i = 0; i < function_starts_count; ++i) {
         const FunctionStarts::Entry *func_start_entry =
             function_starts.GetEntryAtIndex(i);
-        if (!func_start_entry->data) {
+        if (symbols_added.find(func_start_entry->addr) == symbols_added.end()) {
           addr_t symbol_file_addr = func_start_entry->addr;
           uint32_t symbol_flags = 0;
-          if (is_arm) {
-            if (symbol_file_addr & 1)
-              symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
-            symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
-          }
+          if (func_start_entry->data)
+            symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
           Address symbol_addr;
           if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) {
             SectionSP symbol_section(symbol_addr.GetSection());
@@ -4552,6 +4609,7 @@
               sym[sym_idx].SetType(eSymbolTypeCode);
               sym[sym_idx].SetIsSynthetic(true);
               sym[sym_idx].GetAddressRef() = symbol_addr;
+              symbols_added.insert(symbol_addr.GetFileAddress());
               if (symbol_flags)
                 sym[sym_idx].SetFlags(symbol_flags);
               if (symbol_byte_size)
@@ -4652,6 +4710,7 @@
                     sym[sym_idx].SetType(eSymbolTypeResolver);
                   sym[sym_idx].SetIsSynthetic(true);
                   sym[sym_idx].GetAddressRef() = so_addr;
+                  symbols_added.insert(so_addr.GetFileAddress());
                   sym[sym_idx].SetByteSize(symbol_stub_byte_size);
                   ++sym_idx;
                 }
@@ -4669,8 +4728,8 @@
     }
   }
 
-  if (!trie_entries.empty()) {
-    for (const auto &e : trie_entries) {
+  if (!reexport_trie_entries.empty()) {
+    for (const auto &e : reexport_trie_entries) {
       if (e.entry.import_name) {
         // Only add indirect symbols from the Trie entries if we didn't have
         // a N_INDR nlist entry for this already
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to