labath created this revision.
labath added reviewers: clayborg, zturner, JDevlieghere.
Herald added a subscriber: aprantl.

If we opened a file which was produced on system with different path
syntax, we would parse the paths from the debug info incorrectly.

The reason for that is that we would parse the paths as they were
native. For example this meant that on linux we would treat the entire
windows path as a single file name with no directory component, and then
we would concatenate that with the single directory component from the
DW_AT_comp_dir attribute. When parsing posix paths on windows, we would
at least get the directory separators right, but we still would treat
the posix paths as relative, and concatenate them where we shouldn't.

This patch attempts to remedy this by guessing the path syntax used in
each compile unit. (Unfortunately, there is no info in DWARF which would
give the definitive path style used by the produces, so guessing is all
we can do.) Currently, this guessing is based on the DW_AT_comp_dir
attribute of the compile unit, but this can be refined later if needed
(for example, the DW_AT_name of the compile unit may also contain some
useful info). This style is then used when parsing the line table of
that compile unit.

This patch is sufficient to make the line tables come out right, and
enable breakpoint setting by file name work correctly. Setting a
breakpoint by full path still has some kinks (specifically, using a
windows-style full path will not work on linux because the path will be
parsed as a linux path), but this will require larger changes in how
breakpoint setting works.


https://reviews.llvm.org/D56543

Files:
  lit/SymbolFile/DWARF/Inputs/dir-separator-posix.lldbinit
  lit/SymbolFile/DWARF/Inputs/dir-separator-windows.lldbinit
  lit/SymbolFile/DWARF/dir-separator-posix.s
  lit/SymbolFile/DWARF/dir-separator-windows.s
  source/Plugins/SymbolFile/DWARF/DWARFDebugLine.cpp
  source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
  source/Plugins/SymbolFile/DWARF/DWARFUnit.h
  source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp

Index: source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
===================================================================
--- source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -183,7 +183,8 @@
   return colon_pos + 1;
 }
 
-static FileSpec resolveCompDir(const char *path_from_dwarf) {
+static FileSpec resolveCompDir(const char *path_from_dwarf,
+                               FileSpec::Style path_style) {
   if (!path_from_dwarf)
     return FileSpec();
 
@@ -196,7 +197,7 @@
   bool is_symlink = false;
   // Always normalize our compile unit directory to get rid of redundant
   // slashes and other path anomalies before we use it for path prepending
-  FileSpec local_spec(local_path);
+  FileSpec local_spec(local_path, path_style);
   const auto &file_specs = GetGlobalPluginProperties()->GetSymLinkPaths();
   for (size_t i = 0; i < file_specs.GetSize() && !is_symlink; ++i)
     is_symlink = FileSpec::Equal(file_specs.GetFileSpecAtIndex(i),
@@ -810,7 +811,7 @@
         if (module_sp) {
           const DWARFDIE cu_die = dwarf_cu->DIE();
           if (cu_die) {
-            FileSpec cu_file_spec(cu_die.GetName());
+            FileSpec cu_file_spec(cu_die.GetName(), dwarf_cu->GetPathStyle());
             if (cu_file_spec) {
               // If we have a full path to the compile unit, we don't need to
               // resolve the file.  This can be expensive e.g. when the source
@@ -819,7 +820,8 @@
               if (cu_file_spec.IsRelative()) {
                 const char *cu_comp_dir{
                     cu_die.GetAttributeValueAsString(DW_AT_comp_dir, nullptr)};
-                cu_file_spec.PrependPathComponent(resolveCompDir(cu_comp_dir));
+                cu_file_spec.PrependPathComponent(
+                    resolveCompDir(cu_comp_dir, dwarf_cu->GetPathStyle()));
               }
 
               std::string remapped_file;
@@ -952,7 +954,8 @@
 
     if (cu_die) {
       FileSpec cu_comp_dir = resolveCompDir(
-          cu_die.GetAttributeValueAsString(DW_AT_comp_dir, nullptr));
+          cu_die.GetAttributeValueAsString(DW_AT_comp_dir, nullptr),
+          dwarf_cu->GetPathStyle());
       const dw_offset_t stmt_list = cu_die.GetAttributeValueAsUnsigned(
           DW_AT_stmt_list, DW_INVALID_OFFSET);
       if (stmt_list != DW_INVALID_OFFSET) {
Index: source/Plugins/SymbolFile/DWARF/DWARFUnit.h
===================================================================
--- source/Plugins/SymbolFile/DWARF/DWARFUnit.h
+++ source/Plugins/SymbolFile/DWARF/DWARFUnit.h
@@ -169,6 +169,8 @@
 
   bool GetIsOptimized();
 
+  lldb_private::FileSpec::Style GetPathStyle();
+
   SymbolFileDWARFDwo *GetDwoSymbolFile() const;
 
   dw_offset_t GetBaseObjOffset() const;
@@ -214,6 +216,7 @@
   lldb::LanguageType m_language_type = lldb::eLanguageTypeUnknown;
   bool m_is_dwarf64 = false;
   lldb_private::LazyBool m_is_optimized = lldb_private::eLazyBoolCalculate;
+  llvm::Optional<lldb_private::FileSpec::Style> m_path_style;
   dw_addr_t m_addr_base = 0;   // Value of DW_AT_addr_base
   dw_addr_t m_ranges_base = 0; // Value of DW_AT_ranges_base
   // If this is a dwo compile unit this is the offset of the base compile unit
@@ -249,6 +252,8 @@
   void AddUnitDIE(const DWARFDebugInfoEntry &cu_die);
   void ExtractDIEsEndCheck(lldb::offset_t offset) const;
 
+  void ComputePathStyle();
+
   DISALLOW_COPY_AND_ASSIGN(DWARFUnit);
 };
 
Index: source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
===================================================================
--- source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -766,6 +766,26 @@
   return m_is_optimized == eLazyBoolYes;
 }
 
+FileSpec::Style DWARFUnit::GetPathStyle() {
+  if (!m_path_style)
+    ComputePathStyle();
+  return *m_path_style;
+}
+
+void DWARFUnit::ComputePathStyle() {
+  m_path_style = FileSpec::Style::native;
+  const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
+  if (!die)
+    return;
+  llvm::StringRef comp_dir =
+      die->GetAttributeValueAsString(m_dwarf, this, DW_AT_comp_dir, NULL);
+  if (comp_dir.startswith("/"))
+    m_path_style = FileSpec::Style::posix;
+  else if (comp_dir.size() > 3 && llvm::isAlpha(comp_dir[0]) &&
+           comp_dir.substr(1, 2) == ":\\")
+    m_path_style = FileSpec::Style::windows;
+}
+
 SymbolFileDWARFDwo *DWARFUnit::GetDwoSymbolFile() const {
   return m_dwo_symbol_file.get();
 }
Index: source/Plugins/SymbolFile/DWARF/DWARFDebugLine.cpp
===================================================================
--- source/Plugins/SymbolFile/DWARF/DWARFDebugLine.cpp
+++ source/Plugins/SymbolFile/DWARF/DWARFDebugLine.cpp
@@ -950,7 +950,7 @@
     const lldb_private::FileSpec &comp_dir, FileSpec &file) const {
   uint32_t idx = file_idx - 1; // File indexes are 1 based...
   if (idx < file_names.size()) {
-    file.SetFile(file_names[idx].name, FileSpec::Style::native);
+    file.SetFile(file_names[idx].name, comp_dir.GetPathStyle());
     if (file.IsRelative()) {
       if (file_names[idx].dir_idx > 0) {
         const uint32_t dir_idx = file_names[idx].dir_idx - 1;
Index: lit/SymbolFile/DWARF/dir-separator-windows.s
===================================================================
--- /dev/null
+++ lit/SymbolFile/DWARF/dir-separator-windows.s
@@ -0,0 +1,67 @@
+# Test that parsing of line tables works reasonably, even if the host directory
+# separator does not match the separator of the compile unit.
+
+# REQUIRES: lld
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj > %t.o
+# RUN: ld.lld %t.o -o %t
+# RUN: %lldb %t -s %S/Inputs/dir-separator-windows.lldbinit -o exit | FileCheck %s
+
+# CHECK-LABEL: image dump line-table a.c
+# CHECK: Line table for C:\tmp\a.c
+# CHECK-NEXT: 0x0000000000201000: C:\tmp\a.c:1
+# CHECK-NEXT: 0x0000000000201001: C:\tmp\b.c:1
+# CHECK-NEXT: 0x0000000000201002: C:\tmp\b.c:1
+# CHECK-EMPTY:
+
+# CHECK-LABEL: breakpoint set -f a.c -l 1
+# CHECK: Breakpoint 1: {{.*}}`_start,
+
+# CHECK-LABEL: breakpoint set -f C:/tmp/b.c -l 1
+# CHECK: Breakpoint 2: {{.*}}`_start + 1,
+
+	.text
+	.globl	_start
+_start:
+	.file	1 "C:\\tmp\\a.c"
+	.loc	1 1 0
+        nop
+	.file	2 "C:\\tmp\\b.c"
+	.loc	2 1 0
+        nop
+
+	.section	.debug_str,"MS",@progbits,1
+.Linfo_string1:
+	.asciz	"a.c"
+.Linfo_string2:
+	.asciz	"C:\\tmp"
+	.section	.debug_abbrev,"",@progbits
+	.byte	1                       # Abbreviation Code
+	.byte	17                      # DW_TAG_compile_unit
+	.byte	0                       # DW_CHILDREN_no
+	.byte	19                      # DW_AT_language
+	.byte	5                       # DW_FORM_data2
+	.byte	3                       # DW_AT_name
+	.byte	14                      # DW_FORM_strp
+	.byte	16                      # DW_AT_stmt_list
+	.byte	23                      # DW_FORM_sec_offset
+	.byte	27                      # DW_AT_comp_dir
+	.byte	14                      # DW_FORM_strp
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	0                       # EOM(3)
+	.section	.debug_info,"",@progbits
+.Lcu_begin0:
+	.long	.Lcu_end0-.Lcu_start0   # Length of Unit
+.Lcu_start0:
+	.short	4                       # DWARF version number
+	.long	.debug_abbrev           # Offset Into Abbrev. Section
+	.byte	8                       # Address Size (in bytes)
+	.byte	1                       # Abbrev [1] 0xb:0x1f DW_TAG_compile_unit
+	.short	12                      # DW_AT_language
+	.long	.Linfo_string1          # DW_AT_name
+	.long	.Lline_table_start0     # DW_AT_stmt_list
+	.long	.Linfo_string2          # DW_AT_comp_dir
+.Lcu_end0:
+	.section	.debug_line,"",@progbits
+.Lline_table_start0:
Index: lit/SymbolFile/DWARF/dir-separator-posix.s
===================================================================
--- /dev/null
+++ lit/SymbolFile/DWARF/dir-separator-posix.s
@@ -0,0 +1,67 @@
+# Test that parsing of line tables works reasonably, even if the host directory
+# separator does not match the separator of the compile unit.
+
+# REQUIRES: lld
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj > %t.o
+# RUN: ld.lld %t.o -o %t
+# RUN: %lldb %t -s %S/Inputs/dir-separator-posix.lldbinit -o exit | FileCheck %s
+
+# CHECK-LABEL: image dump line-table a.c
+# CHECK: Line table for /tmp/a.c
+# CHECK-NEXT: 0x0000000000201000: /tmp/a.c:1
+# CHECK-NEXT: 0x0000000000201001: /tmp/b.c:1
+# CHECK-NEXT: 0x0000000000201002: /tmp/b.c:1
+# CHECK-EMPTY:
+
+# CHECK-LABEL: breakpoint set -f a.c -l 1
+# CHECK: Breakpoint 1: {{.*}}`_start,
+
+# CHECK-LABEL: breakpoint set -f /tmp/b.c -l 1
+# CHECK: Breakpoint 2: {{.*}}`_start + 1,
+
+	.text
+	.globl	_start
+_start:
+	.file	1 "/tmp/a.c"
+	.loc	1 1 0
+        nop
+	.file	2 "/tmp/b.c"
+	.loc	2 1 0
+        nop
+
+	.section	.debug_str,"MS",@progbits,1
+.Linfo_string1:
+	.asciz	"a.c"
+.Linfo_string2:
+	.asciz	"/tmp"
+	.section	.debug_abbrev,"",@progbits
+	.byte	1                       # Abbreviation Code
+	.byte	17                      # DW_TAG_compile_unit
+	.byte	0                       # DW_CHILDREN_no
+	.byte	19                      # DW_AT_language
+	.byte	5                       # DW_FORM_data2
+	.byte	3                       # DW_AT_name
+	.byte	14                      # DW_FORM_strp
+	.byte	16                      # DW_AT_stmt_list
+	.byte	23                      # DW_FORM_sec_offset
+	.byte	27                      # DW_AT_comp_dir
+	.byte	14                      # DW_FORM_strp
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	0                       # EOM(3)
+	.section	.debug_info,"",@progbits
+.Lcu_begin0:
+	.long	.Lcu_end0-.Lcu_start0   # Length of Unit
+.Lcu_start0:
+	.short	4                       # DWARF version number
+	.long	.debug_abbrev           # Offset Into Abbrev. Section
+	.byte	8                       # Address Size (in bytes)
+	.byte	1                       # Abbrev [1] 0xb:0x1f DW_TAG_compile_unit
+	.short	12                      # DW_AT_language
+	.long	.Linfo_string1          # DW_AT_name
+	.long	.Lline_table_start0     # DW_AT_stmt_list
+	.long	.Linfo_string2          # DW_AT_comp_dir
+.Lcu_end0:
+	.section	.debug_line,"",@progbits
+.Lline_table_start0:
Index: lit/SymbolFile/DWARF/Inputs/dir-separator-windows.lldbinit
===================================================================
--- /dev/null
+++ lit/SymbolFile/DWARF/Inputs/dir-separator-windows.lldbinit
@@ -0,0 +1,7 @@
+image dump line-table a.c
+breakpoint set -f a.c -l 1
+breakpoint set -f C:/tmp/b.c -l 1
+
+# This will fail on non-windows systems because the path will by parsed
+# according to posix rules
+# breakpoint set -f 'C:\tmp\b.c' -l 1
Index: lit/SymbolFile/DWARF/Inputs/dir-separator-posix.lldbinit
===================================================================
--- /dev/null
+++ lit/SymbolFile/DWARF/Inputs/dir-separator-posix.lldbinit
@@ -0,0 +1,3 @@
+image dump line-table a.c
+breakpoint set -f a.c -l 1
+breakpoint set -f /tmp/b.c -l 1
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to