labath created this revision. labath added reviewers: clayborg, zturner, JDevlieghere. Herald added a subscriber: aprantl.
If we opened a file which was produced on system with different path syntax, we would parse the paths from the debug info incorrectly. The reason for that is that we would parse the paths as they were native. For example this meant that on linux we would treat the entire windows path as a single file name with no directory component, and then we would concatenate that with the single directory component from the DW_AT_comp_dir attribute. When parsing posix paths on windows, we would at least get the directory separators right, but we still would treat the posix paths as relative, and concatenate them where we shouldn't. This patch attempts to remedy this by guessing the path syntax used in each compile unit. (Unfortunately, there is no info in DWARF which would give the definitive path style used by the produces, so guessing is all we can do.) Currently, this guessing is based on the DW_AT_comp_dir attribute of the compile unit, but this can be refined later if needed (for example, the DW_AT_name of the compile unit may also contain some useful info). This style is then used when parsing the line table of that compile unit. This patch is sufficient to make the line tables come out right, and enable breakpoint setting by file name work correctly. Setting a breakpoint by full path still has some kinks (specifically, using a windows-style full path will not work on linux because the path will be parsed as a linux path), but this will require larger changes in how breakpoint setting works. https://reviews.llvm.org/D56543 Files: lit/SymbolFile/DWARF/Inputs/dir-separator-posix.lldbinit lit/SymbolFile/DWARF/Inputs/dir-separator-windows.lldbinit lit/SymbolFile/DWARF/dir-separator-posix.s lit/SymbolFile/DWARF/dir-separator-windows.s source/Plugins/SymbolFile/DWARF/DWARFDebugLine.cpp source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp source/Plugins/SymbolFile/DWARF/DWARFUnit.h source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
Index: source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp =================================================================== --- source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -183,7 +183,8 @@ return colon_pos + 1; } -static FileSpec resolveCompDir(const char *path_from_dwarf) { +static FileSpec resolveCompDir(const char *path_from_dwarf, + FileSpec::Style path_style) { if (!path_from_dwarf) return FileSpec(); @@ -196,7 +197,7 @@ bool is_symlink = false; // Always normalize our compile unit directory to get rid of redundant // slashes and other path anomalies before we use it for path prepending - FileSpec local_spec(local_path); + FileSpec local_spec(local_path, path_style); const auto &file_specs = GetGlobalPluginProperties()->GetSymLinkPaths(); for (size_t i = 0; i < file_specs.GetSize() && !is_symlink; ++i) is_symlink = FileSpec::Equal(file_specs.GetFileSpecAtIndex(i), @@ -810,7 +811,7 @@ if (module_sp) { const DWARFDIE cu_die = dwarf_cu->DIE(); if (cu_die) { - FileSpec cu_file_spec(cu_die.GetName()); + FileSpec cu_file_spec(cu_die.GetName(), dwarf_cu->GetPathStyle()); if (cu_file_spec) { // If we have a full path to the compile unit, we don't need to // resolve the file. This can be expensive e.g. when the source @@ -819,7 +820,8 @@ if (cu_file_spec.IsRelative()) { const char *cu_comp_dir{ cu_die.GetAttributeValueAsString(DW_AT_comp_dir, nullptr)}; - cu_file_spec.PrependPathComponent(resolveCompDir(cu_comp_dir)); + cu_file_spec.PrependPathComponent( + resolveCompDir(cu_comp_dir, dwarf_cu->GetPathStyle())); } std::string remapped_file; @@ -952,7 +954,8 @@ if (cu_die) { FileSpec cu_comp_dir = resolveCompDir( - cu_die.GetAttributeValueAsString(DW_AT_comp_dir, nullptr)); + cu_die.GetAttributeValueAsString(DW_AT_comp_dir, nullptr), + dwarf_cu->GetPathStyle()); const dw_offset_t stmt_list = cu_die.GetAttributeValueAsUnsigned( DW_AT_stmt_list, DW_INVALID_OFFSET); if (stmt_list != DW_INVALID_OFFSET) { Index: source/Plugins/SymbolFile/DWARF/DWARFUnit.h =================================================================== --- source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -169,6 +169,8 @@ bool GetIsOptimized(); + lldb_private::FileSpec::Style GetPathStyle(); + SymbolFileDWARFDwo *GetDwoSymbolFile() const; dw_offset_t GetBaseObjOffset() const; @@ -214,6 +216,7 @@ lldb::LanguageType m_language_type = lldb::eLanguageTypeUnknown; bool m_is_dwarf64 = false; lldb_private::LazyBool m_is_optimized = lldb_private::eLazyBoolCalculate; + llvm::Optional<lldb_private::FileSpec::Style> m_path_style; dw_addr_t m_addr_base = 0; // Value of DW_AT_addr_base dw_addr_t m_ranges_base = 0; // Value of DW_AT_ranges_base // If this is a dwo compile unit this is the offset of the base compile unit @@ -249,6 +252,8 @@ void AddUnitDIE(const DWARFDebugInfoEntry &cu_die); void ExtractDIEsEndCheck(lldb::offset_t offset) const; + void ComputePathStyle(); + DISALLOW_COPY_AND_ASSIGN(DWARFUnit); }; Index: source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp =================================================================== --- source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -766,6 +766,26 @@ return m_is_optimized == eLazyBoolYes; } +FileSpec::Style DWARFUnit::GetPathStyle() { + if (!m_path_style) + ComputePathStyle(); + return *m_path_style; +} + +void DWARFUnit::ComputePathStyle() { + m_path_style = FileSpec::Style::native; + const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly(); + if (!die) + return; + llvm::StringRef comp_dir = + die->GetAttributeValueAsString(m_dwarf, this, DW_AT_comp_dir, NULL); + if (comp_dir.startswith("/")) + m_path_style = FileSpec::Style::posix; + else if (comp_dir.size() > 3 && llvm::isAlpha(comp_dir[0]) && + comp_dir.substr(1, 2) == ":\\") + m_path_style = FileSpec::Style::windows; +} + SymbolFileDWARFDwo *DWARFUnit::GetDwoSymbolFile() const { return m_dwo_symbol_file.get(); } Index: source/Plugins/SymbolFile/DWARF/DWARFDebugLine.cpp =================================================================== --- source/Plugins/SymbolFile/DWARF/DWARFDebugLine.cpp +++ source/Plugins/SymbolFile/DWARF/DWARFDebugLine.cpp @@ -950,7 +950,7 @@ const lldb_private::FileSpec &comp_dir, FileSpec &file) const { uint32_t idx = file_idx - 1; // File indexes are 1 based... if (idx < file_names.size()) { - file.SetFile(file_names[idx].name, FileSpec::Style::native); + file.SetFile(file_names[idx].name, comp_dir.GetPathStyle()); if (file.IsRelative()) { if (file_names[idx].dir_idx > 0) { const uint32_t dir_idx = file_names[idx].dir_idx - 1; Index: lit/SymbolFile/DWARF/dir-separator-windows.s =================================================================== --- /dev/null +++ lit/SymbolFile/DWARF/dir-separator-windows.s @@ -0,0 +1,67 @@ +# Test that parsing of line tables works reasonably, even if the host directory +# separator does not match the separator of the compile unit. + +# REQUIRES: lld + +# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj > %t.o +# RUN: ld.lld %t.o -o %t +# RUN: %lldb %t -s %S/Inputs/dir-separator-windows.lldbinit -o exit | FileCheck %s + +# CHECK-LABEL: image dump line-table a.c +# CHECK: Line table for C:\tmp\a.c +# CHECK-NEXT: 0x0000000000201000: C:\tmp\a.c:1 +# CHECK-NEXT: 0x0000000000201001: C:\tmp\b.c:1 +# CHECK-NEXT: 0x0000000000201002: C:\tmp\b.c:1 +# CHECK-EMPTY: + +# CHECK-LABEL: breakpoint set -f a.c -l 1 +# CHECK: Breakpoint 1: {{.*}}`_start, + +# CHECK-LABEL: breakpoint set -f C:/tmp/b.c -l 1 +# CHECK: Breakpoint 2: {{.*}}`_start + 1, + + .text + .globl _start +_start: + .file 1 "C:\\tmp\\a.c" + .loc 1 1 0 + nop + .file 2 "C:\\tmp\\b.c" + .loc 2 1 0 + nop + + .section .debug_str,"MS",@progbits,1 +.Linfo_string1: + .asciz "a.c" +.Linfo_string2: + .asciz "C:\\tmp" + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Lcu_end0-.Lcu_start0 # Length of Unit +.Lcu_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x1f DW_TAG_compile_unit + .short 12 # DW_AT_language + .long .Linfo_string1 # DW_AT_name + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Linfo_string2 # DW_AT_comp_dir +.Lcu_end0: + .section .debug_line,"",@progbits +.Lline_table_start0: Index: lit/SymbolFile/DWARF/dir-separator-posix.s =================================================================== --- /dev/null +++ lit/SymbolFile/DWARF/dir-separator-posix.s @@ -0,0 +1,67 @@ +# Test that parsing of line tables works reasonably, even if the host directory +# separator does not match the separator of the compile unit. + +# REQUIRES: lld + +# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj > %t.o +# RUN: ld.lld %t.o -o %t +# RUN: %lldb %t -s %S/Inputs/dir-separator-posix.lldbinit -o exit | FileCheck %s + +# CHECK-LABEL: image dump line-table a.c +# CHECK: Line table for /tmp/a.c +# CHECK-NEXT: 0x0000000000201000: /tmp/a.c:1 +# CHECK-NEXT: 0x0000000000201001: /tmp/b.c:1 +# CHECK-NEXT: 0x0000000000201002: /tmp/b.c:1 +# CHECK-EMPTY: + +# CHECK-LABEL: breakpoint set -f a.c -l 1 +# CHECK: Breakpoint 1: {{.*}}`_start, + +# CHECK-LABEL: breakpoint set -f /tmp/b.c -l 1 +# CHECK: Breakpoint 2: {{.*}}`_start + 1, + + .text + .globl _start +_start: + .file 1 "/tmp/a.c" + .loc 1 1 0 + nop + .file 2 "/tmp/b.c" + .loc 2 1 0 + nop + + .section .debug_str,"MS",@progbits,1 +.Linfo_string1: + .asciz "a.c" +.Linfo_string2: + .asciz "/tmp" + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Lcu_end0-.Lcu_start0 # Length of Unit +.Lcu_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x1f DW_TAG_compile_unit + .short 12 # DW_AT_language + .long .Linfo_string1 # DW_AT_name + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Linfo_string2 # DW_AT_comp_dir +.Lcu_end0: + .section .debug_line,"",@progbits +.Lline_table_start0: Index: lit/SymbolFile/DWARF/Inputs/dir-separator-windows.lldbinit =================================================================== --- /dev/null +++ lit/SymbolFile/DWARF/Inputs/dir-separator-windows.lldbinit @@ -0,0 +1,7 @@ +image dump line-table a.c +breakpoint set -f a.c -l 1 +breakpoint set -f C:/tmp/b.c -l 1 + +# This will fail on non-windows systems because the path will by parsed +# according to posix rules +# breakpoint set -f 'C:\tmp\b.c' -l 1 Index: lit/SymbolFile/DWARF/Inputs/dir-separator-posix.lldbinit =================================================================== --- /dev/null +++ lit/SymbolFile/DWARF/Inputs/dir-separator-posix.lldbinit @@ -0,0 +1,3 @@ +image dump line-table a.c +breakpoint set -f a.c -l 1 +breakpoint set -f /tmp/b.c -l 1
_______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits