llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-lldb Author: Pavel Labath (labath) <details> <summary>Changes</summary> I ran into this when LTO completely emptied two compile units, so they ended up with the same hash (see #<!-- -->100375). Although, ideally, the compiler would try to ensure we don't end up with a hash collision even in this case, guaranteeing their absence is practically impossible. This patch ensures this situation does not bring down lldb. --- Full diff: https://github.com/llvm/llvm-project/pull/100577.diff 3 Files Affected: - (modified) lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp (+13-13) - (modified) lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h (+1-1) - (added) lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s (+142) ``````````diff diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index 66a762bf9b685..0a52159d055bb 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -97,12 +97,14 @@ void DWARFUnit::ExtractUnitDIEIfNeeded() { *m_dwo_id, m_first_die.GetOffset())); return; // Can't fetch the compile unit from the dwo file. } - // If the skeleton compile unit gets its unit DIE parsed first, then this - // will fill in the DWO file's back pointer to this skeleton compile unit. - // If the DWO files get parsed on their own first the skeleton back link - // can be done manually in DWARFUnit::GetSkeletonCompileUnit() which will - // do a reverse lookup and cache the result. - dwo_cu->SetSkeletonUnit(this); + + // Link the DWO unit to this object, if it hasn't been linked already (this + // can happen when we have an index, and the DWO unit is parsed first). + if (!dwo_cu->LinkToSkeletonUnit(*this)) { + SetDwoError(Status::createWithFormat( + "multiple compile units with Dwo ID {0:x16}", *m_dwo_id)); + return; + } DWARFBaseDIE dwo_cu_die = dwo_cu->GetUnitDIEOnly(); if (!dwo_cu_die.IsValid()) { @@ -718,13 +720,11 @@ DWARFCompileUnit *DWARFUnit::GetSkeletonUnit() { return llvm::dyn_cast_or_null<DWARFCompileUnit>(m_skeleton_unit); } -void DWARFUnit::SetSkeletonUnit(DWARFUnit *skeleton_unit) { - // If someone is re-setting the skeleton compile unit backlink, make sure - // it is setting it to a valid value when it wasn't valid, or if the - // value in m_skeleton_unit was valid, it should be the same value. - assert(skeleton_unit); - assert(m_skeleton_unit == nullptr || m_skeleton_unit == skeleton_unit); - m_skeleton_unit = skeleton_unit; +bool DWARFUnit::LinkToSkeletonUnit(DWARFUnit &skeleton_unit) { + if (m_skeleton_unit && m_skeleton_unit != &skeleton_unit) + return false; + m_skeleton_unit = &skeleton_unit; + return true; } bool DWARFUnit::Supports_DW_AT_APPLE_objc_complete_type() { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index 85c37971ced8e..209104fe3a054 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -170,7 +170,7 @@ class DWARFUnit : public UserID { /// both cases correctly and avoids crashes. DWARFCompileUnit *GetSkeletonUnit(); - void SetSkeletonUnit(DWARFUnit *skeleton_unit); + bool LinkToSkeletonUnit(DWARFUnit &skeleton_unit); bool Supports_DW_AT_APPLE_objc_complete_type(); diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s new file mode 100644 index 0000000000000..d626b4602ad58 --- /dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s @@ -0,0 +1,142 @@ +## Test that lldb handles (mainly, that it doesn't crash) the situation where +## two skeleton compile units have the same DWO ID (and try to claim the same +## split unit from the DWP file. This can sometimes happen when the compile unit +## is nearly empty (e.g. because LTO has optimized all of it away). + +# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s --defsym MAIN=0 > %t +# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s > %t.dwp +# RUN: %lldb %t -o "image lookup -t my_enum_type" \ +# RUN: -o "image dump separate-debug-info" -o exit | FileCheck %s + +## Check that we're able to access the type within the split unit (no matter +## which skeleton unit it ends up associated with). Completely ignoring the unit +## might also be reasonable. +# CHECK: image lookup -t my_enum_type +# CHECK: 1 match found +# CHECK: name = "my_enum_type", byte-size = 4, compiler_type = "enum my_enum_type { +# CHECK-NEXT: }" +# +## Check that we get some indication of the error. +# CHECK: image dump separate-debug-info +# CHECK: Dwo ID Err Dwo Path +# CHECK: 0xdeadbeefbaadf00d E multiple compile units with Dwo ID 0xdeadbeefbaadf00d + +.set DWO_ID, 0xdeadbeefbaadf00d + +## The main file. +.ifdef MAIN + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 74 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 0x76 # DW_AT_dwo_name + .byte 8 # DW_FORM_string + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + + .section .debug_info,"",@progbits +.irpc I,01 +.Lcu_begin\I: + .long .Ldebug_info_end\I-.Ldebug_info_start\I # Length of Unit +.Ldebug_info_start\I: + .short 5 # DWARF version number + .byte 4 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .quad DWO_ID # DWO id + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .ascii "foo" + .byte '0' + \I + .asciz ".dwo\0" # DW_AT_dwo_name +.Ldebug_info_end\I: +.endr + +.else +## DWP file starts here. + + .section .debug_abbrev.dwo,"e",@progbits +.LAbbrevBegin: + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 8 # DW_FORM_string + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 4 # DW_TAG_enumeration_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) +.LAbbrevEnd: + .section .debug_info.dwo,"e",@progbits +.LCUBegin: +.Lcu_begin1: + .long .Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit +.Ldebug_info_start1: + .short 5 # DWARF version number + .byte 5 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long 0 # Offset Into Abbrev. Section + .quad DWO_ID # DWO id + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .asciz "Hand-written DWARF" # DW_AT_producer + .short 12 # DW_AT_language + .byte 2 # Abbrev [2] DW_TAG_enumeration_type + .asciz "my_enum_type" # DW_AT_name + .long .Lint-.Lcu_begin1 # DW_AT_type + .byte 4 # DW_AT_byte_size +.Lint: + .byte 4 # Abbrev [4] DW_TAG_base_type + .asciz "int" # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end1: +.LCUEnd: + .section .debug_cu_index, "", @progbits +## Header: + .short 5 # Version + .short 0 # Padding + .long 2 # Section count + .long 1 # Unit count + .long 2 # Slot count +## Hash Table of Signatures: + .quad 0 + .quad DWO_ID +## Parallel Table of Indexes: + .long 0 + .long 1 +## Table of Section Offsets: +## Row 0: + .long 1 # DW_SECT_INFO + .long 3 # DW_SECT_ABBREV +## Row 1: + .long 0 # Offset in .debug_info.dwo + .long 0 # Offset in .debug_abbrev.dwo +## Table of Section Sizes: + .long .LCUEnd-.LCUBegin # Size in .debug_info.dwo + .long .LAbbrevEnd-.LAbbrevBegin # Size in .debug_abbrev.dwo +.endif `````````` </details> https://github.com/llvm/llvm-project/pull/100577 _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits