Author: Fangrui Song Date: 2020-08-26T15:35:47+02:00 New Revision: 21d01a67c9613932053dd89c9957782f86e0c93f
URL: https://github.com/llvm/llvm-project/commit/21d01a67c9613932053dd89c9957782f86e0c93f DIFF: https://github.com/llvm/llvm-project/commit/21d01a67c9613932053dd89c9957782f86e0c93f.diff LOG: [ELF] --gdb-index: skip SHF_GROUP .debug_info -gdwarf-5 -fdebug-types-section may produce multiple .debug_info sections. All except one are type units (.debug_types before DWARF v5). When constructing .gdb_index, we should ignore these type units. We use a simple heuristic: the compile unit does not have the SHF_GROUP flag. (This needs to be revisited if people place compile unit .debug_info in COMDAT groups.) This issue manifests as a data race: because an object file may have multiple .debug_info sections, we may concurrently construct `LLDDwarfObj` for the same file in multiple threads. The threads may access `InputSectionBase::data()` concurrently on the same input section. `InputSectionBase::data()` does a lazy uncompress() and rewrites the member variable `rawData`. A thread running zlib `inflate()` (transitively called by uncompress()) on a buffer with `rawData` tampered by another thread may fail with `uncompress failed: zlib error: Z_DATA_ERROR`. Even if no data race occurred in an optimistic run, if there are N .debug_info, one CU entry and its address ranges will be replicated N times. The result .gdb_index can be much larger than a correct one. The new test gdb-index-dwarf5-type-unit.s actually has two compile units. This cannot be produced with regular approaches (it can be produced with -r --unique). This is used to demonstrate that the .gdb_index construction code only considers the last non-SHF_GROUP .debug_info Reviewed By: grimar Differential Revision: https://reviews.llvm.org/D85579 (cherry picked from commit fb141292f4411448af41fc454c07f3903acb84dd) Added: lld/test/ELF/gdb-index-dwarf5-type-unit.s Modified: lld/ELF/DWARF.cpp lld/ELF/DWARF.h lld/ELF/SyntheticSections.cpp Removed: ################################################################################ diff --git a/lld/ELF/DWARF.cpp b/lld/ELF/DWARF.cpp index 24c44730bf64..5767f6020f93 100644 --- a/lld/ELF/DWARF.cpp +++ b/lld/ELF/DWARF.cpp @@ -26,7 +26,12 @@ using namespace lld; using namespace lld::elf; template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) { - for (InputSectionBase *sec : obj->getSections()) { + // Get the ELF sections to retrieve sh_flags. See the SHF_GROUP comment below. + ArrayRef<typename ELFT::Shdr> objSections = + CHECK(obj->getObj().sections(), obj); + assert(objSections.size() == obj->getSections().size()); + for (auto it : llvm::enumerate(obj->getSections())) { + InputSectionBase *sec = it.value(); if (!sec) continue; @@ -35,7 +40,6 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) { .Case(".debug_addr", &addrSection) .Case(".debug_gnu_pubnames", &gnuPubnamesSection) .Case(".debug_gnu_pubtypes", &gnuPubtypesSection) - .Case(".debug_info", &infoSection) .Case(".debug_loclists", &loclistsSection) .Case(".debug_ranges", &rangesSection) .Case(".debug_rnglists", &rnglistsSection) @@ -53,6 +57,20 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) { strSection = toStringRef(sec->data()); else if (sec->name == ".debug_line_str") lineStrSection = toStringRef(sec->data()); + else if (sec->name == ".debug_info" && + !(objSections[it.index()].sh_flags & ELF::SHF_GROUP)) { + // In DWARF v5, -fdebug-types-section places type units in .debug_info + // sections in COMDAT groups. They are not compile units and thus should + // be ignored for .gdb_index/diagnostics purposes. + // + // We use a simple heuristic: the compile unit does not have the SHF_GROUP + // flag. If we place compile units in COMDAT groups in the future, we may + // need to perform a lightweight parsing. We drop the SHF_GROUP flag when + // the InputSection was created, so we need to retrieve sh_flags from the + // associated ELF section header. + infoSection.Data = toStringRef(sec->data()); + infoSection.sec = sec; + } } } diff --git a/lld/ELF/DWARF.h b/lld/ELF/DWARF.h index a12dae6e9960..900c63de26ff 100644 --- a/lld/ELF/DWARF.h +++ b/lld/ELF/DWARF.h @@ -32,6 +32,10 @@ template <class ELFT> class LLDDwarfObj final : public llvm::DWARFObject { f(infoSection); } + InputSection *getInfoSection() const { + return cast<InputSection>(infoSection.sec); + } + const llvm::DWARFSection &getLoclistsSection() const override { return loclistsSection; } diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 731b9f658060..09f771d12359 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -28,6 +28,7 @@ #include "lld/Common/Strings.h" #include "lld/Common/Version.h" #include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" @@ -2653,15 +2654,6 @@ void GdbIndexSection::initOutputSize() { } } -static std::vector<InputSection *> getDebugInfoSections() { - std::vector<InputSection *> ret; - for (InputSectionBase *s : inputSections) - if (InputSection *isec = dyn_cast<InputSection>(s)) - if (isec->name == ".debug_info") - ret.push_back(isec); - return ret; -} - static std::vector<GdbIndexSection::CuEntry> readCuList(DWARFContext &dwarf) { std::vector<GdbIndexSection::CuEntry> ret; for (std::unique_ptr<DWARFUnit> &cu : dwarf.compile_units()) @@ -2815,30 +2807,40 @@ createSymbols(ArrayRef<std::vector<GdbIndexSection::NameAttrEntry>> nameAttrs, // Returns a newly-created .gdb_index section. template <class ELFT> GdbIndexSection *GdbIndexSection::create() { - std::vector<InputSection *> sections = getDebugInfoSections(); - - // .debug_gnu_pub{names,types} are useless in executables. - // They are present in input object files solely for creating - // a .gdb_index. So we can remove them from the output. - for (InputSectionBase *s : inputSections) + // Collect InputFiles with .debug_info. See the comment in + // LLDDwarfObj<ELFT>::LLDDwarfObj. If we do lightweight parsing in the future, + // note that isec->data() may uncompress the full content, which should be + // parallelized. + SetVector<InputFile *> files; + for (InputSectionBase *s : inputSections) { + InputSection *isec = dyn_cast<InputSection>(s); + if (!isec) + continue; + // .debug_gnu_pub{names,types} are useless in executables. + // They are present in input object files solely for creating + // a .gdb_index. So we can remove them from the output. if (s->name == ".debug_gnu_pubnames" || s->name == ".debug_gnu_pubtypes") s->markDead(); + else if (isec->name == ".debug_info") + files.insert(isec->file); + } - std::vector<GdbChunk> chunks(sections.size()); - std::vector<std::vector<NameAttrEntry>> nameAttrs(sections.size()); + std::vector<GdbChunk> chunks(files.size()); + std::vector<std::vector<NameAttrEntry>> nameAttrs(files.size()); - parallelForEachN(0, sections.size(), [&](size_t i) { + parallelForEachN(0, files.size(), [&](size_t i) { // To keep memory usage low, we don't want to keep cached DWARFContext, so // avoid getDwarf() here. - ObjFile<ELFT> *file = sections[i]->getFile<ELFT>(); + ObjFile<ELFT> *file = cast<ObjFile<ELFT>>(files[i]); DWARFContext dwarf(std::make_unique<LLDDwarfObj<ELFT>>(file)); + auto &dobj = static_cast<const LLDDwarfObj<ELFT> &>(dwarf.getDWARFObj()); - chunks[i].sec = sections[i]; + // If the are multiple compile units .debug_info (very rare ld -r --unique), + // this only picks the last one. Other address ranges are lost. + chunks[i].sec = dobj.getInfoSection(); chunks[i].compilationUnits = readCuList(dwarf); - chunks[i].addressAreas = readAddressAreas(dwarf, sections[i]); - nameAttrs[i] = readPubNamesAndTypes<ELFT>( - static_cast<const LLDDwarfObj<ELFT> &>(dwarf.getDWARFObj()), - chunks[i].compilationUnits); + chunks[i].addressAreas = readAddressAreas(dwarf, chunks[i].sec); + nameAttrs[i] = readPubNamesAndTypes<ELFT>(dobj, chunks[i].compilationUnits); }); auto *ret = make<GdbIndexSection>(); diff --git a/lld/test/ELF/gdb-index-dwarf5-type-unit.s b/lld/test/ELF/gdb-index-dwarf5-type-unit.s new file mode 100644 index 000000000000..5cd6778fe7e4 --- /dev/null +++ b/lld/test/ELF/gdb-index-dwarf5-type-unit.s @@ -0,0 +1,93 @@ +# REQUIRES: x86, zlib +## -gdwarf-5 -fdebug-types-section may produce multiple .debug_info sections. +## All except one are type units. Test we can locate the compile unit, add it to +## the index, and not erroneously duplicate it (which would happen if we +## consider every .debug_info a compile unit). + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld --gdb-index -Ttext=0x1000 %t.o -o %t +# RUN: llvm-dwarfdump --gdb-index %t | FileCheck %s + +## Test we don't uncompress a section while another thread is concurrently +## accessing it. This would be detected by tsan as a data race. +# RUN: llvm-objcopy --compress-debug-sections %t.o +# RUN: ld.lld --gdb-index -Ttext=0x1000 %t.o -o %t1 +# RUN: llvm-dwarfdump --gdb-index %t1 | FileCheck %s + +## In this test, there are actually two compile unit .debug_info (very uncommon; +## -r --unique). Currently we only handle the last compile unit. +# CHECK: CU list offset = 0x18, has 1 entries: +# CHECK-NEXT: 0: Offset = 0x32, Length = 0x19 + +# CHECK: Address area offset = 0x28, has 1 entries: +# CHECK-NEXT: Low/High address = [0x1001, 0x1002) (Size: 0x1), CU id = 0 + +.Lfunc_begin0: + ret +.Lfunc_end0: +.Lfunc_begin1: + ret +.Lfunc_end1: + +.section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 65 # DW_TAG_type_unit + .byte 0 # DW_CHILDREN_no + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 2 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + + .byte 0 # EOM(3) + +.macro TYPE_UNIT id signature +.section .debug_info,"G",@progbits,\signature + .long .Ldebug_info_end\id-.Ldebug_info_start\id # Length of Unit +.Ldebug_info_start\id: + .short 5 # DWARF version number + .byte 2 # DWARF Unit Type + .byte 8 # Address Size + .long .debug_abbrev # Offset Into Abbrev. Section + .quad \signature # Type Signature + .long .Ldebug_info_end\id # Type DIE Offset + .byte 1 # Abbrev [1] DW_TAG_type_unit +.Ldebug_info_end\id: +.endm + +## We place compile units between two type units (rare). A naive approach will +## take either the first or the last .debug_info +TYPE_UNIT 0, 123 + +.section .debug_info,"",@progbits,unique,0 +.Lcu_begin0: + .long .Lcu_end0-.Lcu_begin0-4 # Length of Unit + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 2 # Abbrev [2] DW_TAG_compile_unit + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc +.Lcu_end0: + +.section .debug_info,"",@progbits,unique,1 +.Lcu_begin1: + .long .Lcu_end1-.Lcu_begin1-4 # Length of Unit + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 2 # Abbrev [2] DW_TAG_compile_unit + .quad .Lfunc_begin1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc +.Lcu_end1: + +TYPE_UNIT 1, 456 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits