https://github.com/labath updated https://github.com/llvm/llvm-project/pull/117996
>From 78b8dabf1ada3d567d3f1a193fdf0cc0f159cb37 Mon Sep 17 00:00:00 2001 From: Pavel Labath <pa...@labath.sk> Date: Wed, 27 Nov 2024 17:05:23 +0100 Subject: [PATCH 1/2] [lldb] Use the function block as a source for function ranges This is a follow-up/reimplementation of #115730. While working on that patch, I did not realize that the correct (discontinuous) set of ranges is already stored in the block representing the whole function. The catch -- ranges for this block are only set later, when parsing all of the blocks of the function. This patch changes that by populating the function block ranges eagerly -- from within the Function constructor. This also necessitates a corresponding change in all of the symbol files -- so that they stop populating the ranges of that block. This allows us to avoid some unnecessary work (not parsing the function DW_AT_ranges twice) and also results in some simplification of the parsing code. --- lldb/include/lldb/Symbol/Function.h | 3 - .../Breakpad/SymbolFileBreakpad.cpp | 4 +- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 180 +++++++----------- .../SymbolFile/DWARF/SymbolFileDWARF.h | 3 +- .../NativePDB/SymbolFileNativePDB.cpp | 12 +- .../Plugins/SymbolFile/PDB/SymbolFilePDB.cpp | 53 ++---- lldb/source/Symbol/Function.cpp | 16 +- .../DWARF/x86/discontinuous-function.s | 2 +- .../SymbolFile/PDB/function-nested-block.test | 1 - 9 files changed, 111 insertions(+), 163 deletions(-) diff --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h index 70f51a846f8d96..e71dc2348b1734 100644 --- a/lldb/include/lldb/Symbol/Function.h +++ b/lldb/include/lldb/Symbol/Function.h @@ -650,9 +650,6 @@ class Function : public UserID, public SymbolContextScope { /// All lexical blocks contained in this function. Block m_block; - /// List of address ranges belonging to the function. - AddressRanges m_ranges; - /// The function address range that covers the widest range needed to contain /// all blocks. DEPRECATED: do not use this field in new code as the range may /// include addresses belonging to other functions. diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp index df3bf157278daf..bc886259d6fa5f 100644 --- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp +++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp @@ -299,9 +299,7 @@ size_t SymbolFileBreakpad::ParseBlocksRecursive(Function &func) { // "INLINE 0 ...", the current level is 0 and its parent block is the // function block at index 0. std::vector<Block *> blocks; - Block &block = func.GetBlock(false); - block.AddRange(Block::Range(0, func.GetAddressRange().GetByteSize())); - blocks.push_back(&block); + blocks.push_back(&func.GetBlock(false)); size_t blocks_added = 0; addr_t func_base = func.GetAddressRange().GetBaseAddress().GetOffset(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index fe711c56958c44..d814b635f2e5cd 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -1305,121 +1305,76 @@ bool SymbolFileDWARF::ParseDebugMacros(CompileUnit &comp_unit) { return true; } -size_t SymbolFileDWARF::ParseBlocksRecursive( - lldb_private::CompileUnit &comp_unit, Block *parent_block, - const DWARFDIE &orig_die, addr_t subprogram_low_pc, uint32_t depth) { +size_t SymbolFileDWARF::ParseBlocksRecursive(CompileUnit &comp_unit, + Block *parent_block, DWARFDIE die, + addr_t subprogram_low_pc) { size_t blocks_added = 0; - DWARFDIE die = orig_die; - while (die) { + for (; die; die = die.GetSibling()) { dw_tag_t tag = die.Tag(); - switch (tag) { - case DW_TAG_inlined_subroutine: - case DW_TAG_subprogram: - case DW_TAG_lexical_block: { - Block *block = nullptr; - if (tag == DW_TAG_subprogram) { - // Skip any DW_TAG_subprogram DIEs that are inside of a normal or - // inlined functions. These will be parsed on their own as separate - // entities. - - if (depth > 0) - break; + if (tag != DW_TAG_inlined_subroutine && tag != DW_TAG_lexical_block) + continue; - block = parent_block; - } else { - block = parent_block->CreateChild(die.GetID()).get(); - } - DWARFRangeList ranges; - const char *name = nullptr; - const char *mangled_name = nullptr; - - std::optional<int> decl_file; - std::optional<int> decl_line; - std::optional<int> decl_column; - std::optional<int> call_file; - std::optional<int> call_line; - std::optional<int> call_column; - if (die.GetDIENamesAndRanges(name, mangled_name, ranges, decl_file, - decl_line, decl_column, call_file, call_line, - call_column, nullptr)) { - if (tag == DW_TAG_subprogram) { - assert(subprogram_low_pc == LLDB_INVALID_ADDRESS); - subprogram_low_pc = ranges.GetMinRangeBase(0); - } else if (tag == DW_TAG_inlined_subroutine) { - // We get called here for inlined subroutines in two ways. The first - // time is when we are making the Function object for this inlined - // concrete instance. Since we're creating a top level block at - // here, the subprogram_low_pc will be LLDB_INVALID_ADDRESS. So we - // need to adjust the containing address. The second time is when we - // are parsing the blocks inside the function that contains the - // inlined concrete instance. Since these will be blocks inside the - // containing "real" function the offset will be for that function. - if (subprogram_low_pc == LLDB_INVALID_ADDRESS) { - subprogram_low_pc = ranges.GetMinRangeBase(0); - } - } - - const size_t num_ranges = ranges.GetSize(); - for (size_t i = 0; i < num_ranges; ++i) { - const DWARFRangeList::Entry &range = ranges.GetEntryRef(i); - const addr_t range_base = range.GetRangeBase(); - if (range_base >= subprogram_low_pc) - block->AddRange(Block::Range(range_base - subprogram_low_pc, - range.GetByteSize())); - else { - GetObjectFile()->GetModule()->ReportError( - "{0:x8}: adding range [{1:x16}-{2:x16}) which has a base " - "that is less than the function's low PC {3:x16}. Please file " - "a bug and attach the file at the " - "start of this error message", - block->GetID(), range_base, range.GetRangeEnd(), - subprogram_low_pc); - } - } - block->FinalizeRanges(); - - if (tag != DW_TAG_subprogram && - (name != nullptr || mangled_name != nullptr)) { - std::unique_ptr<Declaration> decl_up; - if (decl_file || decl_line || decl_column) - decl_up = std::make_unique<Declaration>( - comp_unit.GetSupportFiles().GetFileSpecAtIndex( - decl_file ? *decl_file : 0), - decl_line ? *decl_line : 0, decl_column ? *decl_column : 0); - - std::unique_ptr<Declaration> call_up; - if (call_file || call_line || call_column) - call_up = std::make_unique<Declaration>( - comp_unit.GetSupportFiles().GetFileSpecAtIndex( - call_file ? *call_file : 0), - call_line ? *call_line : 0, call_column ? *call_column : 0); - - block->SetInlinedFunctionInfo(name, mangled_name, decl_up.get(), - call_up.get()); + Block *block = parent_block->CreateChild(die.GetID()).get(); + DWARFRangeList ranges; + const char *name = nullptr; + const char *mangled_name = nullptr; + + std::optional<int> decl_file; + std::optional<int> decl_line; + std::optional<int> decl_column; + std::optional<int> call_file; + std::optional<int> call_line; + std::optional<int> call_column; + if (die.GetDIENamesAndRanges(name, mangled_name, ranges, decl_file, + decl_line, decl_column, call_file, call_line, + call_column, nullptr)) { + const size_t num_ranges = ranges.GetSize(); + for (size_t i = 0; i < num_ranges; ++i) { + const DWARFRangeList::Entry &range = ranges.GetEntryRef(i); + const addr_t range_base = range.GetRangeBase(); + if (range_base >= subprogram_low_pc) + block->AddRange(Block::Range(range_base - subprogram_low_pc, + range.GetByteSize())); + else { + GetObjectFile()->GetModule()->ReportError( + "{0:x8}: adding range [{1:x16}-{2:x16}) which has a base " + "that is less than the function's low PC {3:x16}. Please file " + "a bug and attach the file at the " + "start of this error message", + block->GetID(), range_base, range.GetRangeEnd(), + subprogram_low_pc); } + } + block->FinalizeRanges(); + + if (tag != DW_TAG_subprogram && + (name != nullptr || mangled_name != nullptr)) { + std::unique_ptr<Declaration> decl_up; + if (decl_file || decl_line || decl_column) + decl_up = std::make_unique<Declaration>( + comp_unit.GetSupportFiles().GetFileSpecAtIndex( + decl_file ? *decl_file : 0), + decl_line ? *decl_line : 0, decl_column ? *decl_column : 0); + + std::unique_ptr<Declaration> call_up; + if (call_file || call_line || call_column) + call_up = std::make_unique<Declaration>( + comp_unit.GetSupportFiles().GetFileSpecAtIndex( + call_file ? *call_file : 0), + call_line ? *call_line : 0, call_column ? *call_column : 0); + + block->SetInlinedFunctionInfo(name, mangled_name, decl_up.get(), + call_up.get()); + } - ++blocks_added; + ++blocks_added; - if (die.HasChildren()) { - blocks_added += - ParseBlocksRecursive(comp_unit, block, die.GetFirstChild(), - subprogram_low_pc, depth + 1); - } + if (die.HasChildren()) { + blocks_added += ParseBlocksRecursive( + comp_unit, block, die.GetFirstChild(), subprogram_low_pc); } - } break; - default: - break; } - - // Only parse siblings of the block if we are not at depth zero. A depth of - // zero indicates we are currently parsing the top level DW_TAG_subprogram - // DIE - - if (depth == 0) - die.Clear(); - else - die = die.GetSibling(); } return blocks_added; } @@ -3240,8 +3195,17 @@ size_t SymbolFileDWARF::ParseBlocksRecursive(Function &func) { DWARFDIE function_die = dwarf_cu->GetNonSkeletonUnit().GetDIE(function_die_offset); if (function_die) { - ParseBlocksRecursive(*comp_unit, &func.GetBlock(false), function_die, - LLDB_INVALID_ADDRESS, 0); + // We can't use the file address from the Function object as (in the OSO + // case) it will already be remapped to the main module. + DWARFRangeList ranges = + function_die.GetDIE()->GetAttributeAddressRanges( + function_die.GetCU(), + /*check_hi_lo_pc=*/true); + lldb::addr_t function_file_addr = + ranges.GetMinRangeBase(LLDB_INVALID_ADDRESS); + if (function_file_addr != LLDB_INVALID_ADDRESS) + ParseBlocksRecursive(*comp_unit, &func.GetBlock(false), + function_die.GetFirstChild(), function_file_addr); } return functions_added; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index ac25a0c48ee7d7..76f4188fdf4afb 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -395,8 +395,7 @@ class SymbolFileDWARF : public SymbolFileCommon { Function *ParseFunction(CompileUnit &comp_unit, const DWARFDIE &die); size_t ParseBlocksRecursive(CompileUnit &comp_unit, Block *parent_block, - const DWARFDIE &die, - lldb::addr_t subprogram_low_pc, uint32_t depth); + DWARFDIE die, lldb::addr_t subprogram_low_pc); size_t ParseTypes(const SymbolContext &sc, const DWARFDIE &die, bool parse_siblings, bool parse_children); diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index d17fedf26b4c48..27d51bbd1cb563 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -394,18 +394,12 @@ Block *SymbolFileNativePDB::CreateBlock(PdbCompilandSymId block_id) { switch (sym.kind()) { case S_GPROC32: - case S_LPROC32: { + case S_LPROC32: // This is a function. It must be global. Creating the Function entry // for it automatically creates a block for it. - FunctionSP func = GetOrCreateFunction(block_id, *comp_unit); - if (func) { - Block &block = func->GetBlock(false); - if (block.GetNumRanges() == 0) - block.AddRange(Block::Range(0, func->GetAddressRange().GetByteSize())); - return █ - } + if (FunctionSP func = GetOrCreateFunction(block_id, *comp_unit)) + return &func->GetBlock(false); break; - } case S_BLOCK32: { // This is a block. Its parent is either a function or another block. In // either case, its parent can be viewed as a block (e.g. a function diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index 4935b0fbdfd877..952c1eb35ccb56 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -402,44 +402,33 @@ static size_t ParseFunctionBlocksForPDBSymbol( assert(pdb_symbol && parent_block); size_t num_added = 0; - switch (pdb_symbol->getSymTag()) { - case PDB_SymType::Block: - case PDB_SymType::Function: { - Block *block = nullptr; - auto &raw_sym = pdb_symbol->getRawSymbol(); - if (auto *pdb_func = llvm::dyn_cast<PDBSymbolFunc>(pdb_symbol)) { - if (pdb_func->hasNoInlineAttribute()) - break; - if (is_top_parent) - block = parent_block; - else - break; - } else if (llvm::isa<PDBSymbolBlock>(pdb_symbol)) { - auto uid = pdb_symbol->getSymIndexId(); - if (parent_block->FindBlockByID(uid)) - break; - if (raw_sym.getVirtualAddress() < func_file_vm_addr) - break; - block = parent_block->CreateChild(pdb_symbol->getSymIndexId()).get(); - } else - llvm_unreachable("Unexpected PDB symbol!"); + if (!is_top_parent) { + // Ranges for the top block were parsed together with the function. + if (pdb_symbol->getSymTag() != PDB_SymType::Block) + return num_added; + auto &raw_sym = pdb_symbol->getRawSymbol(); + assert(llvm::isa<PDBSymbolBlock>(pdb_symbol)); + auto uid = pdb_symbol->getSymIndexId(); + if (parent_block->FindBlockByID(uid)) + return num_added; + if (raw_sym.getVirtualAddress() < func_file_vm_addr) + return num_added; + + Block *block = parent_block->CreateChild(pdb_symbol->getSymIndexId()).get(); block->AddRange(Block::Range( raw_sym.getVirtualAddress() - func_file_vm_addr, raw_sym.getLength())); block->FinalizeRanges(); - ++num_added; - auto results_up = pdb_symbol->findAllChildren(); - if (!results_up) - break; - while (auto symbol_up = results_up->getNext()) { - num_added += ParseFunctionBlocksForPDBSymbol( - func_file_vm_addr, symbol_up.get(), block, false); - } - } break; - default: - break; + } + auto results_up = pdb_symbol->findAllChildren(); + if (!results_up) + return num_added; + + while (auto symbol_up = results_up->getNext()) { + num_added += ParseFunctionBlocksForPDBSymbol(func_file_vm_addr, + symbol_up.get(), parent_block, false); } return num_added; } diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp index b346749ca06ec3..0c067a0126571d 100644 --- a/lldb/source/Symbol/Function.cpp +++ b/lldb/source/Symbol/Function.cpp @@ -279,9 +279,14 @@ Function::Function(CompileUnit *comp_unit, lldb::user_id_t func_uid, AddressRanges ranges) : UserID(func_uid), m_comp_unit(comp_unit), m_type_uid(type_uid), m_type(type), m_mangled(mangled), m_block(*this, func_uid), - m_ranges(std::move(ranges)), m_range(CollapseRanges(m_ranges)), - m_frame_base(), m_flags(), m_prologue_byte_size(0) { + m_range(CollapseRanges(ranges)), m_prologue_byte_size(0) { assert(comp_unit != nullptr); + lldb::addr_t base_file_addr = m_range.GetBaseAddress().GetFileAddress(); + for (const AddressRange &range: ranges) + m_block.AddRange( + Block::Range(range.GetBaseAddress().GetFileAddress() - base_file_addr, + range.GetByteSize())); + m_block.FinalizeRanges(); } Function::~Function() = default; @@ -426,13 +431,16 @@ void Function::GetDescription(Stream *s, lldb::DescriptionLevel level, llvm::interleaveComma(decl_context, *s, [&](auto &ctx) { ctx.Dump(*s); }); *s << "}"; } - *s << ", range" << (m_ranges.size() > 1 ? "s" : "") << " = "; + *s << ", range" << (m_block.GetNumRanges() > 1 ? "s" : "") << " = "; Address::DumpStyle fallback_style = level == eDescriptionLevelVerbose ? Address::DumpStyleModuleWithFileAddress : Address::DumpStyleFileAddress; - for (const AddressRange &range : m_ranges) + for (unsigned idx = 0; idx < m_block.GetNumRanges(); ++idx) { + AddressRange range; + m_block.GetRangeAtIndex(idx, range); range.Dump(s, target, Address::DumpStyleLoadAddress, fallback_style); + } } void Function::Dump(Stream *s, bool show_context) const { diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s b/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s index 2584158207cc8e..b03d5d12ad2a1d 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s @@ -10,7 +10,7 @@ # CHECK: 1 match found in {{.*}} # CHECK: Summary: {{.*}}`foo -# CHECK: Function: id = {{.*}}, name = "foo", ranges = [0x0000000000000000-0x0000000000000007)[0x0000000000000007-0x000000000000000e)[0x0000000000000014-0x000000000000001b)[0x000000000000001b-0x000000000000001c) +# CHECK: Function: id = {{.*}}, name = "foo", ranges = [0x0000000000000000-0x000000000000000e)[0x0000000000000014-0x000000000000001c) .text diff --git a/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test b/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test index 1cb20a40363827..9057d01c25840f 100644 --- a/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test +++ b/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test @@ -2,7 +2,6 @@ REQUIRES: system-windows, lld RUN: %build --compiler=clang-cl --nodefaultlib --output=%t.exe %S/Inputs/FunctionNestedBlockTest.cpp RUN: lldb-test symbols -find=function -file FunctionNestedBlockTest.cpp -line 4 %t.exe | FileCheck --check-prefix=CHECK-FUNCTION %s RUN: lldb-test symbols -find=block -file FunctionNestedBlockTest.cpp -line 4 %t.exe | FileCheck --check-prefix=CHECK-BLOCK %s -XFAIL: * CHECK-FUNCTION: Found 1 functions: CHECK-FUNCTION: name = "{{.*}}", mangled = "{{_?}}main" >From f2a1a02b1124a073db529a66e2410c501b458ba3 Mon Sep 17 00:00:00 2001 From: Pavel Labath <pa...@labath.sk> Date: Thu, 28 Nov 2024 12:53:04 +0100 Subject: [PATCH 2/2] reformat --- lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp | 7 +++---- lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp | 5 ++--- lldb/source/Symbol/Function.cpp | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index d814b635f2e5cd..6f19b264eb3dda 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -3197,10 +3197,9 @@ size_t SymbolFileDWARF::ParseBlocksRecursive(Function &func) { if (function_die) { // We can't use the file address from the Function object as (in the OSO // case) it will already be remapped to the main module. - DWARFRangeList ranges = - function_die.GetDIE()->GetAttributeAddressRanges( - function_die.GetCU(), - /*check_hi_lo_pc=*/true); + DWARFRangeList ranges = function_die.GetDIE()->GetAttributeAddressRanges( + function_die.GetCU(), + /*check_hi_lo_pc=*/true); lldb::addr_t function_file_addr = ranges.GetMinRangeBase(LLDB_INVALID_ADDRESS); if (function_file_addr != LLDB_INVALID_ADDRESS) diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index 952c1eb35ccb56..b7854c05d345a8 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -420,15 +420,14 @@ static size_t ParseFunctionBlocksForPDBSymbol( block->AddRange(Block::Range( raw_sym.getVirtualAddress() - func_file_vm_addr, raw_sym.getLength())); block->FinalizeRanges(); - } auto results_up = pdb_symbol->findAllChildren(); if (!results_up) return num_added; while (auto symbol_up = results_up->getNext()) { - num_added += ParseFunctionBlocksForPDBSymbol(func_file_vm_addr, - symbol_up.get(), parent_block, false); + num_added += ParseFunctionBlocksForPDBSymbol( + func_file_vm_addr, symbol_up.get(), parent_block, false); } return num_added; } diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp index 0c067a0126571d..4f07b946353a44 100644 --- a/lldb/source/Symbol/Function.cpp +++ b/lldb/source/Symbol/Function.cpp @@ -282,7 +282,7 @@ Function::Function(CompileUnit *comp_unit, lldb::user_id_t func_uid, m_range(CollapseRanges(ranges)), m_prologue_byte_size(0) { assert(comp_unit != nullptr); lldb::addr_t base_file_addr = m_range.GetBaseAddress().GetFileAddress(); - for (const AddressRange &range: ranges) + for (const AddressRange &range : ranges) m_block.AddRange( Block::Range(range.GetBaseAddress().GetFileAddress() - base_file_addr, range.GetByteSize())); _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits