https://github.com/JDevlieghere updated https://github.com/llvm/llvm-project/pull/153634
>From bfc57b337054bd1184b96baa1d59dd75a23a70c1 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere <jo...@devlieghere.com> Date: Thu, 14 Aug 2025 10:49:48 -0700 Subject: [PATCH 1/3] [lldb] Create sections for Wasm segments This is a continuation of #153494. In a WebAssembly file, the "name" section contains names for the segments in the data section (WASM_NAMES_DATA_SEGMENT). We already parse these as sections, and with this PR, we also create sub-sections for the data segments. --- .../ObjectFile/wasm/ObjectFileWasm.cpp | 70 ++++++++++++++----- lldb/test/Shell/Symtab/symtab-wasm.test | 20 ++++-- 2 files changed, 64 insertions(+), 26 deletions(-) diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp index b3144f28f4913..dc0b0241d1f24 100644 --- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp +++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp @@ -281,7 +281,16 @@ ParseFunctions(SectionSP code_section_sp) { return functions; } -static llvm::Expected<std::vector<AddressRange>> +struct WasmSegment { + WasmSegment(SectionSP section_sp, lldb::offset_t offset, uint32_t size, + uint32_t flags) + : address_range(section_sp, offset, size), flags(flags) {}; + std::string name; + AddressRange address_range; + uint32_t flags = 0; +}; + +static llvm::Expected<std::vector<WasmSegment>> ParseData(SectionSP data_section_sp) { DataExtractor data; data_section_sp->GetSectionData(data); @@ -292,7 +301,7 @@ ParseData(SectionSP data_section_sp) { if (segment_count > std::numeric_limits<uint32_t>::max()) return llvm::createStringError("segment count overflows uint32_t"); - std::vector<AddressRange> segments; + std::vector<WasmSegment> segments; segments.reserve(segment_count); for (uint32_t i = 0; i < segment_count; ++i) { @@ -304,7 +313,7 @@ ParseData(SectionSP data_section_sp) { if (flags > std::numeric_limits<uint32_t>::max()) return llvm::createStringError("segment size overflows uint32_t"); - 
segments.emplace_back(data_section_sp, offset, segment_size); + segments.emplace_back(data_section_sp, offset, segment_size, flags); std::optional<lldb::offset_t> next_offset = llvm::checkedAddUnsigned(offset, segment_size); @@ -319,7 +328,7 @@ ParseData(SectionSP data_section_sp) { static llvm::Expected<std::vector<Symbol>> ParseNames(SectionSP name_section_sp, const std::vector<AddressRange> &function_ranges, - const std::vector<AddressRange> &segment_ranges) { + std::vector<WasmSegment> &segments) { DataExtractor name_section_data; name_section_sp->GetSectionData(name_section_data); @@ -358,12 +367,14 @@ ParseNames(SectionSP name_section_sp, for (uint64_t i = 0; c && i < count; ++i) { const uint64_t idx = data.getULEB128(c); const std::optional<std::string> name = GetWasmString(data, c); - if (!name || idx >= segment_ranges.size()) + if (!name || idx >= segments.size()) continue; + // Update the segment name. + segments[i].name = *name; symbols.emplace_back( symbols.size(), Mangled(*name), lldb::eSymbolTypeData, /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false, - /*is_artificial=*/false, segment_ranges[idx], + /*is_artificial=*/false, segments[i].address_range, /*size_is_valid=*/true, /*contains_linker_annotations=*/false, /*flags=*/0); } @@ -391,33 +402,34 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) { // The name section contains names and indexes. First parse the data from the // relevant sections so we can access it by its index. - std::vector<AddressRange> function_ranges; - std::vector<AddressRange> segment_ranges; + std::vector<AddressRange> functions; + std::vector<WasmSegment> segments; // Parse the code section. 
if (SectionSP code_section_sp = m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false)) { - llvm::Expected<std::vector<AddressRange>> functions = + llvm::Expected<std::vector<AddressRange>> maybe_functions = ParseFunctions(code_section_sp); - if (!functions) { - LLDB_LOG_ERROR(log, functions.takeError(), + if (!maybe_functions) { + LLDB_LOG_ERROR(log, maybe_functions.takeError(), "Failed to parse Wasm code section: {0}"); return; } - function_ranges = *functions; + functions = *maybe_functions; } // Parse the data section. - if (SectionSP data_section_sp = - m_sections_up->FindSectionByType(lldb::eSectionTypeData, false)) { - llvm::Expected<std::vector<AddressRange>> segments = + SectionSP data_section_sp = + m_sections_up->FindSectionByType(lldb::eSectionTypeData, false); + if (data_section_sp) { + llvm::Expected<std::vector<WasmSegment>> maybe_segments = ParseData(data_section_sp); - if (!segments) { - LLDB_LOG_ERROR(log, segments.takeError(), + if (!maybe_segments) { + LLDB_LOG_ERROR(log, maybe_segments.takeError(), "Failed to parse Wasm data section: {0}"); return; } - segment_ranges = *segments; + segments = *maybe_segments; } // Parse the name section. 
@@ -429,7 +441,7 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) { } llvm::Expected<std::vector<Symbol>> symbols = - ParseNames(name_section_sp, function_ranges, segment_ranges); + ParseNames(name_section_sp, functions, segments); if (!symbols) { LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}"); return; @@ -438,6 +450,26 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) { for (const Symbol &symbol : *symbols) symtab.AddSymbol(symbol); + lldb::user_id_t segment_id = 0; + for (const WasmSegment &segment : segments) { + const lldb::addr_t segment_addr = + segment.address_range.GetBaseAddress().GetFileAddress(); + const size_t segment_size = segment.address_range.GetByteSize(); + SectionSP segment_sp = std::make_shared<Section>( + /*parent_section_sp=*/data_section_sp, GetModule(), + /*obj_file=*/this, + ++segment_id << 8, // 1-based segment index, shifted by 8 bits to avoid + // collision with section IDs. + ConstString(segment.name), eSectionTypeData, + /*file_vm_addr=*/segment_addr, + /*vm_size=*/segment_size, + /*file_offset=*/segment_addr, + /*file_size=*/segment_size, + /*log2align=*/0, segment.flags); + m_sections_up->AddSection(segment_sp); + GetModule()->GetSectionList()->AddSection(segment_sp); + } + symtab.Finalize(); } diff --git a/lldb/test/Shell/Symtab/symtab-wasm.test b/lldb/test/Shell/Symtab/symtab-wasm.test index 5374b0c2f2892..5e7c7cabc5280 100644 --- a/lldb/test/Shell/Symtab/symtab-wasm.test +++ b/lldb/test/Shell/Symtab/symtab-wasm.test @@ -1,9 +1,15 @@ # RUN: yaml2obj %S/Inputs/simple.wasm.yaml -o %t.wasm -# RUN: %lldb %t.wasm -o 'image dump symtab' +# RUN: %lldb %t.wasm -o 'image dump symtab' -o 'image dump sections' | FileCheck %s -# CHECK: Code 0x0000000000000002 0x0000000000000002 {{.*}} __wasm_call_ctors -# CHECK: Code 0x0000000000000005 0x0000000000000029 {{.*}} add -# CHECK: Code 0x000000000000002f 0x000000000000004c {{.*}} __original_main -# CHECK: Code 0x000000000000007c 0x0000000000000009 {{.*}} main -# 
CHECK: Data 0x000000000000022f 0x0000000000000041 {{.*}} .rodata -# CHECK: Data 0x0000000000000270 0x0000000000000000 {{.*}} .data +CHECK: Code 0x0000000000000002 0x0000000000000002 0x00000000 __wasm_call_ctors +CHECK: Code 0x0000000000000005 0x0000000000000029 0x00000000 add +CHECK: Code 0x000000000000002f 0x000000000000004c 0x00000000 __original_main +CHECK: Code 0x000000000000007c 0x0000000000000009 0x00000000 main +CHECK: Data 0x000000000000022f 0x0000000000000041 0x00000000 .rodata +CHECK: Data 0x0000000000000270 0x0000000000000000 0x00000000 .data + +CHECK: 0x0000000000000001 code {{.*}} 0x000001a1 0x00000085 0x00000000 symtab-wasm.test.tmp.wasm.code +CHECK: 0x0000000000000003 data {{.*}} 0x0000022c 0x0000001a 0x00000000 symtab-wasm.test.tmp.wasm.data +CHECK: 0x0000000000000040 wasm-name {{.*}} 0x00000251 0x00000059 0x00000000 symtab-wasm.test.tmp.wasm.name +CHECK: 0x0000000000000100 data {{.*}} 0x0000022f 0x00000041 0x00000000 symtab-wasm.test.tmp.wasm.data..rodata +CHECK: 0x0000000000000200 data {{.*}} 0x00000270 0x00000000 0x00000000 symtab-wasm.test.tmp.wasm.data..data >From 83cd4e1512c652567fa4c3deb342774dbce6d2c9 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere <jo...@devlieghere.com> Date: Sun, 17 Aug 2025 10:48:12 -0700 Subject: [PATCH 2/3] Check segment_size, not flags --- lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp index dc0b0241d1f24..f1f34a7fe7fbf 100644 --- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp +++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp @@ -310,7 +310,7 @@ ParseData(SectionSP data_section_sp) { return llvm::createStringError("segment flags overflows uint32_t"); const uint64_t segment_size = data.GetULEB128(&offset); - if (flags > std::numeric_limits<uint32_t>::max()) + if (segment_size > std::numeric_limits<uint32_t>::max()) 
return llvm::createStringError("segment size overflows uint32_t"); segments.emplace_back(data_section_sp, offset, segment_size, flags); >From 83ae0f619a30f4c309a61389e0f5ca1b4cbda48a Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere <jo...@devlieghere.com> Date: Mon, 18 Aug 2025 08:08:30 -0700 Subject: [PATCH 3/3] Support 'active' data segments --- .../ObjectFile/wasm/ObjectFileWasm.cpp | 26 ++++++++++++++----- lldb/test/Shell/Symtab/symtab-wasm.test | 8 +++--- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp index f1f34a7fe7fbf..a000b34fbb549 100644 --- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp +++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp @@ -282,12 +282,10 @@ ParseFunctions(SectionSP code_section_sp) { } struct WasmSegment { - WasmSegment(SectionSP section_sp, lldb::offset_t offset, uint32_t size, - uint32_t flags) - : address_range(section_sp, offset, size), flags(flags) {}; + WasmSegment(SectionSP section_sp, lldb::offset_t offset, uint32_t size) + : address_range(section_sp, offset, size) {}; std::string name; AddressRange address_range; - uint32_t flags = 0; }; static llvm::Expected<std::vector<WasmSegment>> @@ -309,11 +307,27 @@ ParseData(SectionSP data_section_sp) { if (flags > std::numeric_limits<uint32_t>::max()) return llvm::createStringError("segment flags overflows uint32_t"); + // Data segments have a mode that identifies them as either passive or + // active. An active data segment copies its contents into a memory during + // instantiation, as specified by a memory index and a constant expression + // defining an offset into that memory. 
+ if (flags & llvm::wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) { + const uint64_t memidx = data.GetULEB128(&offset); + if (memidx > std::numeric_limits<uint32_t>::max()) + return llvm::createStringError("memidx overflows uint32_t"); + } + + if ((flags & llvm::wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) { + // Skip over the constant expression. + for (uint8_t b = 0; b != llvm::wasm::WASM_OPCODE_END;) + b = data.GetU8(&offset); + } + const uint64_t segment_size = data.GetULEB128(&offset); if (segment_size > std::numeric_limits<uint32_t>::max()) return llvm::createStringError("segment size overflows uint32_t"); - segments.emplace_back(data_section_sp, offset, segment_size, flags); + segments.emplace_back(data_section_sp, offset, segment_size); std::optional<lldb::offset_t> next_offset = llvm::checkedAddUnsigned(offset, segment_size); @@ -465,7 +479,7 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) { /*vm_size=*/segment_size, /*file_offset=*/segment_addr, /*file_size=*/segment_size, - /*log2align=*/0, segment.flags); + /*log2align=*/0, /*flags=*/0); m_sections_up->AddSection(segment_sp); GetModule()->GetSectionList()->AddSection(segment_sp); } diff --git a/lldb/test/Shell/Symtab/symtab-wasm.test b/lldb/test/Shell/Symtab/symtab-wasm.test index 5e7c7cabc5280..4170d9aba9eea 100644 --- a/lldb/test/Shell/Symtab/symtab-wasm.test +++ b/lldb/test/Shell/Symtab/symtab-wasm.test @@ -5,11 +5,11 @@ CHECK: Code 0x0000000000000002 0x0000000000000002 0x00000000 __wasm_call_ctors CHECK: Code 0x0000000000000005 0x0000000000000029 0x00000000 add CHECK: Code 0x000000000000002f 0x000000000000004c 0x00000000 __original_main CHECK: Code 0x000000000000007c 0x0000000000000009 0x00000000 main -CHECK: Data 0x000000000000022f 0x0000000000000041 0x00000000 .rodata -CHECK: Data 0x0000000000000270 0x0000000000000000 0x00000000 .data +CHECK: Data 0x0000000000000233 0x0000000000000009 0x00000000 .rodata +CHECK: Data 0x0000000000000242 0x0000000000000004 0x00000000 .data CHECK: 0x0000000000000001 code 
{{.*}} 0x000001a1 0x00000085 0x00000000 symtab-wasm.test.tmp.wasm.code CHECK: 0x0000000000000003 data {{.*}} 0x0000022c 0x0000001a 0x00000000 symtab-wasm.test.tmp.wasm.data CHECK: 0x0000000000000040 wasm-name {{.*}} 0x00000251 0x00000059 0x00000000 symtab-wasm.test.tmp.wasm.name -CHECK: 0x0000000000000100 data {{.*}} 0x0000022f 0x00000041 0x00000000 symtab-wasm.test.tmp.wasm.data..rodata -CHECK: 0x0000000000000200 data {{.*}} 0x00000270 0x00000000 0x00000000 symtab-wasm.test.tmp.wasm.data..data +CHECK: 0x0000000000000100 data {{.*}} 0x00000233 0x00000009 0x00000000 symtab-wasm.test.tmp.wasm.data..rodata +CHECK: 0x0000000000000200 data {{.*}} 0x00000242 0x00000004 0x00000000 symtab-wasm.test.tmp.wasm.data..data _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits