Author: Jonas Devlieghere Date: 2025-08-14T10:01:41-05:00 New Revision: d0e40ff705cd746f118a118bd882af422bc026f8
URL: https://github.com/llvm/llvm-project/commit/d0e40ff705cd746f118a118bd882af422bc026f8 DIFF: https://github.com/llvm/llvm-project/commit/d0e40ff705cd746f118a118bd882af422bc026f8.diff LOG: [lldb] Support parsing data symbols from the Wasm name section (#153494) This PR adds support for parsing the data symbols from the WebAssembly name section, which consists of a name and address range for the segments in the Wasm data section. Unlike other object file formats, Wasm has no symbols for referencing items within those segments (i.e. symbols the user has defined). Added: Modified: lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml lldb/test/Shell/Symtab/symtab-wasm.test Removed: ################################################################################ diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp index 919cc21c32ffd..b3144f28f4913 100644 --- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp +++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp @@ -251,11 +251,11 @@ bool ObjectFileWasm::ParseHeader() { static llvm::Expected<std::vector<AddressRange>> ParseFunctions(SectionSP code_section_sp) { - DataExtractor code_section_data; - code_section_sp->GetSectionData(code_section_data); + DataExtractor data; + code_section_sp->GetSectionData(data); lldb::offset_t offset = 0; - const uint64_t function_count = code_section_data.GetULEB128(&offset); + const uint64_t function_count = data.GetULEB128(&offset); if (function_count > std::numeric_limits<uint32_t>::max()) return llvm::createStringError("function count overflows uint32_t"); @@ -263,7 +263,7 @@ ParseFunctions(SectionSP code_section_sp) { functions.reserve(function_count); for (uint32_t i = 0; i < function_count; ++i) { - const uint64_t function_size = code_section_data.GetULEB128(&offset); + const uint64_t function_size = data.GetULEB128(&offset); if (function_size > std::numeric_limits<uint32_t>::max()) return llvm::createStringError("function size overflows uint32_t"); // llvm-objdump considers the ULEB with the function size to be part of the @@ -281,9 +281,45 @@ ParseFunctions(SectionSP code_section_sp) { return functions; } +static llvm::Expected<std::vector<AddressRange>> +ParseData(SectionSP data_section_sp) { + DataExtractor data; + data_section_sp->GetSectionData(data); + + lldb::offset_t offset = 0; + + const uint64_t segment_count = data.GetULEB128(&offset); + if (segment_count > std::numeric_limits<uint32_t>::max()) + return llvm::createStringError("segment count overflows uint32_t"); + + std::vector<AddressRange> segments; + segments.reserve(segment_count); + + for (uint32_t i = 0; i < segment_count; ++i) { + const uint64_t flags = data.GetULEB128(&offset); + if (flags > std::numeric_limits<uint32_t>::max()) + return llvm::createStringError("segment flags overflows uint32_t"); + + const uint64_t segment_size = data.GetULEB128(&offset); + if (flags > std::numeric_limits<uint32_t>::max()) + return llvm::createStringError("segment size overflows uint32_t"); + + segments.emplace_back(data_section_sp, offset, segment_size); + + std::optional<lldb::offset_t> next_offset = + llvm::checkedAddUnsigned(offset, segment_size); + if (!next_offset) + return llvm::createStringError("segment offset overflows uint64_t"); + offset = *next_offset; + } + + return segments; +} + static llvm::Expected<std::vector<Symbol>> ParseNames(SectionSP name_section_sp, - const std::vector<AddressRange> &functions) { + const std::vector<AddressRange> &function_ranges, + const std::vector<AddressRange> &segment_ranges) { DataExtractor name_section_data; name_section_sp->GetSectionData(name_section_data); @@ -305,17 +341,34 @@ ParseNames(SectionSP name_section_sp, for (uint64_t i = 0; c && i < count; ++i) { const uint64_t idx = data.getULEB128(c); const std::optional<std::string> name = GetWasmString(data, c); - if (!name || idx >= functions.size()) + if (!name || idx >= function_ranges.size()) continue; symbols.emplace_back( symbols.size(), Mangled(*name), lldb::eSymbolTypeCode, /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false, - /*is_artificial=*/false, functions[idx], + /*is_artificial=*/false, function_ranges[idx], /*size_is_valid=*/true, /*contains_linker_annotations=*/false, /*flags=*/0); } } break; - case llvm::wasm::WASM_NAMES_DATA_SEGMENT: + case llvm::wasm::WASM_NAMES_DATA_SEGMENT: { + const uint64_t count = data.getULEB128(c); + if (count > std::numeric_limits<uint32_t>::max()) + return llvm::createStringError("data count overflows uint32_t"); + for (uint64_t i = 0; c && i < count; ++i) { + const uint64_t idx = data.getULEB128(c); + const std::optional<std::string> name = GetWasmString(data, c); + if (!name || idx >= segment_ranges.size()) + continue; + symbols.emplace_back( + symbols.size(), Mangled(*name), lldb::eSymbolTypeData, + /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false, + /*is_artificial=*/false, segment_ranges[idx], + /*size_is_valid=*/true, /*contains_linker_annotations=*/false, + /*flags=*/0); + } + + } break; case llvm::wasm::WASM_NAMES_GLOBAL: case llvm::wasm::WASM_NAMES_LOCAL: default: @@ -336,21 +389,35 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) { assert(m_sections_up && "sections must be parsed"); Log *log = GetLog(LLDBLog::Object); - // The name section contains names and indexes. First parse the functions from - // the code section so we can access them by their index. - SectionSP code_section_sp = - m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false); - if (!code_section_sp) { - LLDB_LOG(log, "Failed to parse Wasm symbol table: no functions section"); - return; + // The name section contains names and indexes. First parse the data from the + // relevant sections so we can access it by its index. + std::vector<AddressRange> function_ranges; + std::vector<AddressRange> segment_ranges; + + // Parse the code section. + if (SectionSP code_section_sp = + m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false)) { + llvm::Expected<std::vector<AddressRange>> functions = + ParseFunctions(code_section_sp); + if (!functions) { + LLDB_LOG_ERROR(log, functions.takeError(), + "Failed to parse Wasm code section: {0}"); + return; + } + function_ranges = *functions; } - llvm::Expected<std::vector<AddressRange>> functions = - ParseFunctions(code_section_sp); - if (!functions) { - LLDB_LOG_ERROR(log, functions.takeError(), - "Failed to parse Wasm functions: {0}"); - return; + // Parse the data section. + if (SectionSP data_section_sp = + m_sections_up->FindSectionByType(lldb::eSectionTypeData, false)) { + llvm::Expected<std::vector<AddressRange>> segments = + ParseData(data_section_sp); + if (!segments) { + LLDB_LOG_ERROR(log, segments.takeError(), + "Failed to parse Wasm data section: {0}"); + return; + } + segment_ranges = *segments; } // Parse the name section. @@ -362,7 +429,7 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) { } llvm::Expected<std::vector<Symbol>> symbols = - ParseNames(name_section_sp, *functions); + ParseNames(name_section_sp, function_ranges, segment_ranges); if (!symbols) { LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}"); return; @@ -408,6 +475,9 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) { // For this reason Section::GetFileAddress() must return zero for the // Code section. vm_addr = 0; + } else if (llvm::wasm::WASM_SEC_DATA == sect_info.id) { + section_type = eSectionTypeData; + section_name = ConstString("data"); } else { section_type = GetSectionTypeFromName(sect_info.name.GetStringRef()); if (section_type == eSectionTypeOther) diff --git a/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml b/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml index 165bb53662f40..088d6163d6b0b 100644 --- a/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml +++ b/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml @@ -1,3 +1,15 @@ +# clang -target wasm32 -nostdlib -Wl,--no-entry -Wl,--export-all -O0 -g -o simple.wasm simple.c +# char* str = "data str"; +# +# int add(int a, int b) { +# return a + b; +# } +# +# int main() { +# int i = 1; +# int j = 2; +# return add(i, j); +# } --- !WASM FileHeader: Version: 0x1 @@ -37,13 +49,13 @@ Sections: Mutable: true InitExpr: Opcode: I32_CONST - Value: 66560 + Value: 66576 - Index: 1 Type: I32 Mutable: false InitExpr: Opcode: I32_CONST - Value: 1024 + Value: 1036 - Index: 2 Type: I32 Mutable: false @@ -55,44 +67,50 @@ Sections: Mutable: false InitExpr: Opcode: I32_CONST - Value: 1024 + Value: 1040 - Index: 4 Type: I32 Mutable: false InitExpr: Opcode: I32_CONST - Value: 66560 + Value: 1040 - Index: 5 Type: I32 Mutable: false InitExpr: Opcode: I32_CONST - Value: 1024 + Value: 66576 - Index: 6 Type: I32 Mutable: false InitExpr: Opcode: I32_CONST - Value: 66560 + Value: 1024 - Index: 7 Type: I32 Mutable: false InitExpr: Opcode: I32_CONST - Value: 131072 + Value: 66576 - Index: 8 Type: I32 Mutable: false InitExpr: Opcode: I32_CONST - Value: 0 + Value: 131072 - Index: 9 Type: I32 Mutable: false InitExpr: Opcode: I32_CONST - Value: 1 + Value: 0 - Index: 10 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 1 + - Index: 11 Type: I32 Mutable: false InitExpr: @@ -115,6 +133,9 @@ Sections: - Name: main Kind: FUNCTION Index: 3 + - Name: str + Kind: GLOBAL + Index: 1 - Name: __main_void Kind: FUNCTION Index: 2 @@ -123,34 +144,34 @@ Sections: Index: 0 - Name: __dso_handle Kind: GLOBAL - Index: 1 + Index: 2 - Name: __data_end Kind: GLOBAL - Index: 2 + Index: 3 - Name: __stack_low Kind: GLOBAL - Index: 3 + Index: 4 - Name: __stack_high Kind: GLOBAL - Index: 4 + Index: 5 - Name: __global_base Kind: GLOBAL - Index: 5 + Index: 6 - Name: __heap_base Kind: GLOBAL - Index: 6 + Index: 7 - Name: __heap_end Kind: GLOBAL - Index: 7 + Index: 8 - Name: __memory_base Kind: GLOBAL - Index: 8 + Index: 9 - Name: __table_base Kind: GLOBAL - Index: 9 + Index: 10 - Name: __wasm_first_page_end Kind: GLOBAL - Index: 10 + Index: 11 - Type: CODE Functions: - Index: 0 @@ -169,6 +190,20 @@ Sections: - Index: 3 Locals: [] Body: 1082808080000F0B + - Type: DATA + Segments: + - SectionOffset: 7 + InitFlags: 0 + Offset: + Opcode: I32_CONST + Value: 1024 + Content: '646174612073747200' + - SectionOffset: 22 + InitFlags: 0 + Offset: + Opcode: I32_CONST + Value: 1036 + Content: '00040000' - Type: CUSTOM Name: name FunctionNames: @@ -183,8 +218,17 @@ Sections: GlobalNames: - Index: 0 Name: __stack_pointer + DataSegmentNames: + - Index: 0 + Name: .rodata + - Index: 1 + Name: .data - Type: CUSTOM + HeaderSecSizeEncodingLen: 2 Name: producers + Languages: + - Name: C11 + Version: '' Tools: - Name: clang Version: '22.0.0git' diff --git a/lldb/test/Shell/Symtab/symtab-wasm.test b/lldb/test/Shell/Symtab/symtab-wasm.test index fc185cd81a0ec..5374b0c2f2892 100644 --- a/lldb/test/Shell/Symtab/symtab-wasm.test +++ b/lldb/test/Shell/Symtab/symtab-wasm.test @@ -1,7 +1,9 @@ # RUN: yaml2obj %S/Inputs/simple.wasm.yaml -o %t.wasm # RUN: %lldb %t.wasm -o 'image dump symtab' -# CHECK: Code 0x0000000000000002 {{.*}} __wasm_call_ctors -# CHECK: Code 0x0000000000000005 {{.*}} add -# CHECK: Code 0x000000000000002f {{.*}} __original_main -# CHECK: Code 0x000000000000007c {{.*}} main +# CHECK: Code 0x0000000000000002 0x0000000000000002 {{.*}} __wasm_call_ctors +# CHECK: Code 0x0000000000000005 0x0000000000000029 {{.*}} add +# CHECK: Code 0x000000000000002f 0x000000000000004c {{.*}} __original_main +# CHECK: Code 0x000000000000007c 0x0000000000000009 {{.*}} main +# CHECK: Data 0x000000000000022f 0x0000000000000041 {{.*}} .rodata +# CHECK: Data 0x0000000000000270 0x0000000000000000 {{.*}} .data _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits