Author: Ely Ronnen Date: 2025-05-21T06:50:13+02:00 New Revision: 9d0614e77ab1c5e2264e07d9b4b5f9780b38781c
URL: https://github.com/llvm/llvm-project/commit/9d0614e77ab1c5e2264e07d9b4b5f9780b38781c DIFF: https://github.com/llvm/llvm-project/commit/9d0614e77ab1c5e2264e07d9b4b5f9780b38781c.diff LOG: [lldb-dap] fix disassembly request instruction offset handling (#140486) Fix the handling of the `instructionOffset` parameter, which resulted in always returning the wrong disassembly because VSCode always uses `instructionOffset = -50` and expects 50 instructions before the given address, instead of 50 bytes before Added: Modified: lldb/include/lldb/API/SBTarget.h lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py lldb/source/API/SBTarget.cpp lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py lldb/test/API/tools/lldb-dap/disassemble/main.c lldb/test/API/tools/lldb-dap/instruction-breakpoint/TestDAP_instruction_breakpoint.py lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp Removed: ################################################################################ diff --git a/lldb/include/lldb/API/SBTarget.h b/lldb/include/lldb/API/SBTarget.h index 17735fdca6559..2776a8f9010fe 100644 --- a/lldb/include/lldb/API/SBTarget.h +++ b/lldb/include/lldb/API/SBTarget.h @@ -349,6 +349,18 @@ class LLDB_API SBTarget { SBError SetLabel(const char *label); + /// Architecture opcode byte size width accessor + /// + /// \return + /// The minimum size in 8-bit (host) bytes of an opcode. + uint32_t GetMinimumOpcodeByteSize() const; + + /// Architecture opcode byte size width accessor + /// + /// \return + /// The maximum size in 8-bit (host) bytes of an opcode. + uint32_t GetMaximumOpcodeByteSize() const; + /// Architecture data byte width accessor /// /// \return diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index 70fd0b0c419db..9937618a2cf54 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -136,7 +136,6 @@ def __init__( self.initialized = False self.frame_scopes = {} self.init_commands = init_commands - self.disassembled_instructions = {} @classmethod def encode_content(cls, s: str) -> bytes: @@ -735,11 +734,15 @@ def request_disconnect(self, terminateDebuggee=None): return self.send_recv(command_dict) def request_disassemble( - self, memoryReference, offset=-50, instructionCount=200, resolveSymbols=True + self, + memoryReference, + instructionOffset=-50, + instructionCount=200, + resolveSymbols=True, ): args_dict = { "memoryReference": memoryReference, - "offset": offset, + "instructionOffset": instructionOffset, "instructionCount": instructionCount, "resolveSymbols": resolveSymbols, } @@ -748,9 +751,7 @@ def request_disassemble( "type": "request", "arguments": args_dict, } - instructions = self.send_recv(command_dict)["body"]["instructions"] - for inst in instructions: - self.disassembled_instructions[inst["address"]] = inst + return self.send_recv(command_dict)["body"]["instructions"] def request_readMemory(self, memoryReference, offset, count): args_dict = { diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py index afdc746ed0d0d..4028ae4a2525f 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py @@ -346,10 +346,14 @@ def disassemble(self, threadId=None, frameIndex=None): memoryReference = stackFrames[0]["instructionPointerReference"] self.assertIsNotNone(memoryReference) - if memoryReference not in self.dap_server.disassembled_instructions: - self.dap_server.request_disassemble(memoryReference=memoryReference) + instructions = self.dap_server.request_disassemble( + memoryReference=memoryReference + ) + disassembled_instructions = {} + for inst in instructions: + disassembled_instructions[inst["address"]] = inst - return self.dap_server.disassembled_instructions[memoryReference] + return disassembled_instructions, disassembled_instructions[memoryReference] def attach( self, diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp index b42ada42b0931..cd8a770a0ec04 100644 --- a/lldb/source/API/SBTarget.cpp +++ b/lldb/source/API/SBTarget.cpp @@ -1668,6 +1668,26 @@ SBError SBTarget::SetLabel(const char *label) { return Status::FromError(target_sp->SetLabel(label)); } +uint32_t SBTarget::GetMinimumOpcodeByteSize() const { + LLDB_INSTRUMENT_VA(this); + + TargetSP target_sp(GetSP()); + if (target_sp) + return target_sp->GetArchitecture().GetMinimumOpcodeByteSize(); + + return 0; +} + +uint32_t SBTarget::GetMaximumOpcodeByteSize() const { + LLDB_INSTRUMENT_VA(this); + + TargetSP target_sp(GetSP()); + if (target_sp) + return target_sp->GetArchitecture().GetMaximumOpcodeByteSize(); + + return 0; +} + uint32_t SBTarget::GetDataByteSize() { LLDB_INSTRUMENT_VA(this); diff --git a/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py b/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py index 9e8ef5b289f2e..a045dfd32ef2d 100644 --- a/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py +++ b/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py @@ -20,21 +20,56 @@ def test_disassemble(self): program = self.getBuildArtifact("a.out") self.build_and_launch(program) source = "main.c" - self.source_path = os.path.join(os.getcwd(), source) - self.set_source_breakpoints( - source, - [ - line_number(source, "// breakpoint 1"), - ], - ) + self.set_source_breakpoints(source, [line_number(source, "// breakpoint 1")]) self.continue_to_next_stop() - pc_assembly = self.disassemble(frameIndex=0) + _, pc_assembly = self.disassemble(frameIndex=0) self.assertIn("location", pc_assembly, "Source location missing.") self.assertIn("instruction", pc_assembly, "Assembly instruction missing.") # The calling frame (qsort) is coming from a system library, as a result # we should not have a source location. - qsort_assembly = self.disassemble(frameIndex=1) + _, qsort_assembly = self.disassemble(frameIndex=1) self.assertNotIn("location", qsort_assembly, "Source location not expected.") self.assertIn("instruction", pc_assembly, "Assembly instruction missing.") + + def test_disassemble_backwards(self): + """ + Tests the 'disassemble' request with a backwards disassembly range. + """ + program = self.getBuildArtifact("a.out") + self.build_and_launch(program) + source = "main.c" + self.set_source_breakpoints(source, [line_number(source, "// breakpoint 1")]) + self.continue_to_next_stop() + + instruction_pointer_reference = self.get_stackFrames()[1][ + "instructionPointerReference" + ] + backwards_instructions = 200 + instructions_count = 400 + instructions = self.dap_server.request_disassemble( + memoryReference=instruction_pointer_reference, + instructionOffset=-backwards_instructions, + instructionCount=instructions_count, + ) + + self.assertEqual( + len(instructions), + instructions_count, + "Disassemble request should return the exact requested number of instructions.", + ) + + frame_instruction_index = next( + ( + i + for i, instruction in enumerate(instructions) + if instruction["address"] == instruction_pointer_reference + ), + -1, + ) + self.assertEqual( + frame_instruction_index, + backwards_instructions, + f"requested instruction should be preceeded by {backwards_instructions} instructions. Actual index: {frame_instruction_index}", + ) diff --git a/lldb/test/API/tools/lldb-dap/disassemble/main.c b/lldb/test/API/tools/lldb-dap/disassemble/main.c index 6609a4c37a70f..9da119ef70262 100644 --- a/lldb/test/API/tools/lldb-dap/disassemble/main.c +++ b/lldb/test/API/tools/lldb-dap/disassemble/main.c @@ -27,4 +27,4 @@ int main(void) { printf("\n"); return 0; -} \ No newline at end of file +} diff --git a/lldb/test/API/tools/lldb-dap/instruction-breakpoint/TestDAP_instruction_breakpoint.py b/lldb/test/API/tools/lldb-dap/instruction-breakpoint/TestDAP_instruction_breakpoint.py index 53b7df9e54af2..07d2059b2749b 100644 --- a/lldb/test/API/tools/lldb-dap/instruction-breakpoint/TestDAP_instruction_breakpoint.py +++ b/lldb/test/API/tools/lldb-dap/instruction-breakpoint/TestDAP_instruction_breakpoint.py @@ -66,7 +66,7 @@ def instruction_breakpoint_test(self): ) # Check disassembly view - instruction = self.disassemble(frameIndex=0) + disassembled_instructions, instruction = self.disassemble(frameIndex=0) self.assertEqual( instruction["address"], intstructionPointerReference[0], @@ -74,8 +74,7 @@ def instruction_breakpoint_test(self): ) # Get next instruction address to set instruction breakpoint - disassembled_instruction_list = self.dap_server.disassembled_instructions - instruction_addr_list = list(disassembled_instruction_list.keys()) + instruction_addr_list = list(disassembled_instructions.keys()) index = instruction_addr_list.index(intstructionPointerReference[0]) if len(instruction_addr_list) >= (index + 1): next_inst_addr = instruction_addr_list[index + 1] diff --git a/lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp index d779de1e8e834..9aa6995e5d668 100644 --- a/lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp +++ b/lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp @@ -12,22 +12,166 @@ #include "Protocol/ProtocolRequests.h" #include "Protocol/ProtocolTypes.h" #include "RequestHandler.h" +#include "lldb/API/SBAddress.h" #include "lldb/API/SBInstruction.h" +#include "lldb/API/SBTarget.h" #include "lldb/lldb-types.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Error.h" +#include <cstdint> #include <optional> using namespace lldb_dap::protocol; namespace lldb_dap { +static protocol::DisassembledInstruction GetInvalidInstruction() { + DisassembledInstruction invalid_inst; + invalid_inst.presentationHint = + DisassembledInstruction::eDisassembledInstructionPresentationHintInvalid; + return invalid_inst; +} + +static lldb::SBAddress GetDisassembleStartAddress(lldb::SBTarget target, + lldb::SBAddress addr, + int64_t instruction_offset) { + if (instruction_offset == 0) + return addr; + + if (target.GetMinimumOpcodeByteSize() == target.GetMaximumOpcodeByteSize()) { + // We have fixed opcode size, so we can calculate the address directly, + // negative or positive. + lldb::addr_t load_addr = addr.GetLoadAddress(target); + load_addr += instruction_offset * target.GetMinimumOpcodeByteSize(); + return lldb::SBAddress(load_addr, target); + } + + if (instruction_offset > 0) { + lldb::SBInstructionList forward_insts = + target.ReadInstructions(addr, instruction_offset + 1); + return forward_insts.GetInstructionAtIndex(forward_insts.GetSize() - 1) + .GetAddress(); + } + + // We have a negative instruction offset, so we need to disassemble backwards. + // The opcode size is not fixed, so we have no idea where to start from. + // Let's try from the start of the current symbol if available. + auto symbol = addr.GetSymbol(); + if (!symbol.IsValid()) + return addr; + + // Add valid instructions before the current instruction using the symbol. + lldb::SBInstructionList symbol_insts = + target.ReadInstructions(symbol.GetStartAddress(), addr, nullptr); + if (!symbol_insts.IsValid() || symbol_insts.GetSize() == 0) + return addr; + + const auto backwards_instructions_count = + static_cast<size_t>(std::abs(instruction_offset)); + if (symbol_insts.GetSize() < backwards_instructions_count) { + // We don't have enough instructions to disassemble backwards, so just + // return the start address of the symbol. + return symbol_insts.GetInstructionAtIndex(0).GetAddress(); + } + + return symbol_insts + .GetInstructionAtIndex(symbol_insts.GetSize() - + backwards_instructions_count) + .GetAddress(); +} + +static DisassembledInstruction ConvertSBInstructionToDisassembledInstruction( + lldb::SBTarget &target, lldb::SBInstruction &inst, bool resolve_symbols) { + if (!inst.IsValid()) + return GetInvalidInstruction(); + + auto addr = inst.GetAddress(); + const auto inst_addr = addr.GetLoadAddress(target); + const char *m = inst.GetMnemonic(target); + const char *o = inst.GetOperands(target); + const char *c = inst.GetComment(target); + auto d = inst.GetData(target); + + std::string bytes; + llvm::raw_string_ostream sb(bytes); + for (unsigned i = 0; i < inst.GetByteSize(); i++) { + lldb::SBError error; + uint8_t b = d.GetUnsignedInt8(error, i); + if (error.Success()) + sb << llvm::format("%2.2x ", b); + } + + DisassembledInstruction disassembled_inst; + disassembled_inst.address = inst_addr; + disassembled_inst.instructionBytes = + bytes.size() > 0 ? bytes.substr(0, bytes.size() - 1) : ""; + + std::string instruction; + llvm::raw_string_ostream si(instruction); + + lldb::SBSymbol symbol = addr.GetSymbol(); + // Only add the symbol on the first line of the function. + if (symbol.IsValid() && symbol.GetStartAddress() == addr) { + // If we have a valid symbol, append it as a label prefix for the first + // instruction. This is so you can see the start of a function/callsite + // in the assembly, at the moment VS Code (1.80) does not visualize the + // symbol associated with the assembly instruction. + si << (symbol.GetMangledName() != nullptr ? symbol.GetMangledName() + : symbol.GetName()) + << ": "; + + if (resolve_symbols) + disassembled_inst.symbol = symbol.GetDisplayName(); + } + + si << llvm::formatv("{0,7} {1,12}", m, o); + if (c && c[0]) { + si << " ; " << c; + } + + disassembled_inst.instruction = std::move(instruction); + + auto line_entry = addr.GetLineEntry(); + // If the line number is 0 then the entry represents a compiler generated + // location. + + if (line_entry.GetStartAddress() == addr && line_entry.IsValid() && + line_entry.GetFileSpec().IsValid() && line_entry.GetLine() != 0) { + auto source = CreateSource(line_entry); + disassembled_inst.location = std::move(source); + + const auto line = line_entry.GetLine(); + if (line != 0 && line != LLDB_INVALID_LINE_NUMBER) + disassembled_inst.line = line; + + const auto column = line_entry.GetColumn(); + if (column != 0 && column != LLDB_INVALID_COLUMN_NUMBER) + disassembled_inst.column = column; + + auto end_line_entry = line_entry.GetEndAddress().GetLineEntry(); + if (end_line_entry.IsValid() && + end_line_entry.GetFileSpec() == line_entry.GetFileSpec()) { + const auto end_line = end_line_entry.GetLine(); + if (end_line != 0 && end_line != LLDB_INVALID_LINE_NUMBER && + end_line != line) { + disassembled_inst.endLine = end_line; + + const auto end_column = end_line_entry.GetColumn(); + if (end_column != 0 && end_column != LLDB_INVALID_COLUMN_NUMBER && + end_column != column) + disassembled_inst.endColumn = end_column - 1; + } + } + } + + return disassembled_inst; +} + /// Disassembles code stored at the provided location. /// Clients should only call this request if the corresponding capability /// `supportsDisassembleRequest` is true. llvm::Expected<DisassembleResponseBody> DisassembleRequestHandler::Run(const DisassembleArguments &args) const { - std::vector<DisassembledInstruction> instructions; - std::optional<lldb::addr_t> addr_opt = DecodeMemoryReference(args.memoryReference); if (!addr_opt.has_value()) @@ -35,7 +179,7 @@ DisassembleRequestHandler::Run(const DisassembleArguments &args) const { args.memoryReference); lldb::addr_t addr_ptr = *addr_opt; - addr_ptr += args.instructionOffset.value_or(0); + addr_ptr += args.offset.value_or(0); lldb::SBAddress addr(addr_ptr, dap.target); if (!addr.IsValid()) return llvm::make_error<DAPError>( @@ -56,97 +200,59 @@ DisassembleRequestHandler::Run(const DisassembleArguments &args) const { } } - lldb::SBInstructionList insts = dap.target.ReadInstructions( - addr, args.instructionCount, flavor_string.c_str()); + // Offset (in instructions) to be applied after the byte offset (if any) + // before disassembling. Can be negative. + int64_t instruction_offset = args.instructionOffset.value_or(0); + + // Calculate a sufficient address to start disassembling from. + lldb::SBAddress disassemble_start_addr = + GetDisassembleStartAddress(dap.target, addr, instruction_offset); + if (!disassemble_start_addr.IsValid()) + return llvm::make_error<DAPError>( + "Unexpected error while disassembling instructions."); + lldb::SBInstructionList insts = dap.target.ReadInstructions( + disassemble_start_addr, args.instructionCount, flavor_string.c_str()); if (!insts.IsValid()) return llvm::make_error<DAPError>( - "Failed to find instructions for memory address."); + "Unexpected error while disassembling instructions."); + // Conver the found instructions to the DAP format. const bool resolve_symbols = args.resolveSymbols.value_or(false); - const auto num_insts = insts.GetSize(); - for (size_t i = 0; i < num_insts; ++i) { + std::vector<DisassembledInstruction> instructions; + size_t original_address_index = args.instructionCount; + for (size_t i = 0; i < insts.GetSize(); ++i) { lldb::SBInstruction inst = insts.GetInstructionAtIndex(i); - auto addr = inst.GetAddress(); - const auto inst_addr = addr.GetLoadAddress(dap.target); - const char *m = inst.GetMnemonic(dap.target); - const char *o = inst.GetOperands(dap.target); - const char *c = inst.GetComment(dap.target); - auto d = inst.GetData(dap.target); - - std::string bytes; - llvm::raw_string_ostream sb(bytes); - for (unsigned i = 0; i < inst.GetByteSize(); i++) { - lldb::SBError error; - uint8_t b = d.GetUnsignedInt8(error, i); - if (error.Success()) { - sb << llvm::format("%2.2x ", b); - } - } + if (inst.GetAddress() == addr) + original_address_index = i; - DisassembledInstruction disassembled_inst; - disassembled_inst.address = inst_addr; - disassembled_inst.instructionBytes = - bytes.size() > 0 ? bytes.substr(0, bytes.size() - 1) : ""; - - std::string instruction; - llvm::raw_string_ostream si(instruction); - - lldb::SBSymbol symbol = addr.GetSymbol(); - // Only add the symbol on the first line of the function. - if (symbol.IsValid() && symbol.GetStartAddress() == addr) { - // If we have a valid symbol, append it as a label prefix for the first - // instruction. This is so you can see the start of a function/callsite - // in the assembly, at the moment VS Code (1.80) does not visualize the - // symbol associated with the assembly instruction. - si << (symbol.GetMangledName() != nullptr ? symbol.GetMangledName() - : symbol.GetName()) - << ": "; - - if (resolve_symbols) - disassembled_inst.symbol = symbol.GetDisplayName(); - } + instructions.push_back(ConvertSBInstructionToDisassembledInstruction( + dap.target, inst, resolve_symbols)); + } - si << llvm::formatv("{0,7} {1,12}", m, o); - if (c && c[0]) { - si << " ; " << c; - } + // Check if we miss instructions at the beginning. + if (instruction_offset < 0) { + const auto backwards_instructions_count = + static_cast<size_t>(std::abs(instruction_offset)); + if (original_address_index < backwards_instructions_count) { + // We don't have enough instructions before the main address as was + // requested. Let's pad the start of the instructions with invalid + // instructions. + std::vector<DisassembledInstruction> invalid_instructions( + backwards_instructions_count - original_address_index, + GetInvalidInstruction()); + instructions.insert(instructions.begin(), invalid_instructions.begin(), + invalid_instructions.end()); - disassembled_inst.instruction = instruction; - - auto line_entry = addr.GetLineEntry(); - // If the line number is 0 then the entry represents a compiler generated - // location. - if (line_entry.GetStartAddress() == addr && line_entry.IsValid() && - line_entry.GetFileSpec().IsValid() && line_entry.GetLine() != 0) { - auto source = CreateSource(line_entry); - disassembled_inst.location = std::move(source); - - const auto line = line_entry.GetLine(); - if (line != 0 && line != LLDB_INVALID_LINE_NUMBER) - disassembled_inst.line = line; - - const auto column = line_entry.GetColumn(); - if (column != 0 && column != LLDB_INVALID_COLUMN_NUMBER) - disassembled_inst.column = column; - - auto end_line_entry = line_entry.GetEndAddress().GetLineEntry(); - if (end_line_entry.IsValid() && - end_line_entry.GetFileSpec() == line_entry.GetFileSpec()) { - const auto end_line = end_line_entry.GetLine(); - if (end_line != 0 && end_line != LLDB_INVALID_LINE_NUMBER && - end_line != line) { - disassembled_inst.endLine = end_line; - - const auto end_column = end_line_entry.GetColumn(); - if (end_column != 0 && end_column != LLDB_INVALID_COLUMN_NUMBER && - end_column != column) - disassembled_inst.endColumn = end_column - 1; - } - } + // Trim excess instructions if needed. + if (instructions.size() > args.instructionCount) + instructions.resize(args.instructionCount); } + } - instructions.push_back(std::move(disassembled_inst)); + // Pad the instructions with invalid instructions if needed. + while (instructions.size() < args.instructionCount) { + instructions.push_back(GetInvalidInstruction()); } return DisassembleResponseBody{std::move(instructions)}; _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits