Author: Felipe de Azevedo Piovezan Date: 2025-12-02T14:53:02Z New Revision: 5a32fd3ea501ab78f5a5fc820f61fe81a98edc40
URL: https://github.com/llvm/llvm-project/commit/5a32fd3ea501ab78f5a5fc820f61fe81a98edc40 DIFF: https://github.com/llvm/llvm-project/commit/5a32fd3ea501ab78f5a5fc820f61fe81a98edc40.diff LOG: [lldb][NFCI] Rewrite UnwindAssemblyInstEmulation in terms of a CFG visit (#169630) Currently, UnwindAssemblyInstEmulation visits instructions in the order in which they appear in a function. This commit makes an NFCI change to UnwindAssemblyInstEmulation so that it follows the function's CFG: 1. The first instruction is enqueued. 2. While the queue is not empty: 2.1 Visit the instruction in the *back* queue to compute the new unwind state. 2.2 Push(+) the next instruction to the *back* of the queue. 2.3 If the instruction is a forward branch with a known branch target, push(+) the destination instruction to the *front* of the queue. (+) Only push if this instruction hasn't been enqueued before. (+) When pushing an instruction, the current unwind state is attached to it. Note that: * the "next instruction" is pushed to the *back* of the queue, * a branch target is pushed to the *front* of the queue, and * we always dequeue from the *back* of the queue. This means that consecutive instructions are visited one after the other; this is important to support "conditional blocks" [1] of instructions (see the line with "if last_condition != new_condition"). This is arguably a very Thumb specific thing, so maybe it shouldn't be in the generic algorithm; that said, it is already in the code, so we have to support it. The main reason this patch is NFCI and not NFC is that, now, the destination of a forward branch is visited in a slightly different moment than before. This should not cause any changes in output, as if a branch destination is reachable through two different paths, any well behaved compiler will generate the same unwind state in both paths. The motivation for this patch is to change step 2.2 so that it _only_ pushes the next instruction if the current instruction is not an unconditional branch / return, and to change step 2.3 so that backwards branches are also allowed, fixing the bug described by [2]. [1]: https://developer.arm.com/documentation/dui0473/m/arm-and-thumb-instructions/it [2]: https://github.com/llvm/llvm-project/pull/168398 Part of a sequence of PRs: [lldb][NFCI] Rewrite UnwindAssemblyInstEmulation in terms of a CFG visit #169630 [lldb][NFC] Rename forward_branch_offset to branch_offset in UnwindAssemblyInstEmulation #169631 [lldb] Add DisassemblerLLVMC::IsBarrier API #169632 [lldb] Handle backwards branches in UnwindAssemblyInstEmulation #169633 commit-id:dce6b515 Added: Modified: lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp Removed: ################################################################################ diff --git a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp index b9d665902dc45..c3d92f6a13d97 100644 --- a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp +++ b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp @@ -25,6 +25,8 @@ #include "lldb/Utility/Log.h" #include "lldb/Utility/Status.h" #include "lldb/Utility/StreamString.h" +#include "llvm/ADT/SmallSet.h" +#include <deque> using namespace lldb; using namespace lldb_private; @@ -150,29 +152,38 @@ bool UnwindAssemblyInstEmulation::GetNonCallSiteUnwindPlanFromAssembly( EmulateInstruction::InstructionCondition last_condition = EmulateInstruction::UnconditionalCondition; - for (const InstructionSP &inst : inst_list.Instructions()) { - if (!inst) - continue; - DumpInstToLog(log, *inst, inst_list); + std::deque<std::size_t> to_visit = {0}; + llvm::SmallSet<std::size_t, 0> enqueued = {0}; + + // Instructions reachable through jumps are inserted on the front. + // The next instruction is inserted on the back. + // Pop from the back to ensure non-branching instructions are visited + // sequentially. + while (!to_visit.empty()) { + const std::size_t current_index = to_visit.back(); + Instruction &inst = *inst_list.GetInstructionAtIndex(current_index); + to_visit.pop_back(); + DumpInstToLog(log, inst, inst_list); m_curr_row_modified = false; m_forward_branch_offset = 0; lldb::addr_t current_offset = - inst->GetAddress().GetFileAddress() - base_addr; + inst.GetAddress().GetFileAddress() - base_addr; auto it = saved_unwind_states.upper_bound(current_offset); assert(it != saved_unwind_states.begin() && "Unwind row for the function entry missing"); --it; // Move it to the row corresponding to the current offset - // If the offset of m_state.row doesn't match with the offset we see in - // saved_unwind_states then we have to update current unwind state to - // the saved values. It is happening after we processed an epilogue and a - // return to caller instruction. + // When state is forwarded through a branch, the offset of m_state.row is + // diff erent from the offset available in saved_unwind_states. Use the + // forwarded state in this case, as the previous instruction may have been + // an unconditional jump. + // FIXME: this assignment can always be done unconditionally. if (it->second.row.GetOffset() != m_state.row.GetOffset()) m_state = it->second; - m_inst_emulator_up->SetInstruction(inst->GetOpcode(), inst->GetAddress(), + m_inst_emulator_up->SetInstruction(inst.GetOpcode(), inst.GetAddress(), nullptr); const EmulateInstruction::InstructionCondition new_condition = m_inst_emulator_up->GetInstructionCondition(); @@ -199,13 +210,21 @@ bool UnwindAssemblyInstEmulation::GetNonCallSiteUnwindPlanFromAssembly( // If the current instruction is a branch forward then save the current // CFI information for the offset where we are branching. + Address branch_address = inst.GetAddress(); + branch_address.Slide(m_forward_branch_offset); if (m_forward_branch_offset != 0 && - range.ContainsFileAddress(inst->GetAddress().GetFileAddress() + - m_forward_branch_offset)) { + range.ContainsFileAddress(branch_address.GetFileAddress())) { if (auto [it, inserted] = saved_unwind_states.emplace( current_offset + m_forward_branch_offset, m_state); - inserted) + inserted) { it->second.row.SetOffset(current_offset + m_forward_branch_offset); + if (std::size_t dest_instr_index = + inst_list.GetIndexOfInstructionAtAddress(branch_address); + dest_instr_index < inst_list.GetSize()) { + to_visit.push_front(dest_instr_index); + enqueued.insert(dest_instr_index); + } + } } // Were there any changes to the CFI while evaluating this instruction? @@ -213,12 +232,17 @@ bool UnwindAssemblyInstEmulation::GetNonCallSiteUnwindPlanFromAssembly( // Save the modified row if we don't already have a CFI row in the // current address const lldb::addr_t next_inst_offset = - current_offset + inst->GetOpcode().GetByteSize(); + current_offset + inst.GetOpcode().GetByteSize(); if (saved_unwind_states.count(next_inst_offset) == 0) { m_state.row.SetOffset(next_inst_offset); saved_unwind_states.emplace(next_inst_offset, m_state); } } + + const size_t next_idx = current_index + 1; + const bool never_enqueued = enqueued.insert(next_idx).second; + if (never_enqueued && next_idx < inst_list.GetSize()) + to_visit.push_back(next_idx); } for (auto &[_, state] : saved_unwind_states) _______________________________________________ lldb-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits
