https://github.com/felipepiovezan created 
https://github.com/llvm/llvm-project/pull/169630

Currently, UnwindAssemblyInstEmulation visits instructions in the order
in which they appear in a function. This commit makes an NFCI change to
UnwindAssemblyInstEmulation so that it follows the function's CFG:

1. The first instruction is enqueued.
2. While the queue is not empty:
2.1 Visit the instruction in the *back* queue to compute the new unwind
    state.
2.2 Push(*) the next instruction to the *back* of the queue.
2.3 If the instruction is a forward branch with a known branch target,
    push(*) the destination instruction to the *front* of the queue.

(*) Only push if this instruction hasn't been enqueued before.
(*) When pushing an instruction, the current unwind state is attached to
it.

Note that:
* the "next instruction" is pushed to the *back* of the queue,
* a branch target is pushed to the *front* of the queue, and
* we always dequeue from the *back* of the queue.

This means that consecutive instructions are visited one after the
other; this is important to support "conditional blocks" [1] of
instructions (see the line with "if last_condition != new_condition").
This is arguably a very Thumb specific thing, so maybe it shouldn't be
in the generic algorithm; that said, it is already in the code, so we
have to support it.

The main reason this patch is NFCI and not NFC is that, now, the
destination of a forward branch is visited in a slightly different
moment than before. This should not cause any changes in output, as if a
branch destination is reachable through two different paths, any well
behaved compiler will generate the same unwind state in both paths.

The motivation for this patch is to change step 2.2 so that it _only_
pushes the next instruction if the current instruction is not an
unconditional branch / return, and to change step 2.3 so that backwards
branches are also allowed, fixing the bug described by [2].

[1]: 
https://developer.arm.com/documentation/dui0473/m/arm-and-thumb-instructions/it
[2]: https://github.com/llvm/llvm-project/pull/168398

commit-id:dce6b515


>From 9295ef8614e7f2d14cb7d95c8d963e4128d5992c Mon Sep 17 00:00:00 2001
From: Felipe de Azevedo Piovezan <[email protected]>
Date: Wed, 26 Nov 2025 10:55:12 +0000
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
 =?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.7
---
 .../UnwindAssemblyInstEmulation.cpp           | 52 ++++++++++++++-----
 1 file changed, 38 insertions(+), 14 deletions(-)

diff --git 
a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
 
b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
index b9d665902dc45..074746c0fab3d 100644
--- 
a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
+++ 
b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
@@ -25,6 +25,8 @@
 #include "lldb/Utility/Log.h"
 #include "lldb/Utility/Status.h"
 #include "lldb/Utility/StreamString.h"
+#include "llvm/ADT/SmallSet.h"
+#include <deque>
 
 using namespace lldb;
 using namespace lldb_private;
@@ -150,29 +152,38 @@ bool 
UnwindAssemblyInstEmulation::GetNonCallSiteUnwindPlanFromAssembly(
   EmulateInstruction::InstructionCondition last_condition =
       EmulateInstruction::UnconditionalCondition;
 
-  for (const InstructionSP &inst : inst_list.Instructions()) {
-    if (!inst)
-      continue;
-    DumpInstToLog(log, *inst, inst_list);
+  std::deque<std::size_t> to_visit = {0};
+  llvm::SmallSet<std::size_t, 0> enqueued = {0};
+
+  // Instructions reachable through jumps are inserted on the front.
+  // The next instruction in inserted on the back.
+  // Pop from the back to ensure non-branching instructions are visited
+  // sequentially.
+  while (!to_visit.empty()) {
+    std::size_t current_index = to_visit.back();
+    Instruction &inst = *inst_list.GetInstructionAtIndex(current_index);
+    to_visit.pop_back();
+    DumpInstToLog(log, inst, inst_list);
 
     m_curr_row_modified = false;
     m_forward_branch_offset = 0;
 
     lldb::addr_t current_offset =
-        inst->GetAddress().GetFileAddress() - base_addr;
+        inst.GetAddress().GetFileAddress() - base_addr;
     auto it = saved_unwind_states.upper_bound(current_offset);
     assert(it != saved_unwind_states.begin() &&
            "Unwind row for the function entry missing");
     --it; // Move it to the row corresponding to the current offset
 
-    // If the offset of m_state.row doesn't match with the offset we see in
-    // saved_unwind_states then we have to update current unwind state to
-    // the saved values. It is happening after we processed an epilogue and a
-    // return to caller instruction.
+    // When state is forwarded through a branch, the offset of m_state.row is
+    // different from the offset available in saved_unwind_states. Use the
+    // forwarded state in this case, as the previous instruction may have been
+    // an unconditional jump.
+    // FIXME: this assignment can always be done unconditionally.
     if (it->second.row.GetOffset() != m_state.row.GetOffset())
       m_state = it->second;
 
-    m_inst_emulator_up->SetInstruction(inst->GetOpcode(), inst->GetAddress(),
+    m_inst_emulator_up->SetInstruction(inst.GetOpcode(), inst.GetAddress(),
                                        nullptr);
     const EmulateInstruction::InstructionCondition new_condition =
         m_inst_emulator_up->GetInstructionCondition();
@@ -199,13 +210,21 @@ bool 
UnwindAssemblyInstEmulation::GetNonCallSiteUnwindPlanFromAssembly(
 
     // If the current instruction is a branch forward then save the current
     // CFI information for the offset where we are branching.
+    Address branch_address = inst.GetAddress();
+    branch_address.Slide(m_forward_branch_offset);
     if (m_forward_branch_offset != 0 &&
-        range.ContainsFileAddress(inst->GetAddress().GetFileAddress() +
-                                  m_forward_branch_offset)) {
+        range.ContainsFileAddress(branch_address.GetFileAddress())) {
       if (auto [it, inserted] = saved_unwind_states.emplace(
               current_offset + m_forward_branch_offset, m_state);
-          inserted)
+          inserted) {
         it->second.row.SetOffset(current_offset + m_forward_branch_offset);
+        if (std::size_t dest_instr_index =
+                inst_list.GetIndexOfInstructionAtAddress(branch_address);
+            dest_instr_index < inst_list.GetSize()) {
+          to_visit.push_front(dest_instr_index);
+          enqueued.insert(dest_instr_index);
+        }
+      }
     }
 
     // Were there any changes to the CFI while evaluating this instruction?
@@ -213,12 +232,17 @@ bool 
UnwindAssemblyInstEmulation::GetNonCallSiteUnwindPlanFromAssembly(
       // Save the modified row if we don't already have a CFI row in the
       // current address
       const lldb::addr_t next_inst_offset =
-          current_offset + inst->GetOpcode().GetByteSize();
+          current_offset + inst.GetOpcode().GetByteSize();
       if (saved_unwind_states.count(next_inst_offset) == 0) {
         m_state.row.SetOffset(next_inst_offset);
         saved_unwind_states.emplace(next_inst_offset, m_state);
       }
     }
+
+    const size_t next_idx = current_index + 1;
+    const bool never_enqueued = enqueued.insert(next_idx).second;
+    if (never_enqueued && next_idx < inst_list.GetSize())
+      to_visit.push_back(next_idx);
   }
 
   for (auto &[_, state] : saved_unwind_states)

_______________________________________________
lldb-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to