[Lldb-commits] [PATCH] D135917: [lldb][trace] Add a basic function call dump [2] - Implement the reconstruction algorithm

walter erquinigo via Phabricator via lldb-commits Thu, 13 Oct 2022 14:21:14 -0700

wallace updated this revision to Diff 467605.
wallace added a comment.

nit



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135917/new/

https://reviews.llvm.org/D135917

Files:
  lldb/include/lldb/Target/TraceDumper.h
  lldb/source/Commands/CommandObjectThread.cpp
  lldb/source/Target/TraceDumper.cpp
  lldb/test/API/commands/trace/TestTraceDumpFunctionCalls.py
  lldb/test/API/commands/trace/TestTraceLoad.py
  lldb/test/API/commands/trace/inline-function/a.out
  lldb/test/API/commands/trace/inline-function/inline.cpp

Index: lldb/test/API/commands/trace/inline-function/inline.cpp
===================================================================
--- /dev/null
+++ lldb/test/API/commands/trace/inline-function/inline.cpp
@@ -0,0 +1,18 @@
+__attribute__((always_inline)) inline int mult(int x, int y) {
+  int f = x * y;
+  f++;
+  f *= f;
+  return f;
+}
+
+int foo(int x) {
+  int z = mult(x, x - 1);
+  z++;
+  return z;
+}
+
+int main() {
+  int x = 12;
+  int z = foo(x);
+  return z + x;
+}
Index: lldb/test/API/commands/trace/TestTraceLoad.py
===================================================================
--- lldb/test/API/commands/trace/TestTraceLoad.py
+++ lldb/test/API/commands/trace/TestTraceLoad.py
@@ -68,8 +68,8 @@
       "individualCounts": {
         "software disabled tracing": 1,
         "trace synchronization point": 1,
-        "HW clock tick": 8,
-        "CPU core changed": 1
+        "CPU core changed": 1,
+        "HW clock tick": 8
       }
     },
     "continuousExecutions": 1,
Index: lldb/test/API/commands/trace/TestTraceDumpFunctionCalls.py
===================================================================
--- lldb/test/API/commands/trace/TestTraceDumpFunctionCalls.py
+++ lldb/test/API/commands/trace/TestTraceDumpFunctionCalls.py
@@ -3,15 +3,106 @@
 from lldbsuite.test.decorators import *
 
 class TestTraceDumpInfo(TraceIntelPTTestCaseBase):
-    def testDumpFunctionCalls(self):
+    def testDumpSimpleFunctionCalls(self):
       self.expect("trace load -v " +
         os.path.join(self.getSourceDir(), "intelpt-trace", "trace.json"))
 
       self.expect("thread trace dump function-calls 2",
         error=True, substrs=['error: no thread with index: "2"'])
 
-      self.expect("thread trace dump function-calls 1 -j",
-        substrs=['json = true, pretty_json = false, file = false, thread = 3842849'])
+      # We don't support yet JSON
+      self.expect("thread trace dump function-calls 1 -j", substrs=['[]'])
 
-      self.expect("thread trace dump function-calls 1 -F /tmp -J",
-        substrs=['false, pretty_json = true, file = true, thread = 3842849'])
+      # We test first some code without function call
+      self.expect("thread trace dump function-calls 1",
+        substrs=['''thread #1: tid = 3842849
+
+[call tree #0]
+a.out`main + 4 at main.cpp:2 to 4:0  [1, 22]'''])
+
+    def testFunctionCallsWithErrors(self):
+      self.expect("trace load -v " +
+        os.path.join(self.getSourceDir(), "intelpt-multi-core-trace", "trace.json"))
+
+      # We expect that tracing errors appear as a different tree
+      self.expect("thread trace dump function-calls 2",
+        substrs=['''thread #2: tid = 3497496
+
+[call tree #0]
+m.out`foo() + 65 at multi_thread.cpp:12:21 to 12:21  [4, 19524]
+
+[call tree #1]
+<tracing errors>  [19532, 19532]'''])
+      self.expect("thread trace dump function-calls 3",
+        substrs=['''thread #3: tid = 3497497
+
+[call tree #0]
+m.out`bar() + 30 at multi_thread.cpp:19:3 to 20:6  [5, 61831]
+
+[call tree #1]
+<tracing errors>  [61834, 61834]'''])
+
+    def testInlineFunctionCalls(self):
+      self.expect("file " + os.path.join(self.getSourceDir(), "inline-function", "a.out"))
+      self.expect("b main") # we'll trace from the beginning of main
+      self.expect("b 17")
+      self.expect("r")
+      self.expect("thread trace start")
+      self.expect("c")
+      self.expect("thread trace dump function-calls",
+        substrs=['''[call tree #0]
+a.out`main + 8 at inline.cpp:15:7 to 16:14  [1, 5]
+  a.out`foo(int) at inline.cpp:8:16 to 9:15  [6, 13]
+    a.out`foo(int) + 22 [inlined] mult(int, int) at inline.cpp:2:7 to 5:10  [14, 21]
+  a.out`foo(int) + 49 at inline.cpp:9:15 to 12:1  [22, 26]
+a.out`main + 25 at inline.cpp:16:14 to 16:14  [27, 27]'''])
+
+    def testIncompleteInlineFunctionCalls(self):
+      self.expect("file " + os.path.join(self.getSourceDir(), "inline-function", "a.out"))
+      self.expect("b 4") # we'll trace from the middle of the inline method
+      self.expect("b 17")
+      self.expect("r")
+      self.expect("thread trace start")
+      self.expect("c")
+      self.expect("thread trace dump function-calls",
+        substrs=['''[call tree #0]
+a.out`main
+  a.out`foo(int)
+    a.out`foo(int) + 36 [inlined] mult(int, int) + 14 at inline.cpp:4:5 to 5:10  [1, 5]
+  a.out`foo(int) + 49 at inline.cpp:9:15 to 12:1  [6, 10]
+a.out`main + 25 at inline.cpp:16:14 to 16:14  [11, 11]'''])
+
+    def testMultifileFunctionCalls(self):
+      # This test is extremely important because it first calls the method foo() which requires going through the dynamic linking.
+      # You'll see the entry "a.out`symbol stub for: foo()" which will invoke the ld linker, which will in turn find the actual foo
+      # function and eventually invoke it.  However, we don't have the image of the linker in the trace bundle, so we'll see errors
+      # because the decoder couldn't find the linker binary! After those failures, the linker will resume right where we return to
+      # main after foo() finished.
+      # Then, we call foo() again, but because it has already been loaded by the linker, we don't invoke the linker anymore! And
+      # we'll see a nice tree without errors in this second invocation. Something interesting happens here. We still have an
+      # invocation to the symbol stub for foo(), but it modifies the stack so that when we return from foo() we don't stop again
+      # at the symbol stub, but instead we return directly to main. This is an example of returning several levels up in the
+      # call stack.
+      # Not only that, we also have an inline method in between.
+      self.expect("trace load " + os.path.join(self.getSourceDir(), "intelpt-trace-multi-file", "multi-file-no-ld.json"))
+      self.expect("thread trace dump function-calls",
+        substrs=['''thread #1: tid = 815455
+
+[call tree #0]
+a.out`main + 15 at main.cpp:10 to 10:0  [1, 1]
+  a.out`symbol stub for: foo() to <+11>  [3, 5]
+    a.out`a.out[0x0000000000400510] to a.out[0x0000000000400516]  [6, 7]
+
+[call tree #1]
+<tracing errors>  [8, 8]
+
+[call tree #2]
+a.out`main + 20 at main.cpp:10 to 12:0  [10, 14]
+  a.out`main + 34 [inlined] inline_function() at main.cpp:4 to 6:0  [16, 20]
+a.out`main + 55 at main.cpp:14 to 16:0  [21, 25]
+  a.out`symbol stub for: foo() to <+0>  [26, 26]
+    libfoo.so`foo() at foo.cpp:3 to 4:0  [27, 30]
+      libfoo.so`symbol stub for: bar() to <+0>  [31, 31]
+        libbar.so`bar() at bar.cpp:1 to 4:0  [32, 40]
+    libfoo.so`foo() + 13 at foo.cpp:4 to 6:0  [41, 48]
+a.out`main + 68 at main.cpp:16 to 16:0  [49, 51]'''])
Index: lldb/source/Target/TraceDumper.cpp
===================================================================
--- lldb/source/Target/TraceDumper.cpp
+++ lldb/source/Target/TraceDumper.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/Target/TraceDumper.h"
-
 #include "lldb/Core/Module.h"
 #include "lldb/Symbol/CompileUnit.h"
 #include "lldb/Symbol/Function.h"
@@ -26,15 +25,20 @@
     return None;
   return s;
 }
+
+static const char *GetModuleName(const SymbolContext &sc) {
+  if (!sc.module_sp)
+    return nullptr;
+  return sc.module_sp->GetFileSpec().GetFilename().AsCString();
+}
+
 /// \return
 ///   The module name (basename if the module is a file, or the actual name if
 ///   it's a virtual module), or \b nullptr if no name nor module was found.
 static const char *GetModuleName(const TraceDumper::TraceItem &item) {
-  if (!item.symbol_info || !item.symbol_info->sc.module_sp)
+  if (!item.symbol_info)
     return nullptr;
-  return item.symbol_info->sc.module_sp->GetFileSpec()
-      .GetFilename()
-      .AsCString();
+  return GetModuleName(item.symbol_info->sc);
 }
 
 // This custom LineEntry validator is neded because some line_entries have
@@ -64,10 +68,12 @@
 ///       - module
 ///       - symbol
 ///       - function
-///       - line
+///       - inlined function
+///       - source line info
 static bool
 IsSameInstructionSymbolContext(const TraceDumper::SymbolInfo &prev_insn,
-                               const TraceDumper::SymbolInfo &insn) {
+                               const TraceDumper::SymbolInfo &insn,
+                               bool check_source_line_info = true) {
   // module checks
   if (insn.sc.module_sp != prev_insn.sc.module_sp)
     return false;
@@ -78,11 +84,23 @@
 
   // function checks
   if (!insn.sc.function && !prev_insn.sc.function)
-    return true;
+    return true; // This means two dangling instruction in the same module. We
+                 // can assume they are part of the same unnamed symbol
   else if (insn.sc.function != prev_insn.sc.function)
     return false;
 
+  Block *inline_block_a =
+      insn.sc.block ? insn.sc.block->GetContainingInlinedBlock() : nullptr;
+  Block *inline_block_b = prev_insn.sc.block
+                              ? prev_insn.sc.block->GetContainingInlinedBlock()
+                              : nullptr;
+  if (inline_block_a != inline_block_b)
+    return false;
+
   // line entry checks
+  if (!check_source_line_info)
+    return true;
+
   const bool curr_line_valid = IsLineEntryValid(insn.sc.line_entry);
   const bool prev_line_valid = IsLineEntryValid(prev_insn.sc.line_entry);
   if (curr_line_valid && prev_line_valid)
@@ -100,6 +118,14 @@
 
   void NoMoreData() override { m_s << "    no more data\n"; }
 
+  void FunctionCallForest(
+      const std::vector<TraceDumper::FunctionCallUP> &forest) override {
+    for (size_t i = 0; i < forest.size(); i++) {
+      m_s.Format("\n[call tree #{0}]\n", i);
+      DumpFunctionCallTree(*forest[i]);
+    }
+  }
+
   void TraceItem(const TraceDumper::TraceItem &item) override {
     if (item.symbol_info) {
       if (!item.prev_symbol_info ||
@@ -174,6 +200,77 @@
   }
 
 private:
+  void
+  DumpSegmentContext(const TraceDumper::FunctionCall::TracedSegment &segment) {
+    if (segment.call.is_error) {
+      m_s << "<tracing errors>";
+      return;
+    }
+
+    const SymbolContext &first_sc = segment.first_symbol_info.sc;
+    first_sc.DumpStopContext(
+        &m_s, segment.first_symbol_info.exe_ctx.GetTargetPtr(),
+        segment.first_symbol_info.address,
+        /*show_fullpaths=*/false,
+        /*show_module=*/true, /*show_inlined_frames=*/false,
+        /*show_function_arguments=*/true,
+        /*show_function_name=*/true);
+    m_s << " to ";
+    const SymbolContext &last_sc = segment.last_symbol_info.sc;
+    if (IsLineEntryValid(first_sc.line_entry) &&
+        IsLineEntryValid(last_sc.line_entry)) {
+      m_s.Format("{0}:{1}", last_sc.line_entry.line, last_sc.line_entry.column);
+    } else {
+      last_sc.DumpStopContext(
+          &m_s, segment.first_symbol_info.exe_ctx.GetTargetPtr(),
+          segment.last_symbol_info.address,
+          /*show_fullpaths=*/false,
+          /*show_module=*/false, /*show_inlined_frames=*/false,
+          /*show_function_arguments=*/false,
+          /*show_function_name=*/false);
+    }
+  }
+
+  void DumpUntracedContext(const TraceDumper::FunctionCall &function_call) {
+    if (function_call.is_error) {
+      m_s << "tracing error";
+    }
+    const SymbolContext &sc = function_call.GetLastSymbolInfo().sc;
+
+    const char *module_name = GetModuleName(sc);
+    if (!module_name)
+      m_s << "(none)";
+    else if (!sc.function && !sc.symbol)
+      m_s << module_name << "`(none)";
+    else
+      m_s << module_name << "`" << sc.GetFunctionName().AsCString();
+  }
+
+  void DumpFunctionCallTree(const TraceDumper::FunctionCall &function_call) {
+    if (function_call.untraced_segment) {
+      m_s.Indent();
+      DumpUntracedContext(function_call);
+      m_s << "\n";
+
+      m_s.IndentMore();
+      DumpFunctionCallTree(*function_call.untraced_segment->nested_call);
+      m_s.IndentLess();
+    }
+
+    for (const TraceDumper::FunctionCall::TracedSegment &segment :
+         function_call.traced_segments) {
+      m_s.Indent();
+      DumpSegmentContext(segment);
+      m_s.Format("  [{0}, {1}]\n", segment.from_id, segment.to_id);
+
+      if (segment.nested_call) {
+        m_s.IndentMore();
+        DumpFunctionCallTree(**segment.nested_call);
+        m_s.IndentLess();
+      }
+    }
+  }
+
   Stream &m_s;
   TraceDumperOptions m_options;
   bool m_was_prev_instruction_an_error = false;
@@ -335,13 +432,12 @@
 /// instruction's symbol context when possible.
 static SymbolContext
 CalculateSymbolContext(const Address &address,
-                       const TraceDumper::SymbolInfo &prev_symbol_info) {
+                       const SymbolContext &prev_symbol_context) {
   AddressRange range;
-  if (prev_symbol_info.sc.GetAddressRange(eSymbolContextEverything, 0,
-                                          /*inline_block_range*/ false,
-                                          range) &&
+  if (prev_symbol_context.GetAddressRange(eSymbolContextEverything, 0,
+                                          /*inline_block_range*/ true, range) &&
       range.Contains(address))
-    return prev_symbol_info.sc;
+    return prev_symbol_context;
 
   SymbolContext sc;
   address.CalculateSymbolContext(&sc, eSymbolContextEverything);
@@ -384,6 +480,19 @@
                    : InstructionSP());
 }
 
+static TraceDumper::SymbolInfo
+CalculateSymbolInfo(const ExecutionContext &exe_ctx, lldb::addr_t load_address,
+                    const TraceDumper::SymbolInfo &prev_symbol_info) {
+  TraceDumper::SymbolInfo symbol_info;
+  symbol_info.exe_ctx = exe_ctx;
+  symbol_info.address.SetLoadAddress(load_address, exe_ctx.GetTargetPtr());
+  symbol_info.sc =
+      CalculateSymbolContext(symbol_info.address, prev_symbol_info.sc);
+  std::tie(symbol_info.disassembler, symbol_info.instruction) =
+      CalculateDisass(symbol_info, prev_symbol_info, exe_ctx);
+  return symbol_info;
+}
+
 Optional<lldb::user_id_t> TraceDumper::DumpInstructions(size_t count) {
   ThreadSP thread_sp = m_cursor_sp->GetExecutionContextRef().GetThreadSP();
 
@@ -424,14 +533,8 @@
       item.load_address = m_cursor_sp->GetLoadAddress();
 
       if (!m_options.raw) {
-        SymbolInfo symbol_info;
-        symbol_info.exe_ctx = exe_ctx;
-        symbol_info.address.SetLoadAddress(item.load_address,
-                                           exe_ctx.GetTargetPtr());
-        symbol_info.sc =
-            CalculateSymbolContext(symbol_info.address, prev_symbol_info);
-        std::tie(symbol_info.disassembler, symbol_info.instruction) =
-            CalculateDisass(symbol_info, prev_symbol_info, exe_ctx);
+        SymbolInfo symbol_info =
+            CalculateSymbolInfo(exe_ctx, item.load_address, prev_symbol_info);
         item.prev_symbol_info = prev_symbol_info;
         item.symbol_info = symbol_info;
         prev_symbol_info = symbol_info;
@@ -443,3 +546,256 @@
     m_writer_up->NoMoreData();
   return last_id;
 }
+
+void TraceDumper::FunctionCall::TracedSegment::AppendInsn(
+    const TraceCursorSP &cursor_sp,
+    const TraceDumper::SymbolInfo &symbol_info) {
+  to_id = cursor_sp->GetId();
+  last_symbol_info = symbol_info;
+}
+
+TraceDumper::FunctionCall &
+TraceDumper::FunctionCall::TracedSegment::CreateNestedCall(
+    const TraceCursorSP &cursor_sp,
+    const TraceDumper::SymbolInfo &symbol_info) {
+  nested_call.emplace(std::make_unique<FunctionCall>(cursor_sp, symbol_info));
+  (*nested_call)->parent_call = &call;
+  return **nested_call;
+}
+
+TraceDumper::FunctionCall::FunctionCall(
+    const TraceCursorSP &cursor_sp,
+    const TraceDumper::SymbolInfo &symbol_info) {
+  is_error = cursor_sp->IsError();
+  AppendSegment(cursor_sp, symbol_info);
+}
+
+void TraceDumper::FunctionCall::AppendSegment(
+    const TraceCursorSP &cursor_sp,
+    const TraceDumper::SymbolInfo &symbol_info) {
+  traced_segments.emplace_back(cursor_sp, symbol_info, *this);
+}
+
+const TraceDumper::SymbolInfo &
+TraceDumper::FunctionCall::GetLastSymbolInfo() const {
+  return traced_segments.back().last_symbol_info;
+}
+
+/// Given an instruction that happens after a return, find the ancestor function
+/// call that owns it. If this ancestor doesn't exist, create a new ancestor and
+/// make it the root of the tree.
+///
+/// \param[in] last_function_call
+///   The function call that performs the return.
+///
+/// \param[in] symbol_info
+///   The symbol information of the instruction after the return.
+///
+/// \param[in] cursor_sp
+///   The cursor pointing to the instruction after the return.
+///
+/// \param[in,out] roots
+///   The object owning the roots. It might be modified if a new root needs to
+///   be created.
+///
+/// \return
+///   A reference to the function call that owns the new instruction
+static TraceDumper::FunctionCall &AppendReturnedInstructionToFunctionCallForest(
+    TraceDumper::FunctionCall &last_function_call,
+    const TraceDumper::SymbolInfo &symbol_info, const TraceCursorSP &cursor_sp,
+    std::vector<TraceDumper::FunctionCallUP> &roots) {
+
+  // We omit the current node because we can't return to itself.
+  TraceDumper::FunctionCall *ancestor = last_function_call.parent_call;
+
+  for (; ancestor; ancestor = ancestor->parent_call) {
+    // This loop traverses the tree until it finds a call that we can return to.
+    if (IsSameInstructionSymbolContext(ancestor->GetLastSymbolInfo(),
+                                       symbol_info,
+                                       /*check_source_line_info=*/false)) {
+      // We returned to this symbol, so we are assuming we are returning there
+      // Note: If this is not robust enough, we should actually check if we
+      // returning to the instruction that follows the last instruction from
+      // that call, as that's the behavior of CALL instructions.
+      ancestor->AppendSegment(cursor_sp, symbol_info);
+      return *ancestor;
+    }
+  }
+
+  // We didn't find the call we were looking for, so we now create a synthetic
+  // one that will contain the new instruction in its first traced segment.
+  TraceDumper::FunctionCallUP new_root =
+      std::make_unique<TraceDumper::FunctionCall>(cursor_sp, symbol_info);
+  // This new root will own the previous root through an untraced segment.
+  new_root->untraced_segment.emplace(std::move(roots.back()));
+  // We update the roots container to point to the new root
+  roots.pop_back();
+  roots.emplace_back(std::move(new_root));
+  return *roots.back();
+}
+
+/// Append an instruction to a function call forest. The new instruction might
+/// be appended to the current segment, to a new nest call, or return to an
+/// ancestor call.
+///
+/// \param[in] exe_ctx
+///   The exeuction context of the traced thread.
+///
+/// \param[in] last_function_call
+///   The chronologically most recent function call before the new instruction.
+///
+/// \param[in] prev_symbol_info
+///   The symbol information of the previous instruction in the trace.
+///
+/// \param[in] symbol_info
+///   The symbol information of the new instruction.
+///
+/// \param[in] cursor_sp
+///   The cursor pointing to the new instruction.
+///
+/// \param[in,out] roots
+///   The object owning the roots. It might be modified if a new root needs to
+///   be created.
+///
+/// \return
+///   A reference to the function call that owns the new instruction.
+static TraceDumper::FunctionCall &AppendInstructionToFunctionCallForest(
+    const ExecutionContext &exe_ctx,
+    TraceDumper::FunctionCall *last_function_call,
+    const TraceDumper::SymbolInfo &prev_symbol_info,
+    const TraceDumper::SymbolInfo &symbol_info, const TraceCursorSP &cursor_sp,
+    std::vector<TraceDumper::FunctionCallUP> &roots) {
+  if (!last_function_call || last_function_call->is_error) {
+    // We create a brand new root
+    roots.emplace_back(
+        std::make_unique<TraceDumper::FunctionCall>(cursor_sp, symbol_info));
+    return *roots.back();
+  }
+
+  AddressRange range;
+  if (symbol_info.sc.GetAddressRange(
+          eSymbolContextBlock | eSymbolContextFunction | eSymbolContextSymbol,
+          0, /*inline_block_range*/ true, range)) {
+    if (range.GetBaseAddress() == symbol_info.address) {
+      // Our instruction is the first instruction of a function. This has
+      // to be a call. This should also identify if a trampoline or the linker
+      // is making a call using a non-CALL instruction.
+      return last_function_call->traced_segments.back().CreateNestedCall(
+          cursor_sp, symbol_info);
+    }
+  }
+  if (IsSameInstructionSymbolContext(prev_symbol_info, symbol_info,
+                                     /*check_source_line_info=*/false)) {
+    // We are still in the same function. This can't be a call because otherwise
+    // we would be in the first instruction of the symbol.
+    last_function_call->traced_segments.back().AppendInsn(cursor_sp,
+                                                          symbol_info);
+    return *last_function_call;
+  }
+  // Now we are in a different symbol. Let's see if this is a return or a
+  // call
+  switch (last_function_call->traced_segments.back()
+              .last_symbol_info.instruction->GetControlFlowKind(&exe_ctx)) {
+  case lldb::eInstructionControlFlowKindCall:
+  case lldb::eInstructionControlFlowKindFarCall: {
+    // This is a regular call
+    return last_function_call->traced_segments.back().CreateNestedCall(
+        cursor_sp, symbol_info);
+  }
+  case lldb::eInstructionControlFlowKindFarReturn:
+  case lldb::eInstructionControlFlowKindReturn: {
+    // We should have caught most trampolines and linker functions earlier, so let's assume
+    // this is a regular return.
+    return AppendReturnedInstructionToFunctionCallForest(
+        *last_function_call, symbol_info, cursor_sp, roots);
+  }
+  default:
+    // we changed symbols not using a call or return and we are not in the
+    // beginning of a symbol, so this should be something very artificial
+    // or maybe a jump to some label in the middle of it section.
+
+    // We first check if it's a return from an inline method
+    if (prev_symbol_info.sc.block &&
+        prev_symbol_info.sc.block->GetContainingInlinedBlock()) {
+      return AppendReturnedInstructionToFunctionCallForest(
+          *last_function_call, symbol_info, cursor_sp, roots);
+    }
+    // Now We assume it's a call. We should revisit this in the future.
+    // Ideally we should be able to decide whether to create a new tree,
+    // or go deeper or higher in the stack.
+    return last_function_call->traced_segments.back().CreateNestedCall(
+        cursor_sp, symbol_info);
+  }
+}
+
+/// Append an error to a function call forest. The new error might be appended
+/// to the current segment if it contains errors or will create a new root.
+///
+/// \param[in] last_function_call
+///   The chronologically most recent function call before the new error.
+///
+/// \param[in] cursor_sp
+///   The cursor pointing to the new error.
+///
+/// \param[in,out] roots
+///   The object owning the roots. It might be modified if a new root needs to
+///   be created.
+///
+/// \return
+///   A reference to the function call that owns the new error.
+TraceDumper::FunctionCall &AppendErrorToFunctionCallForest(
+    TraceDumper::FunctionCall *last_function_call, TraceCursorSP &cursor_sp,
+    std::vector<TraceDumper::FunctionCallUP> &roots) {
+  if (last_function_call && last_function_call->is_error) {
+    last_function_call->traced_segments.back().AppendInsn(
+        cursor_sp, TraceDumper::SymbolInfo{});
+    return *last_function_call;
+  } else {
+    roots.emplace_back(std::make_unique<TraceDumper::FunctionCall>(
+        cursor_sp, TraceDumper::SymbolInfo{}));
+    return *roots.back();
+  }
+}
+
+static std::vector<TraceDumper::FunctionCallUP>
+CreateFunctionCallForest(TraceCursorSP &cursor_sp,
+                         const ExecutionContext &exe_ctx) {
+
+  std::vector<TraceDumper::FunctionCallUP> roots;
+  TraceDumper::SymbolInfo prev_symbol_info;
+
+  TraceDumper::FunctionCall *last_function_call = nullptr;
+
+  for (; cursor_sp->HasValue(); cursor_sp->Next()) {
+    if (cursor_sp->IsError()) {
+      last_function_call = &AppendErrorToFunctionCallForest(last_function_call,
+                                                            cursor_sp, roots);
+      prev_symbol_info = {};
+    } else if (cursor_sp->IsInstruction()) {
+      TraceDumper::SymbolInfo symbol_info = CalculateSymbolInfo(
+          exe_ctx, cursor_sp->GetLoadAddress(), prev_symbol_info);
+
+      last_function_call = &AppendInstructionToFunctionCallForest(
+          exe_ctx, last_function_call, prev_symbol_info, symbol_info, cursor_sp,
+          roots);
+      prev_symbol_info = symbol_info;
+    } else if (cursor_sp->GetEventType() == eTraceEventCPUChanged) {
+      // TODO: In case of a CPU change, we create a new root because we haven't
+      // investigated yet if a call tree can safely continue or if interrupts
+      // could have polluted the original call tree.
+      last_function_call = nullptr;
+      prev_symbol_info = {};
+    }
+  }
+
+  return roots;
+}
+
+void TraceDumper::DumpFunctionCalls() {
+  ThreadSP thread_sp = m_cursor_sp->GetExecutionContextRef().GetThreadSP();
+  ExecutionContext exe_ctx;
+  thread_sp->GetProcess()->GetTarget().CalculateExecutionContext(exe_ctx);
+
+  m_writer_up->FunctionCallForest(
+      CreateFunctionCallForest(m_cursor_sp, exe_ctx));
+}
Index: lldb/source/Commands/CommandObjectThread.cpp
===================================================================
--- lldb/source/Commands/CommandObjectThread.cpp
+++ lldb/source/Commands/CommandObjectThread.cpp
@@ -2128,11 +2128,11 @@
 
       switch (short_option) {
       case 'j': {
-        m_json = true;
+        m_dumper_options.json = true;
         break;
       }
       case 'J': {
-        m_pretty_json = true;
+        m_dumper_options.pretty_print_json = true;
         break;
       }
       case 'F': {
@@ -2146,8 +2146,7 @@
     }
 
     void OptionParsingStarting(ExecutionContext *execution_context) override {
-      m_json = false;
-      m_pretty_json = false;
+      m_dumper_options = {};
       m_output_file = llvm::None;
     }
 
@@ -2158,8 +2157,7 @@
     static const size_t kDefaultCount = 20;
 
     // Instance variables to hold the values for command options.
-    bool m_json;
-    bool m_pretty_json;
+    TraceDumperOptions m_dumper_options;
     llvm::Optional<FileSpec> m_output_file;
   };
 
@@ -2187,10 +2185,30 @@
       result.AppendError("invalid thread\n");
       return false;
     }
-    result.AppendMessageWithFormatv(
-        "json = {0}, pretty_json = {1}, file = {2}, thread = {3}",
-        m_options.m_json, m_options.m_pretty_json, !!m_options.m_output_file,
-        thread_sp->GetID());
+
+    llvm::Expected<TraceCursorSP> cursor_or_error =
+        m_exe_ctx.GetTargetSP()->GetTrace()->CreateNewCursor(*thread_sp);
+
+    if (!cursor_or_error) {
+      result.AppendError(llvm::toString(cursor_or_error.takeError()));
+      return false;
+    }
+    TraceCursorSP &cursor_sp = *cursor_or_error;
+
+    llvm::Optional<StreamFile> out_file;
+    if (m_options.m_output_file) {
+      out_file.emplace(m_options.m_output_file->GetPath().c_str(),
+                       File::eOpenOptionWriteOnly | File::eOpenOptionCanCreate,
+                       lldb::eFilePermissionsFileDefault);
+    }
+
+    m_options.m_dumper_options.forwards = true;
+
+    TraceDumper dumper(std::move(cursor_sp),
+                       out_file ? *out_file : result.GetOutputStream(),
+                       m_options.m_dumper_options);
+
+    dumper.DumpFunctionCalls();
     return true;
   }
 
Index: lldb/include/lldb/Target/TraceDumper.h
===================================================================
--- lldb/include/lldb/Target/TraceDumper.h
+++ lldb/include/lldb/Target/TraceDumper.h
@@ -73,6 +73,238 @@
     llvm::Optional<lldb::cpu_id_t> cpu_id;
   };
 
+  /// Forward declaration
+  struct FunctionCall;
+  using FunctionCallUP = std::unique_ptr<FunctionCall>;
+
+  /// An object representing a traced function call.
+  ///
+  /// A function call is represented using segments and subcalls.
+  ///
+  /// TracedSegment:
+  ///   A traced segment is a maximal list of consecutive traced instructions
+  ///   that belong to the same call. A traced segment will end in three
+  ///   possible ways:
+  ///     - With a call to a function deeper in the callstack. In this case,
+  ///     most of the times this nested call will return
+  ///       and resume with the next segment of this segment's owning function
+  ///       call. More on this later.
+  ///     - Abruptly due to end of trace. In this case, we weren't able to trace
+  ///     the end of this function call.
+  ///     - Simply a return higher in the callstack.
+  ///
+  ///   In terms of implementation details, as segment can be represented with
+  ///   the beginning and ending instruction IDs from the instruction trace.
+  ///
+  ///  UntracedSegment:
+  ///   It might happen that we didn't trace the beginning of a function and we
+  ///   saw it for the first time as part of a return. As a way to signal these
+  ///   cases, we have a placeholder UntracedSegment class that completes the
+  ///   callgraph.
+  ///
+  ///  Example:
+  ///   We might have this piece of execution:
+  ///
+  ///     main() [offset 0x00 to 0x20] [traced instruction ids 1 to 4]
+  ///       foo()  [offset 0x00 to 0x80] [traced instruction ids 5 to 20] # main
+  ///       invoked foo
+  ///     main() [offset 0x24 to 0x40] [traced instruction ids 21 to 30]
+  ///
+  ///   In this case, our function main invokes foo. We have 3 segments: main
+  ///   [offset 0x00 to 0x20], foo() [offset 0x00 to 0x80], and main() [offset
+  ///   0x24 to 0x40]. We also have the instruction ids from the corresponding
+  ///   linear instruction trace for each segment.
+  ///
+  ///   But what if we started tracing since the middle of foo? Then we'd have
+  ///   an incomplete trace
+  ///
+  ///       foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10]
+  ///     main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20]
+  ///
+  ///   Notice that we changed the instruction ids because this is a new trace.
+  ///   Here, in order to have a somewhat complete tree with good traversal
+  ///   capabilities, we can create an UntracedSegment to signal the portion of
+  ///   main() that we didn't trace. We don't know if this segment was in fact
+  ///   multiple segments with many function calls. We'll never know. The
+  ///   resulting tree looks like the following:
+  ///
+  ///     main() [untraced]
+  ///       foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10]
+  ///     main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20]
+  ///
+  ///   And in pseudo-code:
+  ///
+  ///     FunctionCall [
+  ///       UntracedSegment {
+  ///         symbol: main()
+  ///         nestedCall: FunctionCall [ # this untraced segment has a nested
+  ///         call
+  ///           TracedSegment {
+  ///             symbol: foo()
+  ///             fromInstructionId: 1
+  ///             toInstructionId: 10
+  ///             nestedCall: none # this doesn't have a nested call
+  ///           }
+  ///         }
+  ///       ],
+  ///       TracedSegment {
+  ///         symbol: main()
+  ///         fromInstructionId: 11
+  ///         toInstructionId: 20
+  ///         nestedCall: none # this also doesn't have a nested call
+  ///       }
+  ///   ]
+  ///
+  ///   We can see the nested structure and how instructions are represented as
+  ///   segments.
+  ///
+  ///
+  ///   Returns:
+  ///     Code doesn't always behave intuitively. Some interesting functions
+  ///     might modify the stack and thus change the behavior of common
+  ///     instructions like CALL and RET. We try to identify these cases, and
+  ///     the result is that the return edge from a segment might connect with a
+  ///     function call very high the stack. For example, you might have
+  ///
+  ///     main()
+  ///       foo()
+  ///         bar()
+  ///         # here bar modifies the stack and pops foo() from it. Then it
+  ///         finished the a RET (return)
+  ///     main() # we came back directly to main()
+  ///
+  ///     I have observed some trampolines doing this, as well as some std
+  ///     functions (like ostream functions). So consumers should be aware of
+  ///     this.
+  ///
+  ///     There are all sorts of "abnormal" behaviors you can see in code, and
+  ///     whenever we fail at identifying what's going on, we prefer to create a
+  ///     new tree.
+  ///
+  ///   Function call forest:
+  ///     A single tree would suffice if a trace didn't contain errors nor
+  ///     abnormal behaviors that made our algorithms fail. Sadly these
+  ///     anomalies exist and we prefer not to use too many heuristics and
+  ///     probably end up lying to the user. So we create a new tree from the
+  ///     point we can't continue using the previous tree. This results in
+  ///     having a forest instead of a single tree. This is probably the best we
+  ///     can do if we consumers want to use this data to perform performance
+  ///     analysis or reverse debugging.
+  ///
+  ///   Non-functions:
+  ///     Not everything in a program is a function. There are blocks of
+  ///     instructions that are simply labeled or even regions without symbol
+  ///     information that we don't what they are. We treat all of them as
+  ///     functions for simplicity.
+  ///
+  ///   Errors:
+  ///     Whenever an error is found, a new tree with a single segment is
+  ///     created. All consecutive errors after the original one are then
+  ///     appended to this segment. As a note, something that GDB does is to use
+  ///     some heuristics to merge trees that were interrupted by errors. We are
+  ///     leaving that out of scope until a feature like that one is really
+  ///     needed.
+  struct FunctionCall {
+    struct TracedSegment {
+      /// Delimiting instruction IDs taken chronologically.
+      lldb::user_id_t from_id;
+      lldb::user_id_t to_id;
+      /// The symbol information of the delimiting instructions.
+      SymbolInfo first_symbol_info;
+      SymbolInfo last_symbol_info;
+      /// A nested call starting at the end of this segment.
+      llvm::Optional<FunctionCallUP> nested_call;
+      /// Owning call
+      FunctionCall &call;
+
+      /// \param[in] cursor_sp
+      ///   A cursor pointing to the beginning of the segment.
+      ///
+      /// \param[in] symbol_info
+      ///   The symbol information of the first instruction of the segment.
+      ///
+      /// \param[in] call
+      ///   The FunctionCall object that owns this segment.
+      TracedSegment(const lldb::TraceCursorSP &cursor_sp,
+                    const SymbolInfo &symbol_info, FunctionCall &call)
+          : from_id(cursor_sp->GetId()), to_id(cursor_sp->GetId()),
+            first_symbol_info(symbol_info), last_symbol_info(symbol_info),
+            call(call) {}
+
+      /// Append a new instruction to this segment.
+      ///
+      /// \param[in] cursor_sp
+      ///   A cursor pointing to the new instruction.
+      ///
+      /// \param[in] symbol_info
+      ///   The symbol information of the new instruction.
+      void AppendInsn(const lldb::TraceCursorSP &cursor_sp,
+                      const SymbolInfo &symbol_info);
+
+      /// Create a nested call at the end of this segment.
+      ///
+      /// \param[in] cursor_sp
+      ///   A cursor pointing to the first instruction of the nested call.
+      ///
+      /// \param[in] symbol_info
+      ///   The symbol information of the first instruction of the nested call.
+      FunctionCall &CreateNestedCall(const lldb::TraceCursorSP &cursor_sp,
+                                     const SymbolInfo &symbol_info);
+
+      TracedSegment(const TracedSegment &) = delete;
+      TracedSegment &operator=(TracedSegment const &);
+    };
+
+    struct UntracedSegment {
+      FunctionCallUP nested_call;
+      /// Note: Untraced segments can only exist if have also seen a traced
+      /// segment of the same function call. Thus, we can use those traced
+      /// segments if we want symbol information and such.
+
+      UntracedSegment(FunctionCallUP &&nested_call)
+          : nested_call(std::move(nested_call)) {}
+
+      UntracedSegment(const UntracedSegment &) = delete;
+      UntracedSegment &operator=(UntracedSegment const &);
+    };
+
+    /// Create a new function call given an instruction. This will also create a
+    /// segment for that instruction.
+    ///
+    /// \param[in] cursor_sp
+    ///   A cursor pointing to the first instruction of that function call.
+    ///
+    /// \param[in] symbol_info
+    ///   The symbol information of that first instruction.
+    FunctionCall(const lldb::TraceCursorSP &cursor_sp,
+                 const SymbolInfo &symbol_info);
+
+    /// Append a new traced segment to this funciton call.
+    ///
+    /// \param[in] cursor_sp
+    ///   A cursor pointing to the first instruction of the new segment.
+    ///
+    /// \param[in] symbol_info
+    ///   The symbol information of that first instruction.
+    void AppendSegment(const lldb::TraceCursorSP &cursor_sp,
+                       const SymbolInfo &symbol_info);
+
+    /// \return
+    ///   The symbol info of the last traced instruction of this call.
+    const SymbolInfo &GetLastSymbolInfo() const;
+
+    /// An optional untraced segment that precedes all the traced segments.
+    llvm::Optional<UntracedSegment> untraced_segment;
+    /// The traced segments in order. We used a deque to prevent moving these
+    /// objects when appending to the list, which would happen with vector.
+    std::deque<TracedSegment> traced_segments;
+    /// The parent call, which might be null. Useful for reconstructing
+    /// callstacks.
+    FunctionCall *parent_call = nullptr;
+    /// Whether this call represents a list of consecutive errors.
+    bool is_error;
+  };
+
   /// Interface used to abstract away the format in which the instruction
   /// information will be dumped.
   class OutputWriter {
@@ -84,6 +316,10 @@
 
     /// Dump a trace item (instruction, error or event).
     virtual void TraceItem(const TraceItem &item) = 0;
+
+    /// Dump a function call forest.
+    virtual void FunctionCallForest(const std::vector<FunctionCallUP> &forest) {
+    }
   };
 
   /// Create a instruction dumper for the cursor.
@@ -113,6 +349,9 @@
   ///     if no instructions were visited.
   llvm::Optional<lldb::user_id_t> DumpInstructions(size_t count);
 
+  /// Dump all function calls forwards chronologically and hierarchically
+  void DumpFunctionCalls();
+
 private:
   /// Create a trace item for the current position without symbol information.
   TraceItem CreatRawTraceItem();

_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

[Lldb-commits] [PATCH] D135917: [lldb][trace] Add a basic function call dump [2] - Implement the reconstruction algorithm

Reply via email to