On Thu, Apr 24, 2025 at 5:53 PM Serhei Makarov <ser...@serhei.io> wrote: > > Changes for v4: > > - Separate out libdwfl_stacktrace, as requested. > > Changes for v3: > > - use const void *stack, not void *, to allow users > to pass a const stack sample > > Changes for v2: > > - use renamed __libdwfl_set_initial_registers_thread > > - oops, should use provided sample_arg->perf_regs_mask > in sample_set_initial registers > > * * * > > This is a new interface for unwinding that doesn't require the Dwfl to > be attached to a live process (via ptrace) or via corefile. Instead, > data from a perf_events stack sample is provided along with an Elf > struct used to identify the architecture. Based on code from > eu-stacktrace. > > * libdwfl_stacktrace/libdwfl_stacktrace.h (dwflst_perf_sample_getframes): > New function. > * libdwfl_stacktrace/dwflst_perf_frame.c > (struct __libdwfl_stacktrace_perf_sample_info): New struct, based on > src/stacktrace.c struct sample_arg. > (sample_next_thread): New function, based on src/stacktrace.c. > (sample_getthread): Ditto. > (copy_word_64): New macro, based on src/stacktrace.c. > (copy_word_32): Ditto. > (copy_word): Ditto. > (elf_memory_read): New function, based on src/stacktrace.c. > (sample_memory_read): Ditto. > (sample_set_initial_registers): Ditto. > (sample_detach): Ditto. > (sample_thread_callbacks): New struct, set of callbacks based on > src/stacktrace.c sample_thread_callbacks. > (dwflst_perf_sample_getframes): New function, based on parts of > src/stacktrace.c sysprof_find_dwfl. If the Dwfl is not attached, > attaches it with sample_thread_callbacks and > __libdwfl_stacktrace_perf_sample_info. Populates the > __libdwfl_stacktrace_perf_sample_info with data from the stack > sample and calls dwfl_getthread_frames to unwind it using the > sample_thread_callbacks. > * libdw/libdw.map (ELFUTILS_0.193): Add dwflst_perf_sample_getframes. 
> --- > libdw/libdw.map | 1 + > libdwfl_stacktrace/dwflst_perf_frame.c | 193 +++++++++++++++++++++++- > libdwfl_stacktrace/libdwfl_stacktrace.h | 15 +- > 3 files changed, 207 insertions(+), 2 deletions(-) > > diff --git a/libdw/libdw.map b/libdw/libdw.map > index 688e415c..137b5738 100644 > --- a/libdw/libdw.map > +++ b/libdw/libdw.map > @@ -405,4 +405,5 @@ ELFUTILS_0.193_EXPERIMENTAL { > dwflst_module_gettracker; > dwflst_tracker_linux_proc_find_elf; > dwflst_tracker_find_pid; > + dwflst_perf_sample_getframes; > }; > diff --git a/libdwfl_stacktrace/dwflst_perf_frame.c > b/libdwfl_stacktrace/dwflst_perf_frame.c > index 79e8e482..591097e8 100644 > --- a/libdwfl_stacktrace/dwflst_perf_frame.c > +++ b/libdwfl_stacktrace/dwflst_perf_frame.c > @@ -60,4 +60,195 @@ uint64_t dwflst_perf_sample_preferred_regs_mask > (GElf_Half machine) > return 0; > } > > -/* XXX dwflst_perf_sample_getframes to be added in subsequent patch */ > +struct __libdwfl_stacktrace_perf_sample_info {
Should this struct have a __libdwfl prefix? It isn't meant for internal libdwfl use. I don't think it needs a __libdwflst prefix either, since the struct is local to this file. > + pid_t pid; > + pid_t tid; > + Dwarf_Addr base_addr; > + const uint8_t *stack; > + size_t stack_size; > + const Dwarf_Word *regs; > + uint n_regs; > + uint64_t perf_regs_mask; > + uint abi; > + Dwarf_Addr pc; > +}; > + > +/* The next few functions imitate the corefile interface for a single > + stack sample, with very restricted access to registers and memory. */ > + > +/* Just yield the single thread id matching the sample. */ > +static pid_t > +sample_next_thread (Dwfl *dwfl __attribute__ ((unused)), void *dwfl_arg, > + void **thread_argp) > +{ > + struct __libdwfl_stacktrace_perf_sample_info *sample_arg = > + (struct __libdwfl_stacktrace_perf_sample_info *)dwfl_arg; > + if (*thread_argp == NULL) > + { > + *thread_argp = (void *)0xea7b3375; > + return sample_arg->tid; > + } > + else > + return 0; > +} > + > +/* Just check that the thread id matches the sample.
*/ > +static bool > +sample_getthread (Dwfl *dwfl __attribute__ ((unused)), pid_t tid, > + void *dwfl_arg, void **thread_argp) > +{ > + struct __libdwfl_stacktrace_perf_sample_info *sample_arg = > + (struct __libdwfl_stacktrace_perf_sample_info *)dwfl_arg; > + *thread_argp = (void *)sample_arg; > + if (sample_arg->tid != tid) > + { > + __libdwfl_seterrno(DWFL_E_INVALID_ARGUMENT); > + return false; > + } > + return true; > +} > + > +#define copy_word_64(result, d) \ > + if ((((uintptr_t) (d)) & (sizeof (uint64_t) - 1)) == 0) \ > + *(result) = *(uint64_t *)(d); \ > + else \ > + memcpy ((result), (d), sizeof (uint64_t)); > + > +#define copy_word_32(result, d) \ > + if ((((uintptr_t) (d)) & (sizeof (uint32_t) - 1)) == 0) \ > + *(result) = *(uint32_t *)(d); \ > + else \ > + memcpy ((result), (d), sizeof (uint32_t)); > + > +#define copy_word(result, d, abi) \ > + if ((abi) == PERF_SAMPLE_REGS_ABI_64) \ > + { copy_word_64((result), (d)); } \ > + else if ((abi) == PERF_SAMPLE_REGS_ABI_32) \ > + { copy_word_32((result), (d)); } \ > + else \ > + *(result) = 0; > + > +static bool > +elf_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, void *arg) > +{ > + struct __libdwfl_stacktrace_perf_sample_info *sample_arg = > + (struct __libdwfl_stacktrace_perf_sample_info *)arg; > + Dwfl_Module *mod = INTUSE(dwfl_addrmodule) (dwfl, addr); > + Dwarf_Addr bias; > + Elf_Scn *section = INTUSE(dwfl_module_address_section) (mod, &addr, &bias); > + > + if (!section) > + { > + __libdwfl_seterrno(DWFL_E_ADDR_OUTOFRANGE); > + return false; > + } > + > + Elf_Data *data = elf_getdata(section, NULL); > + if (data && data->d_buf && data->d_size > addr) { > + uint8_t *d = ((uint8_t *)data->d_buf) + addr; > + copy_word(result, d, sample_arg->abi); > + return true; > + } > + __libdwfl_seterrno(DWFL_E_ADDR_OUTOFRANGE); > + return false; > +} > + > +static bool > +sample_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, void > *arg) > +{ > + struct 
__libdwfl_stacktrace_perf_sample_info *sample_arg = > + (struct __libdwfl_stacktrace_perf_sample_info *)arg; > + /* Imitate read_cached_memory() with the stack sample data as the cache. */ > + if (addr < sample_arg->base_addr || > + addr - sample_arg->base_addr >= sample_arg->stack_size) > + return elf_memory_read(dwfl, addr, result, arg); > + const uint8_t *d = &sample_arg->stack[addr - sample_arg->base_addr]; > + copy_word(result, d, sample_arg->abi); > + return true; > +} > + > +static bool > +sample_set_initial_registers (Dwfl_Thread *thread, void *arg) > +{ > + struct __libdwfl_stacktrace_perf_sample_info *sample_arg = > + (struct __libdwfl_stacktrace_perf_sample_info *)arg; > + INTUSE(dwfl_thread_state_register_pc) (thread, sample_arg->pc); > + Dwfl_Process *process = thread->process; > + Ebl *ebl = process->ebl; > + return ebl_set_initial_registers_sample > + (ebl, sample_arg->regs, sample_arg->n_regs, > + sample_arg->perf_regs_mask, sample_arg->abi, > + __libdwfl_set_initial_registers_thread, thread); > +} > + > +static void > +sample_detach (Dwfl *dwfl __attribute__ ((unused)), void *dwfl_arg) > +{ > + struct __libdwfl_stacktrace_perf_sample_info *sample_arg = > + (struct __libdwfl_stacktrace_perf_sample_info *)dwfl_arg; > + free (sample_arg); > +} > + > +static const Dwfl_Thread_Callbacks sample_thread_callbacks = > + { > + sample_next_thread, > + sample_getthread, > + sample_memory_read, > + sample_set_initial_registers, > + sample_detach, > + NULL, /* sample_thread_detach */ > + }; > + > +int > +dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf, > + pid_t pid, pid_t tid, > + const void *stack, size_t stack_size, > + const Dwarf_Word *regs, uint n_regs, > + uint64_t perf_regs_mask, uint abi, > + int (*callback) (Dwfl_Frame *state, void *arg), > + void *arg) > +{ > + /* TODO: Lock the dwfl to ensure attach_state does not interfere > + with other dwfl_perf_sample_getframes calls. 
*/ > + > + struct __libdwfl_stacktrace_perf_sample_info *sample_arg; > + bool attached = false; > + if (dwfl->process != NULL) > + { > + sample_arg = dwfl->process->callbacks_arg; > + attached = true; > + } > + else > + { > + sample_arg = malloc (sizeof *sample_arg); > + if (sample_arg == NULL) > + { > + __libdwfl_seterrno(DWFL_E_NOMEM); > + return -1; > + } > + } > + > + sample_arg->pid = pid; > + sample_arg->tid = tid; > + sample_arg->stack = (const uint8_t *)stack; > + sample_arg->stack_size = stack_size; > + sample_arg->regs = regs; > + sample_arg->n_regs = n_regs; > + sample_arg->perf_regs_mask = perf_regs_mask; > + sample_arg->abi = abi; > + > + if (! attached > + && ! INTUSE(dwfl_attach_state) (dwfl, elf, pid, > + &sample_thread_callbacks, sample_arg)) > + return -1; > + > + /* Now that Dwfl is attached, we can access its Ebl: */ > + Dwfl_Process *process = dwfl->process; > + Ebl *ebl = process->ebl; > + sample_arg->base_addr = ebl_sample_base_addr(ebl, regs, n_regs, > + perf_regs_mask, abi); > + sample_arg->pc = ebl_sample_pc(ebl, regs, n_regs, > + perf_regs_mask, abi); > + > + return INTUSE(dwfl_getthread_frames) (dwfl, tid, callback, arg); > +} > diff --git a/libdwfl_stacktrace/libdwfl_stacktrace.h > b/libdwfl_stacktrace/libdwfl_stacktrace.h > index ed6a6a5c..ed0b612f 100644 > --- a/libdwfl_stacktrace/libdwfl_stacktrace.h > +++ b/libdwfl_stacktrace/libdwfl_stacktrace.h > @@ -107,7 +107,20 @@ extern int dwflst_tracker_linux_proc_find_elf > (Dwfl_Module *mod, void **userdata > char **file_name, Elf **); > > > -/* XXX dwflst_perf_sample_getframes to be added in subsequent patch */ > +/* Like dwfl_thread_getframes, but iterates through the frames for a > + linux perf_events stack sample rather than a live thread. Calls > + dwfl_attach_state on DWFL, with architecture specified by ELF, ELF > + must remain valid during Dwfl lifetime. 
Returns zero if all frames > + have been processed by the callback, returns -1 on error, or the > + value of the callback when not DWARF_CB_OK. -1 returned on error > + will set dwfl_errno (). */ > +int dwflst_perf_sample_getframes (Dwfl *dwfl, Elf *elf, pid_t pid, pid_t tid, > + const void *stack, size_t stack_size, > + const Dwarf_Word *regs, uint32_t n_regs, > + uint64_t perf_regs_mask, uint32_t abi, > + int (*callback) (Dwfl_Frame *state, void > *arg), > + void *arg) > + __nonnull_attribute__ (1, 5, 7, 11); > > /* Returns the linux perf_events register mask describing a set of > registers sufficient for unwinding on MACHINE, or 0 if libdwfl does > -- > 2.47.0 > Aaron