On 22/01/2021 18:18, Alexandre Truong wrote: > +} > + > +static int add_entry(struct unwind_entry *entry, void *arg) > +{ > + struct entries *entries = arg; > + > + entries->stack[entries->i++] = entry->ip; > + return 0; > +} > + > +u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread > *thread) > +{ > + u64 leaf_frame; > + struct entries entries = {{0, 0}, 0}; > + > + if (get_leaf_frame_caller_enabled(sample)) > + return 0; > + > + unwind__get_entries(add_entry, &entries, thread, sample, 2); > + leaf_frame = callchain_param.order == ORDER_CALLER ? > + entries.stack[0] : entries.stack[1]; > + > + if (leaf_frame + 1 == sample->user_regs.regs[PERF_REG_ARM64_LR]) > + return sample->user_regs.regs[PERF_REG_ARM64_LR]; Hi Alex, From your other reply about your investigation, it looks like the check against PERF_REG_ARM64_LR isn't required, because libunwind won't return a value if it's not correct, whether it's equal to the LR or not. And PERF_REG_ARM64_LR points to the instruction _after_ the call site, i.e. where to return to, not where the call was made from. So just leaf_frame rather than leaf_frame+1 would be more accurate. I was also looking at unwind_entry in machine.c, which is similar to your add_entry function, and saw that it does some extra bits like this: if (symbol_conf.hide_unresolved && entry->ms.sym == NULL) return 0; if (append_inlines(cursor, &entry->ms, entry->ip) == 0) return 0; /* * Convert entry->ip from a virtual address to an offset in * its corresponding binary. */ if (entry->ms.map) addr = map__map_ip(entry->ms.map, entry->ip); I have a feeling you will also need to do the same with the values returned from libunwind to make it 100% equivalent. 
James > + return 0; > +} > diff --git a/tools/perf/util/arm-frame-pointer-unwind-support.h > b/tools/perf/util/arm-frame-pointer-unwind-support.h > new file mode 100644 > index 000000000000..16dc03fa9abe > --- /dev/null > +++ b/tools/perf/util/arm-frame-pointer-unwind-support.h > @@ -0,0 +1,7 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H > +#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H > + > +u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread > *thread); > + > +#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */ > diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c > index 40082d70eec1..bc6147e46c89 100644 > --- a/tools/perf/util/machine.c > +++ b/tools/perf/util/machine.c > @@ -34,6 +34,7 @@ > #include "bpf-event.h" > #include <internal/lib.h> // page_size > #include "cgroup.h" > +#include "arm-frame-pointer-unwind-support.h" > > #include <linux/ctype.h> > #include <symbol/kallsyms.h> > @@ -2671,10 +2672,12 @@ static int find_prev_cpumode(struct ip_callchain > *chain, struct thread *thread, > return err; > } > > -static u64 get_leaf_frame_caller(struct perf_sample *sample __maybe_unused, > - struct thread *thread __maybe_unused) > +static u64 get_leaf_frame_caller(struct perf_sample *sample, struct thread > *thread) > { > - return 0; > + if (strncmp(thread->maps->machine->env->arch, "aarch64", 7) == 0) > + return get_leaf_frame_caller_aarch64(sample, thread); > + else > + return 0; > } > > static int thread__resolve_callchain_sample(struct thread *thread, >