From: Andi Kleen <a...@linux.intel.com> With perf script --itrace=cr we can synthesize calls and returns out of a PT log. However, both calls and returns are marked with the same event, called branches. This makes it difficult to read and post-process, because calls and returns are somewhat different.
Create a separate return event and mark the returns as return. Cc: adrian.hun...@intel.com v2: Add extra filter for returns. Signed-off-by: Andi Kleen <a...@linux.intel.com> --- tools/perf/util/intel-pt.c | 61 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 137196990012..c72b9074e86e 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -82,9 +82,13 @@ struct intel_pt { u64 instructions_id; bool sample_branches; + bool sample_returns; u32 branches_filter; u64 branches_sample_type; + u64 returns_sample_type; + u32 returns_filter; u64 branches_id; + u64 returns_id; bool sample_transactions; u64 transactions_sample_type; @@ -960,7 +964,9 @@ static int intel_pt_inject_event(union perf_event *event, return perf_event__synthesize_sample(event, type, 0, sample, swapped); } -static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) +static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq, + bool is_return, + u32 filter) { int ret; struct intel_pt *pt = ptq->pt; @@ -971,7 +977,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) struct branch_entry entries; } dummy_bs; - if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) + if (filter && !(filter & ptq->flags)) return 0; if (pt->synth_opts.initial_skip && @@ -990,8 +996,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.pid = ptq->pid; sample.tid = ptq->tid; sample.addr = ptq->state->to_ip; - sample.id = ptq->pt->branches_id; - sample.stream_id = ptq->pt->branches_id; + if (is_return) { + sample.id = ptq->pt->returns_id; + sample.stream_id = ptq->pt->returns_id; + } else { + sample.id = ptq->pt->branches_id; + sample.stream_id = ptq->pt->branches_id; + } sample.period = 1; sample.cpu = ptq->cpu; sample.flags = ptq->flags; @@ -1014,6 +1025,8 @@ static int intel_pt_synth_branch_sample(struct 
intel_pt_queue *ptq) if (pt->synth_opts.inject) { ret = intel_pt_inject_event(event, &sample, + is_return ? + pt->returns_sample_type : pt->branches_sample_type, pt->synth_needs_swap); if (ret) @@ -1241,7 +1254,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) thread_stack__set_trace_nr(ptq->thread, state->trace_nr); if (pt->sample_branches) { - err = intel_pt_synth_branch_sample(ptq); + err = intel_pt_synth_branch_sample(ptq, false, + pt->branches_filter); + if (err) + return err; + } + + if (pt->sample_returns) { + err = intel_pt_synth_branch_sample(ptq, true, + pt->returns_filter); if (err) return err; } @@ -1956,7 +1977,33 @@ static int intel_pt_synth_events(struct intel_pt *pt, } pt->sample_branches = true; pt->branches_sample_type = attr.sample_type; - pt->branches_id = id; + pt->branches_id = id++; + } + if (pt->synth_opts.returns) { + attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; + attr.sample_period = 1; + attr.sample_type |= PERF_SAMPLE_ADDR; + attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN; + attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK; + pr_debug("Synthesizing 'return' event with id %" PRIu64 " sample type %#" PRIx64 "\n", + id, (u64)attr.sample_type); + err = intel_pt_synth_event(session, &attr, id); + if (err) { + pr_err("%s: failed to synthesize 'return' event type\n", + __func__); + return err; + } + pt->sample_returns = true; + pt->returns_sample_type = attr.sample_type; + pt->returns_id = id; + evlist__for_each(evlist, evsel) { + if (evsel->id && evsel->id[0] == pt->returns_id) { + if (evsel->name) + zfree(&evsel->name); + evsel->name = strdup("return"); + break; + } + } } pt->synth_needs_swap = evsel->needs_swap; @@ -2155,7 +2202,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_TRACE_END; if (pt->synth_opts.returns) - pt->branches_filter |= PERF_IP_FLAG_RETURN | + pt->returns_filter |= PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_BEGIN; 
if (pt->synth_opts.callchain && !symbol_conf.use_callchain) { -- 2.5.5