The current implementation of the Python database export API only includes call path information when using some form of call/return tracing, but is unable to do so when sampling.
The following API extensions allow exporting of data collected by perf record when using --call-graph. The additions to the Python API include the following: - add call_path_id to sample_table to allow association of samples with call paths - add dso_id to call_path_table to more closely align the data with that of a callchain_node db-export and trace-script-python were both extended to accommodate the API changes listed above. Thread-stack's functionality was expanded to allow storage and exporting of callchains that result from individual samples. - Introduced a new static function (thread_stack__process_callchain) to resolve call paths using the existing callchain resolution provided by thread__resolve_callchain - The existing call_path tree in call_return_processor is used for storing the data from the resolved callchain. - Call_return_processor was also extended to allow the ability to export full call paths in addition to the existing individual call/return pairs, since call/return pairs are not available when doing sampling. The code was tested using call graphs from fp and dwarf. export-to-postgresql was utilized with intel-pt data to verify that changes did not negatively affect existing behavior of the db-export API. 
Signed-off-by: Chris Phlipot <cphlip...@gmail.com> --- tools/perf/util/db-export.c | 21 ++- tools/perf/util/db-export.h | 2 + .../util/scripting-engines/trace-event-python.c | 20 ++- tools/perf/util/thread-stack.c | 162 +++++++++++++++++---- tools/perf/util/thread-stack.h | 14 +- 5 files changed, 184 insertions(+), 35 deletions(-) diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 049438d..69c9a9d 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -329,6 +329,13 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, if (err) goto out_put; + dbe->call_path_last_seen_db_id = 0; + if(dbe->crp) { + thread_stack__process_callchain(thread, comm, evsel, + al->machine, sample, + PERF_MAX_STACK_DEPTH, dbe->crp); + } + if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) && sample_addr_correlates_sym(&evsel->attr)) { struct addr_location addr_al; @@ -346,6 +353,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, goto out_put; } } + es.call_path_db_id = dbe->call_path_last_seen_db_id; if (dbe->export_sample) err = dbe->export_sample(dbe, &es); @@ -397,9 +405,10 @@ int db_export__branch_types(struct db_export *dbe) int db_export__call_path(struct db_export *dbe, struct call_path *cp) { int err; - - if (cp->db_id) + if (cp->db_id) { + dbe->call_path_last_seen_db_id = cp->db_id; return 0; + } if (cp->parent) { err = db_export__call_path(dbe, cp->parent); @@ -409,8 +418,14 @@ int db_export__call_path(struct db_export *dbe, struct call_path *cp) cp->db_id = ++dbe->call_path_last_db_id; - if (dbe->export_call_path) + if (dbe->export_call_path) { + if (cp->dso) + db_export__dso(dbe, cp->dso, cp->machine); + if (cp->sym && cp->dso) + db_export__symbol(dbe, cp->sym, cp->dso); + dbe->call_path_last_seen_db_id = cp->db_id; return dbe->export_call_path(dbe, cp); + } return 0; } diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index 25e22fd..40e3b07 100644 --- 
a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -43,6 +43,7 @@ struct export_sample { u64 addr_dso_db_id; u64 addr_sym_db_id; u64 addr_offset; /* addr offset from symbol start */ + u64 call_path_db_id; }; struct db_export { @@ -73,6 +74,7 @@ struct db_export { u64 symbol_last_db_id; u64 sample_last_db_id; u64 call_path_last_db_id; + u64 call_path_last_seen_db_id; /* last db_id seen(exported or not) */ u64 call_return_last_db_id; struct list_head deferred; }; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 525eb49..ca3f9c6 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -681,7 +681,7 @@ static int python_export_sample(struct db_export *dbe, struct tables *tables = container_of(dbe, struct tables, dbe); PyObject *t; - t = tuple_new(21); + t = tuple_new(22); tuple_set_u64(t, 0, es->db_id); tuple_set_u64(t, 1, es->evsel->db_id); @@ -704,6 +704,8 @@ static int python_export_sample(struct db_export *dbe, tuple_set_u64(t, 18, es->sample->data_src); tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK); tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX)); + tuple_set_s32(t, 21, es->call_path_db_id); + call_object(tables->sample_handler, t, "sample_table"); @@ -716,17 +718,19 @@ static int python_export_call_path(struct db_export *dbe, struct call_path *cp) { struct tables *tables = container_of(dbe, struct tables, dbe); PyObject *t; - u64 parent_db_id, sym_db_id; + u64 parent_db_id, sym_db_id, dso_db_id; parent_db_id = cp->parent ? cp->parent->db_id : 0; sym_db_id = cp->sym ? *(u64 *)symbol__priv(cp->sym) : 0; + dso_db_id = cp->dso ? 
cp->dso->db_id : 0; - t = tuple_new(4); + t = tuple_new(5); tuple_set_u64(t, 0, cp->db_id); tuple_set_u64(t, 1, parent_db_id); tuple_set_u64(t, 2, sym_db_id); tuple_set_u64(t, 3, cp->ip); + tuple_set_u64(t, 4, dso_db_id); call_object(tables->call_path_handler, t, "call_path_table"); @@ -763,6 +767,13 @@ static int python_export_call_return(struct db_export *dbe, return 0; } +static int python_process_call_path(struct call_path *cp, void *data) +{ + struct db_export *dbe = data; + + return db_export__call_path(dbe, cp); +} + static int python_process_call_return(struct call_return *cr, void *data) { struct db_export *dbe = data; @@ -1027,7 +1038,8 @@ static void set_table_handlers(struct tables *tables) if (export_calls) { tables->dbe.crp = - call_return_processor__new(python_process_call_return, + call_return_processor__new(python_process_call_path, + python_process_call_return, &tables->dbe); if (!tables->dbe.crp) Py_FatalError("failed to create calls processor"); diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 679688e..38a749d 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -22,6 +22,7 @@ #include "debug.h" #include "symbol.h" #include "comm.h" +#include "callchain.h" #include "thread-stack.h" #define CALL_PATH_BLOCK_SHIFT 8 @@ -56,7 +57,8 @@ struct call_path_root { */ struct call_return_processor { struct call_path_root *cpr; - int (*process)(struct call_return *cr, void *data); + int (*process_call_path)(struct call_path *cp, void *data); + int (*process_call_return)(struct call_return *cr, void *data); void *data; }; @@ -216,7 +218,7 @@ static int thread_stack__call_return(struct thread *thread, if (no_return) cr.flags |= CALL_RETURN_NO_RETURN; - return crp->process(&cr, crp->data); + return crp->process_call_return(&cr, crp->data); } static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts) @@ -336,9 +338,12 @@ void thread_stack__sample(struct thread *thread, struct 
ip_callchain *chain, } static void call_path__init(struct call_path *cp, struct call_path *parent, + struct machine *machine, struct dso *dso, struct symbol *sym, u64 ip, bool in_kernel) { cp->parent = parent; + cp->machine = machine; + cp->dso = dso; cp->sym = sym; cp->ip = sym ? 0 : ip; cp->db_id = 0; @@ -354,7 +359,7 @@ static struct call_path_root *call_path_root__new(void) cpr = zalloc(sizeof(struct call_path_root)); if (!cpr) return NULL; - call_path__init(&cpr->call_path, NULL, NULL, 0, false); + call_path__init(&cpr->call_path, NULL, NULL, NULL, NULL, 0, false); INIT_LIST_HEAD(&cpr->blocks); return cpr; } @@ -372,8 +377,9 @@ static void call_path_root__free(struct call_path_root *cpr) static struct call_path *call_path__new(struct call_path_root *cpr, struct call_path *parent, - struct symbol *sym, u64 ip, - bool in_kernel) + struct machine *machine, + struct dso *dso, struct symbol *sym, + u64 ip, bool in_kernel) { struct call_path_block *cpb; struct call_path *cp; @@ -393,14 +399,16 @@ static struct call_path *call_path__new(struct call_path_root *cpr, n = cpr->next++ & CALL_PATH_BLOCK_MASK; cp = &cpb->cp[n]; - call_path__init(cp, parent, sym, ip, in_kernel); + call_path__init(cp, parent, machine, dso, sym, ip, in_kernel); return cp; } static struct call_path *call_path__findnew(struct call_path_root *cpr, struct call_path *parent, - struct symbol *sym, u64 ip, u64 ks) + struct machine *machine, + struct dso *dso, struct symbol *sym, + u64 ip, u64 ks) { struct rb_node **p; struct rb_node *node_parent = NULL; @@ -411,23 +419,28 @@ static struct call_path *call_path__findnew(struct call_path_root *cpr, ip = 0; if (!parent) - return call_path__new(cpr, parent, sym, ip, in_kernel); + return call_path__new(cpr, parent, machine, dso, sym, ip, + in_kernel); p = &parent->children.rb_node; while (*p != NULL) { node_parent = *p; cp = rb_entry(node_parent, struct call_path, rb_node); - if (cp->sym == sym && cp->ip == ip) + if (cp->sym == sym && cp->ip == ip && 
cp->dso == dso) return cp; - if (sym < cp->sym || (sym == cp->sym && ip < cp->ip)) + if (sym < cp->sym || (sym == cp->sym && ip < cp->ip) || + (sym == cp->sym && ip == cp->ip + && dso < cp->dso) || + (sym == cp->sym && ip == cp->ip + && dso == cp->dso && machine < cp->machine)) p = &(*p)->rb_left; else p = &(*p)->rb_right; } - cp = call_path__new(cpr, parent, sym, ip, in_kernel); + cp = call_path__new(cpr, parent, machine, dso, sym, ip, in_kernel); if (!cp) return NULL; @@ -438,7 +451,10 @@ static struct call_path *call_path__findnew(struct call_path_root *cpr, } struct call_return_processor * -call_return_processor__new(int (*process)(struct call_return *cr, void *data), +call_return_processor__new(int (*process_call_path)(struct call_path *cp, + void *data), + int (*process_call_return)(struct call_return *cr, + void *data), void *data) { struct call_return_processor *crp; @@ -449,7 +465,8 @@ call_return_processor__new(int (*process)(struct call_return *cr, void *data), crp->cpr = call_path_root__new(); if (!crp->cpr) goto out_free; - crp->process = process; + crp->process_call_path = process_call_path; + crp->process_call_return = process_call_return; crp->data = data; return crp; @@ -492,7 +509,7 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts, u64 ret_addr, u64 timestamp, u64 ref, - struct symbol *sym) + struct dso *dso, struct symbol *sym) { int err; @@ -502,7 +519,7 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts, if (ts->cnt == 1) { struct thread_stack_entry *tse = &ts->stack[0]; - if (tse->cp->sym == sym) + if (tse->cp->dso == dso && tse->cp->sym == sym) return thread_stack__call_return(thread, ts, --ts->cnt, timestamp, ref, false); } @@ -540,20 +557,28 @@ static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts, { struct call_path_root *cpr = ts->crp->cpr; struct call_path *cp; + struct machine 
*machine; + struct dso *dso = NULL; struct symbol *sym; u64 ip; if (sample->ip) { ip = sample->ip; sym = from_al->sym; + if (from_al->map) + dso = from_al->map->dso; + machine = from_al->machine; } else if (sample->addr) { ip = sample->addr; sym = to_al->sym; + if (to_al->map) + dso = to_al->map->dso; + machine = to_al->machine; } else { return 0; } - cp = call_path__findnew(cpr, &cpr->call_path, sym, ip, + cp = call_path__findnew(cpr, &cpr->call_path, machine, dso, sym, ip, ts->kernel_start); if (!cp) return -ENOMEM; @@ -586,6 +611,7 @@ static int thread_stack__no_call_return(struct thread *thread, /* If the stack is empty, push the userspace address */ if (!ts->cnt) { cp = call_path__findnew(cpr, &cpr->call_path, + to_al->machine, to_al->map->dso, to_al->sym, sample->addr, ts->kernel_start); if (!cp) @@ -610,7 +636,8 @@ static int thread_stack__no_call_return(struct thread *thread, parent = &cpr->call_path; /* This 'return' had no 'call', so push and pop top of stack */ - cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip, + cp = call_path__findnew(cpr, parent, from_al->machine, + from_al->map->dso, from_al->sym, sample->ip, ts->kernel_start); if (!cp) return -ENOMEM; @@ -621,7 +648,7 @@ static int thread_stack__no_call_return(struct thread *thread, return err; return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref, - to_al->sym); + to_al->map->dso, to_al->sym); } static int thread_stack__trace_begin(struct thread *thread, @@ -636,7 +663,7 @@ static int thread_stack__trace_begin(struct thread *thread, /* Pop trace end */ tse = &ts->stack[ts->cnt - 1]; - if (tse->cp->sym == NULL && tse->cp->ip == 0) { + if (tse->cp->dso == NULL && tse->cp->sym == NULL && tse->cp->ip == 0) { err = thread_stack__call_return(thread, ts, --ts->cnt, timestamp, ref, false); if (err) @@ -657,7 +684,7 @@ static int thread_stack__trace_end(struct thread_stack *ts, if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref)) return 0; - cp = call_path__findnew(cpr, 
ts->stack[ts->cnt - 1].cp, NULL, 0, + cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, NULL, NULL, 0, ts->kernel_start); if (!cp) return -ENOMEM; @@ -668,14 +695,11 @@ static int thread_stack__trace_end(struct thread_stack *ts, false); } -int thread_stack__process(struct thread *thread, struct comm *comm, - struct perf_sample *sample, - struct addr_location *from_al, - struct addr_location *to_al, u64 ref, - struct call_return_processor *crp) +static int __thread_stack__process_init(struct thread *thread, + struct comm *comm, + struct call_return_processor *crp) { struct thread_stack *ts = thread->ts; - int err = 0; if (ts) { if (!ts->crp) { @@ -694,6 +718,80 @@ int thread_stack__process(struct thread *thread, struct comm *comm, ts = thread->ts; ts->comm = comm; } + return 0; +} + +int thread_stack__process_callchain(struct thread *thread, struct comm *comm, + struct perf_evsel *evsel, + struct machine *machine, + struct perf_sample *sample, int max_stack, + struct call_return_processor *crp) +{ + struct call_path *current = &crp->cpr->call_path; + struct thread_stack *ts = NULL; + enum chain_order saved_order = callchain_param.order; + int err = 0; + + if (!symbol_conf.use_callchain || !sample->callchain) + return err; + + err = __thread_stack__process_init(thread, comm, crp); + if(err) + return err; + + ts = thread->ts; + + + callchain_param.order = ORDER_CALLER; + err = thread__resolve_callchain(thread, &callchain_cursor, evsel, + sample, NULL, NULL, max_stack); + if (err) { + callchain_param.order = saved_order; + return err; + } + callchain_cursor_commit(&callchain_cursor); + + while (1) { + struct callchain_cursor_node *node; + struct dso *dso = NULL; + node = callchain_cursor_current(&callchain_cursor); + if (!node) + break; + if (node->map) + dso = node->map->dso; + + current = call_path__findnew(ts->crp->cpr, current, machine, + dso, node->sym, node->ip, + ts->kernel_start); + + callchain_cursor_advance(&callchain_cursor); + } + 
callchain_param.order = saved_order; + + if (current == &crp->cpr->call_path) { + /* Bail because the callchain was empty. */ + return 1; + } + + err = ts->crp->process_call_path(current,ts->crp->data); + return err; +} + +int thread_stack__process(struct thread *thread, struct comm *comm, + struct perf_sample *sample, + struct addr_location *from_al, + struct addr_location *to_al, u64 ref, + struct call_return_processor *crp) +{ + struct thread_stack *ts = NULL; + + int err = 0; + + err = __thread_stack__process_init(thread, comm, crp); + if(err) + return err; + + ts = thread->ts; /* Flush stack on exec */ if (ts->comm != comm && thread->pid_ == thread->tid) { @@ -717,8 +815,12 @@ int thread_stack__process(struct thread *thread, struct comm *comm, if (sample->flags & PERF_IP_FLAG_CALL) { struct call_path_root *cpr = ts->crp->cpr; struct call_path *cp; + struct dso *dso = NULL; u64 ret_addr; + if(to_al->map) + dso = to_al->map->dso; + if (!sample->ip || !sample->addr) return 0; @@ -727,6 +829,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, return 0; /* Zero-length calls are excluded */ cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, + to_al->machine, dso, to_al->sym, sample->addr, ts->kernel_start); if (!cp) @@ -734,11 +837,16 @@ int thread_stack__process(struct thread *thread, struct comm *comm, err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp, false); } else if (sample->flags & PERF_IP_FLAG_RETURN) { + struct dso *dso = NULL; + if(from_al->map) + dso = from_al->map->dso; + if (!sample->ip || !sample->addr) return 0; err = thread_stack__pop_cp(thread, ts, sample->addr, - sample->time, ref, from_al->sym); + sample->time, ref, dso, + from_al->sym); if (err) { if (err < 0) return err; diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index e1528f1..7b9615e 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -26,6 +26,7 @@ struct comm; struct ip_callchain; struct 
symbol; struct dso; +struct machine; struct call_return_processor; struct comm; struct perf_sample; @@ -83,6 +84,8 @@ struct call_return { */ struct call_path { struct call_path *parent; + struct machine *machine; + struct dso *dso; struct symbol *sym; u64 ip; u64 db_id; @@ -100,9 +103,18 @@ int thread_stack__flush(struct thread *thread); void thread_stack__free(struct thread *thread); struct call_return_processor * -call_return_processor__new(int (*process)(struct call_return *cr, void *data), +call_return_processor__new(int (*process_call_path)(struct call_path *cp, + void *data), + int (*process_call_return)(struct call_return *cr, + void *data), void *data); void call_return_processor__free(struct call_return_processor *crp); + +int thread_stack__process_callchain(struct thread *thread, struct comm *comm, + struct perf_evsel *evsel, + struct machine *machine, + struct perf_sample *sample, int max_stack, + struct call_return_processor *crp); int thread_stack__process(struct thread *thread, struct comm *comm, struct perf_sample *sample, struct addr_location *from_al, -- 2.7.4