Having the following commands running concurrently: # perf record -e ftrace:function -a -o krava.data sleep 10 # perf record -e ftrace:function --filter 'ip == SyS_read' ls
will end up with the latter one failing to apply its filter rules and storing all functions (in perf.data), as instructed by the first perf record, instead of just the SyS_read records. The reason for this is that the tracepoint code by default triggers all events that are registered for the tracepoint, while ftrace:function is special because the ftrace_ops itself carries a filter and only the event that owns the ftrace_ops is eligible to be triggered. Fix this by using the ftrace_ops::private value to keep the perf_event pointer. This way we don't need to search for the triggered event (as the tracepoint handler does) and can store the sample directly. Suggested-by: Steven Rostedt <rost...@goodmis.org> Signed-off-by: Jiri Olsa <jo...@kernel.org> --- include/linux/perf_event.h | 3 +++ kernel/events/core.c | 22 ++++++++++++++++++++++ kernel/trace/trace_event_perf.c | 10 +++------- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a9d8cab18b00..a330dc06d90d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1008,6 +1008,9 @@ extern void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, struct task_struct *task); +void perf_function_event(struct perf_event *event, + void *record, int entry_size, + struct pt_regs *regs); extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags diff --git a/kernel/events/core.c b/kernel/events/core.c index ca68fdcf47ce..18da90859c17 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7063,6 +7063,28 @@ static void perf_event_free_bpf_prog(struct perf_event *event) } } +#ifdef CONFIG_FUNCTION_TRACER +void perf_function_event(struct perf_event *event, + void *record, int entry_size, + struct pt_regs *regs) + +{ + struct perf_sample_data data; + struct perf_raw_record raw = { + .size = entry_size, + .data = record, + }; + + if (event->hw.state & PERF_HES_STOPPED) + 
return; + + perf_sample_data_init(&data, 0, 0); + data.raw = &raw; + + perf_swevent_event(event, 1, &data, regs); +} +#endif /* CONFIG_FUNCTION_TRACER */ + #else static inline void perf_tp_register(void) diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 0a3779bd18a1..087c811db04c 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -328,14 +328,9 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *pt_regs) { struct ftrace_entry *entry; - struct hlist_head *head; struct pt_regs regs; int rctx; - head = this_cpu_ptr(event_function.perf_events); - if (hlist_empty(head)) - return; - #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ sizeof(u64)) - sizeof(u32)) @@ -349,8 +344,8 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, entry->ip = ip; entry->parent_ip = parent_ip; - perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, - 1, &regs, head, NULL); + perf_function_event(ops->private, entry, ENTRY_SIZE, &regs); + perf_swevent_put_recursion_context(rctx); #undef ENTRY_SIZE } @@ -359,6 +354,7 @@ static int perf_ftrace_function_register(struct perf_event *event) { struct ftrace_ops *ops = &event->ftrace_ops; + ops->private = event; ops->flags |= FTRACE_OPS_FL_PER_CPU | FTRACE_OPS_FL_RCU; ops->func = perf_ftrace_function_call; return register_ftrace_function(ops); -- 2.4.3