A new bit, 'write_backward', is appended to perf_event_attr to indicate that this event should write to its ring buffer from the end toward the beginning.
In perf_output_begin(), prepare the ring buffer according to this bit. This patch introduces a small overhead into perf_output_begin(): an extra memory read and a conditional branch. A further patch can remove this overhead by using a custom output handler. Signed-off-by: Wang Nan <wangn...@huawei.com> Cc: He Kuang <heku...@huawei.com> Cc: Alexei Starovoitov <a...@kernel.org> Cc: Arnaldo Carvalho de Melo <a...@redhat.com> Cc: Brendan Gregg <brendan.d.gr...@gmail.com> Cc: Ingo Molnar <mi...@redhat.com> Cc: Jiri Olsa <jo...@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu...@hitachi.com> Cc: Namhyung Kim <namhy...@kernel.org> Cc: Peter Zijlstra <pet...@infradead.org> Cc: Zefan Li <lize...@huawei.com> Cc: pi3or...@163.com --- include/linux/perf_event.h | 5 +++++ include/uapi/linux/perf_event.h | 3 ++- kernel/events/core.c | 7 +++++++ kernel/events/ring_buffer.c | 2 ++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a9d8cab..88e2b47 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1031,6 +1031,11 @@ static inline bool has_aux(struct perf_event *event) return event->pmu->setup_aux; } +static inline bool is_write_backward(struct perf_event *event) +{ + return !!event->attr.write_backward; +} + extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size); extern void perf_output_end(struct perf_output_handle *handle); diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index a3c1903..43fc8d2 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -340,7 +340,8 @@ struct perf_event_attr { comm_exec : 1, /* flag comm events that are due to an exec */ use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ - __reserved_1 : 37; + write_backward : 1, /* Write ring buffer from end to beginning */ + __reserved_1 : 36; union { __u32 
wakeup_events; /* wakeup every n events */ diff --git a/kernel/events/core.c b/kernel/events/core.c index ed94c91..513b556 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8196,6 +8196,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) goto out; /* + * Either writing ring buffer from beginning or from end. + * Mixing is not allowed. + */ + if (is_write_backward(output_event) != is_write_backward(event)) + goto out; + + /* * If both events generate aux data, they must be on the same PMU */ if (has_aux(event) && has_aux(output_event) && diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 37c11c6..80b1fa7 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -233,6 +233,8 @@ out: int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size) { + if (unlikely(is_write_backward(event))) + return __perf_output_begin(handle, event, size, true); return __perf_output_begin(handle, event, size, false); } -- 1.8.3.4