This patch adds support for non-linear data on raw records. For such data, the newly introduced __output_custom() helper is used instead of __output_copy(). __output_custom() invokes the custom callback passed in via struct perf_raw_record_frag to copy the data into the ring buffer slot.
To keep changes in perf_prepare_sample() and in perf_output_sample() minimal, a size/size_head split was added to perf_raw_record that call sites fill out, so that two extra tests in the fast path can be avoided. The few users of raw records are adapted to initialize their size_head and frag data; there is no change in behavior for them. A later patch will extend the BPF side with a first user and callback for this facility; future users could be things like XDP BPF programs (which work on a different context and would thus have a different callback), etc. Signed-off-by: Daniel Borkmann <dan...@iogearbox.net> Acked-by: Alexei Starovoitov <a...@kernel.org> --- arch/s390/kernel/perf_cpum_sf.c | 2 ++ arch/x86/events/amd/ibs.c | 2 ++ include/linux/perf_event.h | 8 ++++++++ kernel/events/core.c | 13 ++++++++++--- kernel/events/internal.h | 18 ++++++++++++++---- kernel/trace/bpf_trace.c | 1 + 6 files changed, 37 insertions(+), 7 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index a8e8321..99c5952 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -984,7 +984,9 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) /* Setup perf sample */ perf_sample_data_init(&data, 0, event->hw.last_period); raw.size = sfr->size; + raw.size_head = raw.size; raw.data = sfr; + raw.frag = NULL; data.raw = &raw; /* Setup pt_regs to look like an CPU-measurement external interrupt diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index feb90f6..9b27dff 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -656,7 +656,9 @@ fail: if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.size = sizeof(u32) + ibs_data.size; + raw.size_head = raw.size; raw.data = ibs_data.data; + raw.frag = NULL; data.raw = &raw; } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1a827ce..bf08bdf 100644 --- a/include/linux/perf_event.h +++ 
b/include/linux/perf_event.h @@ -69,9 +69,17 @@ struct perf_callchain_entry_ctx { bool contexts_maxed; }; +struct perf_raw_record_frag { + void *data; + unsigned long (*copy_cb) (void *dst, const void *src, + unsigned long n); +}; + struct perf_raw_record { u32 size; + u32 size_head; void *data; + struct perf_raw_record_frag *frag; }; /* diff --git a/kernel/events/core.c b/kernel/events/core.c index 9c51ec3..3e1dd7a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5553,14 +5553,20 @@ void perf_output_sample(struct perf_output_handle *handle, } if (sample_type & PERF_SAMPLE_RAW) { - if (data->raw) { - u32 raw_size = data->raw->size; + struct perf_raw_record *raw = data->raw; + + if (raw) { + u32 raw_size = raw->size; u32 real_size = round_up(raw_size + sizeof(u32), sizeof(u64)) - sizeof(u32); u64 zero = 0; perf_output_put(handle, real_size); - __output_copy(handle, data->raw->data, raw_size); + __output_copy(handle, raw->data, raw->size_head); + if (raw->frag) + __output_custom(handle, raw->frag->copy_cb, + raw->frag->data, + raw->size - raw->size_head); if (real_size - raw_size) __output_copy(handle, &zero, real_size - raw_size); } else { @@ -7388,6 +7394,7 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct perf_raw_record raw = { .size = entry_size, + .size_head = entry_size, .data = record, }; diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 05f9f6d..1b08d94 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -123,10 +123,7 @@ static inline unsigned long perf_aux_size(struct ring_buffer *rb) return rb->aux_nr_pages << PAGE_SHIFT; } -#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \ -static inline unsigned long \ -func_name(struct perf_output_handle *handle, \ - const void *buf, unsigned long len) \ +#define __DEFINE_OUTPUT_COPY_BODY(memcpy_func) \ { \ unsigned long size, written; \ \ @@ -152,6 +149,19 @@ func_name(struct perf_output_handle *handle, \ return len; \ } 
+#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \ +static inline unsigned long \ +func_name(struct perf_output_handle *handle, \ + const void *buf, unsigned long len) \ +__DEFINE_OUTPUT_COPY_BODY(memcpy_func) + +static inline unsigned long +__output_custom(struct perf_output_handle *handle, + unsigned long (*copy_cb)(void *dst, const void *src, + unsigned long n), + const void *buf, unsigned long len) +__DEFINE_OUTPUT_COPY_BODY(copy_cb) + static inline unsigned long memcpy_common(void *dst, const void *src, unsigned long n) { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 094c716..8540bd5 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -246,6 +246,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) struct perf_event *event; struct perf_raw_record raw = { .size = size, + .size_head = size, .data = data, }; -- 1.9.3