On Tue, Jul 21, 2015 at 03:08:50AM +0000, He Kuang wrote:
> There are scenarios where we need an eBPF program to record not only
> kprobe point args, but also the PMU counters, time latencies or the
> number of cache misses between two probe points and other information
> when the probe point is entered.
> 
> This patch adds a new trace event to establish infrastructure for bpf to
> output data to perf. Userspace perf tools can detect and use this event
> just as they use the existing tracepoint events.
> 
> New bpf trace event entry in debugfs:
> 
>      /sys/kernel/debug/tracing/events/bpf/bpf_output_data
> 
> Userspace perf tools detect the new tracepoint event as:
> 
>      bpf:bpf_output_data                          [Tracepoint event]
> 
> Data in the ring buffer of perf events added to this event will be
> polled out; sample types and other attributes can be adjusted on those
> events directly without touching the original kprobe events.
> 
> The bpf helper function gives eBPF programs the ability to output data
> as a perf sample event. This helper simply calls the new trace event, and
> userspace perf tools can record the BPF ftrace event to collect those
> records.
> 
> Signed-off-by: He Kuang <heku...@huawei.com>
> Acked-by: Alexei Starovoitov <a...@plumgrid.com>

Acked-by: Namhyung Kim <namhy...@kernel.org>

Thanks,
Namhyung


> ---
>  include/trace/events/bpf.h | 30 ++++++++++++++++++++++++++++++
>  include/uapi/linux/bpf.h   |  7 +++++++
>  kernel/trace/bpf_trace.c   | 23 +++++++++++++++++++++++
>  samples/bpf/bpf_helpers.h  |  2 ++
>  4 files changed, 62 insertions(+)
>  create mode 100644 include/trace/events/bpf.h
> 
> diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h
> new file mode 100644
> index 0000000..6b739b8
> --- /dev/null
> +++ b/include/trace/events/bpf.h
> @@ -0,0 +1,30 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM bpf
> +
> +#if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_BPF_H
> +
> +#include <linux/tracepoint.h>
> +
> +TRACE_EVENT(bpf_output_data,
> +
> +     TP_PROTO(u64 *src, int size),
> +
> +     TP_ARGS(src, size),
> +
> +     TP_STRUCT__entry(
> +             __dynamic_array(u8,             buf,            size)
> +     ),
> +
> +     TP_fast_assign(
> +             memcpy(__get_dynamic_array(buf), src, size);
> +     ),
> +
> +     TP_printk("%s", __print_hex(__get_dynamic_array(buf),
> +                                 __get_dynamic_array_len(buf)))
> +);
> +
> +#endif /* _TRACE_BPF_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 29ef6f9..5068ab1 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -249,6 +249,13 @@ enum bpf_func_id {
>        * Return: 0 on success
>        */
>       BPF_FUNC_get_current_comm,
> +
> +     /**
> +      * int bpf_output_trace_data(void *src, int size)
> +      * Return: 0 on success
> +      */
> +     BPF_FUNC_output_trace_data,
> +
>       __BPF_FUNC_MAX_ID,
>  };
>  
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 88a041a..219f670 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -11,7 +11,10 @@
>  #include <linux/filter.h>
>  #include <linux/uaccess.h>
>  #include <linux/ctype.h>
> +
>  #include "trace.h"
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/bpf.h>
>  
>  static DEFINE_PER_CPU(int, bpf_prog_active);
>  
> @@ -79,6 +82,24 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
>       .arg3_type      = ARG_ANYTHING,
>  };
>  
> +static u64 bpf_output_trace_data(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
> +{
> +     void *src = (void *) (long) r1;
> +     int size = (int) r2;
> +
> +     trace_bpf_output_data(src, size);
> +
> +     return 0;
> +}
> +
> +static const struct bpf_func_proto bpf_output_trace_data_proto = {
> +     .func           = bpf_output_trace_data,
> +     .gpl_only       = true,
> +     .ret_type       = RET_INTEGER,
> +     .arg1_type      = ARG_PTR_TO_STACK,
> +     .arg2_type      = ARG_CONST_STACK_SIZE,
> +};
> +
>  /*
>   * limited trace_printk()
>   * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed
> @@ -169,6 +190,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
>               return &bpf_map_delete_elem_proto;
>       case BPF_FUNC_probe_read:
>               return &bpf_probe_read_proto;
> +     case BPF_FUNC_output_trace_data:
> +             return &bpf_output_trace_data_proto;
>       case BPF_FUNC_ktime_get_ns:
>               return &bpf_ktime_get_ns_proto;
>       case BPF_FUNC_tail_call:
> diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
> index bdf1c16..0aeaebe 100644
> --- a/samples/bpf/bpf_helpers.h
> +++ b/samples/bpf/bpf_helpers.h
> @@ -59,5 +59,7 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
>       (void *) BPF_FUNC_l3_csum_replace;
>  static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
>       (void *) BPF_FUNC_l4_csum_replace;
> +static int (*bpf_output_trace_data)(void *src, int size) =
> +     (void *) BPF_FUNC_output_trace_data;
>  
>  #endif
> -- 
> 1.8.5.2
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to