On Thu, May 03, 2018 at 06:37:16PM -0700, Jakub Kicinski wrote: > Users of BPF sooner or later discover perf_event_output() helpers > and BPF_MAP_TYPE_PERF_EVENT_ARRAY. Dumping this array type is > not possible, however, we can add simple reading of perf events. > Create a new event_pipe subcommand for maps, this sub command > will only work with BPF_MAP_TYPE_PERF_EVENT_ARRAY maps. > > Parts of the code from samples/bpf/trace_output_user.c. > > Signed-off-by: Jakub Kicinski <jakub.kicin...@netronome.com> > Reviewed-by: Quentin Monnet <quentin.mon...@netronome.com> > --- > .../bpf/bpftool/Documentation/bpftool-map.rst | 29 +- > tools/bpf/bpftool/Documentation/bpftool.rst | 2 +- > tools/bpf/bpftool/Makefile | 7 +- > tools/bpf/bpftool/bash-completion/bpftool | 36 +- > tools/bpf/bpftool/common.c | 19 + > tools/bpf/bpftool/main.h | 4 + > tools/bpf/bpftool/map.c | 19 +- > tools/bpf/bpftool/map_perf_ring.c | 347 ++++++++++++++++++ > 8 files changed, 444 insertions(+), 19 deletions(-) > create mode 100644 tools/bpf/bpftool/map_perf_ring.c > > diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst > b/tools/bpf/bpftool/Documentation/bpftool-map.rst > index c3eef8c972cd..a6258bc8ec4f 100644 > --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst > +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst > @@ -22,12 +22,13 @@ MAP COMMANDS > ============= > > | **bpftool** **map { show | list }** [*MAP*] > -| **bpftool** **map dump** *MAP* > -| **bpftool** **map update** *MAP* **key** *DATA* **value** *VALUE* > [*UPDATE_FLAGS*] > -| **bpftool** **map lookup** *MAP* **key** *DATA* > -| **bpftool** **map getnext** *MAP* [**key** *DATA*] > -| **bpftool** **map delete** *MAP* **key** *DATA* > -| **bpftool** **map pin** *MAP* *FILE* > +| **bpftool** **map dump** *MAP* > +| **bpftool** **map update** *MAP* **key** *DATA* **value** > *VALUE* [*UPDATE_FLAGS*] > +| **bpftool** **map lookup** *MAP* **key** *DATA* > +| **bpftool** **map getnext** *MAP* [**key** *DATA*] > +| 
**bpftool** **map delete** *MAP* **key** *DATA* > +| **bpftool** **map pin** *MAP* *FILE* > +| **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] > | **bpftool** **map help** > | > | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } > @@ -76,6 +77,22 @@ DESCRIPTION > > Note: *FILE* must be located in *bpffs* mount. > > + **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] > + Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map. > + > + Install perf rings into a perf event array map and dump > + output of any bpf_perf_event_output() call in the kernel. > + By default read the number of CPUs on the system and > + install perf ring for each CPU in the corresponding index > + in the array. > + > + If **cpu** and **index** are specified, install perf ring > + for given **cpu** at **index** in the array (single ring). > + > + Note that installing a perf ring into an array will silently > + replace any existing ring. Any other application will stop > + receiving events if it installed its rings earlier. > + > **bpftool map help** > Print short help message. 
> > diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst > b/tools/bpf/bpftool/Documentation/bpftool.rst > index 20689a321ffe..564cb0d9692b 100644 > --- a/tools/bpf/bpftool/Documentation/bpftool.rst > +++ b/tools/bpf/bpftool/Documentation/bpftool.rst > @@ -23,7 +23,7 @@ SYNOPSIS > > *MAP-COMMANDS* := > { **show** | **list** | **dump** | **update** | **lookup** | > **getnext** | **delete** > - | **pin** | **help** } > + | **pin** | **event_pipe** | **help** } > > *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump > xlated** | **pin** > | **load** | **help** } > diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile > index 4e69782c4a79..892dbf095bff 100644 > --- a/tools/bpf/bpftool/Makefile > +++ b/tools/bpf/bpftool/Makefile > @@ -39,7 +39,12 @@ CC = gcc > > CFLAGS += -O2 > CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow > -Wno-missing-field-initializers > -CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ > -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include > -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ > +CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ > + -I$(srctree)/kernel/bpf/ \ > + -I$(srctree)/tools/include \ > + -I$(srctree)/tools/include/uapi \ > + -I$(srctree)/tools/lib/bpf \ > + -I$(srctree)/tools/perf > CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' > LIBS = -lelf -lbfd -lopcodes $(LIBBPF) > > diff --git a/tools/bpf/bpftool/bash-completion/bpftool > b/tools/bpf/bpftool/bash-completion/bpftool > index 852d84a98acd..b301c9b315f1 100644 > --- a/tools/bpf/bpftool/bash-completion/bpftool > +++ b/tools/bpf/bpftool/bash-completion/bpftool > @@ -1,6 +1,6 @@ > # bpftool(8) bash completion -*- shell-script > -*- > # > -# Copyright (C) 2017 Netronome Systems, Inc. > +# Copyright (C) 2017-2018 Netronome Systems, Inc. 
> # > # This software is dual licensed under the GNU General License > # Version 2, June 1991 as shown in the file COPYING in the top-level > @@ -79,6 +79,14 @@ _bpftool_get_map_ids() > command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) > } > > +_bpftool_get_perf_map_ids() > +{ > + COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \ > + command grep -C2 perf_event_array | \ > + command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) > +} > + > + > _bpftool_get_prog_ids() > { > COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ > @@ -359,10 +367,34 @@ _bpftool() > fi > return 0 > ;; > + event_pipe) > + case $prev in > + $command) > + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" > ) ) > + return 0 > + ;; > + id) > + _bpftool_get_perf_map_ids > + return 0 > + ;; > + cpu) > + return 0 > + ;; > + index) > + return 0 > + ;; > + *) > + _bpftool_once_attr 'cpu' > + _bpftool_once_attr 'index' > + return 0 > + ;; > + esac > + ;; > *) > [[ $prev == $object ]] && \ > COMPREPLY=( $( compgen -W 'delete dump getnext help \ > - lookup pin show list update' -- "$cur" ) ) > + lookup pin event_pipe show list update' -- \ > + "$cur" ) ) > ;; > esac > ;; > diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c > index 9c620770c6ed..32f9e397a6c0 100644 > --- a/tools/bpf/bpftool/common.c > +++ b/tools/bpf/bpftool/common.c > @@ -331,6 +331,16 @@ char *get_fdinfo(int fd, const char *key) > return NULL; > } > > +void print_data_json(uint8_t *data, size_t len) > +{ > + unsigned int i; > + > + jsonw_start_array(json_wtr); > + for (i = 0; i < len; i++) > + jsonw_printf(json_wtr, "%d", data[i]); > + jsonw_end_array(json_wtr); > +} > + > void print_hex_data_json(uint8_t *data, size_t len) > { > unsigned int i; > @@ -421,6 +431,15 @@ void delete_pinned_obj_table(struct pinned_obj_table > *tab) > } > } > > +unsigned int get_page_size(void) > +{ > + static int result; > + > + if (!result) > + result = getpagesize(); > + return result; > +} > + > unsigned int 
get_possible_cpus(void) > { > static unsigned int result; > diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h > index cbf8985da362..6173cd997e7a 100644 > --- a/tools/bpf/bpftool/main.h > +++ b/tools/bpf/bpftool/main.h > @@ -117,14 +117,18 @@ int do_pin_fd(int fd, const char *name); > > int do_prog(int argc, char **arg); > int do_map(int argc, char **arg); > +int do_event_pipe(int argc, char **argv); > int do_cgroup(int argc, char **arg); > > int prog_parse_fd(int *argc, char ***argv); > +int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 > *info_len); > > void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, > const char *arch); > +void print_data_json(uint8_t *data, size_t len); > void print_hex_data_json(uint8_t *data, size_t len); > > +unsigned int get_page_size(void); > unsigned int get_possible_cpus(void); > const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 > ns_ino); > > diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c > index 5efefde5f578..af6766e956ba 100644 > --- a/tools/bpf/bpftool/map.c > +++ b/tools/bpf/bpftool/map.c > @@ -130,8 +130,7 @@ static int map_parse_fd(int *argc, char ***argv) > return -1; > } > > -static int > -map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) > +int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 > *info_len) > { > int err; > int fd; > @@ -817,12 +816,13 @@ static int do_help(int argc, char **argv) > > fprintf(stderr, > "Usage: %s %s { show | list } [MAP]\n" > - " %s %s dump MAP\n" > - " %s %s update MAP key DATA value VALUE > [UPDATE_FLAGS]\n" > - " %s %s lookup MAP key DATA\n" > - " %s %s getnext MAP [key DATA]\n" > - " %s %s delete MAP key DATA\n" > - " %s %s pin MAP FILE\n" > + " %s %s dump MAP\n" > + " %s %s update MAP key DATA value VALUE > [UPDATE_FLAGS]\n" > + " %s %s lookup MAP key DATA\n" > + " %s %s getnext MAP [key DATA]\n" > + " %s %s delete MAP key DATA\n" > + " %s %s pin MAP FILE\n" > 
+ " %s %s event_pipe MAP [cpu N index M]\n" > " %s %s help\n" > "\n" > " MAP := { id MAP_ID | pinned FILE }\n" > @@ -834,7 +834,7 @@ static int do_help(int argc, char **argv) > "", > bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], > bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], > - bin_name, argv[-2], bin_name, argv[-2]); > + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); > > return 0; > } > @@ -849,6 +849,7 @@ static const struct cmd cmds[] = { > { "getnext", do_getnext }, > { "delete", do_delete }, > { "pin", do_pin }, > + { "event_pipe", do_event_pipe }, > { 0 } > }; > > diff --git a/tools/bpf/bpftool/map_perf_ring.c > b/tools/bpf/bpftool/map_perf_ring.c > new file mode 100644 > index 000000000000..c5a2ced8552d > --- /dev/null > +++ b/tools/bpf/bpftool/map_perf_ring.c > @@ -0,0 +1,347 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* Copyright (C) 2018 Netronome Systems, Inc. */ > +/* This program is free software; you can redistribute it and/or > + * modify it under the terms of version 2 of the GNU General Public > + * License as published by the Free Software Foundation. 
> + */ > +#include <errno.h> > +#include <fcntl.h> > +#include <libbpf.h> > +#include <poll.h> > +#include <signal.h> > +#include <stdbool.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <string.h> > +#include <time.h> > +#include <unistd.h> > +#include <linux/bpf.h> > +#include <linux/perf_event.h> > +#include <sys/ioctl.h> > +#include <sys/mman.h> > +#include <sys/syscall.h> > + > +#include <bpf.h> > +#include <perf-sys.h> > + > +#include "main.h" > + > +#define MMAP_PAGE_CNT 16 > + > +static bool stop; > + > +struct event_ring_info { > + int fd; > + int key; > + unsigned int cpu; > + void *mem; > +}; > + > +struct perf_event_sample { > + struct perf_event_header header; > + __u32 size; > + unsigned char data[]; > +}; > + > +static void int_exit(int signo) > +{ > + fprintf(stderr, "Stopping...\n"); > + stop = true; > +} > + > +static void > +print_bpf_output(struct event_ring_info *ring, struct perf_event_sample *e) > +{ > + struct { > + struct perf_event_header header; > + __u64 id; > + __u64 lost; > + } *lost = (void *)e; > + struct timespec ts; > + > + if (clock_gettime(CLOCK_MONOTONIC, &ts)) { > + perror("Can't read clock for timestamp"); > + return; > + } > + > + if (json_output) { > + jsonw_start_object(json_wtr); > + jsonw_name(json_wtr, "timestamp"); > + jsonw_uint(json_wtr, ts.tv_sec * 1000000000ull + ts.tv_nsec); > + jsonw_name(json_wtr, "type"); > + jsonw_uint(json_wtr, e->header.type); > + jsonw_name(json_wtr, "cpu"); > + jsonw_uint(json_wtr, ring->cpu); > + jsonw_name(json_wtr, "index"); > + jsonw_uint(json_wtr, ring->key); > + if (e->header.type == PERF_RECORD_SAMPLE) { > + jsonw_name(json_wtr, "data"); > + print_data_json(e->data, e->size); > + } else if (e->header.type == PERF_RECORD_LOST) { > + jsonw_name(json_wtr, "lost"); > + jsonw_start_object(json_wtr); > + jsonw_name(json_wtr, "id"); > + jsonw_uint(json_wtr, lost->id); > + jsonw_name(json_wtr, "count"); > + jsonw_uint(json_wtr, lost->lost); > + jsonw_end_object(json_wtr); > + } > 
+ jsonw_end_object(json_wtr); > + } else { > + if (e->header.type == PERF_RECORD_SAMPLE) { > + printf("== @%ld.%ld CPU: %d index: %d =====\n", > + (long)ts.tv_sec, ts.tv_nsec, > + ring->cpu, ring->key); > + fprint_hex(stdout, e->data, e->size, " "); > + printf("\n"); > + } else if (e->header.type == PERF_RECORD_LOST) { > + printf("lost %lld events\n", lost->lost); > + } else { > + printf("unknown event type=%d size=%d\n", > + e->header.type, e->header.size); > + } > + } > +} > + > +static void > +perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len) > +{ > + volatile struct perf_event_mmap_page *header = ring->mem; > + __u64 buffer_size = MMAP_PAGE_CNT * get_page_size(); > + __u64 data_tail = header->data_tail; > + __u64 data_head = header->data_head; > + void *base, *begin, *end; > + > + asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ > + if (data_head == data_tail) > + return;
This function has been copied several times into different places. I think it's time to move it into a common library, such as libbpf. It would be great if you could do that in a follow-up. For the set: Acked-by: Alexei Starovoitov <a...@kernel.org>