A helper is added to support tracing kernel type information in BPF
using the BPF Type Format (BTF).  Its signature is

long bpf_trace_btf(struct btf_ptr *ptr, u32 btf_ptr_size, u32 trace_id,
                   u64 flags);

struct btf_ptr * specifies

- a pointer to the data to be traced;
- either the BTF id of the type of data pointed to, or a string
  representation of that type; and
- a flags field, provided for future use.  These flags are not
  to be confused with the BTF_TRACE_F_* flags below that control
  how the btf_ptr is displayed.  The flags member of struct
  btf_ptr may be used to disambiguate types in kernel versus
  module BTF, etc.; the main distinction is that these flags
  relate to the type and the information needed to identify it,
  not to how it is displayed.

The helper also specifies a trace id which is set for the
bpf_trace_printk tracepoint; this allows BPF programs
to filter on specific trace ids, ensuring output does
not become mixed between different traced events and
hard to read.

For example a BPF program with a struct sk_buff *skb
could do the following:

        static const char *skb_type = "struct sk_buff";
        static struct btf_ptr b = { };

        b.ptr = skb;
        b.type = skb_type;
        bpf_trace_btf(&b, sizeof(b), 0, 0);

Default output in the trace_pipe looks like this:

          <idle>-0     [023] d.s.  1825.778400: bpf_trace_printk: (struct sk_buff){
          <idle>-0     [023] d.s.  1825.778409: bpf_trace_printk:  (union){
          <idle>-0     [023] d.s.  1825.778410: bpf_trace_printk:   (struct){
          <idle>-0     [023] d.s.  1825.778412: bpf_trace_printk:    .prev = (struct sk_buff *)0x00000000b2a3df7e,
          <idle>-0     [023] d.s.  1825.778413: bpf_trace_printk:    (union){
          <idle>-0     [023] d.s.  1825.778414: bpf_trace_printk:     .dev = (struct net_device *)0x000000001658808b,
          <idle>-0     [023] d.s.  1825.778416: bpf_trace_printk:     .dev_scratch = (long unsigned int)18446628460391432192,
          <idle>-0     [023] d.s.  1825.778417: bpf_trace_printk:    },
          <idle>-0     [023] d.s.  1825.778417: bpf_trace_printk:   },
          <idle>-0     [023] d.s.  1825.778418: bpf_trace_printk:   .rbnode = (struct rb_node){
          <idle>-0     [023] d.s.  1825.778419: bpf_trace_printk:    .rb_right = (struct rb_node *)0x00000000b2a3df7e,
          <idle>-0     [023] d.s.  1825.778420: bpf_trace_printk:    .rb_left = (struct rb_node *)0x000000001658808b,
          <idle>-0     [023] d.s.  1825.778420: bpf_trace_printk:   },
          <idle>-0     [023] d.s.  1825.778421: bpf_trace_printk:   .list = (struct list_head){
          <idle>-0     [023] d.s.  1825.778422: bpf_trace_printk:    .prev = (struct list_head *)0x00000000b2a3df7e,
          <idle>-0     [023] d.s.  1825.778422: bpf_trace_printk:   },
          <idle>-0     [023] d.s.  1825.778422: bpf_trace_printk:  },
          <idle>-0     [023] d.s.  1825.778426: bpf_trace_printk:  .len = (unsigned int)168,
          <idle>-0     [023] d.s.  1825.778427: bpf_trace_printk:  .mac_len = (__u16)14,
          <idle>-0     [023] d.s.  1825.778428: bpf_trace_printk:  .queue_mapping = (__u16)17,
          <idle>-0     [023] d.s.  1825.778430: bpf_trace_printk:  .head_frag = (__u8)0x1,
          <idle>-0     [023] d.s.  1825.778431: bpf_trace_printk:  .ip_summed = (__u8)0x1,
          <idle>-0     [023] d.s.  1825.778432: bpf_trace_printk:  .l4_hash = (__u8)0x1,
          <idle>-0     [023] d.s.  1825.778433: bpf_trace_printk:  .hash = (__u32)1873247608,
          <idle>-0     [023] d.s.  1825.778434: bpf_trace_printk:  (union){
          <idle>-0     [023] d.s.  1825.778435: bpf_trace_printk:   .napi_id = (unsigned int)8209,
          <idle>-0     [023] d.s.  1825.778436: bpf_trace_printk:   .sender_cpu = (unsigned int)8209,
          <idle>-0     [023] d.s.  1825.778436: bpf_trace_printk:  },
          <idle>-0     [023] d.s.  1825.778437: bpf_trace_printk:  .protocol = (__be16)8,
          <idle>-0     [023] d.s.  1825.778438: bpf_trace_printk:  .transport_header = (__u16)226,
          <idle>-0     [023] d.s.  1825.778439: bpf_trace_printk:  .network_header = (__u16)206,
          <idle>-0     [023] d.s.  1825.778440: bpf_trace_printk:  .mac_header = (__u16)192,
          <idle>-0     [023] d.s.  1825.778440: bpf_trace_printk:  .tail = (sk_buff_data_t)374,
          <idle>-0     [023] d.s.  1825.778441: bpf_trace_printk:  .end = (sk_buff_data_t)1728,
          <idle>-0     [023] d.s.  1825.778442: bpf_trace_printk:  .head = (unsigned char *)0x000000009798cb6b,
          <idle>-0     [023] d.s.  1825.778443: bpf_trace_printk:  .data = (unsigned char *)0x0000000064823282,
          <idle>-0     [023] d.s.  1825.778444: bpf_trace_printk:  .truesize = (unsigned int)2304,
          <idle>-0     [023] d.s.  1825.778445: bpf_trace_printk:  .users = (refcount_t){
          <idle>-0     [023] d.s.  1825.778445: bpf_trace_printk:   .refs = (atomic_t){
          <idle>-0     [023] d.s.  1825.778447: bpf_trace_printk:    .counter = (int)1,
          <idle>-0     [023] d.s.  1825.778447: bpf_trace_printk:   },
          <idle>-0     [023] d.s.  1825.778447: bpf_trace_printk:  },
          <idle>-0     [023] d.s.  1825.778448: bpf_trace_printk: }

Flags modifying display are as follows:

- BTF_TRACE_F_COMPACT:  no formatting around type information
- BTF_TRACE_F_NONAME:   no struct/union member names/types
- BTF_TRACE_F_PTR_RAW:  show raw (unobfuscated) pointer values;
                        equivalent to %px.
- BTF_TRACE_F_ZERO:     show zero-valued struct/union members;
                        they are not displayed by default

Signed-off-by: Alan Maguire <alan.magu...@oracle.com>
---
 include/linux/bpf.h            |   1 +
 include/linux/btf.h            |   9 ++--
 include/uapi/linux/bpf.h       |  63 +++++++++++++++++++++++++
 kernel/bpf/core.c              |   5 ++
 kernel/bpf/helpers.c           |   4 ++
 kernel/trace/bpf_trace.c       | 102 ++++++++++++++++++++++++++++++++++++++++-
 scripts/bpf_helpers_doc.py     |   2 +
 tools/include/uapi/linux/bpf.h |  63 +++++++++++++++++++++++++
 8 files changed, 243 insertions(+), 6 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6143b6e..f67819d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -934,6 +934,7 @@ struct bpf_event_entry {
 const char *kernel_type_name(u32 btf_type_id);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
+const struct bpf_func_proto *bpf_get_trace_btf_proto(void);
 
 typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
                                        unsigned long off, unsigned long len);
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 46bf9f4..3d31e28 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -6,6 +6,7 @@
 
 #include <linux/types.h>
 #include <uapi/linux/btf.h>
+#include <uapi/linux/bpf.h>
 
 #define BTF_TYPE_EMIT(type) ((void)(type *)0)
 
@@ -61,10 +62,10 @@ const struct btf_type *btf_type_id_size(const struct btf 
*btf,
  *     - BTF_SHOW_UNSAFE: skip use of bpf_probe_read() to safely read
  *       data before displaying it.
  */
-#define BTF_SHOW_COMPACT       (1ULL << 0)
-#define BTF_SHOW_NONAME                (1ULL << 1)
-#define BTF_SHOW_PTR_RAW       (1ULL << 2)
-#define BTF_SHOW_ZERO          (1ULL << 3)
+#define BTF_SHOW_COMPACT       BTF_TRACE_F_COMPACT
+#define BTF_SHOW_NONAME                BTF_TRACE_F_NONAME
+#define BTF_SHOW_PTR_RAW       BTF_TRACE_F_PTR_RAW
+#define BTF_SHOW_ZERO          BTF_TRACE_F_ZERO
 #define BTF_SHOW_NONEWLINE     (1ULL << 32)
 #define BTF_SHOW_UNSAFE                (1ULL << 33)
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b134e67..726fee4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3394,6 +3394,36 @@ struct bpf_stack_build_id {
  *             A non-negative value equal to or less than *size* on success,
  *             or a negative error in case of failure.
  *
+ * long bpf_trace_btf(struct btf_ptr *ptr, u32 btf_ptr_size, u32 trace_id, u64 flags)
+ *     Description
+ *             Utilize BTF to trace a representation of *ptr*->ptr, using
+ *             the type name *ptr*->type or the BTF type id *ptr*->type_id.
+ *             These identify the type *ptr*->ptr points to. Traversing that
+ *             data structure using BTF, the type information and values are
+ *             bpf_trace_printk()ed.  Safe copy of the pointer data is
+ *             carried out to avoid kernel crashes during data display.
+ *             Tracing specifies *trace_id* as the id associated with the
+ *             trace event; this can be used to filter trace events
+ *             to show a subset of all traced output, helping to avoid
+ *             the situation where BTF output is intermixed with other
+ *             output.
+ *
+ *             *flags* is a combination of
+ *
+ *             **BTF_TRACE_F_COMPACT**
+ *                     no formatting around type information
+ *             **BTF_TRACE_F_NONAME**
+ *                     no struct/union member names/types
+ *             **BTF_TRACE_F_PTR_RAW**
+ *                     show raw (unobfuscated) pointer values;
+ *                     equivalent to printk specifier %px.
+ *             **BTF_TRACE_F_ZERO**
+ *                     show zero-valued struct/union members; they
+ *                     are not displayed by default
+ *
+ *     Return
+ *             The number of bytes traced, or a negative error in cases of
+ *             failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -3538,6 +3568,7 @@ struct bpf_stack_build_id {
        FN(skc_to_tcp_request_sock),    \
        FN(skc_to_udp6_sock),           \
        FN(get_task_stack),             \
+       FN(trace_btf),                  \
        /* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4446,4 +4477,36 @@ struct bpf_sk_lookup {
        __u32 local_port;       /* Host byte order */
 };
 
+/*
+ * struct btf_ptr is used for typed pointer display; the
+ * additional type string/BTF type id are used to render the pointer
+ * data as the appropriate type via the bpf_trace_btf() helper
+ * above.  A flags field - potentially to specify additional details
+ * about the BTF pointer (rather than its mode of display) - is
+ * present for future use.  Display flags - BTF_TRACE_F_* - are
+ * passed to display functions separately.
+ */
+struct btf_ptr {
+       void *ptr;              /* address of the data to display */
+       const char *type;       /* type name, e.g. "struct sk_buff"; when
+                                * non-NULL it is used instead of type_id
+                                */
+       __u32 type_id;          /* BTF type id; used only when type is NULL */
+       __u32 flags;            /* BTF ptr flags; unused at present. */
+};
+
+/*
+ * Flags to control bpf_trace_btf() behaviour.
+ *     - BTF_TRACE_F_COMPACT: no formatting around type information
+ *     - BTF_TRACE_F_NONAME: no struct/union member names/types
+ *     - BTF_TRACE_F_PTR_RAW: show raw (unobfuscated) pointer values;
+ *       equivalent to printk specifier %px.
+ *     - BTF_TRACE_F_ZERO: show zero-valued struct/union members; they
+ *       are not displayed by default
+ *
+ * bpf_trace_btf() returns -EINVAL if any other flag bit is set.
+ */
+enum {
+       BTF_TRACE_F_COMPACT     =       (1ULL << 0),
+       BTF_TRACE_F_NONAME      =       (1ULL << 1),
+       BTF_TRACE_F_PTR_RAW     =       (1ULL << 2),
+       BTF_TRACE_F_ZERO        =       (1ULL << 3),
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index bde9334..82b3a98 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2214,6 +2214,11 @@ const struct bpf_func_proto * __weak 
bpf_get_trace_printk_proto(void)
        return NULL;
 }
 
+/*
+ * Weak default that reports no bpf_trace_btf() helper prototype;
+ * kernel/trace/bpf_trace.c supplies the non-weak definition.
+ */
+const struct bpf_func_proto * __weak bpf_get_trace_btf_proto(void)
+{
+       return NULL;
+}
+
 u64 __weak
 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
                 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index be43ab3..b9a842b 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -661,6 +661,10 @@ static int __bpf_strtoll(const char *buf, size_t buf_len, 
u64 flags,
                if (!perfmon_capable())
                        return NULL;
                return bpf_get_trace_printk_proto();
+       case BPF_FUNC_trace_btf:
+               if (!perfmon_capable())
+                       return NULL;
+               return bpf_get_trace_btf_proto();
        case BPF_FUNC_jiffies64:
                return &bpf_jiffies64_proto;
        default:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 6453a75..92212a1 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -14,8 +14,12 @@
 #include <linux/spinlock.h>
 #include <linux/syscalls.h>
 #include <linux/error-injection.h>
+#include <linux/btf.h>
 #include <linux/btf_ids.h>
 
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/btf.h>
+
 #include <asm/tlb.h>
 
 #include "trace_probe.h"
@@ -555,10 +559,91 @@ static __printf(1, 0) int bpf_do_trace_printk(const char 
*fmt, ...)
        .arg2_type      = ARG_CONST_SIZE,
 };
 
-const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
+/* Every display flag bpf_trace_btf() accepts; any other bit is rejected. */
+#define BTF_TRACE_F_ALL        (BTF_TRACE_F_COMPACT | BTF_TRACE_F_NONAME | \
+                        BTF_TRACE_F_PTR_RAW | BTF_TRACE_F_ZERO)
+
+/*
+ * bpf_trace_btf() - trace a BTF-typed rendering of the data at ptr->ptr.
+ * @ptr:          the data address plus its type, given either by name
+ *                (ptr->type) or by BTF type id (ptr->type_id); a non-NULL
+ *                name takes precedence over the id
+ * @btf_ptr_size: must equal sizeof(struct btf_ptr)
+ * @trace_id:     handed unchanged to btf_type_trace_show()
+ * @flags:        combination of BTF_TRACE_F_* display flags
+ *
+ * The type is looked up in vmlinux BTF.  A name may carry a "struct ",
+ * "union " or "enum " prefix; without one it is looked up as a typedef
+ * and then as an integer type.
+ *
+ * Return: the result of btf_type_trace_show(), or
+ *   -EINVAL if @flags has unknown bits, @btf_ptr_size is wrong, vmlinux
+ *           BTF is NULL, the name is empty after its prefix, or neither a
+ *           name nor a positive type id was supplied;
+ *   the error encoded in the vmlinux BTF pointer if it is an ERR_PTR;
+ *   a negative error if the type name cannot be read;
+ *   -ENOENT if the type cannot be found.
+ */
+BPF_CALL_4(bpf_trace_btf, struct btf_ptr *, ptr, u32, btf_ptr_size,
+          u32, trace_id, u64, flags)
+{
+       u8 btf_kind = BTF_KIND_TYPEDEF;
+       const struct btf_type *t = NULL;
+       char type_name[KSYM_NAME_LEN];
+       const struct btf *btf;
+       const char *btf_type;
+       s32 btf_id;
+       int ret;
+
+       if (unlikely(flags & ~(BTF_TRACE_F_ALL)))
+               return -EINVAL;
+
+       if (btf_ptr_size != sizeof(struct btf_ptr))
+               return -EINVAL;
+
+       btf = bpf_get_btf_vmlinux();
+
+       /* PTR_ERR(NULL) is 0, which callers would read as success. */
+       if (IS_ERR_OR_NULL(btf))
+               return IS_ERR(btf) ? PTR_ERR(btf) : -EINVAL;
+
+       if (ptr->type != NULL) {
+               /*
+                * Copy as a string rather than a fixed-size blob: this
+                * stops at the terminating NUL instead of reading
+                * sizeof(type_name) bytes past a short name, and it
+                * always leaves type_name NUL-terminated for the string
+                * functions below.
+                */
+               ret = strncpy_from_kernel_nofault(type_name, ptr->type,
+                                                 sizeof(type_name));
+               if (ret < 0)
+                       return ret;
+
+               btf_type = type_name;
+
+               if (strncmp(btf_type, "struct ", strlen("struct ")) == 0) {
+                       btf_kind = BTF_KIND_STRUCT;
+                       btf_type += strlen("struct ");
+               } else if (strncmp(btf_type, "union ", strlen("union ")) == 0) {
+                       btf_kind = BTF_KIND_UNION;
+                       btf_type += strlen("union ");
+               } else if (strncmp(btf_type, "enum ", strlen("enum ")) == 0) {
+                       btf_kind = BTF_KIND_ENUM;
+                       btf_type += strlen("enum ");
+               }
+
+               if (strlen(btf_type) == 0)
+                       return -EINVAL;
+
+               /*
+                * Assume type specified is a typedef as there's not much
+                * benefit in specifying int types other than wasting time
+                * on BTF lookups; we optimize for the most useful path.
+                *
+                * Fall back to BTF_KIND_INT if this fails.
+                */
+               btf_id = btf_find_by_name_kind(btf, btf_type, btf_kind);
+               if (btf_id < 0)
+                       btf_id = btf_find_by_name_kind(btf, btf_type,
+                                                      BTF_KIND_INT);
+       } else if (ptr->type_id > 0) {
+               btf_id = ptr->type_id;
+       } else {
+               return -EINVAL;
+       }
+
+       /* t stays NULL when the lookup failed or the id is not positive. */
+       if (btf_id > 0)
+               t = btf_type_by_id(btf, btf_id);
+       if (!t)
+               return -ENOENT;
+
+       return btf_type_trace_show(btf, btf_id, ptr->ptr, trace_id, flags);
+}
+
+/*
+ * Helper prototype for bpf_trace_btf(): GPL-only, integer return.
+ * arg1/arg2 are the struct btf_ptr memory and its size; arg3 (trace_id)
+ * and arg4 (flags) are declared ARG_ANYTHING and are checked by the
+ * helper itself.
+ */
+static const struct bpf_func_proto bpf_trace_btf_proto = {
+       .func           = bpf_trace_btf,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_MEM,
+       .arg2_type      = ARG_CONST_SIZE,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_ANYTHING,
+};
+
+static void bpf_trace_printk_enable(void)
 {
        /*
-        * This program might be calling bpf_trace_printk,
+        * This program might be calling bpf_trace_[printk|btf],
         * so enable the associated bpf_trace/bpf_trace_printk event.
         * Repeat this each time as it is possible a user has
         * disabled bpf_trace_printk events.  By loading a program
@@ -567,10 +652,21 @@ const struct bpf_func_proto 
*bpf_get_trace_printk_proto(void)
         */
        if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
                pr_warn_ratelimited("could not enable bpf_trace_printk events");
+}
+const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
+{
+       bpf_trace_printk_enable();
 
        return &bpf_trace_printk_proto;
 }
 
+/*
+ * Return the bpf_trace_btf() helper prototype, first (re-)enabling the
+ * bpf_trace_printk event that carries the helper's output.
+ */
+const struct bpf_func_proto *bpf_get_trace_btf_proto(void)
+{
+       bpf_trace_printk_enable();
+
+       return &bpf_trace_btf_proto;
+}
+
 #define MAX_SEQ_PRINTF_VARARGS         12
 #define MAX_SEQ_PRINTF_MAX_MEMCPY      6
 #define MAX_SEQ_PRINTF_STR_LEN         128
@@ -1139,6 +1235,8 @@ static int bpf_send_signal_common(u32 sig, enum pid_type 
type)
                return &bpf_get_current_comm_proto;
        case BPF_FUNC_trace_printk:
                return bpf_get_trace_printk_proto();
+       case BPF_FUNC_trace_btf:
+               return bpf_get_trace_btf_proto();
        case BPF_FUNC_get_smp_processor_id:
                return &bpf_get_smp_processor_id_proto;
        case BPF_FUNC_get_numa_node_id:
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 5bfa448..7c7384b 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -432,6 +432,7 @@ class PrinterHelpers(Printer):
             'struct __sk_buff',
             'struct sk_msg_md',
             'struct xdp_md',
+            'struct btf_ptr',
     ]
     known_types = {
             '...',
@@ -472,6 +473,7 @@ class PrinterHelpers(Printer):
             'struct tcp_request_sock',
             'struct udp6_sock',
             'struct task_struct',
+            'struct btf_ptr',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b134e67..726fee4 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3394,6 +3394,36 @@ struct bpf_stack_build_id {
  *             A non-negative value equal to or less than *size* on success,
  *             or a negative error in case of failure.
  *
+ * long bpf_trace_btf(struct btf_ptr *ptr, u32 btf_ptr_size, u32 trace_id, u64 flags)
+ *     Description
+ *             Utilize BTF to trace a representation of *ptr*->ptr, using
+ *             the type name *ptr*->type or the BTF type id *ptr*->type_id.
+ *             These identify the type *ptr*->ptr points to. Traversing that
+ *             data structure using BTF, the type information and values are
+ *             bpf_trace_printk()ed.  Safe copy of the pointer data is
+ *             carried out to avoid kernel crashes during data display.
+ *             Tracing specifies *trace_id* as the id associated with the
+ *             trace event; this can be used to filter trace events
+ *             to show a subset of all traced output, helping to avoid
+ *             the situation where BTF output is intermixed with other
+ *             output.
+ *
+ *             *flags* is a combination of
+ *
+ *             **BTF_TRACE_F_COMPACT**
+ *                     no formatting around type information
+ *             **BTF_TRACE_F_NONAME**
+ *                     no struct/union member names/types
+ *             **BTF_TRACE_F_PTR_RAW**
+ *                     show raw (unobfuscated) pointer values;
+ *                     equivalent to printk specifier %px.
+ *             **BTF_TRACE_F_ZERO**
+ *                     show zero-valued struct/union members; they
+ *                     are not displayed by default
+ *
+ *     Return
+ *             The number of bytes traced, or a negative error in cases of
+ *             failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -3538,6 +3568,7 @@ struct bpf_stack_build_id {
        FN(skc_to_tcp_request_sock),    \
        FN(skc_to_udp6_sock),           \
        FN(get_task_stack),             \
+       FN(trace_btf),                  \
        /* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4446,4 +4477,36 @@ struct bpf_sk_lookup {
        __u32 local_port;       /* Host byte order */
 };
 
+/*
+ * struct btf_ptr is used for typed pointer display; the
+ * additional type string/BTF type id are used to render the pointer
+ * data as the appropriate type via the bpf_trace_btf() helper
+ * above.  A flags field - potentially to specify additional details
+ * about the BTF pointer (rather than its mode of display) - is
+ * present for future use.  Display flags - BTF_TRACE_F_* - are
+ * passed to display functions separately.
+ */
+struct btf_ptr {
+       void *ptr;              /* address of the data to display */
+       const char *type;       /* type name, e.g. "struct sk_buff"; when
+                                * non-NULL it is used instead of type_id
+                                */
+       __u32 type_id;          /* BTF type id; used only when type is NULL */
+       __u32 flags;            /* BTF ptr flags; unused at present. */
+};
+
+/*
+ * Flags to control bpf_trace_btf() behaviour.
+ *     - BTF_TRACE_F_COMPACT: no formatting around type information
+ *     - BTF_TRACE_F_NONAME: no struct/union member names/types
+ *     - BTF_TRACE_F_PTR_RAW: show raw (unobfuscated) pointer values;
+ *       equivalent to printk specifier %px.
+ *     - BTF_TRACE_F_ZERO: show zero-valued struct/union members; they
+ *       are not displayed by default
+ *
+ * bpf_trace_btf() returns -EINVAL if any other flag bit is set.
+ */
+enum {
+       BTF_TRACE_F_COMPACT     =       (1ULL << 0),
+       BTF_TRACE_F_NONAME      =       (1ULL << 1),
+       BTF_TRACE_F_PTR_RAW     =       (1ULL << 2),
+       BTF_TRACE_F_ZERO        =       (1ULL << 3),
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
1.8.3.1

Reply via email to