A new pmu, perf_kprobe, is created for PERF_TYPE_KPROBE. Based on
input from perf_event_open(), perf_kprobe creates a kprobe (or
kretprobe) for the perf_event. This kprobe is private to this
perf_event, and thus not added to global lists, and not
available in tracefs.

Two functions, create_local_trace_kprobe() and
destroy_local_trace_kprobe(), are added to create and destroy these
local trace_kprobes.

Signed-off-by: Song Liu <songliubrav...@fb.com>
Reviewed-by: Yonghong Song <y...@fb.com>
Reviewed-by: Josef Bacik <jba...@fb.com>
---
 include/linux/trace_events.h    |  2 +
 kernel/events/core.c            | 41 +++++++++++++++++--
 kernel/trace/trace_event_perf.c | 53 ++++++++++++++++++++++++
 kernel/trace/trace_kprobe.c     | 91 +++++++++++++++++++++++++++++++++++++----
 kernel/trace/trace_probe.h      |  7 ++++
 5 files changed, 183 insertions(+), 11 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 2bcb4dc..51f748c9 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -494,6 +494,8 @@ extern int  perf_trace_init(struct perf_event *event);
 extern void perf_trace_destroy(struct perf_event *event);
 extern int  perf_trace_add(struct perf_event *event, int flags);
 extern void perf_trace_del(struct perf_event *event, int flags);
+extern int  perf_kprobe_init(struct perf_event *event);
+extern void perf_kprobe_destroy(struct perf_event *event);
 extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
                                     char *filter_str);
 extern void ftrace_profile_free_filter(struct perf_event *event);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 494eca1..daa6e0a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7970,6 +7970,28 @@ static int perf_tp_event_init(struct perf_event *event)
        return 0;
 }
 
+static int perf_kprobe_event_init(struct perf_event *event)
+{
+       int err;
+
+       if (event->attr.type != PERF_TYPE_KPROBE)
+               return -ENOENT;
+
+       /*
+        * no branch sampling for probe events
+        */
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
+       err = perf_kprobe_init(event);
+       if (err)
+               return err;
+
+       event->destroy = perf_kprobe_destroy;
+
+       return 0;
+}
+
 static struct pmu perf_tracepoint = {
        .task_ctx_nr    = perf_sw_context,
 
@@ -7981,9 +8003,20 @@ static struct pmu perf_tracepoint = {
        .read           = perf_swevent_read,
 };
 
+static struct pmu perf_kprobe = {
+       .task_ctx_nr    = perf_sw_context,
+       .event_init     = perf_kprobe_event_init,
+       .add            = perf_trace_add,
+       .del            = perf_trace_del,
+       .start          = perf_swevent_start,
+       .stop           = perf_swevent_stop,
+       .read           = perf_swevent_read,
+};
+
 static inline void perf_tp_register(void)
 {
        perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
+       perf_pmu_register(&perf_kprobe, "kprobe", PERF_TYPE_KPROBE);
 }
 
 static void perf_event_free_filter(struct perf_event *event)
@@ -8065,7 +8098,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
        bool is_kprobe, is_tracepoint, is_syscall_tp;
        struct bpf_prog *prog;
 
-       if (event->attr.type != PERF_TYPE_TRACEPOINT)
+       if (event->attr.type != PERF_TYPE_TRACEPOINT &&
+           event->attr.type != PERF_TYPE_KPROBE)
                return perf_event_set_bpf_handler(event, prog_fd);
 
        if (event->tp_event->prog)
@@ -8537,8 +8571,9 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg)
        char *filter_str;
        int ret = -EINVAL;
 
-       if ((event->attr.type != PERF_TYPE_TRACEPOINT ||
-           !IS_ENABLED(CONFIG_EVENT_TRACING)) &&
+       if (((event->attr.type != PERF_TYPE_TRACEPOINT &&
+             event->attr.type != PERF_TYPE_KPROBE) ||
+            !IS_ENABLED(CONFIG_EVENT_TRACING)) &&
            !has_addr_filter(event))
                return -EINVAL;
 
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 13ba2d3..7cf0d99 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/kprobes.h>
 #include "trace.h"
+#include "trace_probe.h"
 
 static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
 
@@ -229,6 +230,48 @@ int perf_trace_init(struct perf_event *p_event)
        return ret;
 }
 
+int perf_kprobe_init(struct perf_event *p_event)
+{
+       int ret;
+       char *func = NULL;
+       struct trace_event_call *tp_event;
+
+#ifdef CONFIG_KPROBE_EVENTS
+       if (p_event->attr.kprobe_func) {
+               func = kzalloc(MAX_PROBE_FUNC_NAME_LEN, GFP_KERNEL);
+               if (!func)
+                       return -ENOMEM;
+               ret = strncpy_from_user(
+                       func, u64_to_user_ptr(p_event->attr.kprobe_func),
+                       MAX_PROBE_FUNC_NAME_LEN);
+               if (ret < 0)
+                       goto out;
+
+               if (func[0] == '\0') {
+                       kfree(func);
+                       func = NULL;
+               }
+       }
+
+       tp_event = create_local_trace_kprobe(
+               func, (void *)(unsigned long)(p_event->attr.kprobe_addr),
+               p_event->attr.probe_offset, p_event->attr.config != 0);
+       if (IS_ERR(tp_event)) {
+               ret = PTR_ERR(tp_event);
+               goto out;
+       }
+
+       ret = perf_trace_event_init(tp_event, p_event);
+       if (ret)
+               destroy_local_trace_kprobe(tp_event);
+out:
+       kfree(func);
+       return ret;
+#else
+       return -EOPNOTSUPP;
+#endif /* CONFIG_KPROBE_EVENTS */
+}
+
 void perf_trace_destroy(struct perf_event *p_event)
 {
        mutex_lock(&event_mutex);
@@ -237,6 +280,16 @@ void perf_trace_destroy(struct perf_event *p_event)
        mutex_unlock(&event_mutex);
 }
 
+void perf_kprobe_destroy(struct perf_event *p_event)
+{
+       perf_trace_event_close(p_event);
+       perf_trace_event_unreg(p_event);
+
+#ifdef CONFIG_KPROBE_EVENTS
+       destroy_local_trace_kprobe(p_event->tp_event);
+#endif
+}
+
 int perf_trace_add(struct perf_event *p_event, int flags)
 {
        struct trace_event_call *tp_event = p_event->tp_event;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 8a907e1..16b334a 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -438,6 +438,14 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
                        disable_kprobe(&tk->rp.kp);
                wait = 1;
        }
+
+       /*
+        * if tk is not added to any list, it must be a local trace_kprobe
+        * created with perf_event_open. We don't need to wait for these
+        * trace_kprobes
+        */
+       if (list_empty(&tk->list))
+               wait = 0;
  out:
        if (wait) {
                /*
@@ -1315,12 +1323,9 @@ static struct trace_event_functions kprobe_funcs = {
        .trace          = print_kprobe_event
 };
 
-static int register_kprobe_event(struct trace_kprobe *tk)
+static inline void init_trace_event_call(struct trace_kprobe *tk,
+                                        struct trace_event_call *call)
 {
-       struct trace_event_call *call = &tk->tp.call;
-       int ret;
-
-       /* Initialize trace_event_call */
        INIT_LIST_HEAD(&call->class->fields);
        if (trace_kprobe_is_return(tk)) {
                call->event.funcs = &kretprobe_funcs;
@@ -1329,6 +1334,19 @@ static int register_kprobe_event(struct trace_kprobe *tk)
                call->event.funcs = &kprobe_funcs;
                call->class->define_fields = kprobe_event_define_fields;
        }
+
+       call->flags = TRACE_EVENT_FL_KPROBE;
+       call->class->reg = kprobe_register;
+       call->data = tk;
+}
+
+static int register_kprobe_event(struct trace_kprobe *tk)
+{
+       struct trace_event_call *call = &tk->tp.call;
+       int ret = 0;
+
+       init_trace_event_call(tk, call);
+
        if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
                return -ENOMEM;
        ret = register_trace_event(&call->event);
@@ -1336,9 +1354,6 @@ static int register_kprobe_event(struct trace_kprobe *tk)
                kfree(call->print_fmt);
                return -ENODEV;
        }
-       call->flags = TRACE_EVENT_FL_KPROBE;
-       call->class->reg = kprobe_register;
-       call->data = tk;
        ret = trace_add_event_call(call);
        if (ret) {
                pr_info("Failed to register kprobe event: %s\n",
@@ -1360,6 +1375,66 @@ static int unregister_kprobe_event(struct trace_kprobe *tk)
        return ret;
 }
 
+#ifdef CONFIG_PERF_EVENTS
+/* create a trace_kprobe, but don't add it to global lists */
+struct trace_event_call *
+create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
+                         bool is_return)
+{
+       struct trace_kprobe *tk;
+       int ret;
+       char *event;
+
+       /*
+        * local trace_kprobes are not added to probe_list, so they are never
+        * searched in find_trace_kprobe(). Therefore, there is no concern of
+        * duplicated name here.
+        */
+       event = func ? func : "DUMMY_EVENT";
+
+       tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, (void *)addr, func,
+                               offs, 0 /* maxactive */, 0 /* nargs */,
+                               is_return);
+
+       if (IS_ERR(tk)) {
+               pr_info("Failed to allocate trace_probe.(%d)\n",
+                       (int)PTR_ERR(tk));
+               return ERR_CAST(tk);
+       }
+
+       init_trace_event_call(tk, &tk->tp.call);
+
+       if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
+               ret = -ENOMEM;
+               goto error;
+       }
+
+       ret = __register_trace_kprobe(tk);
+       if (ret < 0)
+               goto error;
+
+       return &tk->tp.call;
+error:
+       free_trace_kprobe(tk);
+       return ERR_PTR(ret);
+}
+
+void destroy_local_trace_kprobe(struct trace_event_call *event_call)
+{
+       struct trace_kprobe *tk;
+
+       tk = container_of(event_call, struct trace_kprobe, tp.call);
+
+       if (trace_probe_is_enabled(&tk->tp)) {
+               WARN_ON(1);
+               return;
+       }
+
+       __unregister_trace_kprobe(tk);
+       free_trace_kprobe(tk);
+}
+#endif /* CONFIG_PERF_EVENTS */
+
 /* Make a tracefs interface for controlling probe points */
 static __init int init_kprobe_trace(void)
 {
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 903273c..910ae1b 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -411,3 +411,10 @@ store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
 }
 
 extern int set_print_fmt(struct trace_probe *tp, bool is_return);
+
+#ifdef CONFIG_PERF_EVENTS
+extern struct trace_event_call *
+create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
+                         bool is_return);
+extern void destroy_local_trace_kprobe(struct trace_event_call *event_call);
+#endif
-- 
2.9.5

Reply via email to