kp_events.c: handle ktap event management (registry, destroy, event callback)
This file is the core event management interface between ktap and the
kernel. Exposed functions:

1) kp_events_init/kp_events_exit

2) kp_event_create_kprobe
   Create a kprobe event, for example:
       kdebug.kprobe("SyS_futex", function () {})

3) kp_event_create_tracepoint
   Create a raw tracepoint event, for example:
       kdebug.tracepoint("sys_enter_futex", function () {})

4) kp_event_create
   Create a perf-backed event, for example:
       trace sched:sched_switch { print(argstr) }
   It calls the kernel function perf_event_create_kernel_counter()
   to register the event (tracepoint/kprobe/uprobe).

5) kp_event_getarg
   Get an event argument, arg0 through arg9; can only be called in
   probe context, for example:
       trace sched:sched_switch { print(arg0, arg1) }

6) kp_event_stringify/kp_event_tostr
   Stringify argstr. If argstr is stored as a table key, it needs to
   be stringified first, like below:
       var s = {}
       trace sched:sched_switch { s[argstr] += 1 }
   (This usage is quite rare, but ktap supports it.)

Note: why does ktap support 'kdebug.kprobe' and 'kdebug.tracepoint'
when it already supports perf-backed events (trace xxx {})? Because
benchmarks show the raw kprobe and tracepoint interfaces are faster
than perf-backed tracing, by roughly 10% or more, and it is fairer to
compare with SystemTap using the raw tracing syntax rather than the
perf backend. Perf-backed tracing has a long code path before it
reaches the ktap callback, and it must copy the event buffer first.
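For reference, a minimal sketch (not part of this patch) of how a caller
might drive kp_event_create() for a perf-backed tracepoint. The helper
name and 'ev_id' parameter are hypothetical; ev_id is assumed to be a
tracepoint id such as one read from the debugfs tracing/events/*/id
file, and PERF_SAMPLE_RAW is assumed since argstr decoding reads the
raw sample data:

    /* hypothetical caller sketch, assuming ev_id is a tracepoint id */
    static int example_create_tracepoint(ktap_state_t *ks, u64 ev_id,
                                         ktap_func_t *fn)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.type = PERF_TYPE_TRACEPOINT;   /* perf backend event */
            attr.size = sizeof(attr);
            attr.config = ev_id;                /* tracepoint id */
            attr.sample_type = PERF_SAMPLE_RAW; /* raw data for argstr */
            attr.sample_period = 1;             /* callback on every hit */

            /* NULL task: system-wide; NULL filter: no event filter */
            return kp_event_create(ks, &attr, NULL, NULL, fn);
    }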
Signed-off-by: Jovi Zhangwei <jovi.zhang...@gmail.com>
---
 tools/ktap/runtime/kp_events.c | 832 +++++++++++++++++++++++++++++++++++++++++
 tools/ktap/runtime/kp_events.h |  71 ++++
 2 files changed, 903 insertions(+)
 create mode 100644 tools/ktap/runtime/kp_events.c
 create mode 100644 tools/ktap/runtime/kp_events.h

diff --git a/tools/ktap/runtime/kp_events.c b/tools/ktap/runtime/kp_events.c
new file mode 100644
index 0000000..b008626
--- /dev/null
+++ b/tools/ktap/runtime/kp_events.c
@@ -0,0 +1,832 @@
+/*
+ * kp_events.c - ktap events management (registry, destroy, event callback)
+ *
+ * This file is part of ktap by Jovi Zhangwei.
+ *
+ * Copyright (C) 2012-2014 Jovi Zhangwei <jovi.zhang...@gmail.com>.
+ *
+ * ktap is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * ktap is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <asm/syscall.h>
+#include "../include/ktap_types.h"
+#include "ktap.h"
+#include "kp_obj.h"
+#include "kp_str.h"
+#include "kp_transport.h"
+#include "kp_vm.h"
+#include "kp_events.h"
+
+const char *kp_event_tostr(ktap_state_t *ks)
+{
+        struct ktap_event_data *e = ks->current_event;
+        struct ftrace_event_call *call;
+        struct trace_iterator *iter;
+        struct trace_event *ev;
+        enum print_line_t ret = TRACE_TYPE_NO_CONSUME;
+        static const char *dummy_msg = "argstr_not_available";
+
+        /* need to check that the current context is a valid tracing context */
+        if (!ks->current_event) {
+                kp_error(ks, "cannot stringify event str in invalid context\n");
+                return NULL;
+        }
+
+        /* check if stringified before */
+        if (ks->current_event->argstr)
+                return getstr(ks->current_event->argstr);
+
+        /* timer events and raw tracepoints don't have an associated argstr */
+        if (e->event->type == KTAP_EVENT_TYPE_PERF && e->event->perf->tp_event)
+                call = e->event->perf->tp_event;
+        else
+                return dummy_msg;
+
+        /* simulate the iterator */
+
+        /*
+         * Use a temp percpu buffer as the trace_iterator; we cannot use
+         * the print_buffer because we may be called from printf.
+         */
+        iter = kp_this_cpu_temp_buffer(ks);
+
+        trace_seq_init(&iter->seq);
+        iter->ent = e->data->raw->data;
+
+        ev = &(call->event);
+        if (ev)
+                ret = ev->funcs->trace(iter, 0, ev);
+
+        if (ret != TRACE_TYPE_NO_CONSUME) {
+                struct trace_seq *s = &iter->seq;
+                int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
+
+                s->buffer[len] = '\0';
+                return &s->buffer[0];
+        }
+
+        return dummy_msg;
+}
+
+/* return the string representation of 'argstr' */
+const ktap_str_t *kp_event_stringify(ktap_state_t *ks)
+{
+        const char *str;
+        ktap_str_t *ts;
+
+        /* check if stringified before */
+        if (ks->current_event->argstr)
+                return ks->current_event->argstr;
+
+        str = kp_event_tostr(ks);
+        if (!str)
+                return NULL;
+
+        ts = kp_str_newz(ks, str);
+        ks->current_event->argstr = ts;
+        return ts;
+}
+
+/*
+ * This definition should be kept in sync with kernel/trace/trace.h.
+ * TODO: export this struct from the kernel
+ */
+struct ftrace_event_field {
+        struct list_head link;
+        const char *name;
+        const char *type;
+        int filter_type;
+        int offset;
+        int size;
+        int is_signed;
+};
+
+static struct list_head *get_fields(struct ftrace_event_call *event_call)
+{
+        if (!event_call->class->get_fields)
+                return &event_call->class->fields;
+        return event_call->class->get_fields(event_call);
+}
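+/*
+ * Per the commit description, argN in probe context (e.g.
+ * 'trace sched:sched_switch { print(arg0, arg1) }') is served by
+ * kp_event_getarg() with the matching index, reading the value out of
+ * the pre-computed field table of the current event.
+ */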
+void kp_event_getarg(ktap_state_t *ks, ktap_val_t *ra, int idx)
+{
+        struct ktap_event_data *e = ks->current_event;
+        struct ktap_event *event = e->event;
+        struct ktap_event_field *event_fields = &event->fields[idx];
+
+        switch (event_fields->type) {
+        case KTAP_EVENT_FIELD_TYPE_INT: {
+                struct trace_entry *entry = e->data->raw->data;
+                void *value = (unsigned char *)entry + event_fields->offset;
+                int n = *(int *)value;
+                set_number(ra, n);
+                return;
+        }
+        case KTAP_EVENT_FIELD_TYPE_LONG: {
+                struct trace_entry *entry = e->data->raw->data;
+                void *value = (unsigned char *)entry + event_fields->offset;
+                long n = *(long *)value;
+                set_number(ra, n);
+                return;
+        }
+        case KTAP_EVENT_FIELD_TYPE_STRING: {
+                struct trace_entry *entry = e->data->raw->data;
+                ktap_str_t *ts;
+                void *value = (unsigned char *)entry + event_fields->offset;
+                ts = kp_str_newz(ks, (char *)value);
+                if (ts)
+                        set_string(ra, ts);
+                else
+                        set_nil(ra);
+                return;
+        }
+        case KTAP_EVENT_FIELD_TYPE_CONST: {
+                set_number(ra, (ktap_number)event_fields->offset);
+                return;
+        }
+        case KTAP_EVENT_FIELD_TYPE_REGISTER: {
+                unsigned long *reg = (unsigned long *)((u8 *)e->regs +
+                                        event_fields->offset);
+                set_number(ra, *reg);
+                return;
+        }
+        case KTAP_EVENT_FIELD_TYPE_NIL:
+                set_nil(ra);
+                return;
+        case KTAP_EVENT_FIELD_TYPE_INVALID:
+                kp_error(ks, "the field type is not supported yet\n");
+                set_nil(ra);
+                return;
+        }
+}
+
+/* init all fields of the event, for quick arg1..arg9 access */
+static int init_event_fields(ktap_state_t *ks, struct ktap_event *event)
+{
+        struct ftrace_event_call *event_call = event->perf->tp_event;
+        struct ktap_event_field *event_fields = &event->fields[0];
+        struct ftrace_event_field *field;
+        struct list_head *head;
+        int idx = 0, n = 0;
+
+        /* only init fields for tracepoint events, not timer events */
+        if (!event_call)
+                return 0;
+
+        /* intern probe name */
+        event->name = kp_str_newz(ks, event_call->name);
+        if (unlikely(!event->name))
+                return -ENOMEM;
+
+        head = get_fields(event_call);
+        list_for_each_entry_reverse(field, head, link) {
+                if (n++ == 9) {
+                        /*
+                         * Some events have more than 9 fields; just
+                         * ignore the remaining fields at present.
+                         *
+                         * TODO: support access to all fields of a
+                         * tracepoint event
+                         *
+                         * Examples: mce:mce_record, ext4:ext4_writepages, ...
+                         */
+                        return 0;
+                }
+
+                event_fields[idx].offset = field->offset;
+
+                if (field->size == 4) {
+                        event_fields[idx].type = KTAP_EVENT_FIELD_TYPE_INT;
+                        idx++;
+                        continue;
+                } else if (field->size == 8) {
+                        event_fields[idx].type = KTAP_EVENT_FIELD_TYPE_LONG;
+                        idx++;
+                        continue;
+                }
+                if (!strncmp(field->type, "char", 4)) {
+                        event_fields[idx].type = KTAP_EVENT_FIELD_TYPE_STRING;
+                        idx++;
+                        continue;
+                }
+
+                /* TODO: add more type checks */
+                event_fields[idx++].type = KTAP_EVENT_FIELD_TYPE_INVALID;
+        }
+
+        /* init all remaining fields as NIL */
+        while (idx < 9)
+                event_fields[idx++].type = KTAP_EVENT_FIELD_TYPE_NIL;
+
+        return 0;
+}
+
+static inline void call_probe_closure(ktap_state_t *mainthread,
+                                      ktap_func_t *fn,
+                                      struct ktap_event_data *e, int rctx)
+{
+        ktap_state_t *ks;
+        ktap_val_t *func;
+
+        ks = kp_vm_new_thread(mainthread, rctx);
+        set_func(ks->top, fn);
+        func = ks->top;
+        incr_top(ks);
+
+        ks->current_event = e;
+
+        kp_vm_call(ks, func, 0);
+
+        ks->current_event = NULL;
+        kp_vm_exit_thread(ks);
+}
+
+/*
+ * Callback tracing function for the perf event subsystem.
+ *
+ * To make ktap reentrant, don't disable irqs in the callback function,
+ * same as perf and ftrace. Reentrancy requires some percpu data for
+ * context isolation (irq/sirq/nmi/process).
+ *
+ * The recursion check here mainly exists to avoid corrupting
+ * ktap_state_t via the timer closure callback. For tracepoint
+ * recursion, the perf core already handles it.
+ *
+ * Note the tracepoint handler is called with rcu_read_lock held.
+ */
+static void perf_callback(struct perf_event *perf_event,
+                          struct perf_sample_data *data,
+                          struct pt_regs *regs)
+{
+        struct ktap_event *event;
+        struct ktap_event_data e;
+        ktap_state_t *ks;
+        int rctx;
+
+        event = perf_event->overflow_handler_context;
+        ks = event->ks;
+
+        if (unlikely(ks->stop))
+                return;
+
+        rctx = get_recursion_context(ks);
+        if (unlikely(rctx < 0))
+                return;
+
+        e.event = event;
+        e.data = data;
+        e.regs = regs;
+        e.argstr = NULL;
+
+        call_probe_closure(ks, event->fn, &e, rctx);
+
+        put_recursion_context(ks, rctx);
+}
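+/*
+ * Script-level mapping (from the commit description): a statement like
+ *
+ *     trace sched:sched_switch { print(argstr) }
+ *
+ * is implemented via kp_event_create() below, which registers the
+ * event through perf_event_create_kernel_counter() with perf_callback
+ * as the overflow handler.
+ */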
+/*
+ * Generic ktap event creation function (based on the perf callback),
+ * intended for tracepoints/kprobe/uprobe/profile-timer/hw_breakpoint/pmu.
+ */
+int kp_event_create(ktap_state_t *ks, struct perf_event_attr *attr,
+                    struct task_struct *task, const char *filter,
+                    ktap_func_t *fn)
+{
+        struct ktap_event *event;
+        struct perf_event *perf_event;
+        void *callback = perf_callback;
+        int cpu, ret;
+
+        if (G(ks)->parm->dry_run)
+                callback = NULL;
+
+        /*
+         * Don't trace until ktap_wait; the reasons are:
+         * 1) some events may hit before the filter is applied
+         * 2) it is simpler to manage the tracing thread
+         * 3) it avoids a race with the mainthread.
+         *
+         * Another way to do this is to set attr.disabled to 1, then call
+         * perf_event_enable after the filter is applied; however,
+         * perf_event_enable was not exported in kernels older than 3.3,
+         * so we dropped that method.
+         */
+        ks->stop = 1;
+
+        for_each_cpu(cpu, G(ks)->cpumask) {
+                event = kzalloc(sizeof(struct ktap_event), GFP_KERNEL);
+                if (!event)
+                        return -ENOMEM;
+
+                event->type = KTAP_EVENT_TYPE_PERF;
+                event->ks = ks;
+                event->fn = fn;
+                perf_event = perf_event_create_kernel_counter(attr, cpu, task,
+                                                              callback, event);
+                if (IS_ERR(perf_event)) {
+                        int err = PTR_ERR(perf_event);
+                        kp_error(ks, "unable to register perf event: "
+                                     "[cpu: %d; id: %llu; err: %d]\n",
+                                     cpu, attr->config, err);
+                        kfree(event);
+                        return err;
+                }
+
+                if (attr->type == PERF_TYPE_TRACEPOINT) {
+                        const char *name = perf_event->tp_event->name;
+                        kp_verbose_printf(ks, "enable perf event: "
+                                              "[cpu: %d; id: %llu; name: %s; "
+                                              "filter: %s; pid: %d]\n",
+                                              cpu, attr->config, name, filter,
+                                              task ? task_tgid_vnr(task) : -1);
+                } else if (attr->type == PERF_TYPE_SOFTWARE &&
+                           attr->config == PERF_COUNT_SW_CPU_CLOCK) {
+                        kp_verbose_printf(ks, "enable profile event: "
+                                              "[cpu: %d; sample_period: %llu]\n",
+                                              cpu, attr->sample_period);
+                } else {
+                        kp_verbose_printf(ks, "unknown perf event type\n");
+                }
+
+                event->perf = perf_event;
+                INIT_LIST_HEAD(&event->list);
+                list_add_tail(&event->list, &G(ks)->events_head);
+
+                ret = init_event_fields(ks, event);
+                if (ret) {
+                        kp_error(ks, "unable to init event fields, id %llu\n",
+                                     attr->config);
+                        perf_event_release_kernel(event->perf);
+                        list_del(&event->list);
+                        kfree(event);
+                        return ret;
+                }
+
+                if (!filter)
+                        continue;
+
+                ret = kp_ftrace_profile_set_filter(perf_event, attr->config,
+                                                   filter);
+                if (ret) {
+                        kp_error(ks, "unable to set event filter: "
+                                     "[id: %llu; filter: %s; ret: %d]\n",
+                                     attr->config, filter, ret);
+                        perf_event_release_kernel(event->perf);
+                        list_del(&event->list);
+                        kfree(event);
+                        return ret;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * The tracepoint callback prototype differs per event; ignore the
+ * prototype here and just use the first argument (our registration
+ * data).
+ */
+static void probe_callback(void *__data)
+{
+        struct ktap_event *event = __data;
+        ktap_state_t *ks = event->ks;
+        struct ktap_event_data e;
+        struct pt_regs regs; /* pt_regs may be large for the stack */
+        int rctx;
+
+        if (unlikely(ks->stop))
+                return;
+
+        rctx = get_recursion_context(ks);
+        if (unlikely(rctx < 0))
+                return;
+
+        perf_fetch_caller_regs(&regs);
+
+        e.event = event;
+        e.data = NULL; /* no perf sample data for raw tracepoints */
+        e.regs = &regs;
+        e.argstr = NULL;
+
+        call_probe_closure(ks, event->fn, &e, rctx);
+
+        put_recursion_context(ks, rctx);
+}
+/*
+ * syscall events handling
+ */
+
+static DEFINE_MUTEX(syscall_trace_lock);
+static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
+static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
+static int sys_refcount_enter;
+static int sys_refcount_exit;
+
+static int get_syscall_num(const char *name)
+{
+        int i;
+
+        for (i = 0; i < NR_syscalls; i++) {
+                if (syscalls_metadata[i] &&
+                    !strcmp(name, syscalls_metadata[i]->name + 4))
+                        return i;
+        }
+        return -1;
+}
+
+static void trace_syscall_enter(void *data, struct pt_regs *regs, long id)
+{
+        struct ktap_event *event = data;
+        ktap_state_t *ks = event->ks;
+        struct ktap_event_data e;
+        int syscall_nr;
+        int rctx;
+
+        if (unlikely(ks->stop))
+                return;
+
+        syscall_nr = syscall_get_nr(current, regs);
+        if (unlikely(syscall_nr < 0))
+                return;
+        if (!test_bit(syscall_nr, enabled_enter_syscalls))
+                return;
+
+        rctx = get_recursion_context(ks);
+        if (unlikely(rctx < 0))
+                return;
+
+        e.event = event;
+        e.data = NULL; /* no perf sample data for raw syscall events */
+        e.regs = regs;
+        e.argstr = NULL;
+
+        call_probe_closure(ks, event->fn, &e, rctx);
+
+        put_recursion_context(ks, rctx);
+}
+
+static void trace_syscall_exit(void *data, struct pt_regs *regs, long id)
+{
+        struct ktap_event *event = data;
+        ktap_state_t *ks = event->ks;
+        struct ktap_event_data e;
+        int syscall_nr;
+        int rctx;
+
+        if (unlikely(ks->stop))
+                return;
+
+        syscall_nr = syscall_get_nr(current, regs);
+        if (unlikely(syscall_nr < 0))
+                return;
+        if (!test_bit(syscall_nr, enabled_exit_syscalls))
+                return;
+
+        rctx = get_recursion_context(ks);
+        if (unlikely(rctx < 0))
+                return;
+
+        e.event = event;
+        e.data = NULL; /* no perf sample data for raw syscall events */
+        e.regs = regs;
+        e.argstr = NULL;
+
+        call_probe_closure(ks, event->fn, &e, rctx);
+
+        put_recursion_context(ks, rctx);
+}
+
+/* called in dry-run mode, to compare overhead with a normal vm call */
+static void dry_run_callback(void *data, struct pt_regs *regs, long id)
+{
+}
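+/*
+ * Field layout for syscall events, as set up below:
+ *   fields[0]            CONST    -> the syscall number
+ *   fields[1..nb_args]   REGISTER -> pt_regs offsets of the syscall
+ *                                    arguments (x86-64 ABI: di, si,
+ *                                    dx, r10, r8, r9)
+ *   sys_exit only:       fields[1]  REGISTER -> pt_regs.ax (retval)
+ */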
+static void init_syscall_event_fields(struct ktap_event *event, int is_enter)
+{
+        struct ftrace_event_call *event_call;
+        struct ktap_event_field *event_fields = &event->fields[0];
+        struct syscall_metadata *meta = syscalls_metadata[event->syscall_nr];
+        int idx = 0;
+
+        event_call = is_enter ? meta->enter_event : meta->exit_event;
+
+        event_fields[0].type = KTAP_EVENT_FIELD_TYPE_CONST;
+        event_fields[0].offset = event->syscall_nr;
+
+        if (!is_enter) {
+#ifdef CONFIG_X86_64
+                event_fields[1].type = KTAP_EVENT_FIELD_TYPE_REGISTER;
+                event_fields[1].offset = offsetof(struct pt_regs, ax);
+#endif
+                return;
+        }
+
+        while (idx++ < meta->nb_args) {
+                event_fields[idx].type = KTAP_EVENT_FIELD_TYPE_REGISTER;
+#ifdef CONFIG_X86_64
+                switch (idx) {
+                case 1:
+                        event_fields[idx].offset = offsetof(struct pt_regs, di);
+                        break;
+                case 2:
+                        event_fields[idx].offset = offsetof(struct pt_regs, si);
+                        break;
+                case 3:
+                        event_fields[idx].offset = offsetof(struct pt_regs, dx);
+                        break;
+                case 4:
+                        event_fields[idx].offset =
+                                        offsetof(struct pt_regs, r10);
+                        break;
+                case 5:
+                        event_fields[idx].offset = offsetof(struct pt_regs, r8);
+                        break;
+                case 6:
+                        event_fields[idx].offset = offsetof(struct pt_regs, r9);
+                        break;
+                }
+#else
+#error "syscall tracepoint register access is not supported on this arch; use 'trace syscalls:* {}' instead"
+#endif
+        }
+
+        /* init all remaining fields as NIL */
+        while (idx < 9)
+                event_fields[idx++].type = KTAP_EVENT_FIELD_TYPE_NIL;
+}
+
+static int syscall_event_register(ktap_state_t *ks, const char *event_name,
+                                  struct ktap_event *event)
+{
+        int syscall_nr = 0, is_enter = 0;
+        void *callback = NULL;
+        int ret = 0;
+
+        if (!strncmp(event_name, "sys_enter_", 10)) {
+                is_enter = 1;
+                event->type = KTAP_EVENT_TYPE_SYSCALL_ENTER;
+                syscall_nr = get_syscall_num(event_name + 10);
+                callback = trace_syscall_enter;
+        } else if (!strncmp(event_name, "sys_exit_", 9)) {
+                is_enter = 0;
+                event->type = KTAP_EVENT_TYPE_SYSCALL_EXIT;
+                syscall_nr = get_syscall_num(event_name + 9);
+                callback = trace_syscall_exit;
+        }
+
+        if (G(ks)->parm->dry_run)
+                callback = dry_run_callback;
+
+        if (syscall_nr < 0)
+                return -1;
+
+        event->syscall_nr = syscall_nr;
+
+        init_syscall_event_fields(event, is_enter);
+
+        mutex_lock(&syscall_trace_lock);
+        if (is_enter) {
+                if (!sys_refcount_enter)
+                        ret = register_trace_sys_enter(callback, event);
+                if (!ret) {
+                        set_bit(syscall_nr, enabled_enter_syscalls);
+                        sys_refcount_enter++;
+                }
+        } else {
+                if (!sys_refcount_exit)
+                        ret = register_trace_sys_exit(callback, event);
+                if (!ret) {
+                        set_bit(syscall_nr, enabled_exit_syscalls);
+                        sys_refcount_exit++;
+                }
+        }
+        mutex_unlock(&syscall_trace_lock);
+
+        return ret;
+}
+
+static int syscall_event_unregister(ktap_state_t *ks, struct ktap_event *event)
+{
+        int ret = 0;
+        void *callback;
+
+        if (event->type == KTAP_EVENT_TYPE_SYSCALL_ENTER)
+                callback = trace_syscall_enter;
+        else
+                callback = trace_syscall_exit;
+
+        if (G(ks)->parm->dry_run)
+                callback = dry_run_callback;
+
+        mutex_lock(&syscall_trace_lock);
+        if (event->type == KTAP_EVENT_TYPE_SYSCALL_ENTER) {
+                sys_refcount_enter--;
+                clear_bit(event->syscall_nr, enabled_enter_syscalls);
+                if (!sys_refcount_enter)
+                        unregister_trace_sys_enter(callback, event);
+        } else {
+                sys_refcount_exit--;
+                clear_bit(event->syscall_nr, enabled_exit_syscalls);
+                if (!sys_refcount_exit)
+                        unregister_trace_sys_exit(callback, event);
+        }
+        mutex_unlock(&syscall_trace_lock);
+
+        return ret;
+}
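+/*
+ * Script-level mapping (from the commit description):
+ *
+ *     kdebug.tracepoint("sys_enter_futex", function () {})
+ *
+ * reaches kp_event_create_tracepoint() below with the tracepoint name
+ * and the compiled closure.
+ */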
+/*
+ * Register a tracepoint event directly, not based on the perf callback.
+ *
+ * This tracing method is faster than the perf callback, because it
+ * doesn't need to write trace data into any temporary buffer, and the
+ * code path is much shorter than the perf callback's.
+ */
+int kp_event_create_tracepoint(ktap_state_t *ks, const char *event_name,
+                               ktap_func_t *fn)
+{
+        struct ktap_event *event;
+        void *callback = probe_callback;
+        int is_syscall = 0;
+        int ret;
+
+        if (G(ks)->parm->dry_run)
+                callback = NULL;
+
+        if (!strncmp(event_name, "sys_enter_", 10) ||
+            !strncmp(event_name, "sys_exit_", 9))
+                is_syscall = 1;
+
+        event = kzalloc(sizeof(struct ktap_event), GFP_KERNEL);
+        if (!event)
+                return -ENOMEM;
+
+        event->ks = ks;
+        event->fn = fn;
+        event->name = kp_str_newz(ks, event_name);
+        if (unlikely(!event->name)) {
+                kfree(event);
+                return -ENOMEM;
+        }
+
+        INIT_LIST_HEAD(&event->list);
+        list_add_tail(&event->list, &G(ks)->events_head);
+
+        if (is_syscall) {
+                ret = syscall_event_register(ks, event_name, event);
+        } else {
+                event->type = KTAP_EVENT_TYPE_TRACEPOINT;
+                ret = tracepoint_probe_register(event_name, callback, event);
+        }
+
+        if (ret) {
+                kp_error(ks, "register tracepoint %s failed, ret: %d\n",
+                             event_name, ret);
+                list_del(&event->list);
+                kfree(event);
+                return ret;
+        }
+        return 0;
+}
+
+/* kprobe handler */
+static int __kprobes pre_handler_kprobe(struct kprobe *p, struct pt_regs *regs)
+{
+        struct ktap_event *event = container_of(p, struct ktap_event, kp);
+        ktap_state_t *ks = event->ks;
+        struct ktap_event_data e;
+        int rctx;
+
+        if (unlikely(ks->stop))
+                return 0;
+
+        rctx = get_recursion_context(ks);
+        if (unlikely(rctx < 0))
+                return 0;
+
+        e.event = event;
+        e.data = NULL; /* no perf sample data for raw kprobes */
+        e.regs = regs;
+        e.argstr = NULL;
+
+        call_probe_closure(ks, event->fn, &e, rctx);
+
+        put_recursion_context(ks, rctx);
+        return 0;
+}
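+/*
+ * Script-level mapping (from the commit description):
+ *
+ *     kdebug.kprobe("SyS_futex", function () {})
+ *
+ * reaches kp_event_create_kprobe() below with the symbol name and the
+ * compiled closure.
+ */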
+/*
+ * Register a kprobe event directly, not based on the perf callback.
+ *
+ * This tracing method is faster than the perf callback, because it
+ * doesn't need to write trace data into any temporary buffer, and the
+ * code path is much shorter than the perf callback's.
+ */
+int kp_event_create_kprobe(ktap_state_t *ks, const char *event_name,
+                           ktap_func_t *fn)
+{
+        struct ktap_event *event;
+        void *callback = pre_handler_kprobe;
+        int ret;
+
+        if (G(ks)->parm->dry_run)
+                callback = NULL;
+
+        event = kzalloc(sizeof(struct ktap_event), GFP_KERNEL);
+        if (!event)
+                return -ENOMEM;
+
+        event->ks = ks;
+        event->fn = fn;
+        event->name = kp_str_newz(ks, event_name);
+        if (unlikely(!event->name)) {
+                kfree(event);
+                return -ENOMEM;
+        }
+
+        INIT_LIST_HEAD(&event->list);
+        list_add_tail(&event->list, &G(ks)->events_head);
+
+        event->type = KTAP_EVENT_TYPE_KPROBE;
+
+        event->kp.symbol_name = event_name;
+        event->kp.pre_handler = callback;
+        ret = register_kprobe(&event->kp);
+        if (ret) {
+                kp_error(ks, "register kprobe event %s failed, ret: %d\n",
+                             event_name, ret);
+                list_del(&event->list);
+                kfree(event);
+                return ret;
+        }
+        return 0;
+}
+
+static void events_destroy(ktap_state_t *ks)
+{
+        struct ktap_event *event;
+        struct list_head *tmp, *pos;
+        struct list_head *head = &G(ks)->events_head;
+
+        list_for_each(pos, head) {
+                event = container_of(pos, struct ktap_event, list);
+                if (event->type == KTAP_EVENT_TYPE_PERF)
+                        perf_event_release_kernel(event->perf);
+                else if (event->type == KTAP_EVENT_TYPE_TRACEPOINT)
+                        tracepoint_probe_unregister(getstr(event->name),
+                                                    probe_callback, event);
+                else if (event->type == KTAP_EVENT_TYPE_SYSCALL_ENTER ||
+                         event->type == KTAP_EVENT_TYPE_SYSCALL_EXIT)
+                        syscall_event_unregister(ks, event);
+                else if (event->type == KTAP_EVENT_TYPE_KPROBE)
+                        unregister_kprobe(&event->kp);
+        }
+        /*
+         * Ensure our callbacks won't be called anymore. The buffers
+         * will be freed after that.
+         */
+        tracepoint_synchronize_unregister();
+
+        list_for_each_safe(pos, tmp, head) {
+                event = container_of(pos, struct ktap_event, list);
+                list_del(&event->list);
+                kfree(event);
+        }
+}
+
+void kp_events_exit(ktap_state_t *ks)
+{
+        if (!G(ks)->trace_enabled)
+                return;
+
+        events_destroy(ks);
+
+        /* call trace_end_closure after all events are unregistered */
+        if ((G(ks)->state != KTAP_ERROR) && G(ks)->trace_end_closure) {
+                G(ks)->state = KTAP_TRACE_END;
+                set_func(ks->top, G(ks)->trace_end_closure);
+                incr_top(ks);
+                kp_vm_call(ks, ks->top - 1, 0);
+                G(ks)->trace_end_closure = NULL;
+        }
+
+        G(ks)->trace_enabled = 0;
+}
+
+int kp_events_init(ktap_state_t *ks)
+{
+        G(ks)->trace_enabled = 1;
+        return 0;
+}
diff --git a/tools/ktap/runtime/kp_events.h b/tools/ktap/runtime/kp_events.h
new file mode 100644
index 0000000..b24f723
--- /dev/null
+++ b/tools/ktap/runtime/kp_events.h
@@ -0,0 +1,71 @@
+#ifndef __KTAP_EVENTS_H__
+#define __KTAP_EVENTS_H__
+
+#include <linux/ftrace_event.h>
+#include <trace/syscall.h>
+#include <trace/events/syscalls.h>
+#include <linux/syscalls.h>
+#include <linux/kprobes.h>
+
+enum KTAP_EVENT_FIELD_TYPE {
+        KTAP_EVENT_FIELD_TYPE_INVALID = 0, /* arg type not supported yet */
+
+        KTAP_EVENT_FIELD_TYPE_INT,
+        KTAP_EVENT_FIELD_TYPE_LONG,
+        KTAP_EVENT_FIELD_TYPE_STRING,
+
+        KTAP_EVENT_FIELD_TYPE_REGISTER,
+        KTAP_EVENT_FIELD_TYPE_CONST,
+        KTAP_EVENT_FIELD_TYPE_NIL /* arg does not exist */
+};
+
+struct ktap_event_field {
+        enum KTAP_EVENT_FIELD_TYPE type;
+        int offset;
+};
+
+enum KTAP_EVENT_TYPE {
+        KTAP_EVENT_TYPE_PERF,
+        KTAP_EVENT_TYPE_TRACEPOINT,
+        KTAP_EVENT_TYPE_SYSCALL_ENTER,
+        KTAP_EVENT_TYPE_SYSCALL_EXIT,
+        KTAP_EVENT_TYPE_KPROBE,
+};
+
+struct ktap_event {
+        struct list_head list;
+        int type;
+        ktap_state_t *ks;
+        ktap_func_t *fn;
+        struct perf_event *perf;
+        int syscall_nr; /* for syscall events */
+        struct ktap_event_field fields[9]; /* arg1..arg9 */
+        ktap_str_t *name; /* interned probe name string */
+
+        struct kprobe kp; /* kprobe event */
+};
+
+/* this structure is allocated on the stack */
+struct ktap_event_data {
+        struct ktap_event *event;
+        struct perf_sample_data *data;
+        struct pt_regs *regs;
+        ktap_str_t *argstr; /* cached interned argstr string */
+};
+
+int kp_events_init(ktap_state_t *ks);
+void kp_events_exit(ktap_state_t *ks);
+
+int kp_event_create(ktap_state_t *ks, struct perf_event_attr *attr,
+                    struct task_struct *task, const char *filter,
+                    ktap_func_t *fn);
+int kp_event_create_tracepoint(ktap_state_t *ks, const char *event_name,
+                               ktap_func_t *fn);
+int kp_event_create_kprobe(ktap_state_t *ks, const char *event_name,
+                           ktap_func_t *fn);
+void kp_event_getarg(ktap_state_t *ks, ktap_val_t *ra, int idx);
+const char *kp_event_tostr(ktap_state_t *ks);
+const ktap_str_t *kp_event_stringify(ktap_state_t *ks);
+
+#endif /* __KTAP_EVENTS_H__ */
-- 
1.8.1.4