On Nov 16, 2007 5:37 AM, Jim Keniston <[EMAIL PROTECTED]> wrote: > On Thu, 2007-11-15 at 20:30 +0530, Abhishek Sagar wrote: > > On Nov 15, 2007 4:21 AM, Jim Keniston <[EMAIL PROTECTED]> wrote: > > > 2. Simplify the task of correlating data (e.g., timestamps) between > > > function entry and function return. > > > > Would adding of data and len fields in ri help? Instead of "pouching" > > data in one go at registration time, this would let user handlers do > > the allocation > > Yes and no. Adding just a data field -- void*, or maybe unsigned long > long so it's big enought to accommodate big timestamps -- would be a big > improvement on your current proposal. That would save the user the > drudgery of mapping the ri pointer to his/her per-instance data. > There's plenty of precedent for passing "private_data" values to > callbacks. > > I don't think a len field would help much. If such info were needed, it > could be stored in the data structure pointed to by the data field. > > I still don't think "letting [i.e., requiring that] user handlers do the > allocation" is a win. I'm still interested to see how this plays out in > real examples. > > > and allow them to use different kinds of data > > structures per-instance. > > I haven't been able to think of any scenarios where this would be > useful. A "data pouch" could always contain a union, FWIW.
I'm inlining a sample module which uses a data pointer in ri. I didn't go for a timestamp because it's not reliable. Some platforms might simply not have any h/w timestamp counters. For the same reason a lot of platforms (on ARM, say) have their sched_clock() mapped on jiffies. This may prevent timestamps from being distinct across function entry and exit. Plus a data pointer looks pretty harmless. --- test module --- #include <linux/kernel.h> #include <linux/version.h> #include <linux/module.h> #include <linux/kprobes.h> #include <linux/ktime.h> #define PRINT_DELAY (10 * HZ) /* This module calculates the total time and instances of func being called * across all cpu's. An average is calculated every 10 seconds and displayed. * Only one function instance per-task is monitored. This cuts out the * possibility of measuring time for recursive and nested function * invocations. * * Note: If compiling as a standalone module, make sure sched_clock() is * exported in the kernel. */ /* per-task data */ struct prof_data { struct task_struct *task; struct list_head list; unsigned long long entry_stamp; }; static const char *func = "sys_open"; static spinlock_t time_lock; static ktime_t total_time; static unsigned long hits; static LIST_HEAD(data_nodes); /* list of per-task data */ static struct delayed_work work_print; static struct prof_data *get_per_task_data(struct task_struct *tsk) { struct prof_data *p; /* lookup prof_data corresponding to tsk */ list_for_each_entry(p, &data_nodes, list) { if (p->task == tsk) return p; } return NULL; } /* called with kretprobe_lock held */ static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) { struct prof_data *stats; stats = get_per_task_data(current); if (stats) return 1; /* recursive/nested call */ stats = kmalloc(sizeof(struct prof_data), GFP_ATOMIC); if (!stats) return 1; stats->entry_stamp = sched_clock(); stats->task = current; INIT_LIST_HEAD(&stats->list); list_add(&stats->list, &data_nodes); ri->data = stats; return 0; } /* called with kretprobe_lock held */ static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs) { unsigned long flags; struct prof_data *stats = (struct prof_data *)ri->data; u64 elapsed; BUG_ON(ri->data == NULL); elapsed = (long long)sched_clock() - (long long)stats->entry_stamp; /* update stats */ spin_lock_irqsave(&time_lock, flags); ++hits; total_time = ktime_add_ns(total_time, elapsed); spin_unlock_irqrestore(&time_lock, flags); list_del(&stats->list); kfree(stats); return 0; } static struct kretprobe my_kretprobe = { .handler = return_handler, .entry_handler = entry_handler, }; /* called after every PRINT_DELAY seconds */ static void print_time(struct work_struct *work) { unsigned long flags; s64 time_ns; struct timespec ts; BUG_ON(work != &work_print.work); spin_lock_irqsave(&time_lock, flags); time_ns = ktime_to_ns(total_time); do_div(time_ns, hits); spin_unlock_irqrestore(&time_lock, flags); ts = ns_to_timespec(time_ns); printk(KERN_DEBUG "Avg. running time of %s = %ld sec, %ld nsec\n", func, ts.tv_sec, ts.tv_nsec); schedule_delayed_work(&work_print, PRINT_DELAY); } static int __init test_module_init(void) { int ret; my_kretprobe.kp.symbol_name = (char *)func; spin_lock_init(&time_lock); if ((ret = register_kretprobe(&my_kretprobe)) < 0) { printk("Failed to register test kretprobe!\n"); return -1; } printk("Kretprobe active on %s\n", my_kretprobe.kp.symbol_name); INIT_DELAYED_WORK(&work_print, print_time); schedule_delayed_work(&work_print, PRINT_DELAY); return 0; } static void __exit test_module_exit(void) { unregister_kretprobe(&my_kretprobe); printk("kretprobe unregistered\n"); printk("Missed probing %d instances of %s\n", my_kretprobe.nmissed, func); } module_init(test_module_init) module_exit(test_module_exit) MODULE_LICENSE("GPL"); -- Abhishek Sagar - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/