----- On Oct 29, 2015, at 3:07 AM, rostedt rost...@goodmis.org wrote: > From: "Steven Rostedt (Red Hat)" <rost...@goodmis.org> > > In order to guarantee that a probe will be called before other probes that > are attached to a tracepoint, there needs to be a mechanism to provide > priority of one probe over the others. > > Add a prio field to struct tracepoint_func, which lets the probes be > sorted by the priority set in the structure. If no priority is specified, > then a priority of 10 is given (this is a macro, and perhaps may be changed > in the future). > > Now probes may be added to affect other probes that are attached to a > tracepoint with a guaranteed order. > > One use case would be to allow tracing of tracepoints to be filtered by > pid. A special (higher priority) probe may be added to the sched_switch > tracepoint and set the necessary flags of the other tracepoints to notify > them if they should be traced or not. In case a tracepoint is enabled at the > sched_switch tracepoint too, the order of the two is not random. > > Cc: Mathieu Desnoyers <mathieu.desnoy...@efficios.com> > Signed-off-by: Steven Rostedt <rost...@goodmis.org>
Sounds good to me, Acked-by: Mathieu Desnoyers <mathieu.desnoy...@efficios.com> Thanks, Mathieu > --- > include/linux/tracepoint.h | 13 ++++++++++ > kernel/tracepoint.c | 61 +++++++++++++++++++++++++++++++++++++--------- > 2 files changed, 63 insertions(+), 11 deletions(-) > > diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h > index afada369c5b7..6b79537a42b1 100644 > --- a/include/linux/tracepoint.h > +++ b/include/linux/tracepoint.h > @@ -26,6 +26,7 @@ struct notifier_block; > struct tracepoint_func { > void *func; > void *data; > + int prio; > }; > > struct tracepoint { > @@ -42,9 +43,14 @@ struct trace_enum_map { > unsigned long enum_value; > }; > > +#define TRACEPOINT_DEFAULT_PRIO 10 > + > extern int > tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data); > extern int > +tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void > *data, > + int prio); > +extern int > tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data); > extern void > for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv), > @@ -207,6 +213,13 @@ extern void syscall_unregfunc(void); > (void *)probe, data); \ > } \ > static inline int \ > + register_trace_prio_##name(void (*probe)(data_proto), void *data,\ > + int prio) \ > + { \ > + return tracepoint_probe_register_prio(&__tracepoint_##name, \ > + (void *)probe, data, prio); \ > + } \ > + static inline int \ > unregister_trace_##name(void (*probe)(data_proto), void *data) \ > { \ > return tracepoint_probe_unregister(&__tracepoint_##name,\ > diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c > index 3490407dc7b7..ecd536de603a 100644 > --- a/kernel/tracepoint.c > +++ b/kernel/tracepoint.c > @@ -91,11 +91,13 @@ static void debug_print_probes(struct tracepoint_func > *funcs) > printk(KERN_DEBUG "Probe %d : %p\n", i, funcs[i].func); > } > > -static struct tracepoint_func *func_add(struct tracepoint_func **funcs, > - struct tracepoint_func *tp_func) 
> +static struct tracepoint_func * > +func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, > + int prio) > { > - int nr_probes = 0; > struct tracepoint_func *old, *new; > + int nr_probes = 0; > + int pos = -1; > > if (WARN_ON(!tp_func->func)) > return ERR_PTR(-EINVAL); > @@ -104,18 +106,33 @@ static struct tracepoint_func *func_add(struct > tracepoint_func **funcs, > old = *funcs; > if (old) { > /* (N -> N+1), (N != 0, 1) probes */ > - for (nr_probes = 0; old[nr_probes].func; nr_probes++) > + for (nr_probes = 0; old[nr_probes].func; nr_probes++) { > + /* Insert before probes of lower priority */ > + if (pos < 0 && old[nr_probes].prio < prio) > + pos = nr_probes; > if (old[nr_probes].func == tp_func->func && > old[nr_probes].data == tp_func->data) > return ERR_PTR(-EEXIST); > + } > } > /* + 2 : one for new probe, one for NULL func */ > new = allocate_probes(nr_probes + 2); > if (new == NULL) > return ERR_PTR(-ENOMEM); > - if (old) > - memcpy(new, old, nr_probes * sizeof(struct tracepoint_func)); > - new[nr_probes] = *tp_func; > + if (old) { > + if (pos < 0) { > + pos = nr_probes; > + memcpy(new, old, nr_probes * sizeof(struct > tracepoint_func)); > + } else { > + /* Copy higher priority probes ahead of the new probe */ > + memcpy(new, old, pos * sizeof(struct tracepoint_func)); > + /* Copy the rest after it. */ > + memcpy(new + pos + 1, old + pos, > + (nr_probes - pos) * sizeof(struct > tracepoint_func)); > + } > + } else > + pos = 0; > + new[pos] = *tp_func; > new[nr_probes + 1].func = NULL; > *funcs = new; > debug_print_probes(*funcs); > @@ -174,7 +191,7 @@ static void *func_remove(struct tracepoint_func **funcs, > * Add the probe function to a tracepoint. 
> */ > static int tracepoint_add_func(struct tracepoint *tp, > - struct tracepoint_func *func) > + struct tracepoint_func *func, int prio) > { > struct tracepoint_func *old, *tp_funcs; > > @@ -183,7 +200,7 @@ static int tracepoint_add_func(struct tracepoint *tp, > > tp_funcs = rcu_dereference_protected(tp->funcs, > lockdep_is_held(&tracepoints_mutex)); > - old = func_add(&tp_funcs, func); > + old = func_add(&tp_funcs, func, prio); > if (IS_ERR(old)) { > WARN_ON_ONCE(1); > return PTR_ERR(old); > @@ -240,6 +257,7 @@ static int tracepoint_remove_func(struct tracepoint *tp, > * @tp: tracepoint > * @probe: probe handler > * @data: tracepoint data > + * @prio: priority of this function over other registered functions > * > * Returns 0 if ok, error value on error. > * Note: if @tp is within a module, the caller is responsible for > @@ -247,7 +265,8 @@ static int tracepoint_remove_func(struct tracepoint *tp, > * performed either with a tracepoint module going notifier, or from > * within module exit functions. > */ > -int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data) > +int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, > + void *data, int prio) > { > struct tracepoint_func tp_func; > int ret; > @@ -255,10 +274,30 @@ int tracepoint_probe_register(struct tracepoint *tp, > void > *probe, void *data) > mutex_lock(&tracepoints_mutex); > tp_func.func = probe; > tp_func.data = data; > - ret = tracepoint_add_func(tp, &tp_func); > + tp_func.prio = prio; > + ret = tracepoint_add_func(tp, &tp_func, prio); > mutex_unlock(&tracepoints_mutex); > return ret; > } > +EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio); > + > +/** > + * tracepoint_probe_register - Connect a probe to a tracepoint > + * @tp: tracepoint > + * @probe: probe handler > + * @data: tracepoint data > + * @prio: priority of this function over other registered functions > + * > + * Returns 0 if ok, error value on error. 
> + * Note: if @tp is within a module, the caller is responsible for > + * unregistering the probe before the module is gone. This can be > + * performed either with a tracepoint module going notifier, or from > + * within module exit functions. > + */ > +int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data) > +{ > + return tracepoint_probe_register_prio(tp, probe, data, > TRACEPOINT_DEFAULT_PRIO); > +} > EXPORT_SYMBOL_GPL(tracepoint_probe_register); > > /** > -- > 2.6.1 -- Mathieu Desnoyers EfficiOS Inc. http://www.efficios.com -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/