----- On Jul 5, 2016, at 11:19 AM, rostedt rost...@goodmis.org wrote: > On Mon, 4 Jul 2016 15:46:04 -0400 > Julien Desfossez <jdesfos...@efficios.com> wrote: > > >> diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h >> index 9b90c57..fcb0f29 100644 >> --- a/include/trace/events/sched.h >> +++ b/include/trace/events/sched.h >> @@ -8,6 +8,34 @@ >> #include <linux/tracepoint.h> >> #include <linux/binfmts.h> >> >> +#define SCHEDULING_POLICY \ >> + EM( SCHED_NORMAL, "SCHED_NORMAL") \ >> + EM( SCHED_FIFO, "SCHED_FIFO") \ >> + EM( SCHED_RR, "SCHED_RR") \ >> + EM( SCHED_BATCH, "SCHED_BATCH") \ >> + EM( SCHED_IDLE, "SCHED_IDLE") \ >> + EMe(SCHED_DEADLINE, "SCHED_DEADLINE") >> + >> +/* >> + * First define the enums in the above macros to be exported to userspace >> + * via TRACE_DEFINE_ENUM(). >> + */ >> +#undef EM >> +#undef EMe >> +#define EM(a, b) TRACE_DEFINE_ENUM(a); >> +#define EMe(a, b) TRACE_DEFINE_ENUM(a); >> + >> +SCHEDULING_POLICY >> + >> +/* >> + * Now redefine the EM() and EMe() macros to map the enums to the strings >> + * that will be printed in the output. >> + */ >> +#undef EM >> +#undef EMe >> +#define EM(a, b) {a, b}, >> +#define EMe(a, b) {a, b} >> + >> /* >> * Tracepoint for calling kthread_stop, performed to end a kthread: >> */ >> @@ -562,6 +590,46 @@ TRACE_EVENT(sched_wake_idle_without_ipi, >> >> TP_printk("cpu=%d", __entry->cpu) >> ); >> + >> +/* >> + * Tracepoint for showing scheduling priority changes. >> + */ >> +TRACE_EVENT(sched_prio_update, > > I'm fine with the addition of this tracepoint. You'll have to get by > Peter Zijlstra for it.
Great! > >> + >> + TP_PROTO(struct task_struct *tsk), >> + >> + TP_ARGS(tsk), >> + >> + TP_STRUCT__entry( >> + __array( char, comm, TASK_COMM_LEN ) > > I could imagine this being a high frequency tracepoint, especially with > a lot of boosting going on. Can we nuke the comm recording and let the > userspace tools just hook to the sched_switch tracepoint for that? We can surely do that. Just to clarify: currently this tracepoint is *not* hooked on PI boosting, as described in the changelog. This tracepoint is about the prio attributes set by user-space. The PI boosting temporarily changes the task_struct prio without updating the associated policy, which seems rather implementation-specific and odd to expose. Thoughts? Thanks, Mathieu > > -- Steve > > >> + __field( pid_t, pid ) >> + __field( unsigned int, policy ) >> + __field( int, nice ) >> + __field( unsigned int, rt_priority ) >> + __field( u64, dl_runtime ) >> + __field( u64, dl_deadline ) >> + __field( u64, dl_period ) >> + ), >> + >> + TP_fast_assign( >> + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); >> + __entry->pid = tsk->pid; >> + __entry->policy = tsk->policy; >> + __entry->nice = task_nice(tsk); >> + __entry->rt_priority = tsk->rt_priority; >> + __entry->dl_runtime = tsk->dl.dl_runtime; >> + __entry->dl_deadline = tsk->dl.dl_deadline; >> + __entry->dl_period = tsk->dl.dl_period; >> + ), >> + >> + TP_printk("comm=%s pid=%d, policy=%s, nice=%d, rt_priority=%u, " >> + "dl_runtime=%Lu, dl_deadline=%Lu, dl_period=%Lu", >> + __entry->comm, __entry->pid, >> + __print_symbolic(__entry->policy, SCHEDULING_POLICY), >> + __entry->nice, __entry->rt_priority, >> + __entry->dl_runtime, __entry->dl_deadline, >> + __entry->dl_period) >> +); >> #endif /* _TRACE_SCHED_H */ >> >> /* This part must be outside protection */ >> diff --git a/kernel/fork.c b/kernel/fork.c >> index 7926993..ac4294a 100644 >> --- a/kernel/fork.c >> +++ b/kernel/fork.c >> @@ -1773,6 +1773,7 @@ long _do_fork(unsigned long clone_flags, >> 
struct pid *pid; >> >> trace_sched_process_fork(current, p); >> + trace_sched_prio_update(p); >> >> pid = get_task_pid(p, PIDTYPE_PID); >> nr = pid_vnr(pid); >> diff --git a/kernel/sched/core.c b/kernel/sched/core.c >> index ce83e39..c729425 100644 >> --- a/kernel/sched/core.c >> +++ b/kernel/sched/core.c >> @@ -3708,6 +3708,7 @@ void set_user_nice(struct task_struct *p, long nice) >> resched_curr(rq); >> } >> out_unlock: >> + trace_sched_prio_update(p); >> task_rq_unlock(rq, p, &rf); >> } >> EXPORT_SYMBOL(set_user_nice); >> @@ -3912,6 +3913,8 @@ static void __setscheduler(struct rq *rq, struct >> task_struct *p, >> p->sched_class = &rt_sched_class; >> else >> p->sched_class = &fair_sched_class; >> + >> + trace_sched_prio_update(p); >> } >> > > static void -- Mathieu Desnoyers EfficiOS Inc. http://www.efficios.com