----- On Jul 5, 2016, at 11:19 AM, rostedt rost...@goodmis.org wrote:

> On Mon,  4 Jul 2016 15:46:04 -0400
> Julien Desfossez <jdesfos...@efficios.com> wrote:
> 
> 
>> diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
>> index 9b90c57..fcb0f29 100644
>> --- a/include/trace/events/sched.h
>> +++ b/include/trace/events/sched.h
>> @@ -8,6 +8,34 @@
>>  #include <linux/tracepoint.h>
>>  #include <linux/binfmts.h>
>>  
>> +#define SCHEDULING_POLICY                           \
>> +    EM( SCHED_NORMAL,       "SCHED_NORMAL")         \
>> +    EM( SCHED_FIFO,         "SCHED_FIFO")           \
>> +    EM( SCHED_RR,           "SCHED_RR")             \
>> +    EM( SCHED_BATCH,        "SCHED_BATCH")          \
>> +    EM( SCHED_IDLE,         "SCHED_IDLE")           \
>> +    EMe(SCHED_DEADLINE,     "SCHED_DEADLINE")
>> +
>> +/*
>> + * First define the enums in the above macros to be exported to userspace
>> + * via TRACE_DEFINE_ENUM().
>> + */
>> +#undef EM
>> +#undef EMe
>> +#define EM(a, b)    TRACE_DEFINE_ENUM(a);
>> +#define EMe(a, b)   TRACE_DEFINE_ENUM(a);
>> +
>> +SCHEDULING_POLICY
>> +
>> +/*
>> + * Now redefine the EM() and EMe() macros to map the enums to the strings
>> + * that will be printed in the output.
>> + */
>> +#undef EM
>> +#undef EMe
>> +#define EM(a, b)    {a, b},
>> +#define EMe(a, b)   {a, b}
>> +
>>  /*
>>   * Tracepoint for calling kthread_stop, performed to end a kthread:
>>   */
>> @@ -562,6 +590,46 @@ TRACE_EVENT(sched_wake_idle_without_ipi,
>>  
>>      TP_printk("cpu=%d", __entry->cpu)
>>  );
>> +
>> +/*
>> + * Tracepoint for showing scheduling priority changes.
>> + */
>> +TRACE_EVENT(sched_prio_update,
> 
> I'm fine with the addition of this tracepoint. You'll have to get by
> Peter Zijlstra for it.

Great!

> 
>> +
>> +    TP_PROTO(struct task_struct *tsk),
>> +
>> +    TP_ARGS(tsk),
>> +
>> +    TP_STRUCT__entry(
>> +            __array( char,  comm,   TASK_COMM_LEN   )
> 
> I could imagine this being a high frequency tracepoint, especially with
> a lot of boosting going on. Can we nuke the comm recording and let the
> userspace tools just hook to the sched_switch tracepoint for that?

We can surely do that.

Just to clarify: as described in the changelog, this tracepoint is currently
*not* hooked on PI boosting. This tracepoint is about the prio attributes
set by user-space. PI boosting temporarily changes the task_struct prio
without updating the associated policy, which seems rather
implementation-specific and odd to expose.

Thoughts?

Thanks,

Mathieu


> 
> -- Steve
> 
> 
>> +            __field( pid_t, pid                     )
>> +            __field( unsigned int,  policy          )
>> +            __field( int,   nice                    )
>> +            __field( unsigned int,  rt_priority     )
>> +            __field( u64,   dl_runtime              )
>> +            __field( u64,   dl_deadline             )
>> +            __field( u64,   dl_period               )
>> +    ),
>> +
>> +    TP_fast_assign(
>> +            memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
>> +            __entry->pid            = tsk->pid;
>> +            __entry->policy         = tsk->policy;
>> +            __entry->nice           = task_nice(tsk);
>> +            __entry->rt_priority    = tsk->rt_priority;
>> +            __entry->dl_runtime     = tsk->dl.dl_runtime;
>> +            __entry->dl_deadline    = tsk->dl.dl_deadline;
>> +            __entry->dl_period      = tsk->dl.dl_period;
>> +    ),
>> +
>> +    TP_printk("comm=%s pid=%d, policy=%s, nice=%d, rt_priority=%u, "
>> +                    "dl_runtime=%Lu, dl_deadline=%Lu, dl_period=%Lu",
>> +                    __entry->comm, __entry->pid,
>> +                    __print_symbolic(__entry->policy, SCHEDULING_POLICY),
>> +                    __entry->nice, __entry->rt_priority,
>> +                    __entry->dl_runtime, __entry->dl_deadline,
>> +                    __entry->dl_period)
>> +);
>>  #endif /* _TRACE_SCHED_H */
>>  
>>  /* This part must be outside protection */
>> diff --git a/kernel/fork.c b/kernel/fork.c
>> index 7926993..ac4294a 100644
>> --- a/kernel/fork.c
>> +++ b/kernel/fork.c
>> @@ -1773,6 +1773,7 @@ long _do_fork(unsigned long clone_flags,
>>              struct pid *pid;
>>  
>>              trace_sched_process_fork(current, p);
>> +            trace_sched_prio_update(p);
>>  
>>              pid = get_task_pid(p, PIDTYPE_PID);
>>              nr = pid_vnr(pid);
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index ce83e39..c729425 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -3708,6 +3708,7 @@ void set_user_nice(struct task_struct *p, long nice)
>>                      resched_curr(rq);
>>      }
>>  out_unlock:
>> +    trace_sched_prio_update(p);
>>      task_rq_unlock(rq, p, &rf);
>>  }
>>  EXPORT_SYMBOL(set_user_nice);
>> @@ -3912,6 +3913,8 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
>>              p->sched_class = &rt_sched_class;
>>      else
>>              p->sched_class = &fair_sched_class;
>> +
>> +    trace_sched_prio_update(p);
>>  }
>>  
>>  static void

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

Reply via email to