On 26 May 2016 at 03:14, Yuyang Du <yuyang...@intel.com> wrote:
> Vincent reported that the first task moved to a new task group's cfs_rq
> will be attached in attach_task_cfs_rq() and then once more when it is
> enqueued (see https://lkml.org/lkml/2016/5/25/388).
>
> Actually, it is worse: attach_task_cfs_rq() is called for a new task even
> before init_entity_runnable_average().
>
> Solve this by avoiding both the attach and the detach of a new task's
> sched avgs in task_move_group_fair(). To do that, we need to know whether
> the task was just forked, so we pass this info all the way from
> sched_move_task() down to attach_task_cfs_rq().
Not sure that this is the right way to solve the problem, because you
still attach the task twice without detaching it in between:

- once during the copy of the process in cpu_cgroup_fork() (you skip the
  attach of the load average, but the task is still attached to the local
  cpu). In the meantime, the sched_entity is initialized and its
  last_update_time is reset.

- one more time when the task is enqueued, because last_update_time has
  been reset (this time you don't skip the attach of load_avg).

Wouldn't it be better to detach the sched_entity, which still carries a
copy of its parent's metrics, before initializing it and attaching it to
the new cpu? See the sketch at the end of this mail.

>
> Reported-by: Vincent Guittot <vincent.guit...@linaro.org>
> Signed-off-by: Yuyang Du <yuyang...@intel.com>
> ---
>  kernel/sched/auto_group.c |    2 +-
>  kernel/sched/core.c       |    8 ++++----
>  kernel/sched/fair.c       |   23 ++++++++++++-----------
>  kernel/sched/sched.h      |    4 ++--
>  4 files changed, 19 insertions(+), 18 deletions(-)
>
> diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
> index a5d966c..e5f0be2 100644
> --- a/kernel/sched/auto_group.c
> +++ b/kernel/sched/auto_group.c
> @@ -143,7 +143,7 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
>  		goto out;
>  
>  	for_each_thread(p, t)
> -		sched_move_task(t);
> +		sched_move_task(t, 0);
>  out:
>  	unlock_task_sighand(p, &flags);
>  	autogroup_kref_put(prev);
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 7f2cae4..8585032 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -7724,7 +7724,7 @@ void sched_offline_group(struct task_group *tg)
>   * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
>   * reflect its new group.
>   */
> -void sched_move_task(struct task_struct *tsk)
> +void sched_move_task(struct task_struct *tsk, int fork)
>  {
>  	struct task_group *tg;
>  	int queued, running;
> @@ -7753,7 +7753,7 @@ void sched_move_task(struct task_struct *tsk)
>  
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>  	if (tsk->sched_class->task_move_group)
> -		tsk->sched_class->task_move_group(tsk);
> +		tsk->sched_class->task_move_group(tsk, fork);
>  	else
>  #endif
>  		set_task_rq(tsk, task_cpu(tsk));
> @@ -8186,7 +8186,7 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
>  
>  static void cpu_cgroup_fork(struct task_struct *task)
>  {
> -	sched_move_task(task);
> +	sched_move_task(task, 1);
>  }
>  
>  static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
> @@ -8213,7 +8213,7 @@ static void cpu_cgroup_attach(struct cgroup_taskset *tset)
>  	struct cgroup_subsys_state *css;
>  
>  	cgroup_taskset_for_each(task, css, tset)
> -		sched_move_task(task);
> +		sched_move_task(task, 0);
>  }
>  
>  #ifdef CONFIG_FAIR_GROUP_SCHED
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index e89c39b..e5a61b1 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -2970,6 +2970,7 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
>  	cfs_rq_util_change(cfs_rq);
>  }
>  
> +/* Catch up with the cfs_rq and then remove our sched avgs from it */
>  static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
>  {
>  	__update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
> @@ -8368,9 +8369,6 @@ static void detach_task_cfs_rq(struct task_struct *p)
>  		place_entity(cfs_rq, se, 0);
>  		se->vruntime -= cfs_rq->min_vruntime;
>  	}
> -
> -	/* Catch up with the cfs_rq and remove our load when we leave */
> -	detach_entity_load_avg(cfs_rq, se);
>  }
>  
>  static void attach_task_cfs_rq(struct task_struct *p)
> @@ -8386,9 +8384,6 @@ static void attach_task_cfs_rq(struct task_struct *p)
>  	se->depth = se->parent ? se->parent->depth + 1 : 0;
>  #endif
>  
> -	/* Synchronize task with its cfs_rq */
> -	attach_entity_load_avg(cfs_rq, se);
> -
>  	if (!vruntime_normalized(p))
>  		se->vruntime += cfs_rq->min_vruntime;
>  }
> @@ -8396,6 +8391,7 @@ static void attach_task_cfs_rq(struct task_struct *p)
>  static void switched_from_fair(struct rq *rq, struct task_struct *p)
>  {
>  	detach_task_cfs_rq(p);
> +	detach_entity_load_avg(cfs_rq_of(&p->se), &p->se);
>  }
>  
>  static void switched_to_fair(struct rq *rq, struct task_struct *p)
> @@ -8422,6 +8418,7 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p)
>  skip_aging:
>  #endif
>  	attach_task_cfs_rq(p);
> +	attach_entity_load_avg(cfs_rq_of(se), se);
>  
>  	if (task_on_rq_queued(p)) {
>  		/*
> @@ -8468,16 +8465,20 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
>  }
>  
>  #ifdef CONFIG_FAIR_GROUP_SCHED
> -static void task_move_group_fair(struct task_struct *p)
> +static void task_move_group_fair(struct task_struct *p, int fork)
>  {
>  	detach_task_cfs_rq(p);
> +	/*
> +	 * New task does not need detach or attach load (see below)
> +	 */
> +	if (!fork)
> +		detach_entity_load_avg(cfs_rq_of(&p->se), &p->se);
> +
>  	set_task_rq(p, task_cpu(p));
>  
> -#ifdef CONFIG_SMP
> -	/* Tell se's cfs_rq has been changed -- migrated */
> -	p->se.avg.last_update_time = 0;
> -#endif
>  	attach_task_cfs_rq(p);
> +	if (!fork)
> +		attach_entity_load_avg(cfs_rq_of(&p->se), &p->se);
>  }
>  
>  void free_fair_sched_group(struct task_group *tg)
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 72f1f30..58b1259 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -343,7 +343,7 @@ extern void sched_online_group(struct task_group *tg,
>  extern void sched_destroy_group(struct task_group *tg);
>  extern void sched_offline_group(struct task_group *tg);
>  
> -extern void sched_move_task(struct task_struct *tsk);
> +extern void sched_move_task(struct task_struct *tsk, int fork);
>  
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>  extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
> @@ -1247,7 +1247,7 @@ struct sched_class {
>  	void (*update_curr) (struct rq *rq);
>  
>  #ifdef CONFIG_FAIR_GROUP_SCHED
> -	void (*task_move_group) (struct task_struct *p);
> +	void (*task_move_group) (struct task_struct *p, int fork);
>  #endif
>  };
>  
> --
> 1.7.9.5
>
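
To make the idea concrete, below is a rough sketch of the ordering I have
in mind for the fork path. It is untested and only meant to illustrate:
task_fork_move_group() is a made-up name for the fork leg of
task_move_group_fair(), and it reuses the helpers from your patch:

static void task_fork_move_group(struct task_struct *p)
{
	struct sched_entity *se = &p->se;

	/*
	 * The child's se was copied from its parent in dup_task_struct(),
	 * so it still carries the parent's metrics and last_update_time:
	 * sync that stale copy with its cfs_rq and remove it first.
	 */
	detach_entity_load_avg(cfs_rq_of(se), se);

	/* Then give the child a clean initial load/util... */
	init_entity_runnable_average(se);

	/* ...and attach the fresh entity to the new group's cfs_rq. */
	set_task_rq(p, task_cpu(p));
	attach_entity_load_avg(cfs_rq_of(se), se);
}

This way the task is attached exactly once: attach_entity_load_avg() sets
se->avg.last_update_time, so the enqueue path will not attach it a second
time.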