On 22/08/2019 04:17, Rik van Riel wrote:
> Now that enqueue_task_fair and dequeue_task_fair no longer iterate up
> the hierarchy all the time, a method to lazily propagate sum_exec_runtime
> up the hierarchy is necessary.
>
> Once a tick, propagate the newly accumulated exec_runtime up the hierarchy,
> and feed it into CFS bandwidth control.
>
> Remove the pointless call to account_cfs_rq_runtime from update_curr,
> which is always called with a root cfs_rq.

But what about the call to account_cfs_rq_runtime() in
set_curr_task_fair()? Here you always call it with the root cfs_rq.
Shouldn't this also be called in a loop over all se's until !se->parent
(like in propagate_exec_runtime() further below)? See the sketch at the
end of this mail for what I have in mind.

> Signed-off-by: Rik van Riel <r...@surriel.com>
> ---
>  include/linux/sched.h |  1 +
>  kernel/sched/core.c   |  1 +
>  kernel/sched/fair.c   | 22 ++++++++++++++++++++--
>  3 files changed, 22 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 901c710363e7..bdca15b3afe7 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -454,6 +454,7 @@ struct sched_entity {
>  	int				depth;
>  	unsigned long			enqueued_h_load;
>  	unsigned long			enqueued_h_weight;
> +	u64				propagated_exec_runtime;
>  	struct load_weight		h_load;
>  	struct sched_entity		*parent;
>  	/* rq on which this entity is (to be) queued: */
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index fbd96900f715..9915d20e84a9 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -2137,6 +2137,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
>  	INIT_LIST_HEAD(&p->se.group_node);
>
>  #ifdef CONFIG_FAIR_GROUP_SCHED
> +	p->se.propagated_exec_runtime = 0;
>  	p->se.cfs_rq = NULL;
>  #endif
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 5cfa3dbeba49..d6c881c5c4d5 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -898,8 +896,6 @@ static void update_curr(struct cfs_rq *cfs_rq)
>  	trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
>  	cgroup_account_cputime(curtask, delta_exec);
>  	account_group_exec_runtime(curtask, delta_exec);
> -
> -	account_cfs_rq_runtime(cfs_rq, delta_exec);
>  }
>
>  static void update_curr_fair(struct rq *rq)
> @@ -3412,6 +3410,20 @@ static inline bool skip_blocked_update(struct sched_entity *se)
>  	return true;
>  }
>
> +static void propagate_exec_runtime(struct cfs_rq *cfs_rq,
> +				    struct sched_entity *se)
> +{
> +	struct sched_entity *parent = se->parent;
> +	u64 diff = se->sum_exec_runtime - se->propagated_exec_runtime;
> +
> +	if (parent) {
> +		parent->sum_exec_runtime += diff;
> +		account_cfs_rq_runtime(cfs_rq, diff);
> +	}
> +
> +	se->propagated_exec_runtime = se->sum_exec_runtime;
> +}
> +
>  #else /* CONFIG_FAIR_GROUP_SCHED */
>
>  static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
> @@ -3423,6 +3435,11 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
>
>  static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum) {}
>
> +static void propagate_exec_runtime(struct cfs_rq *cfs_rq,
> +				   struct sched_entity *se)
> +{
> +}
> +
>  #endif /* CONFIG_FAIR_GROUP_SCHED */
>
>  /**
> @@ -10157,6 +10174,7 @@ static void propagate_entity_cfs_rq(struct sched_entity *se, int flags)
>  		if (!(flags & DO_ATTACH))
>  			break;
>
> +		propagate_exec_runtime(cfs_rq, se);
>  		update_cfs_group(se);
>  	}
>  }
>
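
To make the question a bit more concrete, here is a rough, compile-untested
sketch of the kind of loop I have in mind for set_curr_task_fair(), mirroring
the se->parent walk in propagate_exec_runtime(). I am assuming that
cfs_rq_of() on a group se still returns that group's cfs_rq with the
flattened runqueue; if that does not hold in your series, please take the
general idea rather than the details:

static void set_curr_task_fair(struct rq *rq)
{
	struct sched_entity *se = &rq->curr->se;

	set_next_entity(cfs_rq_of(se), se);

	/*
	 * Walk up the hierarchy so every group cfs_rq on the path gets
	 * the bandwidth check, not only the root cfs_rq.
	 */
	for (; se; se = se->parent)
		account_cfs_rq_runtime(cfs_rq_of(se), 0);
}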