On 22/08/2019 04:17, Rik van Riel wrote:
> Now that enqueue_task_fair and dequeue_task_fair no longer iterate up
> the hierarchy all the time, a method to lazily propagate sum_exec_runtime
> up the hierarchy is necessary.
> 
> Once a tick, propagate the newly accumulated exec_runtime up the hierarchy,
> and feed it into CFS bandwidth control.
> 
> Remove the pointless call to account_cfs_rq_runtime from update_curr,
> which is always called with a root cfs_rq.

But what about the call to account_cfs_rq_runtime() in
set_curr_task_fair()? Here you always call it with the root cfs_rq.
Shouldn't this also be called in a loop over all se's until !se->parent
(like in propagate_exec_runtime() further below)?

> Signed-off-by: Rik van Riel <r...@surriel.com>
> ---
>  include/linux/sched.h |  1 +
>  kernel/sched/core.c   |  1 +
>  kernel/sched/fair.c   | 22 ++++++++++++++++++++--
>  3 files changed, 22 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 901c710363e7..bdca15b3afe7 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -454,6 +454,7 @@ struct sched_entity {
>       int                             depth;
>       unsigned long                   enqueued_h_load;
>       unsigned long                   enqueued_h_weight;
> +     u64                             propagated_exec_runtime;
>       struct load_weight              h_load;
>       struct sched_entity             *parent;
>       /* rq on which this entity is (to be) queued: */
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index fbd96900f715..9915d20e84a9 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -2137,6 +2137,7 @@ static void __sched_fork(unsigned long clone_flags, 
> struct task_struct *p)
>       INIT_LIST_HEAD(&p->se.group_node);
>  
>  #ifdef CONFIG_FAIR_GROUP_SCHED
> +     p->se.propagated_exec_runtime   = 0;
>       p->se.cfs_rq                    = NULL;
>  #endif
>  
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 5cfa3dbeba49..d6c881c5c4d5 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -898,8 +898,6 @@ static void update_curr(struct cfs_rq *cfs_rq)
>       trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
>       cgroup_account_cputime(curtask, delta_exec);
>       account_group_exec_runtime(curtask, delta_exec);
> -
> -     account_cfs_rq_runtime(cfs_rq, delta_exec);
>  }
>  
>  static void update_curr_fair(struct rq *rq)
> @@ -3412,6 +3410,20 @@ static inline bool skip_blocked_update(struct 
> sched_entity *se)
>       return true;
>  }
>  
> +static void propagate_exec_runtime(struct cfs_rq *cfs_rq,
> +                             struct sched_entity *se)
> +{
> +     struct sched_entity *parent = se->parent;
> +     u64 diff = se->sum_exec_runtime - se->propagated_exec_runtime;
> +
> +     if (parent) {
> +             parent->sum_exec_runtime += diff;
> +             account_cfs_rq_runtime(cfs_rq, diff);
> +     }
> +
> +     se->propagated_exec_runtime = se->sum_exec_runtime;
> +}
> +
>  #else /* CONFIG_FAIR_GROUP_SCHED */
>  
>  static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
> @@ -3423,6 +3435,11 @@ static inline int propagate_entity_load_avg(struct 
> sched_entity *se)
>  
>  static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long 
> runnable_sum) {}
>  
> +static void propagate_exec_runtime(struct cfs_rq *cfs_rq,
> +                             struct sched_entity *se);
> +{
> +}
> +
>  #endif /* CONFIG_FAIR_GROUP_SCHED */
>  
>  /**
> @@ -10157,6 +10174,7 @@ static void propagate_entity_cfs_rq(struct 
> sched_entity *se, int flags)
>                       if (!(flags & DO_ATTACH))
>                               break;
>  
> +             propagate_exec_runtime(cfs_rq, se);
>               update_cfs_group(se);
>       }
>  }
> 

Reply via email to