Commit-ID:  31bc6aeaab1d1de8959b67edbed5c7a4b3cdbe7c
Gitweb:     https://git.kernel.org/tip/31bc6aeaab1d1de8959b67edbed5c7a4b3cdbe7c
Author:     Vincent Guittot <vincent.guit...@linaro.org>
AuthorDate: Wed, 6 Feb 2019 17:14:21 +0100
Committer:  Ingo Molnar <mi...@kernel.org>
CommitDate: Mon, 11 Feb 2019 08:02:12 +0100

sched/fair: Optimize update_blocked_averages()

Removing a cfs_rq from rq->leaf_cfs_rq_list can break the parent/child
ordering of the list when it is added back. In order to remove an
empty and fully decayed cfs_rq, we must remove its children too, so
that they will be added back in the right order next time.
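
To see why the ordering matters: update_blocked_averages() walks
rq->leaf_cfs_rq_list front to back and relies on each child being
updated before its parent, so that a child's freshly decayed load is
propagated upward in the same pass. Below is a minimal userspace
sketch of that invariant; the struct and names are illustrative, not
the kernel's types:

  #include <stdbool.h>
  #include <stdio.h>

  /* Toy stand-in for a per-group cfs_rq; illustrative only. */
  struct crq { const char *name; struct crq *parent; };

  /*
   * The invariant rq->leaf_cfs_rq_list must keep: no parent may
   * appear in front of one of its children.
   */
  static bool child_before_parent(struct crq **list, int n)
  {
          for (int i = 0; i < n; i++)
                  for (int j = 0; j < i; j++)
                          if (list[i]->parent == list[j])
                                  return false; /* parent before child */
          return true;
  }

  int main(void)
  {
          struct crq root = { "root", NULL };
          struct crq a = { "A", &root };
          struct crq b = { "B", &a };

          struct crq *good[] = { &b, &a, &root };
          /* what a partial removal can produce: A re-added after root */
          struct crq *bad[] = { &b, &root, &a };

          printf("good: %d bad: %d\n",    /* prints "good: 1 bad: 0" */
                 child_before_parent(good, 3),
                 child_before_parent(bad, 3));
          return 0;
  }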

With normal PELT decay, a parent will be empty and fully decayed if
all of its children are empty and fully decayed too. In that case, we
just have to ensure that the whole branch is added back when a new
task is enqueued. This has been the default behavior since:

  commit f6783319737f ("sched/fair: Fix insertion in rq->leaf_cfs_rq_list")
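
A minimal model of that bottom-up re-add, assuming none of the
branch's groups are on the list yet (the kernel's
list_add_leaf_cfs_rq() also handles partially connected branches by
splicing through rq->tmp_alone_branch, which this sketch omits):

  #include <stdio.h>

  /* Toy stand-in for a per-group cfs_rq; illustrative only. */
  struct crq { const char *name; struct crq *parent; int on_list; };

  /*
   * On enqueue, walk up from the new task's cfs_rq and append each
   * not-yet-listed group, child first, so that children end up in
   * front of their parents when the list is read front to back.
   */
  static void add_branch(struct crq *c)
  {
          for (; c && !c->on_list; c = c->parent) {
                  c->on_list = 1;
                  printf("list_add: %s\n", c->name);
          }
  }

  int main(void)
  {
          struct crq root = { "root", NULL, 0 };
          struct crq a = { "A", &root, 0 };
          struct crq b = { "B", &a, 0 };

          add_branch(&b); /* prints B, then A, then root */
          return 0;
  }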

In case of throttling, the PELT of a throttled cfs_rq will not be
updated whereas the parent's will be. This breaks the assumption made
above unless we also remove the children of a cfs_rq that is
throttled. They will then be added back when the cfs_rq is unthrottled
and a sched_entity is enqueued.
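
For context, throttle and unthrottle already walk the whole task-group
subtree, which is why hooking tg_throttle_down() and
tg_unthrottle_up() is enough to remove and re-add every child. The
existing call sites in kernel/sched/fair.c look roughly like this
(quoted for illustration, not part of this diff):

  /* in throttle_cfs_rq(): freeze hierarchy runnable averages */
  rcu_read_lock();
  walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
  rcu_read_unlock();

  /* in unthrottle_cfs_rq(): update hierarchical throttle state */
  walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);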

As throttled cfs_rqs are now removed from the list, we can remove the
associated test in update_blocked_averages().

Signed-off-by: Vincent Guittot <vincent.guit...@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: sar...@sargun.me
Cc: t...@kernel.org
Cc: xiexi...@huawei.com
Cc: xiezhipe...@huawei.com
Link: https://lkml.kernel.org/r/1549469662-13614-2-git-send-email-vincent.guit...@linaro.org
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
 kernel/sched/fair.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 38d4669aa2ef..027f8e1b5b66 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -346,6 +346,18 @@ static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
        if (cfs_rq->on_list) {
+               struct rq *rq = rq_of(cfs_rq);
+
+               /*
+                * With cfs_rq being unthrottled/throttled during an enqueue,
+                * it can happen that tmp_alone_branch points to the leaf we
+                * finally want to delete. In that case, tmp_alone_branch moves
+                * to the prev element, but it will point to rq->leaf_cfs_rq_list
+                * again at the end of the enqueue.
+                */
+               if (rq->tmp_alone_branch == &cfs_rq->leaf_cfs_rq_list)
+                       rq->tmp_alone_branch = cfs_rq->leaf_cfs_rq_list.prev;
+
                list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
                cfs_rq->on_list = 0;
        }
@@ -4438,6 +4450,10 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
                /* adjust cfs_rq_clock_task() */
                cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
                                             cfs_rq->throttled_clock_task;
+
+               /* Add a cfs_rq with an already running entity to the list */
+               if (cfs_rq->nr_running >= 1)
+                       list_add_leaf_cfs_rq(cfs_rq);
        }
 
        return 0;
@@ -4449,8 +4465,10 @@ static int tg_throttle_down(struct task_group *tg, void *data)
        struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
 
        /* group is entering throttled state, stop time */
-       if (!cfs_rq->throttle_count)
+       if (!cfs_rq->throttle_count) {
                cfs_rq->throttled_clock_task = rq_clock_task(rq);
+               list_del_leaf_cfs_rq(cfs_rq);
+       }
        cfs_rq->throttle_count++;
 
        return 0;
@@ -4553,6 +4571,8 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
                        break;
        }
 
+       assert_list_leaf_cfs_rq(rq);
+
        if (!se)
                add_nr_running(rq, task_delta);
 
@@ -7700,10 +7720,6 @@ static void update_blocked_averages(int cpu)
        for_each_leaf_cfs_rq(rq, cfs_rq) {
                struct sched_entity *se;
 
-               /* throttled entities do not contribute to load */
-               if (throttled_hierarchy(cfs_rq))
-                       continue;
-
                if (update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq))
                        update_tg_load_avg(cfs_rq, 0);
 
