On Wed, Jan 30, 2019 at 06:22:47AM +0100, Vincent Guittot wrote:
> Sargun reported a crash:
>   "I picked up c40f7d74c741a907cfaeb73a7697081881c497d0 sched/fair: Fix
>    infinite loop in update_blocked_averages() by reverting a9e7f6544b9c
>    and put it on top of 4.19.13. In addition to this, I uninlined
>    list_add_leaf_cfs_rq for debugging.
> 
>    This revealed a new bug that we didn't get to because we kept getting
>    crashes from the previous issue. When we are running with cgroups that
>    are rapidly changing, with CFS bandwidth control, and in addition
>    using the cpusets cgroup, we see this crash. Specifically, it seems to
>    occur with cgroups that are throttled and we change the allowed
>    cpuset."
> 
> The algorithm used to order cfs_rq in rq->leaf_cfs_rq_list assumes that
> the walk goes down to the root the first time a cfs_rq is used, and that
> we always finish by adding either a cfs_rq without a parent or a cfs_rq
> whose parent is already on the list. But this is not always true in the
> presence of throttling: a cfs_rq can be throttled even though it has
> never been used, because other CPUs of the cgroup have already consumed
> all of the bandwidth, so we are not guaranteed to walk down to the root
> and add all the cfs_rq to the list.
> 
> Ensure that all cfs_rq are added to the list even when they are throttled.
> 
> Reported-by: Sargun Dhillon <sar...@sargun.me>
> Fixes: 9c2791f936ef ("sched/fair: Fix hierarchical order in rq->leaf_cfs_rq_list")
> Signed-off-by: Vincent Guittot <vincent.guit...@linaro.org>

Given my previous patch, how about something like so instead?
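To recap the failure mode: list_add_leaf_cfs_rq() is normally called from
enqueue_entity() as we walk up the hierarchy, but the enqueue walk bails
out early on a throttled cfs_rq, and that is what leaves the leaf list only
partially built. A simplified sketch of that walk (the second load-update
loop, h_nr_running accounting etc. elided, so not the exact kernel code):

        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);

                /* adds cfs_rq to the leaf list when it becomes non-empty */
                enqueue_entity(cfs_rq, se, flags);

                /*
                 * Stop on a throttled cfs_rq; its ancestors never get
                 * list_add_leaf_cfs_rq()'d here, so rq->tmp_alone_branch
                 * can be left pointing into a half-built branch.
                 */
                if (cfs_rq_throttled(cfs_rq))
                        break;
        }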

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -282,13 +282,15 @@ static inline struct cfs_rq *group_cfs_r
        return grp->my_q;
 }
 
-static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
+static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
        struct rq *rq = rq_of(cfs_rq);
        int cpu = cpu_of(rq);
 
        if (cfs_rq->on_list)
-               return;
+               return rq->tmp_alone_branch == &rq->leaf_cfs_rq_list;
+
+       cfs_rq->on_list = 1;
 
        /*
         * Ensure we either appear before our parent (if already
@@ -315,7 +317,10 @@ static inline void list_add_leaf_cfs_rq(
                 * list.
                 */
                rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
-       } else if (!cfs_rq->tg->parent) {
+               return true;
+       }
+
+       if (!cfs_rq->tg->parent) {
                /*
                 * cfs rq without parent should be put
                 * at the tail of the list.
@@ -327,23 +332,22 @@ static inline void list_add_leaf_cfs_rq(
                 * tmp_alone_branch to the beginning of the list.
                 */
                rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
-       } else {
-               /*
-                * The parent has not already been added so we want to
-                * make sure that it will be put after us.
-                * tmp_alone_branch points to the begin of the branch
-                * where we will add parent.
-                */
-               list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
-                       rq->tmp_alone_branch);
-               /*
-                * update tmp_alone_branch to points to the new begin
-                * of the branch
-                */
-               rq->tmp_alone_branch = &cfs_rq->leaf_cfs_rq_list;
+               return true;
        }
 
-       cfs_rq->on_list = 1;
+       /*
+        * The parent has not already been added, so we want to
+        * make sure that it will be put after us.
+        * tmp_alone_branch points to the beginning of the branch
+        * where we will add the parent.
+        */
+       list_add_rcu(&cfs_rq->leaf_cfs_rq_list, rq->tmp_alone_branch);
+       /*
+        * Update tmp_alone_branch to point to the new beginning
+        * of the branch.
+        */
+       rq->tmp_alone_branch = &cfs_rq->leaf_cfs_rq_list;
+       return false;
 }
 
 static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
@@ -4999,6 +5003,12 @@ static void __maybe_unused unthrottle_of
 }
 
 #else /* CONFIG_CFS_BANDWIDTH */
+
+static inline bool cfs_bandwidth_used(void)
+{
+       return false;
+}
+
 static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
 {
        return rq_clock_task(rq_of(cfs_rq));
@@ -5190,6 +5200,21 @@ enqueue_task_fair(struct rq *rq, struct
 
        }
 
+       if (cfs_bandwidth_used()) {
+               /*
+                * When bandwidth control is enabled, the cfs_rq_throttled()
+                * breaks in the above iteration can result in incomplete
+                * leaf list maintenance, which can then trigger the assertion
+                * below.
+                */
+               for_each_sched_entity(se) {
+                       cfs_rq = cfs_rq_of(se);
+
+                       if (list_add_leaf_cfs_rq(cfs_rq))
+                               break;
+               }
+       }
+
        assert_list_leaf_cfs_rq(rq);
 
        hrtick_update(rq);
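For reference, the assertion the fixup loop is there to keep happy is
presumably the helper from the previous patch; roughly (a sketch from
memory, your tree may differ):

        /*
         * Sketch (assumed helper from the earlier tmp_alone_branch patch):
         * after enqueue, no partially built branch may remain; every branch
         * must have been reconnected to rq->leaf_cfs_rq_list.
         */
        static inline void assert_list_leaf_cfs_rq(struct rq *rq)
        {
                SCHED_WARN_ON(rq->tmp_alone_branch != &rq->leaf_cfs_rq_list);
        }

The bool return from list_add_leaf_cfs_rq() lets the new loop stop as soon
as it finds a cfs_rq that is already on the list with the branch fully
connected, instead of unconditionally walking all the way up to the root.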
