The code in changeset 83230590 can occasionally be tripped up
by the "src_rq->nr_running == 1 && dst_rq->nr_running == 1"
check, leading to undesirable and/or useless task moves, as
well as idle CPUs.

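Luckily this bug can be avoided in a way that also simplifies
the code: when a task swap would not improve on the best
placement found so far, fall back to evaluating a plain task
move instead. The move is scored one point lower than its
actual improvement, so that an actually idle CPU still wins.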

Signed-off-by: Rik van Riel <r...@redhat.com>
---
 kernel/sched/fair.c | 33 +++++++++++++--------------------
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 376bc07c..32edd2c56 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1159,7 +1159,7 @@ static void task_numa_compare(struct task_numa_env *env,
        long src_load, dst_load;
        long load;
        long imp = env->p->numa_group ? groupimp : taskimp;
-       long moveimp = imp;
+       long moveimp = imp - 1;
 
        rcu_read_lock();
        cur = ACCESS_ONCE(dst_rq->curr);
@@ -1206,8 +1206,18 @@ static void task_numa_compare(struct task_numa_env *env,
                }
        }
 
-       if (imp <= env->best_imp && moveimp <= env->best_imp)
-               goto unlock;
+       if (imp <= env->best_imp) {
+               if (moveimp <= env->best_imp)
+                       goto unlock;
+
+               /*
+                * A task swap is not going to work; a task move may be
+                * required to consolidate this workload, especially if
+                * both nodes are overloaded and there are no idle CPUs.
+                */
+               imp = moveimp;
+               cur = NULL;
+       }
 
        if (!cur) {
                /* Is there capacity at our destination? */
@@ -1231,23 +1241,6 @@ static void task_numa_compare(struct task_numa_env *env,
        dst_load = env->dst_stats.load + load;
        src_load = env->src_stats.load - load;
 
-       if (moveimp > imp && moveimp > env->best_imp) {
-               /*
-                * If the improvement from just moving env->p direction is
-                * better than swapping tasks around, check if a move is
-                * possible. Store a slightly smaller score than moveimp,
-                * so an actually idle CPU will win.
-                */
-               if (!load_too_imbalanced(src_load, dst_load, env)) {
-                       imp = moveimp - 1;
-                       cur = NULL;
-                       goto assign;
-               }
-       }
-
-       if (imp <= env->best_imp)
-               goto unlock;
-
        if (cur) {
                load = task_h_load(cur);
                dst_load -= load;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to