The code in changeset 83230590 can occasionally get tripped up by the "src_rq->nr_running == 1 && dst_rq->nr_running == 1" check, leading to undesirable and/or useless task moves, as well as idle CPUs.
Luckily this bug can be avoided in a way that also simplifies the code. Signed-off-by: Rik van Riel <r...@redhat.com> --- kernel/sched/fair.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 376bc07c..32edd2c56 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1159,7 +1159,7 @@ static void task_numa_compare(struct task_numa_env *env, long src_load, dst_load; long load; long imp = env->p->numa_group ? groupimp : taskimp; - long moveimp = imp; + long moveimp = imp - 1; rcu_read_lock(); cur = ACCESS_ONCE(dst_rq->curr); @@ -1206,8 +1206,18 @@ static void task_numa_compare(struct task_numa_env *env, } } - if (imp <= env->best_imp && moveimp <= env->best_imp) - goto unlock; + if (imp <= env->best_imp) { + if (moveimp <= env->best_imp) + goto unlock; + + /* + * A task swap is not going to work; a task move may be + * required to consolidate this workload, especially if + * both nodes are overloaded and there are no idle CPUs. + */ + imp = moveimp; + cur = NULL; + } if (!cur) { /* Is there capacity at our destination? */ @@ -1231,23 +1241,6 @@ static void task_numa_compare(struct task_numa_env *env, dst_load = env->dst_stats.load + load; src_load = env->src_stats.load - load; - if (moveimp > imp && moveimp > env->best_imp) { - /* - * If the improvement from just moving env->p direction is - * better than swapping tasks around, check if a move is - * possible. Store a slightly smaller score than moveimp, - * so an actually idle CPU will win. 
- */ - if (!load_too_imbalanced(src_load, dst_load, env)) { - imp = moveimp - 1; - cur = NULL; - goto assign; - } - } - - if (imp <= env->best_imp) - goto unlock; - if (cur) { load = task_h_load(cur); dst_load -= load; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/