When looking for a CPU on which to place a running task, the scheduler completely ignores the capacity stolen from CPUs by RT tasks and IRQ processing. This patch changes that behavior so the idle-CPU search also takes the scaled capacity into account: an idle CPU still running at full capacity is preferred, and an idle CPU whose capacity has been reduced is only remembered as a backup choice.
Signed-off-by: Rohit Jain <rohit.k.j...@oracle.com>
---
 kernel/sched/fair.c | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index eaede50..5b1f7b9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6004,7 +6004,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
 
 		for_each_cpu(cpu, cpu_smt_mask(core)) {
 			cpumask_clear_cpu(cpu, cpus);
-			if (!idle_cpu(cpu))
+			if (!idle_cpu(cpu) || !full_capacity(cpu))
 				idle = false;
 		}
 
@@ -6025,7 +6025,8 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
  */
 static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
-	int cpu;
+	int cpu, backup_cpu = -1;
+	unsigned int backup_cap = 0;
 
 	if (!static_branch_likely(&sched_smt_present))
 		return -1;
@@ -6033,11 +6034,17 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
 	for_each_cpu(cpu, cpu_smt_mask(target)) {
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
-		if (idle_cpu(cpu))
-			return cpu;
+		if (idle_cpu(cpu)) {
+			if (full_capacity(cpu))
+				return cpu;
+			if (capacity_of(cpu) > backup_cap) {
+				backup_cap = capacity_of(cpu);
+				backup_cpu = cpu;
+			}
+		}
 	}
 
-	return -1;
+	return backup_cpu;
 }
 
 #else /* CONFIG_SCHED_SMT */
@@ -6066,6 +6073,8 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	u64 time, cost;
 	s64 delta;
 	int cpu, nr = INT_MAX;
+	int backup_cpu = -1;
+	unsigned int backup_cap = 0;
 
 	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
 	if (!this_sd)
@@ -6096,10 +6105,19 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 			return -1;
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
-		if (idle_cpu(cpu))
-			break;
+		if (idle_cpu(cpu)) {
+			if (full_capacity(cpu)) {
+				backup_cpu = -1;
+				break;
+			} else if (capacity_of(cpu) > backup_cap) {
+				backup_cap = capacity_of(cpu);
+				backup_cpu = cpu;
+			}
+		}
 	}
 
+	if (backup_cpu >= 0)
+		cpu = backup_cpu;
 	time = local_clock() - time;
 	cost = this_sd->avg_scan_cost;
 	delta = (s64)(time - cost) / 8;
@@ -6116,13 +6134,14 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	struct sched_domain *sd;
 	int i;
 
-	if (idle_cpu(target))
+	if (idle_cpu(target) && full_capacity(target))
 		return target;
 
 	/*
 	 * If the previous cpu is cache affine and idle, don't be stupid.
 	 */
-	if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
+	if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev)
+	    && full_capacity(prev))
 		return prev;
 
 	sd = rcu_dereference(per_cpu(sd_llc, target));
-- 
2.7.4
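
For reviewers who want to see the intended policy in isolation, below is a minimal user-space sketch (not kernel code) of what the modified loops do. Everything in it is made up for illustration: the capacity arrays, the pick_idle_cpu() wrapper, and the 80% threshold inside the mock full_capacity() (the real helper is presumably introduced earlier in this series, and in the kernel capacity_of() already reflects the capacity left after RT/IRQ time is subtracted).

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS        4
#define CAP_THRESHOLD  80   /* percent; illustrative only */

static const unsigned int capacity_orig[NR_CPUS] = { 1024, 1024, 1024, 1024 };
static const unsigned int capacity[NR_CPUS]      = { 1024,  600,  700,  300 };
static const bool cpu_idle[NR_CPUS]              = { false, true, true, true };

/* Stand-in for the kernel helper: "full" here means within 80% of the
 * CPU's original capacity; the real threshold is defined by the series. */
static bool full_capacity(int cpu)
{
	return capacity[cpu] * 100 >= capacity_orig[cpu] * CAP_THRESHOLD;
}

/* Mirrors the loop structure the patch adds: the first idle CPU at full
 * capacity wins immediately; otherwise the idle CPU with the most
 * remaining capacity is kept as a backup and returned at the end. */
static int pick_idle_cpu(void)
{
	int cpu, backup_cpu = -1;
	unsigned int backup_cap = 0;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!cpu_idle[cpu])
			continue;
		if (full_capacity(cpu))
			return cpu;
		if (capacity[cpu] > backup_cap) {
			backup_cap = capacity[cpu];
			backup_cpu = cpu;
		}
	}

	return backup_cpu;
}

int main(void)
{
	/* No idle CPU is at full capacity here, so the backup path picks
	 * CPU 2 (700/1024), the idle CPU with the most capacity left. */
	printf("picked CPU %d\n", pick_idle_cpu());
	return 0;
}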