While looking for CPUs to place running tasks on, the scheduler
completely ignores the capacity stolen away by RT tasks and IRQ
activity. This patch changes that behavior to also take the scaled
(remaining) capacity into account: the idle-CPU search paths now prefer
idle CPUs that retain their full capacity, and fall back to the idle
CPU with the highest remaining capacity when none do.

Signed-off-by: Rohit Jain <rohit.k.j...@oracle.com>
---
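Note for reviewers: full_capacity() is used throughout this patch but
not defined in these hunks; presumably it is introduced by an earlier
patch in the series. For reference, here is a minimal sketch of what
such a helper could look like, assuming a CPU counts as "full capacity"
when the capacity left after RT/IRQ pressure is at least ~75% of its
original capacity, and building only on the existing capacity_of() /
capacity_orig_of() helpers:

/*
 * Hypothetical sketch, not part of this patch: treat a CPU as having
 * full capacity when at least 768/1024 (~75%) of its original capacity
 * remains after RT/IRQ pressure is subtracted out.
 */
static inline bool full_capacity(int cpu)
{
	return capacity_of(cpu) >= ((capacity_orig_of(cpu) * 768) >> 10);
}

With a helper along these lines, the hunks below prefer idle CPUs for
which full_capacity() holds and otherwise remember the idle CPU with
the largest capacity_of() value as a backup.
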
 kernel/sched/fair.c | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index eaede50..5b1f7b9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6004,7 +6004,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
 
                for_each_cpu(cpu, cpu_smt_mask(core)) {
                        cpumask_clear_cpu(cpu, cpus);
-                       if (!idle_cpu(cpu))
+                       if (!idle_cpu(cpu) || !full_capacity(cpu))
                                idle = false;
                }
 
@@ -6025,7 +6025,8 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
  */
 static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
-       int cpu;
+       int cpu, backup_cpu = -1;
+       unsigned int backup_cap = 0;
 
        if (!static_branch_likely(&sched_smt_present))
                return -1;
@@ -6033,11 +6034,17 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
        for_each_cpu(cpu, cpu_smt_mask(target)) {
                if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
                        continue;
-               if (idle_cpu(cpu))
-                       return cpu;
+               if (idle_cpu(cpu)) {
+                       if (full_capacity(cpu))
+                               return cpu;
+                       if (capacity_of(cpu) > backup_cap) {
+                               backup_cap = capacity_of(cpu);
+                               backup_cpu = cpu;
+                       }
+               }
        }
 
-       return -1;
+       return backup_cpu;
 }
 
 #else /* CONFIG_SCHED_SMT */
@@ -6066,6 +6073,8 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
        u64 time, cost;
        s64 delta;
        int cpu, nr = INT_MAX;
+       int backup_cpu = -1;
+       unsigned int backup_cap = 0;
 
        this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
        if (!this_sd)
@@ -6096,10 +6105,19 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
                        return -1;
                if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
                        continue;
-               if (idle_cpu(cpu))
-                       break;
+               if (idle_cpu(cpu)) {
+                       if (full_capacity(cpu)) {
+                               backup_cpu = -1;
+                               break;
+                       } else if (capacity_of(cpu) > backup_cap) {
+                               backup_cap = capacity_of(cpu);
+                               backup_cpu = cpu;
+                       }
+               }
        }
 
+       if (backup_cpu >= 0)
+               cpu = backup_cpu;
        time = local_clock() - time;
        cost = this_sd->avg_scan_cost;
        delta = (s64)(time - cost) / 8;
@@ -6116,13 +6134,14 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
        struct sched_domain *sd;
        int i;
 
-       if (idle_cpu(target))
+       if (idle_cpu(target) && full_capacity(target))
                return target;
 
        /*
         * If the previous cpu is cache affine and idle, don't be stupid.
         */
-       if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
+       if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev)
+           && full_capacity(prev))
                return prev;
 
        sd = rcu_dereference(per_cpu(sd_llc, target));
-- 
2.7.4
