On Mon, 2012-09-24 at 18:54 +0200, Peter Zijlstra wrote:
> But let me try and come up with the list thing, I think we've
> actually got that someplace as well. 

OK, I'm sure the below can be written better, but my brain is gone for
the day...
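
Rough idea, for anyone skimming: cache each group's first CPU in the new
sched_group::group_first field, and have select_idle_sibling() first walk
the LLC domain's groups (one per core) looking for a core whose siblings
are all idle, then fall back to an idle SMT sibling of the target, and
finally to the target itself.  Purely to illustrate that search order,
here's a throw-away userspace toy model; the topology constants and the
cpu_idle[]/cpu_allowed[] arrays are made up and merely stand in for the
sched_group walks, idle_cpu() and tsk_cpus_allowed().  It's a sketch of
the intended ordering, not kernel code:

#include <stdbool.h>
#include <stdio.h>

#define SMT_WIDTH	2			/* siblings per core (made up) */
#define NR_CORES	4
#define NR_CPUS		(NR_CORES * SMT_WIDTH)

static bool cpu_idle[NR_CPUS];			/* stands in for idle_cpu() */
static bool cpu_allowed[NR_CPUS];		/* stands in for tsk_cpus_allowed() */

static int toy_select_idle_sibling(int target)
{
	int core, smt;

	/* If the target is idle, we're done. */
	if (cpu_idle[target])
		return target;

	/* Pass 1: find a core in the LLC whose siblings are all idle. */
	for (core = 0; core < NR_CORES; core++) {
		int candidate = -1;
		bool core_idle = true;

		for (smt = 0; smt < SMT_WIDTH; smt++) {
			int cpu = core * SMT_WIDTH + smt;

			if (!cpu_idle[cpu]) {
				core_idle = false;	/* busy core, try the next one */
				break;
			}
			if (candidate < 0 && cpu_allowed[cpu])
				candidate = cpu;	/* first allowed idle sibling */
		}
		if (core_idle && candidate >= 0)
			return candidate;
	}

	/* Pass 2: failing that, settle for an idle SMT sibling of the target. */
	for (smt = 0; smt < SMT_WIDTH; smt++) {
		int cpu = (target / SMT_WIDTH) * SMT_WIDTH + smt;

		if (cpu_idle[cpu] && cpu_allowed[cpu])
			return cpu;
	}

	/* Pass 3: no idle sibling of any kind, keep what we started with. */
	return target;
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		cpu_allowed[cpu] = true;

	cpu_idle[3] = true;			/* lone idle sibling of a busy core */
	cpu_idle[6] = cpu_idle[7] = true;	/* a fully idle core */

	printf("wakeup on 0 -> cpu %d\n", toy_select_idle_sibling(0));
	return 0;
}

In the real thing the two passes are of course driven by the sg_llc/sg_smt
group walks and group_first rather than these arrays, but the ordering is
the same.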

---
 include/linux/sched.h |   1 +
 kernel/sched/core.c   |   1 +
 kernel/sched/fair.c   | 102 +++++++++++++++++++++++++++++++++++---------------
 3 files changed, 73 insertions(+), 31 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0beac68..d72ea68 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -888,6 +888,7 @@ struct sched_group {
        atomic_t ref;
 
        unsigned int group_weight;
+       int group_first;
        struct sched_group_power *sgp;
 
        /*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b38f00e..1177eb1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5781,6 +5781,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 
        do {
                sg->group_weight = cpumask_weight(sched_group_cpus(sg));
+               sg->group_first = cpumask_first(sched_group_cpus(sg));
                sg = sg->next;
        } while (sg != sd->groups);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b800a1..601bc38 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2634,50 +2634,90 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
  */
 static int select_idle_sibling(struct task_struct *p, int target)
 {
-       int cpu = smp_processor_id();
-       int prev_cpu = task_cpu(p);
-       struct sched_domain *sd;
-       struct sched_group *sg;
-       int i;
+       struct sched_domain *sd_smt, *sd;
+       struct sched_group *sg_smt, *sg_llc;
 
        /*
-        * If the task is going to be woken-up on this cpu and if it is
-        * already idle, then it is the right target.
+        * If the target is idle, easy peasy, we're done.
         */
-       if (target == cpu && idle_cpu(cpu))
-               return cpu;
+       if (idle_cpu(target))
+               return target;
 
        /*
-        * If the task is going to be woken-up on the cpu where it previously
-        * ran and if it is currently idle, then it the right target.
+        * Otherwise, see if there's an idle core in the cache domain.
         */
-       if (target == prev_cpu && idle_cpu(prev_cpu))
-               return prev_cpu;
+       sd = rcu_dereference(per_cpu(sd_llc, target));
+       sg_llc = sd->groups;
+       do {
+               int candidate = -1;
+
+               sd_smt = rcu_dereference(per_cpu(sd_llc, sg_llc->group_first));
+               for_each_lower_domain(sd_smt) {
+                       if (sd_smt->flags & SD_SHARE_CPUPOWER) /* aka. SMT */
+                               break;
+               }
+
+               if (!sd_smt) {
+                       int cpu = sg_llc->group_first; /* Assume singleton group */
+
+                       if (!idle_cpu(cpu))
+                               goto next_llc;
+
+                       if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
+                               goto next_llc;
+
+                       return cpu;
+               }
+
+               sg_smt = sd_smt->groups;
+               do {
+                       int cpu = sg_smt->group_first; /* Assume singleton group */
+
+                       if (!idle_cpu(cpu)) /* core is not idle, skip to next core */
+                               goto next_llc;
+
+                       if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
+                               goto next_smt;
+
+                       if (candidate < 0)
+                               candidate = cpu;
+
+next_smt:
+                       sg_smt = sg_smt->next;
+               } while (sg_smt != sd_smt->groups);
+
+               if (candidate >= 0)
+                       return candidate;
+
+next_llc:
+               sg_llc = sg_llc->next;
+       } while (sg_llc != sd->groups);
 
        /*
-        * Otherwise, iterate the domains and find an elegible idle cpu.
+        * Failing that, see if there's an idle SMT sibling.
         */
-       sd = rcu_dereference(per_cpu(sd_llc, target));
-       for_each_lower_domain(sd) {
-               sg = sd->groups;
+       sd_smt = rcu_dereference(per_cpu(sd_llc, target));
+       for_each_lower_domain(sd_smt) {
+               if (sd_smt->flags & SD_SHARE_CPUPOWER) /* aka. SMT */
+                       break;
+       }
+
+       if (sd_smt) {
+               sg_smt = sd_smt->groups;
                do {
-                       if (!cpumask_intersects(sched_group_cpus(sg),
-                                               tsk_cpus_allowed(p)))
-                               goto next;
+                       int cpu = sg_smt->group_first; /* Assume singleton group */
 
-                       for_each_cpu(i, sched_group_cpus(sg)) {
-                               if (!idle_cpu(i))
-                                       goto next;
-                       }
+                       if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) &&
+                           idle_cpu(cpu))
+                               return cpu;
 
-                       target = cpumask_first_and(sched_group_cpus(sg),
-                                       tsk_cpus_allowed(p));
-                       goto done;
-next:
-                       sg = sg->next;
-               } while (sg != sd->groups);
+                       sg_smt = sg_smt->next;
+               } while (sg_smt != sd_smt->groups);
        }
-done:
+
+       /*
+        * OK, no idle siblings of any kind, take what we started with.
+        */
        return target;
 }
 
