On Mon, 2012-09-24 at 18:54 +0200, Peter Zijlstra wrote: > But let me try and come up with the list thing, I think we've > actually got that someplace as well.
OK, I'm sure the below can be written better, but my brain is gone for the day... --- include/linux/sched.h | 1 + kernel/sched/core.c | 1 + kernel/sched/fair.c | 102 +++++++++++++++++++++++++++++++++++--------------- 3 files changed, 73 insertions(+), 31 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 0beac68..d72ea68 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -888,6 +888,7 @@ struct sched_group { atomic_t ref; unsigned int group_weight; + int group_first; struct sched_group_power *sgp; /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b38f00e..1177eb1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5781,6 +5781,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) do { sg->group_weight = cpumask_weight(sched_group_cpus(sg)); + sg->group_first = cpumask_first(sched_group_cpus(sg)); sg = sg->next; } while (sg != sd->groups); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6b800a1..601bc38 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2634,50 +2634,90 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) */ static int select_idle_sibling(struct task_struct *p, int target) { - int cpu = smp_processor_id(); - int prev_cpu = task_cpu(p); - struct sched_domain *sd; - struct sched_group *sg; - int i; + struct sched_domain *sd_smt, *sd_llc; + struct sched_group *sg_smt, *sg_llc; /* - * If the task is going to be woken-up on this cpu and if it is - * already idle, then it is the right target. + * If the target is idle, easy peasy, we're done. */ - if (target == cpu && idle_cpu(cpu)) - return cpu; + if (idle_cpu(target)) + return target; /* - * If the task is going to be woken-up on the cpu where it previously - * ran and if it is currently idle, then it the right target. + * Otherwise, see if there's an idle core in the cache domain. 
*/ - if (target == prev_cpu && idle_cpu(prev_cpu)) - return prev_cpu; + sd_llc = rcu_dereference(per_cpu(sd_llc, target)); + sg_llc = sd_llc->groups; + do { + int candidate = -1; + + sd_smt = rcu_dereference(per_cpu(sd_llc, sg_llc->group_first)); + for_each_lower_domain(sd_smt) { + if (sd_smt->flags & SD_SHARE_CPUPOWER) /* aka. SMT */ + break; + } + + if (!sd_smt) { + int cpu = sg_llc->group_first; /* Assume singleton group */ + + if (!idle_cpu(cpu)) + goto next_llc; + + if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + goto next_llc; + + return cpu; + } + + sg_smt = sd_smt->groups; + do { + int cpu = sg_smt->group_first; /* Assume singleton group */ + + if (!idle_cpu(cpu)) /* core is not idle, skip to next core */ + goto next_llc; + + if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + goto next_smt; + + if (candidate < 0) + candidate = cpu; + +next_smt: + sg_smt = sg_smt->next; + } while (sg_smt != sd_smt->groups); + + if (candidate >= 0) + return candidate; + +next_llc: + sg_llc = sg_llc->next; + } while (sg_llc != sd_llc->groups); /* - * Otherwise, iterate the domains and find an elegible idle cpu. + * Failing that, see if there's an idle SMT sibling. */ - sd = rcu_dereference(per_cpu(sd_llc, target)); - for_each_lower_domain(sd) { - sg = sd->groups; + sd_smt = rcu_dereference(per_cpu(sd_llc, target)); + for_each_lower_domain(sd_smt) { + if (sd_smt->flags & SD_SHARE_CPUPOWER) /* aka. 
SMT */ + break; + } + + if (sd_smt) { + sg_smt = sd_smt->groups; do { - if (!cpumask_intersects(sched_group_cpus(sg), - tsk_cpus_allowed(p))) - goto next; + int cpu = sg_smt->group_first; /* Assume singleton group */ - for_each_cpu(i, sched_group_cpus(sg)) { - if (!idle_cpu(i)) - goto next; - } + if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) && + idle_cpu(cpu)) + return cpu; - target = cpumask_first_and(sched_group_cpus(sg), - tsk_cpus_allowed(p)); - goto done; -next: - sg = sg->next; - } while (sg != sd->groups); + sg_smt = sg_smt->next; + } while (sg_smt != sd_smt->groups); } -done: + + /* + * OK, no idle siblings of any kind, take what we started with. + */ return target; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/