Siddha, Suresh B wrote:
Jack Steiner brought up this issue at my OLS talk.

Take a scenario where two tasks are pinned to two HT threads in a physical
package. Idle packages in the system will keep kicking migration_thread
on the busy package without any success.

We will run into similar scenarios in the presence of CMP/NUMA.

Patch appended.
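
(For concreteness, the pinned-task scenario above can be reproduced
from userspace roughly as follows. This is only an illustrative
sketch: it assumes CPUs 0 and 1 are HT siblings of the same physical
package, which is machine-dependent.)

#define _GNU_SOURCE
#include <sched.h>
#include <unistd.h>
#include <stdlib.h>

/* Pin the calling task to one cpu and spin, keeping its runqueue busy. */
static void pin_and_spin(int cpu)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	if (sched_setaffinity(0, sizeof(mask), &mask) < 0)
		exit(1);
	for (;;)
		;
}

int main(void)
{
	if (fork() == 0)
		pin_and_spin(0);	/* child on sibling 0 */
	pin_and_spin(1);		/* parent on sibling 1 */
	return 0;
}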


Hmm, I would have hoped the new "all_pinned" logic would have
handled this case properly. Are you actually seeing this happen?

I have a patch here that might help performance on HT systems,
though it still needs more testing.

I found that idle siblings could cause SMP and NUMA balancing to
be too aggressive in some cases.

Thanks,
Nick

--
SUSE Labs, Novell Inc.

If an idle sibling in an HT package encounters a busy sibling during
load balancing, then run the higher-level load balancing as the
non-idle variety.
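
The -1 return added below is meant to be consumed by the periodic
balancer. The corresponding rebalance_tick() hunk is not part of this
excerpt, so the following is only an assumption about the intended
caller-side handling:

	/*
	 * Assumed caller-side handling (not in the diff below):
	 * a non-zero return from load_balance() -- tasks moved, or
	 * the -1 "sibling was busy" signal -- makes the remaining,
	 * higher domains balance as if this cpu were not idle.
	 */
	if (load_balance(this_cpu, this_rq, sd, idle))
		idle = NOT_IDLE;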

Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c       2005-07-29 19:30:39.000000000 +1000
+++ linux-2.6/kernel/sched.c    2005-07-29 19:35:01.000000000 +1000
@@ -1889,7 +1889,7 @@ out:
  */
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
-                  unsigned long *imbalance, enum idle_type idle)
+                  unsigned long *imbalance, enum idle_type idle, int *sd_idle)
 {
        struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
        unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -1914,6 +1914,9 @@ find_busiest_group(struct sched_domain *
                avg_load = 0;
 
                for_each_cpu_mask(i, group->cpumask) {
+                       if (*sd_idle && !idle_cpu(i))
+                               *sd_idle = 0;
+
                        /* Bias balancing toward cpus of our domain */
                        if (local_group)
                                load = target_load(i, load_idx);
@@ -2057,11 +2060,15 @@ static int load_balance(int this_cpu, ru
        unsigned long imbalance;
        int nr_moved, all_pinned = 0;
        int active_balance = 0;
+       int sd_idle = 0;
+
+       if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER)
+               sd_idle = 1;
 
        spin_lock(&this_rq->lock);
        schedstat_inc(sd, lb_cnt[idle]);
 
-       group = find_busiest_group(sd, this_cpu, &imbalance, idle);
+       group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle);
        if (!group) {
                schedstat_inc(sd, lb_nobusyg[idle]);
                goto out_balanced;
@@ -2136,6 +2143,8 @@ static int load_balance(int this_cpu, ru
                        sd->balance_interval *= 2;
        }
 
+       if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+               return -1;
        return nr_moved;
 
 out_balanced:
@@ -2149,6 +2158,8 @@ out_balanced:
                        (sd->balance_interval < sd->max_interval))
                sd->balance_interval *= 2;
 
+       if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+               return -1;
        return 0;
 }
 
@@ -2166,9 +2177,13 @@ static int load_balance_newidle(int this
        runqueue_t *busiest = NULL;
        unsigned long imbalance;
        int nr_moved = 0;
+       int sd_idle = 0;
 
+       if (sd->flags & SD_SHARE_CPUPOWER)
+               sd_idle = 1;
+
        schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
-       group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE);
+       group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle);
        if (!group) {
                schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
                goto out_balanced;
@@ -2193,15 +2208,19 @@ static int load_balance_newidle(int this
                spin_unlock(&busiest->lock);
        }
 
-       if (!nr_moved)
+       if (!nr_moved) {
                schedstat_inc(sd, lb_failed[NEWLY_IDLE]);
-       else
+               if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+                       return -1;
+       } else
                sd->nr_balance_failed = 0;
 
        return nr_moved;
 
 out_balanced:
        schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
+       if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+               return -1;
        sd->nr_balance_failed = 0;
        return 0;
 }
