In message <1266573672.1806.70.ca...@laptop> you wrote:
> On Fri, 2010-02-19 at 17:05 +1100, Michael Neuling wrote:
> > >  include/linux/sched.h |    2 +-
> > >  kernel/sched_fair.c   |   61 +++++++++++++++++++++++++++++++++++++++++++
++--
> > -
> > >  2 files changed, 58 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/include/linux/sched.h b/include/linux/sched.h
> > > index 0eef87b..42fa5c6 100644
> > > --- a/include/linux/sched.h
> > > +++ b/include/linux/sched.h
> > > @@ -849,7 +849,7 @@ enum cpu_idle_type {
> > >  #define SD_POWERSAVINGS_BALANCE  0x0100  /* Balance for power savings */
> > >  #define SD_SHARE_PKG_RESOURCES   0x0200  /* Domain members share cpu pkg
> >  resources */
> > >  #define SD_SERIALIZE             0x0400  /* Only a single load balancing
 instanc
> > e */
> > > -
> > > +#define SD_ASYM_PACKING          0x0800
> > 
> > Would we eventually add this to SD_SIBLING_INIT in a arch specific hook,
> > or is this ok to add it generically?
> 
> I'd think we'd want to keep that limited to architectures that actually
> need it.

OK

> >  
> > > +static int update_sd_pick_busiest(struct sched_domain *sd,
> > > +                           struct sd_lb_stats *sds,
> > > +                           struct sched_group *sg,
> > > +                           struct sg_lb_stats *sgs)
> > > +{
> > > + if (sgs->sum_nr_running > sgs->group_capacity)
> > > +         return 1;
> > > +
> > > + if (sgs->group_imb)
> > > +         return 1;
> > > +
> > > + if ((sd->flags & SD_ASYM_PACKING) && sgs->sum_nr_running) {
> > > +         if (!sds->busiest)
> > > +                 return 1;
> > > +
> > > +         if (group_first_cpu(sds->busiest) < group_first_cpu(group))
> > 
> > "group" => "sg" here? (I get a compile error otherwise)
> 
> Oh, quite ;-)
> 
> > > +static int check_asym_packing(struct sched_domain *sd,
> > > +                             struct sd_lb_stats *sds, 
> > > +                             int cpu, unsigned long *imbalance)
> > > +{
> > > + int i, cpu, busiest_cpu;
> > 
> > Redefining cpu here.  Looks like the cpu parameter is not really needed?
> 
> Seems that way indeed, I went back and forth a few times on the actual
> implementation of this function (which started out live as a copy of
> check_power_save_busiest_group), its amazing there were only these two
> compile glitches ;-)

:-)

Below are the cleanups + the arch specific bits.  It doesn't change your
logic at all.  Obviously the PPC arch bits would need to be split into a
separate patch.

Compiles and boots against linux-next.

Mikey
---
 arch/powerpc/include/asm/cputable.h |    3 +
 arch/powerpc/kernel/process.c       |    7 +++
 include/linux/sched.h               |    4 +-
 include/linux/topology.h            |    1 
 kernel/sched_fair.c                 |   64 ++++++++++++++++++++++++++++++++++--
 5 files changed, 74 insertions(+), 5 deletions(-)

Index: linux-next/arch/powerpc/include/asm/cputable.h
===================================================================
--- linux-next.orig/arch/powerpc/include/asm/cputable.h
+++ linux-next/arch/powerpc/include/asm/cputable.h
@@ -195,6 +195,7 @@ extern const char *powerpc_base_platform
 #define CPU_FTR_SAO                    LONG_ASM_CONST(0x0020000000000000)
 #define CPU_FTR_CP_USE_DCBTZ           LONG_ASM_CONST(0x0040000000000000)
 #define CPU_FTR_UNALIGNED_LD_STD       LONG_ASM_CONST(0x0080000000000000)
+#define CPU_FTR_ASYM_SMT4              LONG_ASM_CONST(0x0100000000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -409,7 +410,7 @@ extern const char *powerpc_base_platform
            CPU_FTR_MMCRA | CPU_FTR_SMT | \
            CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
            CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
-           CPU_FTR_DSCR | CPU_FTR_SAO)
+           CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT4)
 #define CPU_FTRS_CELL  (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
            CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
            CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
Index: linux-next/arch/powerpc/kernel/process.c
===================================================================
--- linux-next.orig/arch/powerpc/kernel/process.c
+++ linux-next/arch/powerpc/kernel/process.c
@@ -1265,3 +1265,10 @@ unsigned long randomize_et_dyn(unsigned 
 
        return ret;
 }
+
+int arch_sd_asym_packing(void)
+{
+       if (cpu_has_feature(CPU_FTR_ASYM_SMT4))
+               return SD_ASYM_PACKING;
+       return 0;
+}
Index: linux-next/include/linux/sched.h
===================================================================
--- linux-next.orig/include/linux/sched.h
+++ linux-next/include/linux/sched.h
@@ -849,7 +849,7 @@ enum cpu_idle_type {
 #define SD_POWERSAVINGS_BALANCE        0x0100  /* Balance for power savings */
 #define SD_SHARE_PKG_RESOURCES 0x0200  /* Domain members share cpu pkg 
resources */
 #define SD_SERIALIZE           0x0400  /* Only a single load balancing 
instance */
-
+#define SD_ASYM_PACKING                0x0800  /* Asymetric SMT packing */
 #define SD_PREFER_SIBLING      0x1000  /* Prefer to place tasks in a sibling 
domain */
 
 enum powersavings_balance_level {
@@ -881,6 +881,8 @@ static inline int sd_balance_for_package
        return SD_PREFER_SIBLING;
 }
 
+extern int arch_sd_asym_packing(void);
+
 /*
  * Optimise SD flags for power savings:
  * SD_BALANCE_NEWIDLE helps agressive task consolidation and power savings.
Index: linux-next/include/linux/topology.h
===================================================================
--- linux-next.orig/include/linux/topology.h
+++ linux-next/include/linux/topology.h
@@ -102,6 +102,7 @@ int arch_update_cpu_topology(void);
                                | 1*SD_SHARE_PKG_RESOURCES              \
                                | 0*SD_SERIALIZE                        \
                                | 0*SD_PREFER_SIBLING                   \
+                               | arch_sd_asym_packing()                \
                                ,                                       \
        .last_balance           = jiffies,                              \
        .balance_interval       = 1,                                    \
Index: linux-next/kernel/sched_fair.c
===================================================================
--- linux-next.orig/kernel/sched_fair.c
+++ linux-next/kernel/sched_fair.c
@@ -2086,6 +2086,7 @@ struct sd_lb_stats {
        struct sched_group *this;  /* Local group in this sd */
        unsigned long total_load;  /* Total load of all groups in sd */
        unsigned long total_pwr;   /*   Total power of all groups in sd */
+       unsigned long total_nr_running;
        unsigned long avg_load;    /* Average load across all groups in sd */
 
        /** Statistics of this group */
@@ -2493,6 +2494,28 @@ static inline void update_sg_lb_stats(st
                DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
 }
 
+static int update_sd_pick_busiest(struct sched_domain *sd,
+                                 struct sd_lb_stats *sds,
+                                 struct sched_group *sg,
+                                 struct sg_lb_stats *sgs)
+{
+       if (sgs->sum_nr_running > sgs->group_capacity)
+               return 1;
+
+       if (sgs->group_imb)
+               return 1;
+
+       if ((sd->flags & SD_ASYM_PACKING) && sgs->sum_nr_running) {
+               if (!sds->busiest)
+                       return 1;
+
+               if (group_first_cpu(sds->busiest) < group_first_cpu(sg))
+                       return 1;
+       }
+
+       return 0;
+}
+
 /**
  * update_sd_lb_stats - Update sched_group's statistics for load balancing.
  * @sd: sched_domain whose statistics are to be updated.
@@ -2533,6 +2556,7 @@ static inline void update_sd_lb_stats(st
 
                sds->total_load += sgs.group_load;
                sds->total_pwr += group->cpu_power;
+               sds->total_nr_running += sgs.sum_nr_running;
 
                /*
                 * In case the child domain prefers tasks go to siblings
@@ -2547,9 +2571,8 @@ static inline void update_sd_lb_stats(st
                        sds->this = group;
                        sds->this_nr_running = sgs.sum_nr_running;
                        sds->this_load_per_task = sgs.sum_weighted_load;
-               } else if (sgs.avg_load > sds->max_load &&
-                          (sgs.sum_nr_running > sgs.group_capacity ||
-                               sgs.group_imb)) {
+               } else if (sgs.avg_load >= sds->max_load &&
+                          update_sd_pick_busiest(sd, sds, group, &sgs)) {
                        sds->max_load = sgs.avg_load;
                        sds->busiest = group;
                        sds->busiest_nr_running = sgs.sum_nr_running;
@@ -2562,6 +2585,38 @@ static inline void update_sd_lb_stats(st
        } while (group != sd->groups);
 }
 
+int __weak sd_asym_packing_arch(void)
+{
+       return 0;
+}
+
+static int check_asym_packing(struct sched_domain *sd,
+                                   struct sd_lb_stats *sds,
+                                   unsigned long *imbalance)
+{
+       int i, cpu, busiest_cpu;
+
+       if (!(sd->flags & SD_ASYM_PACKING))
+               return 0;
+
+       if (!sds->busiest)
+               return 0;
+
+       i = 0;
+       busiest_cpu = group_first_cpu(sds->busiest);
+       for_each_cpu(cpu, sched_domain_span(sd)) {
+               i++;
+               if (cpu == busiest_cpu)
+                       break;
+       }
+
+       if (sds->total_nr_running > i)
+               return 0;
+
+       *imbalance = sds->max_load;
+       return 1;
+}
+
 /**
  * fix_small_imbalance - Calculate the minor imbalance that exists
  *                     amongst the groups of a sched_domain, during
@@ -2761,6 +2816,9 @@ find_busiest_group(struct sched_domain *
        return sds.busiest;
 
 out_balanced:
+       if (check_asym_packing(sd, &sds, imbalance))
+               return sds.busiest;
+
        /*
         * There is no obvious imbalance. But check if we can do some balancing
         * to save power.
_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to