Extend sched_group_energy() to support energy prediction with usage
(tasks) added/removed from a specific cpu or migrated between a pair of
cpus. This is useful for making load-balancing decisions.
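
As an illustration only (a hypothetical sketch, not part of this patch;
the actual call sites are introduced later in the series), a caller
could compare the estimated energy before and after moving 'util' worth
of usage from src_cpu to dst_cpu along these lines:

	static int energy_diff_example(struct sched_group *sg_top, int util,
				       int src_cpu, int dst_cpu)
	{
		/*
		 * cpu ids of -1 never match in calc_usage_delta(), so all
		 * deltas are zero and "before" reflects the current state.
		 */
		struct energy_env before = {
			.sg_top = sg_top, .usage_delta = 0,
			.src_cpu = -1, .dst_cpu = -1,
		};
		struct energy_env after = {
			.sg_top = sg_top, .usage_delta = util,
			.src_cpu = src_cpu, .dst_cpu = dst_cpu,
		};

		return sched_group_energy(&after) - sched_group_energy(&before);
	}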

cc: Ingo Molnar <mi...@redhat.com>
cc: Peter Zijlstra <pet...@infradead.org>

Signed-off-by: Morten Rasmussen <morten.rasmus...@arm.com>
---
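Note: a worked example of the per-group estimate computed in
sched_group_energy() below, with made-up numbers and assuming
SCHED_LOAD_SCALE == SCHED_CAPACITY_SCALE == 1024. For a group with
group_util = 512 (50% busy), cap_states[cap_idx].power = 400 and
idle_states[0].power = 100:

	sg_busy_energy = (512 * 400) >> SCHED_CAPACITY_SHIFT = 200
	sg_idle_energy = ((1024 - 512) * 100) >> SCHED_CAPACITY_SHIFT = 50

so the group contributes 250 units to total_energy. The usage deltas in
the energy_env enter only through group_util and the chosen capacity
index.
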
 kernel/sched/fair.c | 90 +++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 66 insertions(+), 24 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d12aa63..07c84af 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4592,23 +4592,44 @@ static unsigned long capacity_curr_of(int cpu)
  * Without capping the usage, a group could be seen as overloaded (CPU0 usage
 * at 121% + CPU1 usage at 80%) whereas CPU1 has 20% of available capacity.
  */
-static int get_cpu_usage(int cpu)
+static int __get_cpu_usage(int cpu, int delta)
 {
+       int sum;
        unsigned long usage = cpu_rq(cpu)->cfs.utilization_load_avg;
        unsigned long blocked = cpu_rq(cpu)->cfs.utilization_blocked_avg;
        unsigned long capacity_curr = capacity_curr_of(cpu);
 
-       if (usage + blocked >= capacity_curr)
+       sum = usage + blocked + delta;
+
+       if (sum < 0)
+               return 0;
+
+       if (sum >= capacity_curr)
                return capacity_curr;
 
-       return usage + blocked;
+       return sum;
 }
 
+static int get_cpu_usage(int cpu)
+{
+       return __get_cpu_usage(cpu, 0);
+}
+
+
 static inline bool energy_aware(void)
 {
        return sched_feat(ENERGY_AWARE);
 }
 
+struct energy_env {
+       struct sched_group      *sg_top;
+       struct sched_group      *sg_cap;
+       int                     usage_delta;
+       int                     src_cpu;
+       int                     dst_cpu;
+       int                     energy;
+};
+
 /*
  * cpu_norm_usage() returns the cpu usage relative to its current capacity,
  * i.e. its busy ratio, in the range [0..SCHED_LOAD_SCALE], which is useful for
@@ -4623,20 +4644,38 @@ static inline bool energy_aware(void)
  *
  *   norm_usage = running_time/time ~ usage/capacity_curr
  */
-static inline unsigned long cpu_norm_usage(int cpu)
+static inline unsigned long __cpu_norm_usage(int cpu, int delta)
 {
        unsigned long capacity_curr = capacity_curr_of(cpu);
 
-       return (get_cpu_usage(cpu) << SCHED_CAPACITY_SHIFT)/capacity_curr;
+       return (__get_cpu_usage(cpu, delta) << SCHED_CAPACITY_SHIFT)
+                                                       /capacity_curr;
 }
 
-static unsigned group_max_usage(struct sched_group *sg)
+static inline unsigned long cpu_norm_usage(int cpu)
 {
-       int i;
+       return __cpu_norm_usage(cpu, 0);
+}
+
+static inline int calc_usage_delta(struct energy_env *eenv, int cpu)
+{
+       if (cpu == eenv->src_cpu)
+               return -eenv->usage_delta;
+       if (cpu == eenv->dst_cpu)
+               return eenv->usage_delta;
+       return 0;
+}
+
+static unsigned group_max_usage(struct energy_env *eenv,
+                                       struct sched_group *sg)
+{
+       int i, delta;
        int max_usage = 0;
 
-       for_each_cpu(i, sched_group_cpus(sg))
-               max_usage = max(max_usage, get_cpu_usage(i));
+       for_each_cpu(i, sched_group_cpus(sg)) {
+               delta = calc_usage_delta(eenv, i);
+               max_usage = max(max_usage, __get_cpu_usage(i, delta));
+       }
 
        return max_usage;
 }
@@ -4650,24 +4689,27 @@ static unsigned group_max_usage(struct sched_group *sg)
  * latter is used as the estimate as it leads to a more pessimistic energy
  * estimate (more busy).
  */
-static unsigned group_norm_usage(struct sched_group *sg)
+static unsigned group_norm_usage(struct energy_env *eenv,
+                                       struct sched_group *sg)
 {
-       int i;
+       int i, delta;
        unsigned long usage_sum = 0;
 
-       for_each_cpu(i, sched_group_cpus(sg))
-               usage_sum += cpu_norm_usage(i);
+       for_each_cpu(i, sched_group_cpus(sg)) {
+               delta = calc_usage_delta(eenv, i);
+               usage_sum += __cpu_norm_usage(i, delta);
+       }
 
        if (usage_sum > SCHED_CAPACITY_SCALE)
                return SCHED_CAPACITY_SCALE;
        return usage_sum;
 }
 
-static int find_new_capacity(struct sched_group *sg,
+static int find_new_capacity(struct energy_env *eenv,
                struct sched_group_energy *sge)
 {
        int idx;
-       unsigned long util = group_max_usage(sg);
+       unsigned long util = group_max_usage(eenv, eenv->sg_cap);
 
        for (idx = 0; idx < sge->nr_cap_states; idx++) {
                if (sge->cap_states[idx].cap >= util)
@@ -4686,16 +4728,16 @@ static int find_new_capacity(struct sched_group *sg,
  * gather the same usage statistics multiple times. This can probably be done in
  * a faster but more complex way.
  */
-static unsigned int sched_group_energy(struct sched_group *sg_top)
+static unsigned int sched_group_energy(struct energy_env *eenv)
 {
        struct sched_domain *sd;
        int cpu, total_energy = 0;
        struct cpumask visit_cpus;
        struct sched_group *sg;
 
-       WARN_ON(!sg_top->sge);
+       WARN_ON(!eenv->sg_top->sge);
 
-       cpumask_copy(&visit_cpus, sched_group_cpus(sg_top));
+       cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top));
 
        while (!cpumask_empty(&visit_cpus)) {
                struct sched_group *sg_shared_cap = NULL;
@@ -4718,18 +4760,17 @@ static unsigned int sched_group_energy(struct sched_group *sg_top)
                                break;
 
                        do {
-                               struct sched_group *sg_cap_util;
                                unsigned group_util;
                                int sg_busy_energy, sg_idle_energy;
                                int cap_idx;
 
                                if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight)
-                                       sg_cap_util = sg_shared_cap;
+                                       eenv->sg_cap = sg_shared_cap;
                                else
-                                       sg_cap_util = sg;
+                                       eenv->sg_cap = sg;
 
-                               cap_idx = find_new_capacity(sg_cap_util, sg->sge);
-                               group_util = group_norm_usage(sg);
+                               cap_idx = find_new_capacity(eenv, sg->sge);
+                               group_util = group_norm_usage(eenv, sg);
                                sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power)
                                                                                >> SCHED_CAPACITY_SHIFT;
                                sg_idle_energy = ((SCHED_LOAD_SCALE-group_util) * sg->sge->idle_states[0].power)
@@ -4740,7 +4781,7 @@ static unsigned int sched_group_energy(struct sched_group *sg_top)
                                if (!sd->child)
                                        cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg));
 
-                               if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(sg_top)))
+                               if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top)))
                                        goto next_cpu;
 
                        } while (sg = sg->next, sg != sd->groups);
@@ -4749,6 +4790,7 @@ static unsigned int sched_group_energy(struct sched_group *sg_top)
                continue;
        }
 
+       eenv->energy = total_energy;
        return total_energy;
 }
 
-- 
1.9.1
