On Fri, 19 Jul 2019 at 14:54, Peter Zijlstra <pet...@infradead.org> wrote: > > On Fri, Jul 19, 2019 at 09:58:23AM +0200, Vincent Guittot wrote: > > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > > index 67f0acd..472959df 100644 > > --- a/kernel/sched/fair.c > > +++ b/kernel/sched/fair.c > > @@ -5376,18 +5376,6 @@ static unsigned long capacity_of(int cpu) > > return cpu_rq(cpu)->cpu_capacity; > > } > > > > -static unsigned long cpu_avg_load_per_task(int cpu) > > -{ > > - struct rq *rq = cpu_rq(cpu); > > - unsigned long nr_running = READ_ONCE(rq->cfs.h_nr_running); > > - unsigned long load_avg = cpu_runnable_load(rq); > > - > > - if (nr_running) > > - return load_avg / nr_running; > > - > > - return 0; > > -} > > - > > static void record_wakee(struct task_struct *p) > > { > > /* > > > @@ -7646,7 +7669,6 @@ static unsigned long task_h_load(struct task_struct > > *p) > > struct sg_lb_stats { > > unsigned long avg_load; /*Avg load across the CPUs of the group */ > > unsigned long group_load; /* Total load over the CPUs of the group */ > > - unsigned long load_per_task; > > unsigned long group_capacity; > > unsigned long group_util; /* Total utilization of the group */ > > unsigned int sum_nr_running; /* Nr tasks running in the group */ > > > > @@ -8266,76 +8293,6 @@ static inline void update_sd_lb_stats(struct lb_env > > *env, struct sd_lb_stats *sd > > } > > > > /** > > - * fix_small_imbalance - Calculate the minor imbalance that exists > > - * amongst the groups of a sched_domain, during > > - * load balancing. > > - * @env: The load balancing environment. > > - * @sds: Statistics of the sched_domain whose imbalance is to be > > calculated. > > - */ > > -static inline > > -void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) > > -{ > > - unsigned long tmp, capa_now = 0, capa_move = 0; > > - unsigned int imbn = 2; > > - unsigned long scaled_busy_load_per_task; > > - struct sg_lb_stats *local, *busiest; > > - > > - local = &sds->local_stat; > > - busiest = &sds->busiest_stat; > > - > > - if (!local->sum_h_nr_running) > > - local->load_per_task = cpu_avg_load_per_task(env->dst_cpu); > > - else if (busiest->load_per_task > local->load_per_task) > > - imbn = 1; > > - > > - scaled_busy_load_per_task = > > - (busiest->load_per_task * SCHED_CAPACITY_SCALE) / > > - busiest->group_capacity; > > - > > - if (busiest->avg_load + scaled_busy_load_per_task >= > > - local->avg_load + (scaled_busy_load_per_task * imbn)) { > > - env->imbalance = busiest->load_per_task; > > - return; > > - } > > - > > - /* > > - * OK, we don't have enough imbalance to justify moving tasks, > > - * however we may be able to increase total CPU capacity used by > > - * moving them. > > - */ > > - > > - capa_now += busiest->group_capacity * > > - min(busiest->load_per_task, busiest->avg_load); > > - capa_now += local->group_capacity * > > - min(local->load_per_task, local->avg_load); > > - capa_now /= SCHED_CAPACITY_SCALE; > > - > > - /* Amount of load we'd subtract */ > > - if (busiest->avg_load > scaled_busy_load_per_task) { > > - capa_move += busiest->group_capacity * > > - min(busiest->load_per_task, > > - busiest->avg_load - > > scaled_busy_load_per_task); > > - } > > - > > - /* Amount of load we'd add */ > > - if (busiest->avg_load * busiest->group_capacity < > > - busiest->load_per_task * SCHED_CAPACITY_SCALE) { > > - tmp = (busiest->avg_load * busiest->group_capacity) / > > - local->group_capacity; > > - } else { > > - tmp = (busiest->load_per_task * SCHED_CAPACITY_SCALE) / > > - local->group_capacity; > > - } > > - capa_move += local->group_capacity * > > - min(local->load_per_task, local->avg_load + tmp); > > - capa_move /= SCHED_CAPACITY_SCALE; > > - > > - /* Move if we gain throughput */ > > - if (capa_move > capa_now) > > - env->imbalance = busiest->load_per_task; > > -} > > - > > -/** > > * calculate_imbalance - Calculate the amount of imbalance present within > > the > > * groups of a given sched_domain during load balance. > > * @env: load balance environment > > Maybe strip this out first, in a separate patch. It's all magic doo-doo.
If I remove that 1st, we will have commit in the branch that might regress some performance temporarily and bisect might spot it while looking for a culprit, isn't it ?