On Mon, 25 Jan 2021 at 09:59, Mel Gorman <mgor...@techsingularity.net> wrote:
>
> From: Peter Zijlstra <pet...@infradead.org>
>
> From: Peter Zijlstra (Intel) <pet...@infradead.org>
>
> Both select_idle_core() and select_idle_cpu() do a loop over the same
> cpumask. Observe that by clearing the already visited CPUs, we can
> fold the iteration and iterate a core at a time.
>
> All we need to do is remember any non-idle CPU we encountered while
> scanning for an idle core. This way we'll only iterate every CPU once.
>
> Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
> Signed-off-by: Mel Gorman <mgor...@techsingularity.net>
> ---
>  kernel/sched/fair.c | 101 ++++++++++++++++++++++++++------------------
>  1 file changed, 61 insertions(+), 40 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index fe587350ea14..52a650aa2108 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6006,6 +6006,14 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
>          return new_cpu;
>  }
>
> +static inline int __select_idle_cpu(int cpu)
> +{
> +        if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> +                return cpu;
> +
> +        return -1;
> +}
> +
>  #ifdef CONFIG_SCHED_SMT
>  DEFINE_STATIC_KEY_FALSE(sched_smt_present);
>  EXPORT_SYMBOL_GPL(sched_smt_present);
> @@ -6064,48 +6072,51 @@ void __update_idle_core(struct rq *rq)
>   * there are no idle cores left in the system; tracked through
>   * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above.
>   */
> -static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
> +static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
>  {
> -        struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
> -        int core, cpu;
> +        bool idle = true;
> +        int cpu;
>
>          if (!static_branch_likely(&sched_smt_present))
> -                return -1;
> -
> -        if (!test_idle_cores(target, false))
> -                return -1;
> -
> -        cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> +                return __select_idle_cpu(core);
>
> -        for_each_cpu_wrap(core, cpus, target) {
> -                bool idle = true;
> -
> -                for_each_cpu(cpu, cpu_smt_mask(core)) {
> -                        if (!available_idle_cpu(cpu)) {
> -                                idle = false;
> -                                break;
> +        for_each_cpu(cpu, cpu_smt_mask(core)) {
> +                if (!available_idle_cpu(cpu)) {
> +                        idle = false;
> +                        if (*idle_cpu == -1) {
> +                                if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) {
> +                                        *idle_cpu = cpu;
> +                                        break;
> +                                }
> +                                continue;
>                          }
> +                        break;
>                  }
> -
> -                if (idle)
> -                        return core;
> -
> -                cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
> +                if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr))
> +                        *idle_cpu = cpu;
>          }
>
> -        /*
> -         * Failed to find an idle core; stop looking for one.
> -         */
> -        set_idle_cores(target, 0);
> +        if (idle)
> +                return core;
>
> +        cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
>          return -1;
>  }
>
>  #else /* CONFIG_SCHED_SMT */
>
> -static inline int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
> +static inline void set_idle_cores(int cpu, int val)
>  {
> -        return -1;
> +}
> +
> +static inline bool test_idle_cores(int cpu, bool def)
> +{
> +        return def;
> +}
> +
> +static inline int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
> +{
> +        return __select_idle_cpu(core);
>  }
>
>  #endif /* CONFIG_SCHED_SMT */
> @@ -6118,10 +6129,11 @@ static inline int select_idle_core(struct task_struct *p, struct sched_domain *s
>  static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
>  {
>          struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
> +        int i, cpu, idle_cpu = -1, nr = INT_MAX;
> +        bool smt = test_idle_cores(target, false);
> +        int this = smp_processor_id();
>          struct sched_domain *this_sd;
>          u64 time;
> -        int this = smp_processor_id();
> -        int cpu, nr = INT_MAX;
>
>          this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
>          if (!this_sd)
> @@ -6129,7 +6141,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
>
>          cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
>
> -        if (sched_feat(SIS_PROP)) {
> +        if (sched_feat(SIS_PROP) && !smt) {
>                  u64 avg_cost, avg_idle, span_avg;
>
>                  /*
> @@ -6149,18 +6161,31 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
>          }
>
>          for_each_cpu_wrap(cpu, cpus, target) {
> -                if (!--nr)
> -                        return -1;
> -                if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> -                        break;
> +                if (smt) {
> +                        i = select_idle_core(p, cpu, cpus, &idle_cpu);
> +                        if ((unsigned int)i < nr_cpumask_bits)
> +                                return i;
> +
> +                } else {
> +                        if (!--nr)
> +                                return -1;
> +                        i = __select_idle_cpu(cpu);

You should use idle_cpu directly here instead of the intermediate i variable:

+                        idle_cpu = __select_idle_cpu(cpu);
+                        if ((unsigned int)idle_cpu < nr_cpumask_bits)
+                                break;

Apart from this small comment above, the patch looks good to me, and I no
longer see any performance regression.

> +                        if ((unsigned int)i < nr_cpumask_bits) {
> +                                idle_cpu = i;
> +                                break;
> +                        }
> +                }
>          }
>
> -        if (sched_feat(SIS_PROP)) {
> +        if (smt)
> +                set_idle_cores(this, false);
> +
> +        if (sched_feat(SIS_PROP) && !smt) {
>                  time = cpu_clock(this) - time;
>                  update_avg(&this_sd->avg_scan_cost, time);
>          }
>
> -        return cpu;
> +        return idle_cpu;
>  }
>
>  /*
> @@ -6289,10 +6314,6 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
>          if (!sd)
>                  return target;
>
> -        i = select_idle_core(p, sd, target);
> -        if ((unsigned)i < nr_cpumask_bits)
> -                return i;
> -
>          i = select_idle_cpu(p, sd, target);
>          if ((unsigned)i < nr_cpumask_bits)
>                  return i;
> --
> 2.26.2
>
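
Side note for anyone reading the thread without the surrounding kernel code:
below is a minimal userspace sketch of the folded scan the changelog
describes, i.e. walk the CPUs one core at a time, remember the first usable
idle CPU as a fallback, and return as soon as a fully idle core is found.
All names here (select_idle_core_toy(), cpu_idle[], the fixed 2-thread SMT
layout) are invented for illustration only and are not part of the patch.

#include <stdio.h>
#include <stdbool.h>

#define NR_CPUS         8
#define SMT_WIDTH       2       /* assume 2 hardware threads per core */

static bool cpu_idle[NR_CPUS] = {
        false, false,           /* core 0: both siblings busy */
        false, true,            /* core 1: one idle sibling */
        true,  true,            /* core 2: fully idle core */
        false, false,           /* core 3: both siblings busy */
};

/* Check one core; remember any usable idle CPU seen as a fallback. */
static int select_idle_core_toy(int core, int *idle_cpu)
{
        bool idle = true;
        int cpu;

        for (cpu = core * SMT_WIDTH; cpu < (core + 1) * SMT_WIDTH; cpu++) {
                if (!cpu_idle[cpu]) {
                        idle = false;
                        continue;
                }
                if (*idle_cpu == -1)
                        *idle_cpu = cpu;
        }

        return idle ? core * SMT_WIDTH : -1;
}

/* One pass over all cores: prefer a fully idle core, else any idle CPU. */
static int select_idle_cpu_toy(void)
{
        int idle_cpu = -1;
        int core;

        for (core = 0; core < NR_CPUS / SMT_WIDTH; core++) {
                int i = select_idle_core_toy(core, &idle_cpu);

                if (i >= 0)
                        return i;       /* whole core idle: best choice */
        }

        return idle_cpu;                /* fallback: first idle CPU seen */
}

int main(void)
{
        printf("picked CPU %d\n", select_idle_cpu_toy());  /* prints 4 here */
        return 0;
}

In the real patch the same single pass is kept by having select_idle_core()
record a fallback CPU through *idle_cpu and by cpumask_andnot()'ing the
already visited SMT siblings out of the scan mask.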