On Tue, Jul 14, 2020 at 10:06:19AM +0530, Srikar Dronamraju wrote: > Currently "CACHE" domain happens to be the 2nd sched domain as per > powerpc_topology. This domain will collapse if cpumask of l2-cache is > same as SMT domain. However we could generalize this domain such that it > could mean either be a "CACHE" domain or a "BIGCORE" domain. > > While setting up the "CACHE" domain, check if shared_cache is already > set. > > Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org> > Cc: Michael Ellerman <micha...@au1.ibm.com> > Cc: Nick Piggin <npig...@au1.ibm.com> > Cc: Oliver OHalloran <olive...@au1.ibm.com> > Cc: Nathan Lynch <nath...@linux.ibm.com> > Cc: Michael Neuling <mi...@linux.ibm.com> > Cc: Anton Blanchard <an...@au1.ibm.com> > Cc: Gautham R Shenoy <e...@linux.vnet.ibm.com> > Cc: Vaidyanathan Srinivasan <sva...@linux.ibm.com> > Signed-off-by: Srikar Dronamraju <sri...@linux.vnet.ibm.com> > --- > arch/powerpc/kernel/smp.c | 48 +++++++++++++++++++++++++++------------ > 1 file changed, 34 insertions(+), 14 deletions(-) > > diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c > index 875f57e41355..f8faf75135af 100644 > --- a/arch/powerpc/kernel/smp.c > +++ b/arch/powerpc/kernel/smp.c > @@ -85,6 +85,14 @@ EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); > EXPORT_PER_CPU_SYMBOL(cpu_core_map); > EXPORT_SYMBOL_GPL(has_big_cores); > > +enum { > +#ifdef CONFIG_SCHED_SMT > + smt_idx, > +#endif > + bigcore_idx, > + die_idx, > +}; > + > #define MAX_THREAD_LIST_SIZE 8 > #define THREAD_GROUP_SHARE_L1 1 > struct thread_groups { > @@ -851,13 +859,7 @@ static int powerpc_shared_cache_flags(void) > */ > static const struct cpumask *shared_cache_mask(int cpu) > { > - if (shared_caches) > - return cpu_l2_cache_mask(cpu); > - > - if (has_big_cores) > - return cpu_smallcore_mask(cpu); > - > - return cpu_smt_mask(cpu); > + return per_cpu(cpu_l2_cache_map, cpu); > } > > #ifdef CONFIG_SCHED_SMT > @@ -867,11 +869,16 @@ static const struct cpumask *smallcore_smt_mask(int cpu) > } > 
#endif > > +static const struct cpumask *cpu_bigcore_mask(int cpu) > +{ > + return cpu_core_mask(cpu);
Shouldn't this be cpu_smt_mask() if we want the redundant big-core level to degenerate in favour of the SMT level on P8? cpu_core_mask() refers to all the CPUs that are in the same chip. > +} > + > static struct sched_domain_topology_level powerpc_topology[] = { > #ifdef CONFIG_SCHED_SMT > { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, > #endif > - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, > + { cpu_bigcore_mask, SD_INIT_NAME(BIGCORE) }, > { cpu_cpu_mask, SD_INIT_NAME(DIE) }, > { NULL, }, > }; > @@ -895,7 +902,7 @@ static int init_big_cores(void) > > #ifdef CONFIG_SCHED_SMT > pr_info("Big cores detected. Using small core scheduling\n"); > - powerpc_topology[0].mask = smallcore_smt_mask; > + powerpc_topology[smt_idx].mask = smallcore_smt_mask; > #endif > > return 0; > @@ -1319,7 +1326,6 @@ static void add_cpu_to_masks(int cpu) > void start_secondary(void *unused) > { > unsigned int cpu = smp_processor_id(); > - struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; > > mmgrab(&init_mm); > current->active_mm = &init_mm; > @@ -1345,14 +1351,20 @@ void start_secondary(void *unused) > /* Update topology CPU masks */ > add_cpu_to_masks(cpu); > > - if (has_big_cores) > - sibling_mask = cpu_smallcore_mask; > /* > * Check for any shared caches. Note that this must be done on a > * per-core basis because one core in the pair might be disabled. > */ > - if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu))) > - shared_caches = true; > + if (!shared_caches) { > + struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; > + struct cpumask *mask = cpu_l2_cache_mask(cpu); > + > + if (has_big_cores) > + sibling_mask = cpu_smallcore_mask; > + > + if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu))) > + shared_caches = true; Shouldn't we use cpumask_subset() here? 
> + } > > set_numa_node(numa_cpu_lookup_table[cpu]); > set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); > @@ -1390,6 +1402,14 @@ void __init smp_cpus_done(unsigned int max_cpus) > smp_ops->bringup_done(); > > dump_numa_cpu_topology(); > + if (shared_caches) { > + pr_info("Using shared cache scheduler topology\n"); > + powerpc_topology[bigcore_idx].mask = shared_cache_mask; > +#ifdef CONFIG_SCHED_DEBUG > + powerpc_topology[bigcore_idx].name = "CACHE"; > +#endif > + powerpc_topology[bigcore_idx].sd_flags = > powerpc_shared_cache_flags; > + } I would much rather that we have all the topology fixups done in one function. fixup_topology(void) { if (has_big_cores) powerpc_topology[smt_idx].mask = smallcore_smt_mask; if (shared_caches) { const char *name = "CACHE"; powerpc_topology[bigcore_idx].mask = shared_cache_mask; strlcpy(powerpc_topology[bigcore_idx].name, name, strlen(name)); powerpc_topology[bigcore_idx].sd_flags = powerpc_shared_cache_flags; } /* Any other changes to the topology structure here */ Also, as an optimization, get rid of degenerate structures here itself so that we don't pay an additional penalty each time the sched-domains are built. } > > set_sched_topology(powerpc_topology); > } > -- > 2.17.1 >