From: Dave Hansen <dave.han...@linux.intel.com> As noted by multiple reports:
https://lkml.org/lkml/2014/9/15/1240 https://lkml.org/lkml/2014/7/28/442 the sched domains code has some assumptions that break on newer AMD and Intel CPUs. Namely, the code assumes that NUMA node boundaries always lie outside of a CPU package. That assumption is no longer true with Intel's Cluster-on-Die found in Haswell CPUs (with a special BIOS config knob) and AMD's DCM feature. Essentially, the 'cpu_core_map' is no longer suitable for enumerating all the CPUs in a physical package. This patch introduces a new map which is specifically built by consulting the physical package ids instead of inferring the information from NUMA nodes. This still leaves us with a broken 'core_siblings_list' in sysfs, but a later patch will fix that up too. Signed-off-by: Dave Hansen <dave.han...@linux.intel.com> --- b/arch/x86/include/asm/smp.h | 6 ++++++ b/arch/x86/include/asm/topology.h | 1 + b/arch/x86/kernel/smpboot.c | 13 +++++++++++++ b/arch/x86/xen/smp.c | 1 + 4 files changed, 21 insertions(+) diff -puN arch/x86/include/asm/smp.h~introduce-package-sd-level arch/x86/include/asm/smp.h --- a/arch/x86/include/asm/smp.h~introduce-package-sd-level 2014-09-17 15:28:57.075552056 -0700 +++ b/arch/x86/include/asm/smp.h 2014-09-17 15:28:57.084552469 -0700 @@ -32,6 +32,7 @@ static inline bool cpu_has_ht_siblings(v DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_package_map); /* cpus sharing the last level cache: */ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); @@ -52,6 +53,11 @@ static inline struct cpumask *cpu_llc_sh return per_cpu(cpu_llc_shared_map, cpu); } +static inline struct cpumask *cpu_package_mask(int cpu) +{ + return per_cpu(cpu_package_map, cpu); +} + DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); #if 
defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) diff -puN arch/x86/include/asm/topology.h~introduce-package-sd-level arch/x86/include/asm/topology.h --- a/arch/x86/include/asm/topology.h~introduce-package-sd-level 2014-09-17 15:28:57.077552149 -0700 +++ b/arch/x86/include/asm/topology.h 2014-09-17 15:28:57.084552469 -0700 @@ -118,6 +118,7 @@ static inline void setup_node_to_cpumask #include <asm-generic/topology.h> extern const struct cpumask *cpu_coregroup_mask(int cpu); +extern const struct cpumask *cpu_package_mask_func(int cpu); #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) diff -puN arch/x86/kernel/smpboot.c~introduce-package-sd-level arch/x86/kernel/smpboot.c --- a/arch/x86/kernel/smpboot.c~introduce-package-sd-level 2014-09-17 15:28:57.079552240 -0700 +++ b/arch/x86/kernel/smpboot.c 2014-09-17 15:28:57.085552515 -0700 @@ -98,6 +98,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map); DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_package_map); + /* Per CPU bogomips and other parameters */ DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); @@ -352,6 +354,13 @@ static bool match_mc(struct cpuinfo_x86 return false; } +static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) +{ + if (c->phys_proc_id == o->phys_proc_id) + return true; + return false; +} + void set_cpu_sibling_map(int cpu) { bool has_smt = smp_num_siblings > 1; @@ -365,6 +374,7 @@ void set_cpu_sibling_map(int cpu) if (!has_mp) { cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); + cpumask_set_cpu(cpu, cpu_package_mask(cpu)); cpumask_set_cpu(cpu, cpu_core_mask(cpu)); c->booted_cores = 1; return; @@ -410,6 +420,9 @@ void set_cpu_sibling_map(int cpu) } else if (i != cpu && !c->booted_cores) c->booted_cores = cpu_data(i).booted_cores; } + if ((i == cpu) || (has_mp && 
match_pkg(c, o))) { + link_mask(package, cpu, i); + } } } diff -puN arch/x86/xen/smp.c~introduce-package-sd-level arch/x86/xen/smp.c --- a/arch/x86/xen/smp.c~introduce-package-sd-level 2014-09-17 15:28:57.080552285 -0700 +++ b/arch/x86/xen/smp.c 2014-09-17 15:28:57.085552515 -0700 @@ -331,6 +331,7 @@ static void __init xen_smp_prepare_cpus( zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); + zalloc_cpumask_var(&per_cpu(cpu_package_map, i), GFP_KERNEL); } set_cpu_sibling_map(0); _ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/