From: Dave Hansen <dave.han...@linux.intel.com>

As noted by multiple reports:

        https://lkml.org/lkml/2014/9/15/1240
        https://lkml.org/lkml/2014/7/28/442

the sched domains code has some assumptions that break on newer
AMD and Intel CPUs.  Namely, the code assumes that NUMA node
boundaries always lie outside of a CPU package.  That assumption
is no longer true with Intel's Cluster-on-Die found in Haswell
CPUs (with a special BIOS config knob) and AMD's DCM feature.

Essentially, the 'cpu_core_map' is no longer suitable for
enumerating all the CPUs in a physical package.

This patch introduces a new map which is specifically built by
consulting the physical package ids instead of inferring the
information from NUMA nodes.

This still leaves us with a broken 'core_siblings_list' in sysfs,
but a later patch will fix that up too.

Signed-off-by: Dave Hansen <dave.han...@linux.intel.com>
---

 b/arch/x86/include/asm/smp.h      |    6 ++++++
 b/arch/x86/include/asm/topology.h |    1 +
 b/arch/x86/kernel/smpboot.c       |   13 +++++++++++++
 b/arch/x86/xen/smp.c              |    1 +
 4 files changed, 21 insertions(+)

diff -puN arch/x86/include/asm/smp.h~introduce-package-sd-level 
arch/x86/include/asm/smp.h
--- a/arch/x86/include/asm/smp.h~introduce-package-sd-level     2014-09-17 
15:28:57.075552056 -0700
+++ b/arch/x86/include/asm/smp.h        2014-09-17 15:28:57.084552469 -0700
@@ -32,6 +32,7 @@ static inline bool cpu_has_ht_siblings(v
 
 DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
 DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
+DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_package_map);
 /* cpus sharing the last level cache: */
 DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
@@ -52,6 +53,11 @@ static inline struct cpumask *cpu_llc_sh
        return per_cpu(cpu_llc_shared_map, cpu);
 }
 
+static inline struct cpumask *cpu_package_mask(int cpu)
+{
+       return per_cpu(cpu_package_map, cpu);
+}
+
 DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
 DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
diff -puN arch/x86/include/asm/topology.h~introduce-package-sd-level 
arch/x86/include/asm/topology.h
--- a/arch/x86/include/asm/topology.h~introduce-package-sd-level        
2014-09-17 15:28:57.077552149 -0700
+++ b/arch/x86/include/asm/topology.h   2014-09-17 15:28:57.084552469 -0700
@@ -118,6 +118,7 @@ static inline void setup_node_to_cpumask
 #include <asm-generic/topology.h>
 
 extern const struct cpumask *cpu_coregroup_mask(int cpu);
+extern const struct cpumask *cpu_package_mask_func(int cpu);
 
 #define topology_physical_package_id(cpu)      (cpu_data(cpu).phys_proc_id)
 #define topology_core_id(cpu)                  (cpu_data(cpu).cpu_core_id)
diff -puN arch/x86/kernel/smpboot.c~introduce-package-sd-level 
arch/x86/kernel/smpboot.c
--- a/arch/x86/kernel/smpboot.c~introduce-package-sd-level      2014-09-17 
15:28:57.079552240 -0700
+++ b/arch/x86/kernel/smpboot.c 2014-09-17 15:28:57.085552515 -0700
@@ -98,6 +98,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_package_map);
+
 /* Per CPU bogomips and other parameters */
 DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
@@ -352,6 +354,13 @@ static bool match_mc(struct cpuinfo_x86
        return false;
 }
 
+static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+       if (c->phys_proc_id == o->phys_proc_id)
+               return true;
+       return false;
+}
+
 void set_cpu_sibling_map(int cpu)
 {
        bool has_smt = smp_num_siblings > 1;
@@ -365,6 +374,7 @@ void set_cpu_sibling_map(int cpu)
        if (!has_mp) {
                cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
                cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
+               cpumask_set_cpu(cpu, cpu_package_mask(cpu));
                cpumask_set_cpu(cpu, cpu_core_mask(cpu));
                c->booted_cores = 1;
                return;
@@ -410,6 +420,9 @@ void set_cpu_sibling_map(int cpu)
                        } else if (i != cpu && !c->booted_cores)
                                c->booted_cores = cpu_data(i).booted_cores;
                }
+               if ((i == cpu) || (has_mp && match_pkg(c, o))) {
+                       link_mask(package, cpu, i);
+               }
        }
 }
 
diff -puN arch/x86/xen/smp.c~introduce-package-sd-level arch/x86/xen/smp.c
--- a/arch/x86/xen/smp.c~introduce-package-sd-level     2014-09-17 
15:28:57.080552285 -0700
+++ b/arch/x86/xen/smp.c        2014-09-17 15:28:57.085552515 -0700
@@ -331,6 +331,7 @@ static void __init xen_smp_prepare_cpus(
                zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
                zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
                zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
+               zalloc_cpumask_var(&per_cpu(cpu_package_map, i), GFP_KERNEL);
        }
        set_cpu_sibling_map(0);
 
_
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to