According to the Documentation/x86/topology.txt, AMD nomenclature for
package is NUMA node (or die). However, this is not the case on AMD
family17h multi-die processor platforms, which can have up to 4 dies
per socket as shown in the following system topology.

Die (Dx) View :
             ----------------------------
         C0  | T0 T1 |    ||    | T0 T1 | C4
             --------|    ||    |--------
         C1  | T0 T1 | L3 || L3 | T0 T1 | C5
             --------|    ||    |--------
         C2  | T0 T1 | #0 || #1 | T0 T1 | C6
             --------|    ||    |--------
         C3  | T0 T1 |    ||    | T0 T1 | C7
             ----------------------------

System View (with 2 sockets) :
           --------------------
           |     -------------|------
           |     |            |     |
         ------------       ------------
         | D1 -- D0 |       | D7 -- D6 |
         | |  \/ |  |       | |  \/ |  |
 SOCKET0 | |  /\ |  |       | |  /\ |  | SOCKET1
         | D2 -- D3 |       | D4 -- D5 |
         ------------       ------------
           |     |            |     |
           ------|------------|     |
                 --------------------

The current logic interprets package as socket (i.e. phys_proc_id is
the socket id), which results in setting x86_has_numa_in_package and
omitting the DIE schedule domain. However, NUMA schedule domains are
derived from SRAT/SLIT, which assume that a NUMA node is a die, and the
NUMA schedule domains are built on top of NUMA nodes. This results in
incomplete schedule domains, as follows:
    domain 0: SMT
    domain 1: MC       /* core complex w/ shared L3*/
    ---- Missing DIE level domain ----
    domain 2: NUMA     /* socket */
    domain 3: NUMA     /* platform */

Presenting package-as-die does not set x86_has_numa_in_package, so the
DIE schedule domain is no longer omitted.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpa...@amd.com>
Signed-off-by: Leo Duran <leo.du...@amd.com>
Signed-off-by: Yazen Ghannam <yazen.ghan...@amd.com>
Cc: <sta...@vger.kernel.org> # v4.10+
---
 arch/x86/kernel/cpu/amd.c | 189 +++++++++++++++++++++++++++-------------------
 1 file changed, 112 insertions(+), 77 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bb5abe8..2f5869c 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) "x86/AMD: " fmt
+
 #include <linux/export.h>
 #include <linux/bitops.h>
 #include <linux/elf.h>
@@ -32,6 +34,12 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, 
const int *erratum);
  */
 static u32 nodes_per_socket = 1;
 
+/*
+ * l3_num_threads_sharing: Stores the number of threads sharing L3 cache.
+ * Refer to CPUID_Fn8000001D_EAX_x03 [Cache Properties (L3)] NumSharingCache.
+ */
+static u32 l3_num_threads_sharing;
+
 static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
 {
        u32 gprs[8] = { 0 };
@@ -296,96 +304,122 @@ static int nearby_node(int apicid)
 }
 #endif
 
+#ifdef CONFIG_SMP
+
 /*
- * Fixup core topology information for
- * (1) AMD multi-node processors
+ * Per Documentation/x86/topology.txt, the kernel works with
+ *  {packages, cores, threads}, and we will map:
+ *
+ *  thread  = core in compute-unit (CMT), or thread in core (SMT)
+ *  core    = compute-unit (CMT), or core (SMT)
+ *  package = node (die)
+ *
+ * Discover topology based on available information from CPUID first,
+ * and only derive them as needed.
+ *
+ * (1) phys_proc_id is die ID in AMD multi-die processors.
  *     Assumption: Number of cores in each internal node is the same.
- * (2) AMD processors supporting compute units
+ * (2) cpu_core_id is derived from either CPUID topology extension
+ *     or initial APIC_ID.
+ * (3) cpu_llc_id is either L3 or per-node
  */
-#ifdef CONFIG_SMP
 static void amd_get_topology(struct cpuinfo_x86 *c)
 {
-       u8 node_id;
        int cpu = smp_processor_id();
 
-       /* get information required for multi-node processors */
        if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
                u32 eax, ebx, ecx, edx;
 
                cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
 
-               node_id  = ecx & 0xff;
+               c->phys_proc_id = ecx & 0xff;
                smp_num_siblings = ((ebx >> 8) & 0xff) + 1;
 
-               if (c->x86 == 0x15)
-                       c->cu_id = ebx & 0xff;
-
-               if (c->x86 >= 0x17) {
-                       c->cpu_core_id = ebx & 0xff;
-
-                       if (smp_num_siblings > 1)
-                               c->x86_max_cores /= smp_num_siblings;
-               }
+               /* Adjustment to get core per die */
+               c->x86_max_cores /= smp_num_siblings;
 
                /*
-                * We may have multiple LLCs if L3 caches exist, so check if we
-                * have an L3 cache by looking at the L3 cache CPUID leaf.
+                * For family15h/16h, this is ComputeUnitId per socket
+                * For family17h, this is CoreId per socket
                 */
+               c->cpu_core_id = (ebx & 0xff);
+
                if (cpuid_edx(0x80000006)) {
-                       if (c->x86 == 0x17) {
+                       cpuid_count(0x8000001d, 3, &eax, &ebx, &ecx, &edx);
+                       l3_num_threads_sharing = ((eax >> 14) & 0xfff) + 1;
+               }
+
+               if (c->x86 == 0x17) {
+                       /*
+                        * In family 17h, the CPUID_Fn8000001E_EBX[7:0] (CoreId)
+                        * is non-contiguous in down-coring and non-SMT cases.
+                        * This logic fixes up the cpu_core_id to be contiguous
+                        * for cores within the die.
+                        */
+                       u32 tmp = c->cpu_core_id;
+                       u32 die_offset, ccx_offset, cpu_offset;
+
+                       if (smp_num_siblings == 1) {
                                /*
-                                * LLC is at the core complex level.
-                                * Core complex id is ApicId[3].
+                                * For SMT-disabled case, the CoreId 
bit-encoding is
+                                * [7:4] : die
+                                * [3]   : ccx
+                                * [2:0] : core
                                 */
-                               per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
+                               die_offset = ((tmp >> 4) & 0xf) * 
c->x86_max_cores;
+                               ccx_offset = ((tmp >> 3) & 1) * 
l3_num_threads_sharing;
+                               cpu_offset = tmp & 7;
                        } else {
-                               /* LLC is at the node level. */
-                               per_cpu(cpu_llc_id, cpu) = node_id;
+                               /*
+                                * For SMT-enabled case, the CoreId 
bit-encoding is
+                                * [7:3] : die
+                                * [2]   : ccx
+                                * [1:0] : core
+                                */
+                               die_offset = ((tmp >> 3) & 0x1f) * 
c->x86_max_cores;
+                               ccx_offset = ((tmp >> 2) & 1) * 
l3_num_threads_sharing / smp_num_siblings;
+                               cpu_offset = tmp & 3;
                        }
+                       c->cpu_core_id = die_offset + ccx_offset + cpu_offset;
+                       pr_debug("Fixup CoreId:%#x to cpu_core_id:%#x\n", tmp, 
c->cpu_core_id);
                }
-       } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
-               u64 value;
+       } else {
+               if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
+                       u64 value;
 
-               rdmsrl(MSR_FAM10H_NODE_ID, value);
-               node_id = value & 7;
-
-               per_cpu(cpu_llc_id, cpu) = node_id;
-       } else
-               return;
-
-       /* fixup multi-node processor information */
-       if (nodes_per_socket > 1) {
-               u32 cus_per_node;
-
-               set_cpu_cap(c, X86_FEATURE_AMD_DCM);
-               cus_per_node = c->x86_max_cores / nodes_per_socket;
+                       /* Use MSR provided node ID */
+                       rdmsrl(MSR_FAM10H_NODE_ID, value);
+                       c->phys_proc_id = value & 7;
+               } else {
+                       /*
+                        * On older AMD dual core setup the lower
+                        * bits of the APIC id distinguish the cores.
+                        * Assumes number of cores is a power of two.
+                        */
+                       c->phys_proc_id = c->initial_apicid >> 
c->x86_coreid_bits;
+               }
 
-               /* core id has to be in the [0 .. cores_per_node - 1] range */
-               c->cpu_core_id %= cus_per_node;
+               /* Get core id from APIC */
+               c->cpu_core_id = c->initial_apicid & ((1 << c->x86_coreid_bits) 
- 1);
        }
-}
-#endif
 
-/*
- * On a AMD dual core setup the lower bits of the APIC id distinguish the 
cores.
- * Assumes number of cores is a power of two.
- */
-static void amd_detect_cmp(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_SMP
-       unsigned bits;
-       int cpu = smp_processor_id();
+       /* core id has to be in the [0 .. cores_per_die - 1] range */
+       c->cpu_core_id %= c->x86_max_cores;
 
-       bits = c->x86_coreid_bits;
-       /* Low order bits define the core id (index of core in socket) */
-       c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
-       /* Convert the initial APIC ID into the socket ID */
-       c->phys_proc_id = c->initial_apicid >> bits;
-       /* use socket ID also for last level cache */
+       /* Default LLC is at the die level. */
        per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
-       amd_get_topology(c);
-#endif
+
+       /*
+        * We may have multiple LLCs if L3 caches exist, so check if we
+        * have an L3 cache by looking at the L3 cache CPUID leaf.
+        * For family17h, LLC is at the core complex level.
+        * Core complex id is ApicId[3].
+        */
+       if (cpuid_edx(0x80000006) && c->x86 == 0x17)
+               per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
+
 }
+#endif
 
 u16 amd_get_nb_id(int cpu)
 {
@@ -412,7 +446,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
 
        node = numa_cpu_node(cpu);
        if (node == NUMA_NO_NODE)
-               node = per_cpu(cpu_llc_id, cpu);
+               node = c->phys_proc_id;
 
        /*
         * On multi-fabric platform (e.g. Numascale NumaChip) a
@@ -457,26 +491,23 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
 static void early_init_amd_mc(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
-       unsigned bits, ecx;
+       u32 threads_per_socket;
 
        /* Multi core CPU? */
        if (c->extended_cpuid_level < 0x80000008)
                return;
 
-       ecx = cpuid_ecx(0x80000008);
-
-       c->x86_max_cores = (ecx & 0xff) + 1;
-
-       /* CPU telling us the core id bits shift? */
-       bits = (ecx >> 12) & 0xF;
-
-       /* Otherwise recompute */
-       if (bits == 0) {
-               while ((1 << bits) < c->x86_max_cores)
-                       bits++;
-       }
+       /* Threads per socket */
+       threads_per_socket = (cpuid_ecx(0x80000008) & 0xff) + 1;
+       /* Threads per die */
+       c->x86_max_cores = threads_per_socket / nodes_per_socket;
 
-       c->x86_coreid_bits = bits;
+       /*
+        * This is per socket, and should only be used to decode APIC ID,
+        * which is needed on older systems where X86_FEATURE_TOPOEXT
+        * is not supported.
+        */
+       c->x86_coreid_bits = get_count_order(threads_per_socket);
 #endif
 }
 
@@ -765,11 +796,15 @@ static void init_amd(struct cpuinfo_x86 *c)
 
        cpu_detect_cache_sizes(c);
 
-       /* Multi core CPU? */
+#ifdef CONFIG_SMP
        if (c->extended_cpuid_level >= 0x80000008) {
-               amd_detect_cmp(c);
+               amd_get_topology(c);
                srat_detect_node(c);
        }
+#endif
+       /* Multi-die? */
+       if (nodes_per_socket > 1)
+               set_cpu_cap(c, X86_FEATURE_AMD_DCM);
 
 #ifdef CONFIG_X86_32
        detect_ht(c);
-- 
2.7.4

Reply via email to