Heya,
I have this rotting in my tree, since actually using it effectively is
way harder than it seems, anyhow, this correctly builds the topology in
amd64, we know 3 things about each cpu now:
- thread id (smt id)
- core id
- package id
This is not complete but is enough IMHO, it lacks x2apic detection.
I've tried to trim it up, but the mask logic is a bit cryptic.
obs: I left a print on dmesg just so that people can test, I intend to
remove if it goes in.
an atom d270 reports the following:
cpu0: smt 0, core 0, package 0
cpu1: smt 1, core 0, package 0
cpu2: smt 0, core 1, package 0
cpu3: smt 1, core 1, package 0
a core2duo L7500:
cpu0: smt 0, core 0, package 0
cpu1: smt 0, core 1, package 0
Do we want this ?
Index: arch/amd64/amd64/identcpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/identcpu.c,v
retrieving revision 1.36
diff -d -u -p -r1.36 identcpu.c
--- arch/amd64/amd64/identcpu.c 22 Apr 2012 19:36:09 -0000 1.36
+++ arch/amd64/amd64/identcpu.c 8 Jul 2012 09:03:02 -0000
@@ -446,4 +446,123 @@ identifycpu(struct cpu_info *ci)
sensordev_install(&ci->ci_sensordev);
#endif
}
+
+ cpu_topology(ci);
+}
+
+/*
+ * Base 2 logarithm of an int. returns 0 for 0 (yeye, I know).
+ */
+static int
+log2(unsigned int i)
+{
+ int ret = 0;
+
+ while (i >>= 1)
+ ret++;
+
+ return (ret);
+}
+
+static int
+mask_width(u_int x)
+{
+ int bit;
+ int mask;
+ int powerof2;
+
+ powerof2 = ((x - 1) & x) == 0;
+ mask = (x << (1 - powerof2)) - 1;
+
+ /* fls */
+ if (mask == 0)
+ return (0);
+ for (bit = 1; mask != 1; bit++)
+ mask = (unsigned int)mask >> 1;
+
+ return (bit);
+}
+
+/*
+ * Build up cpu topology for given cpu, must run on the core itself.
+ */
+void
+cpu_topology(struct cpu_info *ci)
+{
+ u_int32_t eax, ebx, ecx, edx;
+ u_int32_t apicid, max_apicid, max_coreid;
+ u_int32_t smt_bits, core_bits, pkg_bits;
+ u_int32_t smt_mask, core_mask, pkg_mask;
+
+ /* We need at least apicid at CPUID 1 */
+ CPUID(0, eax, ebx, ecx, edx);
+ if (eax < 1)
+ goto no_topology;
+
+ /* Initial apicid */
+ CPUID(1, eax, ebx, ecx, edx);
+ apicid = (ebx >> 24) & 0xff;
+
+ if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
+ /* We need at least apicid at CPUID 0x80000008 */
+ CPUID(0x80000000, eax, ebx, ecx, edx);
+ if (eax < 0x80000008)
+ goto no_topology;
+
+ CPUID(0x80000008, eax, ebx, ecx, edx);
+ core_bits = (ecx >> 12) & 0xf;
+ if (core_bits == 0)
+ goto no_topology;
+ /* So coreidsize 2 gives 3, 3 gives 7... */
+ core_mask = (1 << core_bits) - 1;
+ /* Core id is the least significant considering mask */
+ ci->ci_core_id = apicid & core_mask;
+ /* Pkg id is the upper remaining bits */
+ ci->ci_pkg_id = apicid & ~core_mask;
+ ci->ci_pkg_id >>= core_bits;
+ } else if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
+ /* We only support leaf 1/4 detection */
+ CPUID(0, eax, ebx, ecx, edx);
+ if (eax < 4)
+ goto no_topology;
+ /* Get max_apicid */
+ CPUID(1, eax, ebx, ecx, edx);
+ max_apicid = (ebx >> 16) & 0xff;
+ /* Get max_coreid */
+ CPUID2(4, 0, eax, ebx, ecx, edx);
+ max_coreid = ((eax >> 26) & 0x3f) + 1;
+ /* SMT */
+ smt_bits = mask_width(max_apicid / max_coreid);
+ smt_mask = (1 << smt_bits) - 1;
+ /* Core */
+ core_bits = log2(max_coreid);
+ core_mask = (1 << (core_bits + smt_bits)) - 1;
+ core_mask ^= smt_mask;
+ /* Pkg */
+ pkg_bits = core_bits + smt_bits;
+ pkg_mask = -1 << core_bits;
+
+ ci->ci_smt_id = apicid & smt_mask;
+ ci->ci_core_id = (apicid & core_mask) >> smt_bits;
+ ci->ci_pkg_id = (apicid & pkg_mask) >> pkg_bits;
+ } else
+ goto no_topology;
+#ifdef DEBUG
+ printf("cpu%d: smt %u, core %u, pkg %u "
+ "(apicid 0x%x, max_apicid 0x%x, max_coreid 0x%x, smt_bits 0x%x,
smt_mask 0x%x, "
+ "core_bits 0x%x, core_mask 0x%x, pkg_bits 0x%x, pkg_mask 0x%x)\n",
+ ci->ci_cpuid, ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id,
+ apicid, max_apicid, max_coreid, smt_bits, smt_mask, core_bits,
+ core_mask, pkg_bits, pkg_mask);
+#else
+ printf("cpu%d: smt %u, core %u, package %u\n", ci->ci_cpuid,
+ ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id);
+
+#endif
+ return;
+ /* We can't map, so consider ci_core_id as ci_cpuid */
+no_topology:
+ ci->ci_smt_id = 0;
+ ci->ci_core_id = ci->ci_cpuid;
+ ci->ci_pkg_id = 0;
}
Index: arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.73
diff -d -u -p -r1.73 cpu.h
--- arch/amd64/include/cpu.h 17 Apr 2012 16:02:33 -0000 1.73
+++ arch/amd64/include/cpu.h 8 Jul 2012 07:48:10 -0000
@@ -100,7 +100,9 @@ struct cpu_info {
u_int32_t ci_model;
u_int32_t ci_cflushsz;
u_int64_t ci_tsc_freq;
-
+ u_int32_t ci_smt_id;
+ u_int32_t ci_core_id;
+ u_int32_t ci_pkg_id;
struct cpu_functions *ci_func;
void (*cpu_setup)(struct cpu_info *);
void (*ci_info)(struct cpu_info *);
@@ -266,6 +268,7 @@ extern int cpuspeed;
/* identcpu.c */
void identifycpu(struct cpu_info *);
int cpu_amd64speed(int *);
+void cpu_topology(struct cpu_info *);
/* machdep.c */
void dumpconf(void);
Index: arch/amd64/include/specialreg.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/specialreg.h,v
retrieving revision 1.21
diff -d -u -p -r1.21 specialreg.h
--- arch/amd64/include/specialreg.h 27 Mar 2012 05:59:46 -0000 1.21
+++ arch/amd64/include/specialreg.h 8 Jul 2012 07:46:34 -0000
@@ -187,11 +187,13 @@
#define CPUID2MODEL(cpuid) (((cpuid) >> 4) & 15)
#define CPUID2STEPPING(cpuid) ((cpuid) & 15)
-#define CPUID(code, eax, ebx, ecx, edx) \
+#define CPUID2(eax_code, ecx_code, eax, ebx, ecx, edx) \
__asm("cpuid" \
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
- : "a" (code));
+ : "a" (eax_code), "c" (ecx_code));
+#define CPUID(code, eax, ebx, ecx, edx) \
+ CPUID2(code, 0, eax, ebx, ecx, edx)
/*
* Model-specific registers for the i386 family