cpu: Introduce cache model for SapphireRapids

Tejus GK Wed, 23 Apr 2025 21:55:06 -0700

On 23/04/25 5:16 PM, Zhao Liu wrote:

!-------------------------------------------------------------------|
   CAUTION: External Email


|-------------------------------------------------------------------!

Add the cache model to SapphireRapids (v4) to better emulate its
environment.

The cache model is based on SapphireRapids-SP (Scalable Performance):

       --- cache 0 ---
       cache type                         = data cache (1)
       cache level                        = 0x1 (1)
       self-initializing cache level      = true
       fully associative cache            = false
       maximum IDs for CPUs sharing cache = 0x1 (1)
       maximum IDs for cores in pkg       = 0x3f (63)
       system coherency line size         = 0x40 (64)
       physical line partitions           = 0x1 (1)
       ways of associativity              = 0xc (12)
       number of sets                     = 0x40 (64)
       WBINVD/INVD acts on lower caches   = false
       inclusive to lower caches          = false
       complex cache indexing             = false
       number of sets (s)                 = 64
       (size synth)                       = 49152 (48 KB)
       --- cache 1 ---
       cache type                         = instruction cache (2)
       cache level                        = 0x1 (1)
       self-initializing cache level      = true
       fully associative cache            = false
       maximum IDs for CPUs sharing cache = 0x1 (1)
       maximum IDs for cores in pkg       = 0x3f (63)
       system coherency line size         = 0x40 (64)
       physical line partitions           = 0x1 (1)
       ways of associativity              = 0x8 (8)
       number of sets                     = 0x40 (64)
       WBINVD/INVD acts on lower caches   = false
       inclusive to lower caches          = false
       complex cache indexing             = false
       number of sets (s)                 = 64
       (size synth)                       = 32768 (32 KB)
       --- cache 2 ---
       cache type                         = unified cache (3)
       cache level                        = 0x2 (2)
       self-initializing cache level      = true
       fully associative cache            = false
       maximum IDs for CPUs sharing cache = 0x1 (1)
       maximum IDs for cores in pkg       = 0x3f (63)
       system coherency line size         = 0x40 (64)
       physical line partitions           = 0x1 (1)
       ways of associativity              = 0x10 (16)
       number of sets                     = 0x800 (2048)
       WBINVD/INVD acts on lower caches   = false
       inclusive to lower caches          = false
       complex cache indexing             = false
       number of sets (s)                 = 2048
       (size synth)                       = 2097152 (2 MB)
       --- cache 3 ---
       cache type                         = unified cache (3)
       cache level                        = 0x3 (3)
       self-initializing cache level      = true
       fully associative cache            = false
       maximum IDs for CPUs sharing cache = 0x7f (127)
       maximum IDs for cores in pkg       = 0x3f (63)
       system coherency line size         = 0x40 (64)
       physical line partitions           = 0x1 (1)
       ways of associativity              = 0xf (15)
       number of sets                     = 0x10000 (65536)
       WBINVD/INVD acts on lower caches   = false
       inclusive to lower caches          = false
       complex cache indexing             = true
       number of sets (s)                 = 65536
       (size synth)                       = 62914560 (60 MB)
       --- cache 4 ---
       cache type                         = no more caches (0)

Suggested-by: Tejus GK <tejus...@nutanix.com>
Suggested-by: Jason Zeng <jason.z...@intel.com>
Suggested-by: "Daniel P . Berrangé" <berra...@redhat.com>
Signed-off-by: Zhao Liu <zhao1....@intel.com>
---
  target/i386/cpu.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++
  1 file changed, 96 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 00e4a8372c28..d90e048d48f2 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -2453,6 +2453,97 @@ static const CPUCaches epyc_genoa_cache_info = {
      },
  };

+static const CPUCaches xeon_spr_cache_info = {

+    .l1d_cache = &(CPUCacheInfo) {
+        // CPUID 0x4.0x0.EAX
+        .type = DATA_CACHE,
+        .level = 1,
+        .self_init = true,
+
+        // CPUID 0x4.0x0.EBX
+        .line_size = 64,
+        .partitions = 1,
+        .associativity = 12,
+
+        // CPUID 0x4.0x0.ECX
+        .sets = 64,
+
+        // CPUID 0x4.0x0.EDX
+        .no_invd_sharing = false,
+        .inclusive = false,
+        .complex_indexing = false,
+
+        .size = 48 * KiB,
+        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+    },
+    .l1i_cache = &(CPUCacheInfo) {
+        // CPUID 0x4.0x1.EAX
+        .type = INSTRUCTION_CACHE,
+        .level = 1,
+        .self_init = true,
+
+        // CPUID 0x4.0x1.EBX
+        .line_size = 64,
+        .partitions = 1,
+        .associativity = 8,
+
+        // CPUID 0x4.0x1.ECX
+        .sets = 64,
+
+        // CPUID 0x4.0x1.EDX
+        .no_invd_sharing = false,
+        .inclusive = false,
+        .complex_indexing = false,
+
+        .size = 32 * KiB,
+        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+    },
+    .l2_cache = &(CPUCacheInfo) {
+        // CPUID 0x4.0x2.EAX
+        .type = UNIFIED_CACHE,
+        .level = 2,
+        .self_init = true,
+
+        // CPUID 0x4.0x2.EBX
+        .line_size = 64,
+        .partitions = 1,
+        .associativity = 16,
+
+        // CPUID 0x4.0x2.ECX
+        .sets = 2048,
+
+        // CPUID 0x4.0x2.EDX
+        .no_invd_sharing = false,
+        .inclusive = false,
+        .complex_indexing = false,
+
+        .size = 2 * MiB,
+        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+    },
+    .l3_cache = &(CPUCacheInfo) {
+        // CPUID 0x4.0x3.EAX
+        .type = UNIFIED_CACHE,
+        .level = 3,
+        .self_init = true,
+
+        // CPUID 0x4.0x3.EBX
+        .line_size = 64,
+        .partitions = 1,
+        .associativity = 15,
+
+        // CPUID 0x4.0x3.ECX
+        .sets = 65536,
+
+        // CPUID 0x4.0x3.EDX
+        .no_invd_sharing = false,
+        .inclusive = false,
+        .complex_indexing = true,
+
+        .size = 60 * MiB,
+        .share_level = CPU_TOPOLOGY_LEVEL_SOCKET,
+    },
+};
+
  static const CPUCaches xeon_gnr_cache_info = {
      .l1d_cache = &(CPUCacheInfo) {
          // CPUID 0x4.0x0.EAX
@@ -4455,6 +4546,11 @@ static const X86CPUDefinition builtin_x86_defs[] = {
                      { /* end of list */ }
                  }
              },
+            {
+                .version = 4,
+                .note = "with spr-sp cache model",
+                .cache_info = &xeon_spr_cache_info,
+            },
              { /* end of list */ }
          }
      },

Thank you for this improvement! I see that even within the SPR-SP line of processors, the cache sizes vary across different models. What happens, for instance, when a processor only has 37.5 MiB of L3 per socket, but the CPU model exposes 60 MiB of L3 to the VM?


regards,
Tejus

Re: [RFC 05/10] i386/cpu: Introduce cache model for SapphireRapids

Reply via email to