Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode CPUID[4]

Moger, Babu Fri, 04 Aug 2023 08:54:26 -0700

Hi Zhao,

On 8/4/23 04:48, Zhao Liu wrote:
> Hi Babu,
> 
> On Thu, Aug 03, 2023 at 11:41:40AM -0500, Moger, Babu wrote:
>> Date: Thu, 3 Aug 2023 11:41:40 -0500
>> From: "Moger, Babu" <babu.mo...@amd.com>
>> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
>>  CPUID[4]
>>
>> Hi Zhao,
>>
>> On 8/2/23 18:49, Moger, Babu wrote:
>>> Hi Zhao,
>>>
>>> Hitting this error after this patch.
>>>
>>> ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should
>>> not be reached
>>> Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code
>>> should not be reached
>>> Aborted (core dumped)
>>>
>>> Looks like share_level for all the caches for AMD is not initialized.
> 
> I missed these change when I rebase. Sorry for that.
> 
> BTW, could I ask a question? From a previous discussion[1], I understand
> that the cache info is used to show the correct cache information in
> new machine. And from [2], the wrong cache info may cause "compatibility
> issues".
> 
> Is this "compatibility issues" AMD specific? I'm not sure if Intel should
> update the cache info like that. thanks!


I was going to comment about that. Good that you asked me.

Compatibility is qemu requirement.  Otherwise the migrations will fail.

Any changes in the topology is going to cause migration problems.

I am not sure how you are going to handle this. You can probably look at
the feature "x-intel-pt-auto-level".

make sure to test the migration.

Thanks
Babu


> 
> [1]: 
> https://patchwork.kernel.org/project/kvm/patch/cy4pr12mb1768a3cbe42aafb03cb1081e95...@cy4pr12mb1768.namprd12.prod.outlook.com/
> [2]: 
> https://lore.kernel.org/qemu-devel/20180510204148.11687-1-babu.mo...@amd.com/
> 
>>
>> The following patch fixes the problem.
>>
>> ======================================================
>>
>>
>> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
>> index f4c48e19fa..976a2755d8 100644
>> --- a/target/i386/cpu.c
>> +++ b/target/i386/cpu.c
>> @@ -528,6 +528,7 @@ static CPUCacheInfo legacy_l2_cache_cpuid2 = {
>>      .size = 2 * MiB,
>>      .line_size = 64,
>>      .associativity = 8,
>> +    .share_level = CPU_TOPO_LEVEL_CORE,
> 
> This "legacy_l2_cache_cpuid2" is not used to encode cache topology.
> I should explicitly set this default topo level as CPU_TOPO_LEVEL_UNKNOW.
> 
>>  };
>>
>>
>> @@ -1904,6 +1905,7 @@ static CPUCaches epyc_v4_cache_info = {
>>          .lines_per_tag = 1,
>>          .self_init = 1,
>>          .no_invd_sharing = true,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l1i_cache = &(CPUCacheInfo) {
>>          .type = INSTRUCTION_CACHE,
>> @@ -1916,6 +1918,7 @@ static CPUCaches epyc_v4_cache_info = {
>>          .lines_per_tag = 1,
>>          .self_init = 1,
>>          .no_invd_sharing = true,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l2_cache = &(CPUCacheInfo) {
>>          .type = UNIFIED_CACHE,
>> @@ -1926,6 +1929,7 @@ static CPUCaches epyc_v4_cache_info = {
>>          .partitions = 1,
>>          .sets = 1024,
>>          .lines_per_tag = 1,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l3_cache = &(CPUCacheInfo) {
>>          .type = UNIFIED_CACHE,
>> @@ -1939,6 +1943,7 @@ static CPUCaches epyc_v4_cache_info = {
>>          .self_init = true,
>>          .inclusive = true,
>>          .complex_indexing = false,
>> +        .share_level = CPU_TOPO_LEVEL_DIE,
>>      },
>>  };
>>
>> @@ -2008,6 +2013,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
>>          .lines_per_tag = 1,
>>          .self_init = 1,
>>          .no_invd_sharing = true,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l1i_cache = &(CPUCacheInfo) {
>>          .type = INSTRUCTION_CACHE,
>> @@ -2020,6 +2026,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
>>          .lines_per_tag = 1,
>>          .self_init = 1,
>>          .no_invd_sharing = true,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l2_cache = &(CPUCacheInfo) {
>>          .type = UNIFIED_CACHE,
>> @@ -2030,6 +2037,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
>>          .partitions = 1,
>>          .sets = 1024,
>>          .lines_per_tag = 1,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l3_cache = &(CPUCacheInfo) {
>>          .type = UNIFIED_CACHE,
>> @@ -2043,6 +2051,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
>>          .self_init = true,
>>          .inclusive = true,
>>          .complex_indexing = false,
>> +        .share_level = CPU_TOPO_LEVEL_DIE,
>>      },
>>  };
>>
>> @@ -2112,6 +2121,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
>>          .lines_per_tag = 1,
>>          .self_init = 1,
>>          .no_invd_sharing = true,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l1i_cache = &(CPUCacheInfo) {
>>          .type = INSTRUCTION_CACHE,
>> @@ -2124,6 +2134,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
>>          .lines_per_tag = 1,
>>          .self_init = 1,
>>          .no_invd_sharing = true,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l2_cache = &(CPUCacheInfo) {
>>          .type = UNIFIED_CACHE,
>> @@ -2134,6 +2145,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
>>          .partitions = 1,
>>          .sets = 1024,
>>          .lines_per_tag = 1,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l3_cache = &(CPUCacheInfo) {
>>          .type = UNIFIED_CACHE,
>> @@ -2147,6 +2159,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
>>          .self_init = true,
>>          .inclusive = true,
>>          .complex_indexing = false,
>> +        .share_level = CPU_TOPO_LEVEL_DIE,
>>      },
>>  };
>>
>> @@ -2162,6 +2175,7 @@ static const CPUCaches epyc_genoa_cache_info = {
>>          .lines_per_tag = 1,
>>          .self_init = 1,
>>          .no_invd_sharing = true,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l1i_cache = &(CPUCacheInfo) {
>>          .type = INSTRUCTION_CACHE,
>> @@ -2174,6 +2188,7 @@ static const CPUCaches epyc_genoa_cache_info = {
>>          .lines_per_tag = 1,
>>          .self_init = 1,
>>          .no_invd_sharing = true,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l2_cache = &(CPUCacheInfo) {
>>          .type = UNIFIED_CACHE,
>> @@ -2184,6 +2199,7 @@ static const CPUCaches epyc_genoa_cache_info = {
>>          .partitions = 1,
>>          .sets = 2048,
>>          .lines_per_tag = 1,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l3_cache = &(CPUCacheInfo) {
>>          .type = UNIFIED_CACHE,
>> @@ -2197,6 +2213,7 @@ static const CPUCaches epyc_genoa_cache_info = {
>>          .self_init = true,
>>          .inclusive = true,
>>          .complex_indexing = false,
>> +        .share_level = CPU_TOPO_LEVEL_DIE,
>>      },
>>  };
>>
>>
>> =========================================================================
> 
> 
> Look good to me except legacy_l2_cache_cpuid2, thanks very much!
> I'll add this in next version.
> 
> -Zhao
> 
>>
>> Thanks
>> Babu
>>>
>>> On 8/1/23 05:35, Zhao Liu wrote:
>>>> From: Zhao Liu <zhao1....@intel.com>
>>>>
>>>> CPUID[4].EAX[bits 25:14] is used to represent the cache topology for
>>>> intel CPUs.
>>>>
>>>> After cache models have topology information, we can use
>>>> CPUCacheInfo.share_level to decide which topology level to be encoded
>>>> into CPUID[4].EAX[bits 25:14].
>>>>
>>>> And since maximum_processor_id (original "num_apic_ids") is parsed
>>>> based on cpu topology levels, which are verified when parsing smp, it's
>>>> no need to check this value by "assert(num_apic_ids > 0)" again, so
>>>> remove this assert.
>>>>
>>>> Additionally, wrap the encoding of CPUID[4].EAX[bits 31:26] into a
>>>> helper to make the code cleaner.
>>>>
>>>> Signed-off-by: Zhao Liu <zhao1....@intel.com>
>>>> ---
>>>> Changes since v1:
>>>>  * Use "enum CPUTopoLevel share_level" as the parameter in
>>>>    max_processor_ids_for_cache().
>>>>  * Make cache_into_passthrough case also use
>>>>    max_processor_ids_for_cache() and max_core_ids_in_package() to
>>>>    encode CPUID[4]. (Yanan)
>>>>  * Rename the title of this patch (the original is "i386: Use
>>>>    CPUCacheInfo.share_level to encode CPUID[4].EAX[bits 25:14]").
>>>> ---
>>>>  target/i386/cpu.c | 70 +++++++++++++++++++++++++++++------------------
>>>>  1 file changed, 43 insertions(+), 27 deletions(-)
>>>>
>>>> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
>>>> index 55aba4889628..c9897c0fe91a 100644
>>>> --- a/target/i386/cpu.c
>>>> +++ b/target/i386/cpu.c
>>>> @@ -234,22 +234,53 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo 
>>>> *cache)
>>>>                         ((t) == UNIFIED_CACHE) ? CACHE_TYPE_UNIFIED : \
>>>>                         0 /* Invalid value */)
>>>>  
>>>> +static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info,
>>>> +                                            enum CPUTopoLevel share_level)
>>>> +{
>>>> +    uint32_t num_ids = 0;
>>>> +
>>>> +    switch (share_level) {
>>>> +    case CPU_TOPO_LEVEL_CORE:
>>>> +        num_ids = 1 << apicid_core_offset(topo_info);
>>>> +        break;
>>>> +    case CPU_TOPO_LEVEL_DIE:
>>>> +        num_ids = 1 << apicid_die_offset(topo_info);
>>>> +        break;
>>>> +    case CPU_TOPO_LEVEL_PACKAGE:
>>>> +        num_ids = 1 << apicid_pkg_offset(topo_info);
>>>> +        break;
>>>> +    default:
>>>> +        /*
>>>> +         * Currently there is no use case for SMT and MODULE, so use
>>>> +         * assert directly to facilitate debugging.
>>>> +         */
>>>> +        g_assert_not_reached();
>>>> +    }
>>>> +
>>>> +    return num_ids - 1;
>>>> +}
>>>> +
>>>> +static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info)
>>>> +{
>>>> +    uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) -
>>>> +                               apicid_core_offset(topo_info));
>>>> +    return num_cores - 1;
>>>> +}
>>>>  
>>>>  /* Encode cache info for CPUID[4] */
>>>>  static void encode_cache_cpuid4(CPUCacheInfo *cache,
>>>> -                                int num_apic_ids, int num_cores,
>>>> +                                X86CPUTopoInfo *topo_info,
>>>>                                  uint32_t *eax, uint32_t *ebx,
>>>>                                  uint32_t *ecx, uint32_t *edx)
>>>>  {
>>>>      assert(cache->size == cache->line_size * cache->associativity *
>>>>                            cache->partitions * cache->sets);
>>>>  
>>>> -    assert(num_apic_ids > 0);
>>>>      *eax = CACHE_TYPE(cache->type) |
>>>>             CACHE_LEVEL(cache->level) |
>>>>             (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) |
>>>> -           ((num_cores - 1) << 26) |
>>>> -           ((num_apic_ids - 1) << 14);
>>>> +           (max_core_ids_in_package(topo_info) << 26) |
>>>> +           (max_processor_ids_for_cache(topo_info, cache->share_level) << 
>>>> 14);
>>>>  
>>>>      assert(cache->line_size > 0);
>>>>      assert(cache->partitions > 0);
>>>> @@ -6116,56 +6147,41 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t 
>>>> index, uint32_t count,
>>>>                  int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
>>>>  
>>>>                  if (cores_per_pkg > 1) {
>>>> -                    int addressable_cores_offset =
>>>> -                                                
>>>> apicid_pkg_offset(&topo_info) -
>>>> -                                                
>>>> apicid_core_offset(&topo_info);
>>>> -
>>>>                      *eax &= ~0xFC000000;
>>>> -                    *eax |= (1 << addressable_cores_offset - 1) << 26;
>>>> +                    *eax |= max_core_ids_in_package(&topo_info) << 26;
>>>>                  }
>>>>                  if (host_vcpus_per_cache > cpus_per_pkg) {
>>>> -                    int pkg_offset = apicid_pkg_offset(&topo_info);
>>>> -
>>>>                      *eax &= ~0x3FFC000;
>>>> -                    *eax |= (1 << pkg_offset - 1) << 14;
>>>> +                    *eax |=
>>>> +                        max_processor_ids_for_cache(&topo_info,
>>>> +                                                CPU_TOPO_LEVEL_PACKAGE) 
>>>> << 14;
>>>>                  }
>>>>              }
>>>>          } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) {
>>>>              *eax = *ebx = *ecx = *edx = 0;
>>>>          } else {
>>>>              *eax = 0;
>>>> -            int addressable_cores_offset = apicid_pkg_offset(&topo_info) -
>>>> -                                           apicid_core_offset(&topo_info);
>>>> -            int core_offset, die_offset;
>>>>  
>>>>              switch (count) {
>>>>              case 0: /* L1 dcache info */
>>>> -                core_offset = apicid_core_offset(&topo_info);
>>>>                  encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache,
>>>> -                                    (1 << core_offset),
>>>> -                                    (1 << addressable_cores_offset),
>>>> +                                    &topo_info,
>>>>                                      eax, ebx, ecx, edx);
>>>>                  break;
>>>>              case 1: /* L1 icache info */
>>>> -                core_offset = apicid_core_offset(&topo_info);
>>>>                  encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache,
>>>> -                                    (1 << core_offset),
>>>> -                                    (1 << addressable_cores_offset),
>>>> +                                    &topo_info,
>>>>                                      eax, ebx, ecx, edx);
>>>>                  break;
>>>>              case 2: /* L2 cache info */
>>>> -                core_offset = apicid_core_offset(&topo_info);
>>>>                  encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache,
>>>> -                                    (1 << core_offset),
>>>> -                                    (1 << addressable_cores_offset),
>>>> +                                    &topo_info,
>>>>                                      eax, ebx, ecx, edx);
>>>>                  break;
>>>>              case 3: /* L3 cache info */
>>>> -                die_offset = apicid_die_offset(&topo_info);
>>>>                  if (cpu->enable_l3_cache) {
>>>>                      encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache,
>>>> -                                        (1 << die_offset),
>>>> -                                        (1 << addressable_cores_offset),
>>>> +                                        &topo_info,
>>>>                                          eax, ebx, ecx, edx);
>>>>                      break;
>>>>                  }
>>>
>>
>> -- 
>> Thanks
>> Babu Moger

-- 
Thanks
Babu Moger

Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode CPUID[4]

Reply via email to