diff mbox series

[v3,14/17] i386: Use CPUCacheInfo.share_level to encode CPUID[4]

Message ID 20230801103527.397756-15-zhao1.liu@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Support smp.clusters for x86 | expand

Commit Message

Zhao Liu Aug. 1, 2023, 10:35 a.m. UTC
From: Zhao Liu <zhao1.liu@intel.com>

CPUID[4].EAX[bits 25:14] is used to represent the cache topology for
Intel CPUs.

After cache models have topology information, we can use
CPUCacheInfo.share_level to decide which topology level to be encoded
into CPUID[4].EAX[bits 25:14].

And since maximum_processor_id (original "num_apic_ids") is parsed
based on cpu topology levels, which are verified when parsing smp,
there's no need to check this value with "assert(num_apic_ids > 0)"
again, so remove this assert.

Additionally, wrap the encoding of CPUID[4].EAX[bits 31:26] into a
helper to make the code cleaner.

Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
---
Changes since v1:
 * Use "enum CPUTopoLevel share_level" as the parameter in
   max_processor_ids_for_cache().
 * Make cache_into_passthrough case also use
   max_processor_ids_for_cache() and max_core_ids_in_package() to
   encode CPUID[4]. (Yanan)
 * Rename the title of this patch (the original is "i386: Use
   CPUCacheInfo.share_level to encode CPUID[4].EAX[bits 25:14]").
---
 target/i386/cpu.c | 70 +++++++++++++++++++++++++++++------------------
 1 file changed, 43 insertions(+), 27 deletions(-)

Comments

Moger, Babu Aug. 2, 2023, 11:49 p.m. UTC | #1
Hi Zhao,

Hitting this error after this patch.

ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should
not be reached
Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code
should not be reached
Aborted (core dumped)

Looks like share_level for all the caches for AMD is not initialized.

Thanks
Babu

On 8/1/23 05:35, Zhao Liu wrote:
> From: Zhao Liu <zhao1.liu@intel.com>
> 
> CPUID[4].EAX[bits 25:14] is used to represent the cache topology for
> intel CPUs.
> 
> After cache models have topology information, we can use
> CPUCacheInfo.share_level to decide which topology level to be encoded
> into CPUID[4].EAX[bits 25:14].
> 
> And since maximum_processor_id (original "num_apic_ids") is parsed
> based on cpu topology levels, which are verified when parsing smp, it's
> no need to check this value by "assert(num_apic_ids > 0)" again, so
> remove this assert.
> 
> Additionally, wrap the encoding of CPUID[4].EAX[bits 31:26] into a
> helper to make the code cleaner.
> 
> Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> ---
> Changes since v1:
>  * Use "enum CPUTopoLevel share_level" as the parameter in
>    max_processor_ids_for_cache().
>  * Make cache_into_passthrough case also use
>    max_processor_ids_for_cache() and max_core_ids_in_package() to
>    encode CPUID[4]. (Yanan)
>  * Rename the title of this patch (the original is "i386: Use
>    CPUCacheInfo.share_level to encode CPUID[4].EAX[bits 25:14]").
> ---
>  target/i386/cpu.c | 70 +++++++++++++++++++++++++++++------------------
>  1 file changed, 43 insertions(+), 27 deletions(-)
> 
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 55aba4889628..c9897c0fe91a 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -234,22 +234,53 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache)
>                         ((t) == UNIFIED_CACHE) ? CACHE_TYPE_UNIFIED : \
>                         0 /* Invalid value */)
>  
> +static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info,
> +                                            enum CPUTopoLevel share_level)
> +{
> +    uint32_t num_ids = 0;
> +
> +    switch (share_level) {
> +    case CPU_TOPO_LEVEL_CORE:
> +        num_ids = 1 << apicid_core_offset(topo_info);
> +        break;
> +    case CPU_TOPO_LEVEL_DIE:
> +        num_ids = 1 << apicid_die_offset(topo_info);
> +        break;
> +    case CPU_TOPO_LEVEL_PACKAGE:
> +        num_ids = 1 << apicid_pkg_offset(topo_info);
> +        break;
> +    default:
> +        /*
> +         * Currently there is no use case for SMT and MODULE, so use
> +         * assert directly to facilitate debugging.
> +         */
> +        g_assert_not_reached();
> +    }
> +
> +    return num_ids - 1;
> +}
> +
> +static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info)
> +{
> +    uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) -
> +                               apicid_core_offset(topo_info));
> +    return num_cores - 1;
> +}
>  
>  /* Encode cache info for CPUID[4] */
>  static void encode_cache_cpuid4(CPUCacheInfo *cache,
> -                                int num_apic_ids, int num_cores,
> +                                X86CPUTopoInfo *topo_info,
>                                  uint32_t *eax, uint32_t *ebx,
>                                  uint32_t *ecx, uint32_t *edx)
>  {
>      assert(cache->size == cache->line_size * cache->associativity *
>                            cache->partitions * cache->sets);
>  
> -    assert(num_apic_ids > 0);
>      *eax = CACHE_TYPE(cache->type) |
>             CACHE_LEVEL(cache->level) |
>             (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) |
> -           ((num_cores - 1) << 26) |
> -           ((num_apic_ids - 1) << 14);
> +           (max_core_ids_in_package(topo_info) << 26) |
> +           (max_processor_ids_for_cache(topo_info, cache->share_level) << 14);
>  
>      assert(cache->line_size > 0);
>      assert(cache->partitions > 0);
> @@ -6116,56 +6147,41 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>                  int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
>  
>                  if (cores_per_pkg > 1) {
> -                    int addressable_cores_offset =
> -                                                apicid_pkg_offset(&topo_info) -
> -                                                apicid_core_offset(&topo_info);
> -
>                      *eax &= ~0xFC000000;
> -                    *eax |= (1 << addressable_cores_offset - 1) << 26;
> +                    *eax |= max_core_ids_in_package(&topo_info) << 26;
>                  }
>                  if (host_vcpus_per_cache > cpus_per_pkg) {
> -                    int pkg_offset = apicid_pkg_offset(&topo_info);
> -
>                      *eax &= ~0x3FFC000;
> -                    *eax |= (1 << pkg_offset - 1) << 14;
> +                    *eax |=
> +                        max_processor_ids_for_cache(&topo_info,
> +                                                CPU_TOPO_LEVEL_PACKAGE) << 14;
>                  }
>              }
>          } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) {
>              *eax = *ebx = *ecx = *edx = 0;
>          } else {
>              *eax = 0;
> -            int addressable_cores_offset = apicid_pkg_offset(&topo_info) -
> -                                           apicid_core_offset(&topo_info);
> -            int core_offset, die_offset;
>  
>              switch (count) {
>              case 0: /* L1 dcache info */
> -                core_offset = apicid_core_offset(&topo_info);
>                  encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache,
> -                                    (1 << core_offset),
> -                                    (1 << addressable_cores_offset),
> +                                    &topo_info,
>                                      eax, ebx, ecx, edx);
>                  break;
>              case 1: /* L1 icache info */
> -                core_offset = apicid_core_offset(&topo_info);
>                  encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache,
> -                                    (1 << core_offset),
> -                                    (1 << addressable_cores_offset),
> +                                    &topo_info,
>                                      eax, ebx, ecx, edx);
>                  break;
>              case 2: /* L2 cache info */
> -                core_offset = apicid_core_offset(&topo_info);
>                  encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache,
> -                                    (1 << core_offset),
> -                                    (1 << addressable_cores_offset),
> +                                    &topo_info,
>                                      eax, ebx, ecx, edx);
>                  break;
>              case 3: /* L3 cache info */
> -                die_offset = apicid_die_offset(&topo_info);
>                  if (cpu->enable_l3_cache) {
>                      encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache,
> -                                        (1 << die_offset),
> -                                        (1 << addressable_cores_offset),
> +                                        &topo_info,
>                                          eax, ebx, ecx, edx);
>                      break;
>                  }
Moger, Babu Aug. 3, 2023, 4:41 p.m. UTC | #2
Hi Zhao,

On 8/2/23 18:49, Moger, Babu wrote:
> Hi Zhao,
> 
> Hitting this error after this patch.
> 
> ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should
> not be reached
> Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code
> should not be reached
> Aborted (core dumped)
> 
> Looks like share_level for all the caches for AMD is not initialized.

The following patch fixes the problem.

======================================================


diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index f4c48e19fa..976a2755d8 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -528,6 +528,7 @@ static CPUCacheInfo legacy_l2_cache_cpuid2 = {
     .size = 2 * MiB,
     .line_size = 64,
     .associativity = 8,
+    .share_level = CPU_TOPO_LEVEL_CORE,
 };


@@ -1904,6 +1905,7 @@ static CPUCaches epyc_v4_cache_info = {
         .lines_per_tag = 1,
         .self_init = 1,
         .no_invd_sharing = true,
+        .share_level = CPU_TOPO_LEVEL_CORE,
     },
     .l1i_cache = &(CPUCacheInfo) {
         .type = INSTRUCTION_CACHE,
@@ -1916,6 +1918,7 @@ static CPUCaches epyc_v4_cache_info = {
         .lines_per_tag = 1,
         .self_init = 1,
         .no_invd_sharing = true,
+        .share_level = CPU_TOPO_LEVEL_CORE,
     },
     .l2_cache = &(CPUCacheInfo) {
         .type = UNIFIED_CACHE,
@@ -1926,6 +1929,7 @@ static CPUCaches epyc_v4_cache_info = {
         .partitions = 1,
         .sets = 1024,
         .lines_per_tag = 1,
+        .share_level = CPU_TOPO_LEVEL_CORE,
     },
     .l3_cache = &(CPUCacheInfo) {
         .type = UNIFIED_CACHE,
@@ -1939,6 +1943,7 @@ static CPUCaches epyc_v4_cache_info = {
         .self_init = true,
         .inclusive = true,
         .complex_indexing = false,
+        .share_level = CPU_TOPO_LEVEL_DIE,
     },
 };

@@ -2008,6 +2013,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
         .lines_per_tag = 1,
         .self_init = 1,
         .no_invd_sharing = true,
+        .share_level = CPU_TOPO_LEVEL_CORE,
     },
     .l1i_cache = &(CPUCacheInfo) {
         .type = INSTRUCTION_CACHE,
@@ -2020,6 +2026,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
         .lines_per_tag = 1,
         .self_init = 1,
         .no_invd_sharing = true,
+        .share_level = CPU_TOPO_LEVEL_CORE,
     },
     .l2_cache = &(CPUCacheInfo) {
         .type = UNIFIED_CACHE,
@@ -2030,6 +2037,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
         .partitions = 1,
         .sets = 1024,
         .lines_per_tag = 1,
+        .share_level = CPU_TOPO_LEVEL_CORE,
     },
     .l3_cache = &(CPUCacheInfo) {
         .type = UNIFIED_CACHE,
@@ -2043,6 +2051,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
         .self_init = true,
         .inclusive = true,
         .complex_indexing = false,
+        .share_level = CPU_TOPO_LEVEL_DIE,
     },
 };

@@ -2112,6 +2121,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
         .lines_per_tag = 1,
         .self_init = 1,
         .no_invd_sharing = true,
+        .share_level = CPU_TOPO_LEVEL_CORE,
     },
     .l1i_cache = &(CPUCacheInfo) {
         .type = INSTRUCTION_CACHE,
@@ -2124,6 +2134,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
         .lines_per_tag = 1,
         .self_init = 1,
         .no_invd_sharing = true,
+        .share_level = CPU_TOPO_LEVEL_CORE,
     },
     .l2_cache = &(CPUCacheInfo) {
         .type = UNIFIED_CACHE,
@@ -2134,6 +2145,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
         .partitions = 1,
         .sets = 1024,
         .lines_per_tag = 1,
+        .share_level = CPU_TOPO_LEVEL_CORE,
     },
     .l3_cache = &(CPUCacheInfo) {
         .type = UNIFIED_CACHE,
@@ -2147,6 +2159,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
         .self_init = true,
         .inclusive = true,
         .complex_indexing = false,
+        .share_level = CPU_TOPO_LEVEL_DIE,
     },
 };

@@ -2162,6 +2175,7 @@ static const CPUCaches epyc_genoa_cache_info = {
         .lines_per_tag = 1,
         .self_init = 1,
         .no_invd_sharing = true,
+        .share_level = CPU_TOPO_LEVEL_CORE,
     },
     .l1i_cache = &(CPUCacheInfo) {
         .type = INSTRUCTION_CACHE,
@@ -2174,6 +2188,7 @@ static const CPUCaches epyc_genoa_cache_info = {
         .lines_per_tag = 1,
         .self_init = 1,
         .no_invd_sharing = true,
+        .share_level = CPU_TOPO_LEVEL_CORE,
     },
     .l2_cache = &(CPUCacheInfo) {
         .type = UNIFIED_CACHE,
@@ -2184,6 +2199,7 @@ static const CPUCaches epyc_genoa_cache_info = {
         .partitions = 1,
         .sets = 2048,
         .lines_per_tag = 1,
+        .share_level = CPU_TOPO_LEVEL_CORE,
     },
     .l3_cache = &(CPUCacheInfo) {
         .type = UNIFIED_CACHE,
@@ -2197,6 +2213,7 @@ static const CPUCaches epyc_genoa_cache_info = {
         .self_init = true,
         .inclusive = true,
         .complex_indexing = false,
+        .share_level = CPU_TOPO_LEVEL_DIE,
     },
 };


=========================================================================

Thanks
Babu
> 
> On 8/1/23 05:35, Zhao Liu wrote:
>> From: Zhao Liu <zhao1.liu@intel.com>
>>
>> CPUID[4].EAX[bits 25:14] is used to represent the cache topology for
>> intel CPUs.
>>
>> After cache models have topology information, we can use
>> CPUCacheInfo.share_level to decide which topology level to be encoded
>> into CPUID[4].EAX[bits 25:14].
>>
>> And since maximum_processor_id (original "num_apic_ids") is parsed
>> based on cpu topology levels, which are verified when parsing smp, it's
>> no need to check this value by "assert(num_apic_ids > 0)" again, so
>> remove this assert.
>>
>> Additionally, wrap the encoding of CPUID[4].EAX[bits 31:26] into a
>> helper to make the code cleaner.
>>
>> Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
>> ---
>> Changes since v1:
>>  * Use "enum CPUTopoLevel share_level" as the parameter in
>>    max_processor_ids_for_cache().
>>  * Make cache_into_passthrough case also use
>>    max_processor_ids_for_cache() and max_core_ids_in_package() to
>>    encode CPUID[4]. (Yanan)
>>  * Rename the title of this patch (the original is "i386: Use
>>    CPUCacheInfo.share_level to encode CPUID[4].EAX[bits 25:14]").
>> ---
>>  target/i386/cpu.c | 70 +++++++++++++++++++++++++++++------------------
>>  1 file changed, 43 insertions(+), 27 deletions(-)
>>
>> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
>> index 55aba4889628..c9897c0fe91a 100644
>> --- a/target/i386/cpu.c
>> +++ b/target/i386/cpu.c
>> @@ -234,22 +234,53 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache)
>>                         ((t) == UNIFIED_CACHE) ? CACHE_TYPE_UNIFIED : \
>>                         0 /* Invalid value */)
>>  
>> +static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info,
>> +                                            enum CPUTopoLevel share_level)
>> +{
>> +    uint32_t num_ids = 0;
>> +
>> +    switch (share_level) {
>> +    case CPU_TOPO_LEVEL_CORE:
>> +        num_ids = 1 << apicid_core_offset(topo_info);
>> +        break;
>> +    case CPU_TOPO_LEVEL_DIE:
>> +        num_ids = 1 << apicid_die_offset(topo_info);
>> +        break;
>> +    case CPU_TOPO_LEVEL_PACKAGE:
>> +        num_ids = 1 << apicid_pkg_offset(topo_info);
>> +        break;
>> +    default:
>> +        /*
>> +         * Currently there is no use case for SMT and MODULE, so use
>> +         * assert directly to facilitate debugging.
>> +         */
>> +        g_assert_not_reached();
>> +    }
>> +
>> +    return num_ids - 1;
>> +}
>> +
>> +static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info)
>> +{
>> +    uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) -
>> +                               apicid_core_offset(topo_info));
>> +    return num_cores - 1;
>> +}
>>  
>>  /* Encode cache info for CPUID[4] */
>>  static void encode_cache_cpuid4(CPUCacheInfo *cache,
>> -                                int num_apic_ids, int num_cores,
>> +                                X86CPUTopoInfo *topo_info,
>>                                  uint32_t *eax, uint32_t *ebx,
>>                                  uint32_t *ecx, uint32_t *edx)
>>  {
>>      assert(cache->size == cache->line_size * cache->associativity *
>>                            cache->partitions * cache->sets);
>>  
>> -    assert(num_apic_ids > 0);
>>      *eax = CACHE_TYPE(cache->type) |
>>             CACHE_LEVEL(cache->level) |
>>             (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) |
>> -           ((num_cores - 1) << 26) |
>> -           ((num_apic_ids - 1) << 14);
>> +           (max_core_ids_in_package(topo_info) << 26) |
>> +           (max_processor_ids_for_cache(topo_info, cache->share_level) << 14);
>>  
>>      assert(cache->line_size > 0);
>>      assert(cache->partitions > 0);
>> @@ -6116,56 +6147,41 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>>                  int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
>>  
>>                  if (cores_per_pkg > 1) {
>> -                    int addressable_cores_offset =
>> -                                                apicid_pkg_offset(&topo_info) -
>> -                                                apicid_core_offset(&topo_info);
>> -
>>                      *eax &= ~0xFC000000;
>> -                    *eax |= (1 << addressable_cores_offset - 1) << 26;
>> +                    *eax |= max_core_ids_in_package(&topo_info) << 26;
>>                  }
>>                  if (host_vcpus_per_cache > cpus_per_pkg) {
>> -                    int pkg_offset = apicid_pkg_offset(&topo_info);
>> -
>>                      *eax &= ~0x3FFC000;
>> -                    *eax |= (1 << pkg_offset - 1) << 14;
>> +                    *eax |=
>> +                        max_processor_ids_for_cache(&topo_info,
>> +                                                CPU_TOPO_LEVEL_PACKAGE) << 14;
>>                  }
>>              }
>>          } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) {
>>              *eax = *ebx = *ecx = *edx = 0;
>>          } else {
>>              *eax = 0;
>> -            int addressable_cores_offset = apicid_pkg_offset(&topo_info) -
>> -                                           apicid_core_offset(&topo_info);
>> -            int core_offset, die_offset;
>>  
>>              switch (count) {
>>              case 0: /* L1 dcache info */
>> -                core_offset = apicid_core_offset(&topo_info);
>>                  encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache,
>> -                                    (1 << core_offset),
>> -                                    (1 << addressable_cores_offset),
>> +                                    &topo_info,
>>                                      eax, ebx, ecx, edx);
>>                  break;
>>              case 1: /* L1 icache info */
>> -                core_offset = apicid_core_offset(&topo_info);
>>                  encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache,
>> -                                    (1 << core_offset),
>> -                                    (1 << addressable_cores_offset),
>> +                                    &topo_info,
>>                                      eax, ebx, ecx, edx);
>>                  break;
>>              case 2: /* L2 cache info */
>> -                core_offset = apicid_core_offset(&topo_info);
>>                  encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache,
>> -                                    (1 << core_offset),
>> -                                    (1 << addressable_cores_offset),
>> +                                    &topo_info,
>>                                      eax, ebx, ecx, edx);
>>                  break;
>>              case 3: /* L3 cache info */
>> -                die_offset = apicid_die_offset(&topo_info);
>>                  if (cpu->enable_l3_cache) {
>>                      encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache,
>> -                                        (1 << die_offset),
>> -                                        (1 << addressable_cores_offset),
>> +                                        &topo_info,
>>                                          eax, ebx, ecx, edx);
>>                      break;
>>                  }
>
Zhao Liu Aug. 4, 2023, 9:48 a.m. UTC | #3
Hi Babu,

On Thu, Aug 03, 2023 at 11:41:40AM -0500, Moger, Babu wrote:
> Date: Thu, 3 Aug 2023 11:41:40 -0500
> From: "Moger, Babu" <babu.moger@amd.com>
> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
>  CPUID[4]
> 
> Hi Zhao,
> 
> On 8/2/23 18:49, Moger, Babu wrote:
> > Hi Zhao,
> > 
> > Hitting this error after this patch.
> > 
> > ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should
> > not be reached
> > Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code
> > should not be reached
> > Aborted (core dumped)
> > 
> > Looks like share_level for all the caches for AMD is not initialized.

I missed these changes when I rebased. Sorry for that.

BTW, could I ask a question? From a previous discussion[1], I understand
that the cache info is used to show the correct cache information in
new machine. And from [2], the wrong cache info may cause "compatibility
issues".

Is this "compatibility issues" AMD specific? I'm not sure if Intel should
update the cache info like that. thanks!

[1]: https://patchwork.kernel.org/project/kvm/patch/CY4PR12MB1768A3CBE42AAFB03CB1081E95AA0@CY4PR12MB1768.namprd12.prod.outlook.com/
[2]: https://lore.kernel.org/qemu-devel/20180510204148.11687-1-babu.moger@amd.com/

> 
> The following patch fixes the problem.
> 
> ======================================================
> 
> 
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index f4c48e19fa..976a2755d8 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -528,6 +528,7 @@ static CPUCacheInfo legacy_l2_cache_cpuid2 = {
>      .size = 2 * MiB,
>      .line_size = 64,
>      .associativity = 8,
> +    .share_level = CPU_TOPO_LEVEL_CORE,

This "legacy_l2_cache_cpuid2" is not used to encode cache topology.
I should explicitly set this default topo level as CPU_TOPO_LEVEL_UNKNOW.

>  };
> 
> 
> @@ -1904,6 +1905,7 @@ static CPUCaches epyc_v4_cache_info = {
>          .lines_per_tag = 1,
>          .self_init = 1,
>          .no_invd_sharing = true,
> +        .share_level = CPU_TOPO_LEVEL_CORE,
>      },
>      .l1i_cache = &(CPUCacheInfo) {
>          .type = INSTRUCTION_CACHE,
> @@ -1916,6 +1918,7 @@ static CPUCaches epyc_v4_cache_info = {
>          .lines_per_tag = 1,
>          .self_init = 1,
>          .no_invd_sharing = true,
> +        .share_level = CPU_TOPO_LEVEL_CORE,
>      },
>      .l2_cache = &(CPUCacheInfo) {
>          .type = UNIFIED_CACHE,
> @@ -1926,6 +1929,7 @@ static CPUCaches epyc_v4_cache_info = {
>          .partitions = 1,
>          .sets = 1024,
>          .lines_per_tag = 1,
> +        .share_level = CPU_TOPO_LEVEL_CORE,
>      },
>      .l3_cache = &(CPUCacheInfo) {
>          .type = UNIFIED_CACHE,
> @@ -1939,6 +1943,7 @@ static CPUCaches epyc_v4_cache_info = {
>          .self_init = true,
>          .inclusive = true,
>          .complex_indexing = false,
> +        .share_level = CPU_TOPO_LEVEL_DIE,
>      },
>  };
> 
> @@ -2008,6 +2013,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
>          .lines_per_tag = 1,
>          .self_init = 1,
>          .no_invd_sharing = true,
> +        .share_level = CPU_TOPO_LEVEL_CORE,
>      },
>      .l1i_cache = &(CPUCacheInfo) {
>          .type = INSTRUCTION_CACHE,
> @@ -2020,6 +2026,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
>          .lines_per_tag = 1,
>          .self_init = 1,
>          .no_invd_sharing = true,
> +        .share_level = CPU_TOPO_LEVEL_CORE,
>      },
>      .l2_cache = &(CPUCacheInfo) {
>          .type = UNIFIED_CACHE,
> @@ -2030,6 +2037,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
>          .partitions = 1,
>          .sets = 1024,
>          .lines_per_tag = 1,
> +        .share_level = CPU_TOPO_LEVEL_CORE,
>      },
>      .l3_cache = &(CPUCacheInfo) {
>          .type = UNIFIED_CACHE,
> @@ -2043,6 +2051,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
>          .self_init = true,
>          .inclusive = true,
>          .complex_indexing = false,
> +        .share_level = CPU_TOPO_LEVEL_DIE,
>      },
>  };
> 
> @@ -2112,6 +2121,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
>          .lines_per_tag = 1,
>          .self_init = 1,
>          .no_invd_sharing = true,
> +        .share_level = CPU_TOPO_LEVEL_CORE,
>      },
>      .l1i_cache = &(CPUCacheInfo) {
>          .type = INSTRUCTION_CACHE,
> @@ -2124,6 +2134,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
>          .lines_per_tag = 1,
>          .self_init = 1,
>          .no_invd_sharing = true,
> +        .share_level = CPU_TOPO_LEVEL_CORE,
>      },
>      .l2_cache = &(CPUCacheInfo) {
>          .type = UNIFIED_CACHE,
> @@ -2134,6 +2145,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
>          .partitions = 1,
>          .sets = 1024,
>          .lines_per_tag = 1,
> +        .share_level = CPU_TOPO_LEVEL_CORE,
>      },
>      .l3_cache = &(CPUCacheInfo) {
>          .type = UNIFIED_CACHE,
> @@ -2147,6 +2159,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
>          .self_init = true,
>          .inclusive = true,
>          .complex_indexing = false,
> +        .share_level = CPU_TOPO_LEVEL_DIE,
>      },
>  };
> 
> @@ -2162,6 +2175,7 @@ static const CPUCaches epyc_genoa_cache_info = {
>          .lines_per_tag = 1,
>          .self_init = 1,
>          .no_invd_sharing = true,
> +        .share_level = CPU_TOPO_LEVEL_CORE,
>      },
>      .l1i_cache = &(CPUCacheInfo) {
>          .type = INSTRUCTION_CACHE,
> @@ -2174,6 +2188,7 @@ static const CPUCaches epyc_genoa_cache_info = {
>          .lines_per_tag = 1,
>          .self_init = 1,
>          .no_invd_sharing = true,
> +        .share_level = CPU_TOPO_LEVEL_CORE,
>      },
>      .l2_cache = &(CPUCacheInfo) {
>          .type = UNIFIED_CACHE,
> @@ -2184,6 +2199,7 @@ static const CPUCaches epyc_genoa_cache_info = {
>          .partitions = 1,
>          .sets = 2048,
>          .lines_per_tag = 1,
> +        .share_level = CPU_TOPO_LEVEL_CORE,
>      },
>      .l3_cache = &(CPUCacheInfo) {
>          .type = UNIFIED_CACHE,
> @@ -2197,6 +2213,7 @@ static const CPUCaches epyc_genoa_cache_info = {
>          .self_init = true,
>          .inclusive = true,
>          .complex_indexing = false,
> +        .share_level = CPU_TOPO_LEVEL_DIE,
>      },
>  };
> 
> 
> =========================================================================


Looks good to me except for legacy_l2_cache_cpuid2, thanks very much!
I'll add this in the next version.

-Zhao

> 
> Thanks
> Babu
> > 
> > On 8/1/23 05:35, Zhao Liu wrote:
> >> From: Zhao Liu <zhao1.liu@intel.com>
> >>
> >> CPUID[4].EAX[bits 25:14] is used to represent the cache topology for
> >> intel CPUs.
> >>
> >> After cache models have topology information, we can use
> >> CPUCacheInfo.share_level to decide which topology level to be encoded
> >> into CPUID[4].EAX[bits 25:14].
> >>
> >> And since maximum_processor_id (original "num_apic_ids") is parsed
> >> based on cpu topology levels, which are verified when parsing smp, it's
> >> no need to check this value by "assert(num_apic_ids > 0)" again, so
> >> remove this assert.
> >>
> >> Additionally, wrap the encoding of CPUID[4].EAX[bits 31:26] into a
> >> helper to make the code cleaner.
> >>
> >> Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> >> ---
> >> Changes since v1:
> >>  * Use "enum CPUTopoLevel share_level" as the parameter in
> >>    max_processor_ids_for_cache().
> >>  * Make cache_into_passthrough case also use
> >>    max_processor_ids_for_cache() and max_core_ids_in_package() to
> >>    encode CPUID[4]. (Yanan)
> >>  * Rename the title of this patch (the original is "i386: Use
> >>    CPUCacheInfo.share_level to encode CPUID[4].EAX[bits 25:14]").
> >> ---
> >>  target/i386/cpu.c | 70 +++++++++++++++++++++++++++++------------------
> >>  1 file changed, 43 insertions(+), 27 deletions(-)
> >>
> >> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> >> index 55aba4889628..c9897c0fe91a 100644
> >> --- a/target/i386/cpu.c
> >> +++ b/target/i386/cpu.c
> >> @@ -234,22 +234,53 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache)
> >>                         ((t) == UNIFIED_CACHE) ? CACHE_TYPE_UNIFIED : \
> >>                         0 /* Invalid value */)
> >>  
> >> +static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info,
> >> +                                            enum CPUTopoLevel share_level)
> >> +{
> >> +    uint32_t num_ids = 0;
> >> +
> >> +    switch (share_level) {
> >> +    case CPU_TOPO_LEVEL_CORE:
> >> +        num_ids = 1 << apicid_core_offset(topo_info);
> >> +        break;
> >> +    case CPU_TOPO_LEVEL_DIE:
> >> +        num_ids = 1 << apicid_die_offset(topo_info);
> >> +        break;
> >> +    case CPU_TOPO_LEVEL_PACKAGE:
> >> +        num_ids = 1 << apicid_pkg_offset(topo_info);
> >> +        break;
> >> +    default:
> >> +        /*
> >> +         * Currently there is no use case for SMT and MODULE, so use
> >> +         * assert directly to facilitate debugging.
> >> +         */
> >> +        g_assert_not_reached();
> >> +    }
> >> +
> >> +    return num_ids - 1;
> >> +}
> >> +
> >> +static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info)
> >> +{
> >> +    uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) -
> >> +                               apicid_core_offset(topo_info));
> >> +    return num_cores - 1;
> >> +}
> >>  
> >>  /* Encode cache info for CPUID[4] */
> >>  static void encode_cache_cpuid4(CPUCacheInfo *cache,
> >> -                                int num_apic_ids, int num_cores,
> >> +                                X86CPUTopoInfo *topo_info,
> >>                                  uint32_t *eax, uint32_t *ebx,
> >>                                  uint32_t *ecx, uint32_t *edx)
> >>  {
> >>      assert(cache->size == cache->line_size * cache->associativity *
> >>                            cache->partitions * cache->sets);
> >>  
> >> -    assert(num_apic_ids > 0);
> >>      *eax = CACHE_TYPE(cache->type) |
> >>             CACHE_LEVEL(cache->level) |
> >>             (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) |
> >> -           ((num_cores - 1) << 26) |
> >> -           ((num_apic_ids - 1) << 14);
> >> +           (max_core_ids_in_package(topo_info) << 26) |
> >> +           (max_processor_ids_for_cache(topo_info, cache->share_level) << 14);
> >>  
> >>      assert(cache->line_size > 0);
> >>      assert(cache->partitions > 0);
> >> @@ -6116,56 +6147,41 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
> >>                  int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
> >>  
> >>                  if (cores_per_pkg > 1) {
> >> -                    int addressable_cores_offset =
> >> -                                                apicid_pkg_offset(&topo_info) -
> >> -                                                apicid_core_offset(&topo_info);
> >> -
> >>                      *eax &= ~0xFC000000;
> >> -                    *eax |= (1 << addressable_cores_offset - 1) << 26;
> >> +                    *eax |= max_core_ids_in_package(&topo_info) << 26;
> >>                  }
> >>                  if (host_vcpus_per_cache > cpus_per_pkg) {
> >> -                    int pkg_offset = apicid_pkg_offset(&topo_info);
> >> -
> >>                      *eax &= ~0x3FFC000;
> >> -                    *eax |= (1 << pkg_offset - 1) << 14;
> >> +                    *eax |=
> >> +                        max_processor_ids_for_cache(&topo_info,
> >> +                                                CPU_TOPO_LEVEL_PACKAGE) << 14;
> >>                  }
> >>              }
> >>          } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) {
> >>              *eax = *ebx = *ecx = *edx = 0;
> >>          } else {
> >>              *eax = 0;
> >> -            int addressable_cores_offset = apicid_pkg_offset(&topo_info) -
> >> -                                           apicid_core_offset(&topo_info);
> >> -            int core_offset, die_offset;
> >>  
> >>              switch (count) {
> >>              case 0: /* L1 dcache info */
> >> -                core_offset = apicid_core_offset(&topo_info);
> >>                  encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache,
> >> -                                    (1 << core_offset),
> >> -                                    (1 << addressable_cores_offset),
> >> +                                    &topo_info,
> >>                                      eax, ebx, ecx, edx);
> >>                  break;
> >>              case 1: /* L1 icache info */
> >> -                core_offset = apicid_core_offset(&topo_info);
> >>                  encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache,
> >> -                                    (1 << core_offset),
> >> -                                    (1 << addressable_cores_offset),
> >> +                                    &topo_info,
> >>                                      eax, ebx, ecx, edx);
> >>                  break;
> >>              case 2: /* L2 cache info */
> >> -                core_offset = apicid_core_offset(&topo_info);
> >>                  encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache,
> >> -                                    (1 << core_offset),
> >> -                                    (1 << addressable_cores_offset),
> >> +                                    &topo_info,
> >>                                      eax, ebx, ecx, edx);
> >>                  break;
> >>              case 3: /* L3 cache info */
> >> -                die_offset = apicid_die_offset(&topo_info);
> >>                  if (cpu->enable_l3_cache) {
> >>                      encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache,
> >> -                                        (1 << die_offset),
> >> -                                        (1 << addressable_cores_offset),
> >> +                                        &topo_info,
> >>                                          eax, ebx, ecx, edx);
> >>                      break;
> >>                  }
> > 
> 
> -- 
> Thanks
> Babu Moger
Moger, Babu Aug. 4, 2023, 3:48 p.m. UTC | #4
Hi Zhao,

On 8/4/23 04:48, Zhao Liu wrote:
> Hi Babu,
> 
> On Thu, Aug 03, 2023 at 11:41:40AM -0500, Moger, Babu wrote:
>> Date: Thu, 3 Aug 2023 11:41:40 -0500
>> From: "Moger, Babu" <babu.moger@amd.com>
>> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
>>  CPUID[4]
>>
>> Hi Zhao,
>>
>> On 8/2/23 18:49, Moger, Babu wrote:
>>> Hi Zhao,
>>>
>>> Hitting this error after this patch.
>>>
>>> ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should
>>> not be reached
>>> Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code
>>> should not be reached
>>> Aborted (core dumped)
>>>
>>> Looks like share_level for all the caches for AMD is not initialized.
> 
> I missed these change when I rebase. Sorry for that.
> 
> BTW, could I ask a question? From a previous discussion[1], I understand
> that the cache info is used to show the correct cache information in
> new machine. And from [2], the wrong cache info may cause "compatibility
> issues".
> 
> Is this "compatibility issues" AMD specific? I'm not sure if Intel should
> update the cache info like that. thanks!

I was going to comment about that. Good that you asked me.

Compatibility is a QEMU requirement. Otherwise the migrations will fail.

Any changes in the topology are going to cause migration problems.

I am not sure how you are going to handle this. You can probably look at
the feature "x-intel-pt-auto-level".

Make sure to test the migration.

Thanks
Babu


> 
> [1]: https://patchwork.kernel.org/project/kvm/patch/CY4PR12MB1768A3CBE42AAFB03CB1081E95AA0@CY4PR12MB1768.namprd12.prod.outlook.com/
> [2]: https://lore.kernel.org/qemu-devel/20180510204148.11687-1-babu.moger@amd.com/
> 
>>
>> The following patch fixes the problem.
>>
>> ======================================================
>>
>>
>> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
>> index f4c48e19fa..976a2755d8 100644
>> --- a/target/i386/cpu.c
>> +++ b/target/i386/cpu.c
>> @@ -528,6 +528,7 @@ static CPUCacheInfo legacy_l2_cache_cpuid2 = {
>>      .size = 2 * MiB,
>>      .line_size = 64,
>>      .associativity = 8,
>> +    .share_level = CPU_TOPO_LEVEL_CORE,
> 
> This "legacy_l2_cache_cpuid2" is not used to encode cache topology.
> I should explicitly set this default topo level as CPU_TOPO_LEVEL_UNKNOW.
> 
>>  };
>>
>>
>> @@ -1904,6 +1905,7 @@ static CPUCaches epyc_v4_cache_info = {
>>          .lines_per_tag = 1,
>>          .self_init = 1,
>>          .no_invd_sharing = true,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l1i_cache = &(CPUCacheInfo) {
>>          .type = INSTRUCTION_CACHE,
>> @@ -1916,6 +1918,7 @@ static CPUCaches epyc_v4_cache_info = {
>>          .lines_per_tag = 1,
>>          .self_init = 1,
>>          .no_invd_sharing = true,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l2_cache = &(CPUCacheInfo) {
>>          .type = UNIFIED_CACHE,
>> @@ -1926,6 +1929,7 @@ static CPUCaches epyc_v4_cache_info = {
>>          .partitions = 1,
>>          .sets = 1024,
>>          .lines_per_tag = 1,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l3_cache = &(CPUCacheInfo) {
>>          .type = UNIFIED_CACHE,
>> @@ -1939,6 +1943,7 @@ static CPUCaches epyc_v4_cache_info = {
>>          .self_init = true,
>>          .inclusive = true,
>>          .complex_indexing = false,
>> +        .share_level = CPU_TOPO_LEVEL_DIE,
>>      },
>>  };
>>
>> @@ -2008,6 +2013,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
>>          .lines_per_tag = 1,
>>          .self_init = 1,
>>          .no_invd_sharing = true,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l1i_cache = &(CPUCacheInfo) {
>>          .type = INSTRUCTION_CACHE,
>> @@ -2020,6 +2026,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
>>          .lines_per_tag = 1,
>>          .self_init = 1,
>>          .no_invd_sharing = true,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l2_cache = &(CPUCacheInfo) {
>>          .type = UNIFIED_CACHE,
>> @@ -2030,6 +2037,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
>>          .partitions = 1,
>>          .sets = 1024,
>>          .lines_per_tag = 1,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l3_cache = &(CPUCacheInfo) {
>>          .type = UNIFIED_CACHE,
>> @@ -2043,6 +2051,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
>>          .self_init = true,
>>          .inclusive = true,
>>          .complex_indexing = false,
>> +        .share_level = CPU_TOPO_LEVEL_DIE,
>>      },
>>  };
>>
>> @@ -2112,6 +2121,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
>>          .lines_per_tag = 1,
>>          .self_init = 1,
>>          .no_invd_sharing = true,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l1i_cache = &(CPUCacheInfo) {
>>          .type = INSTRUCTION_CACHE,
>> @@ -2124,6 +2134,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
>>          .lines_per_tag = 1,
>>          .self_init = 1,
>>          .no_invd_sharing = true,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l2_cache = &(CPUCacheInfo) {
>>          .type = UNIFIED_CACHE,
>> @@ -2134,6 +2145,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
>>          .partitions = 1,
>>          .sets = 1024,
>>          .lines_per_tag = 1,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l3_cache = &(CPUCacheInfo) {
>>          .type = UNIFIED_CACHE,
>> @@ -2147,6 +2159,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
>>          .self_init = true,
>>          .inclusive = true,
>>          .complex_indexing = false,
>> +        .share_level = CPU_TOPO_LEVEL_DIE,
>>      },
>>  };
>>
>> @@ -2162,6 +2175,7 @@ static const CPUCaches epyc_genoa_cache_info = {
>>          .lines_per_tag = 1,
>>          .self_init = 1,
>>          .no_invd_sharing = true,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l1i_cache = &(CPUCacheInfo) {
>>          .type = INSTRUCTION_CACHE,
>> @@ -2174,6 +2188,7 @@ static const CPUCaches epyc_genoa_cache_info = {
>>          .lines_per_tag = 1,
>>          .self_init = 1,
>>          .no_invd_sharing = true,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l2_cache = &(CPUCacheInfo) {
>>          .type = UNIFIED_CACHE,
>> @@ -2184,6 +2199,7 @@ static const CPUCaches epyc_genoa_cache_info = {
>>          .partitions = 1,
>>          .sets = 2048,
>>          .lines_per_tag = 1,
>> +        .share_level = CPU_TOPO_LEVEL_CORE,
>>      },
>>      .l3_cache = &(CPUCacheInfo) {
>>          .type = UNIFIED_CACHE,
>> @@ -2197,6 +2213,7 @@ static const CPUCaches epyc_genoa_cache_info = {
>>          .self_init = true,
>>          .inclusive = true,
>>          .complex_indexing = false,
>> +        .share_level = CPU_TOPO_LEVEL_DIE,
>>      },
>>  };
>>
>>
>> =========================================================================
> 
> 
> Look good to me except legacy_l2_cache_cpuid2, thanks very much!
> I'll add this in next version.
> 
> -Zhao
> 
>>
>> Thanks
>> Babu
>>>
>>> On 8/1/23 05:35, Zhao Liu wrote:
>>>> From: Zhao Liu <zhao1.liu@intel.com>
>>>>
>>>> CPUID[4].EAX[bits 25:14] is used to represent the cache topology for
>>>> intel CPUs.
>>>>
>>>> After cache models have topology information, we can use
>>>> CPUCacheInfo.share_level to decide which topology level to be encoded
>>>> into CPUID[4].EAX[bits 25:14].
>>>>
>>>> And since maximum_processor_id (original "num_apic_ids") is parsed
>>>> based on cpu topology levels, which are verified when parsing smp, it's
>>>> no need to check this value by "assert(num_apic_ids > 0)" again, so
>>>> remove this assert.
>>>>
>>>> Additionally, wrap the encoding of CPUID[4].EAX[bits 31:26] into a
>>>> helper to make the code cleaner.
>>>>
>>>> Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
>>>> ---
>>>> Changes since v1:
>>>>  * Use "enum CPUTopoLevel share_level" as the parameter in
>>>>    max_processor_ids_for_cache().
>>>>  * Make cache_into_passthrough case also use
>>>>    max_processor_ids_for_cache() and max_core_ids_in_package() to
>>>>    encode CPUID[4]. (Yanan)
>>>>  * Rename the title of this patch (the original is "i386: Use
>>>>    CPUCacheInfo.share_level to encode CPUID[4].EAX[bits 25:14]").
>>>> ---
>>>>  target/i386/cpu.c | 70 +++++++++++++++++++++++++++++------------------
>>>>  1 file changed, 43 insertions(+), 27 deletions(-)
>>>>
>>>> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
>>>> index 55aba4889628..c9897c0fe91a 100644
>>>> --- a/target/i386/cpu.c
>>>> +++ b/target/i386/cpu.c
>>>> @@ -234,22 +234,53 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache)
>>>>                         ((t) == UNIFIED_CACHE) ? CACHE_TYPE_UNIFIED : \
>>>>                         0 /* Invalid value */)
>>>>  
>>>> +static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info,
>>>> +                                            enum CPUTopoLevel share_level)
>>>> +{
>>>> +    uint32_t num_ids = 0;
>>>> +
>>>> +    switch (share_level) {
>>>> +    case CPU_TOPO_LEVEL_CORE:
>>>> +        num_ids = 1 << apicid_core_offset(topo_info);
>>>> +        break;
>>>> +    case CPU_TOPO_LEVEL_DIE:
>>>> +        num_ids = 1 << apicid_die_offset(topo_info);
>>>> +        break;
>>>> +    case CPU_TOPO_LEVEL_PACKAGE:
>>>> +        num_ids = 1 << apicid_pkg_offset(topo_info);
>>>> +        break;
>>>> +    default:
>>>> +        /*
>>>> +         * Currently there is no use case for SMT and MODULE, so use
>>>> +         * assert directly to facilitate debugging.
>>>> +         */
>>>> +        g_assert_not_reached();
>>>> +    }
>>>> +
>>>> +    return num_ids - 1;
>>>> +}
>>>> +
>>>> +static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info)
>>>> +{
>>>> +    uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) -
>>>> +                               apicid_core_offset(topo_info));
>>>> +    return num_cores - 1;
>>>> +}
>>>>  
>>>>  /* Encode cache info for CPUID[4] */
>>>>  static void encode_cache_cpuid4(CPUCacheInfo *cache,
>>>> -                                int num_apic_ids, int num_cores,
>>>> +                                X86CPUTopoInfo *topo_info,
>>>>                                  uint32_t *eax, uint32_t *ebx,
>>>>                                  uint32_t *ecx, uint32_t *edx)
>>>>  {
>>>>      assert(cache->size == cache->line_size * cache->associativity *
>>>>                            cache->partitions * cache->sets);
>>>>  
>>>> -    assert(num_apic_ids > 0);
>>>>      *eax = CACHE_TYPE(cache->type) |
>>>>             CACHE_LEVEL(cache->level) |
>>>>             (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) |
>>>> -           ((num_cores - 1) << 26) |
>>>> -           ((num_apic_ids - 1) << 14);
>>>> +           (max_core_ids_in_package(topo_info) << 26) |
>>>> +           (max_processor_ids_for_cache(topo_info, cache->share_level) << 14);
>>>>  
>>>>      assert(cache->line_size > 0);
>>>>      assert(cache->partitions > 0);
>>>> @@ -6116,56 +6147,41 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>>>>                  int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
>>>>  
>>>>                  if (cores_per_pkg > 1) {
>>>> -                    int addressable_cores_offset =
>>>> -                                                apicid_pkg_offset(&topo_info) -
>>>> -                                                apicid_core_offset(&topo_info);
>>>> -
>>>>                      *eax &= ~0xFC000000;
>>>> -                    *eax |= (1 << addressable_cores_offset - 1) << 26;
>>>> +                    *eax |= max_core_ids_in_package(&topo_info) << 26;
>>>>                  }
>>>>                  if (host_vcpus_per_cache > cpus_per_pkg) {
>>>> -                    int pkg_offset = apicid_pkg_offset(&topo_info);
>>>> -
>>>>                      *eax &= ~0x3FFC000;
>>>> -                    *eax |= (1 << pkg_offset - 1) << 14;
>>>> +                    *eax |=
>>>> +                        max_processor_ids_for_cache(&topo_info,
>>>> +                                                CPU_TOPO_LEVEL_PACKAGE) << 14;
>>>>                  }
>>>>              }
>>>>          } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) {
>>>>              *eax = *ebx = *ecx = *edx = 0;
>>>>          } else {
>>>>              *eax = 0;
>>>> -            int addressable_cores_offset = apicid_pkg_offset(&topo_info) -
>>>> -                                           apicid_core_offset(&topo_info);
>>>> -            int core_offset, die_offset;
>>>>  
>>>>              switch (count) {
>>>>              case 0: /* L1 dcache info */
>>>> -                core_offset = apicid_core_offset(&topo_info);
>>>>                  encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache,
>>>> -                                    (1 << core_offset),
>>>> -                                    (1 << addressable_cores_offset),
>>>> +                                    &topo_info,
>>>>                                      eax, ebx, ecx, edx);
>>>>                  break;
>>>>              case 1: /* L1 icache info */
>>>> -                core_offset = apicid_core_offset(&topo_info);
>>>>                  encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache,
>>>> -                                    (1 << core_offset),
>>>> -                                    (1 << addressable_cores_offset),
>>>> +                                    &topo_info,
>>>>                                      eax, ebx, ecx, edx);
>>>>                  break;
>>>>              case 2: /* L2 cache info */
>>>> -                core_offset = apicid_core_offset(&topo_info);
>>>>                  encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache,
>>>> -                                    (1 << core_offset),
>>>> -                                    (1 << addressable_cores_offset),
>>>> +                                    &topo_info,
>>>>                                      eax, ebx, ecx, edx);
>>>>                  break;
>>>>              case 3: /* L3 cache info */
>>>> -                die_offset = apicid_die_offset(&topo_info);
>>>>                  if (cpu->enable_l3_cache) {
>>>>                      encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache,
>>>> -                                        (1 << die_offset),
>>>> -                                        (1 << addressable_cores_offset),
>>>> +                                        &topo_info,
>>>>                                          eax, ebx, ecx, edx);
>>>>                      break;
>>>>                  }
>>>
>>
>> -- 
>> Thanks
>> Babu Moger
Zhao Liu Aug. 14, 2023, 8:22 a.m. UTC | #5
Hi Babu,

On Fri, Aug 04, 2023 at 10:48:29AM -0500, Moger, Babu wrote:
> Date: Fri, 4 Aug 2023 10:48:29 -0500
> From: "Moger, Babu" <babu.moger@amd.com>
> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
>  CPUID[4]
> 
> Hi Zhao,
> 
> On 8/4/23 04:48, Zhao Liu wrote:
> > Hi Babu,
> > 
> > On Thu, Aug 03, 2023 at 11:41:40AM -0500, Moger, Babu wrote:
> >> Date: Thu, 3 Aug 2023 11:41:40 -0500
> >> From: "Moger, Babu" <babu.moger@amd.com>
> >> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
> >>  CPUID[4]
> >>
> >> Hi Zhao,
> >>
> >> On 8/2/23 18:49, Moger, Babu wrote:
> >>> Hi Zhao,
> >>>
> >>> Hitting this error after this patch.
> >>>
> >>> ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should
> >>> not be reached
> >>> Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code
> >>> should not be reached
> >>> Aborted (core dumped)
> >>>
> >>> Looks like share_level for all the caches for AMD is not initialized.
> > 
> > I missed these change when I rebase. Sorry for that.
> > 
> > BTW, could I ask a question? From a previous discussion[1], I understand
> > that the cache info is used to show the correct cache information in
> > new machine. And from [2], the wrong cache info may cause "compatibility
> > issues".
> > 
> > Is this "compatibility issues" AMD specific? I'm not sure if Intel should
> > update the cache info like that. thanks!
> 
> I was going to comment about that. Good that you asked me.
> 
> Compatibility is qemu requirement.  Otherwise the migrations will fail.
> 
> Any changes in the topology is going to cause migration problems.

Could you please educate me more about the details of the "migration
problem"?

I didn't understand why it was causing the problem and wasn't sure if I
was missing any cases.

Thanks,
Zhao

> 
> I am not sure how you are going to handle this. You can probably look at
> the feature "x-intel-pt-auto-level".
> 
> make sure to test the migration.
> 
> Thanks
> Babu
> 
> 
> > 
> > [1]: https://patchwork.kernel.org/project/kvm/patch/CY4PR12MB1768A3CBE42AAFB03CB1081E95AA0@CY4PR12MB1768.namprd12.prod.outlook.com/
> > [2]: https://lore.kernel.org/qemu-devel/20180510204148.11687-1-babu.moger@amd.com/
> > 
> >>
> >> The following patch fixes the problem.
> >>
> >> ======================================================
> >>
> >>
> >> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> >> index f4c48e19fa..976a2755d8 100644
> >> --- a/target/i386/cpu.c
> >> +++ b/target/i386/cpu.c
> >> @@ -528,6 +528,7 @@ static CPUCacheInfo legacy_l2_cache_cpuid2 = {
> >>      .size = 2 * MiB,
> >>      .line_size = 64,
> >>      .associativity = 8,
> >> +    .share_level = CPU_TOPO_LEVEL_CORE,
> > 
> > This "legacy_l2_cache_cpuid2" is not used to encode cache topology.
> > I should explicitly set this default topo level as CPU_TOPO_LEVEL_UNKNOW.
> > 
> >>  };
> >>
> >>
> >> @@ -1904,6 +1905,7 @@ static CPUCaches epyc_v4_cache_info = {
> >>          .lines_per_tag = 1,
> >>          .self_init = 1,
> >>          .no_invd_sharing = true,
> >> +        .share_level = CPU_TOPO_LEVEL_CORE,
> >>      },
> >>      .l1i_cache = &(CPUCacheInfo) {
> >>          .type = INSTRUCTION_CACHE,
> >> @@ -1916,6 +1918,7 @@ static CPUCaches epyc_v4_cache_info = {
> >>          .lines_per_tag = 1,
> >>          .self_init = 1,
> >>          .no_invd_sharing = true,
> >> +        .share_level = CPU_TOPO_LEVEL_CORE,
> >>      },
> >>      .l2_cache = &(CPUCacheInfo) {
> >>          .type = UNIFIED_CACHE,
> >> @@ -1926,6 +1929,7 @@ static CPUCaches epyc_v4_cache_info = {
> >>          .partitions = 1,
> >>          .sets = 1024,
> >>          .lines_per_tag = 1,
> >> +        .share_level = CPU_TOPO_LEVEL_CORE,
> >>      },
> >>      .l3_cache = &(CPUCacheInfo) {
> >>          .type = UNIFIED_CACHE,
> >> @@ -1939,6 +1943,7 @@ static CPUCaches epyc_v4_cache_info = {
> >>          .self_init = true,
> >>          .inclusive = true,
> >>          .complex_indexing = false,
> >> +        .share_level = CPU_TOPO_LEVEL_DIE,
> >>      },
> >>  };
> >>
> >> @@ -2008,6 +2013,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
> >>          .lines_per_tag = 1,
> >>          .self_init = 1,
> >>          .no_invd_sharing = true,
> >> +        .share_level = CPU_TOPO_LEVEL_CORE,
> >>      },
> >>      .l1i_cache = &(CPUCacheInfo) {
> >>          .type = INSTRUCTION_CACHE,
> >> @@ -2020,6 +2026,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
> >>          .lines_per_tag = 1,
> >>          .self_init = 1,
> >>          .no_invd_sharing = true,
> >> +        .share_level = CPU_TOPO_LEVEL_CORE,
> >>      },
> >>      .l2_cache = &(CPUCacheInfo) {
> >>          .type = UNIFIED_CACHE,
> >> @@ -2030,6 +2037,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
> >>          .partitions = 1,
> >>          .sets = 1024,
> >>          .lines_per_tag = 1,
> >> +        .share_level = CPU_TOPO_LEVEL_CORE,
> >>      },
> >>      .l3_cache = &(CPUCacheInfo) {
> >>          .type = UNIFIED_CACHE,
> >> @@ -2043,6 +2051,7 @@ static const CPUCaches epyc_rome_v3_cache_info = {
> >>          .self_init = true,
> >>          .inclusive = true,
> >>          .complex_indexing = false,
> >> +        .share_level = CPU_TOPO_LEVEL_DIE,
> >>      },
> >>  };
> >>
> >> @@ -2112,6 +2121,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
> >>          .lines_per_tag = 1,
> >>          .self_init = 1,
> >>          .no_invd_sharing = true,
> >> +        .share_level = CPU_TOPO_LEVEL_CORE,
> >>      },
> >>      .l1i_cache = &(CPUCacheInfo) {
> >>          .type = INSTRUCTION_CACHE,
> >> @@ -2124,6 +2134,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
> >>          .lines_per_tag = 1,
> >>          .self_init = 1,
> >>          .no_invd_sharing = true,
> >> +        .share_level = CPU_TOPO_LEVEL_CORE,
> >>      },
> >>      .l2_cache = &(CPUCacheInfo) {
> >>          .type = UNIFIED_CACHE,
> >> @@ -2134,6 +2145,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
> >>          .partitions = 1,
> >>          .sets = 1024,
> >>          .lines_per_tag = 1,
> >> +        .share_level = CPU_TOPO_LEVEL_CORE,
> >>      },
> >>      .l3_cache = &(CPUCacheInfo) {
> >>          .type = UNIFIED_CACHE,
> >> @@ -2147,6 +2159,7 @@ static const CPUCaches epyc_milan_v2_cache_info = {
> >>          .self_init = true,
> >>          .inclusive = true,
> >>          .complex_indexing = false,
> >> +        .share_level = CPU_TOPO_LEVEL_DIE,
> >>      },
> >>  };
> >>
> >> @@ -2162,6 +2175,7 @@ static const CPUCaches epyc_genoa_cache_info = {
> >>          .lines_per_tag = 1,
> >>          .self_init = 1,
> >>          .no_invd_sharing = true,
> >> +        .share_level = CPU_TOPO_LEVEL_CORE,
> >>      },
> >>      .l1i_cache = &(CPUCacheInfo) {
> >>          .type = INSTRUCTION_CACHE,
> >> @@ -2174,6 +2188,7 @@ static const CPUCaches epyc_genoa_cache_info = {
> >>          .lines_per_tag = 1,
> >>          .self_init = 1,
> >>          .no_invd_sharing = true,
> >> +        .share_level = CPU_TOPO_LEVEL_CORE,
> >>      },
> >>      .l2_cache = &(CPUCacheInfo) {
> >>          .type = UNIFIED_CACHE,
> >> @@ -2184,6 +2199,7 @@ static const CPUCaches epyc_genoa_cache_info = {
> >>          .partitions = 1,
> >>          .sets = 2048,
> >>          .lines_per_tag = 1,
> >> +        .share_level = CPU_TOPO_LEVEL_CORE,
> >>      },
> >>      .l3_cache = &(CPUCacheInfo) {
> >>          .type = UNIFIED_CACHE,
> >> @@ -2197,6 +2213,7 @@ static const CPUCaches epyc_genoa_cache_info = {
> >>          .self_init = true,
> >>          .inclusive = true,
> >>          .complex_indexing = false,
> >> +        .share_level = CPU_TOPO_LEVEL_DIE,
> >>      },
> >>  };
> >>
> >>
> >> =========================================================================
> > 
> > 
> > Look good to me except legacy_l2_cache_cpuid2, thanks very much!
> > I'll add this in next version.
> > 
> > -Zhao
> > 
> >>
> >> Thanks
> >> Babu
> >>>
> >>> On 8/1/23 05:35, Zhao Liu wrote:
> >>>> From: Zhao Liu <zhao1.liu@intel.com>
> >>>>
> >>>> CPUID[4].EAX[bits 25:14] is used to represent the cache topology for
> >>>> intel CPUs.
> >>>>
> >>>> After cache models have topology information, we can use
> >>>> CPUCacheInfo.share_level to decide which topology level to be encoded
> >>>> into CPUID[4].EAX[bits 25:14].
> >>>>
> >>>> And since maximum_processor_id (original "num_apic_ids") is parsed
> >>>> based on cpu topology levels, which are verified when parsing smp, it's
> >>>> no need to check this value by "assert(num_apic_ids > 0)" again, so
> >>>> remove this assert.
> >>>>
> >>>> Additionally, wrap the encoding of CPUID[4].EAX[bits 31:26] into a
> >>>> helper to make the code cleaner.
> >>>>
> >>>> Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> >>>> ---
> >>>> Changes since v1:
> >>>>  * Use "enum CPUTopoLevel share_level" as the parameter in
> >>>>    max_processor_ids_for_cache().
> >>>>  * Make cache_into_passthrough case also use
> >>>>    max_processor_ids_for_cache() and max_core_ids_in_package() to
> >>>>    encode CPUID[4]. (Yanan)
> >>>>  * Rename the title of this patch (the original is "i386: Use
> >>>>    CPUCacheInfo.share_level to encode CPUID[4].EAX[bits 25:14]").
> >>>> ---
> >>>>  target/i386/cpu.c | 70 +++++++++++++++++++++++++++++------------------
> >>>>  1 file changed, 43 insertions(+), 27 deletions(-)
> >>>>
> >>>> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> >>>> index 55aba4889628..c9897c0fe91a 100644
> >>>> --- a/target/i386/cpu.c
> >>>> +++ b/target/i386/cpu.c
> >>>> @@ -234,22 +234,53 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache)
> >>>>                         ((t) == UNIFIED_CACHE) ? CACHE_TYPE_UNIFIED : \
> >>>>                         0 /* Invalid value */)
> >>>>  
> >>>> +static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info,
> >>>> +                                            enum CPUTopoLevel share_level)
> >>>> +{
> >>>> +    uint32_t num_ids = 0;
> >>>> +
> >>>> +    switch (share_level) {
> >>>> +    case CPU_TOPO_LEVEL_CORE:
> >>>> +        num_ids = 1 << apicid_core_offset(topo_info);
> >>>> +        break;
> >>>> +    case CPU_TOPO_LEVEL_DIE:
> >>>> +        num_ids = 1 << apicid_die_offset(topo_info);
> >>>> +        break;
> >>>> +    case CPU_TOPO_LEVEL_PACKAGE:
> >>>> +        num_ids = 1 << apicid_pkg_offset(topo_info);
> >>>> +        break;
> >>>> +    default:
> >>>> +        /*
> >>>> +         * Currently there is no use case for SMT and MODULE, so use
> >>>> +         * assert directly to facilitate debugging.
> >>>> +         */
> >>>> +        g_assert_not_reached();
> >>>> +    }
> >>>> +
> >>>> +    return num_ids - 1;
> >>>> +}
> >>>> +
> >>>> +static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info)
> >>>> +{
> >>>> +    uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) -
> >>>> +                               apicid_core_offset(topo_info));
> >>>> +    return num_cores - 1;
> >>>> +}
> >>>>  
> >>>>  /* Encode cache info for CPUID[4] */
> >>>>  static void encode_cache_cpuid4(CPUCacheInfo *cache,
> >>>> -                                int num_apic_ids, int num_cores,
> >>>> +                                X86CPUTopoInfo *topo_info,
> >>>>                                  uint32_t *eax, uint32_t *ebx,
> >>>>                                  uint32_t *ecx, uint32_t *edx)
> >>>>  {
> >>>>      assert(cache->size == cache->line_size * cache->associativity *
> >>>>                            cache->partitions * cache->sets);
> >>>>  
> >>>> -    assert(num_apic_ids > 0);
> >>>>      *eax = CACHE_TYPE(cache->type) |
> >>>>             CACHE_LEVEL(cache->level) |
> >>>>             (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) |
> >>>> -           ((num_cores - 1) << 26) |
> >>>> -           ((num_apic_ids - 1) << 14);
> >>>> +           (max_core_ids_in_package(topo_info) << 26) |
> >>>> +           (max_processor_ids_for_cache(topo_info, cache->share_level) << 14);
> >>>>  
> >>>>      assert(cache->line_size > 0);
> >>>>      assert(cache->partitions > 0);
> >>>> @@ -6116,56 +6147,41 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
> >>>>                  int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
> >>>>  
> >>>>                  if (cores_per_pkg > 1) {
> >>>> -                    int addressable_cores_offset =
> >>>> -                                                apicid_pkg_offset(&topo_info) -
> >>>> -                                                apicid_core_offset(&topo_info);
> >>>> -
> >>>>                      *eax &= ~0xFC000000;
> >>>> -                    *eax |= (1 << addressable_cores_offset - 1) << 26;
> >>>> +                    *eax |= max_core_ids_in_package(&topo_info) << 26;
> >>>>                  }
> >>>>                  if (host_vcpus_per_cache > cpus_per_pkg) {
> >>>> -                    int pkg_offset = apicid_pkg_offset(&topo_info);
> >>>> -
> >>>>                      *eax &= ~0x3FFC000;
> >>>> -                    *eax |= (1 << pkg_offset - 1) << 14;
> >>>> +                    *eax |=
> >>>> +                        max_processor_ids_for_cache(&topo_info,
> >>>> +                                                CPU_TOPO_LEVEL_PACKAGE) << 14;
> >>>>                  }
> >>>>              }
> >>>>          } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) {
> >>>>              *eax = *ebx = *ecx = *edx = 0;
> >>>>          } else {
> >>>>              *eax = 0;
> >>>> -            int addressable_cores_offset = apicid_pkg_offset(&topo_info) -
> >>>> -                                           apicid_core_offset(&topo_info);
> >>>> -            int core_offset, die_offset;
> >>>>  
> >>>>              switch (count) {
> >>>>              case 0: /* L1 dcache info */
> >>>> -                core_offset = apicid_core_offset(&topo_info);
> >>>>                  encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache,
> >>>> -                                    (1 << core_offset),
> >>>> -                                    (1 << addressable_cores_offset),
> >>>> +                                    &topo_info,
> >>>>                                      eax, ebx, ecx, edx);
> >>>>                  break;
> >>>>              case 1: /* L1 icache info */
> >>>> -                core_offset = apicid_core_offset(&topo_info);
> >>>>                  encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache,
> >>>> -                                    (1 << core_offset),
> >>>> -                                    (1 << addressable_cores_offset),
> >>>> +                                    &topo_info,
> >>>>                                      eax, ebx, ecx, edx);
> >>>>                  break;
> >>>>              case 2: /* L2 cache info */
> >>>> -                core_offset = apicid_core_offset(&topo_info);
> >>>>                  encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache,
> >>>> -                                    (1 << core_offset),
> >>>> -                                    (1 << addressable_cores_offset),
> >>>> +                                    &topo_info,
> >>>>                                      eax, ebx, ecx, edx);
> >>>>                  break;
> >>>>              case 3: /* L3 cache info */
> >>>> -                die_offset = apicid_die_offset(&topo_info);
> >>>>                  if (cpu->enable_l3_cache) {
> >>>>                      encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache,
> >>>> -                                        (1 << die_offset),
> >>>> -                                        (1 << addressable_cores_offset),
> >>>> +                                        &topo_info,
> >>>>                                          eax, ebx, ecx, edx);
> >>>>                      break;
> >>>>                  }
> >>>
> >>
> >> -- 
> >> Thanks
> >> Babu Moger
> 
> -- 
> Thanks
> Babu Moger
Moger, Babu Aug. 14, 2023, 4:03 p.m. UTC | #6
Hi Zhao,


On 8/14/23 03:22, Zhao Liu wrote:
> Hi Babu,
> 
> On Fri, Aug 04, 2023 at 10:48:29AM -0500, Moger, Babu wrote:
>> Date: Fri, 4 Aug 2023 10:48:29 -0500
>> From: "Moger, Babu" <babu.moger@amd.com>
>> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
>>  CPUID[4]
>>
>> Hi Zhao,
>>
>> On 8/4/23 04:48, Zhao Liu wrote:
>>> Hi Babu,
>>>
>>> On Thu, Aug 03, 2023 at 11:41:40AM -0500, Moger, Babu wrote:
>>>> Date: Thu, 3 Aug 2023 11:41:40 -0500
>>>> From: "Moger, Babu" <babu.moger@amd.com>
>>>> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
>>>>  CPUID[4]
>>>>
>>>> Hi Zhao,
>>>>
>>>> On 8/2/23 18:49, Moger, Babu wrote:
>>>>> Hi Zhao,
>>>>>
>>>>> Hitting this error after this patch.
>>>>>
>>>>> ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should
>>>>> not be reached
>>>>> Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code
>>>>> should not be reached
>>>>> Aborted (core dumped)
>>>>>
>>>>> Looks like share_level for all the caches for AMD is not initialized.
>>>
>>> I missed these change when I rebase. Sorry for that.
>>>
>>> BTW, could I ask a question? From a previous discussion[1], I understand
>>> that the cache info is used to show the correct cache information in
>>> new machine. And from [2], the wrong cache info may cause "compatibility
>>> issues".
>>>
>>> Is this "compatibility issues" AMD specific? I'm not sure if Intel should
>>> update the cache info like that. thanks!
>>
>> I was going to comment about that. Good that you asked me.
>>
>> Compatibility is qemu requirement.  Otherwise the migrations will fail.
>>
>> Any changes in the topology is going to cause migration problems.
> 
> Could you please educate me more about the details of the "migration
> problem"?
> 
> I didn't understand why it was causing the problem and wasn't sure if I
> was missing any cases.
> 

I am not an expert on migration but I test VM migration sometimes.
Here are some guidelines.
https://developers.redhat.com/blog/2015/03/24/live-migrating-qemu-kvm-virtual-machines

When you migrate a VM to a newer qemu using the same CPU type, migration
should work seamlessly. That means the list of CPU features should be
compatible when you move to a newer qemu version with the same CPU type.

Thanks
Babu
Zhao Liu Aug. 18, 2023, 7:37 a.m. UTC | #7
Hi Babu,

On Mon, Aug 14, 2023 at 11:03:53AM -0500, Moger, Babu wrote:
> Date: Mon, 14 Aug 2023 11:03:53 -0500
> From: "Moger, Babu" <babu.moger@amd.com>
> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
>  CPUID[4]
> 
> Hi Zhao,
> 
> 
> On 8/14/23 03:22, Zhao Liu wrote:
> > Hi Babu,
> > 
> > On Fri, Aug 04, 2023 at 10:48:29AM -0500, Moger, Babu wrote:
> >> Date: Fri, 4 Aug 2023 10:48:29 -0500
> >> From: "Moger, Babu" <babu.moger@amd.com>
> >> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
> >>  CPUID[4]
> >>
> >> Hi Zhao,
> >>
> >> On 8/4/23 04:48, Zhao Liu wrote:
> >>> Hi Babu,
> >>>
> >>> On Thu, Aug 03, 2023 at 11:41:40AM -0500, Moger, Babu wrote:
> >>>> Date: Thu, 3 Aug 2023 11:41:40 -0500
> >>>> From: "Moger, Babu" <babu.moger@amd.com>
> >>>> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
> >>>>  CPUID[4]
> >>>>
> >>>> Hi Zhao,
> >>>>
> >>>> On 8/2/23 18:49, Moger, Babu wrote:
> >>>>> Hi Zhao,
> >>>>>
> >>>>> Hitting this error after this patch.
> >>>>>
> >>>>> ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should
> >>>>> not be reached
> >>>>> Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code
> >>>>> should not be reached
> >>>>> Aborted (core dumped)
> >>>>>
> >>>>> Looks like share_level for all the caches for AMD is not initialized.
> >>>
> >>> I missed these change when I rebase. Sorry for that.
> >>>
> >>> BTW, could I ask a question? From a previous discussion[1], I understand
> >>> that the cache info is used to show the correct cache information in
> >>> new machine. And from [2], the wrong cache info may cause "compatibility
> >>> issues".
> >>>
> >>> Is this "compatibility issues" AMD specific? I'm not sure if Intel should
> >>> update the cache info like that. thanks!
> >>
> >> I was going to comment about that. Good that you asked me.
> >>
> >> Compatibility is qemu requirement.  Otherwise the migrations will fail.
> >>
> >> Any changes in the topology is going to cause migration problems.
> > 
> > Could you please educate me more about the details of the "migration
> > problem"?
> > 
> > I didn't understand why it was causing the problem and wasn't sure if I
> > was missing any cases.
> > 
> 
> I am not an expert on migration but I test VM migration sometimes.
> Here are some guidelines.
> https://developers.redhat.com/blog/2015/03/24/live-migrating-qemu-kvm-virtual-machines

Thanks for the material!

> 
> When you migrate a VM to newer qemu using the same CPU type, migration
> should work seamless. That means list of CPU features should be compatible
> when you move to newer qemu version with CPU type.

I see. This patch set adds the "-smp cluster" command and the
"x-l2-cache-topo" command. Migration requires that the target and
source VM command lines are the same, so the new commands ensure that
the migration is consistent.

But this patch set also includes some topology fixes (nr_cores fix and
l1 cache topology fix) and encoding change (use APIC ID offset to encode
addressable ids), these changes would affect migration and may cause
CPUID change for VM view. Thus if this patch set is accepted, these
changes also need to be pushed into stable versions. Do you agree?

And about cache info for different CPU generations, migration usually
happens on the same CPU type, and Intel uses the same default cache
info for all CPU types. With the consistent cache info, migration is
also Ok. So if we don't care about the specific cache info in the VM,
it's okay to use the same default cache info for all CPU types. Right?

Thanks,
Zhao
Moger, Babu Aug. 23, 2023, 5:18 p.m. UTC | #8
Hi Zhao,

On 8/18/23 02:37, Zhao Liu wrote:
> Hi Babu,
> 
> On Mon, Aug 14, 2023 at 11:03:53AM -0500, Moger, Babu wrote:
>> Date: Mon, 14 Aug 2023 11:03:53 -0500
>> From: "Moger, Babu" <babu.moger@amd.com>
>> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
>>  CPUID[4]
>>
>> Hi Zhao,
>>
>>
>> On 8/14/23 03:22, Zhao Liu wrote:
>>> Hi Babu,
>>>
>>> On Fri, Aug 04, 2023 at 10:48:29AM -0500, Moger, Babu wrote:
>>>> Date: Fri, 4 Aug 2023 10:48:29 -0500
>>>> From: "Moger, Babu" <babu.moger@amd.com>
>>>> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
>>>>  CPUID[4]
>>>>
>>>> Hi Zhao,
>>>>
>>>> On 8/4/23 04:48, Zhao Liu wrote:
>>>>> Hi Babu,
>>>>>
>>>>> On Thu, Aug 03, 2023 at 11:41:40AM -0500, Moger, Babu wrote:
>>>>>> Date: Thu, 3 Aug 2023 11:41:40 -0500
>>>>>> From: "Moger, Babu" <babu.moger@amd.com>
>>>>>> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
>>>>>>  CPUID[4]
>>>>>>
>>>>>> Hi Zhao,
>>>>>>
>>>>>> On 8/2/23 18:49, Moger, Babu wrote:
>>>>>>> Hi Zhao,
>>>>>>>
>>>>>>> Hitting this error after this patch.
>>>>>>>
>>>>>>> ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should
>>>>>>> not be reached
>>>>>>> Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code
>>>>>>> should not be reached
>>>>>>> Aborted (core dumped)
>>>>>>>
>>>>>>> Looks like share_level for all the caches for AMD is not initialized.
>>>>>
>>>>> I missed these change when I rebase. Sorry for that.
>>>>>
>>>>> BTW, could I ask a question? From a previous discussion[1], I understand
>>>>> that the cache info is used to show the correct cache information in
>>>>> new machine. And from [2], the wrong cache info may cause "compatibility
>>>>> issues".
>>>>>
>>>>> Is this "compatibility issues" AMD specific? I'm not sure if Intel should
>>>>> update the cache info like that. thanks!
>>>>
>>>> I was going to comment about that. Good that you asked me.
>>>>
>>>> Compatibility is qemu requirement.  Otherwise the migrations will fail.
>>>>
>>>> Any changes in the topology is going to cause migration problems.
>>>
>>> Could you please educate me more about the details of the "migration
>>> problem"?
>>>
>>> I didn't understand why it was causing the problem and wasn't sure if I
>>> was missing any cases.
>>>
>>
>> I am not an expert on migration but I test VM migration sometimes.
>> Here are some guidelines.
>> https://developers.redhat.com/blog/2015/03/24/live-migrating-qemu-kvm-virtual-machines
> 
> Thanks for the material!
> 
>>
>> When you migrate a VM to newer qemu using the same CPU type, migration
>> should work seamless. That means list of CPU features should be compatible
>> when you move to newer qemu version with CPU type.
> 
> I see. This patches set adds the "-smp cluster" command and the
> "x-l2-cache-topo" command. Migration requires that the target and

Shouldn't the command x-l2-cache-topo be disabled by default? (For example,
look at the property x-migrate-smi-count in hw/i386/pc.c.)

It will be enabled when user passes "-cpu x-l2-cache-topo=[core|cluster]".
The current code enables it by default as far as I can see.

> source VM command lines are the same, so the new commands ensure that
> the migration is consistent.
> 
> But this patch set also includes some topology fixes (nr_cores fix and
> l1 cache topology fix) and encoding change (use APIC ID offset to encode
> addressable ids), these changes would affect migration and may cause
> CPUID change for VM view. Thus if this patch set is accepted, these
> changes also need to be pushed into stable versions. Do you agree?

Yes. That sounds right.

> 
> And about cache info for different CPU generations, migration usually
> happens on the same CPU type, and Intel uses the same default cache
> info for all CPU types. With the consistent cache info, migration is
> also Ok. So if we don't care about the specific cache info in the VM,
> it's okay to use the same default cache info for all CPU types. Right?

I am not sure about this. Please run migration tests to be sure.
Zhao Liu Sept. 1, 2023, 8:43 a.m. UTC | #9
Hi Babu,

On Wed, Aug 23, 2023 at 12:18:30PM -0500, Moger, Babu wrote:
> Date: Wed, 23 Aug 2023 12:18:30 -0500
> From: "Moger, Babu" <babu.moger@amd.com>
> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
>  CPUID[4]
> 
> Hi Zhao,
> 
> On 8/18/23 02:37, Zhao Liu wrote:
> > Hi Babu,
> > 
> > On Mon, Aug 14, 2023 at 11:03:53AM -0500, Moger, Babu wrote:
> >> Date: Mon, 14 Aug 2023 11:03:53 -0500
> >> From: "Moger, Babu" <babu.moger@amd.com>
> >> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
> >>  CPUID[4]
> >>
> >> Hi Zhao,
> >>
> >>
> >> On 8/14/23 03:22, Zhao Liu wrote:
> >>> Hi Babu,
> >>>
> >>> On Fri, Aug 04, 2023 at 10:48:29AM -0500, Moger, Babu wrote:
> >>>> Date: Fri, 4 Aug 2023 10:48:29 -0500
> >>>> From: "Moger, Babu" <babu.moger@amd.com>
> >>>> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
> >>>>  CPUID[4]
> >>>>
> >>>> Hi Zhao,
> >>>>
> >>>> On 8/4/23 04:48, Zhao Liu wrote:
> >>>>> Hi Babu,
> >>>>>
> >>>>> On Thu, Aug 03, 2023 at 11:41:40AM -0500, Moger, Babu wrote:
> >>>>>> Date: Thu, 3 Aug 2023 11:41:40 -0500
> >>>>>> From: "Moger, Babu" <babu.moger@amd.com>
> >>>>>> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode
> >>>>>>  CPUID[4]
> >>>>>>
> >>>>>> Hi Zhao,
> >>>>>>
> >>>>>> On 8/2/23 18:49, Moger, Babu wrote:
> >>>>>>> Hi Zhao,
> >>>>>>>
> >>>>>>> Hitting this error after this patch.
> >>>>>>>
> >>>>>>> ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should
> >>>>>>> not be reached
> >>>>>>> Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code
> >>>>>>> should not be reached
> >>>>>>> Aborted (core dumped)
> >>>>>>>
> >>>>>>> Looks like share_level for all the caches for AMD is not initialized.
> >>>>>
> >>>>> I missed these change when I rebase. Sorry for that.
> >>>>>
> >>>>> BTW, could I ask a question? From a previous discussion[1], I understand
> >>>>> that the cache info is used to show the correct cache information in
> >>>>> new machine. And from [2], the wrong cache info may cause "compatibility
> >>>>> issues".
> >>>>>
> >>>>> Is this "compatibility issues" AMD specific? I'm not sure if Intel should
> >>>>> update the cache info like that. thanks!
> >>>>
> >>>> I was going to comment about that. Good that you asked me.
> >>>>
> >>>> Compatibility is qemu requirement.  Otherwise the migrations will fail.
> >>>>
> >>>> Any changes in the topology is going to cause migration problems.
> >>>
> >>> Could you please educate me more about the details of the "migration
> >>> problem"?
> >>>
> >>> I didn't understand why it was causing the problem and wasn't sure if I
> >>> was missing any cases.
> >>>
> >>
> >> I am not an expert on migration but I test VM migration sometimes.
> >> Here are some guidelines.
> >> https://developers.redhat.com/blog/2015/03/24/live-migrating-qemu-kvm-virtual-machines
> > 
> > Thanks for the material!
> > 
> >>
> >> When you migrate a VM to newer qemu using the same CPU type, migration
> >> should work seamless. That means list of CPU features should be compatible
> >> when you move to newer qemu version with CPU type.
> > 
> > I see. This patches set adds the "-smp cluster" command and the
> > "x-l2-cache-topo" command. Migration requires that the target and
> 
> Shouldn't the command x-l2-cache-topo disabled by default? (For example
> look at hw/i386/pc.c the property x-migrate-smi-count).

Thanks!

Since we add the default topology level in the cache models, the default
l2 topology is the level hardcoded in the cache model.

From this point, this option won't affect the migration between
different QEMU versions. If user doesn't change l2 to cluster, the
default l2 topology levels are the same (core level).

> 
> It will be enabled when user passes "-cpu x-l2-cache-topo=[core|cluster]".
> Current code enables it by default as far I can see.

I think the compatibility issue for x-migrate-smi-count is because it
has different default settings for different QEMU versions.

And for x-l2-cache-topo, it defaults to the level hardcoded in the cache
model, so there is no difference between new and old QEMUs.

> 
> > source VM command lines are the same, so the new commands ensure that
> > the migration is consistent.
> > 
> > But this patch set also includes some topology fixes (nr_cores fix and
> > l1 cache topology fix) and encoding change (use APIC ID offset to encode
> > addressable ids), these changes would affect migration and may cause
> > CPUID change for VM view. Thus if this patch set is accepted, these
> > changes also need to be pushed into stable versions. Do you agree?
> 
> Yes. That sounds right.
> 
> > 
> > And about cache info for different CPU generations, migration usually
> > happens on the same CPU type, and Intel uses the same default cache
> > info for all CPU types. With the consistent cache info, migration is
> > also Ok. So if we don't care about the specific cache info in the VM,
> > it's okay to use the same default cache info for all CPU types. Right?
> 
> I am not sure about this. Please run migration tests to be sure.

We tested for these cases:

1. v3 <-> v3: same cli (same setting in x-l2-cache-topo) cases succeed.

2. v3 <-> master base (no v3 patches): same cli or v3 with default level
   (as hardcoded in cache models) cases succeed.

Thanks,
Zhao
diff mbox series

Patch

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 55aba4889628..c9897c0fe91a 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -234,22 +234,53 @@  static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache)
                        ((t) == UNIFIED_CACHE) ? CACHE_TYPE_UNIFIED : \
                        0 /* Invalid value */)
 
+static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info,
+                                            enum CPUTopoLevel share_level)
+{
+    uint32_t num_ids = 0;
+
+    switch (share_level) {
+    case CPU_TOPO_LEVEL_CORE:
+        num_ids = 1 << apicid_core_offset(topo_info);
+        break;
+    case CPU_TOPO_LEVEL_DIE:
+        num_ids = 1 << apicid_die_offset(topo_info);
+        break;
+    case CPU_TOPO_LEVEL_PACKAGE:
+        num_ids = 1 << apicid_pkg_offset(topo_info);
+        break;
+    default:
+        /*
+         * Currently there is no use case for SMT and MODULE, so use
+         * assert directly to facilitate debugging.
+         */
+        g_assert_not_reached();
+    }
+
+    return num_ids - 1;
+}
+
+static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info)
+{
+    uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) -
+                               apicid_core_offset(topo_info));
+    return num_cores - 1;
+}
 
 /* Encode cache info for CPUID[4] */
 static void encode_cache_cpuid4(CPUCacheInfo *cache,
-                                int num_apic_ids, int num_cores,
+                                X86CPUTopoInfo *topo_info,
                                 uint32_t *eax, uint32_t *ebx,
                                 uint32_t *ecx, uint32_t *edx)
 {
     assert(cache->size == cache->line_size * cache->associativity *
                           cache->partitions * cache->sets);
 
-    assert(num_apic_ids > 0);
     *eax = CACHE_TYPE(cache->type) |
            CACHE_LEVEL(cache->level) |
            (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) |
-           ((num_cores - 1) << 26) |
-           ((num_apic_ids - 1) << 14);
+           (max_core_ids_in_package(topo_info) << 26) |
+           (max_processor_ids_for_cache(topo_info, cache->share_level) << 14);
 
     assert(cache->line_size > 0);
     assert(cache->partitions > 0);
@@ -6116,56 +6147,41 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                 int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
 
                 if (cores_per_pkg > 1) {
-                    int addressable_cores_offset =
-                                                apicid_pkg_offset(&topo_info) -
-                                                apicid_core_offset(&topo_info);
-
                     *eax &= ~0xFC000000;
-                    *eax |= (1 << addressable_cores_offset - 1) << 26;
+                    *eax |= max_core_ids_in_package(&topo_info) << 26;
                 }
                 if (host_vcpus_per_cache > cpus_per_pkg) {
-                    int pkg_offset = apicid_pkg_offset(&topo_info);
-
                     *eax &= ~0x3FFC000;
-                    *eax |= (1 << pkg_offset - 1) << 14;
+                    *eax |=
+                        max_processor_ids_for_cache(&topo_info,
+                                                CPU_TOPO_LEVEL_PACKAGE) << 14;
                 }
             }
         } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) {
             *eax = *ebx = *ecx = *edx = 0;
         } else {
             *eax = 0;
-            int addressable_cores_offset = apicid_pkg_offset(&topo_info) -
-                                           apicid_core_offset(&topo_info);
-            int core_offset, die_offset;
 
             switch (count) {
             case 0: /* L1 dcache info */
-                core_offset = apicid_core_offset(&topo_info);
                 encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache,
-                                    (1 << core_offset),
-                                    (1 << addressable_cores_offset),
+                                    &topo_info,
                                     eax, ebx, ecx, edx);
                 break;
             case 1: /* L1 icache info */
-                core_offset = apicid_core_offset(&topo_info);
                 encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache,
-                                    (1 << core_offset),
-                                    (1 << addressable_cores_offset),
+                                    &topo_info,
                                     eax, ebx, ecx, edx);
                 break;
             case 2: /* L2 cache info */
-                core_offset = apicid_core_offset(&topo_info);
                 encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache,
-                                    (1 << core_offset),
-                                    (1 << addressable_cores_offset),
+                                    &topo_info,
                                     eax, ebx, ecx, edx);
                 break;
             case 3: /* L3 cache info */
-                die_offset = apicid_die_offset(&topo_info);
                 if (cpu->enable_l3_cache) {
                     encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache,
-                                        (1 << die_offset),
-                                        (1 << addressable_cores_offset),
+                                        &topo_info,
                                         eax, ebx, ecx, edx);
                     break;
                 }