Message ID | 20230801103527.397756-15-zhao1.liu@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Support smp.clusters for x86 | expand |
Hi Zhao, Hitting this error after this patch. ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should not be reached Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should not be reached Aborted (core dumped) Looks like share_level for all the caches for AMD is not initialized. Thanks Babu On 8/1/23 05:35, Zhao Liu wrote: > From: Zhao Liu <zhao1.liu@intel.com> > > CPUID[4].EAX[bits 25:14] is used to represent the cache topology for > intel CPUs. > > After cache models have topology information, we can use > CPUCacheInfo.share_level to decide which topology level to be encoded > into CPUID[4].EAX[bits 25:14]. > > And since maximum_processor_id (original "num_apic_ids") is parsed > based on cpu topology levels, which are verified when parsing smp, it's > no need to check this value by "assert(num_apic_ids > 0)" again, so > remove this assert. > > Additionally, wrap the encoding of CPUID[4].EAX[bits 31:26] into a > helper to make the code cleaner. > > Signed-off-by: Zhao Liu <zhao1.liu@intel.com> > --- > Changes since v1: > * Use "enum CPUTopoLevel share_level" as the parameter in > max_processor_ids_for_cache(). > * Make cache_into_passthrough case also use > max_processor_ids_for_cache() and max_core_ids_in_package() to > encode CPUID[4]. (Yanan) > * Rename the title of this patch (the original is "i386: Use > CPUCacheInfo.share_level to encode CPUID[4].EAX[bits 25:14]"). > --- > target/i386/cpu.c | 70 +++++++++++++++++++++++++++++------------------ > 1 file changed, 43 insertions(+), 27 deletions(-) > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > index 55aba4889628..c9897c0fe91a 100644 > --- a/target/i386/cpu.c > +++ b/target/i386/cpu.c > @@ -234,22 +234,53 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache) > ((t) == UNIFIED_CACHE) ? 
CACHE_TYPE_UNIFIED : \ > 0 /* Invalid value */) > > +static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info, > + enum CPUTopoLevel share_level) > +{ > + uint32_t num_ids = 0; > + > + switch (share_level) { > + case CPU_TOPO_LEVEL_CORE: > + num_ids = 1 << apicid_core_offset(topo_info); > + break; > + case CPU_TOPO_LEVEL_DIE: > + num_ids = 1 << apicid_die_offset(topo_info); > + break; > + case CPU_TOPO_LEVEL_PACKAGE: > + num_ids = 1 << apicid_pkg_offset(topo_info); > + break; > + default: > + /* > + * Currently there is no use case for SMT and MODULE, so use > + * assert directly to facilitate debugging. > + */ > + g_assert_not_reached(); > + } > + > + return num_ids - 1; > +} > + > +static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info) > +{ > + uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) - > + apicid_core_offset(topo_info)); > + return num_cores - 1; > +} > > /* Encode cache info for CPUID[4] */ > static void encode_cache_cpuid4(CPUCacheInfo *cache, > - int num_apic_ids, int num_cores, > + X86CPUTopoInfo *topo_info, > uint32_t *eax, uint32_t *ebx, > uint32_t *ecx, uint32_t *edx) > { > assert(cache->size == cache->line_size * cache->associativity * > cache->partitions * cache->sets); > > - assert(num_apic_ids > 0); > *eax = CACHE_TYPE(cache->type) | > CACHE_LEVEL(cache->level) | > (cache->self_init ? 
CACHE_SELF_INIT_LEVEL : 0) | > - ((num_cores - 1) << 26) | > - ((num_apic_ids - 1) << 14); > + (max_core_ids_in_package(topo_info) << 26) | > + (max_processor_ids_for_cache(topo_info, cache->share_level) << 14); > > assert(cache->line_size > 0); > assert(cache->partitions > 0); > @@ -6116,56 +6147,41 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, > int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14); > > if (cores_per_pkg > 1) { > - int addressable_cores_offset = > - apicid_pkg_offset(&topo_info) - > - apicid_core_offset(&topo_info); > - > *eax &= ~0xFC000000; > - *eax |= (1 << addressable_cores_offset - 1) << 26; > + *eax |= max_core_ids_in_package(&topo_info) << 26; > } > if (host_vcpus_per_cache > cpus_per_pkg) { > - int pkg_offset = apicid_pkg_offset(&topo_info); > - > *eax &= ~0x3FFC000; > - *eax |= (1 << pkg_offset - 1) << 14; > + *eax |= > + max_processor_ids_for_cache(&topo_info, > + CPU_TOPO_LEVEL_PACKAGE) << 14; > } > } > } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { > *eax = *ebx = *ecx = *edx = 0; > } else { > *eax = 0; > - int addressable_cores_offset = apicid_pkg_offset(&topo_info) - > - apicid_core_offset(&topo_info); > - int core_offset, die_offset; > > switch (count) { > case 0: /* L1 dcache info */ > - core_offset = apicid_core_offset(&topo_info); > encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache, > - (1 << core_offset), > - (1 << addressable_cores_offset), > + &topo_info, > eax, ebx, ecx, edx); > break; > case 1: /* L1 icache info */ > - core_offset = apicid_core_offset(&topo_info); > encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache, > - (1 << core_offset), > - (1 << addressable_cores_offset), > + &topo_info, > eax, ebx, ecx, edx); > break; > case 2: /* L2 cache info */ > - core_offset = apicid_core_offset(&topo_info); > encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache, > - (1 << core_offset), > - (1 << addressable_cores_offset), > + &topo_info, > eax, ebx, ecx, edx); > break; > case 3: /* L3 
cache info */ > - die_offset = apicid_die_offset(&topo_info); > if (cpu->enable_l3_cache) { > encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache, > - (1 << die_offset), > - (1 << addressable_cores_offset), > + &topo_info, > eax, ebx, ecx, edx); > break; > }
Hi Zhao, On 8/2/23 18:49, Moger, Babu wrote: > Hi Zhao, > > Hitting this error after this patch. > > ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should > not be reached > Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code > should not be reached > Aborted (core dumped) > > Looks like share_level for all the caches for AMD is not initialized. The following patch fixes the problem. ====================================================== diff --git a/target/i386/cpu.c b/target/i386/cpu.c index f4c48e19fa..976a2755d8 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -528,6 +528,7 @@ static CPUCacheInfo legacy_l2_cache_cpuid2 = { .size = 2 * MiB, .line_size = 64, .associativity = 8, + .share_level = CPU_TOPO_LEVEL_CORE, }; @@ -1904,6 +1905,7 @@ static CPUCaches epyc_v4_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l1i_cache = &(CPUCacheInfo) { .type = INSTRUCTION_CACHE, @@ -1916,6 +1918,7 @@ static CPUCaches epyc_v4_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l2_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -1926,6 +1929,7 @@ static CPUCaches epyc_v4_cache_info = { .partitions = 1, .sets = 1024, .lines_per_tag = 1, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l3_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -1939,6 +1943,7 @@ static CPUCaches epyc_v4_cache_info = { .self_init = true, .inclusive = true, .complex_indexing = false, + .share_level = CPU_TOPO_LEVEL_DIE, }, }; @@ -2008,6 +2013,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l1i_cache = &(CPUCacheInfo) { .type = INSTRUCTION_CACHE, @@ -2020,6 +2026,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = 
CPU_TOPO_LEVEL_CORE, }, .l2_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -2030,6 +2037,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { .partitions = 1, .sets = 1024, .lines_per_tag = 1, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l3_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -2043,6 +2051,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { .self_init = true, .inclusive = true, .complex_indexing = false, + .share_level = CPU_TOPO_LEVEL_DIE, }, }; @@ -2112,6 +2121,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l1i_cache = &(CPUCacheInfo) { .type = INSTRUCTION_CACHE, @@ -2124,6 +2134,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l2_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -2134,6 +2145,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { .partitions = 1, .sets = 1024, .lines_per_tag = 1, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l3_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -2147,6 +2159,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { .self_init = true, .inclusive = true, .complex_indexing = false, + .share_level = CPU_TOPO_LEVEL_DIE, }, }; @@ -2162,6 +2175,7 @@ static const CPUCaches epyc_genoa_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l1i_cache = &(CPUCacheInfo) { .type = INSTRUCTION_CACHE, @@ -2174,6 +2188,7 @@ static const CPUCaches epyc_genoa_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l2_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -2184,6 +2199,7 @@ static const CPUCaches epyc_genoa_cache_info = { .partitions = 1, .sets = 2048, .lines_per_tag = 1, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l3_cache = &(CPUCacheInfo) { .type = 
UNIFIED_CACHE, @@ -2197,6 +2213,7 @@ static const CPUCaches epyc_genoa_cache_info = { .self_init = true, .inclusive = true, .complex_indexing = false, + .share_level = CPU_TOPO_LEVEL_DIE, }, }; ========================================================================= Thanks Babu > > On 8/1/23 05:35, Zhao Liu wrote: >> From: Zhao Liu <zhao1.liu@intel.com> >> >> CPUID[4].EAX[bits 25:14] is used to represent the cache topology for >> intel CPUs. >> >> After cache models have topology information, we can use >> CPUCacheInfo.share_level to decide which topology level to be encoded >> into CPUID[4].EAX[bits 25:14]. >> >> And since maximum_processor_id (original "num_apic_ids") is parsed >> based on cpu topology levels, which are verified when parsing smp, it's >> no need to check this value by "assert(num_apic_ids > 0)" again, so >> remove this assert. >> >> Additionally, wrap the encoding of CPUID[4].EAX[bits 31:26] into a >> helper to make the code cleaner. >> >> Signed-off-by: Zhao Liu <zhao1.liu@intel.com> >> --- >> Changes since v1: >> * Use "enum CPUTopoLevel share_level" as the parameter in >> max_processor_ids_for_cache(). >> * Make cache_into_passthrough case also use >> max_processor_ids_for_cache() and max_core_ids_in_package() to >> encode CPUID[4]. (Yanan) >> * Rename the title of this patch (the original is "i386: Use >> CPUCacheInfo.share_level to encode CPUID[4].EAX[bits 25:14]"). >> --- >> target/i386/cpu.c | 70 +++++++++++++++++++++++++++++------------------ >> 1 file changed, 43 insertions(+), 27 deletions(-) >> >> diff --git a/target/i386/cpu.c b/target/i386/cpu.c >> index 55aba4889628..c9897c0fe91a 100644 >> --- a/target/i386/cpu.c >> +++ b/target/i386/cpu.c >> @@ -234,22 +234,53 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache) >> ((t) == UNIFIED_CACHE) ? 
CACHE_TYPE_UNIFIED : \ >> 0 /* Invalid value */) >> >> +static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info, >> + enum CPUTopoLevel share_level) >> +{ >> + uint32_t num_ids = 0; >> + >> + switch (share_level) { >> + case CPU_TOPO_LEVEL_CORE: >> + num_ids = 1 << apicid_core_offset(topo_info); >> + break; >> + case CPU_TOPO_LEVEL_DIE: >> + num_ids = 1 << apicid_die_offset(topo_info); >> + break; >> + case CPU_TOPO_LEVEL_PACKAGE: >> + num_ids = 1 << apicid_pkg_offset(topo_info); >> + break; >> + default: >> + /* >> + * Currently there is no use case for SMT and MODULE, so use >> + * assert directly to facilitate debugging. >> + */ >> + g_assert_not_reached(); >> + } >> + >> + return num_ids - 1; >> +} >> + >> +static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info) >> +{ >> + uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) - >> + apicid_core_offset(topo_info)); >> + return num_cores - 1; >> +} >> >> /* Encode cache info for CPUID[4] */ >> static void encode_cache_cpuid4(CPUCacheInfo *cache, >> - int num_apic_ids, int num_cores, >> + X86CPUTopoInfo *topo_info, >> uint32_t *eax, uint32_t *ebx, >> uint32_t *ecx, uint32_t *edx) >> { >> assert(cache->size == cache->line_size * cache->associativity * >> cache->partitions * cache->sets); >> >> - assert(num_apic_ids > 0); >> *eax = CACHE_TYPE(cache->type) | >> CACHE_LEVEL(cache->level) | >> (cache->self_init ? 
CACHE_SELF_INIT_LEVEL : 0) | >> - ((num_cores - 1) << 26) | >> - ((num_apic_ids - 1) << 14); >> + (max_core_ids_in_package(topo_info) << 26) | >> + (max_processor_ids_for_cache(topo_info, cache->share_level) << 14); >> >> assert(cache->line_size > 0); >> assert(cache->partitions > 0); >> @@ -6116,56 +6147,41 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, >> int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14); >> >> if (cores_per_pkg > 1) { >> - int addressable_cores_offset = >> - apicid_pkg_offset(&topo_info) - >> - apicid_core_offset(&topo_info); >> - >> *eax &= ~0xFC000000; >> - *eax |= (1 << addressable_cores_offset - 1) << 26; >> + *eax |= max_core_ids_in_package(&topo_info) << 26; >> } >> if (host_vcpus_per_cache > cpus_per_pkg) { >> - int pkg_offset = apicid_pkg_offset(&topo_info); >> - >> *eax &= ~0x3FFC000; >> - *eax |= (1 << pkg_offset - 1) << 14; >> + *eax |= >> + max_processor_ids_for_cache(&topo_info, >> + CPU_TOPO_LEVEL_PACKAGE) << 14; >> } >> } >> } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { >> *eax = *ebx = *ecx = *edx = 0; >> } else { >> *eax = 0; >> - int addressable_cores_offset = apicid_pkg_offset(&topo_info) - >> - apicid_core_offset(&topo_info); >> - int core_offset, die_offset; >> >> switch (count) { >> case 0: /* L1 dcache info */ >> - core_offset = apicid_core_offset(&topo_info); >> encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache, >> - (1 << core_offset), >> - (1 << addressable_cores_offset), >> + &topo_info, >> eax, ebx, ecx, edx); >> break; >> case 1: /* L1 icache info */ >> - core_offset = apicid_core_offset(&topo_info); >> encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache, >> - (1 << core_offset), >> - (1 << addressable_cores_offset), >> + &topo_info, >> eax, ebx, ecx, edx); >> break; >> case 2: /* L2 cache info */ >> - core_offset = apicid_core_offset(&topo_info); >> encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache, >> - (1 << core_offset), >> - (1 << addressable_cores_offset), >> + 
&topo_info, >> eax, ebx, ecx, edx); >> break; >> case 3: /* L3 cache info */ >> - die_offset = apicid_die_offset(&topo_info); >> if (cpu->enable_l3_cache) { >> encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache, >> - (1 << die_offset), >> - (1 << addressable_cores_offset), >> + &topo_info, >> eax, ebx, ecx, edx); >> break; >> } >
Hi Babu, On Thu, Aug 03, 2023 at 11:41:40AM -0500, Moger, Babu wrote: > Date: Thu, 3 Aug 2023 11:41:40 -0500 > From: "Moger, Babu" <babu.moger@amd.com> > Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode > CPUID[4] > > Hi Zhao, > > On 8/2/23 18:49, Moger, Babu wrote: > > Hi Zhao, > > > > Hitting this error after this patch. > > > > ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should > > not be reached > > Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code > > should not be reached > > Aborted (core dumped) > > > > Looks like share_level for all the caches for AMD is not initialized. I missed these changes when I rebased. Sorry for that. BTW, could I ask a question? From a previous discussion[1], I understand that the cache info is used to show the correct cache information in the new machine. And from [2], the wrong cache info may cause "compatibility issues". Are these "compatibility issues" AMD-specific? I'm not sure if Intel should update the cache info like that. Thanks! [1]: https://patchwork.kernel.org/project/kvm/patch/CY4PR12MB1768A3CBE42AAFB03CB1081E95AA0@CY4PR12MB1768.namprd12.prod.outlook.com/ [2]: https://lore.kernel.org/qemu-devel/20180510204148.11687-1-babu.moger@amd.com/ > > The following patch fixes the problem. > > ====================================================== > > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > index f4c48e19fa..976a2755d8 100644 > --- a/target/i386/cpu.c > +++ b/target/i386/cpu.c > @@ -528,6 +528,7 @@ static CPUCacheInfo legacy_l2_cache_cpuid2 = { > .size = 2 * MiB, > .line_size = 64, > .associativity = 8, > + .share_level = CPU_TOPO_LEVEL_CORE, This "legacy_l2_cache_cpuid2" is not used to encode cache topology. I should explicitly set this default topo level to CPU_TOPO_LEVEL_UNKNOW. 
> }; > > > @@ -1904,6 +1905,7 @@ static CPUCaches epyc_v4_cache_info = { > .lines_per_tag = 1, > .self_init = 1, > .no_invd_sharing = true, > + .share_level = CPU_TOPO_LEVEL_CORE, > }, > .l1i_cache = &(CPUCacheInfo) { > .type = INSTRUCTION_CACHE, > @@ -1916,6 +1918,7 @@ static CPUCaches epyc_v4_cache_info = { > .lines_per_tag = 1, > .self_init = 1, > .no_invd_sharing = true, > + .share_level = CPU_TOPO_LEVEL_CORE, > }, > .l2_cache = &(CPUCacheInfo) { > .type = UNIFIED_CACHE, > @@ -1926,6 +1929,7 @@ static CPUCaches epyc_v4_cache_info = { > .partitions = 1, > .sets = 1024, > .lines_per_tag = 1, > + .share_level = CPU_TOPO_LEVEL_CORE, > }, > .l3_cache = &(CPUCacheInfo) { > .type = UNIFIED_CACHE, > @@ -1939,6 +1943,7 @@ static CPUCaches epyc_v4_cache_info = { > .self_init = true, > .inclusive = true, > .complex_indexing = false, > + .share_level = CPU_TOPO_LEVEL_DIE, > }, > }; > > @@ -2008,6 +2013,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { > .lines_per_tag = 1, > .self_init = 1, > .no_invd_sharing = true, > + .share_level = CPU_TOPO_LEVEL_CORE, > }, > .l1i_cache = &(CPUCacheInfo) { > .type = INSTRUCTION_CACHE, > @@ -2020,6 +2026,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { > .lines_per_tag = 1, > .self_init = 1, > .no_invd_sharing = true, > + .share_level = CPU_TOPO_LEVEL_CORE, > }, > .l2_cache = &(CPUCacheInfo) { > .type = UNIFIED_CACHE, > @@ -2030,6 +2037,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { > .partitions = 1, > .sets = 1024, > .lines_per_tag = 1, > + .share_level = CPU_TOPO_LEVEL_CORE, > }, > .l3_cache = &(CPUCacheInfo) { > .type = UNIFIED_CACHE, > @@ -2043,6 +2051,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { > .self_init = true, > .inclusive = true, > .complex_indexing = false, > + .share_level = CPU_TOPO_LEVEL_DIE, > }, > }; > > @@ -2112,6 +2121,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { > .lines_per_tag = 1, > .self_init = 1, > .no_invd_sharing = true, > + .share_level = CPU_TOPO_LEVEL_CORE, > 
}, > .l1i_cache = &(CPUCacheInfo) { > .type = INSTRUCTION_CACHE, > @@ -2124,6 +2134,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { > .lines_per_tag = 1, > .self_init = 1, > .no_invd_sharing = true, > + .share_level = CPU_TOPO_LEVEL_CORE, > }, > .l2_cache = &(CPUCacheInfo) { > .type = UNIFIED_CACHE, > @@ -2134,6 +2145,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { > .partitions = 1, > .sets = 1024, > .lines_per_tag = 1, > + .share_level = CPU_TOPO_LEVEL_CORE, > }, > .l3_cache = &(CPUCacheInfo) { > .type = UNIFIED_CACHE, > @@ -2147,6 +2159,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { > .self_init = true, > .inclusive = true, > .complex_indexing = false, > + .share_level = CPU_TOPO_LEVEL_DIE, > }, > }; > > @@ -2162,6 +2175,7 @@ static const CPUCaches epyc_genoa_cache_info = { > .lines_per_tag = 1, > .self_init = 1, > .no_invd_sharing = true, > + .share_level = CPU_TOPO_LEVEL_CORE, > }, > .l1i_cache = &(CPUCacheInfo) { > .type = INSTRUCTION_CACHE, > @@ -2174,6 +2188,7 @@ static const CPUCaches epyc_genoa_cache_info = { > .lines_per_tag = 1, > .self_init = 1, > .no_invd_sharing = true, > + .share_level = CPU_TOPO_LEVEL_CORE, > }, > .l2_cache = &(CPUCacheInfo) { > .type = UNIFIED_CACHE, > @@ -2184,6 +2199,7 @@ static const CPUCaches epyc_genoa_cache_info = { > .partitions = 1, > .sets = 2048, > .lines_per_tag = 1, > + .share_level = CPU_TOPO_LEVEL_CORE, > }, > .l3_cache = &(CPUCacheInfo) { > .type = UNIFIED_CACHE, > @@ -2197,6 +2213,7 @@ static const CPUCaches epyc_genoa_cache_info = { > .self_init = true, > .inclusive = true, > .complex_indexing = false, > + .share_level = CPU_TOPO_LEVEL_DIE, > }, > }; > > > ========================================================================= Looks good to me except legacy_l2_cache_cpuid2, thanks very much! I'll add this in the next version. 
-Zhao > > Thanks > Babu > > > > On 8/1/23 05:35, Zhao Liu wrote: > >> From: Zhao Liu <zhao1.liu@intel.com> > >> > >> CPUID[4].EAX[bits 25:14] is used to represent the cache topology for > >> intel CPUs. > >> > >> After cache models have topology information, we can use > >> CPUCacheInfo.share_level to decide which topology level to be encoded > >> into CPUID[4].EAX[bits 25:14]. > >> > >> And since maximum_processor_id (original "num_apic_ids") is parsed > >> based on cpu topology levels, which are verified when parsing smp, it's > >> no need to check this value by "assert(num_apic_ids > 0)" again, so > >> remove this assert. > >> > >> Additionally, wrap the encoding of CPUID[4].EAX[bits 31:26] into a > >> helper to make the code cleaner. > >> > >> Signed-off-by: Zhao Liu <zhao1.liu@intel.com> > >> --- > >> Changes since v1: > >> * Use "enum CPUTopoLevel share_level" as the parameter in > >> max_processor_ids_for_cache(). > >> * Make cache_into_passthrough case also use > >> max_processor_ids_for_cache() and max_core_ids_in_package() to > >> encode CPUID[4]. (Yanan) > >> * Rename the title of this patch (the original is "i386: Use > >> CPUCacheInfo.share_level to encode CPUID[4].EAX[bits 25:14]"). > >> --- > >> target/i386/cpu.c | 70 +++++++++++++++++++++++++++++------------------ > >> 1 file changed, 43 insertions(+), 27 deletions(-) > >> > >> diff --git a/target/i386/cpu.c b/target/i386/cpu.c > >> index 55aba4889628..c9897c0fe91a 100644 > >> --- a/target/i386/cpu.c > >> +++ b/target/i386/cpu.c > >> @@ -234,22 +234,53 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache) > >> ((t) == UNIFIED_CACHE) ? 
CACHE_TYPE_UNIFIED : \ > >> 0 /* Invalid value */) > >> > >> +static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info, > >> + enum CPUTopoLevel share_level) > >> +{ > >> + uint32_t num_ids = 0; > >> + > >> + switch (share_level) { > >> + case CPU_TOPO_LEVEL_CORE: > >> + num_ids = 1 << apicid_core_offset(topo_info); > >> + break; > >> + case CPU_TOPO_LEVEL_DIE: > >> + num_ids = 1 << apicid_die_offset(topo_info); > >> + break; > >> + case CPU_TOPO_LEVEL_PACKAGE: > >> + num_ids = 1 << apicid_pkg_offset(topo_info); > >> + break; > >> + default: > >> + /* > >> + * Currently there is no use case for SMT and MODULE, so use > >> + * assert directly to facilitate debugging. > >> + */ > >> + g_assert_not_reached(); > >> + } > >> + > >> + return num_ids - 1; > >> +} > >> + > >> +static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info) > >> +{ > >> + uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) - > >> + apicid_core_offset(topo_info)); > >> + return num_cores - 1; > >> +} > >> > >> /* Encode cache info for CPUID[4] */ > >> static void encode_cache_cpuid4(CPUCacheInfo *cache, > >> - int num_apic_ids, int num_cores, > >> + X86CPUTopoInfo *topo_info, > >> uint32_t *eax, uint32_t *ebx, > >> uint32_t *ecx, uint32_t *edx) > >> { > >> assert(cache->size == cache->line_size * cache->associativity * > >> cache->partitions * cache->sets); > >> > >> - assert(num_apic_ids > 0); > >> *eax = CACHE_TYPE(cache->type) | > >> CACHE_LEVEL(cache->level) | > >> (cache->self_init ? 
CACHE_SELF_INIT_LEVEL : 0) | > >> - ((num_cores - 1) << 26) | > >> - ((num_apic_ids - 1) << 14); > >> + (max_core_ids_in_package(topo_info) << 26) | > >> + (max_processor_ids_for_cache(topo_info, cache->share_level) << 14); > >> > >> assert(cache->line_size > 0); > >> assert(cache->partitions > 0); > >> @@ -6116,56 +6147,41 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, > >> int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14); > >> > >> if (cores_per_pkg > 1) { > >> - int addressable_cores_offset = > >> - apicid_pkg_offset(&topo_info) - > >> - apicid_core_offset(&topo_info); > >> - > >> *eax &= ~0xFC000000; > >> - *eax |= (1 << addressable_cores_offset - 1) << 26; > >> + *eax |= max_core_ids_in_package(&topo_info) << 26; > >> } > >> if (host_vcpus_per_cache > cpus_per_pkg) { > >> - int pkg_offset = apicid_pkg_offset(&topo_info); > >> - > >> *eax &= ~0x3FFC000; > >> - *eax |= (1 << pkg_offset - 1) << 14; > >> + *eax |= > >> + max_processor_ids_for_cache(&topo_info, > >> + CPU_TOPO_LEVEL_PACKAGE) << 14; > >> } > >> } > >> } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { > >> *eax = *ebx = *ecx = *edx = 0; > >> } else { > >> *eax = 0; > >> - int addressable_cores_offset = apicid_pkg_offset(&topo_info) - > >> - apicid_core_offset(&topo_info); > >> - int core_offset, die_offset; > >> > >> switch (count) { > >> case 0: /* L1 dcache info */ > >> - core_offset = apicid_core_offset(&topo_info); > >> encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache, > >> - (1 << core_offset), > >> - (1 << addressable_cores_offset), > >> + &topo_info, > >> eax, ebx, ecx, edx); > >> break; > >> case 1: /* L1 icache info */ > >> - core_offset = apicid_core_offset(&topo_info); > >> encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache, > >> - (1 << core_offset), > >> - (1 << addressable_cores_offset), > >> + &topo_info, > >> eax, ebx, ecx, edx); > >> break; > >> case 2: /* L2 cache info */ > >> - core_offset = apicid_core_offset(&topo_info); > >> 
encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache, > >> - (1 << core_offset), > >> - (1 << addressable_cores_offset), > >> + &topo_info, > >> eax, ebx, ecx, edx); > >> break; > >> case 3: /* L3 cache info */ > >> - die_offset = apicid_die_offset(&topo_info); > >> if (cpu->enable_l3_cache) { > >> encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache, > >> - (1 << die_offset), > >> - (1 << addressable_cores_offset), > >> + &topo_info, > >> eax, ebx, ecx, edx); > >> break; > >> } > > > > -- > Thanks > Babu Moger
Hi Zhao, On 8/4/23 04:48, Zhao Liu wrote: > Hi Babu, > > On Thu, Aug 03, 2023 at 11:41:40AM -0500, Moger, Babu wrote: >> Date: Thu, 3 Aug 2023 11:41:40 -0500 >> From: "Moger, Babu" <babu.moger@amd.com> >> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode >> CPUID[4] >> >> Hi Zhao, >> >> On 8/2/23 18:49, Moger, Babu wrote: >>> Hi Zhao, >>> >>> Hitting this error after this patch. >>> >>> ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should >>> not be reached >>> Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code >>> should not be reached >>> Aborted (core dumped) >>> >>> Looks like share_level for all the caches for AMD is not initialized. > > I missed these change when I rebase. Sorry for that. > > BTW, could I ask a question? From a previous discussion[1], I understand > that the cache info is used to show the correct cache information in > new machine. And from [2], the wrong cache info may cause "compatibility > issues". > > Is this "compatibility issues" AMD specific? I'm not sure if Intel should > update the cache info like that. thanks! I was going to comment about that. Good that you asked me. Compatibility is a QEMU requirement. Otherwise the migrations will fail. Any changes in the topology are going to cause migration problems. I am not sure how you are going to handle this. You can probably look at the feature "x-intel-pt-auto-level". Make sure to test the migration. Thanks Babu > > [1]: https://patchwork.kernel.org/project/kvm/patch/CY4PR12MB1768A3CBE42AAFB03CB1081E95AA0@CY4PR12MB1768.namprd12.prod.outlook.com/ > [2]: https://lore.kernel.org/qemu-devel/20180510204148.11687-1-babu.moger@amd.com/ > >> >> The following patch fixes the problem. 
>> >> ====================================================== >> >> >> diff --git a/target/i386/cpu.c b/target/i386/cpu.c >> index f4c48e19fa..976a2755d8 100644 >> --- a/target/i386/cpu.c >> +++ b/target/i386/cpu.c >> @@ -528,6 +528,7 @@ static CPUCacheInfo legacy_l2_cache_cpuid2 = { >> .size = 2 * MiB, >> .line_size = 64, >> .associativity = 8, >> + .share_level = CPU_TOPO_LEVEL_CORE, > > This "legacy_l2_cache_cpuid2" is not used to encode cache topology. > I should explicitly set this default topo level as CPU_TOPO_LEVEL_UNKNOW. > >> }; >> >> >> @@ -1904,6 +1905,7 @@ static CPUCaches epyc_v4_cache_info = { >> .lines_per_tag = 1, >> .self_init = 1, >> .no_invd_sharing = true, >> + .share_level = CPU_TOPO_LEVEL_CORE, >> }, >> .l1i_cache = &(CPUCacheInfo) { >> .type = INSTRUCTION_CACHE, >> @@ -1916,6 +1918,7 @@ static CPUCaches epyc_v4_cache_info = { >> .lines_per_tag = 1, >> .self_init = 1, >> .no_invd_sharing = true, >> + .share_level = CPU_TOPO_LEVEL_CORE, >> }, >> .l2_cache = &(CPUCacheInfo) { >> .type = UNIFIED_CACHE, >> @@ -1926,6 +1929,7 @@ static CPUCaches epyc_v4_cache_info = { >> .partitions = 1, >> .sets = 1024, >> .lines_per_tag = 1, >> + .share_level = CPU_TOPO_LEVEL_CORE, >> }, >> .l3_cache = &(CPUCacheInfo) { >> .type = UNIFIED_CACHE, >> @@ -1939,6 +1943,7 @@ static CPUCaches epyc_v4_cache_info = { >> .self_init = true, >> .inclusive = true, >> .complex_indexing = false, >> + .share_level = CPU_TOPO_LEVEL_DIE, >> }, >> }; >> >> @@ -2008,6 +2013,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { >> .lines_per_tag = 1, >> .self_init = 1, >> .no_invd_sharing = true, >> + .share_level = CPU_TOPO_LEVEL_CORE, >> }, >> .l1i_cache = &(CPUCacheInfo) { >> .type = INSTRUCTION_CACHE, >> @@ -2020,6 +2026,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { >> .lines_per_tag = 1, >> .self_init = 1, >> .no_invd_sharing = true, >> + .share_level = CPU_TOPO_LEVEL_CORE, >> }, >> .l2_cache = &(CPUCacheInfo) { >> .type = UNIFIED_CACHE, >> @@ -2030,6 +2037,7 @@ 
static const CPUCaches epyc_rome_v3_cache_info = { >> .partitions = 1, >> .sets = 1024, >> .lines_per_tag = 1, >> + .share_level = CPU_TOPO_LEVEL_CORE, >> }, >> .l3_cache = &(CPUCacheInfo) { >> .type = UNIFIED_CACHE, >> @@ -2043,6 +2051,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { >> .self_init = true, >> .inclusive = true, >> .complex_indexing = false, >> + .share_level = CPU_TOPO_LEVEL_DIE, >> }, >> }; >> >> @@ -2112,6 +2121,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { >> .lines_per_tag = 1, >> .self_init = 1, >> .no_invd_sharing = true, >> + .share_level = CPU_TOPO_LEVEL_CORE, >> }, >> .l1i_cache = &(CPUCacheInfo) { >> .type = INSTRUCTION_CACHE, >> @@ -2124,6 +2134,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { >> .lines_per_tag = 1, >> .self_init = 1, >> .no_invd_sharing = true, >> + .share_level = CPU_TOPO_LEVEL_CORE, >> }, >> .l2_cache = &(CPUCacheInfo) { >> .type = UNIFIED_CACHE, >> @@ -2134,6 +2145,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { >> .partitions = 1, >> .sets = 1024, >> .lines_per_tag = 1, >> + .share_level = CPU_TOPO_LEVEL_CORE, >> }, >> .l3_cache = &(CPUCacheInfo) { >> .type = UNIFIED_CACHE, >> @@ -2147,6 +2159,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { >> .self_init = true, >> .inclusive = true, >> .complex_indexing = false, >> + .share_level = CPU_TOPO_LEVEL_DIE, >> }, >> }; >> >> @@ -2162,6 +2175,7 @@ static const CPUCaches epyc_genoa_cache_info = { >> .lines_per_tag = 1, >> .self_init = 1, >> .no_invd_sharing = true, >> + .share_level = CPU_TOPO_LEVEL_CORE, >> }, >> .l1i_cache = &(CPUCacheInfo) { >> .type = INSTRUCTION_CACHE, >> @@ -2174,6 +2188,7 @@ static const CPUCaches epyc_genoa_cache_info = { >> .lines_per_tag = 1, >> .self_init = 1, >> .no_invd_sharing = true, >> + .share_level = CPU_TOPO_LEVEL_CORE, >> }, >> .l2_cache = &(CPUCacheInfo) { >> .type = UNIFIED_CACHE, >> @@ -2184,6 +2199,7 @@ static const CPUCaches epyc_genoa_cache_info = { >> .partitions = 1, >> .sets = 2048, 
>> .lines_per_tag = 1, >> + .share_level = CPU_TOPO_LEVEL_CORE, >> }, >> .l3_cache = &(CPUCacheInfo) { >> .type = UNIFIED_CACHE, >> @@ -2197,6 +2213,7 @@ static const CPUCaches epyc_genoa_cache_info = { >> .self_init = true, >> .inclusive = true, >> .complex_indexing = false, >> + .share_level = CPU_TOPO_LEVEL_DIE, >> }, >> }; >> >> >> ========================================================================= > > > Look good to me except legacy_l2_cache_cpuid2, thanks very much! > I'll add this in next version. > > -Zhao > >> >> Thanks >> Babu >>> >>> On 8/1/23 05:35, Zhao Liu wrote: >>>> From: Zhao Liu <zhao1.liu@intel.com> >>>> >>>> CPUID[4].EAX[bits 25:14] is used to represent the cache topology for >>>> intel CPUs. >>>> >>>> After cache models have topology information, we can use >>>> CPUCacheInfo.share_level to decide which topology level to be encoded >>>> into CPUID[4].EAX[bits 25:14]. >>>> >>>> And since maximum_processor_id (original "num_apic_ids") is parsed >>>> based on cpu topology levels, which are verified when parsing smp, it's >>>> no need to check this value by "assert(num_apic_ids > 0)" again, so >>>> remove this assert. >>>> >>>> Additionally, wrap the encoding of CPUID[4].EAX[bits 31:26] into a >>>> helper to make the code cleaner. >>>> >>>> Signed-off-by: Zhao Liu <zhao1.liu@intel.com> >>>> --- >>>> Changes since v1: >>>> * Use "enum CPUTopoLevel share_level" as the parameter in >>>> max_processor_ids_for_cache(). >>>> * Make cache_into_passthrough case also use >>>> max_processor_ids_for_cache() and max_core_ids_in_package() to >>>> encode CPUID[4]. (Yanan) >>>> * Rename the title of this patch (the original is "i386: Use >>>> CPUCacheInfo.share_level to encode CPUID[4].EAX[bits 25:14]"). 
>>>> --- >>>> target/i386/cpu.c | 70 +++++++++++++++++++++++++++++------------------ >>>> 1 file changed, 43 insertions(+), 27 deletions(-) >>>> >>>> diff --git a/target/i386/cpu.c b/target/i386/cpu.c >>>> index 55aba4889628..c9897c0fe91a 100644 >>>> --- a/target/i386/cpu.c >>>> +++ b/target/i386/cpu.c >>>> @@ -234,22 +234,53 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache) >>>> ((t) == UNIFIED_CACHE) ? CACHE_TYPE_UNIFIED : \ >>>> 0 /* Invalid value */) >>>> >>>> +static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info, >>>> + enum CPUTopoLevel share_level) >>>> +{ >>>> + uint32_t num_ids = 0; >>>> + >>>> + switch (share_level) { >>>> + case CPU_TOPO_LEVEL_CORE: >>>> + num_ids = 1 << apicid_core_offset(topo_info); >>>> + break; >>>> + case CPU_TOPO_LEVEL_DIE: >>>> + num_ids = 1 << apicid_die_offset(topo_info); >>>> + break; >>>> + case CPU_TOPO_LEVEL_PACKAGE: >>>> + num_ids = 1 << apicid_pkg_offset(topo_info); >>>> + break; >>>> + default: >>>> + /* >>>> + * Currently there is no use case for SMT and MODULE, so use >>>> + * assert directly to facilitate debugging. >>>> + */ >>>> + g_assert_not_reached(); >>>> + } >>>> + >>>> + return num_ids - 1; >>>> +} >>>> + >>>> +static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info) >>>> +{ >>>> + uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) - >>>> + apicid_core_offset(topo_info)); >>>> + return num_cores - 1; >>>> +} >>>> >>>> /* Encode cache info for CPUID[4] */ >>>> static void encode_cache_cpuid4(CPUCacheInfo *cache, >>>> - int num_apic_ids, int num_cores, >>>> + X86CPUTopoInfo *topo_info, >>>> uint32_t *eax, uint32_t *ebx, >>>> uint32_t *ecx, uint32_t *edx) >>>> { >>>> assert(cache->size == cache->line_size * cache->associativity * >>>> cache->partitions * cache->sets); >>>> >>>> - assert(num_apic_ids > 0); >>>> *eax = CACHE_TYPE(cache->type) | >>>> CACHE_LEVEL(cache->level) | >>>> (cache->self_init ? 
CACHE_SELF_INIT_LEVEL : 0) | >>>> - ((num_cores - 1) << 26) | >>>> - ((num_apic_ids - 1) << 14); >>>> + (max_core_ids_in_package(topo_info) << 26) | >>>> + (max_processor_ids_for_cache(topo_info, cache->share_level) << 14); >>>> >>>> assert(cache->line_size > 0); >>>> assert(cache->partitions > 0); >>>> @@ -6116,56 +6147,41 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, >>>> int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14); >>>> >>>> if (cores_per_pkg > 1) { >>>> - int addressable_cores_offset = >>>> - apicid_pkg_offset(&topo_info) - >>>> - apicid_core_offset(&topo_info); >>>> - >>>> *eax &= ~0xFC000000; >>>> - *eax |= (1 << addressable_cores_offset - 1) << 26; >>>> + *eax |= max_core_ids_in_package(&topo_info) << 26; >>>> } >>>> if (host_vcpus_per_cache > cpus_per_pkg) { >>>> - int pkg_offset = apicid_pkg_offset(&topo_info); >>>> - >>>> *eax &= ~0x3FFC000; >>>> - *eax |= (1 << pkg_offset - 1) << 14; >>>> + *eax |= >>>> + max_processor_ids_for_cache(&topo_info, >>>> + CPU_TOPO_LEVEL_PACKAGE) << 14; >>>> } >>>> } >>>> } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { >>>> *eax = *ebx = *ecx = *edx = 0; >>>> } else { >>>> *eax = 0; >>>> - int addressable_cores_offset = apicid_pkg_offset(&topo_info) - >>>> - apicid_core_offset(&topo_info); >>>> - int core_offset, die_offset; >>>> >>>> switch (count) { >>>> case 0: /* L1 dcache info */ >>>> - core_offset = apicid_core_offset(&topo_info); >>>> encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache, >>>> - (1 << core_offset), >>>> - (1 << addressable_cores_offset), >>>> + &topo_info, >>>> eax, ebx, ecx, edx); >>>> break; >>>> case 1: /* L1 icache info */ >>>> - core_offset = apicid_core_offset(&topo_info); >>>> encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache, >>>> - (1 << core_offset), >>>> - (1 << addressable_cores_offset), >>>> + &topo_info, >>>> eax, ebx, ecx, edx); >>>> break; >>>> case 2: /* L2 cache info */ >>>> - core_offset = apicid_core_offset(&topo_info); >>>> 
encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache, >>>> - (1 << core_offset), >>>> - (1 << addressable_cores_offset), >>>> + &topo_info, >>>> eax, ebx, ecx, edx); >>>> break; >>>> case 3: /* L3 cache info */ >>>> - die_offset = apicid_die_offset(&topo_info); >>>> if (cpu->enable_l3_cache) { >>>> encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache, >>>> - (1 << die_offset), >>>> - (1 << addressable_cores_offset), >>>> + &topo_info, >>>> eax, ebx, ecx, edx); >>>> break; >>>> } >>> >> >> -- >> Thanks >> Babu Moger
Hi Babu, On Fri, Aug 04, 2023 at 10:48:29AM -0500, Moger, Babu wrote: > Date: Fri, 4 Aug 2023 10:48:29 -0500 > From: "Moger, Babu" <babu.moger@amd.com> > Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode > CPUID[4] > > Hi Zhao, > > On 8/4/23 04:48, Zhao Liu wrote: > > Hi Babu, > > > > On Thu, Aug 03, 2023 at 11:41:40AM -0500, Moger, Babu wrote: > >> Date: Thu, 3 Aug 2023 11:41:40 -0500 > >> From: "Moger, Babu" <babu.moger@amd.com> > >> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode > >> CPUID[4] > >> > >> Hi Zhao, > >> > >> On 8/2/23 18:49, Moger, Babu wrote: > >>> Hi Zhao, > >>> > >>> Hitting this error after this patch. > >>> > >>> ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should > >>> not be reached > >>> Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code > >>> should not be reached > >>> Aborted (core dumped) > >>> > >>> Looks like share_level for all the caches for AMD is not initialized. > > > > I missed these change when I rebase. Sorry for that. > > > > BTW, could I ask a question? From a previous discussion[1], I understand > > that the cache info is used to show the correct cache information in > > new machine. And from [2], the wrong cache info may cause "compatibility > > issues". > > > > Is this "compatibility issues" AMD specific? I'm not sure if Intel should > > update the cache info like that. thanks! > > I was going to comment about that. Good that you asked me. > > Compatibility is qemu requirement. Otherwise the migrations will fail. > > Any changes in the topology is going to cause migration problems. Could you please educate me more about the details of the "migration problem"? I didn't understand why it was causing the problem and wasn't sure if I was missing any cases. Thanks, Zhao > > I am not sure how you are going to handle this. You can probably look at > the feature "x-intel-pt-auto-level". > > make sure to test the migration. 
> > Thanks > Babu > > > > > > [1]: https://patchwork.kernel.org/project/kvm/patch/CY4PR12MB1768A3CBE42AAFB03CB1081E95AA0@CY4PR12MB1768.namprd12.prod.outlook.com/ > > [2]: https://lore.kernel.org/qemu-devel/20180510204148.11687-1-babu.moger@amd.com/ > > > >> > >> The following patch fixes the problem. > >> > >> ====================================================== > >> > >> > >> diff --git a/target/i386/cpu.c b/target/i386/cpu.c > >> index f4c48e19fa..976a2755d8 100644 > >> --- a/target/i386/cpu.c > >> +++ b/target/i386/cpu.c > >> @@ -528,6 +528,7 @@ static CPUCacheInfo legacy_l2_cache_cpuid2 = { > >> .size = 2 * MiB, > >> .line_size = 64, > >> .associativity = 8, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > > > > This "legacy_l2_cache_cpuid2" is not used to encode cache topology. > > I should explicitly set this default topo level as CPU_TOPO_LEVEL_UNKNOW. > > > >> }; > >> > >> > >> @@ -1904,6 +1905,7 @@ static CPUCaches epyc_v4_cache_info = { > >> .lines_per_tag = 1, > >> .self_init = 1, > >> .no_invd_sharing = true, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l1i_cache = &(CPUCacheInfo) { > >> .type = INSTRUCTION_CACHE, > >> @@ -1916,6 +1918,7 @@ static CPUCaches epyc_v4_cache_info = { > >> .lines_per_tag = 1, > >> .self_init = 1, > >> .no_invd_sharing = true, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l2_cache = &(CPUCacheInfo) { > >> .type = UNIFIED_CACHE, > >> @@ -1926,6 +1929,7 @@ static CPUCaches epyc_v4_cache_info = { > >> .partitions = 1, > >> .sets = 1024, > >> .lines_per_tag = 1, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l3_cache = &(CPUCacheInfo) { > >> .type = UNIFIED_CACHE, > >> @@ -1939,6 +1943,7 @@ static CPUCaches epyc_v4_cache_info = { > >> .self_init = true, > >> .inclusive = true, > >> .complex_indexing = false, > >> + .share_level = CPU_TOPO_LEVEL_DIE, > >> }, > >> }; > >> > >> @@ -2008,6 +2013,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { > >> .lines_per_tag = 1, > >> .self_init = 1, > >> 
.no_invd_sharing = true, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l1i_cache = &(CPUCacheInfo) { > >> .type = INSTRUCTION_CACHE, > >> @@ -2020,6 +2026,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { > >> .lines_per_tag = 1, > >> .self_init = 1, > >> .no_invd_sharing = true, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l2_cache = &(CPUCacheInfo) { > >> .type = UNIFIED_CACHE, > >> @@ -2030,6 +2037,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { > >> .partitions = 1, > >> .sets = 1024, > >> .lines_per_tag = 1, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l3_cache = &(CPUCacheInfo) { > >> .type = UNIFIED_CACHE, > >> @@ -2043,6 +2051,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { > >> .self_init = true, > >> .inclusive = true, > >> .complex_indexing = false, > >> + .share_level = CPU_TOPO_LEVEL_DIE, > >> }, > >> }; > >> > >> @@ -2112,6 +2121,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { > >> .lines_per_tag = 1, > >> .self_init = 1, > >> .no_invd_sharing = true, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l1i_cache = &(CPUCacheInfo) { > >> .type = INSTRUCTION_CACHE, > >> @@ -2124,6 +2134,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { > >> .lines_per_tag = 1, > >> .self_init = 1, > >> .no_invd_sharing = true, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l2_cache = &(CPUCacheInfo) { > >> .type = UNIFIED_CACHE, > >> @@ -2134,6 +2145,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { > >> .partitions = 1, > >> .sets = 1024, > >> .lines_per_tag = 1, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l3_cache = &(CPUCacheInfo) { > >> .type = UNIFIED_CACHE, > >> @@ -2147,6 +2159,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { > >> .self_init = true, > >> .inclusive = true, > >> .complex_indexing = false, > >> + .share_level = CPU_TOPO_LEVEL_DIE, > >> }, > >> }; > >> > >> @@ -2162,6 +2175,7 @@ static const CPUCaches epyc_genoa_cache_info = { > >> 
.lines_per_tag = 1, > >> .self_init = 1, > >> .no_invd_sharing = true, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l1i_cache = &(CPUCacheInfo) { > >> .type = INSTRUCTION_CACHE, > >> @@ -2174,6 +2188,7 @@ static const CPUCaches epyc_genoa_cache_info = { > >> .lines_per_tag = 1, > >> .self_init = 1, > >> .no_invd_sharing = true, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l2_cache = &(CPUCacheInfo) { > >> .type = UNIFIED_CACHE, > >> @@ -2184,6 +2199,7 @@ static const CPUCaches epyc_genoa_cache_info = { > >> .partitions = 1, > >> .sets = 2048, > >> .lines_per_tag = 1, > >> + .share_level = CPU_TOPO_LEVEL_CORE, > >> }, > >> .l3_cache = &(CPUCacheInfo) { > >> .type = UNIFIED_CACHE, > >> @@ -2197,6 +2213,7 @@ static const CPUCaches epyc_genoa_cache_info = { > >> .self_init = true, > >> .inclusive = true, > >> .complex_indexing = false, > >> + .share_level = CPU_TOPO_LEVEL_DIE, > >> }, > >> }; > >> > >> > >> ========================================================================= > > > > > > Look good to me except legacy_l2_cache_cpuid2, thanks very much! > > I'll add this in next version. > > > > -Zhao > > > >> > >> Thanks > >> Babu > >>> > >>> On 8/1/23 05:35, Zhao Liu wrote: > >>>> From: Zhao Liu <zhao1.liu@intel.com> > >>>> > >>>> CPUID[4].EAX[bits 25:14] is used to represent the cache topology for > >>>> intel CPUs. > >>>> > >>>> After cache models have topology information, we can use > >>>> CPUCacheInfo.share_level to decide which topology level to be encoded > >>>> into CPUID[4].EAX[bits 25:14]. > >>>> > >>>> And since maximum_processor_id (original "num_apic_ids") is parsed > >>>> based on cpu topology levels, which are verified when parsing smp, it's > >>>> no need to check this value by "assert(num_apic_ids > 0)" again, so > >>>> remove this assert. > >>>> > >>>> Additionally, wrap the encoding of CPUID[4].EAX[bits 31:26] into a > >>>> helper to make the code cleaner. 
> >>>> > >>>> Signed-off-by: Zhao Liu <zhao1.liu@intel.com> > >>>> --- > >>>> Changes since v1: > >>>> * Use "enum CPUTopoLevel share_level" as the parameter in > >>>> max_processor_ids_for_cache(). > >>>> * Make cache_into_passthrough case also use > >>>> max_processor_ids_for_cache() and max_core_ids_in_package() to > >>>> encode CPUID[4]. (Yanan) > >>>> * Rename the title of this patch (the original is "i386: Use > >>>> CPUCacheInfo.share_level to encode CPUID[4].EAX[bits 25:14]"). > >>>> --- > >>>> target/i386/cpu.c | 70 +++++++++++++++++++++++++++++------------------ > >>>> 1 file changed, 43 insertions(+), 27 deletions(-) > >>>> > >>>> diff --git a/target/i386/cpu.c b/target/i386/cpu.c > >>>> index 55aba4889628..c9897c0fe91a 100644 > >>>> --- a/target/i386/cpu.c > >>>> +++ b/target/i386/cpu.c > >>>> @@ -234,22 +234,53 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache) > >>>> ((t) == UNIFIED_CACHE) ? CACHE_TYPE_UNIFIED : \ > >>>> 0 /* Invalid value */) > >>>> > >>>> +static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info, > >>>> + enum CPUTopoLevel share_level) > >>>> +{ > >>>> + uint32_t num_ids = 0; > >>>> + > >>>> + switch (share_level) { > >>>> + case CPU_TOPO_LEVEL_CORE: > >>>> + num_ids = 1 << apicid_core_offset(topo_info); > >>>> + break; > >>>> + case CPU_TOPO_LEVEL_DIE: > >>>> + num_ids = 1 << apicid_die_offset(topo_info); > >>>> + break; > >>>> + case CPU_TOPO_LEVEL_PACKAGE: > >>>> + num_ids = 1 << apicid_pkg_offset(topo_info); > >>>> + break; > >>>> + default: > >>>> + /* > >>>> + * Currently there is no use case for SMT and MODULE, so use > >>>> + * assert directly to facilitate debugging. 
> >>>> + */ > >>>> + g_assert_not_reached(); > >>>> + } > >>>> + > >>>> + return num_ids - 1; > >>>> +} > >>>> + > >>>> +static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info) > >>>> +{ > >>>> + uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) - > >>>> + apicid_core_offset(topo_info)); > >>>> + return num_cores - 1; > >>>> +} > >>>> > >>>> /* Encode cache info for CPUID[4] */ > >>>> static void encode_cache_cpuid4(CPUCacheInfo *cache, > >>>> - int num_apic_ids, int num_cores, > >>>> + X86CPUTopoInfo *topo_info, > >>>> uint32_t *eax, uint32_t *ebx, > >>>> uint32_t *ecx, uint32_t *edx) > >>>> { > >>>> assert(cache->size == cache->line_size * cache->associativity * > >>>> cache->partitions * cache->sets); > >>>> > >>>> - assert(num_apic_ids > 0); > >>>> *eax = CACHE_TYPE(cache->type) | > >>>> CACHE_LEVEL(cache->level) | > >>>> (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) | > >>>> - ((num_cores - 1) << 26) | > >>>> - ((num_apic_ids - 1) << 14); > >>>> + (max_core_ids_in_package(topo_info) << 26) | > >>>> + (max_processor_ids_for_cache(topo_info, cache->share_level) << 14); > >>>> > >>>> assert(cache->line_size > 0); > >>>> assert(cache->partitions > 0); > >>>> @@ -6116,56 +6147,41 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, > >>>> int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14); > >>>> > >>>> if (cores_per_pkg > 1) { > >>>> - int addressable_cores_offset = > >>>> - apicid_pkg_offset(&topo_info) - > >>>> - apicid_core_offset(&topo_info); > >>>> - > >>>> *eax &= ~0xFC000000; > >>>> - *eax |= (1 << addressable_cores_offset - 1) << 26; > >>>> + *eax |= max_core_ids_in_package(&topo_info) << 26; > >>>> } > >>>> if (host_vcpus_per_cache > cpus_per_pkg) { > >>>> - int pkg_offset = apicid_pkg_offset(&topo_info); > >>>> - > >>>> *eax &= ~0x3FFC000; > >>>> - *eax |= (1 << pkg_offset - 1) << 14; > >>>> + *eax |= > >>>> + max_processor_ids_for_cache(&topo_info, > >>>> + CPU_TOPO_LEVEL_PACKAGE) << 14; > >>>> } > >>>> } > 
>>>> } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { > >>>> *eax = *ebx = *ecx = *edx = 0; > >>>> } else { > >>>> *eax = 0; > >>>> - int addressable_cores_offset = apicid_pkg_offset(&topo_info) - > >>>> - apicid_core_offset(&topo_info); > >>>> - int core_offset, die_offset; > >>>> > >>>> switch (count) { > >>>> case 0: /* L1 dcache info */ > >>>> - core_offset = apicid_core_offset(&topo_info); > >>>> encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache, > >>>> - (1 << core_offset), > >>>> - (1 << addressable_cores_offset), > >>>> + &topo_info, > >>>> eax, ebx, ecx, edx); > >>>> break; > >>>> case 1: /* L1 icache info */ > >>>> - core_offset = apicid_core_offset(&topo_info); > >>>> encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache, > >>>> - (1 << core_offset), > >>>> - (1 << addressable_cores_offset), > >>>> + &topo_info, > >>>> eax, ebx, ecx, edx); > >>>> break; > >>>> case 2: /* L2 cache info */ > >>>> - core_offset = apicid_core_offset(&topo_info); > >>>> encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache, > >>>> - (1 << core_offset), > >>>> - (1 << addressable_cores_offset), > >>>> + &topo_info, > >>>> eax, ebx, ecx, edx); > >>>> break; > >>>> case 3: /* L3 cache info */ > >>>> - die_offset = apicid_die_offset(&topo_info); > >>>> if (cpu->enable_l3_cache) { > >>>> encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache, > >>>> - (1 << die_offset), > >>>> - (1 << addressable_cores_offset), > >>>> + &topo_info, > >>>> eax, ebx, ecx, edx); > >>>> break; > >>>> } > >>> > >> > >> -- > >> Thanks > >> Babu Moger > > -- > Thanks > Babu Moger
Hi Zhao, On 8/14/23 03:22, Zhao Liu wrote: > Hi Babu, > > On Fri, Aug 04, 2023 at 10:48:29AM -0500, Moger, Babu wrote: >> Date: Fri, 4 Aug 2023 10:48:29 -0500 >> From: "Moger, Babu" <babu.moger@amd.com> >> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode >> CPUID[4] >> >> Hi Zhao, >> >> On 8/4/23 04:48, Zhao Liu wrote: >>> Hi Babu, >>> >>> On Thu, Aug 03, 2023 at 11:41:40AM -0500, Moger, Babu wrote: >>>> Date: Thu, 3 Aug 2023 11:41:40 -0500 >>>> From: "Moger, Babu" <babu.moger@amd.com> >>>> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode >>>> CPUID[4] >>>> >>>> Hi Zhao, >>>> >>>> On 8/2/23 18:49, Moger, Babu wrote: >>>>> Hi Zhao, >>>>> >>>>> Hitting this error after this patch. >>>>> >>>>> ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should >>>>> not be reached >>>>> Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code >>>>> should not be reached >>>>> Aborted (core dumped) >>>>> >>>>> Looks like share_level for all the caches for AMD is not initialized. >>> >>> I missed these change when I rebase. Sorry for that. >>> >>> BTW, could I ask a question? From a previous discussion[1], I understand >>> that the cache info is used to show the correct cache information in >>> new machine. And from [2], the wrong cache info may cause "compatibility >>> issues". >>> >>> Is this "compatibility issues" AMD specific? I'm not sure if Intel should >>> update the cache info like that. thanks! >> >> I was going to comment about that. Good that you asked me. >> >> Compatibility is qemu requirement. Otherwise the migrations will fail. >> >> Any changes in the topology is going to cause migration problems. > > Could you please educate me more about the details of the "migration > problem"? > > I didn't understand why it was causing the problem and wasn't sure if I > was missing any cases. > I am not an expert on migration but I test VM migration sometimes. Here are some guidelines. 
https://developers.redhat.com/blog/2015/03/24/live-migrating-qemu-kvm-virtual-machines When you migrate a VM to a newer qemu using the same CPU type, migration should work seamlessly. That means the list of CPU features should be compatible when you move to a newer qemu version with the same CPU type. Thanks Babu
Hi Babu, On Mon, Aug 14, 2023 at 11:03:53AM -0500, Moger, Babu wrote: > Date: Mon, 14 Aug 2023 11:03:53 -0500 > From: "Moger, Babu" <babu.moger@amd.com> > Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode > CPUID[4] > > Hi Zhao, > > > On 8/14/23 03:22, Zhao Liu wrote: > > Hi Babu, > > > > On Fri, Aug 04, 2023 at 10:48:29AM -0500, Moger, Babu wrote: > >> Date: Fri, 4 Aug 2023 10:48:29 -0500 > >> From: "Moger, Babu" <babu.moger@amd.com> > >> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode > >> CPUID[4] > >> > >> Hi Zhao, > >> > >> On 8/4/23 04:48, Zhao Liu wrote: > >>> Hi Babu, > >>> > >>> On Thu, Aug 03, 2023 at 11:41:40AM -0500, Moger, Babu wrote: > >>>> Date: Thu, 3 Aug 2023 11:41:40 -0500 > >>>> From: "Moger, Babu" <babu.moger@amd.com> > >>>> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode > >>>> CPUID[4] > >>>> > >>>> Hi Zhao, > >>>> > >>>> On 8/2/23 18:49, Moger, Babu wrote: > >>>>> Hi Zhao, > >>>>> > >>>>> Hitting this error after this patch. > >>>>> > >>>>> ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should > >>>>> not be reached > >>>>> Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code > >>>>> should not be reached > >>>>> Aborted (core dumped) > >>>>> > >>>>> Looks like share_level for all the caches for AMD is not initialized. > >>> > >>> I missed these change when I rebase. Sorry for that. > >>> > >>> BTW, could I ask a question? From a previous discussion[1], I understand > >>> that the cache info is used to show the correct cache information in > >>> new machine. And from [2], the wrong cache info may cause "compatibility > >>> issues". > >>> > >>> Is this "compatibility issues" AMD specific? I'm not sure if Intel should > >>> update the cache info like that. thanks! > >> > >> I was going to comment about that. Good that you asked me. > >> > >> Compatibility is qemu requirement. Otherwise the migrations will fail. 
> >> > >> Any changes in the topology is going to cause migration problems. > > > > Could you please educate me more about the details of the "migration > > problem"? > > > > I didn't understand why it was causing the problem and wasn't sure if I > > was missing any cases. > > > > I am not an expert on migration but I test VM migration sometimes. > Here are some guidelines. > https://developers.redhat.com/blog/2015/03/24/live-migrating-qemu-kvm-virtual-machines Thanks for the material! > > When you migrate a VM to newer qemu using the same CPU type, migration > should work seamless. That means list of CPU features should be compatible > when you move to newer qemu version with CPU type. I see. This patches set adds the "-smp cluster" command and the "x-l2-cache-topo" command. Migration requires that the target and source VM command lines are the same, so the new commands ensure that the migration is consistent. But this patch set also includes some topology fixes (nr_cores fix and l1 cache topology fix) and encoding change (use APIC ID offset to encode addressable ids), these changes would affect migration and may cause CPUID change for VM view. Thus if this patch set is accepted, these changes also need to be pushed into stable versions. Do you agree? And about cache info for different CPU generations, migration usually happens on the same CPU type, and Intel uses the same default cache info for all CPU types. With the consistent cache info, migration is also Ok. So if we don't care about the specific cache info in the VM, it's okay to use the same default cache info for all CPU types. Right? Thanks, Zhao
Hi Zhao, On 8/18/23 02:37, Zhao Liu wrote: > Hi Babu, > > On Mon, Aug 14, 2023 at 11:03:53AM -0500, Moger, Babu wrote: >> Date: Mon, 14 Aug 2023 11:03:53 -0500 >> From: "Moger, Babu" <babu.moger@amd.com> >> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode >> CPUID[4] >> >> Hi Zhao, >> >> >> On 8/14/23 03:22, Zhao Liu wrote: >>> Hi Babu, >>> >>> On Fri, Aug 04, 2023 at 10:48:29AM -0500, Moger, Babu wrote: >>>> Date: Fri, 4 Aug 2023 10:48:29 -0500 >>>> From: "Moger, Babu" <babu.moger@amd.com> >>>> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode >>>> CPUID[4] >>>> >>>> Hi Zhao, >>>> >>>> On 8/4/23 04:48, Zhao Liu wrote: >>>>> Hi Babu, >>>>> >>>>> On Thu, Aug 03, 2023 at 11:41:40AM -0500, Moger, Babu wrote: >>>>>> Date: Thu, 3 Aug 2023 11:41:40 -0500 >>>>>> From: "Moger, Babu" <babu.moger@amd.com> >>>>>> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode >>>>>> CPUID[4] >>>>>> >>>>>> Hi Zhao, >>>>>> >>>>>> On 8/2/23 18:49, Moger, Babu wrote: >>>>>>> Hi Zhao, >>>>>>> >>>>>>> Hitting this error after this patch. >>>>>>> >>>>>>> ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should >>>>>>> not be reached >>>>>>> Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code >>>>>>> should not be reached >>>>>>> Aborted (core dumped) >>>>>>> >>>>>>> Looks like share_level for all the caches for AMD is not initialized. >>>>> >>>>> I missed these change when I rebase. Sorry for that. >>>>> >>>>> BTW, could I ask a question? From a previous discussion[1], I understand >>>>> that the cache info is used to show the correct cache information in >>>>> new machine. And from [2], the wrong cache info may cause "compatibility >>>>> issues". >>>>> >>>>> Is this "compatibility issues" AMD specific? I'm not sure if Intel should >>>>> update the cache info like that. thanks! >>>> >>>> I was going to comment about that. Good that you asked me. 
>>>> >>>> Compatibility is qemu requirement. Otherwise the migrations will fail. >>>> >>>> Any changes in the topology is going to cause migration problems. >>> >>> Could you please educate me more about the details of the "migration >>> problem"? >>> >>> I didn't understand why it was causing the problem and wasn't sure if I >>> was missing any cases. >>> >> >> I am not an expert on migration but I test VM migration sometimes. >> Here are some guidelines. >> https://developers.redhat.com/blog/2015/03/24/live-migrating-qemu-kvm-virtual-machines > > Thanks for the material! > >> >> When you migrate a VM to newer qemu using the same CPU type, migration >> should work seamless. That means list of CPU features should be compatible >> when you move to newer qemu version with CPU type. > > I see. This patches set adds the "-smp cluster" command and the > "x-l2-cache-topo" command. Migration requires that the target and Shouldn't the command x-l2-cache-topo disabled by default? (For example look at hw/i386/pc.c the property x-migrate-smi-count). It will be enabled when user passes "-cpu x-l2-cache-topo=[core|cluster]". Current code enables it by default as far I can see. > source VM command lines are the same, so the new commands ensure that > the migration is consistent. > > But this patch set also includes some topology fixes (nr_cores fix and > l1 cache topology fix) and encoding change (use APIC ID offset to encode > addressable ids), these changes would affect migration and may cause > CPUID change for VM view. Thus if this patch set is accepted, these > changes also need to be pushed into stable versions. Do you agree? Yes. That sounds right. > > And about cache info for different CPU generations, migration usually > happens on the same CPU type, and Intel uses the same default cache > info for all CPU types. With the consistent cache info, migration is > also Ok. 
So if we don't care about the specific cache info in the VM, > it's okay to use the same default cache info for all CPU types. Right? I am not sure about this. Please run migration tests to be sure.
Hi Babu, On Wed, Aug 23, 2023 at 12:18:30PM -0500, Moger, Babu wrote: > Date: Wed, 23 Aug 2023 12:18:30 -0500 > From: "Moger, Babu" <babu.moger@amd.com> > Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode > CPUID[4] > > Hi Zhao, > > On 8/18/23 02:37, Zhao Liu wrote: > > Hi Babu, > > > > On Mon, Aug 14, 2023 at 11:03:53AM -0500, Moger, Babu wrote: > >> Date: Mon, 14 Aug 2023 11:03:53 -0500 > >> From: "Moger, Babu" <babu.moger@amd.com> > >> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode > >> CPUID[4] > >> > >> Hi Zhao, > >> > >> > >> On 8/14/23 03:22, Zhao Liu wrote: > >>> Hi Babu, > >>> > >>> On Fri, Aug 04, 2023 at 10:48:29AM -0500, Moger, Babu wrote: > >>>> Date: Fri, 4 Aug 2023 10:48:29 -0500 > >>>> From: "Moger, Babu" <babu.moger@amd.com> > >>>> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode > >>>> CPUID[4] > >>>> > >>>> Hi Zhao, > >>>> > >>>> On 8/4/23 04:48, Zhao Liu wrote: > >>>>> Hi Babu, > >>>>> > >>>>> On Thu, Aug 03, 2023 at 11:41:40AM -0500, Moger, Babu wrote: > >>>>>> Date: Thu, 3 Aug 2023 11:41:40 -0500 > >>>>>> From: "Moger, Babu" <babu.moger@amd.com> > >>>>>> Subject: Re: [PATCH v3 14/17] i386: Use CPUCacheInfo.share_level to encode > >>>>>> CPUID[4] > >>>>>> > >>>>>> Hi Zhao, > >>>>>> > >>>>>> On 8/2/23 18:49, Moger, Babu wrote: > >>>>>>> Hi Zhao, > >>>>>>> > >>>>>>> Hitting this error after this patch. > >>>>>>> > >>>>>>> ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code should > >>>>>>> not be reached > >>>>>>> Bail out! ERROR:../target/i386/cpu.c:257:max_processor_ids_for_cache: code > >>>>>>> should not be reached > >>>>>>> Aborted (core dumped) > >>>>>>> > >>>>>>> Looks like share_level for all the caches for AMD is not initialized. > >>>>> > >>>>> I missed these change when I rebase. Sorry for that. > >>>>> > >>>>> BTW, could I ask a question? 
From a previous discussion[1], I understand > >>>>> that the cache info is used to show the correct cache information in > >>>>> new machine. And from [2], the wrong cache info may cause "compatibility > >>>>> issues". > >>>>> > >>>>> Is this "compatibility issues" AMD specific? I'm not sure if Intel should > >>>>> update the cache info like that. thanks! > >>>> > >>>> I was going to comment about that. Good that you asked me. > >>>> > >>>> Compatibility is qemu requirement. Otherwise the migrations will fail. > >>>> > >>>> Any changes in the topology is going to cause migration problems. > >>> > >>> Could you please educate me more about the details of the "migration > >>> problem"? > >>> > >>> I didn't understand why it was causing the problem and wasn't sure if I > >>> was missing any cases. > >>> > >> > >> I am not an expert on migration but I test VM migration sometimes. > >> Here are some guidelines. > >> https://developers.redhat.com/blog/2015/03/24/live-migrating-qemu-kvm-virtual-machines > > > > Thanks for the material! > > > >> > >> When you migrate a VM to newer qemu using the same CPU type, migration > >> should work seamless. That means list of CPU features should be compatible > >> when you move to newer qemu version with CPU type. > > > > I see. This patches set adds the "-smp cluster" command and the > > "x-l2-cache-topo" command. Migration requires that the target and > > Shouldn't the command x-l2-cache-topo be disabled by default? (For example > look at hw/i386/pc.c the property x-migrate-smi-count). Thanks! Since we add the default topology level in cache models, so the default l2 topology is the level hardcoded in cache model. From this point, this option won't affect the migration between different QEMU versions. If user doesn't change l2 to cluster, the default l2 topology levels are the same (core level). > > It will be enabled when user passes "-cpu x-l2-cache-topo=[core|cluster]". > Current code enables it by default as far I can see. 
I think the compatibility issue for x-migrate-smi-count is because it has different default settings for different QEMU versions. And for x-l2-cache-topo, it defaults to use the level hardcoded in cache model, so there is no difference between new and old QEMUs. > > > source VM command lines are the same, so the new commands ensure that > > the migration is consistent. > > > > But this patch set also includes some topology fixes (nr_cores fix and > > l1 cache topology fix) and encoding change (use APIC ID offset to encode > > addressable ids), these changes would affect migration and may cause > > CPUID change for VM view. Thus if this patch set is accepted, these > > changes also need to be pushed into stable versions. Do you agree? > > Yes. That sounds right. > > > > And about cache info for different CPU generations, migration usually > > happens on the same CPU type, and Intel uses the same default cache > > info for all CPU types. With the consistent cache info, migration is > > also Ok. So if we don't care about the specific cache info in the VM, > > it's okay to use the same default cache info for all CPU types. Right? > > I am not sure about this. Please run migration tests to be sure. We tested for these cases: 1. v3 <-> v3: same cli (same setting in x-l2-cache-topo) cases succeed. 2. v3 <-> master base (no v3 patches): same cli or v3 with default level (as hardcoded in cache models) cases succeed. Thanks, Zhao
diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 55aba4889628..c9897c0fe91a 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -234,22 +234,53 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache) ((t) == UNIFIED_CACHE) ? CACHE_TYPE_UNIFIED : \ 0 /* Invalid value */) +static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info, + enum CPUTopoLevel share_level) +{ + uint32_t num_ids = 0; + + switch (share_level) { + case CPU_TOPO_LEVEL_CORE: + num_ids = 1 << apicid_core_offset(topo_info); + break; + case CPU_TOPO_LEVEL_DIE: + num_ids = 1 << apicid_die_offset(topo_info); + break; + case CPU_TOPO_LEVEL_PACKAGE: + num_ids = 1 << apicid_pkg_offset(topo_info); + break; + default: + /* + * Currently there is no use case for SMT and MODULE, so use + * assert directly to facilitate debugging. + */ + g_assert_not_reached(); + } + + return num_ids - 1; +} + +static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info) +{ + uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) - + apicid_core_offset(topo_info)); + return num_cores - 1; +} /* Encode cache info for CPUID[4] */ static void encode_cache_cpuid4(CPUCacheInfo *cache, - int num_apic_ids, int num_cores, + X86CPUTopoInfo *topo_info, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { assert(cache->size == cache->line_size * cache->associativity * cache->partitions * cache->sets); - assert(num_apic_ids > 0); *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) | (cache->self_init ? 
CACHE_SELF_INIT_LEVEL : 0) | - ((num_cores - 1) << 26) | - ((num_apic_ids - 1) << 14); + (max_core_ids_in_package(topo_info) << 26) | + (max_processor_ids_for_cache(topo_info, cache->share_level) << 14); assert(cache->line_size > 0); assert(cache->partitions > 0); @@ -6116,56 +6147,41 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14); if (cores_per_pkg > 1) { - int addressable_cores_offset = - apicid_pkg_offset(&topo_info) - - apicid_core_offset(&topo_info); - *eax &= ~0xFC000000; - *eax |= (1 << addressable_cores_offset - 1) << 26; + *eax |= max_core_ids_in_package(&topo_info) << 26; } if (host_vcpus_per_cache > cpus_per_pkg) { - int pkg_offset = apicid_pkg_offset(&topo_info); - *eax &= ~0x3FFC000; - *eax |= (1 << pkg_offset - 1) << 14; + *eax |= + max_processor_ids_for_cache(&topo_info, + CPU_TOPO_LEVEL_PACKAGE) << 14; } } } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { *eax = *ebx = *ecx = *edx = 0; } else { *eax = 0; - int addressable_cores_offset = apicid_pkg_offset(&topo_info) - - apicid_core_offset(&topo_info); - int core_offset, die_offset; switch (count) { case 0: /* L1 dcache info */ - core_offset = apicid_core_offset(&topo_info); encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache, - (1 << core_offset), - (1 << addressable_cores_offset), + &topo_info, eax, ebx, ecx, edx); break; case 1: /* L1 icache info */ - core_offset = apicid_core_offset(&topo_info); encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache, - (1 << core_offset), - (1 << addressable_cores_offset), + &topo_info, eax, ebx, ecx, edx); break; case 2: /* L2 cache info */ - core_offset = apicid_core_offset(&topo_info); encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache, - (1 << core_offset), - (1 << addressable_cores_offset), + &topo_info, eax, ebx, ecx, edx); break; case 3: /* L3 cache info */ - die_offset = apicid_die_offset(&topo_info); if (cpu->enable_l3_cache) { 
encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache, - (1 << die_offset), - (1 << addressable_cores_offset), + &topo_info, eax, ebx, ecx, edx); break; }