Message ID | 20240724075226.212882-1-manish.mishra@nutanix.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v1] target/i386: Always set leaf 0x1f | expand |
On Wed, 24 Jul 2024 07:52:26 +0000 "manish.mishra" <manish.mishra@nutanix.com> wrote: > From: Manish Mishra <manish.mishra@nutanix.com> > > QEMU does not set 0x1f in case VM does not have extended CPU topology > and expects guests to fallback to 0xb. Some versions of Windows does not > like this behavior and expects this leaf to be populated. As a result Windows > VM fails with blue screen. BSOD usually has error code displayed, it would be better to specify it here this way whomever searching for the error, can find this patch/commit > Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent > to 0xb by default and workaround windows issue.> > This change adds a > new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in > case extended CPU topology is not configured and behave as before otherwise. repeating question why we need to use extra property instead of just adding 0x1f leaf for CPU models that supposed to have it? > Steps to reproduce this issue: > This requires Windows 10 or 11 VM, with credential guard and HyperV role > enabled. Also this issue shows starting SapphireRapids which raised cpuid > level to 0x20, hence exposing 0x1f to guests. > ./qemu-system-x86_64 > -drive file=/usr/share/OVMF/OVMF_CODE_4MB.secboot.fd,if=pflash,format=raw,unit=0,readonly=on > -drive file=/usr/share/OVMF/OVMF_VARS_4MB.fd,if=pflash,format=raw > -machine pc-q35,smm=on > -global mch.extended-tseg-mbytes=80 > -accel kvm > -m 2G > -cpu SapphireRapids-v1,invtsc=on,vmx=on > -smp 2,maxcpus=240,sockets=120,dies=1,cores=2,threads=1 > -hda systest_ahv_win10_cg.qcow2 > > Signed-off-by: Manish Mishra <manish.mishra@nutanix.com> > --- > hw/i386/pc.c | 1 + > target/i386/cpu.c | 7 +++++-- > target/i386/cpu.h | 5 +++++ > target/i386/kvm/kvm.c | 4 +++- > 4 files changed, 14 insertions(+), 3 deletions(-) > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > index c74931d577..4cab04e443 100644 > --- a/hw/i386/pc.c > +++ b/hw/i386/pc.c > @@ -85,6 +85,7 @@ GlobalProperty pc_compat_9_0[] = { > { TYPE_X86_CPU, "guest-phys-bits", "0" }, > { "sev-guest", "legacy-vm-type", "on" }, > { TYPE_X86_CPU, "legacy-multi-node", "on" }, > + { TYPE_X86_CPU, "cpuid-0x1f-enforce", "false" }, > }; > const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0); > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > index 4688d140c2..7b71083bc9 100644 > --- a/target/i386/cpu.c > +++ b/target/i386/cpu.c > @@ -6637,7 +6637,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, > break; > case 0x1F: > /* V2 Extended Topology Enumeration Leaf */ > - if (!x86_has_extended_topo(env->avail_cpu_topo)) { > + if (!x86_has_extended_topo(env->avail_cpu_topo) && > + !cpu->enable_cpuid_0x1f_enforce) { > *eax = *ebx = *ecx = *edx = 0; > break; > } > @@ -7450,7 +7451,8 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) > * cpu->vendor_cpuid_only has been unset for compatibility with older > * machine types. > */ > - if (x86_has_extended_topo(env->avail_cpu_topo) && > + if ((x86_has_extended_topo(env->avail_cpu_topo) || > + cpu->enable_cpuid_0x1f_enforce) && > (IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) { > x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F); > } > @@ -8316,6 +8318,7 @@ static Property x86_cpu_properties[] = { > DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), > DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor), > DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), > + DEFINE_PROP_BOOL("cpuid-0x1f-enforce", X86CPU, enable_cpuid_0x1f_enforce, true), > DEFINE_PROP_BOOL("x-vendor-cpuid-only", X86CPU, vendor_cpuid_only, true), > DEFINE_PROP_BOOL("x-amd-topoext-features-only", X86CPU, amd_topoext_features_only, true), > DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), > diff --git a/target/i386/cpu.h b/target/i386/cpu.h > index 1e121acef5..49c5641ba8 100644 > --- a/target/i386/cpu.h > +++ b/target/i386/cpu.h > @@ -2102,6 +2102,11 @@ struct ArchCPU { > /* Compatibility bits for old machine types: */ > bool enable_cpuid_0xb; > > + /* Always return values for 0x1f leaf. In cases where extended CPU topology > + * is not configured, return values equivalent of leaf 0xb. > + */ > + bool enable_cpuid_0x1f_enforce; > + > /* Enable auto level-increase for all CPUID leaves */ > bool full_cpuid_auto_level; > > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c > index becca2efa5..a9c6f02900 100644 > --- a/target/i386/kvm/kvm.c > +++ b/target/i386/kvm/kvm.c > @@ -1799,6 +1799,7 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, > uint32_t limit, i, j; > uint32_t unused; > struct kvm_cpuid_entry2 *c; > + X86CPU *cpu = env_archcpu(env); > > cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); > > @@ -1831,7 +1832,8 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, > break; > } > case 0x1f: > - if (!x86_has_extended_topo(env->avail_cpu_topo)) { > + if (!x86_has_extended_topo(env->avail_cpu_topo) && > + !cpu->enable_cpuid_0x1f_enforce) { > cpuid_i--; > break; > }
Thanks Igor On 24/07/24 2:30 pm, Igor Mammedov wrote: > !-------------------------------------------------------------------| > CAUTION: External Email > > |-------------------------------------------------------------------! > > On Wed, 24 Jul 2024 07:52:26 +0000 > "manish.mishra"<manish.mishra@nutanix.com> wrote: > >> From: Manish Mishra<manish.mishra@nutanix.com> >> >> QEMU does not set 0x1f in case VM does not have extended CPU topology >> and expects guests to fallback to 0xb. Some versions of Windows does not >> like this behavior and expects this leaf to be populated. As a result Windows >> VM fails with blue screen. > BSOD usually has error code displayed, it would be better to specify it here > this way whomever searching for the error, can find this patch/commit Sorry for earlier response, i do not see blue screen it seems to be falling in uefi back quickly and i do not see any details here. I am attaching image. > >> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent >> to 0xb by default and workaround windows issue.> >> This change adds a >> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in >> case extended CPU topology is not configured and behave as before otherwise. > repeating question > why we need to use extra property instead of just adding 0x1f leaf for CPU models > that supposed to have it? As i mentioned in earlier response. "Windows expects it only when we have set max cpuid level greater than or equal to 0x1f. I mean if it is exposed it should not be all zeros. SapphireRapids CPU definition raised cpuid level to 0x20, so we starting seeing it with SapphireRapids." Windows does not expect 0x1f to be present for any CPU model. But if it is exposed to the guest, it expects non-zero values. > >> Steps to reproduce this issue: >> This requires Windows 10 or 11 VM, with credential guard and HyperV role >> enabled. Also this issue shows starting SapphireRapids which raised cpuid >> level to 0x20, hence exposing 0x1f to guests. >> ./qemu-system-x86_64 >> -drive file=/usr/share/OVMF/OVMF_CODE_4MB.secboot.fd,if=pflash,format=raw,unit=0,readonly=on >> -drive file=/usr/share/OVMF/OVMF_VARS_4MB.fd,if=pflash,format=raw >> -machine pc-q35,smm=on >> -global mch.extended-tseg-mbytes=80 >> -accel kvm >> -m 2G >> -cpu SapphireRapids-v1,invtsc=on,vmx=on >> -smp 2,maxcpus=240,sockets=120,dies=1,cores=2,threads=1 >> -hda systest_ahv_win10_cg.qcow2 >> >> Signed-off-by: Manish Mishra<manish.mishra@nutanix.com> >> --- >> hw/i386/pc.c | 1 + >> target/i386/cpu.c | 7 +++++-- >> target/i386/cpu.h | 5 +++++ >> target/i386/kvm/kvm.c | 4 +++- >> 4 files changed, 14 insertions(+), 3 deletions(-) >> >> diff --git a/hw/i386/pc.c b/hw/i386/pc.c >> index c74931d577..4cab04e443 100644 >> --- a/hw/i386/pc.c >> +++ b/hw/i386/pc.c >> @@ -85,6 +85,7 @@ GlobalProperty pc_compat_9_0[] = { >> { TYPE_X86_CPU, "guest-phys-bits", "0" }, >> { "sev-guest", "legacy-vm-type", "on" }, >> { TYPE_X86_CPU, "legacy-multi-node", "on" }, >> + { TYPE_X86_CPU, "cpuid-0x1f-enforce", "false" }, >> }; >> const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0); >> >> diff --git a/target/i386/cpu.c b/target/i386/cpu.c >> index 4688d140c2..7b71083bc9 100644 >> --- a/target/i386/cpu.c >> +++ b/target/i386/cpu.c >> @@ -6637,7 +6637,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, >> break; >> case 0x1F: >> /* V2 Extended Topology Enumeration Leaf */ >> - if (!x86_has_extended_topo(env->avail_cpu_topo)) { >> + if (!x86_has_extended_topo(env->avail_cpu_topo) && >> + !cpu->enable_cpuid_0x1f_enforce) { >> *eax = *ebx = *ecx = *edx = 0; >> break; >> } >> @@ -7450,7 +7451,8 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) >> * cpu->vendor_cpuid_only has been unset for compatibility with older >> * machine types. >> */ >> - if (x86_has_extended_topo(env->avail_cpu_topo) && >> + if ((x86_has_extended_topo(env->avail_cpu_topo) || >> + cpu->enable_cpuid_0x1f_enforce) && >> (IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) { >> x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F); >> } >> @@ -8316,6 +8318,7 @@ static Property x86_cpu_properties[] = { >> DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), >> DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor), >> DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), >> + DEFINE_PROP_BOOL("cpuid-0x1f-enforce", X86CPU, enable_cpuid_0x1f_enforce, true), >> DEFINE_PROP_BOOL("x-vendor-cpuid-only", X86CPU, vendor_cpuid_only, true), >> DEFINE_PROP_BOOL("x-amd-topoext-features-only", X86CPU, amd_topoext_features_only, true), >> DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), >> diff --git a/target/i386/cpu.h b/target/i386/cpu.h >> index 1e121acef5..49c5641ba8 100644 >> --- a/target/i386/cpu.h >> +++ b/target/i386/cpu.h >> @@ -2102,6 +2102,11 @@ struct ArchCPU { >> /* Compatibility bits for old machine types: */ >> bool enable_cpuid_0xb; >> >> + /* Always return values for 0x1f leaf. In cases where extended CPU topology >> + * is not configured, return values equivalent of leaf 0xb. >> + */ >> + bool enable_cpuid_0x1f_enforce; >> + >> /* Enable auto level-increase for all CPUID leaves */ >> bool full_cpuid_auto_level; >> >> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c >> index becca2efa5..a9c6f02900 100644 >> --- a/target/i386/kvm/kvm.c >> +++ b/target/i386/kvm/kvm.c >> @@ -1799,6 +1799,7 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, >> uint32_t limit, i, j; >> uint32_t unused; >> struct kvm_cpuid_entry2 *c; >> + X86CPU *cpu = env_archcpu(env); >> >> cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); >> >> @@ -1831,7 +1832,8 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, >> break; >> } >> case 0x1f: >> - if (!x86_has_extended_topo(env->avail_cpu_topo)) { >> + if (!x86_has_extended_topo(env->avail_cpu_topo) && >> + !cpu->enable_cpuid_0x1f_enforce) { >> cpuid_i--; >> break; >> } Thanks Manish Mishra
On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote: > > > Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent > > > to 0xb by default and workaround windows issue.> > > > This change adds a > > > new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in > > > case extended CPU topology is not configured and behave as before otherwise. > > repeating question > > why we need to use extra property instead of just adding 0x1f leaf for CPU models > > that supposed to have it? > > As i mentioned in earlier response. "Windows expects it only when we have > set max cpuid level greater than or equal to 0x1f. I mean if it is exposed > it should not be all zeros. SapphireRapids CPU definition raised cpuid level > to 0x20, so we starting seeing it with SapphireRapids." > > Windows does not expect 0x1f to be present for any CPU model. But if it is > exposed to the guest, it expects non-zero values. I think Igor is suggesting: - leave x86_cpu_expand_features() alone completely - change the 0x1f handling to always report topology i.e. never report all zeroes Yes, that would mean that if something requests 0x1f leaf even though the max leaf is lower, they'd get data back, but it's not clear why that'd be an issue? regards john
On 24/07/24 4:43 pm, John Levon wrote: > On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote: > >>>> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent >>>> to 0xb by default and workaround windows issue.> >>>> This change adds a >>>> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in >>>> case extended CPU topology is not configured and behave as before otherwise. >>> repeating question >>> why we need to use extra property instead of just adding 0x1f leaf for CPU models >>> that supposed to have it? >> As i mentioned in earlier response. "Windows expects it only when we have >> set max cpuid level greater than or equal to 0x1f. I mean if it is exposed >> it should not be all zeros. SapphireRapids CPU definition raised cpuid level >> to 0x20, so we starting seeing it with SapphireRapids." >> >> Windows does not expect 0x1f to be present for any CPU model. But if it is >> exposed to the guest, it expects non-zero values. > I think Igor is suggesting: > > - leave x86_cpu_expand_features() alone completely > > - change the 0x1f handling to always report topology i.e. never report all > zeroes > > Yes, that would mean that if something requests 0x1f leaf even though the max > leaf is lower, they'd get data back, but it's not clear why that'd be an issue? > > regards > john Thanks John, I believe making such changes without any user or machine-type control can be risky for live migrations? Thanks Manish Mishra
On Wed, 24 Jul 2024 12:13:28 +0100 John Levon <john.levon@nutanix.com> wrote: > On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote: > > > > > Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent > > > > to 0xb by default and workaround windows issue.> > > > > This change adds a > > > > new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in > > > > case extended CPU topology is not configured and behave as before otherwise. > > > repeating question > > > why we need to use extra property instead of just adding 0x1f leaf for CPU models > > > that supposed to have it? > > > > As i mentioned in earlier response. "Windows expects it only when we have > > set max cpuid level greater than or equal to 0x1f. I mean if it is exposed > > it should not be all zeros. SapphireRapids CPU definition raised cpuid level > > to 0x20, so we starting seeing it with SapphireRapids." > > > > Windows does not expect 0x1f to be present for any CPU model. But if it is > > exposed to the guest, it expects non-zero values. > > I think Igor is suggesting: > > - leave x86_cpu_expand_features() alone completely yep, drop that if possible > - change the 0x1f handling to always report topology i.e. never report all > zeroes Do this but only for CPU models that have this leaf per spec, to avoid live migration issues create a new version of CPU model, so it would apply only for new version. This way older versions and migration won't be affected. > > Yes, that would mean that if something requests 0x1f leaf even though the max > leaf is lower, they'd get data back, but it's not clear why that'd be an issue? > > regards > john >
On 24/07/24 6:24 pm, Igor Mammedov wrote: > !-------------------------------------------------------------------| > CAUTION: External Email > > |-------------------------------------------------------------------! > > On Wed, 24 Jul 2024 12:13:28 +0100 > John Levon <john.levon@nutanix.com> wrote: > >> On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote: >> >>>>> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent >>>>> to 0xb by default and workaround windows issue.> >>>>> This change adds a >>>>> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in >>>>> case extended CPU topology is not configured and behave as before otherwise. >>>> repeating question >>>> why we need to use extra property instead of just adding 0x1f leaf for CPU models >>>> that supposed to have it? >>> As i mentioned in earlier response. "Windows expects it only when we have >>> set max cpuid level greater than or equal to 0x1f. I mean if it is exposed >>> it should not be all zeros. SapphireRapids CPU definition raised cpuid level >>> to 0x20, so we starting seeing it with SapphireRapids." >>> >>> Windows does not expect 0x1f to be present for any CPU model. But if it is >>> exposed to the guest, it expects non-zero values. >> I think Igor is suggesting: >> >> - leave x86_cpu_expand_features() alone completely > yep, drop that if possible This was suggested by Zhao, probably could related to TDX work mentioned? As i did not see any harm, i did not mind changing it. > > >> - change the 0x1f handling to always report topology i.e. never report all >> zeroes > Do this but only for CPU models that have this leaf per spec, > to avoid live migration issues create a new version of CPU model, > so it would apply only for new version. This way older versions > and migration won't be affected. Yes, I can do that too. But i beleive it may not be a CPU model related property or bug. What if someone uses older CPU model but explicitily passes some extra flags to include 0x1f? > >> Yes, that would mean that if something requests 0x1f leaf even though the max >> leaf is lower, they'd get data back, but it's not clear why that'd be an issue? >> >> regards >> john >> Thanks Manish Mishra
Hi Igor, On Wed, Jul 24, 2024 at 02:54:32PM +0200, Igor Mammedov wrote: > Date: Wed, 24 Jul 2024 14:54:32 +0200 > From: Igor Mammedov <imammedo@redhat.com> > Subject: Re: [PATCH v1] target/i386: Always set leaf 0x1f > X-Mailer: Claws Mail 4.3.0 (GTK 3.24.42; x86_64-redhat-linux-gnu) > > On Wed, 24 Jul 2024 12:13:28 +0100 > John Levon <john.levon@nutanix.com> wrote: > > > On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote: > > > > > > > Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent > > > > > to 0xb by default and workaround windows issue.> > > > > > This change adds a > > > > > new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in > > > > > case extended CPU topology is not configured and behave as before otherwise. > > > > repeating question > > > > why we need to use extra property instead of just adding 0x1f leaf for CPU models > > > > that supposed to have it? > > > > > > As i mentioned in earlier response. "Windows expects it only when we have > > > set max cpuid level greater than or equal to 0x1f. I mean if it is exposed > > > it should not be all zeros. SapphireRapids CPU definition raised cpuid level > > > to 0x20, so we starting seeing it with SapphireRapids." > > > > > > Windows does not expect 0x1f to be present for any CPU model. But if it is > > > exposed to the guest, it expects non-zero values. > > > > I think Igor is suggesting: > > > > - leave x86_cpu_expand_features() alone completely > yep, drop that if possible > > > > - change the 0x1f handling to always report topology i.e. never report all > > zeroes > > Do this but only for CPU models that have this leaf per spec, > to avoid live migration issues create a new version of CPU model, > so it would apply only for new version. This way older versions > and migration won't be affected. So that in the future every new Intel CPU model will need to always enable 0x1f. Sounds like an endless game. So my question is: at what point is it ok to consider defaulting to always enable 0x1f and just disable it for the old CPU model? Thanks, Zhao
Hi Igor, Anything further on this? Thanks Manish Mishra On 24/07/24 8:30 pm, Zhao Liu wrote: > !-------------------------------------------------------------------| > CAUTION: External Email > > |-------------------------------------------------------------------! > > Hi Igor, > > On Wed, Jul 24, 2024 at 02:54:32PM +0200, Igor Mammedov wrote: >> Date: Wed, 24 Jul 2024 14:54:32 +0200 >> From: Igor Mammedov <imammedo@redhat.com> >> Subject: Re: [PATCH v1] target/i386: Always set leaf 0x1f >> X-Mailer: Claws Mail 4.3.0 (GTK 3.24.42; x86_64-redhat-linux-gnu) >> >> On Wed, 24 Jul 2024 12:13:28 +0100 >> John Levon <john.levon@nutanix.com> wrote: >> >>> On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote: >>> >>>>>> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent >>>>>> to 0xb by default and workaround windows issue.> >>>>>> This change adds a >>>>>> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in >>>>>> case extended CPU topology is not configured and behave as before otherwise. >>>>> repeating question >>>>> why we need to use extra property instead of just adding 0x1f leaf for CPU models >>>>> that supposed to have it? >>>> As i mentioned in earlier response. "Windows expects it only when we have >>>> set max cpuid level greater than or equal to 0x1f. I mean if it is exposed >>>> it should not be all zeros. SapphireRapids CPU definition raised cpuid level >>>> to 0x20, so we starting seeing it with SapphireRapids." >>>> >>>> Windows does not expect 0x1f to be present for any CPU model. But if it is >>>> exposed to the guest, it expects non-zero values. >>> I think Igor is suggesting: >>> >>> - leave x86_cpu_expand_features() alone completely >> yep, drop that if possible >> >> >>> - change the 0x1f handling to always report topology i.e. never report all >>> zeroes >> Do this but only for CPU models that have this leaf per spec, >> to avoid live migration issues create a new version of CPU model, >> so it would apply only for new version. This way older versions >> and migration won't be affected. > So that in the future every new Intel CPU model will need to always > enable 0x1f. Sounds like an endless game. So my question is: at what > point is it ok to consider defaulting to always enable 0x1f and just > disable it for the old CPU model? > > Thanks, > Zhao >
On Wed, 24 Jul 2024 23:00:13 +0800 Zhao Liu <zhao1.liu@intel.com> wrote: > Hi Igor, > > On Wed, Jul 24, 2024 at 02:54:32PM +0200, Igor Mammedov wrote: > > Date: Wed, 24 Jul 2024 14:54:32 +0200 > > From: Igor Mammedov <imammedo@redhat.com> > > Subject: Re: [PATCH v1] target/i386: Always set leaf 0x1f > > X-Mailer: Claws Mail 4.3.0 (GTK 3.24.42; x86_64-redhat-linux-gnu) > > > > On Wed, 24 Jul 2024 12:13:28 +0100 > > John Levon <john.levon@nutanix.com> wrote: > > > > > On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote: > > > > > > > > > Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent > > > > > > to 0xb by default and workaround windows issue.> > > > > > > This change adds a > > > > > > new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in > > > > > > case extended CPU topology is not configured and behave as before otherwise. > > > > > repeating question > > > > > why we need to use extra property instead of just adding 0x1f leaf for CPU models > > > > > that supposed to have it? > > > > > > > > As i mentioned in earlier response. "Windows expects it only when we have > > > > set max cpuid level greater than or equal to 0x1f. I mean if it is exposed > > > > it should not be all zeros. SapphireRapids CPU definition raised cpuid level > > > > to 0x20, so we starting seeing it with SapphireRapids." > > > > > > > > Windows does not expect 0x1f to be present for any CPU model. But if it is > > > > exposed to the guest, it expects non-zero values. > > > > > > I think Igor is suggesting: > > > > > > - leave x86_cpu_expand_features() alone completely > > yep, drop that if possible > > > > > > > - change the 0x1f handling to always report topology i.e. never report all > > > zeroes > > > > Do this but only for CPU models that have this leaf per spec, > > to avoid live migration issues create a new version of CPU model, > > so it would apply only for new version. This way older versions > > and migration won't be affected. > > So that in the future every new Intel CPU model will need to always > enable 0x1f. Sounds like an endless game. So my question is: at what > point is it ok to consider defaulting to always enable 0x1f and just > disable it for the old CPU model? I have suggested to enable 0x1f leaf excluding: * existing cpu models (versions) * cpu models that do not have this leaf in real world should not have it in QEMU either. If cpu model already exists, you'd need a new version of cpu model to enable new leaf by default. For completely new cpu model, it could be enabled from the start. i.e. workflow for enabling that should be the same as with CPU features (or as you said 'endless game' of copying base model and making it look like should be according to spec, but that's the process we currently use for describing CPU models). > Thanks, > Zhao >
On 29/07/24 7:18 pm, Igor Mammedov wrote: > !-------------------------------------------------------------------| > CAUTION: External Email > > |-------------------------------------------------------------------! > > On Wed, 24 Jul 2024 23:00:13 +0800 > Zhao Liu <zhao1.liu@intel.com> wrote: > >> Hi Igor, >> >> On Wed, Jul 24, 2024 at 02:54:32PM +0200, Igor Mammedov wrote: >>> Date: Wed, 24 Jul 2024 14:54:32 +0200 >>> From: Igor Mammedov <imammedo@redhat.com> >>> Subject: Re: [PATCH v1] target/i386: Always set leaf 0x1f >>> X-Mailer: Claws Mail 4.3.0 (GTK 3.24.42; x86_64-redhat-linux-gnu) >>> >>> On Wed, 24 Jul 2024 12:13:28 +0100 >>> John Levon <john.levon@nutanix.com> wrote: >>> >>>> On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote: >>>> >>>>>>> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent >>>>>>> to 0xb by default and workaround windows issue.> >>>>>>> This change adds a >>>>>>> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in >>>>>>> case extended CPU topology is not configured and behave as before otherwise. >>>>>> repeating question >>>>>> why we need to use extra property instead of just adding 0x1f leaf for CPU models >>>>>> that supposed to have it? >>>>> As i mentioned in earlier response. "Windows expects it only when we have >>>>> set max cpuid level greater than or equal to 0x1f. I mean if it is exposed >>>>> it should not be all zeros. SapphireRapids CPU definition raised cpuid level >>>>> to 0x20, so we starting seeing it with SapphireRapids." >>>>> >>>>> Windows does not expect 0x1f to be present for any CPU model. But if it is >>>>> exposed to the guest, it expects non-zero values. >>>> I think Igor is suggesting: >>>> >>>> - leave x86_cpu_expand_features() alone completely >>> yep, drop that if possible >>> >>> >>>> - change the 0x1f handling to always report topology i.e. never report all >>>> zeroes >>> Do this but only for CPU models that have this leaf per spec, >>> to avoid live migration issues create a new version of CPU model, >>> so it would apply only for new version. This way older versions >>> and migration won't be affected. >> So that in the future every new Intel CPU model will need to always >> enable 0x1f. Sounds like an endless game. So my question is: at what >> point is it ok to consider defaulting to always enable 0x1f and just >> disable it for the old CPU model? > I have suggested to enable 0x1f leaf excluding: > * existing cpu models (versions) > * cpu models that do not have this leaf in real world should > not have it in QEMU either. > > If cpu model already exists, you'd need a new version of cpu model to > enable new leaf by default. > > For completely new cpu model, it could be enabled from the start. > i.e. workflow for enabling that should be the same as with CPU features > (or as you said 'endless game' of copying base model and making it look like > should be according to spec, > but that's the process we currently use for describing CPU models). Igor my understanding was that there are two type of features one is real CPU feature, yes those makes sense in CPU models. But on other hand there are features which are emulated ones i.e. kvm-*, these make sense enabling regardless of any CPU model and we usually use machine types to enable these. Does not this features makes sense in 2nd category. > >> Thanks, >> Zhao >> Thanks Manish Mishra
On Mon, 29 Jul 2024 19:42:39 +0700 Manish <manish.mishra@nutanix.com> wrote: > On 29/07/24 7:18 pm, Igor Mammedov wrote: > > !-------------------------------------------------------------------| > > CAUTION: External Email > > > > |-------------------------------------------------------------------! > > > > On Wed, 24 Jul 2024 23:00:13 +0800 > > Zhao Liu <zhao1.liu@intel.com> wrote: > > > >> Hi Igor, > >> > >> On Wed, Jul 24, 2024 at 02:54:32PM +0200, Igor Mammedov wrote: > >>> Date: Wed, 24 Jul 2024 14:54:32 +0200 > >>> From: Igor Mammedov <imammedo@redhat.com> > >>> Subject: Re: [PATCH v1] target/i386: Always set leaf 0x1f > >>> X-Mailer: Claws Mail 4.3.0 (GTK 3.24.42; x86_64-redhat-linux-gnu) > >>> > >>> On Wed, 24 Jul 2024 12:13:28 +0100 > >>> John Levon <john.levon@nutanix.com> wrote: > >>> > >>>> On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote: > >>>> > >>>>>>> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent > >>>>>>> to 0xb by default and workaround windows issue.> > >>>>>>> This change adds a > >>>>>>> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in > >>>>>>> case extended CPU topology is not configured and behave as before otherwise. > >>>>>> repeating question > >>>>>> why we need to use extra property instead of just adding 0x1f leaf for CPU models > >>>>>> that supposed to have it? > >>>>> As i mentioned in earlier response. "Windows expects it only when we have > >>>>> set max cpuid level greater than or equal to 0x1f. I mean if it is exposed > >>>>> it should not be all zeros. SapphireRapids CPU definition raised cpuid level > >>>>> to 0x20, so we starting seeing it with SapphireRapids." > >>>>> > >>>>> Windows does not expect 0x1f to be present for any CPU model. But if it is > >>>>> exposed to the guest, it expects non-zero values. > >>>> I think Igor is suggesting: > >>>> > >>>> - leave x86_cpu_expand_features() alone completely > >>> yep, drop that if possible > >>> > >>> > >>>> - change the 0x1f handling to always report topology i.e. never report all > >>>> zeroes > >>> Do this but only for CPU models that have this leaf per spec, > >>> to avoid live migration issues create a new version of CPU model, > >>> so it would apply only for new version. This way older versions > >>> and migration won't be affected. > >> So that in the future every new Intel CPU model will need to always > >> enable 0x1f. Sounds like an endless game. So my question is: at what > >> point is it ok to consider defaulting to always enable 0x1f and just > >> disable it for the old CPU model? > > I have suggested to enable 0x1f leaf excluding: > > * existing cpu models (versions) > > * cpu models that do not have this leaf in real world should > > not have it in QEMU either. > > > > If cpu model already exists, you'd need a new version of cpu model to > > enable new leaf by default. > > > > For completely new cpu model, it could be enabled from the start. > > i.e. workflow for enabling that should be the same as with CPU features > > (or as you said 'endless game' of copying base model and making it look like > > should be according to spec, > > but that's the process we currently use for describing CPU models). > > Igor my understanding was that there are two type of features one is > real CPU feature, yes those makes sense in CPU models. But on other hand > there are features which are emulated ones i.e. kvm-*, these make sense > enabling regardless of any CPU model and we usually use machine types to > enable these. Does not this features makes sense in 2nd category. I'm not convinced that it applies to any CPU model. I'd say it's risky to expose new leaf on CPUs that don't have it to begin with. Also what about AMD cpus? Do they also support this leaf? > > > > > >> Thanks, > >> Zhao > >> > > Thanks > > Manish Mishra >
On 7/24/2024 6:29 PM, Manish wrote: > Thanks Igor > > On 24/07/24 2:30 pm, Igor Mammedov wrote: >> !-------------------------------------------------------------------| >> CAUTION: External Email >> >> |-------------------------------------------------------------------! >> >> On Wed, 24 Jul 2024 07:52:26 +0000 >> "manish.mishra"<manish.mishra@nutanix.com> wrote: >> >>> From: Manish Mishra<manish.mishra@nutanix.com> >>> >>> QEMU does not set 0x1f in case VM does not have extended CPU topology >>> and expects guests to fallback to 0xb. Some versions of Windows does not >>> like this behavior and expects this leaf to be populated. As a result >>> Windows >>> VM fails with blue screen. >> BSOD usually has error code displayed, it would be better to specify >> it here >> this way whomever searching for the error, can find this patch/commit > Sorry for earlier response, i do not see blue screen it seems to be > falling in uefi back quickly and i do not see any details here. I am > attaching image. >> >>> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent >>> to 0xb by default and workaround windows issue.> >>> This change adds a >>> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in >>> case extended CPU topology is not configured and behave as before >>> otherwise. >> repeating question >> why we need to use extra property instead of just adding 0x1f leaf for >> CPU models >> that supposed to have it? > > As i mentioned in earlier response. "Windows expects it only when we > have set max cpuid level greater than or equal to 0x1f. I mean if it is > exposed it should not be all zeros. SapphireRapids CPU definition raised > cpuid level to 0x20, so we starting seeing it with SapphireRapids." > > Windows does not expect 0x1f to be present for any CPU model. But if it > is exposed to the guest, it expects non-zero values. Please fix Windows! No guarantee from Intel that leaf 0x1f should report non-zero value when max cpuid level >= 0x1f. Please see SDM.vol2.CPUID chapter. INPUT EAX = 1FH: Returns V2 Extended Topology Information When CPUID executes with EAX set to 1FH, the processor returns information about extended topology enumeration data. Software must detect the presence of CPUID leaf 1FH by verifying (a) the highest leaf index supported by CPUID is >= 1FH, and (b) CPUID.1FH:EBX[15:0] reports a non-zero value. See Table 3-17.
On Wed, Jul 31, 2024 at 03:02:15PM +0800, Xiaoyao Li wrote: > > Windows does not expect 0x1f to be present for any CPU model. But if it > > is exposed to the guest, it expects non-zero values. > > Please fix Windows! A ticket has been filed with MSFT, we are aware this is a guest bug. But that doesn't really help anybody trying to use Windows right now. regards john
On 30/07/24 6:39 pm, Igor Mammedov wrote: > !-------------------------------------------------------------------| > CAUTION: External Email > > |-------------------------------------------------------------------! > > On Mon, 29 Jul 2024 19:42:39 +0700 > Manish <manish.mishra@nutanix.com> wrote: > >> On 29/07/24 7:18 pm, Igor Mammedov wrote: >>> !-------------------------------------------------------------------| >>> CAUTION: External Email >>> >>> |-------------------------------------------------------------------! >>> >>> On Wed, 24 Jul 2024 23:00:13 +0800 >>> Zhao Liu <zhao1.liu@intel.com> wrote: >>> >>>> Hi Igor, >>>> >>>> On Wed, Jul 24, 2024 at 02:54:32PM +0200, Igor Mammedov wrote: >>>>> Date: Wed, 24 Jul 2024 14:54:32 +0200 >>>>> From: Igor Mammedov <imammedo@redhat.com> >>>>> Subject: Re: [PATCH v1] target/i386: Always set leaf 0x1f >>>>> X-Mailer: Claws Mail 4.3.0 (GTK 3.24.42; x86_64-redhat-linux-gnu) >>>>> >>>>> On Wed, 24 Jul 2024 12:13:28 +0100 >>>>> John Levon <john.levon@nutanix.com> wrote: >>>>> >>>>>> On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote: >>>>>> >>>>>>>>> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent >>>>>>>>> to 0xb by default and workaround windows issue.> >>>>>>>>> This change adds a >>>>>>>>> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in >>>>>>>>> case extended CPU topology is not configured and behave as before otherwise. >>>>>>>> repeating question >>>>>>>> why we need to use extra property instead of just adding 0x1f leaf for CPU models >>>>>>>> that supposed to have it? >>>>>>> As i mentioned in earlier response. "Windows expects it only when we have >>>>>>> set max cpuid level greater than or equal to 0x1f. I mean if it is exposed >>>>>>> it should not be all zeros. SapphireRapids CPU definition raised cpuid level >>>>>>> to 0x20, so we starting seeing it with SapphireRapids." >>>>>>> >>>>>>> Windows does not expect 0x1f to be present for any CPU model. But if it is >>>>>>> exposed to the guest, it expects non-zero values. >>>>>> I think Igor is suggesting: >>>>>> >>>>>> - leave x86_cpu_expand_features() alone completely >>>>> yep, drop that if possible >>>>> >>>>> >>>>>> - change the 0x1f handling to always report topology i.e. never report all >>>>>> zeroes >>>>> Do this but only for CPU models that have this leaf per spec, >>>>> to avoid live migration issues create a new version of CPU model, >>>>> so it would apply only for new version. This way older versions >>>>> and migration won't be affected. >>>> So that in the future every new Intel CPU model will need to always >>>> enable 0x1f. Sounds like an endless game. So my question is: at what >>>> point is it ok to consider defaulting to always enable 0x1f and just >>>> disable it for the old CPU model? >>> I have suggested to enable 0x1f leaf excluding: >>> * existing cpu models (versions) >>> * cpu models that do not have this leaf in real world should >>> not have it in QEMU either. >>> >>> If cpu model already exists, you'd need a new version of cpu model to >>> enable new leaf by default. >>> >>> For completely new cpu model, it could be enabled from the start. >>> i.e. workflow for enabling that should be the same as with CPU features >>> (or as you said 'endless game' of copying base model and making it look like >>> should be according to spec, >>> but that's the process we currently use for describing CPU models). >> Igor my understanding was that there are two type of features one is >> real CPU feature, yes those makes sense in CPU models. But on other hand >> there are features which are emulated ones i.e. kvm-*, these make sense >> enabling regardless of any CPU model and we usually use machine types to >> enable these. Does not this features makes sense in 2nd category. > I'm not convinced that it applies to any CPU model. > I'd say it's risky to expose new leaf on CPUs that don't have it to begin with. > Also what about AMD cpus? Do they also support this leaf? Hi Igor, No AMD has different leaf for this purpose, i think 8000_0026. But should not AMD platforms or guest ignore it in that case. This flag was anyway getting populated earlier too for nr_dies > 1? > >> >>> >>>> Thanks, >>>> Zhao >>>> >> Thanks >> >> Manish Mishra >> Thanks Manish
On 7/31/2024 4:49 PM, John Levon wrote: > On Wed, Jul 31, 2024 at 03:02:15PM +0800, Xiaoyao Li wrote: > >>> Windows does not expect 0x1f to be present for any CPU model. But if it >>> is exposed to the guest, it expects non-zero values. >> >> Please fix Windows! > > A ticket has been filed with MSFT, we are aware this is a guest bug. > > But that doesn't really help anybody trying to use Windows right now. For existing buggy Windows, we can still introduce "cpuid-0x1f-enforce" but not make it default on. People want to boot the buggy Windows needs to opt-in it themselves via "-cpu xxx,cpuid-0x1f-enforce=on". This way, we don't have live migration issue and it doesn't affect anything. > regards > john
On 31/07/24 9:01 pm, Xiaoyao Li wrote: > !-------------------------------------------------------------------| > CAUTION: External Email > > |-------------------------------------------------------------------! > > On 7/31/2024 4:49 PM, John Levon wrote: >> On Wed, Jul 31, 2024 at 03:02:15PM +0800, Xiaoyao Li wrote: >> >>>> Windows does not expect 0x1f to be present for any CPU model. But >>>> if it >>>> is exposed to the guest, it expects non-zero values. >>> >>> Please fix Windows! >> >> A ticket has been filed with MSFT, we are aware this is a guest bug. >> >> But that doesn't really help anybody trying to use Windows right now. > > For existing buggy Windows, we can still introduce > "cpuid-0x1f-enforce" but not make it default on. > > People want to boot the buggy Windows needs to opt-in it themselves > via "-cpu xxx,cpuid-0x1f-enforce=on". This way, we don't have live > migration issue and it doesn't affect anything. Yes, that makes sense, I will send a updated patch by tomorrow if no one has any objection with this. > >> regards >> john Thanks Manish Mishra
On Thu, 1 Aug 2024 15:36:10 +0530 Manish <manish.mishra@nutanix.com> wrote: > On 31/07/24 9:01 pm, Xiaoyao Li wrote: > > !-------------------------------------------------------------------| > > CAUTION: External Email > > > > |-------------------------------------------------------------------! > > > > On 7/31/2024 4:49 PM, John Levon wrote: > >> On Wed, Jul 31, 2024 at 03:02:15PM +0800, Xiaoyao Li wrote: > >> > >>>> Windows does not expect 0x1f to be present for any CPU model. But > >>>> if it > >>>> is exposed to the guest, it expects non-zero values. > >>> > >>> Please fix Windows! > >> > >> A ticket has been filed with MSFT, we are aware this is a guest bug. > >> > >> But that doesn't really help anybody trying to use Windows right now. > > > > For existing buggy Windows, we can still introduce > > "cpuid-0x1f-enforce" but not make it default on. > > > > People want to boot the buggy Windows needs to opt-in it themselves > > via "-cpu xxx,cpuid-0x1f-enforce=on". This way, we don't have live > > migration issue and it doesn't affect anything. > > > Yes, that makes sense, I will send a updated patch by tomorrow if no one > has any objection with this. I'd rename it to x-have-cpuid-0x1f-leaf (x-) to reflect that it's not stable/maintained and subject to be dropped in future Also please clearly spell out that it's a temporary workaround for ... in commit message. > > > > >> regards > >> john > > > Thanks > > Manish Mishra >
On 8/1/2024 6:25 PM, Igor Mammedov wrote: > On Thu, 1 Aug 2024 15:36:10 +0530 > Manish <manish.mishra@nutanix.com> wrote: > >> On 31/07/24 9:01 pm, Xiaoyao Li wrote: >>> !-------------------------------------------------------------------| >>> CAUTION: External Email >>> >>> |-------------------------------------------------------------------! >>> >>> On 7/31/2024 4:49 PM, John Levon wrote: >>>> On Wed, Jul 31, 2024 at 03:02:15PM +0800, Xiaoyao Li wrote: >>>> >>>>>> Windows does not expect 0x1f to be present for any CPU model. But >>>>>> if it >>>>>> is exposed to the guest, it expects non-zero values. >>>>> >>>>> Please fix Windows! >>>> >>>> A ticket has been filed with MSFT, we are aware this is a guest bug. >>>> >>>> But that doesn't really help anybody trying to use Windows right now. >>> >>> For existing buggy Windows, we can still introduce >>> "cpuid-0x1f-enforce" but not make it default on. >>> >>> People want to boot the buggy Windows needs to opt-in it themselves >>> via "-cpu xxx,cpuid-0x1f-enforce=on". This way, we don't have live >>> migration issue and it doesn't affect anything. >> >> >> Yes, that makes sense, I will send a updated patch by tomorrow if no one >> has any objection with this. > > I'd rename it to > x-have-cpuid-0x1f-leaf > (x-) to reflect that it's not stable/maintained and subject > to be dropped in future > > Also please clearly spell out that it's a temporary workaround for ... > in commit message. I have a patch at hand, to introduce enable_cpuid_0x1f similar as enable_cpuid_0xb, for TDX: https://github.com/intel-staging/qemu-tdx/commit/de08fd30926bc9d7997af6bd12cfff1b998da8b7 It is not a temporary solution. So I would suggest to drop (x-). If no objection, I think Manish can start from my patch and it only misses a property definition for windows case: DEFINE_PROP_BOOL("cpuid-0x1f", X86CPU, enable_cpuid_0x1f, false); > >> >>> >>>> regards >>>> john >> >> >> Thanks >> >> Manish Mishra >> >
On 01/08/24 8:41 pm, Xiaoyao Li wrote: > !-------------------------------------------------------------------| > CAUTION: External Email > > |-------------------------------------------------------------------! > > On 8/1/2024 6:25 PM, Igor Mammedov wrote: >> On Thu, 1 Aug 2024 15:36:10 +0530 >> Manish <manish.mishra@nutanix.com> wrote: >> >>> On 31/07/24 9:01 pm, Xiaoyao Li wrote: >>>> !-------------------------------------------------------------------| >>>> CAUTION: External Email >>>> >>>> |-------------------------------------------------------------------! >>>> >>>> On 7/31/2024 4:49 PM, John Levon wrote: >>>>> On Wed, Jul 31, 2024 at 03:02:15PM +0800, Xiaoyao Li wrote: >>>>>>> Windows does not expect 0x1f to be present for any CPU model. But >>>>>>> if it >>>>>>> is exposed to the guest, it expects non-zero values. >>>>>> >>>>>> Please fix Windows! >>>>> >>>>> A ticket has been filed with MSFT, we are aware this is a guest bug. >>>>> >>>>> But that doesn't really help anybody trying to use Windows right now. >>>> >>>> For existing buggy Windows, we can still introduce >>>> "cpuid-0x1f-enforce" but not make it default on. >>>> >>>> People want to boot the buggy Windows needs to opt-in it themselves >>>> via "-cpu xxx,cpuid-0x1f-enforce=on". This way, we don't have live >>>> migration issue and it doesn't affect anything. >>> >>> >>> Yes, that makes sense, I will send a updated patch by tomorrow if no >>> one >>> has any objection with this. >> >> I'd rename it to >> x-have-cpuid-0x1f-leaf >> (x-) to reflect that it's not stable/maintained and subject >> to be dropped in future >> >> Also please clearly spell out that it's a temporary workaround for ... >> in commit message. > > I have a patch at hand, to introduce enable_cpuid_0x1f similar as > enable_cpuid_0xb, for TDX: > > https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_intel-2Dstaging_qemu-2Dtdx_commit_de08fd30926bc9d7997af6bd12cfff1b998da8b7&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=c4KON2DiMd-szjwjggQcuUvTsPWblztAL0gVzaHnNmc&m=CIrxjRd0KG4ww4BtSxZysWS0tFYfPGTRBG731EmlUcy7BFlAw3-5PLp2SlKPR83m&s=2gKDZXpqB7wE8v0vtN8l65WBqTtXOUJ-FkMblXcT_Ws&e= > > It is not a temporary solution. So I would suggest to drop (x-). > If no objection, I think Manish can start from my patch and it only > misses a property definition for windows case: > > DEFINE_PROP_BOOL("cpuid-0x1f", X86CPU, enable_cpuid_0x1f, false); > > Thanks Xiaoyao, I see your patch does what we require but i am not able to track these patches, are these in line to be merged soon? We need this urgently. Also as it is just a single line change on top of your changes, how i manage my change? Should i wait for you to merge and then send patch or you will be fine to directly include it in your series? > >> >>> >>>>> regards >>>>> john >>> >>> >>> Thanks >>> >>> Manish Mishra >>> >> > Thanks Manish Mishra
On Wed, Jul 31, 2024 at 07:30:44PM +0530, Manish wrote: > Date: Wed, 31 Jul 2024 19:30:44 +0530 > From: Manish <manish.mishra@nutanix.com> > Subject: Re: [PATCH v1] target/i386: Always set leaf 0x1f > > > On 30/07/24 6:39 pm, Igor Mammedov wrote: > > !-------------------------------------------------------------------| > > CAUTION: External Email > > > > |-------------------------------------------------------------------! > > > > On Mon, 29 Jul 2024 19:42:39 +0700 > > Manish <manish.mishra@nutanix.com> wrote: > > > > > On 29/07/24 7:18 pm, Igor Mammedov wrote: > > > > !-------------------------------------------------------------------| > > > > CAUTION: External Email > > > > > > > > |-------------------------------------------------------------------! > > > > > > > > On Wed, 24 Jul 2024 23:00:13 +0800 > > > > Zhao Liu <zhao1.liu@intel.com> wrote: > > > > > Hi Igor, > > > > > > > > > > On Wed, Jul 24, 2024 at 02:54:32PM +0200, Igor Mammedov wrote: > > > > > > Date: Wed, 24 Jul 2024 14:54:32 +0200 > > > > > > From: Igor Mammedov <imammedo@redhat.com> > > > > > > Subject: Re: [PATCH v1] target/i386: Always set leaf 0x1f > > > > > > X-Mailer: Claws Mail 4.3.0 (GTK 3.24.42; x86_64-redhat-linux-gnu) > > > > > > > > > > > > On Wed, 24 Jul 2024 12:13:28 +0100 > > > > > > John Levon <john.levon@nutanix.com> wrote: > > > > > > > On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote: > > > > > > > > > > Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent > > > > > > > > > > to 0xb by default and workaround windows issue.> > > > > > > > > > > This change adds a > > > > > > > > > > new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in > > > > > > > > > > case extended CPU topology is not configured and behave as before otherwise. > > > > > > > > > repeating question > > > > > > > > > why we need to use extra property instead of just adding 0x1f leaf for CPU models > > > > > > > > > that supposed to have it? > > > > > > > > As i mentioned in earlier response. "Windows expects it only when we have > > > > > > > > set max cpuid level greater than or equal to 0x1f. I mean if it is exposed > > > > > > > > it should not be all zeros. SapphireRapids CPU definition raised cpuid level > > > > > > > > to 0x20, so we starting seeing it with SapphireRapids." > > > > > > > > > > > > > > > > Windows does not expect 0x1f to be present for any CPU model. But if it is > > > > > > > > exposed to the guest, it expects non-zero values. > > > > > > > I think Igor is suggesting: > > > > > > > > > > > > > > - leave x86_cpu_expand_features() alone completely > > > > > > yep, drop that if possible > > > > > > > > > > > > > - change the 0x1f handling to always report topology i.e. never report all > > > > > > > zeroes > > > > > > Do this but only for CPU models that have this leaf per spec, > > > > > > to avoid live migration issues create a new version of CPU model, > > > > > > so it would apply only for new version. This way older versions > > > > > > and migration won't be affected. > > > > > So that in the future every new Intel CPU model will need to always > > > > > enable 0x1f. Sounds like an endless game. So my question is: at what > > > > > point is it ok to consider defaulting to always enable 0x1f and just > > > > > disable it for the old CPU model? > > > > I have suggested to enable 0x1f leaf excluding: > > > > * existing cpu models (versions) > > > > * cpu models that do not have this leaf in real world should > > > > not have it in QEMU either. > > > > > > > > If cpu model already exists, you'd need a new version of cpu model to > > > > enable new leaf by default. > > > > > > > > For completely new cpu model, it could be enabled from the start. > > > > i.e. workflow for enabling that should be the same as with CPU features > > > > (or as you said 'endless game' of copying base model and making it look like > > > > should be according to spec, > > > > but that's the process we currently use for describing CPU models). > > > Igor my understanding was that there are two type of features one is > > > real CPU feature, yes those makes sense in CPU models. But on other hand > > > there are features which are emulated ones i.e. kvm-*, these make sense > > > enabling regardless of any CPU model and we usually use machine types to > > > enable these. Does not this features makes sense in 2nd category. > > I'm not convinced that it applies to any CPU model. > > I'd say it's risky to expose new leaf on CPUs that don't have it to begin with. > > Also what about AMD cpus? Do they also support this leaf? > > Hi Igor, No AMD has different leaf for this purpose, i think 8000_0026. But > should not AMD platforms or guest ignore it in that case. This flag was > anyway getting populated earlier too for nr_dies > 1? > AMD hasn't supported that leaf in QEMU (cpu_x86_cpuid()). Igor's concern is right, as there is no way to ensure how the software will behave (just like Windows doesn't respect the x86 spec). It is least risky to consider this property as the 1st category, because the particular CPU model supports this leaf, and software that runs properly on the corresponding bare metal certainly won't complain about 0x1f or even prefer to have 0x1f in Guest (also as in your Windows example). Disable "cpuid-0x1f-enforce" by default is also fine for me. If you find it's not easy to add 0x1f for the CPU models, I can add 0x1f into CPU models in the future with my other cache model work, to help you fill in the last piece that Igor suggested. Regards, Zhao
On 8/2/2024 12:46 AM, Manish wrote: > > On 01/08/24 8:41 pm, Xiaoyao Li wrote: >> !-------------------------------------------------------------------| >> CAUTION: External Email >> >> |-------------------------------------------------------------------! >> >> On 8/1/2024 6:25 PM, Igor Mammedov wrote: >>> On Thu, 1 Aug 2024 15:36:10 +0530 >>> Manish <manish.mishra@nutanix.com> wrote: >>> >>>> On 31/07/24 9:01 pm, Xiaoyao Li wrote: >>>>> !-------------------------------------------------------------------| >>>>> CAUTION: External Email >>>>> >>>>> |-------------------------------------------------------------------! >>>>> >>>>> On 7/31/2024 4:49 PM, John Levon wrote: >>>>>> On Wed, Jul 31, 2024 at 03:02:15PM +0800, Xiaoyao Li wrote: >>>>>>>> Windows does not expect 0x1f to be present for any CPU model. But >>>>>>>> if it >>>>>>>> is exposed to the guest, it expects non-zero values. >>>>>>> >>>>>>> Please fix Windows! >>>>>> >>>>>> A ticket has been filed with MSFT, we are aware this is a guest bug. >>>>>> >>>>>> But that doesn't really help anybody trying to use Windows right now. >>>>> >>>>> For existing buggy Windows, we can still introduce >>>>> "cpuid-0x1f-enforce" but not make it default on. >>>>> >>>>> People want to boot the buggy Windows needs to opt-in it themselves >>>>> via "-cpu xxx,cpuid-0x1f-enforce=on". This way, we don't have live >>>>> migration issue and it doesn't affect anything. >>>> >>>> >>>> Yes, that makes sense, I will send a updated patch by tomorrow if no >>>> one >>>> has any objection with this. >>> >>> I'd rename it to >>> x-have-cpuid-0x1f-leaf >>> (x-) to reflect that it's not stable/maintained and subject >>> to be dropped in future >>> >>> Also please clearly spell out that it's a temporary workaround for ... >>> in commit message. >> >> I have a patch at hand, to introduce enable_cpuid_0x1f similar as >> enable_cpuid_0xb, for TDX: >> >> https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_intel-2Dstaging_qemu-2Dtdx_commit_de08fd30926bc9d7997af6bd12cfff1b998da8b7&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=c4KON2DiMd-szjwjggQcuUvTsPWblztAL0gVzaHnNmc&m=CIrxjRd0KG4ww4BtSxZysWS0tFYfPGTRBG731EmlUcy7BFlAw3-5PLp2SlKPR83m&s=2gKDZXpqB7wE8v0vtN8l65WBqTtXOUJ-FkMblXcT_Ws&e= >> It is not a temporary solution. So I would suggest to drop (x-). >> If no objection, I think Manish can start from my patch and it only >> misses a property definition for windows case: >> >> DEFINE_PROP_BOOL("cpuid-0x1f", X86CPU, enable_cpuid_0x1f, false); >> >> > Thanks Xiaoyao, I see your patch does what we require but i am not able > to track these patches, are these in line to be merged soon? We need > this urgently. Also as it is just a single line change on top of your > changes, how i manage my change? Should i wait for you to merge and then > send patch or you will be fine to directly include it in your series? Sent it: https://lore.kernel.org/qemu-devel/20240802072426.4016194-1-xiaoyao.li@intel.com/ We can continue the discussion from there. > >> >>> >>>> >>>>>> regards >>>>>> john >>>> >>>> >>>> Thanks >>>> >>>> Manish Mishra >>>> >>> >> > > Thanks > > Manish Mishra >
diff --git a/hw/i386/pc.c b/hw/i386/pc.c index c74931d577..4cab04e443 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -85,6 +85,7 @@ GlobalProperty pc_compat_9_0[] = { { TYPE_X86_CPU, "guest-phys-bits", "0" }, { "sev-guest", "legacy-vm-type", "on" }, { TYPE_X86_CPU, "legacy-multi-node", "on" }, + { TYPE_X86_CPU, "cpuid-0x1f-enforce", "false" }, }; const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0); diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 4688d140c2..7b71083bc9 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -6637,7 +6637,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0x1F: /* V2 Extended Topology Enumeration Leaf */ - if (!x86_has_extended_topo(env->avail_cpu_topo)) { + if (!x86_has_extended_topo(env->avail_cpu_topo) && + !cpu->enable_cpuid_0x1f_enforce) { *eax = *ebx = *ecx = *edx = 0; break; } @@ -7450,7 +7451,8 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) * cpu->vendor_cpuid_only has been unset for compatibility with older * machine types. */ - if (x86_has_extended_topo(env->avail_cpu_topo) && + if ((x86_has_extended_topo(env->avail_cpu_topo) || + cpu->enable_cpuid_0x1f_enforce) && (IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) { x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F); } @@ -8316,6 +8318,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor), DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), + DEFINE_PROP_BOOL("cpuid-0x1f-enforce", X86CPU, enable_cpuid_0x1f_enforce, true), DEFINE_PROP_BOOL("x-vendor-cpuid-only", X86CPU, vendor_cpuid_only, true), DEFINE_PROP_BOOL("x-amd-topoext-features-only", X86CPU, amd_topoext_features_only, true), DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 1e121acef5..49c5641ba8 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2102,6 +2102,11 @@ struct ArchCPU { /* Compatibility bits for old machine types: */ bool enable_cpuid_0xb; + /* Always return values for 0x1f leaf. In cases where extended CPU topology + * is not configured, return values equivalent of leaf 0xb. + */ + bool enable_cpuid_0x1f_enforce; + /* Enable auto level-increase for all CPUID leaves */ bool full_cpuid_auto_level; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index becca2efa5..a9c6f02900 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -1799,6 +1799,7 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, uint32_t limit, i, j; uint32_t unused; struct kvm_cpuid_entry2 *c; + X86CPU *cpu = env_archcpu(env); cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); @@ -1831,7 +1832,8 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, break; } case 0x1f: - if (!x86_has_extended_topo(env->avail_cpu_topo)) { + if (!x86_has_extended_topo(env->avail_cpu_topo) && + !cpu->enable_cpuid_0x1f_enforce) { cpuid_i--; break; }