diff mbox series

[v1] target/i386: Always set leaf 0x1f

Message ID 20240724075226.212882-1-manish.mishra@nutanix.com (mailing list archive)
State New, archived
Headers show
Series [v1] target/i386: Always set leaf 0x1f | expand

Commit Message

Manish July 24, 2024, 7:52 a.m. UTC
From: Manish Mishra <manish.mishra@nutanix.com>

QEMU does not set 0x1f in case VM does not have extended CPU topology
and expects guests to fallback to 0xb. Some versions of Windows does not
like this behavior and expects this leaf to be populated. As a result Windows
VM fails with blue screen.

Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent
to 0xb by default and workaround windows issue. This change adds a
new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in
case extended CPU topology is not configured and behave as before otherwise.

Steps to reproduce this issue:
This requires Windows 10 or 11 VM, with credential guard and HyperV role
enabled. Also this issue shows starting SapphireRapids which raised cpuid
level to 0x20, hence exposing 0x1f to guests.
./qemu-system-x86_64
-drive file=/usr/share/OVMF/OVMF_CODE_4MB.secboot.fd,if=pflash,format=raw,unit=0,readonly=on
-drive file=/usr/share/OVMF/OVMF_VARS_4MB.fd,if=pflash,format=raw
-machine pc-q35,smm=on
-global mch.extended-tseg-mbytes=80
-accel kvm
-m 2G
-cpu SapphireRapids-v1,invtsc=on,vmx=on
-smp 2,maxcpus=240,sockets=120,dies=1,cores=2,threads=1
-hda systest_ahv_win10_cg.qcow2

Signed-off-by: Manish Mishra <manish.mishra@nutanix.com>
---
 hw/i386/pc.c          | 1 +
 target/i386/cpu.c     | 7 +++++--
 target/i386/cpu.h     | 5 +++++
 target/i386/kvm/kvm.c | 4 +++-
 4 files changed, 14 insertions(+), 3 deletions(-)

Comments

Igor Mammedov July 24, 2024, 9 a.m. UTC | #1
On Wed, 24 Jul 2024 07:52:26 +0000
"manish.mishra" <manish.mishra@nutanix.com> wrote:

> From: Manish Mishra <manish.mishra@nutanix.com>
> 
> QEMU does not set 0x1f in case VM does not have extended CPU topology
> and expects guests to fallback to 0xb. Some versions of Windows does not
> like this behavior and expects this leaf to be populated. As a result Windows
> VM fails with blue screen.

BSOD usually has error code displayed, it would be better to specify it here
this way whomever searching for the error, can find this patch/commit

> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent
> to 0xb by default and workaround windows issue.>

> This change adds a
> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in
> case extended CPU topology is not configured and behave as before otherwise.

repeating question
why we need to use extra property instead of just adding 0x1f leaf for CPU models
that supposed to have it?

> Steps to reproduce this issue:
> This requires Windows 10 or 11 VM, with credential guard and HyperV role
> enabled. Also this issue shows starting SapphireRapids which raised cpuid
> level to 0x20, hence exposing 0x1f to guests.
> ./qemu-system-x86_64
> -drive file=/usr/share/OVMF/OVMF_CODE_4MB.secboot.fd,if=pflash,format=raw,unit=0,readonly=on
> -drive file=/usr/share/OVMF/OVMF_VARS_4MB.fd,if=pflash,format=raw
> -machine pc-q35,smm=on
> -global mch.extended-tseg-mbytes=80
> -accel kvm
> -m 2G
> -cpu SapphireRapids-v1,invtsc=on,vmx=on
> -smp 2,maxcpus=240,sockets=120,dies=1,cores=2,threads=1
> -hda systest_ahv_win10_cg.qcow2
> 
> Signed-off-by: Manish Mishra <manish.mishra@nutanix.com>
> ---
>  hw/i386/pc.c          | 1 +
>  target/i386/cpu.c     | 7 +++++--
>  target/i386/cpu.h     | 5 +++++
>  target/i386/kvm/kvm.c | 4 +++-
>  4 files changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index c74931d577..4cab04e443 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -85,6 +85,7 @@ GlobalProperty pc_compat_9_0[] = {
>      { TYPE_X86_CPU, "guest-phys-bits", "0" },
>      { "sev-guest", "legacy-vm-type", "on" },
>      { TYPE_X86_CPU, "legacy-multi-node", "on" },
> +    { TYPE_X86_CPU, "cpuid-0x1f-enforce", "false" },
>  };
>  const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0);
>  
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 4688d140c2..7b71083bc9 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -6637,7 +6637,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>          break;
>      case 0x1F:
>          /* V2 Extended Topology Enumeration Leaf */
> -        if (!x86_has_extended_topo(env->avail_cpu_topo)) {
> +        if (!x86_has_extended_topo(env->avail_cpu_topo) &&
> +            !cpu->enable_cpuid_0x1f_enforce) {
>              *eax = *ebx = *ecx = *edx = 0;
>              break;
>          }
> @@ -7450,7 +7451,8 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
>           * cpu->vendor_cpuid_only has been unset for compatibility with older
>           * machine types.
>           */
> -        if (x86_has_extended_topo(env->avail_cpu_topo) &&
> +        if ((x86_has_extended_topo(env->avail_cpu_topo) ||
> +             cpu->enable_cpuid_0x1f_enforce) &&
>              (IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) {
>              x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F);
>          }
> @@ -8316,6 +8318,7 @@ static Property x86_cpu_properties[] = {
>      DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true),
>      DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor),
>      DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true),
> +    DEFINE_PROP_BOOL("cpuid-0x1f-enforce", X86CPU, enable_cpuid_0x1f_enforce, true),
>      DEFINE_PROP_BOOL("x-vendor-cpuid-only", X86CPU, vendor_cpuid_only, true),
>      DEFINE_PROP_BOOL("x-amd-topoext-features-only", X86CPU, amd_topoext_features_only, true),
>      DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false),
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 1e121acef5..49c5641ba8 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -2102,6 +2102,11 @@ struct ArchCPU {
>      /* Compatibility bits for old machine types: */
>      bool enable_cpuid_0xb;
>  
> +    /* Always return values for 0x1f leaf. In cases where extended CPU topology
> +     * is not configured, return values equivalent of leaf 0xb.
> +     */
> +    bool enable_cpuid_0x1f_enforce;
> +
>      /* Enable auto level-increase for all CPUID leaves */
>      bool full_cpuid_auto_level;
>  
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index becca2efa5..a9c6f02900 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -1799,6 +1799,7 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
>      uint32_t limit, i, j;
>      uint32_t unused;
>      struct kvm_cpuid_entry2 *c;
> +    X86CPU *cpu = env_archcpu(env);
>  
>      cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
>  
> @@ -1831,7 +1832,8 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
>              break;
>          }
>          case 0x1f:
> -            if (!x86_has_extended_topo(env->avail_cpu_topo)) {
> +            if (!x86_has_extended_topo(env->avail_cpu_topo) &&
> +                !cpu->enable_cpuid_0x1f_enforce) {
>                  cpuid_i--;
>                  break;
>              }
Manish July 24, 2024, 10:29 a.m. UTC | #2
Thanks Igor

On 24/07/24 2:30 pm, Igor Mammedov wrote:
> !-------------------------------------------------------------------|
>    CAUTION: External Email
>
> |-------------------------------------------------------------------!
>
> On Wed, 24 Jul 2024 07:52:26 +0000
> "manish.mishra"<manish.mishra@nutanix.com>  wrote:
>
>> From: Manish Mishra<manish.mishra@nutanix.com>
>>
>> QEMU does not set 0x1f in case VM does not have extended CPU topology
>> and expects guests to fallback to 0xb. Some versions of Windows does not
>> like this behavior and expects this leaf to be populated. As a result Windows
>> VM fails with blue screen.
> BSOD usually has error code displayed, it would be better to specify it here
> this way whomever searching for the error, can find this patch/commit
Sorry for earlier response, i do not see blue screen it seems to be 
falling in uefi back quickly and i do not see any details here. I am 
attaching image.
>
>> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent
>> to 0xb by default and workaround windows issue.>
>> This change adds a
>> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in
>> case extended CPU topology is not configured and behave as before otherwise.
> repeating question
> why we need to use extra property instead of just adding 0x1f leaf for CPU models
> that supposed to have it?

As i mentioned in earlier response. "Windows expects it only when we 
have set max cpuid level greater than or equal to 0x1f. I mean if it is 
exposed it should not be all zeros. SapphireRapids CPU definition raised 
cpuid level to 0x20, so we starting seeing it with SapphireRapids."

Windows does not expect 0x1f to be present for any CPU model. But if it 
is exposed to the guest, it expects non-zero values.


>
>> Steps to reproduce this issue:
>> This requires Windows 10 or 11 VM, with credential guard and HyperV role
>> enabled. Also this issue shows starting SapphireRapids which raised cpuid
>> level to 0x20, hence exposing 0x1f to guests.
>> ./qemu-system-x86_64
>> -drive file=/usr/share/OVMF/OVMF_CODE_4MB.secboot.fd,if=pflash,format=raw,unit=0,readonly=on
>> -drive file=/usr/share/OVMF/OVMF_VARS_4MB.fd,if=pflash,format=raw
>> -machine pc-q35,smm=on
>> -global mch.extended-tseg-mbytes=80
>> -accel kvm
>> -m 2G
>> -cpu SapphireRapids-v1,invtsc=on,vmx=on
>> -smp 2,maxcpus=240,sockets=120,dies=1,cores=2,threads=1
>> -hda systest_ahv_win10_cg.qcow2
>>
>> Signed-off-by: Manish Mishra<manish.mishra@nutanix.com>
>> ---
>>   hw/i386/pc.c          | 1 +
>>   target/i386/cpu.c     | 7 +++++--
>>   target/i386/cpu.h     | 5 +++++
>>   target/i386/kvm/kvm.c | 4 +++-
>>   4 files changed, 14 insertions(+), 3 deletions(-)
>>
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index c74931d577..4cab04e443 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -85,6 +85,7 @@ GlobalProperty pc_compat_9_0[] = {
>>       { TYPE_X86_CPU, "guest-phys-bits", "0" },
>>       { "sev-guest", "legacy-vm-type", "on" },
>>       { TYPE_X86_CPU, "legacy-multi-node", "on" },
>> +    { TYPE_X86_CPU, "cpuid-0x1f-enforce", "false" },
>>   };
>>   const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0);
>>   
>> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
>> index 4688d140c2..7b71083bc9 100644
>> --- a/target/i386/cpu.c
>> +++ b/target/i386/cpu.c
>> @@ -6637,7 +6637,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>>           break;
>>       case 0x1F:
>>           /* V2 Extended Topology Enumeration Leaf */
>> -        if (!x86_has_extended_topo(env->avail_cpu_topo)) {
>> +        if (!x86_has_extended_topo(env->avail_cpu_topo) &&
>> +            !cpu->enable_cpuid_0x1f_enforce) {
>>               *eax = *ebx = *ecx = *edx = 0;
>>               break;
>>           }
>> @@ -7450,7 +7451,8 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
>>            * cpu->vendor_cpuid_only has been unset for compatibility with older
>>            * machine types.
>>            */
>> -        if (x86_has_extended_topo(env->avail_cpu_topo) &&
>> +        if ((x86_has_extended_topo(env->avail_cpu_topo) ||
>> +             cpu->enable_cpuid_0x1f_enforce) &&
>>               (IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) {
>>               x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F);
>>           }
>> @@ -8316,6 +8318,7 @@ static Property x86_cpu_properties[] = {
>>       DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true),
>>       DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor),
>>       DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true),
>> +    DEFINE_PROP_BOOL("cpuid-0x1f-enforce", X86CPU, enable_cpuid_0x1f_enforce, true),
>>       DEFINE_PROP_BOOL("x-vendor-cpuid-only", X86CPU, vendor_cpuid_only, true),
>>       DEFINE_PROP_BOOL("x-amd-topoext-features-only", X86CPU, amd_topoext_features_only, true),
>>       DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false),
>> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
>> index 1e121acef5..49c5641ba8 100644
>> --- a/target/i386/cpu.h
>> +++ b/target/i386/cpu.h
>> @@ -2102,6 +2102,11 @@ struct ArchCPU {
>>       /* Compatibility bits for old machine types: */
>>       bool enable_cpuid_0xb;
>>   
>> +    /* Always return values for 0x1f leaf. In cases where extended CPU topology
>> +     * is not configured, return values equivalent of leaf 0xb.
>> +     */
>> +    bool enable_cpuid_0x1f_enforce;
>> +
>>       /* Enable auto level-increase for all CPUID leaves */
>>       bool full_cpuid_auto_level;
>>   
>> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
>> index becca2efa5..a9c6f02900 100644
>> --- a/target/i386/kvm/kvm.c
>> +++ b/target/i386/kvm/kvm.c
>> @@ -1799,6 +1799,7 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
>>       uint32_t limit, i, j;
>>       uint32_t unused;
>>       struct kvm_cpuid_entry2 *c;
>> +    X86CPU *cpu = env_archcpu(env);
>>   
>>       cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
>>   
>> @@ -1831,7 +1832,8 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
>>               break;
>>           }
>>           case 0x1f:
>> -            if (!x86_has_extended_topo(env->avail_cpu_topo)) {
>> +            if (!x86_has_extended_topo(env->avail_cpu_topo) &&
>> +                !cpu->enable_cpuid_0x1f_enforce) {
>>                   cpuid_i--;
>>                   break;
>>               }

Thanks

Manish Mishra
John Levon July 24, 2024, 11:13 a.m. UTC | #3
On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote:

> > > Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent
> > > to 0xb by default and workaround windows issue.>
> > > This change adds a
> > > new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in
> > > case extended CPU topology is not configured and behave as before otherwise.
> > repeating question
> > why we need to use extra property instead of just adding 0x1f leaf for CPU models
> > that supposed to have it?
> 
> As i mentioned in earlier response. "Windows expects it only when we have
> set max cpuid level greater than or equal to 0x1f. I mean if it is exposed
> it should not be all zeros. SapphireRapids CPU definition raised cpuid level
> to 0x20, so we starting seeing it with SapphireRapids."
> 
> Windows does not expect 0x1f to be present for any CPU model. But if it is
> exposed to the guest, it expects non-zero values.

I think Igor is suggesting:

 - leave x86_cpu_expand_features() alone completely

 - change the 0x1f handling to always report topology i.e. never report all
   zeroes

Yes, that would mean that if something requests 0x1f leaf even though the max
leaf is lower, they'd get data back, but it's not clear why that'd be an issue?

regards
john
Manish July 24, 2024, 12:38 p.m. UTC | #4
On 24/07/24 4:43 pm, John Levon wrote:
> On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote:
>
>>>> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent
>>>> to 0xb by default and workaround windows issue.>
>>>> This change adds a
>>>> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in
>>>> case extended CPU topology is not configured and behave as before otherwise.
>>> repeating question
>>> why we need to use extra property instead of just adding 0x1f leaf for CPU models
>>> that supposed to have it?
>> As i mentioned in earlier response. "Windows expects it only when we have
>> set max cpuid level greater than or equal to 0x1f. I mean if it is exposed
>> it should not be all zeros. SapphireRapids CPU definition raised cpuid level
>> to 0x20, so we starting seeing it with SapphireRapids."
>>
>> Windows does not expect 0x1f to be present for any CPU model. But if it is
>> exposed to the guest, it expects non-zero values.
> I think Igor is suggesting:
>
>   - leave x86_cpu_expand_features() alone completely
>
>   - change the 0x1f handling to always report topology i.e. never report all
>     zeroes
>
> Yes, that would mean that if something requests 0x1f leaf even though the max
> leaf is lower, they'd get data back, but it's not clear why that'd be an issue?
>
> regards
> john


Thanks John,

I believe making such changes without any user or machine-type control 
can be risky for live migrations?


Thanks

Manish Mishra
Igor Mammedov July 24, 2024, 12:54 p.m. UTC | #5
On Wed, 24 Jul 2024 12:13:28 +0100
John Levon <john.levon@nutanix.com> wrote:

> On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote:
> 
> > > > Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent
> > > > to 0xb by default and workaround windows issue.>
> > > > This change adds a
> > > > new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in
> > > > case extended CPU topology is not configured and behave as before otherwise.  
> > > repeating question
> > > why we need to use extra property instead of just adding 0x1f leaf for CPU models
> > > that supposed to have it?  
> > 
> > As i mentioned in earlier response. "Windows expects it only when we have
> > set max cpuid level greater than or equal to 0x1f. I mean if it is exposed
> > it should not be all zeros. SapphireRapids CPU definition raised cpuid level
> > to 0x20, so we starting seeing it with SapphireRapids."
> > 
> > Windows does not expect 0x1f to be present for any CPU model. But if it is
> > exposed to the guest, it expects non-zero values.  
> 
> I think Igor is suggesting:
> 
>  - leave x86_cpu_expand_features() alone completely
yep, drop that if possible

 
>  - change the 0x1f handling to always report topology i.e. never report all
>    zeroes

Do this but only for CPU models that have this leaf per spec,
to avoid live migration issues create a new version of CPU model,
so it would apply only for new version. This way older versions
and migration won't be affected. 

> 
> Yes, that would mean that if something requests 0x1f leaf even though the max
> leaf is lower, they'd get data back, but it's not clear why that'd be an issue?
> 
> regards
> john
>
Manish July 24, 2024, 1:50 p.m. UTC | #6
On 24/07/24 6:24 pm, Igor Mammedov wrote:
> !-------------------------------------------------------------------|
>    CAUTION: External Email
>
> |-------------------------------------------------------------------!
>
> On Wed, 24 Jul 2024 12:13:28 +0100
> John Levon <john.levon@nutanix.com> wrote:
>
>> On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote:
>>
>>>>> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent
>>>>> to 0xb by default and workaround windows issue.>
>>>>> This change adds a
>>>>> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in
>>>>> case extended CPU topology is not configured and behave as before otherwise.
>>>> repeating question
>>>> why we need to use extra property instead of just adding 0x1f leaf for CPU models
>>>> that supposed to have it?
>>> As i mentioned in earlier response. "Windows expects it only when we have
>>> set max cpuid level greater than or equal to 0x1f. I mean if it is exposed
>>> it should not be all zeros. SapphireRapids CPU definition raised cpuid level
>>> to 0x20, so we starting seeing it with SapphireRapids."
>>>
>>> Windows does not expect 0x1f to be present for any CPU model. But if it is
>>> exposed to the guest, it expects non-zero values.
>> I think Igor is suggesting:
>>
>>   - leave x86_cpu_expand_features() alone completely
> yep, drop that if possible
This was suggested by Zhao, probably could related to TDX work 
mentioned? As i did not see any harm, i did not mind changing it.
>
>   
>>   - change the 0x1f handling to always report topology i.e. never report all
>>     zeroes
> Do this but only for CPU models that have this leaf per spec,
> to avoid live migration issues create a new version of CPU model,
> so it would apply only for new version. This way older versions
> and migration won't be affected.

Yes, I can do that too. But i beleive it may not be a CPU model related 
property or bug. What if someone uses older CPU model but explicitily 
passes some extra flags to include 0x1f?

>
>> Yes, that would mean that if something requests 0x1f leaf even though the max
>> leaf is lower, they'd get data back, but it's not clear why that'd be an issue?
>>
>> regards
>> john
>>

Thanks

Manish Mishra
Zhao Liu July 24, 2024, 3 p.m. UTC | #7
Hi Igor,

On Wed, Jul 24, 2024 at 02:54:32PM +0200, Igor Mammedov wrote:
> Date: Wed, 24 Jul 2024 14:54:32 +0200
> From: Igor Mammedov <imammedo@redhat.com>
> Subject: Re: [PATCH v1] target/i386: Always set leaf 0x1f
> X-Mailer: Claws Mail 4.3.0 (GTK 3.24.42; x86_64-redhat-linux-gnu)
> 
> On Wed, 24 Jul 2024 12:13:28 +0100
> John Levon <john.levon@nutanix.com> wrote:
> 
> > On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote:
> > 
> > > > > Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent
> > > > > to 0xb by default and workaround windows issue.>
> > > > > This change adds a
> > > > > new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in
> > > > > case extended CPU topology is not configured and behave as before otherwise.  
> > > > repeating question
> > > > why we need to use extra property instead of just adding 0x1f leaf for CPU models
> > > > that supposed to have it?  
> > > 
> > > As i mentioned in earlier response. "Windows expects it only when we have
> > > set max cpuid level greater than or equal to 0x1f. I mean if it is exposed
> > > it should not be all zeros. SapphireRapids CPU definition raised cpuid level
> > > to 0x20, so we starting seeing it with SapphireRapids."
> > > 
> > > Windows does not expect 0x1f to be present for any CPU model. But if it is
> > > exposed to the guest, it expects non-zero values.  
> > 
> > I think Igor is suggesting:
> > 
> >  - leave x86_cpu_expand_features() alone completely
> yep, drop that if possible
> 
>  
> >  - change the 0x1f handling to always report topology i.e. never report all
> >    zeroes
> 
> Do this but only for CPU models that have this leaf per spec,
> to avoid live migration issues create a new version of CPU model,
> so it would apply only for new version. This way older versions
> and migration won't be affected. 

So that in the future every new Intel CPU model will need to always
enable 0x1f. Sounds like an endless game. So my question is: at what
point is it ok to consider defaulting to always enable 0x1f and just
disable it for the old CPU model?

Thanks,
Zhao
Manish July 29, 2024, 6:49 a.m. UTC | #8
Hi Igor, Anything further on this?

Thanks

Manish Mishra


On 24/07/24 8:30 pm, Zhao Liu wrote:
> !-------------------------------------------------------------------|
>    CAUTION: External Email
>
> |-------------------------------------------------------------------!
>
> Hi Igor,
>
> On Wed, Jul 24, 2024 at 02:54:32PM +0200, Igor Mammedov wrote:
>> Date: Wed, 24 Jul 2024 14:54:32 +0200
>> From: Igor Mammedov <imammedo@redhat.com>
>> Subject: Re: [PATCH v1] target/i386: Always set leaf 0x1f
>> X-Mailer: Claws Mail 4.3.0 (GTK 3.24.42; x86_64-redhat-linux-gnu)
>>
>> On Wed, 24 Jul 2024 12:13:28 +0100
>> John Levon <john.levon@nutanix.com> wrote:
>>
>>> On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote:
>>>
>>>>>> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent
>>>>>> to 0xb by default and workaround windows issue.>
>>>>>> This change adds a
>>>>>> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in
>>>>>> case extended CPU topology is not configured and behave as before otherwise.
>>>>> repeating question
>>>>> why we need to use extra property instead of just adding 0x1f leaf for CPU models
>>>>> that supposed to have it?
>>>> As i mentioned in earlier response. "Windows expects it only when we have
>>>> set max cpuid level greater than or equal to 0x1f. I mean if it is exposed
>>>> it should not be all zeros. SapphireRapids CPU definition raised cpuid level
>>>> to 0x20, so we starting seeing it with SapphireRapids."
>>>>
>>>> Windows does not expect 0x1f to be present for any CPU model. But if it is
>>>> exposed to the guest, it expects non-zero values.
>>> I think Igor is suggesting:
>>>
>>>   - leave x86_cpu_expand_features() alone completely
>> yep, drop that if possible
>>
>>   
>>>   - change the 0x1f handling to always report topology i.e. never report all
>>>     zeroes
>> Do this but only for CPU models that have this leaf per spec,
>> to avoid live migration issues create a new version of CPU model,
>> so it would apply only for new version. This way older versions
>> and migration won't be affected.
> So that in the future every new Intel CPU model will need to always
> enable 0x1f. Sounds like an endless game. So my question is: at what
> point is it ok to consider defaulting to always enable 0x1f and just
> disable it for the old CPU model?
>
> Thanks,
> Zhao
>
Igor Mammedov July 29, 2024, 12:18 p.m. UTC | #9
On Wed, 24 Jul 2024 23:00:13 +0800
Zhao Liu <zhao1.liu@intel.com> wrote:

> Hi Igor,
> 
> On Wed, Jul 24, 2024 at 02:54:32PM +0200, Igor Mammedov wrote:
> > Date: Wed, 24 Jul 2024 14:54:32 +0200
> > From: Igor Mammedov <imammedo@redhat.com>
> > Subject: Re: [PATCH v1] target/i386: Always set leaf 0x1f
> > X-Mailer: Claws Mail 4.3.0 (GTK 3.24.42; x86_64-redhat-linux-gnu)
> > 
> > On Wed, 24 Jul 2024 12:13:28 +0100
> > John Levon <john.levon@nutanix.com> wrote:
> >   
> > > On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote:
> > >   
> > > > > > Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent
> > > > > > to 0xb by default and workaround windows issue.>
> > > > > > This change adds a
> > > > > > new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in
> > > > > > case extended CPU topology is not configured and behave as before otherwise.    
> > > > > repeating question
> > > > > why we need to use extra property instead of just adding 0x1f leaf for CPU models
> > > > > that supposed to have it?    
> > > > 
> > > > As i mentioned in earlier response. "Windows expects it only when we have
> > > > set max cpuid level greater than or equal to 0x1f. I mean if it is exposed
> > > > it should not be all zeros. SapphireRapids CPU definition raised cpuid level
> > > > to 0x20, so we starting seeing it with SapphireRapids."
> > > > 
> > > > Windows does not expect 0x1f to be present for any CPU model. But if it is
> > > > exposed to the guest, it expects non-zero values.    
> > > 
> > > I think Igor is suggesting:
> > > 
> > >  - leave x86_cpu_expand_features() alone completely  
> > yep, drop that if possible
> > 
> >    
> > >  - change the 0x1f handling to always report topology i.e. never report all
> > >    zeroes  
> > 
> > Do this but only for CPU models that have this leaf per spec,
> > to avoid live migration issues create a new version of CPU model,
> > so it would apply only for new version. This way older versions
> > and migration won't be affected.   
> 
> So that in the future every new Intel CPU model will need to always
> enable 0x1f. Sounds like an endless game. So my question is: at what
> point is it ok to consider defaulting to always enable 0x1f and just
> disable it for the old CPU model?

I have suggested to enable 0x1f leaf excluding:
   * existing cpu models (versions)
   * cpu models that do not have this leaf in real world should
     not have it in QEMU either.

If cpu model already exists, you'd need a new version of cpu model to
enable new leaf by default.

For completely new cpu model, it could be enabled from the start.
i.e. workflow for enabling that should be the same as with CPU features
(or as you said 'endless game' of copying base model and making it look like
should be according to spec,
but that's the process we currently use for describing CPU models).
 
> Thanks,
> Zhao
>
Manish July 29, 2024, 12:42 p.m. UTC | #10
On 29/07/24 7:18 pm, Igor Mammedov wrote:
> !-------------------------------------------------------------------|
>    CAUTION: External Email
>
> |-------------------------------------------------------------------!
>
> On Wed, 24 Jul 2024 23:00:13 +0800
> Zhao Liu <zhao1.liu@intel.com> wrote:
>
>> Hi Igor,
>>
>> On Wed, Jul 24, 2024 at 02:54:32PM +0200, Igor Mammedov wrote:
>>> Date: Wed, 24 Jul 2024 14:54:32 +0200
>>> From: Igor Mammedov <imammedo@redhat.com>
>>> Subject: Re: [PATCH v1] target/i386: Always set leaf 0x1f
>>> X-Mailer: Claws Mail 4.3.0 (GTK 3.24.42; x86_64-redhat-linux-gnu)
>>>
>>> On Wed, 24 Jul 2024 12:13:28 +0100
>>> John Levon <john.levon@nutanix.com> wrote:
>>>    
>>>> On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote:
>>>>    
>>>>>>> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent
>>>>>>> to 0xb by default and workaround windows issue.>
>>>>>>> This change adds a
>>>>>>> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in
>>>>>>> case extended CPU topology is not configured and behave as before otherwise.
>>>>>> repeating question
>>>>>> why we need to use extra property instead of just adding 0x1f leaf for CPU models
>>>>>> that supposed to have it?
>>>>> As i mentioned in earlier response. "Windows expects it only when we have
>>>>> set max cpuid level greater than or equal to 0x1f. I mean if it is exposed
>>>>> it should not be all zeros. SapphireRapids CPU definition raised cpuid level
>>>>> to 0x20, so we starting seeing it with SapphireRapids."
>>>>>
>>>>> Windows does not expect 0x1f to be present for any CPU model. But if it is
>>>>> exposed to the guest, it expects non-zero values.
>>>> I think Igor is suggesting:
>>>>
>>>>   - leave x86_cpu_expand_features() alone completely
>>> yep, drop that if possible
>>>
>>>     
>>>>   - change the 0x1f handling to always report topology i.e. never report all
>>>>     zeroes
>>> Do this but only for CPU models that have this leaf per spec,
>>> to avoid live migration issues create a new version of CPU model,
>>> so it would apply only for new version. This way older versions
>>> and migration won't be affected.
>> So that in the future every new Intel CPU model will need to always
>> enable 0x1f. Sounds like an endless game. So my question is: at what
>> point is it ok to consider defaulting to always enable 0x1f and just
>> disable it for the old CPU model?
> I have suggested to enable 0x1f leaf excluding:
>     * existing cpu models (versions)
>     * cpu models that do not have this leaf in real world should
>       not have it in QEMU either.
>
> If cpu model already exists, you'd need a new version of cpu model to
> enable new leaf by default.
>
> For completely new cpu model, it could be enabled from the start.
> i.e. workflow for enabling that should be the same as with CPU features
> (or as you said 'endless game' of copying base model and making it look like
> should be according to spec,
> but that's the process we currently use for describing CPU models).

Igor my understanding was that there are two type of features one is 
real CPU feature, yes those makes sense in CPU models. But on other hand 
there are features which are emulated ones i.e. kvm-*, these make sense 
enabling regardless of any CPU model and we usually use machine types to 
enable these. Does not this features makes sense in 2nd category.


>   
>> Thanks,
>> Zhao
>>

Thanks

Manish Mishra
Igor Mammedov July 30, 2024, 11:39 a.m. UTC | #11
On Mon, 29 Jul 2024 19:42:39 +0700
Manish <manish.mishra@nutanix.com> wrote:

> On 29/07/24 7:18 pm, Igor Mammedov wrote:
> > !-------------------------------------------------------------------|
> >    CAUTION: External Email
> >
> > |-------------------------------------------------------------------!
> >
> > On Wed, 24 Jul 2024 23:00:13 +0800
> > Zhao Liu <zhao1.liu@intel.com> wrote:
> >  
> >> Hi Igor,
> >>
> >> On Wed, Jul 24, 2024 at 02:54:32PM +0200, Igor Mammedov wrote:  
> >>> Date: Wed, 24 Jul 2024 14:54:32 +0200
> >>> From: Igor Mammedov <imammedo@redhat.com>
> >>> Subject: Re: [PATCH v1] target/i386: Always set leaf 0x1f
> >>> X-Mailer: Claws Mail 4.3.0 (GTK 3.24.42; x86_64-redhat-linux-gnu)
> >>>
> >>> On Wed, 24 Jul 2024 12:13:28 +0100
> >>> John Levon <john.levon@nutanix.com> wrote:
> >>>      
> >>>> On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote:
> >>>>      
> >>>>>>> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent
> >>>>>>> to 0xb by default and workaround windows issue.>
> >>>>>>> This change adds a
> >>>>>>> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in
> >>>>>>> case extended CPU topology is not configured and behave as before otherwise.  
> >>>>>> repeating question
> >>>>>> why we need to use extra property instead of just adding 0x1f leaf for CPU models
> >>>>>> that supposed to have it?  
> >>>>> As i mentioned in earlier response. "Windows expects it only when we have
> >>>>> set max cpuid level greater than or equal to 0x1f. I mean if it is exposed
> >>>>> it should not be all zeros. SapphireRapids CPU definition raised cpuid level
> >>>>> to 0x20, so we starting seeing it with SapphireRapids."
> >>>>>
> >>>>> Windows does not expect 0x1f to be present for any CPU model. But if it is
> >>>>> exposed to the guest, it expects non-zero values.  
> >>>> I think Igor is suggesting:
> >>>>
> >>>>   - leave x86_cpu_expand_features() alone completely  
> >>> yep, drop that if possible
> >>>
> >>>       
> >>>>   - change the 0x1f handling to always report topology i.e. never report all
> >>>>     zeroes  
> >>> Do this but only for CPU models that have this leaf per spec,
> >>> to avoid live migration issues create a new version of CPU model,
> >>> so it would apply only for new version. This way older versions
> >>> and migration won't be affected.  
> >> So that in the future every new Intel CPU model will need to always
> >> enable 0x1f. Sounds like an endless game. So my question is: at what
> >> point is it ok to consider defaulting to always enable 0x1f and just
> >> disable it for the old CPU model?  
> > I have suggested to enable 0x1f leaf excluding:
> >     * existing cpu models (versions)
> >     * cpu models that do not have this leaf in real world should
> >       not have it in QEMU either.
> >
> > If cpu model already exists, you'd need a new version of cpu model to
> > enable new leaf by default.
> >
> > For completely new cpu model, it could be enabled from the start.
> > i.e. workflow for enabling that should be the same as with CPU features
> > (or as you said 'endless game' of copying base model and making it look like
> > should be according to spec,
> > but that's the process we currently use for describing CPU models).  
> 
> Igor my understanding was that there are two type of features one is 
> real CPU feature, yes those makes sense in CPU models. But on other hand 
> there are features which are emulated ones i.e. kvm-*, these make sense 
> enabling regardless of any CPU model and we usually use machine types to 
> enable these. Does not this features makes sense in 2nd category.

I'm not convinced that it applies to any CPU model.
I'd say it's risky to expose new leaf on CPUs that don't have it to begin with.
Also what about AMD cpus? Do they also support this leaf?

> 
> 
> >     
> >> Thanks,
> >> Zhao
> >>  
> 
> Thanks
> 
> Manish Mishra
>
Xiaoyao Li July 31, 2024, 7:02 a.m. UTC | #12
On 7/24/2024 6:29 PM, Manish wrote:
> Thanks Igor
> 
> On 24/07/24 2:30 pm, Igor Mammedov wrote:
>> !-------------------------------------------------------------------|
>>    CAUTION: External Email
>>
>> |-------------------------------------------------------------------!
>>
>> On Wed, 24 Jul 2024 07:52:26 +0000
>> "manish.mishra"<manish.mishra@nutanix.com>  wrote:
>>
>>> From: Manish Mishra<manish.mishra@nutanix.com>
>>>
>>> QEMU does not set 0x1f in case VM does not have extended CPU topology
>>> and expects guests to fallback to 0xb. Some versions of Windows does not
>>> like this behavior and expects this leaf to be populated. As a result 
>>> Windows
>>> VM fails with blue screen.
>> BSOD usually has error code displayed, it would be better to specify 
>> it here
>> this way whomever searching for the error, can find this patch/commit
> Sorry for earlier response, i do not see blue screen it seems to be 
> falling in uefi back quickly and i do not see any details here. I am 
> attaching image.
>>
>>> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent
>>> to 0xb by default and workaround windows issue.>
>>> This change adds a
>>> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in
>>> case extended CPU topology is not configured and behave as before 
>>> otherwise.
>> repeating question
>> why we need to use extra property instead of just adding 0x1f leaf for 
>> CPU models
>> that supposed to have it?
> 
> As i mentioned in earlier response. "Windows expects it only when we 
> have set max cpuid level greater than or equal to 0x1f. I mean if it is 
> exposed it should not be all zeros. SapphireRapids CPU definition raised 
> cpuid level to 0x20, so we starting seeing it with SapphireRapids."
> 
> Windows does not expect 0x1f to be present for any CPU model. But if it 
> is exposed to the guest, it expects non-zero values.

Please fix Windows!

No guarantee from Intel that leaf 0x1f should report non-zero value when 
max cpuid level >= 0x1f.

Please see SDM.vol2.CPUID chapter.

INPUT EAX = 1FH: Returns V2 Extended Topology Information

When CPUID executes with EAX set to 1FH, the processor returns 
information about extended topology enumeration data. Software must 
detect the presence of CPUID leaf 1FH by verifying (a) the highest leaf 
index supported by CPUID is >= 1FH, and (b) CPUID.1FH:EBX[15:0] reports 
a non-zero value. See Table 3-17.
John Levon July 31, 2024, 8:49 a.m. UTC | #13
On Wed, Jul 31, 2024 at 03:02:15PM +0800, Xiaoyao Li wrote:

> > Windows does not expect 0x1f to be present for any CPU model. But if it
> > is exposed to the guest, it expects non-zero values.
> 
> Please fix Windows!

A ticket has been filed with MSFT, we are aware this is a guest bug.

But that doesn't really help anybody trying to use Windows right now.

regards
john
Manish July 31, 2024, 2 p.m. UTC | #14
On 30/07/24 6:39 pm, Igor Mammedov wrote:
> !-------------------------------------------------------------------|
>    CAUTION: External Email
>
> |-------------------------------------------------------------------!
>
> On Mon, 29 Jul 2024 19:42:39 +0700
> Manish <manish.mishra@nutanix.com> wrote:
>
>> On 29/07/24 7:18 pm, Igor Mammedov wrote:
>>> !-------------------------------------------------------------------|
>>>     CAUTION: External Email
>>>
>>> |-------------------------------------------------------------------!
>>>
>>> On Wed, 24 Jul 2024 23:00:13 +0800
>>> Zhao Liu <zhao1.liu@intel.com> wrote:
>>>   
>>>> Hi Igor,
>>>>
>>>> On Wed, Jul 24, 2024 at 02:54:32PM +0200, Igor Mammedov wrote:
>>>>> Date: Wed, 24 Jul 2024 14:54:32 +0200
>>>>> From: Igor Mammedov <imammedo@redhat.com>
>>>>> Subject: Re: [PATCH v1] target/i386: Always set leaf 0x1f
>>>>> X-Mailer: Claws Mail 4.3.0 (GTK 3.24.42; x86_64-redhat-linux-gnu)
>>>>>
>>>>> On Wed, 24 Jul 2024 12:13:28 +0100
>>>>> John Levon <john.levon@nutanix.com> wrote:
>>>>>       
>>>>>> On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote:
>>>>>>       
>>>>>>>>> Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent
>>>>>>>>> to 0xb by default and workaround windows issue.>
>>>>>>>>> This change adds a
>>>>>>>>> new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in
>>>>>>>>> case extended CPU topology is not configured and behave as before otherwise.
>>>>>>>> repeating question
>>>>>>>> why we need to use extra property instead of just adding 0x1f leaf for CPU models
>>>>>>>> that supposed to have it?
>>>>>>> As i mentioned in earlier response. "Windows expects it only when we have
>>>>>>> set max cpuid level greater than or equal to 0x1f. I mean if it is exposed
>>>>>>> it should not be all zeros. SapphireRapids CPU definition raised cpuid level
>>>>>>> to 0x20, so we starting seeing it with SapphireRapids."
>>>>>>>
>>>>>>> Windows does not expect 0x1f to be present for any CPU model. But if it is
>>>>>>> exposed to the guest, it expects non-zero values.
>>>>>> I think Igor is suggesting:
>>>>>>
>>>>>>    - leave x86_cpu_expand_features() alone completely
>>>>> yep, drop that if possible
>>>>>
>>>>>        
>>>>>>    - change the 0x1f handling to always report topology i.e. never report all
>>>>>>      zeroes
>>>>> Do this but only for CPU models that have this leaf per spec,
>>>>> to avoid live migration issues create a new version of CPU model,
>>>>> so it would apply only for new version. This way older versions
>>>>> and migration won't be affected.
>>>> So that in the future every new Intel CPU model will need to always
>>>> enable 0x1f. Sounds like an endless game. So my question is: at what
>>>> point is it ok to consider defaulting to always enable 0x1f and just
>>>> disable it for the old CPU model?
>>> I have suggested to enable 0x1f leaf excluding:
>>>      * existing cpu models (versions)
>>>      * cpu models that do not have this leaf in real world should
>>>        not have it in QEMU either.
>>>
>>> If cpu model already exists, you'd need a new version of cpu model to
>>> enable new leaf by default.
>>>
>>> For completely new cpu model, it could be enabled from the start.
>>> i.e. workflow for enabling that should be the same as with CPU features
>>> (or as you said 'endless game' of copying base model and making it look like
>>> should be according to spec,
>>> but that's the process we currently use for describing CPU models).
>> Igor my understanding was that there are two type of features one is
>> real CPU feature, yes those makes sense in CPU models. But on other hand
>> there are features which are emulated ones i.e. kvm-*, these make sense
>> enabling regardless of any CPU model and we usually use machine types to
>> enable these. Does not this features makes sense in 2nd category.
> I'm not convinced that it applies to any CPU model.
> I'd say it's risky to expose new leaf on CPUs that don't have it to begin with.
> Also what about AMD cpus? Do they also support this leaf?

Hi Igor, No AMD has different leaf for this purpose, i think 8000_0026. 
But should not AMD platforms or guest ignore it in that case. This flag 
was anyway getting populated earlier too for nr_dies > 1?


>
>>
>>>      
>>>> Thanks,
>>>> Zhao
>>>>   
>> Thanks
>>
>> Manish Mishra
>>
Thanks

Manish
Xiaoyao Li July 31, 2024, 3:31 p.m. UTC | #15
On 7/31/2024 4:49 PM, John Levon wrote:
> On Wed, Jul 31, 2024 at 03:02:15PM +0800, Xiaoyao Li wrote:
> 
>>> Windows does not expect 0x1f to be present for any CPU model. But if it
>>> is exposed to the guest, it expects non-zero values.
>>
>> Please fix Windows!
> 
> A ticket has been filed with MSFT, we are aware this is a guest bug.
> 
> But that doesn't really help anybody trying to use Windows right now.

For existing buggy Windows, we can still introduce "cpuid-0x1f-enforce" 
but not make it default on.

People want to boot the buggy Windows needs to opt-in it themselves via 
"-cpu xxx,cpuid-0x1f-enforce=on". This way, we don't have live migration 
issue and it doesn't affect anything.

> regards
> john
Manish Aug. 1, 2024, 10:06 a.m. UTC | #16
On 31/07/24 9:01 pm, Xiaoyao Li wrote:
> !-------------------------------------------------------------------|
>  CAUTION: External Email
>
> |-------------------------------------------------------------------!
>
> On 7/31/2024 4:49 PM, John Levon wrote:
>> On Wed, Jul 31, 2024 at 03:02:15PM +0800, Xiaoyao Li wrote:
>>
>>>> Windows does not expect 0x1f to be present for any CPU model. But 
>>>> if it
>>>> is exposed to the guest, it expects non-zero values.
>>>
>>> Please fix Windows!
>>
>> A ticket has been filed with MSFT, we are aware this is a guest bug.
>>
>> But that doesn't really help anybody trying to use Windows right now.
>
> For existing buggy Windows, we can still introduce 
> "cpuid-0x1f-enforce" but not make it default on.
>
> People want to boot the buggy Windows needs to opt-in it themselves 
> via "-cpu xxx,cpuid-0x1f-enforce=on". This way, we don't have live 
> migration issue and it doesn't affect anything.


Yes, that makes sense, I will send a updated patch by tomorrow if no one 
has any objection with this.

>
>> regards
>> john


Thanks

Manish Mishra
Igor Mammedov Aug. 1, 2024, 10:25 a.m. UTC | #17
On Thu, 1 Aug 2024 15:36:10 +0530
Manish <manish.mishra@nutanix.com> wrote:

> On 31/07/24 9:01 pm, Xiaoyao Li wrote:
> > !-------------------------------------------------------------------|
> >  CAUTION: External Email
> >
> > |-------------------------------------------------------------------!
> >
> > On 7/31/2024 4:49 PM, John Levon wrote:  
> >> On Wed, Jul 31, 2024 at 03:02:15PM +0800, Xiaoyao Li wrote:
> >>  
> >>>> Windows does not expect 0x1f to be present for any CPU model. But 
> >>>> if it
> >>>> is exposed to the guest, it expects non-zero values.  
> >>>
> >>> Please fix Windows!  
> >>
> >> A ticket has been filed with MSFT, we are aware this is a guest bug.
> >>
> >> But that doesn't really help anybody trying to use Windows right now.  
> >
> > For existing buggy Windows, we can still introduce 
> > "cpuid-0x1f-enforce" but not make it default on.
> >
> > People want to boot the buggy Windows needs to opt-in it themselves 
> > via "-cpu xxx,cpuid-0x1f-enforce=on". This way, we don't have live 
> > migration issue and it doesn't affect anything.  
> 
> 
> Yes, that makes sense, I will send a updated patch by tomorrow if no one 
> has any objection with this.

I'd rename it to
   x-have-cpuid-0x1f-leaf
(x-) to reflect that it's not stable/maintained and subject
to be dropped in future 

Also please clearly spell out that it's a temporary workaround for ...
in commit message.


> 
> >  
> >> regards
> >> john  
> 
> 
> Thanks
> 
> Manish Mishra
>
Xiaoyao Li Aug. 1, 2024, 3:11 p.m. UTC | #18
On 8/1/2024 6:25 PM, Igor Mammedov wrote:
> On Thu, 1 Aug 2024 15:36:10 +0530
> Manish <manish.mishra@nutanix.com> wrote:
> 
>> On 31/07/24 9:01 pm, Xiaoyao Li wrote:
>>> !-------------------------------------------------------------------|
>>>   CAUTION: External Email
>>>
>>> |-------------------------------------------------------------------!
>>>
>>> On 7/31/2024 4:49 PM, John Levon wrote:
>>>> On Wed, Jul 31, 2024 at 03:02:15PM +0800, Xiaoyao Li wrote:
>>>>   
>>>>>> Windows does not expect 0x1f to be present for any CPU model. But
>>>>>> if it
>>>>>> is exposed to the guest, it expects non-zero values.
>>>>>
>>>>> Please fix Windows!
>>>>
>>>> A ticket has been filed with MSFT, we are aware this is a guest bug.
>>>>
>>>> But that doesn't really help anybody trying to use Windows right now.
>>>
>>> For existing buggy Windows, we can still introduce
>>> "cpuid-0x1f-enforce" but not make it default on.
>>>
>>> People want to boot the buggy Windows needs to opt-in it themselves
>>> via "-cpu xxx,cpuid-0x1f-enforce=on". This way, we don't have live
>>> migration issue and it doesn't affect anything.
>>
>>
>> Yes, that makes sense, I will send a updated patch by tomorrow if no one
>> has any objection with this.
> 
> I'd rename it to
>     x-have-cpuid-0x1f-leaf
> (x-) to reflect that it's not stable/maintained and subject
> to be dropped in future
> 
> Also please clearly spell out that it's a temporary workaround for ...
> in commit message.

I have a patch at hand, to introduce enable_cpuid_0x1f similar as 
enable_cpuid_0xb, for TDX:

https://github.com/intel-staging/qemu-tdx/commit/de08fd30926bc9d7997af6bd12cfff1b998da8b7

It is not a temporary solution. So I would suggest to drop (x-).
If no objection, I think Manish can start from my patch and it only 
misses a property definition for windows case:

   DEFINE_PROP_BOOL("cpuid-0x1f", X86CPU, enable_cpuid_0x1f, false);



> 
>>
>>>   
>>>> regards
>>>> john
>>
>>
>> Thanks
>>
>> Manish Mishra
>>
>
Manish Aug. 1, 2024, 4:46 p.m. UTC | #19
On 01/08/24 8:41 pm, Xiaoyao Li wrote:
> !-------------------------------------------------------------------|
>  CAUTION: External Email
>
> |-------------------------------------------------------------------!
>
> On 8/1/2024 6:25 PM, Igor Mammedov wrote:
>> On Thu, 1 Aug 2024 15:36:10 +0530
>> Manish <manish.mishra@nutanix.com> wrote:
>>
>>> On 31/07/24 9:01 pm, Xiaoyao Li wrote:
>>>> !-------------------------------------------------------------------|
>>>>   CAUTION: External Email
>>>>
>>>> |-------------------------------------------------------------------!
>>>>
>>>> On 7/31/2024 4:49 PM, John Levon wrote:
>>>>> On Wed, Jul 31, 2024 at 03:02:15PM +0800, Xiaoyao Li wrote:
>>>>>>> Windows does not expect 0x1f to be present for any CPU model. But
>>>>>>> if it
>>>>>>> is exposed to the guest, it expects non-zero values.
>>>>>>
>>>>>> Please fix Windows!
>>>>>
>>>>> A ticket has been filed with MSFT, we are aware this is a guest bug.
>>>>>
>>>>> But that doesn't really help anybody trying to use Windows right now.
>>>>
>>>> For existing buggy Windows, we can still introduce
>>>> "cpuid-0x1f-enforce" but not make it default on.
>>>>
>>>> People want to boot the buggy Windows needs to opt-in it themselves
>>>> via "-cpu xxx,cpuid-0x1f-enforce=on". This way, we don't have live
>>>> migration issue and it doesn't affect anything.
>>>
>>>
>>> Yes, that makes sense, I will send a updated patch by tomorrow if no 
>>> one
>>> has any objection with this.
>>
>> I'd rename it to
>>     x-have-cpuid-0x1f-leaf
>> (x-) to reflect that it's not stable/maintained and subject
>> to be dropped in future
>>
>> Also please clearly spell out that it's a temporary workaround for ...
>> in commit message.
>
> I have a patch at hand, to introduce enable_cpuid_0x1f similar as 
> enable_cpuid_0xb, for TDX:
>
> https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_intel-2Dstaging_qemu-2Dtdx_commit_de08fd30926bc9d7997af6bd12cfff1b998da8b7&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=c4KON2DiMd-szjwjggQcuUvTsPWblztAL0gVzaHnNmc&m=CIrxjRd0KG4ww4BtSxZysWS0tFYfPGTRBG731EmlUcy7BFlAw3-5PLp2SlKPR83m&s=2gKDZXpqB7wE8v0vtN8l65WBqTtXOUJ-FkMblXcT_Ws&e= 
>
> It is not a temporary solution. So I would suggest to drop (x-).
> If no objection, I think Manish can start from my patch and it only 
> misses a property definition for windows case:
>
>   DEFINE_PROP_BOOL("cpuid-0x1f", X86CPU, enable_cpuid_0x1f, false);
>
>
Thanks Xiaoyao, I see your patch does what we require but i am not able 
to track these patches, are these in line to be merged soon? We need 
this urgently. Also as it is just a single line change on top of your 
changes, how i manage my change? Should i wait for you to merge and then 
send patch or you will be fine to directly include it in your series?


>
>>
>>>
>>>>> regards
>>>>> john
>>>
>>>
>>> Thanks
>>>
>>> Manish Mishra
>>>
>>
>

Thanks

Manish Mishra
Zhao Liu Aug. 2, 2024, 2:33 a.m. UTC | #20
On Wed, Jul 31, 2024 at 07:30:44PM +0530, Manish wrote:
> Date: Wed, 31 Jul 2024 19:30:44 +0530
> From: Manish <manish.mishra@nutanix.com>
> Subject: Re: [PATCH v1] target/i386: Always set leaf 0x1f
> 
> 
> On 30/07/24 6:39 pm, Igor Mammedov wrote:
> > !-------------------------------------------------------------------|
> >    CAUTION: External Email
> > 
> > |-------------------------------------------------------------------!
> > 
> > On Mon, 29 Jul 2024 19:42:39 +0700
> > Manish <manish.mishra@nutanix.com> wrote:
> > 
> > > On 29/07/24 7:18 pm, Igor Mammedov wrote:
> > > > !-------------------------------------------------------------------|
> > > >     CAUTION: External Email
> > > > 
> > > > |-------------------------------------------------------------------!
> > > > 
> > > > On Wed, 24 Jul 2024 23:00:13 +0800
> > > > Zhao Liu <zhao1.liu@intel.com> wrote:
> > > > > Hi Igor,
> > > > > 
> > > > > On Wed, Jul 24, 2024 at 02:54:32PM +0200, Igor Mammedov wrote:
> > > > > > Date: Wed, 24 Jul 2024 14:54:32 +0200
> > > > > > From: Igor Mammedov <imammedo@redhat.com>
> > > > > > Subject: Re: [PATCH v1] target/i386: Always set leaf 0x1f
> > > > > > X-Mailer: Claws Mail 4.3.0 (GTK 3.24.42; x86_64-redhat-linux-gnu)
> > > > > > 
> > > > > > On Wed, 24 Jul 2024 12:13:28 +0100
> > > > > > John Levon <john.levon@nutanix.com> wrote:
> > > > > > > On Wed, Jul 24, 2024 at 03:59:29PM +0530, Manish wrote:
> > > > > > > > > > Leaf 0x1f is superset of 0xb, so it makes sense to set 0x1f equivalent
> > > > > > > > > > to 0xb by default and workaround windows issue.>
> > > > > > > > > > This change adds a
> > > > > > > > > > new property 'cpuid-0x1f-enforce' to set leaf 0x1f equivalent to 0xb in
> > > > > > > > > > case extended CPU topology is not configured and behave as before otherwise.
> > > > > > > > > repeating question
> > > > > > > > > why we need to use extra property instead of just adding 0x1f leaf for CPU models
> > > > > > > > > that supposed to have it?
> > > > > > > > As i mentioned in earlier response. "Windows expects it only when we have
> > > > > > > > set max cpuid level greater than or equal to 0x1f. I mean if it is exposed
> > > > > > > > it should not be all zeros. SapphireRapids CPU definition raised cpuid level
> > > > > > > > to 0x20, so we starting seeing it with SapphireRapids."
> > > > > > > > 
> > > > > > > > Windows does not expect 0x1f to be present for any CPU model. But if it is
> > > > > > > > exposed to the guest, it expects non-zero values.
> > > > > > > I think Igor is suggesting:
> > > > > > > 
> > > > > > >    - leave x86_cpu_expand_features() alone completely
> > > > > > yep, drop that if possible
> > > > > > 
> > > > > > >    - change the 0x1f handling to always report topology i.e. never report all
> > > > > > >      zeroes
> > > > > > Do this but only for CPU models that have this leaf per spec,
> > > > > > to avoid live migration issues create a new version of CPU model,
> > > > > > so it would apply only for new version. This way older versions
> > > > > > and migration won't be affected.
> > > > > So that in the future every new Intel CPU model will need to always
> > > > > enable 0x1f. Sounds like an endless game. So my question is: at what
> > > > > point is it ok to consider defaulting to always enable 0x1f and just
> > > > > disable it for the old CPU model?
> > > > I have suggested to enable 0x1f leaf excluding:
> > > >      * existing cpu models (versions)
> > > >      * cpu models that do not have this leaf in real world should
> > > >        not have it in QEMU either.
> > > > 
> > > > If cpu model already exists, you'd need a new version of cpu model to
> > > > enable new leaf by default.
> > > > 
> > > > For completely new cpu model, it could be enabled from the start.
> > > > i.e. workflow for enabling that should be the same as with CPU features
> > > > (or as you said 'endless game' of copying base model and making it look like
> > > > should be according to spec,
> > > > but that's the process we currently use for describing CPU models).
> > > Igor my understanding was that there are two type of features one is
> > > real CPU feature, yes those makes sense in CPU models. But on other hand
> > > there are features which are emulated ones i.e. kvm-*, these make sense
> > > enabling regardless of any CPU model and we usually use machine types to
> > > enable these. Does not this features makes sense in 2nd category.
> > I'm not convinced that it applies to any CPU model.
> > I'd say it's risky to expose new leaf on CPUs that don't have it to begin with.
> > Also what about AMD cpus? Do they also support this leaf?
> 
> Hi Igor, No AMD has different leaf for this purpose, i think 8000_0026. But
> should not AMD platforms or guest ignore it in that case. This flag was
> anyway getting populated earlier too for nr_dies > 1?
> 

AMD hasn't supported that leaf in QEMU (cpu_x86_cpuid()).

Igor's concern is right, as there is no way to ensure how the software
will behave (just like Windows doesn't respect the x86 spec). It is
least risky to consider this property as the 1st category, because the
particular CPU model supports this leaf, and software that runs
properly on the corresponding bare metal certainly won't complain about
0x1f or even prefer to have 0x1f in Guest (also as in your Windows
example).

Disable "cpuid-0x1f-enforce" by default is also fine for me. If you find
it's not easy to add 0x1f for the CPU models, I can add 0x1f into CPU
models in the future with my other cache model work, to help you fill in
the last piece that Igor suggested.

Regards,
Zhao
Xiaoyao Li Aug. 2, 2024, 7:35 a.m. UTC | #21
On 8/2/2024 12:46 AM, Manish wrote:
> 
> On 01/08/24 8:41 pm, Xiaoyao Li wrote:
>> !-------------------------------------------------------------------|
>>  CAUTION: External Email
>>
>> |-------------------------------------------------------------------!
>>
>> On 8/1/2024 6:25 PM, Igor Mammedov wrote:
>>> On Thu, 1 Aug 2024 15:36:10 +0530
>>> Manish <manish.mishra@nutanix.com> wrote:
>>>
>>>> On 31/07/24 9:01 pm, Xiaoyao Li wrote:
>>>>> !-------------------------------------------------------------------|
>>>>>   CAUTION: External Email
>>>>>
>>>>> |-------------------------------------------------------------------!
>>>>>
>>>>> On 7/31/2024 4:49 PM, John Levon wrote:
>>>>>> On Wed, Jul 31, 2024 at 03:02:15PM +0800, Xiaoyao Li wrote:
>>>>>>>> Windows does not expect 0x1f to be present for any CPU model. But
>>>>>>>> if it
>>>>>>>> is exposed to the guest, it expects non-zero values.
>>>>>>>
>>>>>>> Please fix Windows!
>>>>>>
>>>>>> A ticket has been filed with MSFT, we are aware this is a guest bug.
>>>>>>
>>>>>> But that doesn't really help anybody trying to use Windows right now.
>>>>>
>>>>> For existing buggy Windows, we can still introduce
>>>>> "cpuid-0x1f-enforce" but not make it default on.
>>>>>
>>>>> People want to boot the buggy Windows needs to opt-in it themselves
>>>>> via "-cpu xxx,cpuid-0x1f-enforce=on". This way, we don't have live
>>>>> migration issue and it doesn't affect anything.
>>>>
>>>>
>>>> Yes, that makes sense, I will send a updated patch by tomorrow if no 
>>>> one
>>>> has any objection with this.
>>>
>>> I'd rename it to
>>>     x-have-cpuid-0x1f-leaf
>>> (x-) to reflect that it's not stable/maintained and subject
>>> to be dropped in future
>>>
>>> Also please clearly spell out that it's a temporary workaround for ...
>>> in commit message.
>>
>> I have a patch at hand, to introduce enable_cpuid_0x1f similar as 
>> enable_cpuid_0xb, for TDX:
>>
>> https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_intel-2Dstaging_qemu-2Dtdx_commit_de08fd30926bc9d7997af6bd12cfff1b998da8b7&d=DwIDaQ&c=s883GpUCOChKOHiocYtGcg&r=c4KON2DiMd-szjwjggQcuUvTsPWblztAL0gVzaHnNmc&m=CIrxjRd0KG4ww4BtSxZysWS0tFYfPGTRBG731EmlUcy7BFlAw3-5PLp2SlKPR83m&s=2gKDZXpqB7wE8v0vtN8l65WBqTtXOUJ-FkMblXcT_Ws&e=
>> It is not a temporary solution. So I would suggest to drop (x-).
>> If no objection, I think Manish can start from my patch and it only 
>> misses a property definition for windows case:
>>
>>   DEFINE_PROP_BOOL("cpuid-0x1f", X86CPU, enable_cpuid_0x1f, false);
>>
>>
> Thanks Xiaoyao, I see your patch does what we require but i am not able 
> to track these patches, are these in line to be merged soon? We need 
> this urgently. Also as it is just a single line change on top of your 
> changes, how i manage my change? Should i wait for you to merge and then 
> send patch or you will be fine to directly include it in your series?

Sent it:

https://lore.kernel.org/qemu-devel/20240802072426.4016194-1-xiaoyao.li@intel.com/

We can continue the discussion from there.

> 
>>
>>>
>>>>
>>>>>> regards
>>>>>> john
>>>>
>>>>
>>>> Thanks
>>>>
>>>> Manish Mishra
>>>>
>>>
>>
> 
> Thanks
> 
> Manish Mishra
>
diff mbox series

Patch

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index c74931d577..4cab04e443 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -85,6 +85,7 @@  GlobalProperty pc_compat_9_0[] = {
     { TYPE_X86_CPU, "guest-phys-bits", "0" },
     { "sev-guest", "legacy-vm-type", "on" },
     { TYPE_X86_CPU, "legacy-multi-node", "on" },
+    { TYPE_X86_CPU, "cpuid-0x1f-enforce", "false" },
 };
 const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0);
 
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 4688d140c2..7b71083bc9 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6637,7 +6637,8 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         break;
     case 0x1F:
         /* V2 Extended Topology Enumeration Leaf */
-        if (!x86_has_extended_topo(env->avail_cpu_topo)) {
+        if (!x86_has_extended_topo(env->avail_cpu_topo) &&
+            !cpu->enable_cpuid_0x1f_enforce) {
             *eax = *ebx = *ecx = *edx = 0;
             break;
         }
@@ -7450,7 +7451,8 @@  void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
          * cpu->vendor_cpuid_only has been unset for compatibility with older
          * machine types.
          */
-        if (x86_has_extended_topo(env->avail_cpu_topo) &&
+        if ((x86_has_extended_topo(env->avail_cpu_topo) ||
+             cpu->enable_cpuid_0x1f_enforce) &&
             (IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) {
             x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F);
         }
@@ -8316,6 +8318,7 @@  static Property x86_cpu_properties[] = {
     DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true),
     DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor),
     DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true),
+    DEFINE_PROP_BOOL("cpuid-0x1f-enforce", X86CPU, enable_cpuid_0x1f_enforce, true),
     DEFINE_PROP_BOOL("x-vendor-cpuid-only", X86CPU, vendor_cpuid_only, true),
     DEFINE_PROP_BOOL("x-amd-topoext-features-only", X86CPU, amd_topoext_features_only, true),
     DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false),
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 1e121acef5..49c5641ba8 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2102,6 +2102,11 @@  struct ArchCPU {
     /* Compatibility bits for old machine types: */
     bool enable_cpuid_0xb;
 
+    /* Always return values for 0x1f leaf. In cases where extended CPU topology
+     * is not configured, return values equivalent of leaf 0xb.
+     */
+    bool enable_cpuid_0x1f_enforce;
+
     /* Enable auto level-increase for all CPUID leaves */
     bool full_cpuid_auto_level;
 
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index becca2efa5..a9c6f02900 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1799,6 +1799,7 @@  static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
     uint32_t limit, i, j;
     uint32_t unused;
     struct kvm_cpuid_entry2 *c;
+    X86CPU *cpu = env_archcpu(env);
 
     cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
 
@@ -1831,7 +1832,8 @@  static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
             break;
         }
         case 0x1f:
-            if (!x86_has_extended_topo(env->avail_cpu_topo)) {
+            if (!x86_has_extended_topo(env->avail_cpu_topo) &&
+                !cpu->enable_cpuid_0x1f_enforce) {
                 cpuid_i--;
                 break;
             }