diff mbox series

[1/1] kvm: add support for guest physical bits

Message ID 20240301101713.356759-2-kraxel@redhat.com (mailing list archive)
State New, archived
Headers show
Series kvm: add support for guest physical bits | expand

Commit Message

Gerd Hoffmann March 1, 2024, 10:17 a.m. UTC
Query KVM for the supported guest physical address bits using
KVM_CAP_VM_GPA_BITS.  Expose the value to the guest via CPUID
(leaf 0x80000008, eax, bits 16-23).

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 target/i386/cpu.h     | 1 +
 target/i386/cpu.c     | 1 +
 target/i386/kvm/kvm.c | 8 ++++++++
 3 files changed, 10 insertions(+)

Comments

Xiaoyao Li March 4, 2024, 1:54 a.m. UTC | #1
On 3/1/2024 6:17 PM, Gerd Hoffmann wrote:
> query kvm for supported guest physical address bits using
> KVM_CAP_VM_GPA_BITS.  Expose the value to the guest via cpuid
> (leaf 0x80000008, eax, bits 16-23).
> 
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>   target/i386/cpu.h     | 1 +
>   target/i386/cpu.c     | 1 +
>   target/i386/kvm/kvm.c | 8 ++++++++
>   3 files changed, 10 insertions(+)
> 
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 952174bb6f52..d427218827f6 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -2026,6 +2026,7 @@ struct ArchCPU {
>   
>       /* Number of physical address bits supported */
>       uint32_t phys_bits;
> +    uint32_t guest_phys_bits;
>   
>       /* in order to simplify APIC support, we leave this pointer to the
>          user */
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 2666ef380891..1a6cfc75951e 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -6570,6 +6570,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>           if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
>               /* 64 bit processor */
>                *eax |= (cpu_x86_virtual_addr_width(env) << 8);
> +             *eax |= (cpu->guest_phys_bits << 16);

I think you misunderstand this field.

If you expose this field to the guest, it is information about the nested
guest, i.e., a guest that itself runs as a hypervisor will know that its
nested guest can use guest_phys_bits bits of physical address.

>           }
>           *ebx = env->features[FEAT_8000_0008_EBX];
>           if (cs->nr_cores * cs->nr_threads > 1) {
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 42970ab046fa..e06c9d66bb01 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -1716,6 +1716,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
>       X86CPU *cpu = X86_CPU(cs);
>       CPUX86State *env = &cpu->env;
>       uint32_t limit, i, j, cpuid_i;
> +    uint32_t guest_phys_bits;
>       uint32_t unused;
>       struct kvm_cpuid_entry2 *c;
>       uint32_t signature[3];
> @@ -1751,6 +1752,13 @@ int kvm_arch_init_vcpu(CPUState *cs)
>   
>       env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY;
>   
> +    guest_phys_bits = kvm_check_extension(cs->kvm_state, KVM_CAP_VM_GPA_BITS);
> +    if (guest_phys_bits &&
> +        (cpu->guest_phys_bits == 0 ||
> +         cpu->guest_phys_bits > guest_phys_bits)) {
> +        cpu->guest_phys_bits = guest_phys_bits;
> +    }
> +
>       /*
>        * kvm_hyperv_expand_features() is called here for the second time in case
>        * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle
Tao Su March 4, 2024, 4:05 a.m. UTC | #2
On Mon, Mar 04, 2024 at 09:54:40AM +0800, Xiaoyao Li wrote:
> On 3/1/2024 6:17 PM, Gerd Hoffmann wrote:
> > query kvm for supported guest physical address bits using
> > KVM_CAP_VM_GPA_BITS.  Expose the value to the guest via cpuid
> > (leaf 0x80000008, eax, bits 16-23).
> > 
> > Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> > ---
> >   target/i386/cpu.h     | 1 +
> >   target/i386/cpu.c     | 1 +
> >   target/i386/kvm/kvm.c | 8 ++++++++
> >   3 files changed, 10 insertions(+)
> > 
> > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > index 952174bb6f52..d427218827f6 100644
> > --- a/target/i386/cpu.h
> > +++ b/target/i386/cpu.h
> > @@ -2026,6 +2026,7 @@ struct ArchCPU {
> >       /* Number of physical address bits supported */
> >       uint32_t phys_bits;
> > +    uint32_t guest_phys_bits;
> >       /* in order to simplify APIC support, we leave this pointer to the
> >          user */
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index 2666ef380891..1a6cfc75951e 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -6570,6 +6570,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
> >           if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
> >               /* 64 bit processor */
> >                *eax |= (cpu_x86_virtual_addr_width(env) << 8);
> > +             *eax |= (cpu->guest_phys_bits << 16);
> 
> I think you misunderstand this field.
> 
> If you expose this field to guest, it's the information for nested guest.
> i.e., the guest itself runs as a hypervisor will know its nested guest can
> have guest_phys_bits for physical addr.

I'm also thinking about this issue...

Currently guest KVM doesn't use this field to advertise MAXPHYADDR because
the nested guest doesn't have TDP. And for now this patch only affects the
KVM hypervisor.

Thanks,
Tao

> 
> >           }
> >           *ebx = env->features[FEAT_8000_0008_EBX];
> >           if (cs->nr_cores * cs->nr_threads > 1) {
> > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> > index 42970ab046fa..e06c9d66bb01 100644
> > --- a/target/i386/kvm/kvm.c
> > +++ b/target/i386/kvm/kvm.c
> > @@ -1716,6 +1716,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
> >       X86CPU *cpu = X86_CPU(cs);
> >       CPUX86State *env = &cpu->env;
> >       uint32_t limit, i, j, cpuid_i;
> > +    uint32_t guest_phys_bits;
> >       uint32_t unused;
> >       struct kvm_cpuid_entry2 *c;
> >       uint32_t signature[3];
> > @@ -1751,6 +1752,13 @@ int kvm_arch_init_vcpu(CPUState *cs)
> >       env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY;
> > +    guest_phys_bits = kvm_check_extension(cs->kvm_state, KVM_CAP_VM_GPA_BITS);
> > +    if (guest_phys_bits &&
> > +        (cpu->guest_phys_bits == 0 ||
> > +         cpu->guest_phys_bits > guest_phys_bits)) {
> > +        cpu->guest_phys_bits = guest_phys_bits;
> > +    }
> > +
> >       /*
> >        * kvm_hyperv_expand_features() is called here for the second time in case
> >        * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle
> 
>
Gerd Hoffmann March 4, 2024, 2:58 p.m. UTC | #3
On Mon, Mar 04, 2024 at 09:54:40AM +0800, Xiaoyao Li wrote:
> On 3/1/2024 6:17 PM, Gerd Hoffmann wrote:
> > query kvm for supported guest physical address bits using
> > KVM_CAP_VM_GPA_BITS.  Expose the value to the guest via cpuid
> > (leaf 0x80000008, eax, bits 16-23).
> > 
> > Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> > ---
> >   target/i386/cpu.h     | 1 +
> >   target/i386/cpu.c     | 1 +
> >   target/i386/kvm/kvm.c | 8 ++++++++
> >   3 files changed, 10 insertions(+)
> > 
> > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > index 952174bb6f52..d427218827f6 100644
> > --- a/target/i386/cpu.h
> > +++ b/target/i386/cpu.h
> > @@ -2026,6 +2026,7 @@ struct ArchCPU {
> >       /* Number of physical address bits supported */
> >       uint32_t phys_bits;
> > +    uint32_t guest_phys_bits;
> >       /* in order to simplify APIC support, we leave this pointer to the
> >          user */
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index 2666ef380891..1a6cfc75951e 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -6570,6 +6570,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
> >           if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
> >               /* 64 bit processor */
> >                *eax |= (cpu_x86_virtual_addr_width(env) << 8);
> > +             *eax |= (cpu->guest_phys_bits << 16);
> 
> I think you misunderstand this field.
> 
> If you expose this field to guest, it's the information for nested guest.
> i.e., the guest itself runs as a hypervisor will know its nested guest can
> have guest_phys_bits for physical addr.

I think those limits (l1 + l2 guest phys-bits) are identical, no?

The problem this tries to solve is that making the guest phys-bits
smaller than the host phys-bits is problematic (which is why we have
allow_smaller_maxphyaddr), but nevertheless there are cases where
the usable guest physical address space is smaller than the host
physical address space.  One case is Intel processors with phys-bits
larger than 48 and 4-level EPT.  Another case is AMD processors with
phys-bits larger than 48 and the L0 hypervisor using 4-level paging.

The guest needs to know that limit; specifically, the guest firmware
needs it so it knows where it can map PCI BARs.

take care,
  Gerd
Xiaoyao Li March 5, 2024, 3:03 a.m. UTC | #4
On 3/4/2024 10:58 PM, Gerd Hoffmann wrote:
> On Mon, Mar 04, 2024 at 09:54:40AM +0800, Xiaoyao Li wrote:
>> On 3/1/2024 6:17 PM, Gerd Hoffmann wrote:
>>> query kvm for supported guest physical address bits using
>>> KVM_CAP_VM_GPA_BITS.  Expose the value to the guest via cpuid
>>> (leaf 0x80000008, eax, bits 16-23).
>>>
>>> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
>>> ---
>>>    target/i386/cpu.h     | 1 +
>>>    target/i386/cpu.c     | 1 +
>>>    target/i386/kvm/kvm.c | 8 ++++++++
>>>    3 files changed, 10 insertions(+)
>>>
>>> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
>>> index 952174bb6f52..d427218827f6 100644
>>> --- a/target/i386/cpu.h
>>> +++ b/target/i386/cpu.h
>>> @@ -2026,6 +2026,7 @@ struct ArchCPU {
>>>        /* Number of physical address bits supported */
>>>        uint32_t phys_bits;
>>> +    uint32_t guest_phys_bits;
>>>        /* in order to simplify APIC support, we leave this pointer to the
>>>           user */
>>> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
>>> index 2666ef380891..1a6cfc75951e 100644
>>> --- a/target/i386/cpu.c
>>> +++ b/target/i386/cpu.c
>>> @@ -6570,6 +6570,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>>>            if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
>>>                /* 64 bit processor */
>>>                 *eax |= (cpu_x86_virtual_addr_width(env) << 8);
>>> +             *eax |= (cpu->guest_phys_bits << 16);
>>
>> I think you misunderstand this field.
>>
>> If you expose this field to guest, it's the information for nested guest.
>> i.e., the guest itself runs as a hypervisor will know its nested guest can
>> have guest_phys_bits for physical addr.
> 
> I think those limits (l1 + l2 guest phys-bits) are identical, no?

Sorry, I didn't know this patch was based on the off-list proposal made
by Paolo to change the definition of CPUID.0x80000008:EAX[23:16] to
advertise the "maximum usable physical address bits".

If you call this out in the changelog, it will avoid the misunderstanding.

As I replied to the KVM series, I think it is better for this info to be
set up by KVM and reported by GET_SUPPORTED_CPUID.

> The problem this tries to solve is that making the guest phys-bits
> smaller than the host phys-bits is problematic (which why we have
> allow_smaller_maxphyaddr), but nevertheless there are cases where
> the usable guest physical address space is smaller than the host
> physical address space.  One case is intel processors with phys-bits
> larger than 48 and 4-level EPT.  Another case is amd processors with
> phys-bits larger than 48 and the l0 hypervisor using 4-level paging.
> 
> The guest needs to know that limit, specifically the guest firmware
> so it knows where it can map PCI bars.
> 
> take care,
>    Gerd
>
Paolo Bonzini March 6, 2024, 10:50 p.m. UTC | #5
On 3/4/24 02:54, Xiaoyao Li wrote:
> On 3/1/2024 6:17 PM, Gerd Hoffmann wrote:
>> query kvm for supported guest physical address bits using
>> KVM_CAP_VM_GPA_BITS.  Expose the value to the guest via cpuid
>> (leaf 0x80000008, eax, bits 16-23).
>>
>> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
>> ---
>>   target/i386/cpu.h     | 1 +
>>   target/i386/cpu.c     | 1 +
>>   target/i386/kvm/kvm.c | 8 ++++++++
>>   3 files changed, 10 insertions(+)
>>
>> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
>> index 952174bb6f52..d427218827f6 100644
>> --- a/target/i386/cpu.h
>> +++ b/target/i386/cpu.h
>> @@ -2026,6 +2026,7 @@ struct ArchCPU {
>>       /* Number of physical address bits supported */
>>       uint32_t phys_bits;
>> +    uint32_t guest_phys_bits;
>>       /* in order to simplify APIC support, we leave this pointer to the
>>          user */
>> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
>> index 2666ef380891..1a6cfc75951e 100644
>> --- a/target/i386/cpu.c
>> +++ b/target/i386/cpu.c
>> @@ -6570,6 +6570,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t 
>> index, uint32_t count,
>>           if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
>>               /* 64 bit processor */
>>                *eax |= (cpu_x86_virtual_addr_width(env) << 8);
>> +             *eax |= (cpu->guest_phys_bits << 16);
> 
> I think you misunderstand this field.
> 
> If you expose this field to guest, it's the information for nested 
> guest. i.e., the guest itself runs as a hypervisor will know its nested 
> guest can have guest_phys_bits for physical addr.

It's one possible interpretation of AMD's definition. However, there's no
processor that has different MAXPHYADDR with/without nested paging, so
there's no real benefit in adopting that interpretation.

The only architectural case in which you have two conflicting values for 
the guest MAXPHYADDR is hCR4.LA57=0 (and likewise for Intel 4-level EPT) 
with MAXPHYADDR=52, so it's useful to treat GuestPhysAddrSize as a way 
to communicate this situation to the guest.

Paolo
diff mbox series

Patch

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 952174bb6f52..d427218827f6 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2026,6 +2026,7 @@  struct ArchCPU {
 
     /* Number of physical address bits supported */
     uint32_t phys_bits;
+    uint32_t guest_phys_bits;
 
     /* in order to simplify APIC support, we leave this pointer to the
        user */
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 2666ef380891..1a6cfc75951e 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6570,6 +6570,7 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
             /* 64 bit processor */
              *eax |= (cpu_x86_virtual_addr_width(env) << 8);
+             *eax |= (cpu->guest_phys_bits << 16);
         }
         *ebx = env->features[FEAT_8000_0008_EBX];
         if (cs->nr_cores * cs->nr_threads > 1) {
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 42970ab046fa..e06c9d66bb01 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1716,6 +1716,7 @@  int kvm_arch_init_vcpu(CPUState *cs)
     X86CPU *cpu = X86_CPU(cs);
     CPUX86State *env = &cpu->env;
     uint32_t limit, i, j, cpuid_i;
+    uint32_t guest_phys_bits;
     uint32_t unused;
     struct kvm_cpuid_entry2 *c;
     uint32_t signature[3];
@@ -1751,6 +1752,13 @@  int kvm_arch_init_vcpu(CPUState *cs)
 
     env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY;
 
+    guest_phys_bits = kvm_check_extension(cs->kvm_state, KVM_CAP_VM_GPA_BITS);
+    if (guest_phys_bits &&
+        (cpu->guest_phys_bits == 0 ||
+         cpu->guest_phys_bits > guest_phys_bits)) {
+        cpu->guest_phys_bits = guest_phys_bits;
+    }
+
     /*
      * kvm_hyperv_expand_features() is called here for the second time in case
      * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle