diff mbox series

[v4,1/2] kvm: add support for guest physical bits

Message ID 20240318155336.156197-2-kraxel@redhat.com (mailing list archive)
State New, archived
Headers show
Series kvm: add support for guest physical bits | expand

Commit Message

Gerd Hoffmann March 18, 2024, 3:53 p.m. UTC
Query kvm for supported guest physical address bits, in cpuid
function 80000008, eax[23:16].  Usually this is identical to host
physical address bits.  With NPT or EPT being used this might be
restricted to 48 (max 4-level paging address space size) even if
the host cpu supports more physical address bits.

When set, pass this to the guest, using cpuid too.  Guest firmware
can use this to figure out how big the usable guest physical address
space is, so PCI BAR mappings are actually reachable.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 target/i386/cpu.h         |  1 +
 target/i386/cpu.c         |  1 +
 target/i386/kvm/kvm-cpu.c | 31 ++++++++++++++++++++++++++++++-
 3 files changed, 32 insertions(+), 1 deletion(-)

Comments

Xiaoyao Li March 20, 2024, 2:44 a.m. UTC | #1
On 3/18/2024 11:53 PM, Gerd Hoffmann wrote:
> Query kvm for supported guest physical address bits, in cpuid
> function 80000008, eax[23:16].  Usually this is identical to host
> physical address bits.  With NPT or EPT being used this might be
> restricted to 48 (max 4-level paging address space size) even if
> the host cpu supports more physical address bits.
> 
> When set pass this to the guest, using cpuid too.  Guest firmware
> can use this to figure how big the usable guest physical address
> space is, so PCI bar mapping are actually reachable.
> 
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>   target/i386/cpu.h         |  1 +
>   target/i386/cpu.c         |  1 +
>   target/i386/kvm/kvm-cpu.c | 31 ++++++++++++++++++++++++++++++-
>   3 files changed, 32 insertions(+), 1 deletion(-)
> 
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 952174bb6f52..d427218827f6 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -2026,6 +2026,7 @@ struct ArchCPU {
>   
>       /* Number of physical address bits supported */
>       uint32_t phys_bits;
> +    uint32_t guest_phys_bits;
>   
>       /* in order to simplify APIC support, we leave this pointer to the
>          user */
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 9a210d8d9290..c88c895a5b3e 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -6570,6 +6570,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>           if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
>               /* 64 bit processor */
>                *eax |= (cpu_x86_virtual_addr_width(env) << 8);
> +             *eax |= (cpu->guest_phys_bits << 16);
>           }
>           *ebx = env->features[FEAT_8000_0008_EBX];
>           if (cs->nr_cores * cs->nr_threads > 1) {
> diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
> index 9c791b7b0520..5132bb96abd5 100644
> --- a/target/i386/kvm/kvm-cpu.c
> +++ b/target/i386/kvm/kvm-cpu.c
> @@ -18,10 +18,33 @@
>   #include "kvm_i386.h"
>   #include "hw/core/accel-cpu.h"
>   
> +static void kvm_set_guest_phys_bits(CPUState *cs)
> +{
> +    X86CPU *cpu = X86_CPU(cs);
> +    uint32_t eax, guest_phys_bits;
> +
> +    eax = kvm_arch_get_supported_cpuid(cs->kvm_state, 0x80000008, 0, R_EAX);
> +    guest_phys_bits = (eax >> 16) & 0xff;
> +    if (!guest_phys_bits) {
> +        return;
> +    }
> +
> +    if (cpu->guest_phys_bits == 0 ||
> +        cpu->guest_phys_bits > guest_phys_bits) {
> +        cpu->guest_phys_bits = guest_phys_bits;
> +    }
> +
> +    if (cpu->host_phys_bits_limit &&
> +        cpu->guest_phys_bits > cpu->host_phys_bits_limit) {
> +        cpu->guest_phys_bits = cpu->host_phys_bits_limit;

host_phys_bits_limit takes effect only when cpu->host_phys_bits is set.

If a user passes a configuration like "-cpu 
qemu64,phys-bits=52,host-phys-bits-limit=45", cpu->guest_phys_bits 
will be set to 45. I don't think this is what we want, even though 
such usage seems insane.

We can guard it as

  if (cpu->host_phys_bits && cpu->host_phys_bits_limit &&
      cpu->guest_phys_bits > cpu->host_phys_bits_limit)
{
}

Alternatively, and simpler, we can guard with cpu->phys_bits as below, 
because cpu->host_phys_bits_limit is already used to guard 
cpu->phys_bits in host_cpu_realizefn():

  if (cpu->guest_phys_bits > cpu->phys_bits) {
	cpu->guest_phys_bits = cpu->phys_bits;
}


with this resolved,

Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>

> +    }
> +}
> +
>   static bool kvm_cpu_realizefn(CPUState *cs, Error **errp)
>   {
>       X86CPU *cpu = X86_CPU(cs);
>       CPUX86State *env = &cpu->env;
> +    bool ret;
>   
>       /*
>        * The realize order is important, since x86_cpu_realize() checks if
> @@ -50,7 +73,13 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp)
>                                                      MSR_IA32_UCODE_REV);
>           }
>       }
> -    return host_cpu_realizefn(cs, errp);
> +    ret = host_cpu_realizefn(cs, errp);
> +    if (!ret) {
> +        return ret;
> +    }
> +
> +    kvm_set_guest_phys_bits(cs);
> +    return true;
>   }
>   
>   static bool lmce_supported(void)
Gerd Hoffmann March 22, 2024, 12:50 p.m. UTC | #2
> > +    if (cpu->host_phys_bits_limit &&
> > +        cpu->guest_phys_bits > cpu->host_phys_bits_limit) {
> > +        cpu->guest_phys_bits = cpu->host_phys_bits_limit;
> 
> host_phys_bits_limit takes effect only when cpu->host_phys_bits is set.
> 
> If users pass configuration like "-cpu
> qemu64,phys-bits=52,host-phys-bits-limit=45", the cpu->guest_phys_bits will
> be set to 45. I think this is not what we want, though the usage seems
> insane.
> 
> We can guard it as
> 
>  if (cpu->host_phys_bits && cpu->host_phys_bits_limit &&
>      cpu->guest_phys_bits > cpu->host_phys_bits_limit)
> {
> }

Yes, makes sense.

> Simpler, we can guard with cpu->phys_bits like below, because
> cpu->host_phys_bits_limit is used to guard cpu->phys_bits in
> host_cpu_realizefn()
> 
>  if (cpu->guest_phys_bits > cpu->phys_bits) {
> 	cpu->guest_phys_bits = cpu->phys_bits;
> }

I think I prefer the first version.  The logic is already difficult
enough to follow because it is spread across a bunch of files due to
the different cases we have to handle (tcg, kvm-with-host_phys_bits,
kvm-without-host_phys_bits).

It's not in any way performance-critical, so I happily trade some extra
checks for code which is easier to read.

take care,
  Gerd
Paolo Bonzini March 27, 2024, 8:53 a.m. UTC | #3
On Wed, Mar 20, 2024 at 3:45 AM Xiaoyao Li <xiaoyao.li@intel.com> wrote:
> If users pass configuration like "-cpu
> qemu64,phys-bits=52,host-phys-bits-limit=45", the cpu->guest_phys_bits
> will be set to 45. I think this is not what we want, though the usage
> seems insane.
>
> We can guard it as
>
>   if (cpu->host_phys_bits && cpu->host_phys_bits_limit &&
>       cpu->guest_phys_bits > cpu->host_phys_bits_limit)
> {
> }

> Simpler, we can guard with cpu->phys_bits like below, because
> cpu->host_phys_bits_limit is used to guard cpu->phys_bits in
> host_cpu_realizefn()
>
>   if (cpu->guest_phys_bits > cpu->phys_bits) {
>         cpu->guest_phys_bits = cpu->phys_bits;
> }
>
>
> with this resolved,
>
> Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>

[oops sorry - I noticed now that this email was never sent, so I am
sending it for archival]

There are more issues:

1) for compatibility with older machine types, the GuestPhysAddrSize
should remain 0. One possibility is to have "-1" as "accelerator
default" and "0" as "show it as zero in CPUID".

2) a "guest-phys-bits is not user-configurable in 32 bit" error is
probably a good idea, just like the one that exists for cpu->phys_bits

3) I think the order of the patches makes more sense if the property
is added first and KVM is adjusted second.

I'll post a v5 myself (mostly because it has to include the creation
of 9.1 machine types).

Paolo

> > +    }
> > +}
> > +
> >   static bool kvm_cpu_realizefn(CPUState *cs, Error **errp)
> >   {
> >       X86CPU *cpu = X86_CPU(cs);
> >       CPUX86State *env = &cpu->env;
> > +    bool ret;
> >
> >       /*
> >        * The realize order is important, since x86_cpu_realize() checks if
> > @@ -50,7 +73,13 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp)
> >                                                      MSR_IA32_UCODE_REV);
> >           }
> >       }
> > -    return host_cpu_realizefn(cs, errp);
> > +    ret = host_cpu_realizefn(cs, errp);
> > +    if (!ret) {
> > +        return ret;
> > +    }
> > +
> > +    kvm_set_guest_phys_bits(cs);
> > +    return true;
> >   }
> >
> >   static bool lmce_supported(void)
>
diff mbox series

Patch

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 952174bb6f52..d427218827f6 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2026,6 +2026,7 @@  struct ArchCPU {
 
     /* Number of physical address bits supported */
     uint32_t phys_bits;
+    uint32_t guest_phys_bits;
 
     /* in order to simplify APIC support, we leave this pointer to the
        user */
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 9a210d8d9290..c88c895a5b3e 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6570,6 +6570,7 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
             /* 64 bit processor */
              *eax |= (cpu_x86_virtual_addr_width(env) << 8);
+             *eax |= (cpu->guest_phys_bits << 16);
         }
         *ebx = env->features[FEAT_8000_0008_EBX];
         if (cs->nr_cores * cs->nr_threads > 1) {
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
index 9c791b7b0520..5132bb96abd5 100644
--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
@@ -18,10 +18,33 @@ 
 #include "kvm_i386.h"
 #include "hw/core/accel-cpu.h"
 
+static void kvm_set_guest_phys_bits(CPUState *cs)
+{
+    X86CPU *cpu = X86_CPU(cs);
+    uint32_t eax, guest_phys_bits;
+
+    eax = kvm_arch_get_supported_cpuid(cs->kvm_state, 0x80000008, 0, R_EAX);
+    guest_phys_bits = (eax >> 16) & 0xff;
+    if (!guest_phys_bits) {
+        return;
+    }
+
+    if (cpu->guest_phys_bits == 0 ||
+        cpu->guest_phys_bits > guest_phys_bits) {
+        cpu->guest_phys_bits = guest_phys_bits;
+    }
+
+    if (cpu->host_phys_bits_limit &&
+        cpu->guest_phys_bits > cpu->host_phys_bits_limit) {
+        cpu->guest_phys_bits = cpu->host_phys_bits_limit;
+    }
+}
+
 static bool kvm_cpu_realizefn(CPUState *cs, Error **errp)
 {
     X86CPU *cpu = X86_CPU(cs);
     CPUX86State *env = &cpu->env;
+    bool ret;
 
     /*
      * The realize order is important, since x86_cpu_realize() checks if
@@ -50,7 +73,13 @@  static bool kvm_cpu_realizefn(CPUState *cs, Error **errp)
                                                    MSR_IA32_UCODE_REV);
         }
     }
-    return host_cpu_realizefn(cs, errp);
+    ret = host_cpu_realizefn(cs, errp);
+    if (!ret) {
+        return ret;
+    }
+
+    kvm_set_guest_phys_bits(cs);
+    return true;
 }
 
 static bool lmce_supported(void)