
[v2,4/5] hw/arm/virt: Use the PA range to compute the memory map

Message ID 20211003164605.3116450-5-maz@kernel.org (mailing list archive)
State New, archived
Series target/arm: Reduced-IPA space and highmem=off fixes

Commit Message

Marc Zyngier Oct. 3, 2021, 4:46 p.m. UTC
The highmem attribute is nothing but another way to express the
PA range of a VM. To support HW that has a smaller PA range than
what QEMU assumes, pass this PA range to the virt_set_memmap()
function, allowing it to correctly exclude highmem devices
if they are outside of the PA range.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 hw/arm/virt.c | 46 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 11 deletions(-)

Comments

Andrew Jones Oct. 4, 2021, 10:11 a.m. UTC | #1
On Sun, Oct 03, 2021 at 05:46:04PM +0100, Marc Zyngier wrote:
> The highmem attribute is nothing but another way to express the
> PA range of a VM. To support HW that has a smaller PA range than
> what QEMU assumes, pass this PA range to the virt_set_memmap()
> function, allowing it to correctly exclude highmem devices
> if they are outside of the PA range.
> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>  hw/arm/virt.c | 46 +++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 35 insertions(+), 11 deletions(-)
> 
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 9d2abdbd5f..a572e0c9d9 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -1610,10 +1610,10 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
>      return arm_cpu_mp_affinity(idx, clustersz);
>  }
>  
> -static void virt_set_memmap(VirtMachineState *vms)
> +static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
>  {
>      MachineState *ms = MACHINE(vms);
> -    hwaddr base, device_memory_base, device_memory_size;
> +    hwaddr base, device_memory_base, device_memory_size, memtop;
>      int i;
>  
>      vms->memmap = extended_memmap;
> @@ -1628,9 +1628,12 @@ static void virt_set_memmap(VirtMachineState *vms)
>          exit(EXIT_FAILURE);
>      }
>  
> -    if (!vms->highmem &&
> -        vms->memmap[VIRT_MEM].base + ms->maxram_size > 4 * GiB) {
> -        error_report("highmem=off, but memory crosses the 4GiB limit\n");
> +    if (!vms->highmem)
> +	    pa_bits = 32;
> +
> +    if (vms->memmap[VIRT_MEM].base + ms->maxram_size > BIT_ULL(pa_bits)) {
> +	    error_report("Addressing limited to %d bits, but memory exceeds it by %llu bytes\n",
> +			 pa_bits, vms->memmap[VIRT_MEM].base + ms->maxram_size - BIT_ULL(pa_bits));
>          exit(EXIT_FAILURE);
>      }
>      /*
> @@ -1645,7 +1648,7 @@ static void virt_set_memmap(VirtMachineState *vms)
>      device_memory_size = ms->maxram_size - ms->ram_size + ms->ram_slots * GiB;
>  
>      /* Base address of the high IO region */
> -    base = device_memory_base + ROUND_UP(device_memory_size, GiB);
> +    memtop = base = device_memory_base + ROUND_UP(device_memory_size, GiB);
>      if (base < device_memory_base) {
>          error_report("maxmem/slots too huge");
>          exit(EXIT_FAILURE);
> @@ -1662,9 +1665,17 @@ static void virt_set_memmap(VirtMachineState *vms)
>          vms->memmap[i].size = size;
>          base += size;
>      }
> -    vms->highest_gpa = (vms->highmem ?
> -                        base :
> -                        vms->memmap[VIRT_MEM].base + ms->maxram_size) - 1;
> +
> +    /*
> +     * If base fits within pa_bits, all good. If it doesn't, limit it
> +     * to the end of RAM, which is guaranteed to fit within pa_bits.

We tested that

  vms->memmap[VIRT_MEM].base + ms->maxram_size

fits within pa_bits, but here we're setting highest_gpa to

  ROUND_UP(vms->memmap[VIRT_MEM].base + ms->ram_size, GiB) +
  ROUND_UP(ms->maxram_size - ms->ram_size + ms->ram_slots * GiB, GiB)

which will be larger. Shouldn't we test memtop instead to make this
guarantee?
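
For instance (made-up numbers): with pa_bits = 32, VIRT_MEM base = 1GiB,
ram_size = 2.5GiB, maxram_size = 3GiB and ram_slots = 2, the check above
passes (1GiB + 3GiB is not above 4GiB), but

  device_memory_base = ROUND_UP(1GiB + 2.5GiB, GiB)  = 4GiB
  device_memory_size = 3GiB - 2.5GiB + 2 * GiB       = 2.5GiB
  memtop             = 4GiB + ROUND_UP(2.5GiB, GiB)  = 7GiB

so highest_gpa would end up at 7GiB - 1, well past the 4GiB the check
was meant to enforce.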


> +     */
> +    if (base <= BIT_ULL(pa_bits)) {
> +        vms->highest_gpa = base -1;
> +    } else {
> +        vms->highest_gpa = memtop - 1;
> +    }
> +
>      if (device_memory_size > 0) {
>          ms->device_memory = g_malloc0(sizeof(*ms->device_memory));
>          ms->device_memory->base = device_memory_base;
> @@ -1860,7 +1871,20 @@ static void machvirt_init(MachineState *machine)
>       * to create a VM with the right number of IPA bits.
>       */
>      if (!vms->memmap) {
> -        virt_set_memmap(vms);
> +        ARMCPU *armcpu = ARM_CPU(first_cpu);


I think it's too early to use first_cpu here (although, I'll admit I'm
always confused as to what gets initialized when...) Assuming we need to
realize the cpus first, then we don't do that until a bit further down
in this function. I wonder if it's possible to delay this memmap setup
until after cpu realization. I see the memmap getting used prior when
calculating virt_max_cpus, but that looks like it needs to be updated
anyway to take highmem into account as to whether or not we should
consider the high gicv3 redist region in the calculation.

> +        int pa_bits;
> +
> +        if (object_property_get_bool(OBJECT(first_cpu), "aarch64", NULL)) {
> +            pa_bits = arm_pamax(armcpu);
> +        } else if (arm_feature(&armcpu->env, ARM_FEATURE_LPAE)) {
> +            /* v7 with LPAE */
> +            pa_bits = 40;
> +        } else {
> +            /* Anything else */
> +            pa_bits = 32;
> +        }
> +
> +        virt_set_memmap(vms, pa_bits);
>      }
>  
>      /* We can probe only here because during property set
> @@ -2596,7 +2620,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str)
>      max_vm_pa_size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa);
>  
>      /* we freeze the memory map to compute the highest gpa */
> -    virt_set_memmap(vms);
> +    virt_set_memmap(vms, max_vm_pa_size);
>  
>      requested_pa_size = 64 - clz64(vms->highest_gpa);
>  
> -- 
> 2.30.2
> 

Thanks,
drew
Andrew Jones Oct. 4, 2021, 10:15 a.m. UTC | #2
On Sun, Oct 03, 2021 at 05:46:04PM +0100, Marc Zyngier wrote:
...
> @@ -1662,9 +1665,17 @@ static void virt_set_memmap(VirtMachineState *vms)
>          vms->memmap[i].size = size;
>          base += size;
>      }
> -    vms->highest_gpa = (vms->highmem ?
> -                        base :
> -                        vms->memmap[VIRT_MEM].base + ms->maxram_size) - 1;
> +
> +    /*
> +     * If base fits within pa_bits, all good. If it doesn't, limit it
> +     * to the end of RAM, which is guaranteed to fit within pa_bits.
> +     */
> +    if (base <= BIT_ULL(pa_bits)) {
> +        vms->highest_gpa = base -1;
                                    ^ missing space here

> +    } else {
> +        vms->highest_gpa = memtop - 1;
> +    }
> +
>      if (device_memory_size > 0) {
>          ms->device_memory = g_malloc0(sizeof(*ms->device_memory));
>          ms->device_memory->base = device_memory_base;

Thanks,
drew
Marc Zyngier Dec. 27, 2021, 8:13 p.m. UTC | #3
On Mon, 04 Oct 2021 11:11:10 +0100,
Andrew Jones <drjones@redhat.com> wrote:
> 
> On Sun, Oct 03, 2021 at 05:46:04PM +0100, Marc Zyngier wrote:
> > The highmem attribute is nothing but another way to express the
> > PA range of a VM. To support HW that has a smaller PA range than
> > what QEMU assumes, pass this PA range to the virt_set_memmap()
> > function, allowing it to correctly exclude highmem devices
> > if they are outside of the PA range.
> > 
> > Signed-off-by: Marc Zyngier <maz@kernel.org>
> > ---
> >  hw/arm/virt.c | 46 +++++++++++++++++++++++++++++++++++-----------
> >  1 file changed, 35 insertions(+), 11 deletions(-)
> > 
> > diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> > index 9d2abdbd5f..a572e0c9d9 100644
> > --- a/hw/arm/virt.c
> > +++ b/hw/arm/virt.c
> > @@ -1610,10 +1610,10 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
> >      return arm_cpu_mp_affinity(idx, clustersz);
> >  }
> >  
> > -static void virt_set_memmap(VirtMachineState *vms)
> > +static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
> >  {
> >      MachineState *ms = MACHINE(vms);
> > -    hwaddr base, device_memory_base, device_memory_size;
> > +    hwaddr base, device_memory_base, device_memory_size, memtop;
> >      int i;
> >  
> >      vms->memmap = extended_memmap;
> > @@ -1628,9 +1628,12 @@ static void virt_set_memmap(VirtMachineState *vms)
> >          exit(EXIT_FAILURE);
> >      }
> >  
> > -    if (!vms->highmem &&
> > -        vms->memmap[VIRT_MEM].base + ms->maxram_size > 4 * GiB) {
> > -        error_report("highmem=off, but memory crosses the 4GiB limit\n");
> > +    if (!vms->highmem)
> > +	    pa_bits = 32;
> > +
> > +    if (vms->memmap[VIRT_MEM].base + ms->maxram_size > BIT_ULL(pa_bits)) {
> > +	    error_report("Addressing limited to %d bits, but memory exceeds it by %llu bytes\n",
> > +			 pa_bits, vms->memmap[VIRT_MEM].base + ms->maxram_size - BIT_ULL(pa_bits));
> >          exit(EXIT_FAILURE);
> >      }
> >      /*
> > @@ -1645,7 +1648,7 @@ static void virt_set_memmap(VirtMachineState *vms)
> >      device_memory_size = ms->maxram_size - ms->ram_size + ms->ram_slots * GiB;
> >  
> >      /* Base address of the high IO region */
> > -    base = device_memory_base + ROUND_UP(device_memory_size, GiB);
> > +    memtop = base = device_memory_base + ROUND_UP(device_memory_size, GiB);
> >      if (base < device_memory_base) {
> >          error_report("maxmem/slots too huge");
> >          exit(EXIT_FAILURE);
> > @@ -1662,9 +1665,17 @@ static void virt_set_memmap(VirtMachineState *vms)
> >          vms->memmap[i].size = size;
> >          base += size;
> >      }
> > -    vms->highest_gpa = (vms->highmem ?
> > -                        base :
> > -                        vms->memmap[VIRT_MEM].base + ms->maxram_size) - 1;
> > +
> > +    /*
> > +     * If base fits within pa_bits, all good. If it doesn't, limit it
> > +     * to the end of RAM, which is guaranteed to fit within pa_bits.
> 
> We tested that
> 
>   vms->memmap[VIRT_MEM].base + ms->maxram_size
> 
> fits within pa_bits, but here we're setting highest_gpa to
> 
>   ROUND_UP(vms->memmap[VIRT_MEM].base + ms->ram_size, GiB) +
>   ROUND_UP(ms->maxram_size - ms->ram_size + ms->ram_slots * GiB, GiB)
> 
> which will be larger. Shouldn't we test memtop instead to make this
> guarantee?

Yes, well spotted.
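
One way to tighten this (just a sketch reusing the local variables of
this patch, not necessarily what the next version will do) is to compute
memtop before the check and test it instead of the raw RAM range:

    device_memory_base =
        ROUND_UP(vms->memmap[VIRT_MEM].base + ms->ram_size, GiB);
    device_memory_size = ms->maxram_size - ms->ram_size + ms->ram_slots * GiB;

    /* End of RAM + hotpluggable memory, also the base of the high IO region */
    memtop = base = device_memory_base + ROUND_UP(device_memory_size, GiB);

    if (memtop > BIT_ULL(pa_bits)) {
        error_report("Addressing limited to %d bits, but memory exceeds it by %llu bytes",
                     pa_bits,
                     (unsigned long long)(memtop - BIT_ULL(pa_bits)));
        exit(EXIT_FAILURE);
    }

With that, highest_gpa = memtop - 1 in the fallback path really is
guaranteed to fit within pa_bits.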

> 
> 
> > +     */
> > +    if (base <= BIT_ULL(pa_bits)) {
> > +        vms->highest_gpa = base -1;
> > +    } else {
> > +        vms->highest_gpa = memtop - 1;
> > +    }
> > +
> >      if (device_memory_size > 0) {
> >          ms->device_memory = g_malloc0(sizeof(*ms->device_memory));
> >          ms->device_memory->base = device_memory_base;
> > @@ -1860,7 +1871,20 @@ static void machvirt_init(MachineState *machine)
> >       * to create a VM with the right number of IPA bits.
> >       */
> >      if (!vms->memmap) {
> > -        virt_set_memmap(vms);
> > +        ARMCPU *armcpu = ARM_CPU(first_cpu);
> 
> 
> I think it's too early to use first_cpu here (although, I'll admit I'm
> always confused as to what gets initialized when...) Assuming we need to
> realize the cpus first, then we don't do that until a bit further down
> in this function. I wonder if it's possible to delay this memmap setup
> until after cpu realization. I see the memmap getting used prior when
> calculating virt_max_cpus, but that looks like it needs to be updated
> anyway to take highmem into account as to whether or not we should
> consider the high gicv3 redist region in the calculation.

OK, this is nothing short of total hell. You can't create the memory
map later, as MTE and the secure world both get in the way (they
really want a valid memory map). And as you pointed out, using
first_cpu is not appropriate here (obviously, I didn't test this
nearly enough). I could split the creation of the CPUs in two
sequences with the memory map creation in between, but this quickly
becomes quite invasive.

My current approach is to keep the current flow, but to create a
temporary CPU, find whatever I need to know about it, and free
it. Yes, this is a bit overkill, but it solves the chicken and egg
issue simply enough.
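
Roughly along these lines (a sketch only: it assumes machine->cpu_type is
already resolved at this point and reuses the probing logic from this
patch, so the actual next version may look different):

    if (!vms->memmap) {
        /*
         * Create a throw-away CPU object of the requested type, probe
         * its PA range, then discard it before the real vCPUs get
         * created further down.
         */
        Object *cpuobj = object_new(machine->cpu_type);
        ARMCPU *armcpu = ARM_CPU(cpuobj);
        int pa_bits;

        if (object_property_get_bool(cpuobj, "aarch64", NULL)) {
            pa_bits = arm_pamax(armcpu);
        } else if (arm_feature(&armcpu->env, ARM_FEATURE_LPAE)) {
            /* v7 with LPAE */
            pa_bits = 40;
        } else {
            /* Anything else */
            pa_bits = 32;
        }

        object_unref(cpuobj);

        virt_set_memmap(vms, pa_bits);
    }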

Thanks,

	M.

Patch

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 9d2abdbd5f..a572e0c9d9 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1610,10 +1610,10 @@  static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
     return arm_cpu_mp_affinity(idx, clustersz);
 }
 
-static void virt_set_memmap(VirtMachineState *vms)
+static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
 {
     MachineState *ms = MACHINE(vms);
-    hwaddr base, device_memory_base, device_memory_size;
+    hwaddr base, device_memory_base, device_memory_size, memtop;
     int i;
 
     vms->memmap = extended_memmap;
@@ -1628,9 +1628,12 @@  static void virt_set_memmap(VirtMachineState *vms)
         exit(EXIT_FAILURE);
     }
 
-    if (!vms->highmem &&
-        vms->memmap[VIRT_MEM].base + ms->maxram_size > 4 * GiB) {
-        error_report("highmem=off, but memory crosses the 4GiB limit\n");
+    if (!vms->highmem)
+	    pa_bits = 32;
+
+    if (vms->memmap[VIRT_MEM].base + ms->maxram_size > BIT_ULL(pa_bits)) {
+	    error_report("Addressing limited to %d bits, but memory exceeds it by %llu bytes\n",
+			 pa_bits, vms->memmap[VIRT_MEM].base + ms->maxram_size - BIT_ULL(pa_bits));
         exit(EXIT_FAILURE);
     }
     /*
@@ -1645,7 +1648,7 @@  static void virt_set_memmap(VirtMachineState *vms)
     device_memory_size = ms->maxram_size - ms->ram_size + ms->ram_slots * GiB;
 
     /* Base address of the high IO region */
-    base = device_memory_base + ROUND_UP(device_memory_size, GiB);
+    memtop = base = device_memory_base + ROUND_UP(device_memory_size, GiB);
     if (base < device_memory_base) {
         error_report("maxmem/slots too huge");
         exit(EXIT_FAILURE);
@@ -1662,9 +1665,17 @@  static void virt_set_memmap(VirtMachineState *vms)
         vms->memmap[i].size = size;
         base += size;
     }
-    vms->highest_gpa = (vms->highmem ?
-                        base :
-                        vms->memmap[VIRT_MEM].base + ms->maxram_size) - 1;
+
+    /*
+     * If base fits within pa_bits, all good. If it doesn't, limit it
+     * to the end of RAM, which is guaranteed to fit within pa_bits.
+     */
+    if (base <= BIT_ULL(pa_bits)) {
+        vms->highest_gpa = base -1;
+    } else {
+        vms->highest_gpa = memtop - 1;
+    }
+
     if (device_memory_size > 0) {
         ms->device_memory = g_malloc0(sizeof(*ms->device_memory));
         ms->device_memory->base = device_memory_base;
@@ -1860,7 +1871,20 @@  static void machvirt_init(MachineState *machine)
      * to create a VM with the right number of IPA bits.
      */
     if (!vms->memmap) {
-        virt_set_memmap(vms);
+        ARMCPU *armcpu = ARM_CPU(first_cpu);
+        int pa_bits;
+
+        if (object_property_get_bool(OBJECT(first_cpu), "aarch64", NULL)) {
+            pa_bits = arm_pamax(armcpu);
+        } else if (arm_feature(&armcpu->env, ARM_FEATURE_LPAE)) {
+            /* v7 with LPAE */
+            pa_bits = 40;
+        } else {
+            /* Anything else */
+            pa_bits = 32;
+        }
+
+        virt_set_memmap(vms, pa_bits);
     }
 
     /* We can probe only here because during property set
@@ -2596,7 +2620,7 @@  static int virt_kvm_type(MachineState *ms, const char *type_str)
     max_vm_pa_size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa);
 
     /* we freeze the memory map to compute the highest gpa */
-    virt_set_memmap(vms);
+    virt_set_memmap(vms, max_vm_pa_size);
 
     requested_pa_size = 64 - clz64(vms->highest_gpa);