diff mbox series

hw/arm/virt: Validate memory size on the first NUMA node

Message ID 20220228075203.60064-1-gshan@redhat.com (mailing list archive)
State New, archived
Headers show
Series hw/arm/virt: Validate memory size on the first NUMA node | expand

Commit Message

Gavin Shan Feb. 28, 2022, 7:52 a.m. UTC
When the memory size on the first NUMA node is less than 128MB, the
guest hangs inside EDK2 as the following logs show.

  /home/gavin/sandbox/qemu.main/build/qemu-system-aarch64         \
  -accel kvm -machine virt,gic-version=host                       \
  -cpu host -smp 8,sockets=2,cores=2,threads=2                    \
  -m 1024M,slots=16,maxmem=64G                                    \
  -object memory-backend-ram,id=mem0,size=127M                    \
  -object memory-backend-ram,id=mem1,size=897M                    \
  -numa node,nodeid=0,memdev=mem0                                 \
  -numa node,nodeid=1,memdev=mem1                                 \
  -L /home/gavin/sandbox/qemu.main/build/pc-bios                  \
   :
  QemuVirtMemInfoPeiLibConstructor: System RAM @ 0x47F00000 - 0x7FFFFFFF
  QemuVirtMemInfoPeiLibConstructor: System RAM @ 0x40000000 - 0x47EFFFFF
  ASSERT [MemoryInit] /home/lacos/src/upstream/qemu/roms/edk2/ArmVirtPkg/Library/QemuVirtMemInfoLib/QemuVirtMemInfoPeiLibConstructor.c(93): NewSize >= 0x08000000

This adds MachineClass::validate_numa_nodes() to validate the memory
size on the first NUMA node. The guest is stopped from booting and
the reason is given for this specific case.

Signed-off-by: Gavin Shan <gshan@redhat.com>
---
 hw/arm/virt.c       | 9 +++++++++
 hw/core/numa.c      | 5 +++++
 include/hw/boards.h | 1 +
 3 files changed, 15 insertions(+)

Comments

Igor Mammedov Feb. 28, 2022, 9:08 a.m. UTC | #1
On Mon, 28 Feb 2022 15:52:03 +0800
Gavin Shan <gshan@redhat.com> wrote:

> When the memory size on the first NUMA node is less than 128MB, the
> guest hangs inside EDK2 as the following logs show.
> 
>   /home/gavin/sandbox/qemu.main/build/qemu-system-aarch64         \
>   -accel kvm -machine virt,gic-version=host                       \
>   -cpu host -smp 8,sockets=2,cores=2,threads=2                    \
>   -m 1024M,slots=16,maxmem=64G                                    \
>   -object memory-backend-ram,id=mem0,size=127M                    \
>   -object memory-backend-ram,id=mem1,size=897M                    \
>   -numa node,nodeid=0,memdev=mem0                                 \
>   -numa node,nodeid=1,memdev=mem1                                 \
>   -L /home/gavin/sandbox/qemu.main/build/pc-bios                  \
>    :
>   QemuVirtMemInfoPeiLibConstructor: System RAM @ 0x47F00000 - 0x7FFFFFFF
>   QemuVirtMemInfoPeiLibConstructor: System RAM @ 0x40000000 - 0x47EFFFFF
>   ASSERT [MemoryInit] /home/lacos/src/upstream/qemu/roms/edk2/ArmVirtPkg/Library/QemuVirtMemInfoLib/QemuVirtMemInfoPeiLibConstructor.c(93): NewSize >= 0x08000000
> 
> This adds MachineClass::validate_numa_nodes() to validate the memory
> size on the first NUMA node. The guest is stopped from booting and
> the reason is given for this specific case.

Unless it architecturally wrong thing i.e. (node size less than 128Mb)
,in which case limiting it in QEMU would be justified, I'd prefer
firmware being fixed or it reporting more useful for user error message.
 
> Signed-off-by: Gavin Shan <gshan@redhat.com>
> ---
>  hw/arm/virt.c       | 9 +++++++++
>  hw/core/numa.c      | 5 +++++
>  include/hw/boards.h | 1 +
>  3 files changed, 15 insertions(+)
> 
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 46bf7ceddf..234e7fca28 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -2491,6 +2491,14 @@ static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx)
>      return idx % ms->numa_state->num_nodes;
>  }
>  
> +static void virt_validate_numa_nodes(MachineState *ms)
> +{
> +    if (ms->numa_state->nodes[0].node_mem < 128 * MiB) {
> +        error_report("The first NUMA node should have at least 128MB memory");
> +        exit(1);

perhaps error_fatal() would be better

> +    }
> +}
> +
>  static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
>  {
>      int n;
> @@ -2836,6 +2844,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
>      mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
>      mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
>      mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
> +    mc->validate_numa_nodes = virt_validate_numa_nodes;
>      mc->kvm_type = virt_kvm_type;
>      assert(!mc->get_hotplug_handler);
>      mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
> diff --git a/hw/core/numa.c b/hw/core/numa.c
> index 1aa05dcf42..543a2eaf11 100644
> --- a/hw/core/numa.c
> +++ b/hw/core/numa.c
> @@ -724,6 +724,11 @@ void numa_complete_configuration(MachineState *ms)
>              /* Validation succeeded, now fill in any missing distances. */
>              complete_init_numa_distance(ms);
>          }
> +
> +        /* Validate NUMA nodes for the individual machine */
> +        if (mc->validate_numa_nodes) {
> +            mc->validate_numa_nodes(ms);
> +        }
>      }
>  }
>  
> diff --git a/include/hw/boards.h b/include/hw/boards.h
> index c92ac8815c..9709a35eeb 100644
> --- a/include/hw/boards.h
> +++ b/include/hw/boards.h
> @@ -282,6 +282,7 @@ struct MachineClass {
>                                                           unsigned cpu_index);
>      const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
>      int64_t (*get_default_cpu_node_id)(const MachineState *ms, int idx);
> +    void (*validate_numa_nodes)(MachineState *ms);
>      ram_addr_t (*fixup_ram_size)(ram_addr_t size);
>  };
>
Gavin Shan March 1, 2022, 9:20 a.m. UTC | #2
Hi Igor,

On 2/28/22 5:08 PM, Igor Mammedov wrote:
> On Mon, 28 Feb 2022 15:52:03 +0800
> Gavin Shan <gshan@redhat.com> wrote:
> 
>> When the memory size on the first NUMA node is less than 128MB, the
>> guest hangs inside EDK2 as the following logs show.
>>
>>    /home/gavin/sandbox/qemu.main/build/qemu-system-aarch64         \
>>    -accel kvm -machine virt,gic-version=host                       \
>>    -cpu host -smp 8,sockets=2,cores=2,threads=2                    \
>>    -m 1024M,slots=16,maxmem=64G                                    \
>>    -object memory-backend-ram,id=mem0,size=127M                    \
>>    -object memory-backend-ram,id=mem1,size=897M                    \
>>    -numa node,nodeid=0,memdev=mem0                                 \
>>    -numa node,nodeid=1,memdev=mem1                                 \
>>    -L /home/gavin/sandbox/qemu.main/build/pc-bios                  \
>>     :
>>    QemuVirtMemInfoPeiLibConstructor: System RAM @ 0x47F00000 - 0x7FFFFFFF
>>    QemuVirtMemInfoPeiLibConstructor: System RAM @ 0x40000000 - 0x47EFFFFF
>>    ASSERT [MemoryInit] /home/lacos/src/upstream/qemu/roms/edk2/ArmVirtPkg/Library/QemuVirtMemInfoLib/QemuVirtMemInfoPeiLibConstructor.c(93): NewSize >= 0x08000000
>>
>> This adds MachineClass::validate_numa_nodes() to validate the memory
>> size on the first NUMA node. The guest is stopped from booting and
>> the reason is given for this specific case.
> 
> Unless it architecturally wrong thing i.e. (node size less than 128Mb)
> ,in which case limiting it in QEMU would be justified, I'd prefer
> firmware being fixed or it reporting more useful for user error message.
>  

[include EDK2 developers]

I don't think 128MB node memory size is architecturally required.
I also thought EDK2 would be better place to provide a precise error
mesage and discussed it through with EDK2 developers. Lets see what
are their thoughts this time.
  
>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>> ---
>>   hw/arm/virt.c       | 9 +++++++++
>>   hw/core/numa.c      | 5 +++++
>>   include/hw/boards.h | 1 +
>>   3 files changed, 15 insertions(+)
>>
>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>> index 46bf7ceddf..234e7fca28 100644
>> --- a/hw/arm/virt.c
>> +++ b/hw/arm/virt.c
>> @@ -2491,6 +2491,14 @@ static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx)
>>       return idx % ms->numa_state->num_nodes;
>>   }
>>   
>> +static void virt_validate_numa_nodes(MachineState *ms)
>> +{
>> +    if (ms->numa_state->nodes[0].node_mem < 128 * MiB) {
>> +        error_report("The first NUMA node should have at least 128MB memory");
>> +        exit(1);
> 
> perhaps error_fatal() would be better
> 

Yes, I think so :)

>> +    }
>> +}
>> +
>>   static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
>>   {
>>       int n;
>> @@ -2836,6 +2844,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
>>       mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
>>       mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
>>       mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
>> +    mc->validate_numa_nodes = virt_validate_numa_nodes;
>>       mc->kvm_type = virt_kvm_type;
>>       assert(!mc->get_hotplug_handler);
>>       mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
>> diff --git a/hw/core/numa.c b/hw/core/numa.c
>> index 1aa05dcf42..543a2eaf11 100644
>> --- a/hw/core/numa.c
>> +++ b/hw/core/numa.c
>> @@ -724,6 +724,11 @@ void numa_complete_configuration(MachineState *ms)
>>               /* Validation succeeded, now fill in any missing distances. */
>>               complete_init_numa_distance(ms);
>>           }
>> +
>> +        /* Validate NUMA nodes for the individual machine */
>> +        if (mc->validate_numa_nodes) {
>> +            mc->validate_numa_nodes(ms);
>> +        }
>>       }
>>   }
>>   
>> diff --git a/include/hw/boards.h b/include/hw/boards.h
>> index c92ac8815c..9709a35eeb 100644
>> --- a/include/hw/boards.h
>> +++ b/include/hw/boards.h
>> @@ -282,6 +282,7 @@ struct MachineClass {
>>                                                            unsigned cpu_index);
>>       const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
>>       int64_t (*get_default_cpu_node_id)(const MachineState *ms, int idx);
>> +    void (*validate_numa_nodes)(MachineState *ms);
>>       ram_addr_t (*fixup_ram_size)(ram_addr_t size);
>>   };
>>   

Thanks,
Gavin
Gerd Hoffmann March 1, 2022, 11:42 a.m. UTC | #3
Hi,

> > Unless it architecturally wrong thing i.e. (node size less than 128Mb)
> > ,in which case limiting it in QEMU would be justified, I'd prefer
> > firmware being fixed or it reporting more useful for user error message.
> 
> [include EDK2 developers]
> 
> I don't think 128MB node memory size is architecturally required.
> I also thought EDK2 would be better place to provide a precise error
> mesage and discussed it through with EDK2 developers. Lets see what
> are their thoughts this time.

Useful error reporting that early in the firmware initialization is a
rather hard problem, it's much easier for qemu to catch those problems
and print a useful error message.

Fixing the firmware would be possible.  The firmware simply uses the
memory of the first numa note in the early initialization phase, which
could be changed to look for additional numa nodes.  It's IMHO simply
not worth the trouble though.  numa nodes with less memory than 128M
simply doesn't happen in practice, except when QE does questionable
scaleability testing (scale up the number of numa nodes without also
scaling up the total amount of memory, ending up with rather tiny
numa nodes and a configuration nobody actually uses in practice).

take care,
  Gerd
Gavin Shan March 3, 2022, 3:25 a.m. UTC | #4
Hi Gerd,

On 3/1/22 7:42 PM, Gerd Hoffmann wrote:
>>> Unless it architecturally wrong thing i.e. (node size less than 128Mb)
>>> ,in which case limiting it in QEMU would be justified, I'd prefer
>>> firmware being fixed or it reporting more useful for user error message.
>>
>> [include EDK2 developers]
>>
>> I don't think 128MB node memory size is architecturally required.
>> I also thought EDK2 would be better place to provide a precise error
>> mesage and discussed it through with EDK2 developers. Lets see what
>> are their thoughts this time.
> 
> Useful error reporting that early in the firmware initialization is a
> rather hard problem, it's much easier for qemu to catch those problems
> and print a useful error message.
> 
> Fixing the firmware would be possible.  The firmware simply uses the
> memory of the first numa note in the early initialization phase, which
> could be changed to look for additional numa nodes.  It's IMHO simply
> not worth the trouble though.  numa nodes with less memory than 128M
> simply doesn't happen in practice, except when QE does questionable
> scaleability testing (scale up the number of numa nodes without also
> scaling up the total amount of memory, ending up with rather tiny
> numa nodes and a configuration nobody actually uses in practice).
> 

I still don't think QEMU is best place to have this kind of limitation,
which the first NUMA node should have 128MB memory at least. For example,
the limitation exists in EDK2-version-A.0 and the limitation is removed
in EDK2-version-A.1. The QEMU can't boot the guest using EDK2-version-A.1,
but we should succeed.

If it's not worthwhile to be fixed in EDK2, it might be doable to improve
the error message printed by EDK2. Otherwise, we would ignore this issue
because 128MB node memory size isn't used in practice. If the EDK2 error
message can be improved, we might have something like below:

ASSERT [MemoryInit] /home/lacos/src/upstream/qemu/roms/edk2/ArmVirtPkg/Library/QemuVirtMemInfoLib/QemuVirtMemInfoPeiLibConstructor.c(93): NewSize >= 0x08000000

to

The first NUMA node should have more than 128MB memory

Thanks,
Gavin
Gerd Hoffmann March 4, 2022, 8:46 a.m. UTC | #5
Hi,

> because 128MB node memory size isn't used in practice. If the EDK2 error
> message can be improved, we might have something like below:
> 
> ASSERT [MemoryInit] /home/lacos/src/upstream/qemu/roms/edk2/ArmVirtPkg/Library/QemuVirtMemInfoLib/QemuVirtMemInfoPeiLibConstructor.c(93): NewSize >= 0x08000000
> 
> to
> 
> The first NUMA node should have more than 128MB memory

Well, except the firmware doesn't even know yet at that point
whenever it runs on a NUMA machine or not ...

take care,
  Gerd
Igor Mammedov March 4, 2022, 10:52 a.m. UTC | #6
On Thu, 3 Mar 2022 11:25:25 +0800
Gavin Shan <gshan@redhat.com> wrote:

> Hi Gerd,
> 
> On 3/1/22 7:42 PM, Gerd Hoffmann wrote:
> >>> Unless it architecturally wrong thing i.e. (node size less than 128Mb)
> >>> ,in which case limiting it in QEMU would be justified, I'd prefer
> >>> firmware being fixed or it reporting more useful for user error message.  
> >>
> >> [include EDK2 developers]
> >>
> >> I don't think 128MB node memory size is architecturally required.
> >> I also thought EDK2 would be better place to provide a precise error
> >> mesage and discussed it through with EDK2 developers. Lets see what
> >> are their thoughts this time.  
> > 
> > Useful error reporting that early in the firmware initialization is a
> > rather hard problem, it's much easier for qemu to catch those problems
> > and print a useful error message.
> > 
> > Fixing the firmware would be possible.  The firmware simply uses the
> > memory of the first numa note in the early initialization phase, which
> > could be changed to look for additional numa nodes.  It's IMHO simply
> > not worth the trouble though.  numa nodes with less memory than 128M
> > simply doesn't happen in practice, except when QE does questionable
> > scaleability testing (scale up the number of numa nodes without also
> > scaling up the total amount of memory, ending up with rather tiny
> > numa nodes and a configuration nobody actually uses in practice).
> >   
> 
> I still don't think QEMU is best place to have this kind of limitation,
> which the first NUMA node should have 128MB memory at least. For example,
> the limitation exists in EDK2-version-A.0 and the limitation is removed
> in EDK2-version-A.1. The QEMU can't boot the guest using EDK2-version-A.1,
> but we should succeed.

if firmware is not an option, I wouldn't opposed to printing warning
message from QEMU if you can detect that you are running broken edk2
and under condition that no new infa/hooks are invented for this.
(assuming it's worth all the effort at all)

Maybe put it in virt-arm specific machine_done.

> If it's not worthwhile to be fixed in EDK2, it might be doable to improve
> the error message printed by EDK2. Otherwise, we would ignore this issue
> because 128MB node memory size isn't used in practice. If the EDK2 error
> message can be improved, we might have something like below:
> 
> ASSERT [MemoryInit] /home/lacos/src/upstream/qemu/roms/edk2/ArmVirtPkg/Library/QemuVirtMemInfoLib/QemuVirtMemInfoPeiLibConstructor.c(93): NewSize >= 0x08000000
> 
> to
> 
> The first NUMA node should have more than 128MB memory
> 
> Thanks,
> Gavin
>
Peter Maydell March 4, 2022, 10:58 a.m. UTC | #7
On Fri, 4 Mar 2022 at 10:52, Igor Mammedov <imammedo@redhat.com> wrote:
> if firmware is not an option, I wouldn't opposed to printing warning
> message from QEMU if you can detect that you are running broken edk2
> and under condition that no new infa/hooks are invented for this.
> (assuming it's worth all the effort at all)

I am definitely not in favour of that. QEMU should provide the
emulated hardware and let the firmware deal with detecting if
it's missing important stuff. It should as far as is possible
not have special-case detection-of-broken-guests handling.

thanks
-- PMM
Laszlo Ersek March 4, 2022, 2:24 p.m. UTC | #8
Gerd,

On 03/04/22 11:58, Peter Maydell wrote:
> On Fri, 4 Mar 2022 at 10:52, Igor Mammedov <imammedo@redhat.com> wrote:
>> if firmware is not an option, I wouldn't opposed to printing warning
>> message from QEMU if you can detect that you are running broken edk2
>> and under condition that no new infa/hooks are invented for this.
>> (assuming it's worth all the effort at all)
> 
> I am definitely not in favour of that. QEMU should provide the
> emulated hardware and let the firmware deal with detecting if
> it's missing important stuff. It should as far as is possible
> not have special-case detection-of-broken-guests handling.
> 
> thanks
> -- PMM
> 

It's probably simplest if you replace the ASSERT()s in question; please
see the attachment. (Only smoke tested.) Gavin indicated up-thread he'd
be OK with an easier-to-understand error message.

Laszlo
Shan Gavin March 7, 2022, 7:13 a.m. UTC | #9
Hi Laszlo,

Yes, I think it's enough to provide the user-friendly error message in EDK2.
The command lines to start the VM/guest can be adjusted to have more than
128MB memory associated for NUMA node#0 when it's seen by users.

Thanks,
Gavin

Laszlo Ersek <lersek@redhat.com> 于2022年3月4日周五 22:24写道:
>
> Gerd,
>
> On 03/04/22 11:58, Peter Maydell wrote:
> > On Fri, 4 Mar 2022 at 10:52, Igor Mammedov <imammedo@redhat.com> wrote:
> >> if firmware is not an option, I wouldn't opposed to printing warning
> >> message from QEMU if you can detect that you are running broken edk2
> >> and under condition that no new infa/hooks are invented for this.
> >> (assuming it's worth all the effort at all)
> >
> > I am definitely not in favour of that. QEMU should provide the
> > emulated hardware and let the firmware deal with detecting if
> > it's missing important stuff. It should as far as is possible
> > not have special-case detection-of-broken-guests handling.
> >
> > thanks
> > -- PMM
> >
>
> It's probably simplest if you replace the ASSERT()s in question; please
> see the attachment. (Only smoke tested.) Gavin indicated up-thread he'd
> be OK with an easier-to-understand error message.
>
> Laszlo
diff mbox series

Patch

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 46bf7ceddf..234e7fca28 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2491,6 +2491,14 @@  static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx)
     return idx % ms->numa_state->num_nodes;
 }
 
+static void virt_validate_numa_nodes(MachineState *ms)
+{
+    if (ms->numa_state->nodes[0].node_mem < 128 * MiB) {
+        error_report("The first NUMA node should have at least 128MB memory");
+        exit(1);
+    }
+}
+
 static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
 {
     int n;
@@ -2836,6 +2844,7 @@  static void virt_machine_class_init(ObjectClass *oc, void *data)
     mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
     mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
     mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
+    mc->validate_numa_nodes = virt_validate_numa_nodes;
     mc->kvm_type = virt_kvm_type;
     assert(!mc->get_hotplug_handler);
     mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
diff --git a/hw/core/numa.c b/hw/core/numa.c
index 1aa05dcf42..543a2eaf11 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -724,6 +724,11 @@  void numa_complete_configuration(MachineState *ms)
             /* Validation succeeded, now fill in any missing distances. */
             complete_init_numa_distance(ms);
         }
+
+        /* Validate NUMA nodes for the individual machine */
+        if (mc->validate_numa_nodes) {
+            mc->validate_numa_nodes(ms);
+        }
     }
 }
 
diff --git a/include/hw/boards.h b/include/hw/boards.h
index c92ac8815c..9709a35eeb 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -282,6 +282,7 @@  struct MachineClass {
                                                          unsigned cpu_index);
     const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
     int64_t (*get_default_cpu_node_id)(const MachineState *ms, int idx);
+    void (*validate_numa_nodes)(MachineState *ms);
     ram_addr_t (*fixup_ram_size)(ram_addr_t size);
 };