diff mbox series

[v2] hw/smbios: fix memory corruption for large guests due to handle overlap

Message ID 20220203130957.2248949-1-ani@anisinha.ca (mailing list archive)
State New, archived
Headers show
Series [v2] hw/smbios: fix memory corruption for large guests due to handle overlap | expand

Commit Message

Ani Sinha Feb. 3, 2022, 1:09 p.m. UTC
The current implementation of smbios table handle assignment does not leave
enough gap between tables 17 and table 19 for guests with larger than 8 TB of
memory. This change fixes this issue. This change calculates if additional
space between the tables need to be set aside and then reserves that additional
space.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2023977

Signed-off-by: Ani Sinha <ani@anisinha.ca>
---
 hw/smbios/smbios.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

changelog:
v2: make sure we do not overlap table 19 and table 32 addresses.

Comments

Igor Mammedov Feb. 4, 2022, 9:34 a.m. UTC | #1
On Thu,  3 Feb 2022 18:39:57 +0530
Ani Sinha <ani@anisinha.ca> wrote:

subj talks about memory corruption but the commit message
doesn't explain how memory is corrupted, so either subj or
commit message is wrong.


> The current implementation of smbios table handle assignment does not leave
> enough gap between tables 17 and table 19 for guests with larger than 8 TB of
> memory. This change fixes this issue. This change calculates if additional
> space between the tables need to be set aside and then reserves that additional
> space.

please redo commit message in form,

 1. what's broken
 2. symptoms + way to reproduce
 3. how this patch fixes the issue


> Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2023977
> 
> Signed-off-by: Ani Sinha <ani@anisinha.ca>
> ---
>  hw/smbios/smbios.c | 29 +++++++++++++++++++++++------
>  1 file changed, 23 insertions(+), 6 deletions(-)
> 
> changelog:
> v2: make sure we do not overlap table 19 and table 32 addresses.


that made me think about why we have to hard-partition
handle ranges in the first place and live with the risk of hitting
a similar bug in the future.

It would be simpler if we were starting at some base for the first table
and then just incrementing it for every following table.

But we have to leave the current handles layout as is, to avoid
breaking migration, since SMBIOS tables aren't migrated
over the wire, so src and dst have to build exact copies.

So the question is whether it is worth keeping the legacy SMBIOS code and
introducing a new handle layout + memory_region re-sizable SMBIOS
tables like we did with the ACPI ones.

That way we will be free to change SMBIOS tables
at will without a risk of breaking migration and without
the need to add a compat knob for every change to keep src and
dst binary compatible.

But that's a bigger refactoring for just a fix,
so I'd go ahead with fixing the handles first
and then do the refactoring on top.
 
PS:
With this fix the tables blob is surely not binary compatible
when we are hitting the handles overlap.
But I'm inclined not to add a compat knob, with the justification
that the current code produces invalid tables (with duplicate handles)
and we shouldn't maintain a bug (have a compat knob) for this.
The risk we would be taking here would be the same (invalid handles)
if src migrated in the middle of reading tables and that is
limited only to old (<=2.4) machines without DMA enabled fwcfg.


> diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
> index 6013df1698..ccac4c1b3a 100644
> --- a/hw/smbios/smbios.c
> +++ b/hw/smbios/smbios.c
> @@ -743,13 +743,16 @@ static void smbios_build_type_16_table(unsigned dimm_cnt)
>  
>  #define MAX_T17_STD_SZ 0x7FFF /* (32G - 1M), in Megabytes */
>  #define MAX_T17_EXT_SZ 0x80000000 /* 2P, in Megabytes */
> +#define T17_BASE 0x1100
> +#define T19_BASE 0x1300
> +#define T32_BASE 0x2000

open coded numbers cleanup should be a separate patch
as it's not related to the actual fix.

>  static void smbios_build_type_17_table(unsigned instance, uint64_t size)
>  {
>      char loc_str[128];
>      uint64_t size_mb;
>  
> -    SMBIOS_BUILD_TABLE_PRE(17, 0x1100 + instance, true); /* required */
> +    SMBIOS_BUILD_TABLE_PRE(17, T17_BASE + instance, true); /* required */
>  
>      t->physical_memory_array_handle = cpu_to_le16(0x1000); /* Type 16 above */
>      t->memory_error_information_handle = cpu_to_le16(0xFFFE); /* Not provided */
> @@ -785,12 +788,13 @@ static void smbios_build_type_17_table(unsigned instance, uint64_t size)
>      SMBIOS_BUILD_TABLE_POST;
>  }
>  
> -static void smbios_build_type_19_table(unsigned instance,
> +static void smbios_build_type_19_table(unsigned instance, unsigned offset,
>                                         uint64_t start, uint64_t size)
>  {
>      uint64_t end, start_kb, end_kb;
>  
> -    SMBIOS_BUILD_TABLE_PRE(19, 0x1300 + instance, true); /* required */
> +    SMBIOS_BUILD_TABLE_PRE(19, T19_BASE + \
> +                           offset + instance, true); /* required */
>  
>      end = start + size - 1;
>      assert(end > start);
> @@ -814,7 +818,7 @@ static void smbios_build_type_19_table(unsigned instance,
>  
>  static void smbios_build_type_32_table(void)
>  {
> -    SMBIOS_BUILD_TABLE_PRE(32, 0x2000, true); /* required */
> +    SMBIOS_BUILD_TABLE_PRE(32, T32_BASE, true); /* required */
>  
>      memset(t->reserved, 0, 6);
>      t->boot_status = 0; /* No errors detected */
> @@ -982,7 +986,7 @@ void smbios_get_tables(MachineState *ms,
>                         uint8_t **anchor, size_t *anchor_len,
>                         Error **errp)
>  {
> -    unsigned i, dimm_cnt;
> +    unsigned i, dimm_cnt, offset;
>  
>      if (smbios_legacy) {
>          *tables = *anchor = NULL;
> @@ -1012,6 +1016,19 @@ void smbios_get_tables(MachineState *ms,
>  
>          dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size, MAX_DIMM_SZ) / MAX_DIMM_SZ;

Michael, Gerd,

Another question is why we split memory on 16Gb chunks, to begin with.
Maybe instead of doing so, we should just add 1 type17 entry describing
whole system RAM size. In which case we don't need this dance around
handle offsets anymore.

>  
> +        /*
> +         * The offset determines if we need to keep additional space betweeen
> +         * table 17 and table 19 so that they do not overlap. For example,
> +         * for a VM with larger than 8 TB guest memory and DIMM size of 16 GiB,
> +         * the default space between the two tables (T19_BASE - T17_BASE = 512)
> +         * is not enough.
> +         */
> +        offset = (dimm_cnt > (T19_BASE - T17_BASE)) ? \
> +                 dimm_cnt - (T19_BASE - T17_BASE) : 0;
> +
> +        /* we need to make sure table 19 and table 32 do not overlap */
> +        assert((mem_array_size + offset) < (T32_BASE - T19_BASE));

why is it before type 16? I'd suggest splitting/moving it close to the relevant
types.
Also the T32_BASE - T19_BASE part doesn't belong to this patch and should be
a separate one with an explanation of why it's needed.

>          smbios_build_type_16_table(dimm_cnt);
>  
>          for (i = 0; i < dimm_cnt; i++) {
> @@ -1019,7 +1036,7 @@ void smbios_get_tables(MachineState *ms,
>          }
>  
>          for (i = 0; i < mem_array_size; i++) {
> -            smbios_build_type_19_table(i, mem_array[i].address,
> +            smbios_build_type_19_table(i, offset, mem_array[i].address,
>                                         mem_array[i].length);
>          }
Gerd Hoffmann Feb. 4, 2022, 11:05 a.m. UTC | #2
Hi,

> Another question is why we split memory on 16Gb chunks, to begin with.
> Maybe instead of doing so, we should just add 1 type17 entry describing
> whole system RAM size. In which case we don't need this dance around
> handle offsets anymore.

Maybe to make the entries look like they do on physical hardware?
i.e. DIMM size is a power of two?  Also physical 1TB DIMMs just
don't exist?

take care,
  Gerd
Igor Mammedov Feb. 4, 2022, 12:18 p.m. UTC | #3
On Fri, 4 Feb 2022 12:05:58 +0100
Gerd Hoffmann <kraxel@redhat.com> wrote:

>   Hi,
> 
> > Another question is why we split memory on 16Gb chunks, to begin with.
> > Maybe instead of doing so, we should just add 1 type17 entry describing
> > whole system RAM size. In which case we don't need this dance around
> > handle offsets anymore.  
> 
> Maybe to make the entries look like they do on physical hardware?
> i.e. DIMM size is a power of two?  Also physical 1TB DIMMs just
> don't exist?

Does it have to be a DIMM? We could make it Other/Unknown/Row of chips/Chip
to be closer to the built-in memory that our main RAM actually is.
Michael S. Tsirkin Feb. 4, 2022, 1:51 p.m. UTC | #4
On Fri, Feb 04, 2022 at 10:34:23AM +0100, Igor Mammedov wrote:
> > @@ -982,7 +986,7 @@ void smbios_get_tables(MachineState *ms,
> >                         uint8_t **anchor, size_t *anchor_len,
> >                         Error **errp)
> >  {
> > -    unsigned i, dimm_cnt;
> > +    unsigned i, dimm_cnt, offset;
> >  
> >      if (smbios_legacy) {
> >          *tables = *anchor = NULL;
> > @@ -1012,6 +1016,19 @@ void smbios_get_tables(MachineState *ms,
> >  
> >          dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size, MAX_DIMM_SZ) / MAX_DIMM_SZ;
> 
> Michael, Gerd,
> 
> Another question is why we split memory on 16Gb chunks, to begin with.
> Maybe instead of doing so, we should just add 1 type17 entry describing
> whole system RAM size. In which case we don't need this dance around
> handle offsets anymore.

I'm not sure - could be some guests just get confused if a chunk
is too big ... we'd need a lot of testing if we change that ...
Ani Sinha Feb. 4, 2022, 1:52 p.m. UTC | #5
On Fri, Feb 4, 2022 at 17:48 Igor Mammedov <imammedo@redhat.com> wrote:

> On Fri, 4 Feb 2022 12:05:58 +0100
> Gerd Hoffmann <kraxel@redhat.com> wrote:
>
> >   Hi,
> >
> > > Another question is why we split memory on 16Gb chunks, to begin with.
> > > Maybe instead of doing so, we should just add 1 type17 entry describing
> > > whole system RAM size. In which case we don't need this dance around
> > > handle offsets anymore.
> >
> > Maybe to make the entries look like they do on physical hardware?
> > i.e. DIMM size is a power of two?  Also physical 1TB DIMMs just
> > don't exist?
>
> Does it have to be a DIMM, we can make it Other/Unknown/Row of chips/Chip
> to more close to builtin memory that's is our main ram is.


My concern here is even though the spec has provisions for those form
factors I wonder if the guests only expect dimm ? Will it break some guest
operating system?

>
Ani Sinha Feb. 7, 2022, 3:12 p.m. UTC | #6
>
> So question is it is worth to have legacy SMBIOS code and introduce a
> new handle layout + memory_region re-sizable SMBIOS tables like we did
> with ACPI ones.
>
> That way we we will be free to change SMBIOS tables at will without a
> risk of breaking migration and without need to add compat knob for every
> change to keep src and dst binary compatible.
>

Could you please point me to the change on the acpi side so that I can
study it and look into the refactoring for smbios side?
Igor Mammedov Feb. 8, 2022, 7:48 a.m. UTC | #7
On Mon, 7 Feb 2022 20:42:35 +0530 (IST)
Ani Sinha <ani@anisinha.ca> wrote:

> >
> > So question is it is worth to have legacy SMBIOS code and introduce a
> > new handle layout + memory_region re-sizable SMBIOS tables like we did
> > with ACPI ones.
> >
> > That way we we will be free to change SMBIOS tables at will without a
> > risk of breaking migration and without need to add compat knob for every
> > change to keep src and dst binary compatible.
> >  
> 
> Could you please point me to the change on the acpi side so that I can
> study it and look into the refactoring for smbios side?
> 

I'd suggest starting by looking at acpi_add_rom_blob() and how it evolved into
the current code. Eventually you should find the commit introducing resizable
memory_regions, introduced by Michael; if I recall correctly it was around
the time when we switched ACPI tables to memory regions.
Ani Sinha Feb. 10, 2022, 12:32 p.m. UTC | #8
On Fri, 4 Feb 2022, Ani Sinha wrote:

> On Fri, Feb 4, 2022 at 17:48 Igor Mammedov <imammedo@redhat.com> wrote:
>
> > On Fri, 4 Feb 2022 12:05:58 +0100
> > Gerd Hoffmann <kraxel@redhat.com> wrote:
> >
> > >   Hi,
> > >
> > > > Another question is why we split memory on 16Gb chunks, to begin with.
> > > > Maybe instead of doing so, we should just add 1 type17 entry describing
> > > > whole system RAM size. In which case we don't need this dance around
> > > > handle offsets anymore.
> > >
> > > Maybe to make the entries look like they do on physical hardware?
> > > i.e. DIMM size is a power of two?  Also physical 1TB DIMMs just
> > > don't exist?
> >
> > Does it have to be a DIMM, we can make it Other/Unknown/Row of chips/Chip
> > to more close to builtin memory that's is our main ram is.
>
>
> My concern here is even though the spec has provisions for those form
> factors I wonder if the guests only expect dimm ? Will it break some guest
> operating system?

I am not sure if we have come to any conclusion here. However, I have sent
a v2 patch series with Igor's suggestion addressed.
diff mbox series

Patch

diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index 6013df1698..ccac4c1b3a 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -743,13 +743,16 @@  static void smbios_build_type_16_table(unsigned dimm_cnt)
 
 #define MAX_T17_STD_SZ 0x7FFF /* (32G - 1M), in Megabytes */
 #define MAX_T17_EXT_SZ 0x80000000 /* 2P, in Megabytes */
+#define T17_BASE 0x1100
+#define T19_BASE 0x1300
+#define T32_BASE 0x2000
 
 static void smbios_build_type_17_table(unsigned instance, uint64_t size)
 {
     char loc_str[128];
     uint64_t size_mb;
 
-    SMBIOS_BUILD_TABLE_PRE(17, 0x1100 + instance, true); /* required */
+    SMBIOS_BUILD_TABLE_PRE(17, T17_BASE + instance, true); /* required */
 
     t->physical_memory_array_handle = cpu_to_le16(0x1000); /* Type 16 above */
     t->memory_error_information_handle = cpu_to_le16(0xFFFE); /* Not provided */
@@ -785,12 +788,13 @@  static void smbios_build_type_17_table(unsigned instance, uint64_t size)
     SMBIOS_BUILD_TABLE_POST;
 }
 
-static void smbios_build_type_19_table(unsigned instance,
+static void smbios_build_type_19_table(unsigned instance, unsigned offset,
                                        uint64_t start, uint64_t size)
 {
     uint64_t end, start_kb, end_kb;
 
-    SMBIOS_BUILD_TABLE_PRE(19, 0x1300 + instance, true); /* required */
+    SMBIOS_BUILD_TABLE_PRE(19, T19_BASE + \
+                           offset + instance, true); /* required */
 
     end = start + size - 1;
     assert(end > start);
@@ -814,7 +818,7 @@  static void smbios_build_type_19_table(unsigned instance,
 
 static void smbios_build_type_32_table(void)
 {
-    SMBIOS_BUILD_TABLE_PRE(32, 0x2000, true); /* required */
+    SMBIOS_BUILD_TABLE_PRE(32, T32_BASE, true); /* required */
 
     memset(t->reserved, 0, 6);
     t->boot_status = 0; /* No errors detected */
@@ -982,7 +986,7 @@  void smbios_get_tables(MachineState *ms,
                        uint8_t **anchor, size_t *anchor_len,
                        Error **errp)
 {
-    unsigned i, dimm_cnt;
+    unsigned i, dimm_cnt, offset;
 
     if (smbios_legacy) {
         *tables = *anchor = NULL;
@@ -1012,6 +1016,19 @@  void smbios_get_tables(MachineState *ms,
 
         dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size, MAX_DIMM_SZ) / MAX_DIMM_SZ;
 
+        /*
+         * The offset determines if we need to keep additional space betweeen
+         * table 17 and table 19 so that they do not overlap. For example,
+         * for a VM with larger than 8 TB guest memory and DIMM size of 16 GiB,
+         * the default space between the two tables (T19_BASE - T17_BASE = 512)
+         * is not enough.
+         */
+        offset = (dimm_cnt > (T19_BASE - T17_BASE)) ? \
+                 dimm_cnt - (T19_BASE - T17_BASE) : 0;
+
+        /* we need to make sure table 19 and table 32 do not overlap */
+        assert((mem_array_size + offset) < (T32_BASE - T19_BASE));
+
         smbios_build_type_16_table(dimm_cnt);
 
         for (i = 0; i < dimm_cnt; i++) {
@@ -1019,7 +1036,7 @@  void smbios_get_tables(MachineState *ms,
         }
 
         for (i = 0; i < mem_array_size; i++) {
-            smbios_build_type_19_table(i, mem_array[i].address,
+            smbios_build_type_19_table(i, offset, mem_array[i].address,
                                        mem_array[i].length);
         }