diff mbox series

[3/4] virtio-gpu: use a VMState variant for the scanout field

Message ID 20240507111920.1594897-4-marcandre.lureau@redhat.com (mailing list archive)
State New
Headers show
Series Fix "virtio-gpu: fix scanout migration post-load" | expand

Commit Message

Marc-André Lureau May 7, 2024, 11:19 a.m. UTC
From: Marc-André Lureau <marcandre.lureau@redhat.com>

Depending on the version, use v1 or v2 of the scanout VM state.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
 hw/display/virtio-gpu.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

Comments

Peter Xu May 7, 2024, 7:59 p.m. UTC | #1
On Tue, May 07, 2024 at 03:19:19PM +0400, marcandre.lureau@redhat.com wrote:
> From: Marc-André Lureau <marcandre.lureau@redhat.com>
> 
> Depending on the version, use v1 or v2 of the scanout VM state.
> 
> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> ---
>  hw/display/virtio-gpu.c | 22 +++++++++++++++++-----
>  1 file changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
> index ae831b6b3e..4fd72caf3f 100644
> --- a/hw/display/virtio-gpu.c
> +++ b/hw/display/virtio-gpu.c
> @@ -1191,17 +1191,29 @@ static const VMStateDescription vmstate_virtio_gpu_scanout = {
>      },
>  };
>  
> +static bool vmstate_before_v2(void *opaque, int version)
> +{
> +    return version <= 1;
> +}
> +
>  static const VMStateDescription vmstate_virtio_gpu_scanouts = {
>      .name = "virtio-gpu-scanouts",
> -    .version_id = 1,
> +    .version_id = 2,
> +    .minimum_version_id = 1,
>      .fields = (const VMStateField[]) {
>          VMSTATE_INT32(parent_obj.enable, struct VirtIOGPU),
>          VMSTATE_UINT32_EQUAL(parent_obj.conf.max_outputs,
>                               struct VirtIOGPU, NULL),
> -        VMSTATE_STRUCT_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
> -                                     parent_obj.conf.max_outputs, 1,
> -                                     vmstate_virtio_gpu_scanout,
> -                                     struct virtio_gpu_scanout),
> +        VMSTATE_VSTRUCT_TEST_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
> +                                           vmstate_before_v2,
> +                                           parent_obj.conf.max_outputs, 1,
> +                                           vmstate_virtio_gpu_scanout,
> +                                           struct virtio_gpu_scanout, 1),
> +        VMSTATE_VSTRUCT_TEST_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
> +                                           NULL,
> +                                           parent_obj.conf.max_outputs, 2,
> +                                           vmstate_virtio_gpu_scanout,
> +                                           struct virtio_gpu_scanout, 2),

Personally I really wished struct_version_id never existed..  After these
years we only have 1 user of it (hw/ipmi/isa_ipmi_kcs.c), and we need to
keep that working.  I'm wondering whether there's way we can avoid adding
one more user, and even more complicated..

I think I get the reasoning behind "we define the same thing twice", but
this is really tricky and definitely needs rich documents, meanwhiel all of
these seem to rely on so many small details: one set .field_exists
properly, one leaving it to NULL; also the two versionings used here for
both parent vmsd, and the struct, and for each entry we need to set
different versions to different fields..

Would it work if we only make the new fields under control with
vmstate_before_v2()?  IOW, making all below with
.field_exists=vmstate_before_v2, so skip below when machine type is old?

         VMSTATE_UINT32_V(fb.format, struct virtio_gpu_scanout, 2),
         VMSTATE_UINT32_V(fb.bytes_pp, struct virtio_gpu_scanout, 2),
         VMSTATE_UINT32_V(fb.width, struct virtio_gpu_scanout, 2),
         VMSTATE_UINT32_V(fb.height, struct virtio_gpu_scanout, 2),
         VMSTATE_UINT32_V(fb.stride, struct virtio_gpu_scanout, 2),
         VMSTATE_UINT32_V(fb.offset, struct virtio_gpu_scanout, 2),

Thanks,
Marc-André Lureau May 10, 2024, 8:39 a.m. UTC | #2
Hi

On Wed, May 8, 2024 at 12:01 AM Peter Xu <peterx@redhat.com> wrote:
>
> On Tue, May 07, 2024 at 03:19:19PM +0400, marcandre.lureau@redhat.com wrote:
> > From: Marc-André Lureau <marcandre.lureau@redhat.com>
> >
> > Depending on the version, use v1 or v2 of the scanout VM state.
> >
> > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > ---
> >  hw/display/virtio-gpu.c | 22 +++++++++++++++++-----
> >  1 file changed, 17 insertions(+), 5 deletions(-)
> >
> > diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
> > index ae831b6b3e..4fd72caf3f 100644
> > --- a/hw/display/virtio-gpu.c
> > +++ b/hw/display/virtio-gpu.c
> > @@ -1191,17 +1191,29 @@ static const VMStateDescription vmstate_virtio_gpu_scanout = {
> >      },
> >  };
> >
> > +static bool vmstate_before_v2(void *opaque, int version)
> > +{
> > +    return version <= 1;
> > +}
> > +
> >  static const VMStateDescription vmstate_virtio_gpu_scanouts = {
> >      .name = "virtio-gpu-scanouts",
> > -    .version_id = 1,
> > +    .version_id = 2,
> > +    .minimum_version_id = 1,
> >      .fields = (const VMStateField[]) {
> >          VMSTATE_INT32(parent_obj.enable, struct VirtIOGPU),
> >          VMSTATE_UINT32_EQUAL(parent_obj.conf.max_outputs,
> >                               struct VirtIOGPU, NULL),
> > -        VMSTATE_STRUCT_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
> > -                                     parent_obj.conf.max_outputs, 1,
> > -                                     vmstate_virtio_gpu_scanout,
> > -                                     struct virtio_gpu_scanout),
> > +        VMSTATE_VSTRUCT_TEST_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
> > +                                           vmstate_before_v2,
> > +                                           parent_obj.conf.max_outputs, 1,
> > +                                           vmstate_virtio_gpu_scanout,
> > +                                           struct virtio_gpu_scanout, 1),
> > +        VMSTATE_VSTRUCT_TEST_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
> > +                                           NULL,
> > +                                           parent_obj.conf.max_outputs, 2,
> > +                                           vmstate_virtio_gpu_scanout,
> > +                                           struct virtio_gpu_scanout, 2),
>
> Personally I really wished struct_version_id never existed..  After these
> years we only have 1 user of it (hw/ipmi/isa_ipmi_kcs.c), and we need to
> keep that working.  I'm wondering whether there's way we can avoid adding
> one more user, and even more complicated..
>
> I think I get the reasoning behind "we define the same thing twice", but
> this is really tricky and definitely needs rich documents, meanwhiel all of
> these seem to rely on so many small details: one set .field_exists
> properly, one leaving it to NULL; also the two versionings used here for
> both parent vmsd, and the struct, and for each entry we need to set
> different versions to different fields..
>
> Would it work if we only make the new fields under control with
> vmstate_before_v2()?  IOW, making all below with
> .field_exists=vmstate_before_v2, so skip below when machine type is old?

The existing VMSTATE_STRUCT_VARRAY_UINT32 would always use the latest
vmstate_virtio_gpu_scanout version_id (2 in master).

The "struct_version_id" solution allows setting the
vmstate_virtio_gpu_scanout version_id to 1 if the parent struct
(vmstate_virtio_gpu_scanouts) is 1, and 2 if the parent is 2.

Since we don't have per VMSD version information on the wire, nested
struct versioning is quite limited and cumbersome. I am not sure it
can be changed without breaking the stream format, and whether it's
worthwhile.


>
>          VMSTATE_UINT32_V(fb.format, struct virtio_gpu_scanout, 2),
>          VMSTATE_UINT32_V(fb.bytes_pp, struct virtio_gpu_scanout, 2),
>          VMSTATE_UINT32_V(fb.width, struct virtio_gpu_scanout, 2),
>          VMSTATE_UINT32_V(fb.height, struct virtio_gpu_scanout, 2),
>          VMSTATE_UINT32_V(fb.stride, struct virtio_gpu_scanout, 2),
>          VMSTATE_UINT32_V(fb.offset, struct virtio_gpu_scanout, 2),
>
> Thanks,
>
> --
> Peter Xu
>
>


--
Marc-André Lureau
Michael S. Tsirkin May 10, 2024, 10:25 a.m. UTC | #3
On Tue, May 07, 2024 at 03:19:19PM +0400, marcandre.lureau@redhat.com wrote:
> From: Marc-André Lureau <marcandre.lureau@redhat.com>
> 
> Depending on the version, use v1 or v2 of the scanout VM state.
> 
> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> ---
>  hw/display/virtio-gpu.c | 22 +++++++++++++++++-----
>  1 file changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
> index ae831b6b3e..4fd72caf3f 100644
> --- a/hw/display/virtio-gpu.c
> +++ b/hw/display/virtio-gpu.c
> @@ -1191,17 +1191,29 @@ static const VMStateDescription vmstate_virtio_gpu_scanout = {
>      },
>  };
>  
> +static bool vmstate_before_v2(void *opaque, int version)
> +{
> +    return version <= 1;
> +}
> +
>  static const VMStateDescription vmstate_virtio_gpu_scanouts = {
>      .name = "virtio-gpu-scanouts",
> -    .version_id = 1,
> +    .version_id = 2,
> +    .minimum_version_id = 1,
>      .fields = (const VMStateField[]) {
>          VMSTATE_INT32(parent_obj.enable, struct VirtIOGPU),
>          VMSTATE_UINT32_EQUAL(parent_obj.conf.max_outputs,
>                               struct VirtIOGPU, NULL),
> -        VMSTATE_STRUCT_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
> -                                     parent_obj.conf.max_outputs, 1,
> -                                     vmstate_virtio_gpu_scanout,
> -                                     struct virtio_gpu_scanout),
> +        VMSTATE_VSTRUCT_TEST_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
> +                                           vmstate_before_v2,
> +                                           parent_obj.conf.max_outputs, 1,
> +                                           vmstate_virtio_gpu_scanout,
> +                                           struct virtio_gpu_scanout, 1),
> +        VMSTATE_VSTRUCT_TEST_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
> +                                           NULL,
> +                                           parent_obj.conf.max_outputs, 2,
> +                                           vmstate_virtio_gpu_scanout,
> +                                           struct virtio_gpu_scanout, 2),
>          VMSTATE_END_OF_LIST()
>      },


Just don't, please.
Add a property and add a conditional field based on property, set
from the compat machinery.


>  };
> -- 
> 2.41.0.28.gd7d8841f67
Marc-André Lureau May 10, 2024, 11:57 a.m. UTC | #4
Hi Michael

On Fri, May 10, 2024 at 2:26 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Tue, May 07, 2024 at 03:19:19PM +0400, marcandre.lureau@redhat.com wrote:
> > From: Marc-André Lureau <marcandre.lureau@redhat.com>
> >
> > Depending on the version, use v1 or v2 of the scanout VM state.
> >
> > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > ---
> >  hw/display/virtio-gpu.c | 22 +++++++++++++++++-----
> >  1 file changed, 17 insertions(+), 5 deletions(-)
> >
> > diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
> > index ae831b6b3e..4fd72caf3f 100644
> > --- a/hw/display/virtio-gpu.c
> > +++ b/hw/display/virtio-gpu.c
> > @@ -1191,17 +1191,29 @@ static const VMStateDescription vmstate_virtio_gpu_scanout = {
> >      },
> >  };
> >
> > +static bool vmstate_before_v2(void *opaque, int version)
> > +{
> > +    return version <= 1;
> > +}
> > +
> >  static const VMStateDescription vmstate_virtio_gpu_scanouts = {
> >      .name = "virtio-gpu-scanouts",
> > -    .version_id = 1,
> > +    .version_id = 2,
> > +    .minimum_version_id = 1,
> >      .fields = (const VMStateField[]) {
> >          VMSTATE_INT32(parent_obj.enable, struct VirtIOGPU),
> >          VMSTATE_UINT32_EQUAL(parent_obj.conf.max_outputs,
> >                               struct VirtIOGPU, NULL),
> > -        VMSTATE_STRUCT_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
> > -                                     parent_obj.conf.max_outputs, 1,
> > -                                     vmstate_virtio_gpu_scanout,
> > -                                     struct virtio_gpu_scanout),
> > +        VMSTATE_VSTRUCT_TEST_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
> > +                                           vmstate_before_v2,
> > +                                           parent_obj.conf.max_outputs, 1,
> > +                                           vmstate_virtio_gpu_scanout,
> > +                                           struct virtio_gpu_scanout, 1),
> > +        VMSTATE_VSTRUCT_TEST_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
> > +                                           NULL,
> > +                                           parent_obj.conf.max_outputs, 2,
> > +                                           vmstate_virtio_gpu_scanout,
> > +                                           struct virtio_gpu_scanout, 2),
> >          VMSTATE_END_OF_LIST()
> >      },
>
>
> Just don't, please.
> Add a property and add a conditional field based on property, set
> from the compat machinery.
>

The version isn't propagated through the nested VMSDs, so
vmstate_virtio_gpu_scanout would only have v1, whether we have a field
test or not.

Can you be more explicit what alternative solution you propose?

thanks
Peter Xu May 10, 2024, 5:33 p.m. UTC | #5
Hi, Marc-André,

On Fri, May 10, 2024 at 12:39:34PM +0400, Marc-André Lureau wrote:
> Since we don't have per VMSD version information on the wire, nested
> struct versioning is quite limited and cumbersome. I am not sure it
> can be changed without breaking the stream format, and whether it's
> worthwhile.

Right that's a major pain, and actually I just notice it..

I think it'll be much, much simpler if we keep vmsd version on the wire for
each VMSD (including struct fields), then it makes more sense to me.

Then when I went back and see again the VSTRUCT thing...  I can hardly
understand what it is doing, and also how it works at all.

Look at the current only IPMI user, who has:

        VMSTATE_VSTRUCT_V(kcs, ISAIPMIKCSDevice, 2, vmstate_IPMIKCS,
                          IPMIKCS, 2),

It is setting both vmsd version and struct_version to 2.  I can't tell why
it matters then if anyway both of the fields are the same..

When we do save(), there is:

                } else if (field->flags & VMS_STRUCT) {
                    ret = vmstate_save_state(f, field->vmsd, curr_elem,
                                             vmdesc_loop);
                } else if (field->flags & VMS_VSTRUCT) {
                    ret = vmstate_save_state_v(f, field->vmsd, curr_elem,
                                               vmdesc_loop,
                                               field->struct_version_id, errp);

When we load():

                } else if (field->flags & VMS_STRUCT) {
                    ret = vmstate_load_state(f, field->vmsd, curr_elem,
                                             field->vmsd->version_id);
                } else if (field->flags & VMS_VSTRUCT) {
                    ret = vmstate_load_state(f, field->vmsd, curr_elem,
                                             field->struct_version_id);
                } else {

In this case, passing in struct_version==version should have zero effect
afaict, because the default behavior is passing in vmsd->version_id anyway.

Moreover, now I highly doubt whether the VMS_STRUCT whole thing makes sense
at all as you mentioned.  Especially on the load side, here we should rely
on vmstate_load_state() taking the last parameter as version_id on the
wire.  Here we're passing in the struct's version_id or struct_version_id,
and neither of them makes sense to me... if we miss that version_id
information, afaiu we should simply fix it and put it on the wire..  It'll
break migration, we may need to work that out, but I don't see a better
way.  Keeping it like this like a nightmare to me.. :-(

Irrelevant of all these mess.. For this specific problem, what I meant is
exactly what Michael was requesting too (hopefully), I'd want to avoid
further extending the complexity in this area.  I have a patch attached at
last which I also tested 8.2<->9.0 bi-directional migrations and it worked
for me when I smoked it.  Please have a look to see whether that makes
sense and at the meantime avoid most of the tricks.

I'd also like to mention one more thing just in case this can cause some
more attention to virtio guys..

Normally I ran vmstate-static-checker.py before softfreeze, and I did it
for 9.0 too without seeing this problem.  It isn't raised because all
virtio devices are using the "self managed" VMSTATE_VIRTIO_DEVICE to
migrate.  In that case I am out of luck.  We can further extend what
Fabiano mentioned in the other thread to cover migration stream validations
in the future, but just to mention IMHO that needs extra work, and may work
most likely the same as vmstate static checker but just waste many more cpu
resources.  It'll be good if someone could still help move virtio towards
like most of the rest devices, or at least get covered by the static
checker, too.  But that definitely is a separate topic too.. so we can
address the immediate breakage first.

Thanks,

==8<==
From a24ef99670fa7102da461d795aed4a957bad86b1 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 10 May 2024 12:33:34 -0400
Subject: [PATCH] fix gpu

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 include/hw/virtio/virtio-gpu.h |  2 +-
 hw/core/machine.c              |  1 +
 hw/display/virtio-gpu.c        | 21 +++++++++++++++------
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index ed44cdad6b..e128501bdc 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -176,7 +176,7 @@ typedef struct VGPUDMABuf {
 
 struct VirtIOGPU {
     VirtIOGPUBase parent_obj;
-
+    uint8_t vmstate_version;
     uint64_t conf_max_hostmem;
 
     VirtQueue *ctrl_vq;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 4ff60911e7..8f6f0dda7c 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -42,6 +42,7 @@ GlobalProperty hw_compat_8_2[] = {
     { "migration", "zero-page-detection", "legacy"},
     { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" },
     { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" },
+    { "virtio-gpu-device", "x-vmstate-version", "1" },
 };
 const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2);
 
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index ae831b6b3e..c53f55404c 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -1166,6 +1166,14 @@ static void virtio_gpu_cursor_bh(void *opaque)
     virtio_gpu_handle_cursor(&g->parent_obj.parent_obj, g->cursor_vq);
 }
 
+static bool vmstate_after_v2(void *opaque, int version)
+{
+    struct VirtIOGPUBase *base = container_of(opaque, VirtIOGPUBase, scanout);
+    struct VirtIOGPU *gpu = container_of(base, VirtIOGPU, parent_obj);
+
+    return gpu->vmstate_version >= 2;
+}
+
 static const VMStateDescription vmstate_virtio_gpu_scanout = {
     .name = "virtio-gpu-one-scanout",
     .version_id = 2,
@@ -1181,12 +1189,12 @@ static const VMStateDescription vmstate_virtio_gpu_scanout = {
         VMSTATE_UINT32(cursor.hot_y, struct virtio_gpu_scanout),
         VMSTATE_UINT32(cursor.pos.x, struct virtio_gpu_scanout),
         VMSTATE_UINT32(cursor.pos.y, struct virtio_gpu_scanout),
-        VMSTATE_UINT32_V(fb.format, struct virtio_gpu_scanout, 2),
-        VMSTATE_UINT32_V(fb.bytes_pp, struct virtio_gpu_scanout, 2),
-        VMSTATE_UINT32_V(fb.width, struct virtio_gpu_scanout, 2),
-        VMSTATE_UINT32_V(fb.height, struct virtio_gpu_scanout, 2),
-        VMSTATE_UINT32_V(fb.stride, struct virtio_gpu_scanout, 2),
-        VMSTATE_UINT32_V(fb.offset, struct virtio_gpu_scanout, 2),
+        VMSTATE_UINT32_TEST(fb.format, struct virtio_gpu_scanout,vmstate_after_v2),
+        VMSTATE_UINT32_TEST(fb.bytes_pp, struct virtio_gpu_scanout,vmstate_after_v2),
+        VMSTATE_UINT32_TEST(fb.width, struct virtio_gpu_scanout,vmstate_after_v2),
+        VMSTATE_UINT32_TEST(fb.height, struct virtio_gpu_scanout,vmstate_after_v2),
+        VMSTATE_UINT32_TEST(fb.stride, struct virtio_gpu_scanout,vmstate_after_v2),
+        VMSTATE_UINT32_TEST(fb.offset, struct virtio_gpu_scanout,vmstate_after_v2),
         VMSTATE_END_OF_LIST()
     },
 };
@@ -1659,6 +1667,7 @@ static Property virtio_gpu_properties[] = {
     DEFINE_PROP_BIT("blob", VirtIOGPU, parent_obj.conf.flags,
                     VIRTIO_GPU_FLAG_BLOB_ENABLED, false),
     DEFINE_PROP_SIZE("hostmem", VirtIOGPU, parent_obj.conf.hostmem, 0),
+    DEFINE_PROP_UINT8("x-vmstate-version", VirtIOGPU, vmstate_version, 2),
     DEFINE_PROP_END_OF_LIST(),
 };
Marc-André Lureau May 11, 2024, 6:44 a.m. UTC | #6
Hi Peter

On Fri, May 10, 2024 at 9:33 PM Peter Xu <peterx@redhat.com> wrote:
>
> Hi, Marc-André,
>
> On Fri, May 10, 2024 at 12:39:34PM +0400, Marc-André Lureau wrote:
> > Since we don't have per VMSD version information on the wire, nested
> > struct versioning is quite limited and cumbersome. I am not sure it
> > can be changed without breaking the stream format, and whether it's
> > worthwhile.
>
> Right that's a major pain, and actually I just notice it..
>
> I think it'll be much, much simpler if we keep vmsd version on the wire for
> each VMSD (including struct fields), then it makes more sense to me.
>
> Then when I went back and see again the VSTRUCT thing...  I can hardly
> understand what it is doing, and also how it works at all.
>
> Look at the current only IPMI user, who has:
>
>         VMSTATE_VSTRUCT_V(kcs, ISAIPMIKCSDevice, 2, vmstate_IPMIKCS,
>                           IPMIKCS, 2),
>
> It is setting both vmsd version and struct_version to 2.  I can't tell why
> it matters then if anyway both of the fields are the same..
>
> When we do save(), there is:
>
>                 } else if (field->flags & VMS_STRUCT) {
>                     ret = vmstate_save_state(f, field->vmsd, curr_elem,
>                                              vmdesc_loop);
>                 } else if (field->flags & VMS_VSTRUCT) {
>                     ret = vmstate_save_state_v(f, field->vmsd, curr_elem,
>                                                vmdesc_loop,
>                                                field->struct_version_id, errp);
>
> When we load():
>
>                 } else if (field->flags & VMS_STRUCT) {
>                     ret = vmstate_load_state(f, field->vmsd, curr_elem,
>                                              field->vmsd->version_id);
>                 } else if (field->flags & VMS_VSTRUCT) {
>                     ret = vmstate_load_state(f, field->vmsd, curr_elem,
>                                              field->struct_version_id);
>                 } else {
>
> In this case, passing in struct_version==version should have zero effect
> afaict, because the default behavior is passing in vmsd->version_id anyway.

IPMI KCS being a top-level section, the fields with an unsupported
version are filtered before reaching this code.

But since I can't see how a machine will have a specific version, it
only helps for backward migration, which is quite limited.

>
> Moreover, now I highly doubt whether the VMS_STRUCT whole thing makes sense
> at all as you mentioned.  Especially on the load side, here we should rely
> on vmstate_load_state() taking the last parameter as version_id on the
> wire.  Here we're passing in the struct's version_id or struct_version_id,
> and neither of them makes sense to me... if we miss that version_id
> information, afaiu we should simply fix it and put it on the wire..  It'll
> break migration, we may need to work that out, but I don't see a better
> way.  Keeping it like this like a nightmare to me.. :-(

Ack.

Do you think we should add a version on the wire for each VMSD? that
will likely be a format change.

>
> Irrelevant of all these mess.. For this specific problem, what I meant is
> exactly what Michael was requesting too (hopefully), I'd want to avoid
> further extending the complexity in this area.  I have a patch attached at
> last which I also tested 8.2<->9.0 bi-directional migrations and it worked
> for me when I smoked it.  Please have a look to see whether that makes
> sense and at the meantime avoid most of the tricks.

Works for me! thanks for figuring out how to get back the VirtioGPU* !

I'll send v2 with your patch.

>
> I'd also like to mention one more thing just in case this can cause some
> more attention to virtio guys..
>
> Normally I ran vmstate-static-checker.py before softfreeze, and I did it
> for 9.0 too without seeing this problem.  It isn't raised because all
> virtio devices are using the "self managed" VMSTATE_VIRTIO_DEVICE to
> migrate.  In that case I am out of luck.  We can further extend what
> Fabiano mentioned in the other thread to cover migration stream validations
> in the future, but just to mention IMHO that needs extra work, and may work
> most likely the same as vmstate static checker but just waste many more cpu
> resources.  It'll be good if someone could still help move virtio towards
> like most of the rest devices, or at least get covered by the static
> checker, too.  But that definitely is a separate topic too.. so we can
> address the immediate breakage first.
>
> Thanks,
>
> ==8<==
> From a24ef99670fa7102da461d795aed4a957bad86b1 Mon Sep 17 00:00:00 2001
> From: Peter Xu <peterx@redhat.com>
> Date: Fri, 10 May 2024 12:33:34 -0400
> Subject: [PATCH] fix gpu
>
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  include/hw/virtio/virtio-gpu.h |  2 +-
>  hw/core/machine.c              |  1 +
>  hw/display/virtio-gpu.c        | 21 +++++++++++++++------
>  3 files changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
> index ed44cdad6b..e128501bdc 100644
> --- a/include/hw/virtio/virtio-gpu.h
> +++ b/include/hw/virtio/virtio-gpu.h
> @@ -176,7 +176,7 @@ typedef struct VGPUDMABuf {
>
>  struct VirtIOGPU {
>      VirtIOGPUBase parent_obj;
> -
> +    uint8_t vmstate_version;
>      uint64_t conf_max_hostmem;
>
>      VirtQueue *ctrl_vq;
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 4ff60911e7..8f6f0dda7c 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -42,6 +42,7 @@ GlobalProperty hw_compat_8_2[] = {
>      { "migration", "zero-page-detection", "legacy"},
>      { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" },
>      { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" },
> +    { "virtio-gpu-device", "x-vmstate-version", "1" },
>  };
>  const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2);
>
> diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
> index ae831b6b3e..c53f55404c 100644
> --- a/hw/display/virtio-gpu.c
> +++ b/hw/display/virtio-gpu.c
> @@ -1166,6 +1166,14 @@ static void virtio_gpu_cursor_bh(void *opaque)
>      virtio_gpu_handle_cursor(&g->parent_obj.parent_obj, g->cursor_vq);
>  }
>
> +static bool vmstate_after_v2(void *opaque, int version)
> +{
> +    struct VirtIOGPUBase *base = container_of(opaque, VirtIOGPUBase, scanout);
> +    struct VirtIOGPU *gpu = container_of(base, VirtIOGPU, parent_obj);
> +
> +    return gpu->vmstate_version >= 2;
> +}
> +
>  static const VMStateDescription vmstate_virtio_gpu_scanout = {
>      .name = "virtio-gpu-one-scanout",
>      .version_id = 2,
> @@ -1181,12 +1189,12 @@ static const VMStateDescription vmstate_virtio_gpu_scanout = {
>          VMSTATE_UINT32(cursor.hot_y, struct virtio_gpu_scanout),
>          VMSTATE_UINT32(cursor.pos.x, struct virtio_gpu_scanout),
>          VMSTATE_UINT32(cursor.pos.y, struct virtio_gpu_scanout),
> -        VMSTATE_UINT32_V(fb.format, struct virtio_gpu_scanout, 2),
> -        VMSTATE_UINT32_V(fb.bytes_pp, struct virtio_gpu_scanout, 2),
> -        VMSTATE_UINT32_V(fb.width, struct virtio_gpu_scanout, 2),
> -        VMSTATE_UINT32_V(fb.height, struct virtio_gpu_scanout, 2),
> -        VMSTATE_UINT32_V(fb.stride, struct virtio_gpu_scanout, 2),
> -        VMSTATE_UINT32_V(fb.offset, struct virtio_gpu_scanout, 2),
> +        VMSTATE_UINT32_TEST(fb.format, struct virtio_gpu_scanout,vmstate_after_v2),
> +        VMSTATE_UINT32_TEST(fb.bytes_pp, struct virtio_gpu_scanout,vmstate_after_v2),
> +        VMSTATE_UINT32_TEST(fb.width, struct virtio_gpu_scanout,vmstate_after_v2),
> +        VMSTATE_UINT32_TEST(fb.height, struct virtio_gpu_scanout,vmstate_after_v2),
> +        VMSTATE_UINT32_TEST(fb.stride, struct virtio_gpu_scanout,vmstate_after_v2),
> +        VMSTATE_UINT32_TEST(fb.offset, struct virtio_gpu_scanout,vmstate_after_v2),
>          VMSTATE_END_OF_LIST()
>      },
>  };
> @@ -1659,6 +1667,7 @@ static Property virtio_gpu_properties[] = {
>      DEFINE_PROP_BIT("blob", VirtIOGPU, parent_obj.conf.flags,
>                      VIRTIO_GPU_FLAG_BLOB_ENABLED, false),
>      DEFINE_PROP_SIZE("hostmem", VirtIOGPU, parent_obj.conf.hostmem, 0),
> +    DEFINE_PROP_UINT8("x-vmstate-version", VirtIOGPU, vmstate_version, 2),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>
> --
> 2.44.0
>
>
> --
> Peter Xu
>
diff mbox series

Patch

diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index ae831b6b3e..4fd72caf3f 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -1191,17 +1191,29 @@  static const VMStateDescription vmstate_virtio_gpu_scanout = {
     },
 };
 
+static bool vmstate_before_v2(void *opaque, int version)
+{
+    return version <= 1;
+}
+
 static const VMStateDescription vmstate_virtio_gpu_scanouts = {
     .name = "virtio-gpu-scanouts",
-    .version_id = 1,
+    .version_id = 2,
+    .minimum_version_id = 1,
     .fields = (const VMStateField[]) {
         VMSTATE_INT32(parent_obj.enable, struct VirtIOGPU),
         VMSTATE_UINT32_EQUAL(parent_obj.conf.max_outputs,
                              struct VirtIOGPU, NULL),
-        VMSTATE_STRUCT_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
-                                     parent_obj.conf.max_outputs, 1,
-                                     vmstate_virtio_gpu_scanout,
-                                     struct virtio_gpu_scanout),
+        VMSTATE_VSTRUCT_TEST_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
+                                           vmstate_before_v2,
+                                           parent_obj.conf.max_outputs, 1,
+                                           vmstate_virtio_gpu_scanout,
+                                           struct virtio_gpu_scanout, 1),
+        VMSTATE_VSTRUCT_TEST_VARRAY_UINT32(parent_obj.scanout, struct VirtIOGPU,
+                                           NULL,
+                                           parent_obj.conf.max_outputs, 2,
+                                           vmstate_virtio_gpu_scanout,
+                                           struct virtio_gpu_scanout, 2),
         VMSTATE_END_OF_LIST()
     },
 };