diff mbox

[v2] balloon: Fix failure of updating guest memory status

Message ID 1467772593-29703-1-git-send-email-liang.z.li@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Liang Li July 6, 2016, 2:36 a.m. UTC
After live migration, 'guest-stats' can't get the expected memory
status in the guest. This issue is caused by commit 4eae2a657d.
The value of 's->stats_vq_elem' will be NULL after live migration,
and the check in the function 'balloon_stats_poll_cb()' will
prevent 'virtio_notify()' from executing, so the guest will not
update its memory status.

Commit 4eae2a657d is doing the right thing, but 's->stats_vq_elem'
should be treated as part of the balloon device state and migrated to
the destination if it's not NULL, to make everything work well.

Signed-off-by: Liang Li <liang.z.li@intel.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Ladi Prosek <lprosek@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/virtio/virtio-balloon.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

Comments

Michael S. Tsirkin July 6, 2016, 8:55 a.m. UTC | #1
On Wed, Jul 06, 2016 at 10:36:33AM +0800, Liang Li wrote:
> After live migration, 'guest-stats' can't get the expected memory
> status in the guest. This issue is caused by commit 4eae2a657d.
> The value of 's->stats_vq_elem' will be NULL after live migration,
> and the check in the function 'balloon_stats_poll_cb()' will
> prevent the 'virtio_notify()' from executing. So guest will not
> update the memory status.
> 
> Commit 4eae2a657d is doing the right thing, but 's->stats_vq_elem'
> should be treated as part of balloon device state and migrated to
> destination if it's not NULL to make everything works well.
> 
> Signed-off-by: Liang Li <liang.z.li@intel.com>
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Ladi Prosek <lprosek@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>

I agree there's an issue but we don't change versions anymore.
Breaking migrations for everyone is also not nice.

How about queueing virtio_balloon_receive_stats
so it will get invoked when vm starts?

> ---
>  hw/virtio/virtio-balloon.c | 22 ++++++++++++++++++++--
>  1 file changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> index 557d3f9..64e80c6 100644
> --- a/hw/virtio/virtio-balloon.c
> +++ b/hw/virtio/virtio-balloon.c
> @@ -31,6 +31,7 @@
>  #include "hw/virtio/virtio-access.h"
>  
>  #define BALLOON_PAGE_SIZE  (1 << VIRTIO_BALLOON_PFN_SHIFT)
> +#define BALLOON_VERSION 2
>  
>  static void balloon_page(void *addr, int deflate)
>  {
> @@ -404,15 +405,24 @@ static void virtio_balloon_save(QEMUFile *f, void *opaque)
>  static void virtio_balloon_save_device(VirtIODevice *vdev, QEMUFile *f)
>  {
>      VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
> +    uint16_t elem_num = 0;
>  
>      qemu_put_be32(f, s->num_pages);
>      qemu_put_be32(f, s->actual);
> +    if (s->stats_vq_elem != NULL) {
> +        elem_num = 1;
> +    }
> +    qemu_put_be16(f, elem_num);
> +    if (elem_num) {
> +        qemu_put_virtqueue_element(f, s->stats_vq_elem);
> +    }
>  }
>  
>  static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
>  {
> -    if (version_id != 1)
> +    if (version_id < 1 || version_id > BALLOON_VERSION) {
>          return -EINVAL;
> +    }
>  
>      return virtio_load(VIRTIO_DEVICE(opaque), f, version_id);
>  }
> @@ -421,9 +431,17 @@ static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
>                                        int version_id)
>  {
>      VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
> +    uint16_t elem_num = 0;
>  
>      s->num_pages = qemu_get_be32(f);
>      s->actual = qemu_get_be32(f);
> +    if (version_id == BALLOON_VERSION) {
> +        elem_num = qemu_get_be16(f);
> +        if (elem_num == 1) {
> +            s->stats_vq_elem =
> +                    qemu_get_virtqueue_element(f, sizeof(VirtQueueElement));
> +        }
> +    }
>  
>      if (balloon_stats_enabled(s)) {
>          balloon_stats_change_timer(s, s->stats_poll_interval);
> @@ -455,7 +473,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>  
>      reset_stats(s);
>  
> -    register_savevm(dev, "virtio-balloon", -1, 1,
> +    register_savevm(dev, "virtio-balloon", -1, BALLOON_VERSION,
>                      virtio_balloon_save, virtio_balloon_load, s);
>  }
>  
> -- 
> 1.8.3.1
Liang Li July 6, 2016, 9:23 a.m. UTC | #2
> On Wed, Jul 06, 2016 at 10:36:33AM +0800, Liang Li wrote:
> > After live migration, 'guest-stats' can't get the expected memory
> > status in the guest. This issue is caused by commit 4eae2a657d.
> > The value of 's->stats_vq_elem' will be NULL after live migration, and
> > the check in the function 'balloon_stats_poll_cb()' will prevent the
> > 'virtio_notify()' from executing. So guest will not update the memory
> > status.
> >
> > Commit 4eae2a657d is doing the right thing, but 's->stats_vq_elem'
> > should be treated as part of balloon device state and migrated to
> > destination if it's not NULL to make everything works well.
> >
> > Signed-off-by: Liang Li <liang.z.li@intel.com>
> > Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Michael S. Tsirkin <mst@redhat.com>
> > Cc: Ladi Prosek <lprosek@redhat.com>
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> 
> I agree there's an issue but we don't change versions anymore.
> Breaking migrations for everyone is also not nice.
> 
> How about queueing virtio_balloon_receive_stats so it will get invoked when
> vm starts?
> 

Could you explain in more detail how that would work?  I don't follow.

Thanks!
Liang
Michael S. Tsirkin July 6, 2016, 10:32 a.m. UTC | #3
On Wed, Jul 06, 2016 at 09:23:46AM +0000, Li, Liang Z wrote:
> > On Wed, Jul 06, 2016 at 10:36:33AM +0800, Liang Li wrote:
> > > After live migration, 'guest-stats' can't get the expected memory
> > > status in the guest. This issue is caused by commit 4eae2a657d.
> > > The value of 's->stats_vq_elem' will be NULL after live migration, and
> > > the check in the function 'balloon_stats_poll_cb()' will prevent the
> > > 'virtio_notify()' from executing. So guest will not update the memory
> > > status.
> > >
> > > Commit 4eae2a657d is doing the right thing, but 's->stats_vq_elem'
> > > should be treated as part of balloon device state and migrated to
> > > destination if it's not NULL to make everything works well.
> > >
> > > Signed-off-by: Liang Li <liang.z.li@intel.com>
> > > Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > Cc: Ladi Prosek <lprosek@redhat.com>
> > > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > 
> > I agree there's an issue but we don't change versions anymore.
> > Breaking migrations for everyone is also not nice.
> > 
> > How about queueing virtio_balloon_receive_stats so it will get invoked when
> > vm starts?
> > 
> 
> Could you give more explanation about how it works?  I can't catch you.
> 
> Thanks!
> Liang

virtqueue_discard before migration

virtio_balloon_receive_stats after migration
Liang Li July 6, 2016, 12:49 p.m. UTC | #4
> > > > After live migration, 'guest-stats' can't get the expected memory
> > > > status in the guest. This issue is caused by commit 4eae2a657d.
> > > > The value of 's->stats_vq_elem' will be NULL after live migration,
> > > > and the check in the function 'balloon_stats_poll_cb()' will
> > > > prevent the 'virtio_notify()' from executing. So guest will not
> > > > update the memory status.
> > > >
> > > > Commit 4eae2a657d is doing the right thing, but 's->stats_vq_elem'
> > > > should be treated as part of balloon device state and migrated to
> > > > destination if it's not NULL to make everything works well.
> > > >
> > > > Signed-off-by: Liang Li <liang.z.li@intel.com>
> > > > Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> > > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > > Cc: Ladi Prosek <lprosek@redhat.com>
> > > > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > >
> > > I agree there's an issue but we don't change versions anymore.
> > > Breaking migrations for everyone is also not nice.
> > >
> > > How about queueing virtio_balloon_receive_stats so it will get
> > > invoked when vm starts?
> > >
> >
> > Could you give more explanation about how it works?  I can't catch you.
> >
> > Thanks!
> > Liang
> 
> virtqueue_discard before migration
> 
> virtio_balloon_receive_stats after migration
> 

Sorry, I still don't follow. Maybe it's easier for you to submit a patch
than to write a lot of words to make me understand your idea.

I just don't understand why we shouldn't use the version to make things easier; isn't that
the original intent of the version id?
If we want to extend the device and more state is needed, can the idea you suggest
be used as a common solution?

Thanks!
Liang

> --
> MST
Michael S. Tsirkin July 6, 2016, 1:40 p.m. UTC | #5
On Wed, Jul 06, 2016 at 12:49:06PM +0000, Li, Liang Z wrote:
> > > > > After live migration, 'guest-stats' can't get the expected memory
> > > > > status in the guest. This issue is caused by commit 4eae2a657d.
> > > > > The value of 's->stats_vq_elem' will be NULL after live migration,
> > > > > and the check in the function 'balloon_stats_poll_cb()' will
> > > > > prevent the 'virtio_notify()' from executing. So guest will not
> > > > > update the memory status.
> > > > >
> > > > > Commit 4eae2a657d is doing the right thing, but 's->stats_vq_elem'
> > > > > should be treated as part of balloon device state and migrated to
> > > > > destination if it's not NULL to make everything works well.
> > > > >
> > > > > Signed-off-by: Liang Li <liang.z.li@intel.com>
> > > > > Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> > > > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > > > Cc: Ladi Prosek <lprosek@redhat.com>
> > > > > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > > >
> > > > I agree there's an issue but we don't change versions anymore.
> > > > Breaking migrations for everyone is also not nice.
> > > >
> > > > How about queueing virtio_balloon_receive_stats so it will get
> > > > invoked when vm starts?
> > > >
> > >
> > > Could you give more explanation about how it works?  I can't catch you.
> > >
> > > Thanks!
> > > Liang
> > 
> > virtqueue_discard before migration
> > 
> > virtio_balloon_receive_stats after migration
> > 
> 
> Sorry, I still can't catch you. Maybe it's easier for you to submit a patch
> than writing a lot a words to make me understand your idea. 

I'm rather busy now.  I might look into it towards end of the month.

> I just don't understand why not to use the version to make things easier, is that not
> the original intent of version id? 

This was the original idea but we stopped using version ids
since they have many shortcomings.

> If we want to extend the device and more states are needed, the idea you suggest can
> be used as a common solution?
> 
> Thanks!
> Liang

The idea is to try to avoid adding more state. That's not always
possible, but in this case the element was seen but not consumed yet,
so it should be possible for the destination to simply get it
from the VQ again.

> > --
> > MST
Liang Li Aug. 1, 2016, 11:59 p.m. UTC | #6
> On Wed, Jul 06, 2016 at 12:49:06PM +0000, Li, Liang Z wrote:
> > > > > > After live migration, 'guest-stats' can't get the expected
> > > > > > memory status in the guest. This issue is caused by commit
> 4eae2a657d.
> > > > > > The value of 's->stats_vq_elem' will be NULL after live
> > > > > > migration, and the check in the function
> > > > > > 'balloon_stats_poll_cb()' will prevent the 'virtio_notify()'
> > > > > > from executing. So guest will not update the memory status.
> > > > > >
> > > > > > Commit 4eae2a657d is doing the right thing, but 's->stats_vq_elem'
> > > > > > should be treated as part of balloon device state and migrated
> > > > > > to destination if it's not NULL to make everything works well.
> > > > > >
> > > > > > Signed-off-by: Liang Li <liang.z.li@intel.com>
> > > > > > Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> > > > > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > > > > Cc: Ladi Prosek <lprosek@redhat.com>
> > > > > > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > > > >
> > > > > I agree there's an issue but we don't change versions anymore.
> > > > > Breaking migrations for everyone is also not nice.
> > > > >
> > > > > How about queueing virtio_balloon_receive_stats so it will get
> > > > > invoked when vm starts?
> > > > >
> > > >
> > > > Could you give more explanation about how it works?  I can't catch you.
> > > >
> > > > Thanks!
> > > > Liang
> > >
> > > virtqueue_discard before migration
> > >
> > > virtio_balloon_receive_stats after migration
> > >
> >
> > Sorry, I still can't catch you. Maybe it's easier for you to submit a
> > patch than writing a lot a words to make me understand your idea.
> 
> I'm rather busy now.  I might look into it towards end of the month.
> 
> > I just don't understand why not to use the version to make things
> > easier, is that not the original intent of version id?
> 
> This was the original idea but we stopped using version ids since they have
> many shortcomings.
> 
> > If we want to extend the device and more states are needed, the idea
> > you suggest can be used as a common solution?
> >
> > Thanks!
> > Liang
> 
> The idea is to try to avoid adding more state. that's not always possible but in
> this case element was seen but not consumed yet, so it should be possible
> for destination to simply get it from the VQ again.
> 
> > > --
> > > MST

Hi Michael,

Have you had time to look at this issue recently?

Thanks!
Liang
Michael S. Tsirkin Aug. 2, 2016, 12:11 a.m. UTC | #7
On Mon, Aug 01, 2016 at 11:59:31PM +0000, Li, Liang Z wrote:
> > On Wed, Jul 06, 2016 at 12:49:06PM +0000, Li, Liang Z wrote:
> > > > > > > After live migration, 'guest-stats' can't get the expected
> > > > > > > memory status in the guest. This issue is caused by commit
> > 4eae2a657d.
> > > > > > > The value of 's->stats_vq_elem' will be NULL after live
> > > > > > > migration, and the check in the function
> > > > > > > 'balloon_stats_poll_cb()' will prevent the 'virtio_notify()'
> > > > > > > from executing. So guest will not update the memory status.
> > > > > > >
> > > > > > > Commit 4eae2a657d is doing the right thing, but 's->stats_vq_elem'
> > > > > > > should be treated as part of balloon device state and migrated
> > > > > > > to destination if it's not NULL to make everything works well.
> > > > > > >
> > > > > > > Signed-off-by: Liang Li <liang.z.li@intel.com>
> > > > > > > Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> > > > > > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > Cc: Ladi Prosek <lprosek@redhat.com>
> > > > > > > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > > > > >
> > > > > > I agree there's an issue but we don't change versions anymore.
> > > > > > Breaking migrations for everyone is also not nice.
> > > > > >
> > > > > > How about queueing virtio_balloon_receive_stats so it will get
> > > > > > invoked when vm starts?
> > > > > >
> > > > >
> > > > > Could you give more explanation about how it works?  I can't catch you.
> > > > >
> > > > > Thanks!
> > > > > Liang
> > > >
> > > > virtqueue_discard before migration
> > > >
> > > > virtio_balloon_receive_stats after migration
> > > >
> > >
> > > Sorry, I still can't catch you. Maybe it's easier for you to submit a
> > > patch than writing a lot a words to make me understand your idea.
> > 
> > I'm rather busy now.  I might look into it towards end of the month.
> > 
> > > I just don't understand why not to use the version to make things
> > > easier, is that not the original intent of version id?
> > 
> > This was the original idea but we stopped using version ids since they have
> > many shortcomings.
> > 
> > > If we want to extend the device and more states are needed, the idea
> > > you suggest can be used as a common solution?
> > >
> > > Thanks!
> > > Liang
> > 
> > The idea is to try to avoid adding more state. that's not always possible but in
> > this case element was seen but not consumed yet, so it should be possible
> > for destination to simply get it from the VQ again.
> > 
> > > > --
> > > > MST
> 
> Hi Michel,
> 
> Do you have time for this issue recently?  
> 
> Thanks!
> Liang


Sorry, doesn't look like I will.
The idea is to make sure balloon_stats_poll_cb runs
on the source. This will set stats_vq_elem to NULL.
Ladi Prosek Aug. 3, 2016, 7:25 a.m. UTC | #8
On Tue, Aug 2, 2016 at 2:11 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Mon, Aug 01, 2016 at 11:59:31PM +0000, Li, Liang Z wrote:
>> > On Wed, Jul 06, 2016 at 12:49:06PM +0000, Li, Liang Z wrote:
>> > > > > > > After live migration, 'guest-stats' can't get the expected
>> > > > > > > memory status in the guest. This issue is caused by commit
>> > 4eae2a657d.
>> > > > > > > The value of 's->stats_vq_elem' will be NULL after live
>> > > > > > > migration, and the check in the function
>> > > > > > > 'balloon_stats_poll_cb()' will prevent the 'virtio_notify()'
>> > > > > > > from executing. So guest will not update the memory status.
>> > > > > > >
>> > > > > > > Commit 4eae2a657d is doing the right thing, but 's->stats_vq_elem'
>> > > > > > > should be treated as part of balloon device state and migrated
>> > > > > > > to destination if it's not NULL to make everything works well.
>> > > > > > >
>> > > > > > > Signed-off-by: Liang Li <liang.z.li@intel.com>
>> > > > > > > Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
>> > > > > > > Cc: Michael S. Tsirkin <mst@redhat.com>
>> > > > > > > Cc: Ladi Prosek <lprosek@redhat.com>
>> > > > > > > Cc: Paolo Bonzini <pbonzini@redhat.com>
>> > > > > >
>> > > > > > I agree there's an issue but we don't change versions anymore.
>> > > > > > Breaking migrations for everyone is also not nice.
>> > > > > >
>> > > > > > How about queueing virtio_balloon_receive_stats so it will get
>> > > > > > invoked when vm starts?
>> > > > > >
>> > > > >
>> > > > > Could you give more explanation about how it works?  I can't catch you.
>> > > > >
>> > > > > Thanks!
>> > > > > Liang
>> > > >
>> > > > virtqueue_discard before migration
>> > > >
>> > > > virtio_balloon_receive_stats after migration
>> > > >
>> > >
>> > > Sorry, I still can't catch you. Maybe it's easier for you to submit a
>> > > patch than writing a lot a words to make me understand your idea.
>> >
>> > I'm rather busy now.  I might look into it towards end of the month.
>> >
>> > > I just don't understand why not to use the version to make things
>> > > easier, is that not the original intent of version id?
>> >
>> > This was the original idea but we stopped using version ids since they have
>> > many shortcomings.
>> >
>> > > If we want to extend the device and more states are needed, the idea
>> > > you suggest can be used as a common solution?
>> > >
>> > > Thanks!
>> > > Liang
>> >
>> > The idea is to try to avoid adding more state. that's not always possible but in
>> > this case element was seen but not consumed yet, so it should be possible
>> > for destination to simply get it from the VQ again.
>> >
>> > > > --
>> > > > MST
>>
>> Hi Michel,
>>
>> Do you have time for this issue recently?
>>
>> Thanks!
>> Liang

Hi Liang,

I should be able to look into it this week if you help me with testing.

Thanks,
Ladi

> Sorry, doesn't look like I will.
> Idea is to make sure balloon_stats_poll_cb runs
> on source. This will set stats_vq_elem to NULL.
>
>
> --
> MST
diff mbox

Patch

diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 557d3f9..64e80c6 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -31,6 +31,7 @@ 
 #include "hw/virtio/virtio-access.h"
 
 #define BALLOON_PAGE_SIZE  (1 << VIRTIO_BALLOON_PFN_SHIFT)
+#define BALLOON_VERSION 2
 
 static void balloon_page(void *addr, int deflate)
 {
@@ -404,15 +405,24 @@  static void virtio_balloon_save(QEMUFile *f, void *opaque)
 static void virtio_balloon_save_device(VirtIODevice *vdev, QEMUFile *f)
 {
     VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
+    uint16_t elem_num = 0;
 
     qemu_put_be32(f, s->num_pages);
     qemu_put_be32(f, s->actual);
+    if (s->stats_vq_elem != NULL) {
+        elem_num = 1;
+    }
+    qemu_put_be16(f, elem_num);
+    if (elem_num) {
+        qemu_put_virtqueue_element(f, s->stats_vq_elem);
+    }
 }
 
 static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
 {
-    if (version_id != 1)
+    if (version_id < 1 || version_id > BALLOON_VERSION) {
         return -EINVAL;
+    }
 
     return virtio_load(VIRTIO_DEVICE(opaque), f, version_id);
 }
@@ -421,9 +431,17 @@  static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
                                       int version_id)
 {
     VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
+    uint16_t elem_num = 0;
 
     s->num_pages = qemu_get_be32(f);
     s->actual = qemu_get_be32(f);
+    if (version_id == BALLOON_VERSION) {
+        elem_num = qemu_get_be16(f);
+        if (elem_num == 1) {
+            s->stats_vq_elem =
+                    qemu_get_virtqueue_element(f, sizeof(VirtQueueElement));
+        }
+    }
 
     if (balloon_stats_enabled(s)) {
         balloon_stats_change_timer(s, s->stats_poll_interval);
@@ -455,7 +473,7 @@  static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
 
     reset_stats(s);
 
-    register_savevm(dev, "virtio-balloon", -1, 1,
+    register_savevm(dev, "virtio-balloon", -1, BALLOON_VERSION,
                     virtio_balloon_save, virtio_balloon_load, s);
 }