diff mbox

[v3] balloon: Fix failure of updating guest memory status

Message ID 1470396992-14564-1-git-send-email-lprosek@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ladi Prosek Aug. 5, 2016, 11:36 a.m. UTC
The stats_vq_elem field keeps track of the state of the balloon stats
virtqueue protocol but it wasn't preserved across migrations, resulting
in losing guest memory status updates on the receiving VM.

This commit adds a new VM state change handler which resets stats_vq_elem
to NULL when the VM is stopped, eliminating the need for the field to be
migrated. When the VM starts running again, the field is reinitialized by
re-popping the element from the virtqueue.

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio/virtio-balloon.c         | 23 +++++++++++++++++++++++
 include/hw/virtio/virtio-balloon.h |  1 +
 2 files changed, 24 insertions(+)

Comments

Stefan Hajnoczi Aug. 15, 2016, 12:09 p.m. UTC | #1
On Fri, Aug 05, 2016 at 01:36:32PM +0200, Ladi Prosek wrote:
> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> index 5af429a..65457e9 100644
> --- a/hw/virtio/virtio-balloon.c
> +++ b/hw/virtio/virtio-balloon.c
> @@ -423,6 +423,26 @@ static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
>      return 0;
>  }
>  
> +static void virtio_balloon_vmstate_cb(void *opaque, int running,
> +                                      RunState state)
> +{
> +    VirtIOBalloon *s = opaque;
> +
> +    if (!running) {
> +        /* put the stats element back if the VM is not running */
> +        if (s->stats_vq_elem != NULL) {
> +            virtqueue_discard(s->svq, s->stats_vq_elem, s->stats_vq_offset);

The third argument should be 0 because we did not write anything into
in_sg[].

Stefan
Ladi Prosek Aug. 15, 2016, 12:35 p.m. UTC | #2
On Mon, Aug 15, 2016 at 2:09 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Fri, Aug 05, 2016 at 01:36:32PM +0200, Ladi Prosek wrote:
>> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
>> index 5af429a..65457e9 100644
>> --- a/hw/virtio/virtio-balloon.c
>> +++ b/hw/virtio/virtio-balloon.c
>> @@ -423,6 +423,26 @@ static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
>>      return 0;
>>  }
>>
>> +static void virtio_balloon_vmstate_cb(void *opaque, int running,
>> +                                      RunState state)
>> +{
>> +    VirtIOBalloon *s = opaque;
>> +
>> +    if (!running) {
>> +        /* put the stats element back if the VM is not running */
>> +        if (s->stats_vq_elem != NULL) {
>> +            virtqueue_discard(s->svq, s->stats_vq_elem, s->stats_vq_offset);
>
> The third argument should be 0 because we did not write anything into
> in_sg[].

Thanks! Then it looks like the regular virtqueue_push in
balloon_stats_poll_cb should also be called with 0 and the
stats_vq_offset field can be deleted altogether. We never write to
in_sg.

Ladi
Stefan Hajnoczi Aug. 15, 2016, 12:50 p.m. UTC | #3
On Mon, Aug 15, 2016 at 1:35 PM, Ladi Prosek <lprosek@redhat.com> wrote:
> On Mon, Aug 15, 2016 at 2:09 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> On Fri, Aug 05, 2016 at 01:36:32PM +0200, Ladi Prosek wrote:
>>> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
>>> index 5af429a..65457e9 100644
>>> --- a/hw/virtio/virtio-balloon.c
>>> +++ b/hw/virtio/virtio-balloon.c
>>> @@ -423,6 +423,26 @@ static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
>>>      return 0;
>>>  }
>>>
>>> +static void virtio_balloon_vmstate_cb(void *opaque, int running,
>>> +                                      RunState state)
>>> +{
>>> +    VirtIOBalloon *s = opaque;
>>> +
>>> +    if (!running) {
>>> +        /* put the stats element back if the VM is not running */
>>> +        if (s->stats_vq_elem != NULL) {
>>> +            virtqueue_discard(s->svq, s->stats_vq_elem, s->stats_vq_offset);
>>
>> The third argument should be 0 because we did not write anything into
>> in_sg[].
>
> Thanks! Then it looks like the regular virtqueue_push in
> balloon_stats_poll_cb should also be called with 0 and the
> stats_vq_offset field can be deleted altogether. We never write to
> in_sg.

Yes.

Stefan
Ladi Prosek Aug. 15, 2016, 12:56 p.m. UTC | #4
On Mon, Aug 15, 2016 at 2:50 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Mon, Aug 15, 2016 at 1:35 PM, Ladi Prosek <lprosek@redhat.com> wrote:
>> On Mon, Aug 15, 2016 at 2:09 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>>> On Fri, Aug 05, 2016 at 01:36:32PM +0200, Ladi Prosek wrote:
>>>> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
>>>> index 5af429a..65457e9 100644
>>>> --- a/hw/virtio/virtio-balloon.c
>>>> +++ b/hw/virtio/virtio-balloon.c
>>>> @@ -423,6 +423,26 @@ static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
>>>>      return 0;
>>>>  }
>>>>
>>>> +static void virtio_balloon_vmstate_cb(void *opaque, int running,
>>>> +                                      RunState state)
>>>> +{
>>>> +    VirtIOBalloon *s = opaque;
>>>> +
>>>> +    if (!running) {
>>>> +        /* put the stats element back if the VM is not running */
>>>> +        if (s->stats_vq_elem != NULL) {
>>>> +            virtqueue_discard(s->svq, s->stats_vq_elem, s->stats_vq_offset);
>>>
>>> The third argument should be 0 because we did not write anything into
>>> in_sg[].
>>
>> Thanks! Then it looks like the regular virtqueue_push in
>> balloon_stats_poll_cb should also be called with 0 and the
>> stats_vq_offset field can be deleted altogether. We never write to
>> in_sg.
>
> Yes.

I'll send a follow-up patch fixing this later if that's ok. Liang is
already testing this patch, and passing non-0 does no harm: in_sg is
empty anyway, so it doesn't really make a difference.

Thanks,
Ladi
Michael S. Tsirkin Aug. 15, 2016, 1:39 p.m. UTC | #5
On Fri, Aug 05, 2016 at 01:36:32PM +0200, Ladi Prosek wrote:
> The stats_vq_elem field keeps track of the state of the balloon stats
> virtqueue protocol but it wasn't preserved across migrations, resulting
> in losing guest memory status updates on the receiving VM.
> 
> This commit adds a new VM state change handler which resets stats_vq_elem
> to NULL when the VM is stopped, eliminating the need for the field to be
> migrated. When the VM starts running again, the field is reinitialized by
> re-popping the element from the virtqueue.
> 
> Signed-off-by: Ladi Prosek <lprosek@redhat.com>
> Suggested-by: Michael S. Tsirkin <mst@redhat.com>

Almost there, except that we should not touch the rings
unless the guest has set DRIVER_OK in the status register, and we
should not touch the stats vq at all unless VIRTIO_BALLOON_F_STATS_VQ
has been negotiated.

I'll post a patch to do this.


> ---
>  hw/virtio/virtio-balloon.c         | 23 +++++++++++++++++++++++
>  include/hw/virtio/virtio-balloon.h |  1 +
>  2 files changed, 24 insertions(+)
> 
> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> index 5af429a..65457e9 100644
> --- a/hw/virtio/virtio-balloon.c
> +++ b/hw/virtio/virtio-balloon.c
> @@ -423,6 +423,26 @@ static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
>      return 0;
>  }
>  
> +static void virtio_balloon_vmstate_cb(void *opaque, int running,
> +                                      RunState state)
> +{
> +    VirtIOBalloon *s = opaque;
> +
> +    if (!running) {
> +        /* put the stats element back if the VM is not running */
> +        if (s->stats_vq_elem != NULL) {
> +            virtqueue_discard(s->svq, s->stats_vq_elem, s->stats_vq_offset);
> +            g_free(s->stats_vq_elem);
> +            s->stats_vq_elem = NULL;
> +        }
> +
> +    } else {
> +        /* poll stats queue for the element we may have discarded
> +         * when the VM was stopped */
> +        virtio_balloon_receive_stats(VIRTIO_DEVICE(s), s->svq);
> +    }
> +}
> +
>  static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> @@ -446,6 +466,8 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>      s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
>  
>      reset_stats(s);
> +
> +    s->change = qemu_add_vm_change_state_handler(virtio_balloon_vmstate_cb, s);
>  }
>  
>  static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
> @@ -453,6 +475,7 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VirtIOBalloon *s = VIRTIO_BALLOON(dev);
>  
> +    qemu_del_vm_change_state_handler(s->change);
>      balloon_stats_destroy_timer(s);
>      qemu_remove_balloon_handler(s);
>      virtio_cleanup(vdev);
> diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
> index 1ea13bd..d72ff7f 100644
> --- a/include/hw/virtio/virtio-balloon.h
> +++ b/include/hw/virtio/virtio-balloon.h
> @@ -43,6 +43,7 @@ typedef struct VirtIOBalloon {
>      int64_t stats_last_update;
>      int64_t stats_poll_interval;
>      uint32_t host_features;
> +    VMChangeStateEntry *change;
>  } VirtIOBalloon;
>  
>  #endif
> -- 
> 2.5.5
Michael S. Tsirkin Aug. 15, 2016, 11:08 p.m. UTC | #6
On Fri, Aug 05, 2016 at 01:36:32PM +0200, Ladi Prosek wrote:
> The stats_vq_elem field keeps track of the state of the balloon stats
> virtqueue protocol but it wasn't preserved across migrations, resulting
> in losing guest memory status updates on the receiving VM.
> 
> This commit adds a new VM state change handler which resets stats_vq_elem
> to NULL when the VM is stopped, eliminating the need for the field to be
> migrated. When the VM starts running again, the field is reinitialized by
> re-popping the element from the virtqueue.
> 
> Signed-off-by: Ladi Prosek <lprosek@redhat.com>
> Suggested-by: Michael S. Tsirkin <mst@redhat.com>

I think that for the 2.7 machine type, we should
also migrate the guest-stats-polling-interval property.

Otherwise the value that was set on the source
is not properly forwarded to the
destination.

And at that point, one wonders whether we should
check that the interval is set before calling
virtio_balloon_receive_stats.


> ---
>  hw/virtio/virtio-balloon.c         | 23 +++++++++++++++++++++++
>  include/hw/virtio/virtio-balloon.h |  1 +
>  2 files changed, 24 insertions(+)
> 
> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> index 5af429a..65457e9 100644
> --- a/hw/virtio/virtio-balloon.c
> +++ b/hw/virtio/virtio-balloon.c
> @@ -423,6 +423,26 @@ static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
>      return 0;
>  }
>  
> +static void virtio_balloon_vmstate_cb(void *opaque, int running,
> +                                      RunState state)
> +{
> +    VirtIOBalloon *s = opaque;
> +
> +    if (!running) {
> +        /* put the stats element back if the VM is not running */
> +        if (s->stats_vq_elem != NULL) {
> +            virtqueue_discard(s->svq, s->stats_vq_elem, s->stats_vq_offset);
> +            g_free(s->stats_vq_elem);
> +            s->stats_vq_elem = NULL;
> +        }
> +
> +    } else {
> +        /* poll stats queue for the element we may have discarded
> +         * when the VM was stopped */
> +        virtio_balloon_receive_stats(VIRTIO_DEVICE(s), s->svq);
> +    }
> +}
> +
>  static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> @@ -446,6 +466,8 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>      s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
>  
>      reset_stats(s);
> +
> +    s->change = qemu_add_vm_change_state_handler(virtio_balloon_vmstate_cb, s);
>  }
>  
>  static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
> @@ -453,6 +475,7 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VirtIOBalloon *s = VIRTIO_BALLOON(dev);
>  
> +    qemu_del_vm_change_state_handler(s->change);
>      balloon_stats_destroy_timer(s);
>      qemu_remove_balloon_handler(s);
>      virtio_cleanup(vdev);
> diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
> index 1ea13bd..d72ff7f 100644
> --- a/include/hw/virtio/virtio-balloon.h
> +++ b/include/hw/virtio/virtio-balloon.h
> @@ -43,6 +43,7 @@ typedef struct VirtIOBalloon {
>      int64_t stats_last_update;
>      int64_t stats_poll_interval;
>      uint32_t host_features;
> +    VMChangeStateEntry *change;
>  } VirtIOBalloon;
>  
>  #endif
> -- 
> 2.5.5
Michael S. Tsirkin Aug. 22, 2016, 2 a.m. UTC | #7
On Fri, Aug 05, 2016 at 01:36:32PM +0200, Ladi Prosek wrote:
> The stats_vq_elem field keeps track of the state of the balloon stats
> virtqueue protocol but it wasn't preserved across migrations, resulting
> in losing guest memory status updates on the receiving VM.
> 
> This commit adds a new VM state change handler which resets stats_vq_elem
> to NULL when the VM is stopped, eliminating the need for the field to be
> migrated. When the VM starts running again, the field is reinitialized by
> re-popping the element from the virtqueue.
> 
> Signed-off-by: Ladi Prosek <lprosek@redhat.com>
> Suggested-by: Michael S. Tsirkin <mst@redhat.com>

OK, but we must also migrate the stats timer value;
otherwise management must specify it again on the destination.

Only for compat machine types, unfortunately; otherwise
we break migration from old QEMU...



> ---
>  hw/virtio/virtio-balloon.c         | 23 +++++++++++++++++++++++
>  include/hw/virtio/virtio-balloon.h |  1 +
>  2 files changed, 24 insertions(+)
> 
> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> index 5af429a..65457e9 100644
> --- a/hw/virtio/virtio-balloon.c
> +++ b/hw/virtio/virtio-balloon.c
> @@ -423,6 +423,26 @@ static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
>      return 0;
>  }
>  
> +static void virtio_balloon_vmstate_cb(void *opaque, int running,
> +                                      RunState state)
> +{
> +    VirtIOBalloon *s = opaque;
> +
> +    if (!running) {
> +        /* put the stats element back if the VM is not running */
> +        if (s->stats_vq_elem != NULL) {
> +            virtqueue_discard(s->svq, s->stats_vq_elem, s->stats_vq_offset);
> +            g_free(s->stats_vq_elem);
> +            s->stats_vq_elem = NULL;
> +        }
> +
> +    } else {
> +        /* poll stats queue for the element we may have discarded
> +         * when the VM was stopped */
> +        virtio_balloon_receive_stats(VIRTIO_DEVICE(s), s->svq);
> +    }
> +}
> +
>  static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> @@ -446,6 +466,8 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>      s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
>  
>      reset_stats(s);
> +
> +    s->change = qemu_add_vm_change_state_handler(virtio_balloon_vmstate_cb, s);
>  }
>  
>  static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
> @@ -453,6 +475,7 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VirtIOBalloon *s = VIRTIO_BALLOON(dev);
>  
> +    qemu_del_vm_change_state_handler(s->change);
>      balloon_stats_destroy_timer(s);
>      qemu_remove_balloon_handler(s);
>      virtio_cleanup(vdev);
> diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
> index 1ea13bd..d72ff7f 100644
> --- a/include/hw/virtio/virtio-balloon.h
> +++ b/include/hw/virtio/virtio-balloon.h
> @@ -43,6 +43,7 @@ typedef struct VirtIOBalloon {
>      int64_t stats_last_update;
>      int64_t stats_poll_interval;
>      uint32_t host_features;
> +    VMChangeStateEntry *change;
>  } VirtIOBalloon;
>  
>  #endif
> -- 
> 2.5.5
diff mbox

Patch

diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 5af429a..65457e9 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -423,6 +423,26 @@  static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
     return 0;
 }
 
+static void virtio_balloon_vmstate_cb(void *opaque, int running,
+                                      RunState state)
+{
+    VirtIOBalloon *s = opaque;
+
+    if (!running) {
+        /* put the stats element back if the VM is not running */
+        if (s->stats_vq_elem != NULL) {
+            virtqueue_discard(s->svq, s->stats_vq_elem, s->stats_vq_offset);
+            g_free(s->stats_vq_elem);
+            s->stats_vq_elem = NULL;
+        }
+
+    } else {
+        /* poll stats queue for the element we may have discarded
+         * when the VM was stopped */
+        virtio_balloon_receive_stats(VIRTIO_DEVICE(s), s->svq);
+    }
+}
+
 static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -446,6 +466,8 @@  static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
     s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
 
     reset_stats(s);
+
+    s->change = qemu_add_vm_change_state_handler(virtio_balloon_vmstate_cb, s);
 }
 
 static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
@@ -453,6 +475,7 @@  static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIOBalloon *s = VIRTIO_BALLOON(dev);
 
+    qemu_del_vm_change_state_handler(s->change);
     balloon_stats_destroy_timer(s);
     qemu_remove_balloon_handler(s);
     virtio_cleanup(vdev);
diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
index 1ea13bd..d72ff7f 100644
--- a/include/hw/virtio/virtio-balloon.h
+++ b/include/hw/virtio/virtio-balloon.h
@@ -43,6 +43,7 @@  typedef struct VirtIOBalloon {
     int64_t stats_last_update;
     int64_t stats_poll_interval;
     uint32_t host_features;
+    VMChangeStateEntry *change;
 } VirtIOBalloon;
 
 #endif