diff mbox

[v2,4/6] virtio-balloon: keep collecting stats on save/restore

Message ID 1471613966-7267-5-git-send-email-rkagan@virtuozzo.com (mailing list archive)
State New, archived
Headers show

Commit Message

Roman Kagan Aug. 19, 2016, 1:39 p.m. UTC
Upon save/restore virtio-balloon stats acquisition stops.  The reason is
that the fact that the (only) virtqueue element is being used by QEMU is
not recorded anywhere on save, so upon restore it's not released to the
guest, making further progress impossible.

Saving the information about the used element would introduce unjustified
vmstate incompatibility.

So instead just make sure the element is pushed before save, leaving the
ball on the guest side.  For that, add vm state change handler to
virtio-ballon which would take care of pushing the element if there is
one.

Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Ladi Prosek <lprosek@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/virtio/virtio-balloon.c         | 27 ++++++++++++++++++++++-----
 include/hw/virtio/virtio-balloon.h |  1 +
 2 files changed, 23 insertions(+), 5 deletions(-)

Comments

Ladi Prosek Sept. 1, 2016, 8:35 a.m. UTC | #1
On Fri, Aug 19, 2016 at 3:39 PM, Roman Kagan <rkagan@virtuozzo.com> wrote:
> Upon save/restore virtio-balloon stats acquisition stops.  The reason is
> that the fact that the (only) virtqueue element is being used by QEMU is
> not recorded anywhere on save, so upon restore it's not released to the
> guest, making further progress impossible.
>
> Saving the information about the used element would introduce unjustified
> vmstate incompatibility.
>
> So instead just make sure the element is pushed before save, leaving the
> ball on the guest side.  For that, add vm state change handler to
> virtio-ballon which would take care of pushing the element if there is
> one.
>
> Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
> Cc: "Michael S. Tsirkin" <mst@redhat.com>
> Cc: Ladi Prosek <lprosek@redhat.com>
> Cc: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  hw/virtio/virtio-balloon.c         | 27 ++++++++++++++++++++++-----
>  include/hw/virtio/virtio-balloon.h |  1 +
>  2 files changed, 23 insertions(+), 5 deletions(-)
>
> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> index 6d4c57c..f00ad8e 100644
> --- a/hw/virtio/virtio-balloon.c
> +++ b/hw/virtio/virtio-balloon.c
> @@ -88,10 +88,19 @@ static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs)
>      timer_mod(s->stats_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000);
>  }
>
> +static void balloon_stats_push_elem(VirtIOBalloon *s)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(s);
> +
> +    virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset);
> +    virtio_notify(vdev, s->svq);
> +    g_free(s->stats_vq_elem);
> +    s->stats_vq_elem = NULL;
> +}
> +
>  static void balloon_stats_poll_cb(void *opaque)
>  {
>      VirtIOBalloon *s = opaque;
> -    VirtIODevice *vdev = VIRTIO_DEVICE(s);
>
>      if (!s->stats_vq_elem) {
>          /* The guest hasn't sent the stats yet (either not enabled or we came
> @@ -100,10 +109,7 @@ static void balloon_stats_poll_cb(void *opaque)
>          return;
>      }
>
> -    virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset);
> -    virtio_notify(vdev, s->svq);
> -    g_free(s->stats_vq_elem);
> -    s->stats_vq_elem = NULL;
> +    balloon_stats_push_elem(s);
>  }
>
>  static void balloon_stats_get_all(Object *obj, Visitor *v, const char *name,
> @@ -414,6 +420,15 @@ static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
>      return 0;
>  }
>
> +static void balloon_vm_state_change(void *opaque, int running, RunState state)
> +{
> +    VirtIOBalloon *s = opaque;
> +
> +    if (!running && s->stats_vq_elem) {
> +        balloon_stats_push_elem(s);
> +    }
> +}
> +
>  static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> @@ -436,6 +451,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>      s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
>      s->svq = virtio_add_queue(vdev, 1, virtio_balloon_receive_stats);
>
> +    s->change = qemu_add_vm_change_state_handler(balloon_vm_state_change, s);
>      reset_stats(s);
>  }
>
> @@ -444,6 +460,7 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VirtIOBalloon *s = VIRTIO_BALLOON(dev);
>
> +    qemu_del_vm_change_state_handler(s->change);
>      balloon_stats_destroy_timer(s);
>      qemu_remove_balloon_handler(s);
>      virtio_cleanup(vdev);
> diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
> index 1ea13bd..d72ff7f 100644
> --- a/include/hw/virtio/virtio-balloon.h
> +++ b/include/hw/virtio/virtio-balloon.h
> @@ -43,6 +43,7 @@ typedef struct VirtIOBalloon {
>      int64_t stats_last_update;
>      int64_t stats_poll_interval;
>      uint32_t host_features;
> +    VMChangeStateEntry *change;
>  } VirtIOBalloon;
>
>  #endif
> --
> 2.7.4
>

Hi Roman,

I talked to Michael Tsirkin and he agrees with merging this patch for
2.7. Could you please resubmit and use the set_status callback instead
of adding another VM state change handler?

static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);

    if (!vdev->vm_running && s->stats_vq_elem) {
        balloon_stats_push_elem(s);
    }
}

and

    vdc->set_status = virtio_balloon_set_status;
in virtio_balloon_class_init.

This is somewhat urgent because 2.7 will be out soon. If you're busy
or if I don't hear from you I'll post it on your behalf.

Thanks!
Ladi
Roman Kagan Sept. 1, 2016, 2:17 p.m. UTC | #2
On Thu, Sep 01, 2016 at 10:35:49AM +0200, Ladi Prosek wrote:
> On Fri, Aug 19, 2016 at 3:39 PM, Roman Kagan <rkagan@virtuozzo.com> wrote:
> > Upon save/restore virtio-balloon stats acquisition stops.  The reason is
> > that the fact that the (only) virtqueue element is being used by QEMU is
> > not recorded anywhere on save, so upon restore it's not released to the
> > guest, making further progress impossible.
> >
> > Saving the information about the used element would introduce unjustified
> > vmstate incompatibility.
> >
> > So instead just make sure the element is pushed before save, leaving the
> > ball on the guest side.  For that, add vm state change handler to
> > virtio-ballon which would take care of pushing the element if there is
> > one.
> >
> > Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
> > Cc: "Michael S. Tsirkin" <mst@redhat.com>
> > Cc: Ladi Prosek <lprosek@redhat.com>
> > Cc: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  hw/virtio/virtio-balloon.c         | 27 ++++++++++++++++++++++-----
> >  include/hw/virtio/virtio-balloon.h |  1 +
> >  2 files changed, 23 insertions(+), 5 deletions(-)
> >
> > diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> > index 6d4c57c..f00ad8e 100644
> > --- a/hw/virtio/virtio-balloon.c
> > +++ b/hw/virtio/virtio-balloon.c
> > @@ -88,10 +88,19 @@ static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs)
> >      timer_mod(s->stats_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000);
> >  }
> >
> > +static void balloon_stats_push_elem(VirtIOBalloon *s)
> > +{
> > +    VirtIODevice *vdev = VIRTIO_DEVICE(s);
> > +
> > +    virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset);
> > +    virtio_notify(vdev, s->svq);
> > +    g_free(s->stats_vq_elem);
> > +    s->stats_vq_elem = NULL;
> > +}
> > +
> >  static void balloon_stats_poll_cb(void *opaque)
> >  {
> >      VirtIOBalloon *s = opaque;
> > -    VirtIODevice *vdev = VIRTIO_DEVICE(s);
> >
> >      if (!s->stats_vq_elem) {
> >          /* The guest hasn't sent the stats yet (either not enabled or we came
> > @@ -100,10 +109,7 @@ static void balloon_stats_poll_cb(void *opaque)
> >          return;
> >      }
> >
> > -    virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset);
> > -    virtio_notify(vdev, s->svq);
> > -    g_free(s->stats_vq_elem);
> > -    s->stats_vq_elem = NULL;
> > +    balloon_stats_push_elem(s);
> >  }
> >
> >  static void balloon_stats_get_all(Object *obj, Visitor *v, const char *name,
> > @@ -414,6 +420,15 @@ static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
> >      return 0;
> >  }
> >
> > +static void balloon_vm_state_change(void *opaque, int running, RunState state)
> > +{
> > +    VirtIOBalloon *s = opaque;
> > +
> > +    if (!running && s->stats_vq_elem) {
> > +        balloon_stats_push_elem(s);
> > +    }
> > +}
> > +
> >  static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
> >  {
> >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > @@ -436,6 +451,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
> >      s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
> >      s->svq = virtio_add_queue(vdev, 1, virtio_balloon_receive_stats);
> >
> > +    s->change = qemu_add_vm_change_state_handler(balloon_vm_state_change, s);
> >      reset_stats(s);
> >  }
> >
> > @@ -444,6 +460,7 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
> >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> >      VirtIOBalloon *s = VIRTIO_BALLOON(dev);
> >
> > +    qemu_del_vm_change_state_handler(s->change);
> >      balloon_stats_destroy_timer(s);
> >      qemu_remove_balloon_handler(s);
> >      virtio_cleanup(vdev);
> > diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
> > index 1ea13bd..d72ff7f 100644
> > --- a/include/hw/virtio/virtio-balloon.h
> > +++ b/include/hw/virtio/virtio-balloon.h
> > @@ -43,6 +43,7 @@ typedef struct VirtIOBalloon {
> >      int64_t stats_last_update;
> >      int64_t stats_poll_interval;
> >      uint32_t host_features;
> > +    VMChangeStateEntry *change;
> >  } VirtIOBalloon;
> >
> >  #endif
> > --
> > 2.7.4
> >
> 
> Hi Roman,
> 
> I talked to Michael Tsirkin and he agrees with merging this patch for
> 2.7.

I'm not happy with this patch: it tries to solve the problem on the
"save" side and therefore doesn't fix the bug when migrating from an
earlier QEMU version.

I wonder if we can do better and solve it on the "load" side.  (At first
I thought that your patch did that but on a closer look it turned out
not the case).

In particular, with Stefan's patch to restore VirtQueue->inuse, we
should be able to just rewind ->last_avail_idx by ->inuse during "load",
which AFAICS would also fix the bug.  What do you think?

> Could you please resubmit and use the set_status callback instead
> of adding another VM state change handler?
> 
> static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
> {
>     VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
> 
>     if (!vdev->vm_running && s->stats_vq_elem) {
>         balloon_stats_push_elem(s);
>     }
> }
> 
> and
> 
>     vdc->set_status = virtio_balloon_set_status;
> in virtio_balloon_class_init.

If the scheme I described above works out this won't be needed at all.

> This is somewhat urgent because 2.7 will be out soon. If you're busy
> or if I don't hear from you I'll post it on your behalf.

OK will focus on it now.

Thanks,
Roman.
Ladi Prosek Sept. 1, 2016, 3:43 p.m. UTC | #3
On Thu, Sep 1, 2016 at 4:17 PM, Roman Kagan <rkagan@virtuozzo.com> wrote:
> On Thu, Sep 01, 2016 at 10:35:49AM +0200, Ladi Prosek wrote:
>> On Fri, Aug 19, 2016 at 3:39 PM, Roman Kagan <rkagan@virtuozzo.com> wrote:
>> > Upon save/restore virtio-balloon stats acquisition stops.  The reason is
>> > that the fact that the (only) virtqueue element is being used by QEMU is
>> > not recorded anywhere on save, so upon restore it's not released to the
>> > guest, making further progress impossible.
>> >
>> > Saving the information about the used element would introduce unjustified
>> > vmstate incompatibility.
>> >
>> > So instead just make sure the element is pushed before save, leaving the
>> > ball on the guest side.  For that, add vm state change handler to
>> > virtio-ballon which would take care of pushing the element if there is
>> > one.
>> >
>> > Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
>> > Cc: "Michael S. Tsirkin" <mst@redhat.com>
>> > Cc: Ladi Prosek <lprosek@redhat.com>
>> > Cc: Stefan Hajnoczi <stefanha@redhat.com>
>> > ---
>> >  hw/virtio/virtio-balloon.c         | 27 ++++++++++++++++++++++-----
>> >  include/hw/virtio/virtio-balloon.h |  1 +
>> >  2 files changed, 23 insertions(+), 5 deletions(-)
>> >
>> > diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
>> > index 6d4c57c..f00ad8e 100644
>> > --- a/hw/virtio/virtio-balloon.c
>> > +++ b/hw/virtio/virtio-balloon.c
>> > @@ -88,10 +88,19 @@ static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs)
>> >      timer_mod(s->stats_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000);
>> >  }
>> >
>> > +static void balloon_stats_push_elem(VirtIOBalloon *s)
>> > +{
>> > +    VirtIODevice *vdev = VIRTIO_DEVICE(s);
>> > +
>> > +    virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset);
>> > +    virtio_notify(vdev, s->svq);
>> > +    g_free(s->stats_vq_elem);
>> > +    s->stats_vq_elem = NULL;
>> > +}
>> > +
>> >  static void balloon_stats_poll_cb(void *opaque)
>> >  {
>> >      VirtIOBalloon *s = opaque;
>> > -    VirtIODevice *vdev = VIRTIO_DEVICE(s);
>> >
>> >      if (!s->stats_vq_elem) {
>> >          /* The guest hasn't sent the stats yet (either not enabled or we came
>> > @@ -100,10 +109,7 @@ static void balloon_stats_poll_cb(void *opaque)
>> >          return;
>> >      }
>> >
>> > -    virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset);
>> > -    virtio_notify(vdev, s->svq);
>> > -    g_free(s->stats_vq_elem);
>> > -    s->stats_vq_elem = NULL;
>> > +    balloon_stats_push_elem(s);
>> >  }
>> >
>> >  static void balloon_stats_get_all(Object *obj, Visitor *v, const char *name,
>> > @@ -414,6 +420,15 @@ static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
>> >      return 0;
>> >  }
>> >
>> > +static void balloon_vm_state_change(void *opaque, int running, RunState state)
>> > +{
>> > +    VirtIOBalloon *s = opaque;
>> > +
>> > +    if (!running && s->stats_vq_elem) {
>> > +        balloon_stats_push_elem(s);
>> > +    }
>> > +}
>> > +
>> >  static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>> >  {
>> >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>> > @@ -436,6 +451,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>> >      s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
>> >      s->svq = virtio_add_queue(vdev, 1, virtio_balloon_receive_stats);
>> >
>> > +    s->change = qemu_add_vm_change_state_handler(balloon_vm_state_change, s);
>> >      reset_stats(s);
>> >  }
>> >
>> > @@ -444,6 +460,7 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
>> >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>> >      VirtIOBalloon *s = VIRTIO_BALLOON(dev);
>> >
>> > +    qemu_del_vm_change_state_handler(s->change);
>> >      balloon_stats_destroy_timer(s);
>> >      qemu_remove_balloon_handler(s);
>> >      virtio_cleanup(vdev);
>> > diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
>> > index 1ea13bd..d72ff7f 100644
>> > --- a/include/hw/virtio/virtio-balloon.h
>> > +++ b/include/hw/virtio/virtio-balloon.h
>> > @@ -43,6 +43,7 @@ typedef struct VirtIOBalloon {
>> >      int64_t stats_last_update;
>> >      int64_t stats_poll_interval;
>> >      uint32_t host_features;
>> > +    VMChangeStateEntry *change;
>> >  } VirtIOBalloon;
>> >
>> >  #endif
>> > --
>> > 2.7.4
>> >
>>
>> Hi Roman,
>>
>> I talked to Michael Tsirkin and he agrees with merging this patch for
>> 2.7.
>
> I'm not happy with this patch: it tries to solve the problem on the
> "save" side and therefore doesn't fix the bug when migrating from an
> earlier QEMU version.
>
> I wonder if we can do better and solve it on the "load" side.  (At first
> I thought that your patch did that but on a closer look it turned out
> not the case).
>
> In particular, with Stefan's patch to restore VirtQueue->inuse, we
> should be able to just rewind ->last_avail_idx by ->inuse during "load",
> which AFAICS would also fix the bug.  What do you think?

Stefan was actually going to do exactly that in

https://lists.gnu.org/archive/html/qemu-devel/2016-08/msg02455.html

but then dropped the patch in favor of this "save" side approach.
Going with Stefan's fix seems kind of unfortunate from a future-proof
perspective. It wouldn't be easy to revert it if we ever decide that
we want to leave something in the queue. But I understand that solving
the problem on the "load" side would be more helpful for real-world
deployments. Maybe we could do both but gate the load side hack with a
source QEMU version check? I wonder what Stefan thinks.

>> Could you please resubmit and use the set_status callback instead
>> of adding another VM state change handler?
>>
>> static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
>> {
>>     VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
>>
>>     if (!vdev->vm_running && s->stats_vq_elem) {
>>         balloon_stats_push_elem(s);
>>     }
>> }
>>
>> and
>>
>>     vdc->set_status = virtio_balloon_set_status;
>> in virtio_balloon_class_init.
>
> If the scheme I described above works out this won't be needed at all.
>
>> This is somewhat urgent because 2.7 will be out soon. If you're busy
>> or if I don't hear from you I'll post it on your behalf.
>
> OK will focus on it now.
>
> Thanks,
> Roman.
Michael S. Tsirkin Sept. 1, 2016, 4:18 p.m. UTC | #4
On Thu, Sep 01, 2016 at 05:43:05PM +0200, Ladi Prosek wrote:
> On Thu, Sep 1, 2016 at 4:17 PM, Roman Kagan <rkagan@virtuozzo.com> wrote:
> > On Thu, Sep 01, 2016 at 10:35:49AM +0200, Ladi Prosek wrote:
> >> On Fri, Aug 19, 2016 at 3:39 PM, Roman Kagan <rkagan@virtuozzo.com> wrote:
> >> > Upon save/restore virtio-balloon stats acquisition stops.  The reason is
> >> > that the fact that the (only) virtqueue element is being used by QEMU is
> >> > not recorded anywhere on save, so upon restore it's not released to the
> >> > guest, making further progress impossible.
> >> >
> >> > Saving the information about the used element would introduce unjustified
> >> > vmstate incompatibility.
> >> >
> >> > So instead just make sure the element is pushed before save, leaving the
> >> > ball on the guest side.  For that, add vm state change handler to
> >> > virtio-ballon which would take care of pushing the element if there is
> >> > one.
> >> >
> >> > Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
> >> > Cc: "Michael S. Tsirkin" <mst@redhat.com>
> >> > Cc: Ladi Prosek <lprosek@redhat.com>
> >> > Cc: Stefan Hajnoczi <stefanha@redhat.com>
> >> > ---
> >> >  hw/virtio/virtio-balloon.c         | 27 ++++++++++++++++++++++-----
> >> >  include/hw/virtio/virtio-balloon.h |  1 +
> >> >  2 files changed, 23 insertions(+), 5 deletions(-)
> >> >
> >> > diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> >> > index 6d4c57c..f00ad8e 100644
> >> > --- a/hw/virtio/virtio-balloon.c
> >> > +++ b/hw/virtio/virtio-balloon.c
> >> > @@ -88,10 +88,19 @@ static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs)
> >> >      timer_mod(s->stats_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000);
> >> >  }
> >> >
> >> > +static void balloon_stats_push_elem(VirtIOBalloon *s)
> >> > +{
> >> > +    VirtIODevice *vdev = VIRTIO_DEVICE(s);
> >> > +
> >> > +    virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset);
> >> > +    virtio_notify(vdev, s->svq);
> >> > +    g_free(s->stats_vq_elem);
> >> > +    s->stats_vq_elem = NULL;
> >> > +}
> >> > +
> >> >  static void balloon_stats_poll_cb(void *opaque)
> >> >  {
> >> >      VirtIOBalloon *s = opaque;
> >> > -    VirtIODevice *vdev = VIRTIO_DEVICE(s);
> >> >
> >> >      if (!s->stats_vq_elem) {
> >> >          /* The guest hasn't sent the stats yet (either not enabled or we came
> >> > @@ -100,10 +109,7 @@ static void balloon_stats_poll_cb(void *opaque)
> >> >          return;
> >> >      }
> >> >
> >> > -    virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset);
> >> > -    virtio_notify(vdev, s->svq);
> >> > -    g_free(s->stats_vq_elem);
> >> > -    s->stats_vq_elem = NULL;
> >> > +    balloon_stats_push_elem(s);
> >> >  }
> >> >
> >> >  static void balloon_stats_get_all(Object *obj, Visitor *v, const char *name,
> >> > @@ -414,6 +420,15 @@ static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
> >> >      return 0;
> >> >  }
> >> >
> >> > +static void balloon_vm_state_change(void *opaque, int running, RunState state)
> >> > +{
> >> > +    VirtIOBalloon *s = opaque;
> >> > +
> >> > +    if (!running && s->stats_vq_elem) {
> >> > +        balloon_stats_push_elem(s);
> >> > +    }
> >> > +}
> >> > +
> >> >  static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
> >> >  {
> >> >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> >> > @@ -436,6 +451,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
> >> >      s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
> >> >      s->svq = virtio_add_queue(vdev, 1, virtio_balloon_receive_stats);
> >> >
> >> > +    s->change = qemu_add_vm_change_state_handler(balloon_vm_state_change, s);
> >> >      reset_stats(s);
> >> >  }
> >> >
> >> > @@ -444,6 +460,7 @@ static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
> >> >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> >> >      VirtIOBalloon *s = VIRTIO_BALLOON(dev);
> >> >
> >> > +    qemu_del_vm_change_state_handler(s->change);
> >> >      balloon_stats_destroy_timer(s);
> >> >      qemu_remove_balloon_handler(s);
> >> >      virtio_cleanup(vdev);
> >> > diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
> >> > index 1ea13bd..d72ff7f 100644
> >> > --- a/include/hw/virtio/virtio-balloon.h
> >> > +++ b/include/hw/virtio/virtio-balloon.h
> >> > @@ -43,6 +43,7 @@ typedef struct VirtIOBalloon {
> >> >      int64_t stats_last_update;
> >> >      int64_t stats_poll_interval;
> >> >      uint32_t host_features;
> >> > +    VMChangeStateEntry *change;
> >> >  } VirtIOBalloon;
> >> >
> >> >  #endif
> >> > --
> >> > 2.7.4
> >> >
> >>
> >> Hi Roman,
> >>
> >> I talked to Michael Tsirkin and he agrees with merging this patch for
> >> 2.7.
> >
> > I'm not happy with this patch: it tries to solve the problem on the
> > "save" side and therefore doesn't fix the bug when migrating from an
> > earlier QEMU version.
> >
> > I wonder if we can do better and solve it on the "load" side.  (At first
> > I thought that your patch did that but on a closer look it turned out
> > not the case).
> >
> > In particular, with Stefan's patch to restore VirtQueue->inuse, we
> > should be able to just rewind ->last_avail_idx by ->inuse during "load",
> > which AFAICS would also fix the bug.  What do you think?
> 
> Stefan was actually going to do exactly that in
> 
> https://lists.gnu.org/archive/html/qemu-devel/2016-08/msg02455.html
> 
> but then dropped the patch in favor of this "save" side approach.
> Going with Stefan's fix seems kind of unfortunate from a future-proof
> perspective. It wouldn't be easy to revert it if we ever decide that
> we want to leave something in the queue. But I understand that solving
> the problem on the "load" side would be more helpful for real-world
> deployments. Maybe we could do both but gate the load side hack with a
> source QEMU version check? I wonder what Stefan thinks.

It's a valid point. Try to resubmit Stefan's patches and we'll discuss?

> >> Could you please resubmit and use the set_status callback instead
> >> of adding another VM state change handler?
> >>
> >> static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
> >> {
> >>     VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
> >>
> >>     if (!vdev->vm_running && s->stats_vq_elem) {
> >>         balloon_stats_push_elem(s);
> >>     }
> >> }
> >>
> >> and
> >>
> >>     vdc->set_status = virtio_balloon_set_status;
> >> in virtio_balloon_class_init.
> >
> > If the scheme I described above works out this won't be needed at all.
> >
> >> This is somewhat urgent because 2.7 will be out soon. If you're busy
> >> or if I don't hear from you I'll post it on your behalf.
> >
> > OK will focus on it now.
> >
> > Thanks,
> > Roman.
Roman Kagan Sept. 1, 2016, 6:03 p.m. UTC | #5
On Thu, Sep 01, 2016 at 05:43:05PM +0200, Ladi Prosek wrote:
> On Thu, Sep 1, 2016 at 4:17 PM, Roman Kagan <rkagan@virtuozzo.com> wrote:
> > I'm not happy with this patch: it tries to solve the problem on the
> > "save" side and therefore doesn't fix the bug when migrating from an
> > earlier QEMU version.
> >
> > I wonder if we can do better and solve it on the "load" side.  (At first
> > I thought that your patch did that but on a closer look it turned out
> > not the case).
> >
> > In particular, with Stefan's patch to restore VirtQueue->inuse, we
> > should be able to just rewind ->last_avail_idx by ->inuse during "load",
> > which AFAICS would also fix the bug.  What do you think?
> 
> Stefan was actually going to do exactly that in
> 
> https://lists.gnu.org/archive/html/qemu-devel/2016-08/msg02455.html

Hmm, obviously my mail search skills need to be improved :(  I already
coded a patch before I saw this.

> but then dropped the patch in favor of this "save" side approach.
> Going with Stefan's fix seems kind of unfortunate from a future-proof
> perspective. It wouldn't be easy to revert it if we ever decide that
> we want to leave something in the queue. But I understand that solving
> the problem on the "load" side would be more helpful for real-world
> deployments. Maybe we could do both but gate the load side hack with a
> source QEMU version check? I wonder what Stefan thinks.

I woudn't say it looked like a hack so I wouldn't bother with a version
check.  Anyway let's look at the code and decide.

> >> Could you please resubmit and use the set_status callback instead
> >> of adding another VM state change handler?
> >>
> >> static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
> >> {
> >>     VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
> >>
> >>     if (!vdev->vm_running && s->stats_vq_elem) {
> >>         balloon_stats_push_elem(s);
> >>     }
> >> }
> >>
> >> and
> >>
> >>     vdc->set_status = virtio_balloon_set_status;
> >> in virtio_balloon_class_init.
> >
> > If the scheme I described above works out this won't be needed at all.

Sure I was wrong here, I used it to kick the stats collection on load.

I'll post the patch in a minute.

Roman.
diff mbox

Patch

diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 6d4c57c..f00ad8e 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -88,10 +88,19 @@  static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs)
     timer_mod(s->stats_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000);
 }
 
+static void balloon_stats_push_elem(VirtIOBalloon *s)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset);
+    virtio_notify(vdev, s->svq);
+    g_free(s->stats_vq_elem);
+    s->stats_vq_elem = NULL;
+}
+
 static void balloon_stats_poll_cb(void *opaque)
 {
     VirtIOBalloon *s = opaque;
-    VirtIODevice *vdev = VIRTIO_DEVICE(s);
 
     if (!s->stats_vq_elem) {
         /* The guest hasn't sent the stats yet (either not enabled or we came
@@ -100,10 +109,7 @@  static void balloon_stats_poll_cb(void *opaque)
         return;
     }
 
-    virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset);
-    virtio_notify(vdev, s->svq);
-    g_free(s->stats_vq_elem);
-    s->stats_vq_elem = NULL;
+    balloon_stats_push_elem(s);
 }
 
 static void balloon_stats_get_all(Object *obj, Visitor *v, const char *name,
@@ -414,6 +420,15 @@  static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f,
     return 0;
 }
 
+static void balloon_vm_state_change(void *opaque, int running, RunState state)
+{
+    VirtIOBalloon *s = opaque;
+
+    if (!running && s->stats_vq_elem) {
+        balloon_stats_push_elem(s);
+    }
+}
+
 static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -436,6 +451,7 @@  static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
     s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
     s->svq = virtio_add_queue(vdev, 1, virtio_balloon_receive_stats);
 
+    s->change = qemu_add_vm_change_state_handler(balloon_vm_state_change, s);
     reset_stats(s);
 }
 
@@ -444,6 +460,7 @@  static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIOBalloon *s = VIRTIO_BALLOON(dev);
 
+    qemu_del_vm_change_state_handler(s->change);
     balloon_stats_destroy_timer(s);
     qemu_remove_balloon_handler(s);
     virtio_cleanup(vdev);
diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
index 1ea13bd..d72ff7f 100644
--- a/include/hw/virtio/virtio-balloon.h
+++ b/include/hw/virtio/virtio-balloon.h
@@ -43,6 +43,7 @@  typedef struct VirtIOBalloon {
     int64_t stats_last_update;
     int64_t stats_poll_interval;
     uint32_t host_features;
+    VMChangeStateEntry *change;
 } VirtIOBalloon;
 
 #endif