diff mbox series

[v8,1/7] virtio: Introduce started flag to VirtioDevice

Message ID 20190320112646.3712-2-xieyongji@baidu.com (mailing list archive)
State New, archived
Headers show
Series vhost-user-blk: Add support for backend reconnecting | expand

Commit Message

Yongji Xie March 20, 2019, 11:26 a.m. UTC
From: Xie Yongji <xieyongji@baidu.com>

The virtio 1.0 transitional devices support driver uses the device
before setting the DRIVER_OK status bit. So we introduce a started
flag to indicate whether driver has started the device or not.

Signed-off-by: Xie Yongji <xieyongji@baidu.com>
Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
---
 hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
 include/hw/virtio/virtio.h |  2 ++
 2 files changed, 52 insertions(+), 2 deletions(-)

Comments

Greg Kurz May 24, 2019, 10:19 a.m. UTC | #1
On Mon, 20 May 2019 19:10:35 -0400
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> From: Xie Yongji <xieyongji@baidu.com>
> 
> The virtio 1.0 transitional devices support driver uses the device
> before setting the DRIVER_OK status bit. So we introduce a started
> flag to indicate whether driver has started the device or not.
> 
> Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  include/hw/virtio/virtio.h |  2 ++
>  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
>  2 files changed, 52 insertions(+), 2 deletions(-)
> 
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 7140381e3a..27c0efc3d0 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -105,6 +105,8 @@ struct VirtIODevice
>      uint16_t device_id;
>      bool vm_running;
>      bool broken; /* device in invalid state, needs reset */
> +    bool started;
> +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
>      VMChangeStateEntry *vmstate;
>      char *bus_name;
>      uint8_t device_endian;
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 28056a7ef7..5d533ac74e 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
>              }
>          }
>      }
> +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> +        vdev->start_on_kick = false;
> +    }
> +
>      if (k->set_status) {
>          k->set_status(vdev, val);
>      }
>      vdev->status = val;
> +
>      return 0;
>  }
>  
> @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
>          k->reset(vdev);
>      }
>  
> +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> +    vdev->started = false;
>      vdev->broken = false;
>      vdev->guest_features = 0;
>      vdev->queue_sel = 0;
> @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
>  
>  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
>  {
> +    bool ret = false;
> +
>      if (vq->vring.desc && vq->handle_aio_output) {
>          VirtIODevice *vdev = vq->vdev;
>  
>          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> -        return vq->handle_aio_output(vdev, vq);
> +        ret = vq->handle_aio_output(vdev, vq);
> +
> +        if (unlikely(vdev->start_on_kick)) {
> +            vdev->started = true;
> +            vdev->start_on_kick = false;
> +        }
>      }
>  
> -    return false;
> +    return ret;
>  }
>  
>  static void virtio_queue_notify_vq(VirtQueue *vq)
> @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
>  
>          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
>          vq->handle_output(vdev, vq);
> +
> +        if (unlikely(vdev->start_on_kick)) {
> +            vdev->started = true;
> +            vdev->start_on_kick = false;
> +        }
>      }
>  }
>  
> @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
>      } else if (vq->handle_output) {
>          vq->handle_output(vdev, vq);
>      }
> +
> +    if (unlikely(vdev->start_on_kick)) {
> +        vdev->started = true;
> +        vdev->start_on_kick = false;
> +    }
>  }
>  
>  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
>      return vdev->broken;
>  }
>  
> +static bool virtio_started_needed(void *opaque)
> +{
> +    VirtIODevice *vdev = opaque;
> +
> +    return vdev->started;

Existing machine types don't know about the "virtio/started" subsection. This
breaks migration to older QEMUs if the driver has started the device, ie. most
probably always when it comes to live migration.

My understanding is that we do try to support backward migration though. It
is a regular practice in datacenters to migrate workloads without having to
take care of the QEMU version. FWIW I had to fix similar issues downstream
many times in the past because customers had filed bugs.

Cc'ing David for his opinion.

> +}
> +
>  static const VMStateDescription vmstate_virtqueue = {
>      .name = "virtqueue_state",
>      .version_id = 1,
> @@ -1898,6 +1931,17 @@ static const VMStateDescription vmstate_virtio_broken = {
>      }
>  };
>  
> +static const VMStateDescription vmstate_virtio_started = {
> +    .name = "virtio/started",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = &virtio_started_needed,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_BOOL(started, VirtIODevice),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  static const VMStateDescription vmstate_virtio = {
>      .name = "virtio",
>      .version_id = 1,
> @@ -1913,6 +1957,7 @@ static const VMStateDescription vmstate_virtio = {
>          &vmstate_virtio_ringsize,
>          &vmstate_virtio_broken,
>          &vmstate_virtio_extra_state,
> +        &vmstate_virtio_started,
>          NULL
>      }
>  };
> @@ -2286,6 +2331,9 @@ void virtio_init(VirtIODevice *vdev, const char *name,
>              g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
>      }
>  
> +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> +    vdev->started = false;
>      vdev->device_id = device_id;
>      vdev->status = 0;
>      atomic_set(&vdev->isr, 0);
Yongji Xie May 24, 2019, 11:56 a.m. UTC | #2
On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote:
>
> On Mon, 20 May 2019 19:10:35 -0400
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
>
> > From: Xie Yongji <xieyongji@baidu.com>
> >
> > The virtio 1.0 transitional devices support driver uses the device
> > before setting the DRIVER_OK status bit. So we introduce a started
> > flag to indicate whether driver has started the device or not.
> >
> > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  include/hw/virtio/virtio.h |  2 ++
> >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> >  2 files changed, 52 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > index 7140381e3a..27c0efc3d0 100644
> > --- a/include/hw/virtio/virtio.h
> > +++ b/include/hw/virtio/virtio.h
> > @@ -105,6 +105,8 @@ struct VirtIODevice
> >      uint16_t device_id;
> >      bool vm_running;
> >      bool broken; /* device in invalid state, needs reset */
> > +    bool started;
> > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> >      VMChangeStateEntry *vmstate;
> >      char *bus_name;
> >      uint8_t device_endian;
> > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > index 28056a7ef7..5d533ac74e 100644
> > --- a/hw/virtio/virtio.c
> > +++ b/hw/virtio/virtio.c
> > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> >              }
> >          }
> >      }
> > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > +        vdev->start_on_kick = false;
> > +    }
> > +
> >      if (k->set_status) {
> >          k->set_status(vdev, val);
> >      }
> >      vdev->status = val;
> > +
> >      return 0;
> >  }
> >
> > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> >          k->reset(vdev);
> >      }
> >
> > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > +    vdev->started = false;
> >      vdev->broken = false;
> >      vdev->guest_features = 0;
> >      vdev->queue_sel = 0;
> > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> >
> >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> >  {
> > +    bool ret = false;
> > +
> >      if (vq->vring.desc && vq->handle_aio_output) {
> >          VirtIODevice *vdev = vq->vdev;
> >
> >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > -        return vq->handle_aio_output(vdev, vq);
> > +        ret = vq->handle_aio_output(vdev, vq);
> > +
> > +        if (unlikely(vdev->start_on_kick)) {
> > +            vdev->started = true;
> > +            vdev->start_on_kick = false;
> > +        }
> >      }
> >
> > -    return false;
> > +    return ret;
> >  }
> >
> >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> >
> >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> >          vq->handle_output(vdev, vq);
> > +
> > +        if (unlikely(vdev->start_on_kick)) {
> > +            vdev->started = true;
> > +            vdev->start_on_kick = false;
> > +        }
> >      }
> >  }
> >
> > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> >      } else if (vq->handle_output) {
> >          vq->handle_output(vdev, vq);
> >      }
> > +
> > +    if (unlikely(vdev->start_on_kick)) {
> > +        vdev->started = true;
> > +        vdev->start_on_kick = false;
> > +    }
> >  }
> >
> >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> >      return vdev->broken;
> >  }
> >
> > +static bool virtio_started_needed(void *opaque)
> > +{
> > +    VirtIODevice *vdev = opaque;
> > +
> > +    return vdev->started;
>
> Existing machine types don't know about the "virtio/started" subsection. This
> breaks migration to older QEMUs if the driver has started the device, ie. most
> probably always when it comes to live migration.
>
> My understanding is that we do try to support backward migration though. It
> is a regular practice in datacenters to migrate workloads without having to
> take care of the QEMU version. FWIW I had to fix similar issues downstream
> many times in the past because customers had filed bugs.
>

If we do need to support backward migration, for this patch, what I
can think of is to only migrate the flag in the case that guest kicks
but not set DRIVER_OK. This could fix backward migration in most case.
Not sure if there is a more general approach...

Thanks,
Yongji
Greg Kurz May 27, 2019, 10:44 a.m. UTC | #3
On Fri, 24 May 2019 19:56:06 +0800
Yongji Xie <elohimes@gmail.com> wrote:

> On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote:
> >
> > On Mon, 20 May 2019 19:10:35 -0400
> > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> >  
> > > From: Xie Yongji <xieyongji@baidu.com>
> > >
> > > The virtio 1.0 transitional devices support driver uses the device
> > > before setting the DRIVER_OK status bit. So we introduce a started
> > > flag to indicate whether driver has started the device or not.
> > >
> > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > ---
> > >  include/hw/virtio/virtio.h |  2 ++
> > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > index 7140381e3a..27c0efc3d0 100644
> > > --- a/include/hw/virtio/virtio.h
> > > +++ b/include/hw/virtio/virtio.h
> > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > >      uint16_t device_id;
> > >      bool vm_running;
> > >      bool broken; /* device in invalid state, needs reset */
> > > +    bool started;
> > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > >      VMChangeStateEntry *vmstate;
> > >      char *bus_name;
> > >      uint8_t device_endian;
> > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > index 28056a7ef7..5d533ac74e 100644
> > > --- a/hw/virtio/virtio.c
> > > +++ b/hw/virtio/virtio.c
> > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > >              }
> > >          }
> > >      }
> > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > +        vdev->start_on_kick = false;
> > > +    }
> > > +
> > >      if (k->set_status) {
> > >          k->set_status(vdev, val);
> > >      }
> > >      vdev->status = val;
> > > +
> > >      return 0;
> > >  }
> > >
> > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > >          k->reset(vdev);
> > >      }
> > >
> > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > +    vdev->started = false;
> > >      vdev->broken = false;
> > >      vdev->guest_features = 0;
> > >      vdev->queue_sel = 0;
> > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > >
> > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > >  {
> > > +    bool ret = false;
> > > +
> > >      if (vq->vring.desc && vq->handle_aio_output) {
> > >          VirtIODevice *vdev = vq->vdev;
> > >
> > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > -        return vq->handle_aio_output(vdev, vq);
> > > +        ret = vq->handle_aio_output(vdev, vq);
> > > +
> > > +        if (unlikely(vdev->start_on_kick)) {
> > > +            vdev->started = true;
> > > +            vdev->start_on_kick = false;
> > > +        }
> > >      }
> > >
> > > -    return false;
> > > +    return ret;
> > >  }
> > >
> > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > >
> > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > >          vq->handle_output(vdev, vq);
> > > +
> > > +        if (unlikely(vdev->start_on_kick)) {
> > > +            vdev->started = true;
> > > +            vdev->start_on_kick = false;
> > > +        }
> > >      }
> > >  }
> > >
> > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > >      } else if (vq->handle_output) {
> > >          vq->handle_output(vdev, vq);
> > >      }
> > > +
> > > +    if (unlikely(vdev->start_on_kick)) {
> > > +        vdev->started = true;
> > > +        vdev->start_on_kick = false;
> > > +    }
> > >  }
> > >
> > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > >      return vdev->broken;
> > >  }
> > >
> > > +static bool virtio_started_needed(void *opaque)
> > > +{
> > > +    VirtIODevice *vdev = opaque;
> > > +
> > > +    return vdev->started;  
> >
> > Existing machine types don't know about the "virtio/started" subsection. This
> > breaks migration to older QEMUs if the driver has started the device, ie. most
> > probably always when it comes to live migration.
> >
> > My understanding is that we do try to support backward migration though. It
> > is a regular practice in datacenters to migrate workloads without having to
> > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > many times in the past because customers had filed bugs.
> >  
> 
> If we do need to support backward migration, for this patch, what I
> can think of is to only migrate the flag in the case that guest kicks
> but not set DRIVER_OK. This could fix backward migration in most case.

You mean something like that ?

static bool virtio_started_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
}

> Not sure if there is a more general approach...
> 

Another approach would be to only implement the started flag for
machine version > 4.0. This can be achieved by adding a "use-started"
property to the base virtio device, true by default and set to
false by hw_compat_4_0.

> Thanks,
> Yongji
Yongji Xie May 27, 2019, 1:04 p.m. UTC | #4
On Mon, 27 May 2019 at 18:44, Greg Kurz <groug@kaod.org> wrote:
>
> On Fri, 24 May 2019 19:56:06 +0800
> Yongji Xie <elohimes@gmail.com> wrote:
>
> > On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote:
> > >
> > > On Mon, 20 May 2019 19:10:35 -0400
> > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > >
> > > > From: Xie Yongji <xieyongji@baidu.com>
> > > >
> > > > The virtio 1.0 transitional devices support driver uses the device
> > > > before setting the DRIVER_OK status bit. So we introduce a started
> > > > flag to indicate whether driver has started the device or not.
> > > >
> > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > ---
> > > >  include/hw/virtio/virtio.h |  2 ++
> > > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > index 7140381e3a..27c0efc3d0 100644
> > > > --- a/include/hw/virtio/virtio.h
> > > > +++ b/include/hw/virtio/virtio.h
> > > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > > >      uint16_t device_id;
> > > >      bool vm_running;
> > > >      bool broken; /* device in invalid state, needs reset */
> > > > +    bool started;
> > > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > > >      VMChangeStateEntry *vmstate;
> > > >      char *bus_name;
> > > >      uint8_t device_endian;
> > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > index 28056a7ef7..5d533ac74e 100644
> > > > --- a/hw/virtio/virtio.c
> > > > +++ b/hw/virtio/virtio.c
> > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > > >              }
> > > >          }
> > > >      }
> > > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > > +        vdev->start_on_kick = false;
> > > > +    }
> > > > +
> > > >      if (k->set_status) {
> > > >          k->set_status(vdev, val);
> > > >      }
> > > >      vdev->status = val;
> > > > +
> > > >      return 0;
> > > >  }
> > > >
> > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > > >          k->reset(vdev);
> > > >      }
> > > >
> > > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > > +    vdev->started = false;
> > > >      vdev->broken = false;
> > > >      vdev->guest_features = 0;
> > > >      vdev->queue_sel = 0;
> > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > > >
> > > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > > >  {
> > > > +    bool ret = false;
> > > > +
> > > >      if (vq->vring.desc && vq->handle_aio_output) {
> > > >          VirtIODevice *vdev = vq->vdev;
> > > >
> > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > -        return vq->handle_aio_output(vdev, vq);
> > > > +        ret = vq->handle_aio_output(vdev, vq);
> > > > +
> > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > +            vdev->started = true;
> > > > +            vdev->start_on_kick = false;
> > > > +        }
> > > >      }
> > > >
> > > > -    return false;
> > > > +    return ret;
> > > >  }
> > > >
> > > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > > >
> > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > >          vq->handle_output(vdev, vq);
> > > > +
> > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > +            vdev->started = true;
> > > > +            vdev->start_on_kick = false;
> > > > +        }
> > > >      }
> > > >  }
> > > >
> > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > > >      } else if (vq->handle_output) {
> > > >          vq->handle_output(vdev, vq);
> > > >      }
> > > > +
> > > > +    if (unlikely(vdev->start_on_kick)) {
> > > > +        vdev->started = true;
> > > > +        vdev->start_on_kick = false;
> > > > +    }
> > > >  }
> > > >
> > > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > >      return vdev->broken;
> > > >  }
> > > >
> > > > +static bool virtio_started_needed(void *opaque)
> > > > +{
> > > > +    VirtIODevice *vdev = opaque;
> > > > +
> > > > +    return vdev->started;
> > >
> > > Existing machine types don't know about the "virtio/started" subsection. This
> > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > probably always when it comes to live migration.
> > >
> > > My understanding is that we do try to support backward migration though. It
> > > is a regular practice in datacenters to migrate workloads without having to
> > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > many times in the past because customers had filed bugs.
> > >
> >
> > If we do need to support backward migration, for this patch, what I
> > can think of is to only migrate the flag in the case that guest kicks
> > but not set DRIVER_OK. This could fix backward migration in most case.
>
> You mean something like that ?
>
> static bool virtio_started_needed(void *opaque)
> {
>     VirtIODevice *vdev = opaque;
>
>     return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
> }
>

Yes.

> > Not sure if there is a more general approach...
> >
>
> Another approach would be to only implement the started flag for
> machine version > 4.0. This can be achieved by adding a "use-started"
> property to the base virtio device, true by default and set to
> false by hw_compat_4_0.
>

If so, the problem that I tried to fix in this patch would still be
exist in old machine type, right?

Now I think the first approach may be more reasonable. Blocking a
migration that will cause guest hang instead of allowing that.

Thanks,
Yongji
Greg Kurz May 27, 2019, 3:45 p.m. UTC | #5
On Mon, 27 May 2019 21:04:38 +0800
Yongji Xie <elohimes@gmail.com> wrote:

> On Mon, 27 May 2019 at 18:44, Greg Kurz <groug@kaod.org> wrote:
> >
> > On Fri, 24 May 2019 19:56:06 +0800
> > Yongji Xie <elohimes@gmail.com> wrote:
> >  
> > > On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote:  
> > > >
> > > > On Mon, 20 May 2019 19:10:35 -0400
> > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > >  
> > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > >
> > > > > The virtio 1.0 transitional devices support driver uses the device
> > > > > before setting the DRIVER_OK status bit. So we introduce a started
> > > > > flag to indicate whether driver has started the device or not.
> > > > >
> > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > ---
> > > > >  include/hw/virtio/virtio.h |  2 ++
> > > > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > > index 7140381e3a..27c0efc3d0 100644
> > > > > --- a/include/hw/virtio/virtio.h
> > > > > +++ b/include/hw/virtio/virtio.h
> > > > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > > > >      uint16_t device_id;
> > > > >      bool vm_running;
> > > > >      bool broken; /* device in invalid state, needs reset */
> > > > > +    bool started;
> > > > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > > > >      VMChangeStateEntry *vmstate;
> > > > >      char *bus_name;
> > > > >      uint8_t device_endian;
> > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > > index 28056a7ef7..5d533ac74e 100644
> > > > > --- a/hw/virtio/virtio.c
> > > > > +++ b/hw/virtio/virtio.c
> > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > > > >              }
> > > > >          }
> > > > >      }
> > > > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > > > +        vdev->start_on_kick = false;
> > > > > +    }
> > > > > +
> > > > >      if (k->set_status) {
> > > > >          k->set_status(vdev, val);
> > > > >      }
> > > > >      vdev->status = val;
> > > > > +
> > > > >      return 0;
> > > > >  }
> > > > >
> > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > > > >          k->reset(vdev);
> > > > >      }
> > > > >
> > > > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > > > +    vdev->started = false;
> > > > >      vdev->broken = false;
> > > > >      vdev->guest_features = 0;
> > > > >      vdev->queue_sel = 0;
> > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > > > >
> > > > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > > > >  {
> > > > > +    bool ret = false;
> > > > > +
> > > > >      if (vq->vring.desc && vq->handle_aio_output) {
> > > > >          VirtIODevice *vdev = vq->vdev;
> > > > >
> > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > -        return vq->handle_aio_output(vdev, vq);
> > > > > +        ret = vq->handle_aio_output(vdev, vq);
> > > > > +
> > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > +            vdev->started = true;
> > > > > +            vdev->start_on_kick = false;
> > > > > +        }
> > > > >      }
> > > > >
> > > > > -    return false;
> > > > > +    return ret;
> > > > >  }
> > > > >
> > > > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > >
> > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > >          vq->handle_output(vdev, vq);
> > > > > +
> > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > +            vdev->started = true;
> > > > > +            vdev->start_on_kick = false;
> > > > > +        }
> > > > >      }
> > > > >  }
> > > > >
> > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > > > >      } else if (vq->handle_output) {
> > > > >          vq->handle_output(vdev, vq);
> > > > >      }
> > > > > +
> > > > > +    if (unlikely(vdev->start_on_kick)) {
> > > > > +        vdev->started = true;
> > > > > +        vdev->start_on_kick = false;
> > > > > +    }
> > > > >  }
> > > > >
> > > > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > > >      return vdev->broken;
> > > > >  }
> > > > >
> > > > > +static bool virtio_started_needed(void *opaque)
> > > > > +{
> > > > > +    VirtIODevice *vdev = opaque;
> > > > > +
> > > > > +    return vdev->started;  
> > > >
> > > > Existing machine types don't know about the "virtio/started" subsection. This
> > > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > > probably always when it comes to live migration.
> > > >
> > > > My understanding is that we do try to support backward migration though. It
> > > > is a regular practice in datacenters to migrate workloads without having to
> > > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > > many times in the past because customers had filed bugs.
> > > >  
> > >
> > > If we do need to support backward migration, for this patch, what I
> > > can think of is to only migrate the flag in the case that guest kicks
> > > but not set DRIVER_OK. This could fix backward migration in most case.  
> >
> > You mean something like that ?
> >
> > static bool virtio_started_needed(void *opaque)
> > {
> >     VirtIODevice *vdev = opaque;
> >
> >     return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
> > }
> >  
> 
> Yes.
> 
> > > Not sure if there is a more general approach...
> > >  
> >
> > Another approach would be to only implement the started flag for
> > machine version > 4.0. This can be achieved by adding a "use-started"
> > property to the base virtio device, true by default and set to
> > false by hw_compat_4_0.
> >  
> 
> If so, the problem that I tried to fix in this patch would still be
> exist in old machine type, right?
> 

Yes.

> Now I think the first approach may be more reasonable. Blocking a
> migration that will cause guest hang instead of allowing that.
> 

Fair enough but it may be difficult for the end user to understand
why the migration failed in the first place... What about detecting
the hang condition (kick before DRIVER_OK) in the source, add
a migration blocker with an explicit message and remove the blocker
once DRIVER_OK is set ?

> Thanks,
> Yongji
Michael S. Tsirkin May 27, 2019, 6:53 p.m. UTC | #6
On Mon, May 27, 2019 at 12:44:46PM +0200, Greg Kurz wrote:
> On Fri, 24 May 2019 19:56:06 +0800
> Yongji Xie <elohimes@gmail.com> wrote:
> 
> > On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote:
> > >
> > > On Mon, 20 May 2019 19:10:35 -0400
> > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > >  
> > > > From: Xie Yongji <xieyongji@baidu.com>
> > > >
> > > > The virtio 1.0 transitional devices support driver uses the device
> > > > before setting the DRIVER_OK status bit. So we introduce a started
> > > > flag to indicate whether driver has started the device or not.
> > > >
> > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > ---
> > > >  include/hw/virtio/virtio.h |  2 ++
> > > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > index 7140381e3a..27c0efc3d0 100644
> > > > --- a/include/hw/virtio/virtio.h
> > > > +++ b/include/hw/virtio/virtio.h
> > > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > > >      uint16_t device_id;
> > > >      bool vm_running;
> > > >      bool broken; /* device in invalid state, needs reset */
> > > > +    bool started;
> > > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > > >      VMChangeStateEntry *vmstate;
> > > >      char *bus_name;
> > > >      uint8_t device_endian;
> > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > index 28056a7ef7..5d533ac74e 100644
> > > > --- a/hw/virtio/virtio.c
> > > > +++ b/hw/virtio/virtio.c
> > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > > >              }
> > > >          }
> > > >      }
> > > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > > +        vdev->start_on_kick = false;
> > > > +    }
> > > > +
> > > >      if (k->set_status) {
> > > >          k->set_status(vdev, val);
> > > >      }
> > > >      vdev->status = val;
> > > > +
> > > >      return 0;
> > > >  }
> > > >
> > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > > >          k->reset(vdev);
> > > >      }
> > > >
> > > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > > +    vdev->started = false;
> > > >      vdev->broken = false;
> > > >      vdev->guest_features = 0;
> > > >      vdev->queue_sel = 0;
> > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > > >
> > > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > > >  {
> > > > +    bool ret = false;
> > > > +
> > > >      if (vq->vring.desc && vq->handle_aio_output) {
> > > >          VirtIODevice *vdev = vq->vdev;
> > > >
> > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > -        return vq->handle_aio_output(vdev, vq);
> > > > +        ret = vq->handle_aio_output(vdev, vq);
> > > > +
> > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > +            vdev->started = true;
> > > > +            vdev->start_on_kick = false;
> > > > +        }
> > > >      }
> > > >
> > > > -    return false;
> > > > +    return ret;
> > > >  }
> > > >
> > > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > > >
> > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > >          vq->handle_output(vdev, vq);
> > > > +
> > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > +            vdev->started = true;
> > > > +            vdev->start_on_kick = false;
> > > > +        }
> > > >      }
> > > >  }
> > > >
> > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > > >      } else if (vq->handle_output) {
> > > >          vq->handle_output(vdev, vq);
> > > >      }
> > > > +
> > > > +    if (unlikely(vdev->start_on_kick)) {
> > > > +        vdev->started = true;
> > > > +        vdev->start_on_kick = false;
> > > > +    }
> > > >  }
> > > >
> > > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > >      return vdev->broken;
> > > >  }
> > > >
> > > > +static bool virtio_started_needed(void *opaque)
> > > > +{
> > > > +    VirtIODevice *vdev = opaque;
> > > > +
> > > > +    return vdev->started;  
> > >
> > > Existing machine types don't know about the "virtio/started" subsection. This
> > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > probably always when it comes to live migration.
> > >
> > > My understanding is that we do try to support backward migration though. It
> > > is a regular practice in datacenters to migrate workloads without having to
> > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > many times in the past because customers had filed bugs.
> > >  
> > 
> > If we do need to support backward migration, for this patch, what I
> > can think of is to only migrate the flag in the case that guest kicks
> > but not set DRIVER_OK. This could fix backward migration in most case.
> 
> You mean something like that ?
> 
> static bool virtio_started_needed(void *opaque)
> {
>     VirtIODevice *vdev = opaque;
> 
>     return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
> }
> 
> > Not sure if there is a more general approach...
> > 
> 
> Another approach would be to only implement the started flag for
> machine version > 4.0. This can be achieved by adding a "use-started"
> property to the base virtio device, true by default and set to
> false by hw_compat_4_0.

I think this is best frankly.


> > Thanks,
> > Yongji
David Gibson May 28, 2019, 12:08 a.m. UTC | #7
On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote:
> On Mon, 20 May 2019 19:10:35 -0400
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > From: Xie Yongji <xieyongji@baidu.com>
> > 
> > The virtio 1.0 transitional devices support driver uses the device
> > before setting the DRIVER_OK status bit. So we introduce a started
> > flag to indicate whether driver has started the device or not.
> > 
> > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  include/hw/virtio/virtio.h |  2 ++
> >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> >  2 files changed, 52 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > index 7140381e3a..27c0efc3d0 100644
> > --- a/include/hw/virtio/virtio.h
> > +++ b/include/hw/virtio/virtio.h
> > @@ -105,6 +105,8 @@ struct VirtIODevice
> >      uint16_t device_id;
> >      bool vm_running;
> >      bool broken; /* device in invalid state, needs reset */
> > +    bool started;
> > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> >      VMChangeStateEntry *vmstate;
> >      char *bus_name;
> >      uint8_t device_endian;
> > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > index 28056a7ef7..5d533ac74e 100644
> > --- a/hw/virtio/virtio.c
> > +++ b/hw/virtio/virtio.c
> > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> >              }
> >          }
> >      }
> > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > +        vdev->start_on_kick = false;
> > +    }
> > +
> >      if (k->set_status) {
> >          k->set_status(vdev, val);
> >      }
> >      vdev->status = val;
> > +
> >      return 0;
> >  }
> >  
> > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> >          k->reset(vdev);
> >      }
> >  
> > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > +    vdev->started = false;
> >      vdev->broken = false;
> >      vdev->guest_features = 0;
> >      vdev->queue_sel = 0;
> > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> >  
> >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> >  {
> > +    bool ret = false;
> > +
> >      if (vq->vring.desc && vq->handle_aio_output) {
> >          VirtIODevice *vdev = vq->vdev;
> >  
> >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > -        return vq->handle_aio_output(vdev, vq);
> > +        ret = vq->handle_aio_output(vdev, vq);
> > +
> > +        if (unlikely(vdev->start_on_kick)) {
> > +            vdev->started = true;
> > +            vdev->start_on_kick = false;
> > +        }
> >      }
> >  
> > -    return false;
> > +    return ret;
> >  }
> >  
> >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> >  
> >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> >          vq->handle_output(vdev, vq);
> > +
> > +        if (unlikely(vdev->start_on_kick)) {
> > +            vdev->started = true;
> > +            vdev->start_on_kick = false;
> > +        }
> >      }
> >  }
> >  
> > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> >      } else if (vq->handle_output) {
> >          vq->handle_output(vdev, vq);
> >      }
> > +
> > +    if (unlikely(vdev->start_on_kick)) {
> > +        vdev->started = true;
> > +        vdev->start_on_kick = false;
> > +    }
> >  }
> >  
> >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> >      return vdev->broken;
> >  }
> >  
> > +static bool virtio_started_needed(void *opaque)
> > +{
> > +    VirtIODevice *vdev = opaque;
> > +
> > +    return vdev->started;
> 
> Existing machine types don't know about the "virtio/started" subsection. This
> breaks migration to older QEMUs if the driver has started the device, ie. most
> probably always when it comes to live migration.
> 
> My understanding is that we do try to support backward migration though. It
> is a regular practice in datacenters to migrate workloads without having to
> take care of the QEMU version. FWIW I had to fix similar issues downstream
> many times in the past because customers had filed bugs.
> 
> Cc'ing David for his opinion.

Uh.. did you mean to CC me, or Dave Gilbert?

I mean, I think you're right that we should try to maintain backwards
migration, but this isn't really my area of authority.
Yongji Xie May 28, 2019, 2:48 a.m. UTC | #8
On Tue, 28 May 2019 at 02:54, Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, May 27, 2019 at 12:44:46PM +0200, Greg Kurz wrote:
> > On Fri, 24 May 2019 19:56:06 +0800
> > Yongji Xie <elohimes@gmail.com> wrote:
> >
> > > On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote:
> > > >
> > > > On Mon, 20 May 2019 19:10:35 -0400
> > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > >
> > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > >
> > > > > The virtio 1.0 transitional devices support driver uses the device
> > > > > before setting the DRIVER_OK status bit. So we introduce a started
> > > > > flag to indicate whether driver has started the device or not.
> > > > >
> > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > ---
> > > > >  include/hw/virtio/virtio.h |  2 ++
> > > > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > > index 7140381e3a..27c0efc3d0 100644
> > > > > --- a/include/hw/virtio/virtio.h
> > > > > +++ b/include/hw/virtio/virtio.h
> > > > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > > > >      uint16_t device_id;
> > > > >      bool vm_running;
> > > > >      bool broken; /* device in invalid state, needs reset */
> > > > > +    bool started;
> > > > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > > > >      VMChangeStateEntry *vmstate;
> > > > >      char *bus_name;
> > > > >      uint8_t device_endian;
> > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > > index 28056a7ef7..5d533ac74e 100644
> > > > > --- a/hw/virtio/virtio.c
> > > > > +++ b/hw/virtio/virtio.c
> > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > > > >              }
> > > > >          }
> > > > >      }
> > > > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > > > +        vdev->start_on_kick = false;
> > > > > +    }
> > > > > +
> > > > >      if (k->set_status) {
> > > > >          k->set_status(vdev, val);
> > > > >      }
> > > > >      vdev->status = val;
> > > > > +
> > > > >      return 0;
> > > > >  }
> > > > >
> > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > > > >          k->reset(vdev);
> > > > >      }
> > > > >
> > > > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > > > +    vdev->started = false;
> > > > >      vdev->broken = false;
> > > > >      vdev->guest_features = 0;
> > > > >      vdev->queue_sel = 0;
> > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > > > >
> > > > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > > > >  {
> > > > > +    bool ret = false;
> > > > > +
> > > > >      if (vq->vring.desc && vq->handle_aio_output) {
> > > > >          VirtIODevice *vdev = vq->vdev;
> > > > >
> > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > -        return vq->handle_aio_output(vdev, vq);
> > > > > +        ret = vq->handle_aio_output(vdev, vq);
> > > > > +
> > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > +            vdev->started = true;
> > > > > +            vdev->start_on_kick = false;
> > > > > +        }
> > > > >      }
> > > > >
> > > > > -    return false;
> > > > > +    return ret;
> > > > >  }
> > > > >
> > > > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > >
> > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > >          vq->handle_output(vdev, vq);
> > > > > +
> > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > +            vdev->started = true;
> > > > > +            vdev->start_on_kick = false;
> > > > > +        }
> > > > >      }
> > > > >  }
> > > > >
> > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > > > >      } else if (vq->handle_output) {
> > > > >          vq->handle_output(vdev, vq);
> > > > >      }
> > > > > +
> > > > > +    if (unlikely(vdev->start_on_kick)) {
> > > > > +        vdev->started = true;
> > > > > +        vdev->start_on_kick = false;
> > > > > +    }
> > > > >  }
> > > > >
> > > > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > > >      return vdev->broken;
> > > > >  }
> > > > >
> > > > > +static bool virtio_started_needed(void *opaque)
> > > > > +{
> > > > > +    VirtIODevice *vdev = opaque;
> > > > > +
> > > > > +    return vdev->started;
> > > >
> > > > Existing machine types don't know about the "virtio/started" subsection. This
> > > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > > probably always when it comes to live migration.
> > > >
> > > > My understanding is that we do try to support backward migration though. It
> > > > is a regular practice in datacenters to migrate workloads without having to
> > > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > > many times in the past because customers had filed bugs.
> > > >
> > >
> > > If we do need to support backward migration, for this patch, what I
> > > can think of is to only migrate the flag in the case that guest kicks
> > > but not set DRIVER_OK. This could fix backward migration in most case.
> >
> > You mean something like that ?
> >
> > static bool virtio_started_needed(void *opaque)
> > {
> >     VirtIODevice *vdev = opaque;
> >
> >     return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
> > }
> >
> > > Not sure if there is a more general approach...
> > >
> >
> > Another approach would be to only implement the started flag for
> > machine version > 4.0. This can be achieved by adding a "use-started"
> > property to the base virtio device, true by default and set to
> > false by hw_compat_4_0.
>
> I think this is best frankly.
>

Only implement the started flag for machine version > 4.0 might not be
good because vhost-user-blk now need to use this flag. How about only
migrating this flag for machine version > 4.0 instead?

Thanks,
Yongji
Greg Kurz May 28, 2019, 6:39 a.m. UTC | #9
On Tue, 28 May 2019 10:08:54 +1000
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote:
> > On Mon, 20 May 2019 19:10:35 -0400
> > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> >   
> > > From: Xie Yongji <xieyongji@baidu.com>
> > > 
> > > The virtio 1.0 transitional devices support driver uses the device
> > > before setting the DRIVER_OK status bit. So we introduce a started
> > > flag to indicate whether driver has started the device or not.
> > > 
> > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > ---
> > >  include/hw/virtio/virtio.h |  2 ++
> > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > index 7140381e3a..27c0efc3d0 100644
> > > --- a/include/hw/virtio/virtio.h
> > > +++ b/include/hw/virtio/virtio.h
> > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > >      uint16_t device_id;
> > >      bool vm_running;
> > >      bool broken; /* device in invalid state, needs reset */
> > > +    bool started;
> > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > >      VMChangeStateEntry *vmstate;
> > >      char *bus_name;
> > >      uint8_t device_endian;
> > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > index 28056a7ef7..5d533ac74e 100644
> > > --- a/hw/virtio/virtio.c
> > > +++ b/hw/virtio/virtio.c
> > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > >              }
> > >          }
> > >      }
> > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > +        vdev->start_on_kick = false;
> > > +    }
> > > +
> > >      if (k->set_status) {
> > >          k->set_status(vdev, val);
> > >      }
> > >      vdev->status = val;
> > > +
> > >      return 0;
> > >  }
> > >  
> > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > >          k->reset(vdev);
> > >      }
> > >  
> > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > +    vdev->started = false;
> > >      vdev->broken = false;
> > >      vdev->guest_features = 0;
> > >      vdev->queue_sel = 0;
> > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > >  
> > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > >  {
> > > +    bool ret = false;
> > > +
> > >      if (vq->vring.desc && vq->handle_aio_output) {
> > >          VirtIODevice *vdev = vq->vdev;
> > >  
> > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > -        return vq->handle_aio_output(vdev, vq);
> > > +        ret = vq->handle_aio_output(vdev, vq);
> > > +
> > > +        if (unlikely(vdev->start_on_kick)) {
> > > +            vdev->started = true;
> > > +            vdev->start_on_kick = false;
> > > +        }
> > >      }
> > >  
> > > -    return false;
> > > +    return ret;
> > >  }
> > >  
> > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > >  
> > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > >          vq->handle_output(vdev, vq);
> > > +
> > > +        if (unlikely(vdev->start_on_kick)) {
> > > +            vdev->started = true;
> > > +            vdev->start_on_kick = false;
> > > +        }
> > >      }
> > >  }
> > >  
> > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > >      } else if (vq->handle_output) {
> > >          vq->handle_output(vdev, vq);
> > >      }
> > > +
> > > +    if (unlikely(vdev->start_on_kick)) {
> > > +        vdev->started = true;
> > > +        vdev->start_on_kick = false;
> > > +    }
> > >  }
> > >  
> > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > >      return vdev->broken;
> > >  }
> > >  
> > > +static bool virtio_started_needed(void *opaque)
> > > +{
> > > +    VirtIODevice *vdev = opaque;
> > > +
> > > +    return vdev->started;  
> > 
> > Existing machine types don't know about the "virtio/started" subsection. This
> > breaks migration to older QEMUs if the driver has started the device, ie. most
> > probably always when it comes to live migration.
> > 
> > My understanding is that we do try to support backward migration though. It
> > is a regular practice in datacenters to migrate workloads without having to
> > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > many times in the past because customers had filed bugs.
> > 
> > Cc'ing David for his opinion.  
> 
> Uh.. did you mean to CC me, or Dave Gilbert?
> 

Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable
as well. I remember being involved in backward migration fixes for spapr
several times.

> I mean, I think you're right that we should try to maintain backwards
> migration, but this isn't really my area of authority.
> 

Cc'ing Dave Gilbert :)

Cheers,

--
Greg
Dr. David Alan Gilbert May 29, 2019, 11:18 a.m. UTC | #10
* Greg Kurz (groug@kaod.org) wrote:
> On Tue, 28 May 2019 10:08:54 +1000
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote:
> > > On Mon, 20 May 2019 19:10:35 -0400
> > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > >   
> > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > 
> > > > The virtio 1.0 transitional devices support driver uses the device
> > > > before setting the DRIVER_OK status bit. So we introduce a started
> > > > flag to indicate whether driver has started the device or not.
> > > > 
> > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > ---
> > > >  include/hw/virtio/virtio.h |  2 ++
> > > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > index 7140381e3a..27c0efc3d0 100644
> > > > --- a/include/hw/virtio/virtio.h
> > > > +++ b/include/hw/virtio/virtio.h
> > > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > > >      uint16_t device_id;
> > > >      bool vm_running;
> > > >      bool broken; /* device in invalid state, needs reset */
> > > > +    bool started;
> > > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > > >      VMChangeStateEntry *vmstate;
> > > >      char *bus_name;
> > > >      uint8_t device_endian;
> > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > index 28056a7ef7..5d533ac74e 100644
> > > > --- a/hw/virtio/virtio.c
> > > > +++ b/hw/virtio/virtio.c
> > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > > >              }
> > > >          }
> > > >      }
> > > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > > +        vdev->start_on_kick = false;
> > > > +    }
> > > > +
> > > >      if (k->set_status) {
> > > >          k->set_status(vdev, val);
> > > >      }
> > > >      vdev->status = val;
> > > > +
> > > >      return 0;
> > > >  }
> > > >  
> > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > > >          k->reset(vdev);
> > > >      }
> > > >  
> > > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > > +    vdev->started = false;
> > > >      vdev->broken = false;
> > > >      vdev->guest_features = 0;
> > > >      vdev->queue_sel = 0;
> > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > > >  
> > > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > > >  {
> > > > +    bool ret = false;
> > > > +
> > > >      if (vq->vring.desc && vq->handle_aio_output) {
> > > >          VirtIODevice *vdev = vq->vdev;
> > > >  
> > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > -        return vq->handle_aio_output(vdev, vq);
> > > > +        ret = vq->handle_aio_output(vdev, vq);
> > > > +
> > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > +            vdev->started = true;
> > > > +            vdev->start_on_kick = false;
> > > > +        }
> > > >      }
> > > >  
> > > > -    return false;
> > > > +    return ret;
> > > >  }
> > > >  
> > > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > > >  
> > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > >          vq->handle_output(vdev, vq);
> > > > +
> > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > +            vdev->started = true;
> > > > +            vdev->start_on_kick = false;
> > > > +        }
> > > >      }
> > > >  }
> > > >  
> > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > > >      } else if (vq->handle_output) {
> > > >          vq->handle_output(vdev, vq);
> > > >      }
> > > > +
> > > > +    if (unlikely(vdev->start_on_kick)) {
> > > > +        vdev->started = true;
> > > > +        vdev->start_on_kick = false;
> > > > +    }
> > > >  }
> > > >  
> > > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > >      return vdev->broken;
> > > >  }
> > > >  
> > > > +static bool virtio_started_needed(void *opaque)
> > > > +{
> > > > +    VirtIODevice *vdev = opaque;
> > > > +
> > > > +    return vdev->started;  
> > > 
> > > Existing machine types don't know about the "virtio/started" subsection. This
> > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > probably always when it comes to live migration.
> > > 
> > > My understanding is that we do try to support backward migration though. It
> > > is a regular practice in datacenters to migrate workloads without having to
> > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > many times in the past because customers had filed bugs.
> > > 
> > > Cc'ing David for his opinion.  
> > 
> > Uh.. did you mean to CC me, or Dave Gilbert?
> > 
> 
> Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable
> as well. I remember being involved in backward migration fixes for spapr
> several times.
> 
> > I mean, I think you're right that we should try to maintain backwards
> > migration, but this isn't really my area of authority.
> > 
> 
> Cc'ing Dave Gilbert :)

Right, I need to maintain backwards migration compatibility; tie the
feature to a machine type so it's only used on newer machine types and
then we'll be safe.

Having said that, what's the symptom when this goes wrong?

Dave

> Cheers,
> 
> --
> Greg


--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Greg Kurz May 29, 2019, 11:54 a.m. UTC | #11
On Wed, 29 May 2019 12:18:50 +0100
"Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:

> * Greg Kurz (groug@kaod.org) wrote:
> > On Tue, 28 May 2019 10:08:54 +1000
> > David Gibson <david@gibson.dropbear.id.au> wrote:
> >   
> > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote:  
> > > > On Mon, 20 May 2019 19:10:35 -0400
> > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > >     
> > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > > 
> > > > > The virtio 1.0 transitional devices support driver uses the device
> > > > > before setting the DRIVER_OK status bit. So we introduce a started
> > > > > flag to indicate whether driver has started the device or not.
> > > > > 
> > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > ---
> > > > >  include/hw/virtio/virtio.h |  2 ++
> > > > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > > > 
> > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > > index 7140381e3a..27c0efc3d0 100644
> > > > > --- a/include/hw/virtio/virtio.h
> > > > > +++ b/include/hw/virtio/virtio.h
> > > > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > > > >      uint16_t device_id;
> > > > >      bool vm_running;
> > > > >      bool broken; /* device in invalid state, needs reset */
> > > > > +    bool started;
> > > > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > > > >      VMChangeStateEntry *vmstate;
> > > > >      char *bus_name;
> > > > >      uint8_t device_endian;
> > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > > index 28056a7ef7..5d533ac74e 100644
> > > > > --- a/hw/virtio/virtio.c
> > > > > +++ b/hw/virtio/virtio.c
> > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > > > >              }
> > > > >          }
> > > > >      }
> > > > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > > > +        vdev->start_on_kick = false;
> > > > > +    }
> > > > > +
> > > > >      if (k->set_status) {
> > > > >          k->set_status(vdev, val);
> > > > >      }
> > > > >      vdev->status = val;
> > > > > +
> > > > >      return 0;
> > > > >  }
> > > > >  
> > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > > > >          k->reset(vdev);
> > > > >      }
> > > > >  
> > > > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > > > +    vdev->started = false;
> > > > >      vdev->broken = false;
> > > > >      vdev->guest_features = 0;
> > > > >      vdev->queue_sel = 0;
> > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > > > >  
> > > > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > > > >  {
> > > > > +    bool ret = false;
> > > > > +
> > > > >      if (vq->vring.desc && vq->handle_aio_output) {
> > > > >          VirtIODevice *vdev = vq->vdev;
> > > > >  
> > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > -        return vq->handle_aio_output(vdev, vq);
> > > > > +        ret = vq->handle_aio_output(vdev, vq);
> > > > > +
> > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > +            vdev->started = true;
> > > > > +            vdev->start_on_kick = false;
> > > > > +        }
> > > > >      }
> > > > >  
> > > > > -    return false;
> > > > > +    return ret;
> > > > >  }
> > > > >  
> > > > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > >  
> > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > >          vq->handle_output(vdev, vq);
> > > > > +
> > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > +            vdev->started = true;
> > > > > +            vdev->start_on_kick = false;
> > > > > +        }
> > > > >      }
> > > > >  }
> > > > >  
> > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > > > >      } else if (vq->handle_output) {
> > > > >          vq->handle_output(vdev, vq);
> > > > >      }
> > > > > +
> > > > > +    if (unlikely(vdev->start_on_kick)) {
> > > > > +        vdev->started = true;
> > > > > +        vdev->start_on_kick = false;
> > > > > +    }
> > > > >  }
> > > > >  
> > > > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > > >      return vdev->broken;
> > > > >  }
> > > > >  
> > > > > +static bool virtio_started_needed(void *opaque)
> > > > > +{
> > > > > +    VirtIODevice *vdev = opaque;
> > > > > +
> > > > > +    return vdev->started;    
> > > > 
> > > > Existing machine types don't know about the "virtio/started" subsection. This
> > > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > > probably always when it comes to live migration.
> > > > 
> > > > My understanding is that we do try to support backward migration though. It
> > > > is a regular practice in datacenters to migrate workloads without having to
> > > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > > many times in the past because customers had filed bugs.
> > > > 
> > > > Cc'ing David for his opinion.    
> > > 
> > > Uh.. did you mean to CC me, or Dave Gilbert?
> > >   
> > 
> > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable
> > as well. I remember being involved in backward migration fixes for spapr
> > several times.
> >   
> > > I mean, I think you're right that we should try to maintain backwards
> > > migration, but this isn't really my area of authority.
> > >   
> > 
> > Cc'ing Dave Gilbert :)  
> 
> Right, I need to maintain backwards migration compatibility; tie the
> feature to a machine type so it's only used on newer machine types and
> then we'll be safe.
> 
> Having said that, what's the symptom when this goes wrong?
> 

Since the started flag is set as soon as the guest driver begins to use
the device and remains so until next reset, the associated subsection is
basically always emitted when migrating a booted guest. This causes
migration to always fail on the target in this case:

qemu-system-ppc64: Failed to load virtio-net:virtio
qemu-system-ppc64: error while loading state for instance 0x0 of device 'pci@800000020000000:00.0/virtio-net'
qemu-system-ppc64: load of migration failed: No such file or directory

Xie Yongji has just sent a series to fix that, with you in Cc:

Cheers,

--
Greg

> Dave
> 
> > Cheers,
> > 
> > --
> > Greg  
> 
> 
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Dr. David Alan Gilbert May 29, 2019, 12:38 p.m. UTC | #12
* Greg Kurz (groug@kaod.org) wrote:
> On Wed, 29 May 2019 12:18:50 +0100
> "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> 
> > * Greg Kurz (groug@kaod.org) wrote:
> > > On Tue, 28 May 2019 10:08:54 +1000
> > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > >   
> > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote:  
> > > > > On Mon, 20 May 2019 19:10:35 -0400
> > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > >     
> > > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > > > 
> > > > > > The virtio 1.0 transitional devices support driver uses the device
> > > > > > before setting the DRIVER_OK status bit. So we introduce a started
> > > > > > flag to indicate whether driver has started the device or not.
> > > > > > 
> > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > ---
> > > > > >  include/hw/virtio/virtio.h |  2 ++
> > > > > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > > > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > > > > 
> > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > > > index 7140381e3a..27c0efc3d0 100644
> > > > > > --- a/include/hw/virtio/virtio.h
> > > > > > +++ b/include/hw/virtio/virtio.h
> > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > > > > >      uint16_t device_id;
> > > > > >      bool vm_running;
> > > > > >      bool broken; /* device in invalid state, needs reset */
> > > > > > +    bool started;
> > > > > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > > > > >      VMChangeStateEntry *vmstate;
> > > > > >      char *bus_name;
> > > > > >      uint8_t device_endian;
> > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > > > index 28056a7ef7..5d533ac74e 100644
> > > > > > --- a/hw/virtio/virtio.c
> > > > > > +++ b/hw/virtio/virtio.c
> > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > > > > >              }
> > > > > >          }
> > > > > >      }
> > > > > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > > > > +        vdev->start_on_kick = false;
> > > > > > +    }
> > > > > > +
> > > > > >      if (k->set_status) {
> > > > > >          k->set_status(vdev, val);
> > > > > >      }
> > > > > >      vdev->status = val;
> > > > > > +
> > > > > >      return 0;
> > > > > >  }
> > > > > >  
> > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > > > > >          k->reset(vdev);
> > > > > >      }
> > > > > >  
> > > > > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > > > > +    vdev->started = false;
> > > > > >      vdev->broken = false;
> > > > > >      vdev->guest_features = 0;
> > > > > >      vdev->queue_sel = 0;
> > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > > > > >  
> > > > > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > > > > >  {
> > > > > > +    bool ret = false;
> > > > > > +
> > > > > >      if (vq->vring.desc && vq->handle_aio_output) {
> > > > > >          VirtIODevice *vdev = vq->vdev;
> > > > > >  
> > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > -        return vq->handle_aio_output(vdev, vq);
> > > > > > +        ret = vq->handle_aio_output(vdev, vq);
> > > > > > +
> > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > +            vdev->started = true;
> > > > > > +            vdev->start_on_kick = false;
> > > > > > +        }
> > > > > >      }
> > > > > >  
> > > > > > -    return false;
> > > > > > +    return ret;
> > > > > >  }
> > > > > >  
> > > > > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > >  
> > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > >          vq->handle_output(vdev, vq);
> > > > > > +
> > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > +            vdev->started = true;
> > > > > > +            vdev->start_on_kick = false;
> > > > > > +        }
> > > > > >      }
> > > > > >  }
> > > > > >  
> > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > > > > >      } else if (vq->handle_output) {
> > > > > >          vq->handle_output(vdev, vq);
> > > > > >      }
> > > > > > +
> > > > > > +    if (unlikely(vdev->start_on_kick)) {
> > > > > > +        vdev->started = true;
> > > > > > +        vdev->start_on_kick = false;
> > > > > > +    }
> > > > > >  }
> > > > > >  
> > > > > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > > > >      return vdev->broken;
> > > > > >  }
> > > > > >  
> > > > > > +static bool virtio_started_needed(void *opaque)
> > > > > > +{
> > > > > > +    VirtIODevice *vdev = opaque;
> > > > > > +
> > > > > > +    return vdev->started;    
> > > > > 
> > > > > Existing machine types don't know about the "virtio/started" subsection. This
> > > > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > > > probably always when it comes to live migration.
> > > > > 
> > > > > My understanding is that we do try to support backward migration though. It
> > > > > is a regular practice in datacenters to migrate workloads without having to
> > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > > > many times in the past because customers had filed bugs.
> > > > > 
> > > > > Cc'ing David for his opinion.    
> > > > 
> > > > Uh.. did you mean to CC me, or Dave Gilbert?
> > > >   
> > > 
> > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable
> > > as well. I remember being involved in backward migration fixes for spapr
> > > several times.
> > >   
> > > > I mean, I think you're right that we should try to maintain backwards
> > > > migration, but this isn't really my area of authority.
> > > >   
> > > 
> > > Cc'ing Dave Gilbert :)  
> > 
> > Right, I need to maintain backwards migration compatibility; tie the
> > feature to a machine type so it's only used on newer machine types and
> > then we'll be safe.
> > 
> > Having said that, what's the symptom when this goes wrong?
> > 
> 
> Since the started flag is set as soon as the guest driver begins to use
> the device and remains so until next reset, the associated subsection is
> basically always emitted when migrating a booted guest. This causes
> migration to always fail on the target in this case:

I meant what's the symptom without this patch series at all?

Dave

> qemu-system-ppc64: Failed to load virtio-net:virtio
> qemu-system-ppc64: error while loading state for instance 0x0 of device 'pci@800000020000000:00.0/virtio-net'
> qemu-system-ppc64: load of migration failed: No such file or directory
> 
> Xie Yongji has just sent a series to fix that, with you in Cc:
> 
> Cheers,
> 
> --
> Greg
> 
> > Dave
> > 
> > > Cheers,
> > > 
> > > --
> > > Greg  
> > 
> > 
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Greg Kurz May 29, 2019, 1:02 p.m. UTC | #13
On Wed, 29 May 2019 13:38:19 +0100
"Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:

> * Greg Kurz (groug@kaod.org) wrote:
> > On Wed, 29 May 2019 12:18:50 +0100
> > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> >   
> > > * Greg Kurz (groug@kaod.org) wrote:  
> > > > On Tue, 28 May 2019 10:08:54 +1000
> > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > >     
> > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote:    
> > > > > > On Mon, 20 May 2019 19:10:35 -0400
> > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > >       
> > > > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > > > > 
> > > > > > > The virtio 1.0 transitional devices support driver uses the device
> > > > > > > before setting the DRIVER_OK status bit. So we introduce a started
> > > > > > > flag to indicate whether driver has started the device or not.
> > > > > > > 
> > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > ---
> > > > > > >  include/hw/virtio/virtio.h |  2 ++
> > > > > > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > > > > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > > > > index 7140381e3a..27c0efc3d0 100644
> > > > > > > --- a/include/hw/virtio/virtio.h
> > > > > > > +++ b/include/hw/virtio/virtio.h
> > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > > > > > >      uint16_t device_id;
> > > > > > >      bool vm_running;
> > > > > > >      bool broken; /* device in invalid state, needs reset */
> > > > > > > +    bool started;
> > > > > > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > > > > > >      VMChangeStateEntry *vmstate;
> > > > > > >      char *bus_name;
> > > > > > >      uint8_t device_endian;
> > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > > > > index 28056a7ef7..5d533ac74e 100644
> > > > > > > --- a/hw/virtio/virtio.c
> > > > > > > +++ b/hw/virtio/virtio.c
> > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > > > > > >              }
> > > > > > >          }
> > > > > > >      }
> > > > > > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > > > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > > > > > +        vdev->start_on_kick = false;
> > > > > > > +    }
> > > > > > > +
> > > > > > >      if (k->set_status) {
> > > > > > >          k->set_status(vdev, val);
> > > > > > >      }
> > > > > > >      vdev->status = val;
> > > > > > > +
> > > > > > >      return 0;
> > > > > > >  }
> > > > > > >  
> > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > > > > > >          k->reset(vdev);
> > > > > > >      }
> > > > > > >  
> > > > > > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > > > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > > > > > +    vdev->started = false;
> > > > > > >      vdev->broken = false;
> > > > > > >      vdev->guest_features = 0;
> > > > > > >      vdev->queue_sel = 0;
> > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > > > > > >  
> > > > > > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > > > > > >  {
> > > > > > > +    bool ret = false;
> > > > > > > +
> > > > > > >      if (vq->vring.desc && vq->handle_aio_output) {
> > > > > > >          VirtIODevice *vdev = vq->vdev;
> > > > > > >  
> > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > -        return vq->handle_aio_output(vdev, vq);
> > > > > > > +        ret = vq->handle_aio_output(vdev, vq);
> > > > > > > +
> > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > +            vdev->started = true;
> > > > > > > +            vdev->start_on_kick = false;
> > > > > > > +        }
> > > > > > >      }
> > > > > > >  
> > > > > > > -    return false;
> > > > > > > +    return ret;
> > > > > > >  }
> > > > > > >  
> > > > > > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > >  
> > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > +
> > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > +            vdev->started = true;
> > > > > > > +            vdev->start_on_kick = false;
> > > > > > > +        }
> > > > > > >      }
> > > > > > >  }
> > > > > > >  
> > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > > > > > >      } else if (vq->handle_output) {
> > > > > > >          vq->handle_output(vdev, vq);
> > > > > > >      }
> > > > > > > +
> > > > > > > +    if (unlikely(vdev->start_on_kick)) {
> > > > > > > +        vdev->started = true;
> > > > > > > +        vdev->start_on_kick = false;
> > > > > > > +    }
> > > > > > >  }
> > > > > > >  
> > > > > > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > > > > >      return vdev->broken;
> > > > > > >  }
> > > > > > >  
> > > > > > > +static bool virtio_started_needed(void *opaque)
> > > > > > > +{
> > > > > > > +    VirtIODevice *vdev = opaque;
> > > > > > > +
> > > > > > > +    return vdev->started;      
> > > > > > 
> > > > > > Existing machine types don't know about the "virtio/started" subsection. This
> > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > > > > probably always when it comes to live migration.
> > > > > > 
> > > > > > My understanding is that we do try to support backward migration though. It
> > > > > > is a regular practice in datacenters to migrate workloads without having to
> > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > > > > many times in the past because customers had filed bugs.
> > > > > > 
> > > > > > Cc'ing David for his opinion.      
> > > > > 
> > > > > Uh.. did you mean to CC me, or Dave Gilbert?
> > > > >     
> > > > 
> > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable
> > > > as well. I remember being involved in backward migration fixes for spapr
> > > > several times.
> > > >     
> > > > > I mean, I think you're right that we should try to maintain backwards
> > > > > migration, but this isn't really my area of authority.
> > > > >     
> > > > 
> > > > Cc'ing Dave Gilbert :)    
> > > 
> > > Right, I need to maintain backwards migration compatibility; tie the
> > > feature to a machine type so it's only used on newer machine types and
> > > then we'll be safe.
> > > 
> > > Having said that, what's the symptom when this goes wrong?
> > >   
> > 
> > Since the started flag is set as soon as the guest driver begins to use
> > the device and remains so until next reset, the associated subsection is
> > basically always emitted when migrating a booted guest. This causes
> > migration to always fail on the target in this case:  
> 
> I meant what's the symptom without this patch series at all?
> 

Oh sorry... if I got it right, migrating when the guest first kicked the
device but not set the DRIVE_OK bit yet would result in a guest hang on
the destination. Yongji might elaborate a bit more on that.

> Dave
> 
> > qemu-system-ppc64: Failed to load virtio-net:virtio
> > qemu-system-ppc64: error while loading state for instance 0x0 of device 'pci@800000020000000:00.0/virtio-net'
> > qemu-system-ppc64: load of migration failed: No such file or directory
> > 
> > Xie Yongji has just sent a series to fix that, with you in Cc:
> > 
> > Cheers,
> > 
> > --
> > Greg
> >   
> > > Dave
> > >   
> > > > Cheers,
> > > > 
> > > > --
> > > > Greg    
> > > 
> > > 
> > > --
> > > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK  
> >   
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Dr. David Alan Gilbert May 29, 2019, 1:40 p.m. UTC | #14
* Greg Kurz (groug@kaod.org) wrote:
> On Wed, 29 May 2019 13:38:19 +0100
> "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> 
> > * Greg Kurz (groug@kaod.org) wrote:
> > > On Wed, 29 May 2019 12:18:50 +0100
> > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > >   
> > > > * Greg Kurz (groug@kaod.org) wrote:  
> > > > > On Tue, 28 May 2019 10:08:54 +1000
> > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > >     
> > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote:    
> > > > > > > On Mon, 20 May 2019 19:10:35 -0400
> > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > >       
> > > > > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > > > > > 
> > > > > > > > The virtio 1.0 transitional devices support driver uses the device
> > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started
> > > > > > > > flag to indicate whether driver has started the device or not.
> > > > > > > > 
> > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > ---
> > > > > > > >  include/hw/virtio/virtio.h |  2 ++
> > > > > > > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > > > > > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > > > > > index 7140381e3a..27c0efc3d0 100644
> > > > > > > > --- a/include/hw/virtio/virtio.h
> > > > > > > > +++ b/include/hw/virtio/virtio.h
> > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > > > > > > >      uint16_t device_id;
> > > > > > > >      bool vm_running;
> > > > > > > >      bool broken; /* device in invalid state, needs reset */
> > > > > > > > +    bool started;
> > > > > > > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > > > > > > >      VMChangeStateEntry *vmstate;
> > > > > > > >      char *bus_name;
> > > > > > > >      uint8_t device_endian;
> > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > > > > > index 28056a7ef7..5d533ac74e 100644
> > > > > > > > --- a/hw/virtio/virtio.c
> > > > > > > > +++ b/hw/virtio/virtio.c
> > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > > > > > > >              }
> > > > > > > >          }
> > > > > > > >      }
> > > > > > > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > > > > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > > > > > > +        vdev->start_on_kick = false;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > >      if (k->set_status) {
> > > > > > > >          k->set_status(vdev, val);
> > > > > > > >      }
> > > > > > > >      vdev->status = val;
> > > > > > > > +
> > > > > > > >      return 0;
> > > > > > > >  }
> > > > > > > >  
> > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > > > > > > >          k->reset(vdev);
> > > > > > > >      }
> > > > > > > >  
> > > > > > > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > > > > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > > > > > > +    vdev->started = false;
> > > > > > > >      vdev->broken = false;
> > > > > > > >      vdev->guest_features = 0;
> > > > > > > >      vdev->queue_sel = 0;
> > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > > > > > > >  
> > > > > > > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > > > > > > >  {
> > > > > > > > +    bool ret = false;
> > > > > > > > +
> > > > > > > >      if (vq->vring.desc && vq->handle_aio_output) {
> > > > > > > >          VirtIODevice *vdev = vq->vdev;
> > > > > > > >  
> > > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > > -        return vq->handle_aio_output(vdev, vq);
> > > > > > > > +        ret = vq->handle_aio_output(vdev, vq);
> > > > > > > > +
> > > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > > +            vdev->started = true;
> > > > > > > > +            vdev->start_on_kick = false;
> > > > > > > > +        }
> > > > > > > >      }
> > > > > > > >  
> > > > > > > > -    return false;
> > > > > > > > +    return ret;
> > > > > > > >  }
> > > > > > > >  
> > > > > > > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > >  
> > > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > > +
> > > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > > +            vdev->started = true;
> > > > > > > > +            vdev->start_on_kick = false;
> > > > > > > > +        }
> > > > > > > >      }
> > > > > > > >  }
> > > > > > > >  
> > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > > > > > > >      } else if (vq->handle_output) {
> > > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > >      }
> > > > > > > > +
> > > > > > > > +    if (unlikely(vdev->start_on_kick)) {
> > > > > > > > +        vdev->started = true;
> > > > > > > > +        vdev->start_on_kick = false;
> > > > > > > > +    }
> > > > > > > >  }
> > > > > > > >  
> > > > > > > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > > > > > >      return vdev->broken;
> > > > > > > >  }
> > > > > > > >  
> > > > > > > > +static bool virtio_started_needed(void *opaque)
> > > > > > > > +{
> > > > > > > > +    VirtIODevice *vdev = opaque;
> > > > > > > > +
> > > > > > > > +    return vdev->started;      
> > > > > > > 
> > > > > > > Existing machine types don't know about the "virtio/started" subsection. This
> > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > > > > > probably always when it comes to live migration.
> > > > > > > 
> > > > > > > My understanding is that we do try to support backward migration though. It
> > > > > > > is a regular practice in datacenters to migrate workloads without having to
> > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > > > > > many times in the past because customers had filed bugs.
> > > > > > > 
> > > > > > > Cc'ing David for his opinion.      
> > > > > > 
> > > > > > Uh.. did you mean to CC me, or Dave Gilbert?
> > > > > >     
> > > > > 
> > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable
> > > > > as well. I remember being involved in backward migration fixes for spapr
> > > > > several times.
> > > > >     
> > > > > > I mean, I think you're right that we should try to maintain backwards
> > > > > > migration, but this isn't really my area of authority.
> > > > > >     
> > > > > 
> > > > > Cc'ing Dave Gilbert :)    
> > > > 
> > > > Right, I need to maintain backwards migration compatibility; tie the
> > > > feature to a machine type so it's only used on newer machine types and
> > > > then we'll be safe.
> > > > 
> > > > Having said that, what's the symptom when this goes wrong?
> > > >   
> > > 
> > > Since the started flag is set as soon as the guest driver begins to use
> > > the device and remains so until next reset, the associated subsection is
> > > basically always emitted when migrating a booted guest. This causes
> > > migration to always fail on the target in this case:  
> > 
> > I meant what's the symptom without this patch series at all?
> > 
> 
> Oh sorry... if I got it right, migrating when the guest first kicked the
> device but not set the DRIVE_OK bit yet would result in a guest hang on
> the destination. Yongji might elaborate a bit more on that.

Hmm - the only thing worse than a migration failing with an error is a
migration that fails with a hung guest.

If you were sending a new subsection in *only* the case where the guest
would definitely hang, then I think it would be worth sending the
subsection and allowing the backwards migration to fail with the error
because it's a bit better than a hung VM.

What we cant have though is most backwards migrations failing.

Dave

> > Dave
> > 
> > > qemu-system-ppc64: Failed to load virtio-net:virtio
> > > qemu-system-ppc64: error while loading state for instance 0x0 of device 'pci@800000020000000:00.0/virtio-net'
> > > qemu-system-ppc64: load of migration failed: No such file or directory
> > > 
> > > Xie Yongji has just sent a series to fix that, with you in Cc:
> > > 
> > > Cheers,
> > > 
> > > --
> > > Greg
> > >   
> > > > Dave
> > > >   
> > > > > Cheers,
> > > > > 
> > > > > --
> > > > > Greg    
> > > > 
> > > > 
> > > > --
> > > > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK  
> > >   
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Yongji Xie May 29, 2019, 1:57 p.m. UTC | #15
On Wed, 29 May 2019 at 21:02, Greg Kurz <groug@kaod.org> wrote:
>
> On Wed, 29 May 2019 13:38:19 +0100
> "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
>
> > * Greg Kurz (groug@kaod.org) wrote:
> > > On Wed, 29 May 2019 12:18:50 +0100
> > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > >
> > > > * Greg Kurz (groug@kaod.org) wrote:
> > > > > On Tue, 28 May 2019 10:08:54 +1000
> > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > >
> > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote:
> > > > > > > On Mon, 20 May 2019 19:10:35 -0400
> > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > >
> > > > > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > > > > >
> > > > > > > > The virtio 1.0 transitional devices support driver uses the device
> > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started
> > > > > > > > flag to indicate whether driver has started the device or not.
> > > > > > > >
> > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > ---
> > > > > > > >  include/hw/virtio/virtio.h |  2 ++
> > > > > > > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > > > > > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > > > > > index 7140381e3a..27c0efc3d0 100644
> > > > > > > > --- a/include/hw/virtio/virtio.h
> > > > > > > > +++ b/include/hw/virtio/virtio.h
> > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > > > > > > >      uint16_t device_id;
> > > > > > > >      bool vm_running;
> > > > > > > >      bool broken; /* device in invalid state, needs reset */
> > > > > > > > +    bool started;
> > > > > > > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > > > > > > >      VMChangeStateEntry *vmstate;
> > > > > > > >      char *bus_name;
> > > > > > > >      uint8_t device_endian;
> > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > > > > > index 28056a7ef7..5d533ac74e 100644
> > > > > > > > --- a/hw/virtio/virtio.c
> > > > > > > > +++ b/hw/virtio/virtio.c
> > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > > > > > > >              }
> > > > > > > >          }
> > > > > > > >      }
> > > > > > > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > > > > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > > > > > > +        vdev->start_on_kick = false;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > >      if (k->set_status) {
> > > > > > > >          k->set_status(vdev, val);
> > > > > > > >      }
> > > > > > > >      vdev->status = val;
> > > > > > > > +
> > > > > > > >      return 0;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > > > > > > >          k->reset(vdev);
> > > > > > > >      }
> > > > > > > >
> > > > > > > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > > > > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > > > > > > +    vdev->started = false;
> > > > > > > >      vdev->broken = false;
> > > > > > > >      vdev->guest_features = 0;
> > > > > > > >      vdev->queue_sel = 0;
> > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > > > > > > >
> > > > > > > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > > > > > > >  {
> > > > > > > > +    bool ret = false;
> > > > > > > > +
> > > > > > > >      if (vq->vring.desc && vq->handle_aio_output) {
> > > > > > > >          VirtIODevice *vdev = vq->vdev;
> > > > > > > >
> > > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > > -        return vq->handle_aio_output(vdev, vq);
> > > > > > > > +        ret = vq->handle_aio_output(vdev, vq);
> > > > > > > > +
> > > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > > +            vdev->started = true;
> > > > > > > > +            vdev->start_on_kick = false;
> > > > > > > > +        }
> > > > > > > >      }
> > > > > > > >
> > > > > > > > -    return false;
> > > > > > > > +    return ret;
> > > > > > > >  }
> > > > > > > >
> > > > > > > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > >
> > > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > > +
> > > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > > +            vdev->started = true;
> > > > > > > > +            vdev->start_on_kick = false;
> > > > > > > > +        }
> > > > > > > >      }
> > > > > > > >  }
> > > > > > > >
> > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > > > > > > >      } else if (vq->handle_output) {
> > > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > >      }
> > > > > > > > +
> > > > > > > > +    if (unlikely(vdev->start_on_kick)) {
> > > > > > > > +        vdev->started = true;
> > > > > > > > +        vdev->start_on_kick = false;
> > > > > > > > +    }
> > > > > > > >  }
> > > > > > > >
> > > > > > > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > > > > > >      return vdev->broken;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > +static bool virtio_started_needed(void *opaque)
> > > > > > > > +{
> > > > > > > > +    VirtIODevice *vdev = opaque;
> > > > > > > > +
> > > > > > > > +    return vdev->started;
> > > > > > >
> > > > > > > Existing machine types don't know about the "virtio/started" subsection. This
> > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > > > > > probably always when it comes to live migration.
> > > > > > >
> > > > > > > My understanding is that we do try to support backward migration though. It
> > > > > > > is a regular practice in datacenters to migrate workloads without having to
> > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > > > > > many times in the past because customers had filed bugs.
> > > > > > >
> > > > > > > Cc'ing David for his opinion.
> > > > > >
> > > > > > Uh.. did you mean to CC me, or Dave Gilbert?
> > > > > >
> > > > >
> > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable
> > > > > as well. I remember being involved in backward migration fixes for spapr
> > > > > several times.
> > > > >
> > > > > > I mean, I think you're right that we should try to maintain backwards
> > > > > > migration, but this isn't really my area of authority.
> > > > > >
> > > > >
> > > > > Cc'ing Dave Gilbert :)
> > > >
> > > > Right, I need to maintain backwards migration compatibility; tie the
> > > > feature to a machine type so it's only used on newer machine types and
> > > > then we'll be safe.
> > > >
> > > > Having said that, what's the symptom when this goes wrong?
> > > >
> > >
> > > Since the started flag is set as soon as the guest driver begins to use
> > > the device and remains so until next reset, the associated subsection is
> > > basically always emitted when migrating a booted guest. This causes
> > > migration to always fail on the target in this case:
> >
> > I meant what's the symptom without this patch series at all?
> >
>
> Oh sorry... if I got it right, migrating when the guest first kicked the
> device but not set the DRIVE_OK bit yet would result in a guest hang on
> the destination. Yongji might elaborate a bit more on that.
>

Yes, that's the issue that my patch tries to fix. It only happens in a
short window for virtio 1.0 transitional devices.

Thanks,
Yongji
Yongji Xie May 29, 2019, 2:35 p.m. UTC | #16
On Wed, 29 May 2019 at 21:43, Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
>
> * Greg Kurz (groug@kaod.org) wrote:
> > On Wed, 29 May 2019 13:38:19 +0100
> > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> >
> > > * Greg Kurz (groug@kaod.org) wrote:
> > > > On Wed, 29 May 2019 12:18:50 +0100
> > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > >
> > > > > * Greg Kurz (groug@kaod.org) wrote:
> > > > > > On Tue, 28 May 2019 10:08:54 +1000
> > > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > > >
> > > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote:
> > > > > > > > On Mon, 20 May 2019 19:10:35 -0400
> > > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > > >
> > > > > > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > > > > > >
> > > > > > > > > The virtio 1.0 transitional devices support driver uses the device
> > > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started
> > > > > > > > > flag to indicate whether driver has started the device or not.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > > ---
> > > > > > > > >  include/hw/virtio/virtio.h |  2 ++
> > > > > > > > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > > > > > > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > > > > > > index 7140381e3a..27c0efc3d0 100644
> > > > > > > > > --- a/include/hw/virtio/virtio.h
> > > > > > > > > +++ b/include/hw/virtio/virtio.h
> > > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > > > > > > > >      uint16_t device_id;
> > > > > > > > >      bool vm_running;
> > > > > > > > >      bool broken; /* device in invalid state, needs reset */
> > > > > > > > > +    bool started;
> > > > > > > > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > > > > > > > >      VMChangeStateEntry *vmstate;
> > > > > > > > >      char *bus_name;
> > > > > > > > >      uint8_t device_endian;
> > > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > > > > > > index 28056a7ef7..5d533ac74e 100644
> > > > > > > > > --- a/hw/virtio/virtio.c
> > > > > > > > > +++ b/hw/virtio/virtio.c
> > > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > > > > > > > >              }
> > > > > > > > >          }
> > > > > > > > >      }
> > > > > > > > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > > > > > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > > > > > > > +        vdev->start_on_kick = false;
> > > > > > > > > +    }
> > > > > > > > > +
> > > > > > > > >      if (k->set_status) {
> > > > > > > > >          k->set_status(vdev, val);
> > > > > > > > >      }
> > > > > > > > >      vdev->status = val;
> > > > > > > > > +
> > > > > > > > >      return 0;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > > > > > > > >          k->reset(vdev);
> > > > > > > > >      }
> > > > > > > > >
> > > > > > > > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > > > > > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > > > > > > > +    vdev->started = false;
> > > > > > > > >      vdev->broken = false;
> > > > > > > > >      vdev->guest_features = 0;
> > > > > > > > >      vdev->queue_sel = 0;
> > > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > > > > > > > >
> > > > > > > > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > > > > > > > >  {
> > > > > > > > > +    bool ret = false;
> > > > > > > > > +
> > > > > > > > >      if (vq->vring.desc && vq->handle_aio_output) {
> > > > > > > > >          VirtIODevice *vdev = vq->vdev;
> > > > > > > > >
> > > > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > > > -        return vq->handle_aio_output(vdev, vq);
> > > > > > > > > +        ret = vq->handle_aio_output(vdev, vq);
> > > > > > > > > +
> > > > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > +            vdev->started = true;
> > > > > > > > > +            vdev->start_on_kick = false;
> > > > > > > > > +        }
> > > > > > > > >      }
> > > > > > > > >
> > > > > > > > > -    return false;
> > > > > > > > > +    return ret;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > > >
> > > > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > > > +
> > > > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > +            vdev->started = true;
> > > > > > > > > +            vdev->start_on_kick = false;
> > > > > > > > > +        }
> > > > > > > > >      }
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > > > > > > > >      } else if (vq->handle_output) {
> > > > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > > >      }
> > > > > > > > > +
> > > > > > > > > +    if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > +        vdev->started = true;
> > > > > > > > > +        vdev->start_on_kick = false;
> > > > > > > > > +    }
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > > > > > > >      return vdev->broken;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > +static bool virtio_started_needed(void *opaque)
> > > > > > > > > +{
> > > > > > > > > +    VirtIODevice *vdev = opaque;
> > > > > > > > > +
> > > > > > > > > +    return vdev->started;
> > > > > > > >
> > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This
> > > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > > > > > > probably always when it comes to live migration.
> > > > > > > >
> > > > > > > > My understanding is that we do try to support backward migration though. It
> > > > > > > > is a regular practice in datacenters to migrate workloads without having to
> > > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > > > > > > many times in the past because customers had filed bugs.
> > > > > > > >
> > > > > > > > Cc'ing David for his opinion.
> > > > > > >
> > > > > > > Uh.. did you mean to CC me, or Dave Gilbert?
> > > > > > >
> > > > > >
> > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable
> > > > > > as well. I remember being involved in backward migration fixes for spapr
> > > > > > several times.
> > > > > >
> > > > > > > I mean, I think you're right that we should try to maintain backwards
> > > > > > > migration, but this isn't really my area of authority.
> > > > > > >
> > > > > >
> > > > > > Cc'ing Dave Gilbert :)
> > > > >
> > > > > Right, I need to maintain backwards migration compatibility; tie the
> > > > > feature to a machine type so it's only used on newer machine types and
> > > > > then we'll be safe.
> > > > >
> > > > > Having said that, what's the symptom when this goes wrong?
> > > > >
> > > >
> > > > Since the started flag is set as soon as the guest driver begins to use
> > > > the device and remains so until next reset, the associated subsection is
> > > > basically always emitted when migrating a booted guest. This causes
> > > > migration to always fail on the target in this case:
> > >
> > > I meant what's the symptom without this patch series at all?
> > >
> >
> > Oh sorry... if I got it right, migrating when the guest first kicked the
> > device but not set the DRIVE_OK bit yet would result in a guest hang on
> > the destination. Yongji might elaborate a bit more on that.
>
> Hmm - the only thing worse than a migration failing with an error is a
> migration that fails with a hung guest.
>
> If you were sending a new subsection in *only* the case where the guest
> would definitely hang, then I think it would be worth sending the
> subsection and allowing the backwards migration to fail with the error
> because it's a bit better than a hung VM.
>

I considered this approach before. But it needs to add some complex
logic in virtio code to make sure migration works well between
different qemu versions. Since it's hardly to trigger the hung VM bug.
So I still try to use the general way (tie the feature to a machine
type) to fix this migration issue. It's easily to ensure the
compatibility

Thanks,
Yongji
Dr. David Alan Gilbert May 29, 2019, 2:42 p.m. UTC | #17
* Yongji Xie (elohimes@gmail.com) wrote:
> On Wed, 29 May 2019 at 21:43, Dr. David Alan Gilbert
> <dgilbert@redhat.com> wrote:
> >
> > * Greg Kurz (groug@kaod.org) wrote:
> > > On Wed, 29 May 2019 13:38:19 +0100
> > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > >
> > > > * Greg Kurz (groug@kaod.org) wrote:
> > > > > On Wed, 29 May 2019 12:18:50 +0100
> > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > >
> > > > > > * Greg Kurz (groug@kaod.org) wrote:
> > > > > > > On Tue, 28 May 2019 10:08:54 +1000
> > > > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > > > >
> > > > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote:
> > > > > > > > > On Mon, 20 May 2019 19:10:35 -0400
> > > > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > > > > > > >
> > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device
> > > > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started
> > > > > > > > > > flag to indicate whether driver has started the device or not.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > > > ---
> > > > > > > > > >  include/hw/virtio/virtio.h |  2 ++
> > > > > > > > > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > > > > > > > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > > > > > > > index 7140381e3a..27c0efc3d0 100644
> > > > > > > > > > --- a/include/hw/virtio/virtio.h
> > > > > > > > > > +++ b/include/hw/virtio/virtio.h
> > > > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > > > > > > > > >      uint16_t device_id;
> > > > > > > > > >      bool vm_running;
> > > > > > > > > >      bool broken; /* device in invalid state, needs reset */
> > > > > > > > > > +    bool started;
> > > > > > > > > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > > > > > > > > >      VMChangeStateEntry *vmstate;
> > > > > > > > > >      char *bus_name;
> > > > > > > > > >      uint8_t device_endian;
> > > > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > > > > > > > index 28056a7ef7..5d533ac74e 100644
> > > > > > > > > > --- a/hw/virtio/virtio.c
> > > > > > > > > > +++ b/hw/virtio/virtio.c
> > > > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > > > > > > > > >              }
> > > > > > > > > >          }
> > > > > > > > > >      }
> > > > > > > > > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > > > > > > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > > > > > > > > +        vdev->start_on_kick = false;
> > > > > > > > > > +    }
> > > > > > > > > > +
> > > > > > > > > >      if (k->set_status) {
> > > > > > > > > >          k->set_status(vdev, val);
> > > > > > > > > >      }
> > > > > > > > > >      vdev->status = val;
> > > > > > > > > > +
> > > > > > > > > >      return 0;
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > > > > > > > > >          k->reset(vdev);
> > > > > > > > > >      }
> > > > > > > > > >
> > > > > > > > > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > > > > > > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > > > > > > > > +    vdev->started = false;
> > > > > > > > > >      vdev->broken = false;
> > > > > > > > > >      vdev->guest_features = 0;
> > > > > > > > > >      vdev->queue_sel = 0;
> > > > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > > > > > > > > >
> > > > > > > > > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > > > > > > > > >  {
> > > > > > > > > > +    bool ret = false;
> > > > > > > > > > +
> > > > > > > > > >      if (vq->vring.desc && vq->handle_aio_output) {
> > > > > > > > > >          VirtIODevice *vdev = vq->vdev;
> > > > > > > > > >
> > > > > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > > > > -        return vq->handle_aio_output(vdev, vq);
> > > > > > > > > > +        ret = vq->handle_aio_output(vdev, vq);
> > > > > > > > > > +
> > > > > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > > +            vdev->started = true;
> > > > > > > > > > +            vdev->start_on_kick = false;
> > > > > > > > > > +        }
> > > > > > > > > >      }
> > > > > > > > > >
> > > > > > > > > > -    return false;
> > > > > > > > > > +    return ret;
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > > > >
> > > > > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > > > > +
> > > > > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > > +            vdev->started = true;
> > > > > > > > > > +            vdev->start_on_kick = false;
> > > > > > > > > > +        }
> > > > > > > > > >      }
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > > > > > > > > >      } else if (vq->handle_output) {
> > > > > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > > > >      }
> > > > > > > > > > +
> > > > > > > > > > +    if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > > +        vdev->started = true;
> > > > > > > > > > +        vdev->start_on_kick = false;
> > > > > > > > > > +    }
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > > > > > > > >      return vdev->broken;
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > +static bool virtio_started_needed(void *opaque)
> > > > > > > > > > +{
> > > > > > > > > > +    VirtIODevice *vdev = opaque;
> > > > > > > > > > +
> > > > > > > > > > +    return vdev->started;
> > > > > > > > >
> > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This
> > > > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > > > > > > > probably always when it comes to live migration.
> > > > > > > > >
> > > > > > > > > My understanding is that we do try to support backward migration though. It
> > > > > > > > > is a regular practice in datacenters to migrate workloads without having to
> > > > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > > > > > > > many times in the past because customers had filed bugs.
> > > > > > > > >
> > > > > > > > > Cc'ing David for his opinion.
> > > > > > > >
> > > > > > > > Uh.. did you mean to CC me, or Dave Gilbert?
> > > > > > > >
> > > > > > >
> > > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable
> > > > > > > as well. I remember being involved in backward migration fixes for spapr
> > > > > > > several times.
> > > > > > >
> > > > > > > > I mean, I think you're right that we should try to maintain backwards
> > > > > > > > migration, but this isn't really my area of authority.
> > > > > > > >
> > > > > > >
> > > > > > > Cc'ing Dave Gilbert :)
> > > > > >
> > > > > > Right, I need to maintain backwards migration compatibility; tie the
> > > > > > feature to a machine type so it's only used on newer machine types and
> > > > > > then we'll be safe.
> > > > > >
> > > > > > Having said that, what's the symptom when this goes wrong?
> > > > > >
> > > > >
> > > > > Since the started flag is set as soon as the guest driver begins to use
> > > > > the device and remains so until next reset, the associated subsection is
> > > > > basically always emitted when migrating a booted guest. This causes
> > > > > migration to always fail on the target in this case:
> > > >
> > > > I meant what's the symptom without this patch series at all?
> > > >
> > >
> > > Oh sorry... if I got it right, migrating when the guest first kicked the
> > > device but not set the DRIVE_OK bit yet would result in a guest hang on
> > > the destination. Yongji might elaborate a bit more on that.
> >
> > Hmm - the only thing worse than a migration failing with an error is a
> > migration that fails with a hung guest.
> >
> > If you were sending a new subsection in *only* the case where the guest
> > would definitely hang, then I think it would be worth sending the
> > subsection and allowing the backwards migration to fail with the error
> > because it's a bit better than a hung VM.
> >
> 
> I considered this approach before. But it needs to add some complex
> logic in virtio code to make sure migration works well between
> different qemu versions. Since it's hardly to trigger the hung VM bug.
> So I still try to use the general way (tie the feature to a machine
> type) to fix this migration issue. It's easily to ensure the
> compatibility

OK, great.  For reference could you give me a bit more info on how
exactly the failure happens, how you spot it and what the symptoms are;
so that when someone comes to me with a hung VM I know what to look for?

Dave

> Thanks,
> Yongji
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Yongji Xie May 30, 2019, 12:39 a.m. UTC | #18
On Wed, 29 May 2019 at 22:42, Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
>
> * Yongji Xie (elohimes@gmail.com) wrote:
> > On Wed, 29 May 2019 at 21:43, Dr. David Alan Gilbert
> > <dgilbert@redhat.com> wrote:
> > >
> > > * Greg Kurz (groug@kaod.org) wrote:
> > > > On Wed, 29 May 2019 13:38:19 +0100
> > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > >
> > > > > * Greg Kurz (groug@kaod.org) wrote:
> > > > > > On Wed, 29 May 2019 12:18:50 +0100
> > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > > >
> > > > > > > * Greg Kurz (groug@kaod.org) wrote:
> > > > > > > > On Tue, 28 May 2019 10:08:54 +1000
> > > > > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > > > > >
> > > > > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote:
> > > > > > > > > > On Mon, 20 May 2019 19:10:35 -0400
> > > > > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > > > > >
> > > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > > > > > > > >
> > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device
> > > > > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started
> > > > > > > > > > > flag to indicate whether driver has started the device or not.
> > > > > > > > > > >
> > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > > > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > > > > ---
> > > > > > > > > > >  include/hw/virtio/virtio.h |  2 ++
> > > > > > > > > > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > > > > > > > > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > > > > > > > > >
> > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > > > > > > > > index 7140381e3a..27c0efc3d0 100644
> > > > > > > > > > > --- a/include/hw/virtio/virtio.h
> > > > > > > > > > > +++ b/include/hw/virtio/virtio.h
> > > > > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > > > > > > > > > >      uint16_t device_id;
> > > > > > > > > > >      bool vm_running;
> > > > > > > > > > >      bool broken; /* device in invalid state, needs reset */
> > > > > > > > > > > +    bool started;
> > > > > > > > > > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > > > > > > > > > >      VMChangeStateEntry *vmstate;
> > > > > > > > > > >      char *bus_name;
> > > > > > > > > > >      uint8_t device_endian;
> > > > > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > > > > > > > > index 28056a7ef7..5d533ac74e 100644
> > > > > > > > > > > --- a/hw/virtio/virtio.c
> > > > > > > > > > > +++ b/hw/virtio/virtio.c
> > > > > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > > > > > > > > > >              }
> > > > > > > > > > >          }
> > > > > > > > > > >      }
> > > > > > > > > > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > > > > > > > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > > > > > > > > > +        vdev->start_on_kick = false;
> > > > > > > > > > > +    }
> > > > > > > > > > > +
> > > > > > > > > > >      if (k->set_status) {
> > > > > > > > > > >          k->set_status(vdev, val);
> > > > > > > > > > >      }
> > > > > > > > > > >      vdev->status = val;
> > > > > > > > > > > +
> > > > > > > > > > >      return 0;
> > > > > > > > > > >  }
> > > > > > > > > > >
> > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > > > > > > > > > >          k->reset(vdev);
> > > > > > > > > > >      }
> > > > > > > > > > >
> > > > > > > > > > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > > > > > > > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > > > > > > > > > +    vdev->started = false;
> > > > > > > > > > >      vdev->broken = false;
> > > > > > > > > > >      vdev->guest_features = 0;
> > > > > > > > > > >      vdev->queue_sel = 0;
> > > > > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > > > > > > > > > >
> > > > > > > > > > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > > > > > > > > > >  {
> > > > > > > > > > > +    bool ret = false;
> > > > > > > > > > > +
> > > > > > > > > > >      if (vq->vring.desc && vq->handle_aio_output) {
> > > > > > > > > > >          VirtIODevice *vdev = vq->vdev;
> > > > > > > > > > >
> > > > > > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > > > > > -        return vq->handle_aio_output(vdev, vq);
> > > > > > > > > > > +        ret = vq->handle_aio_output(vdev, vq);
> > > > > > > > > > > +
> > > > > > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > > > +            vdev->started = true;
> > > > > > > > > > > +            vdev->start_on_kick = false;
> > > > > > > > > > > +        }
> > > > > > > > > > >      }
> > > > > > > > > > >
> > > > > > > > > > > -    return false;
> > > > > > > > > > > +    return ret;
> > > > > > > > > > >  }
> > > > > > > > > > >
> > > > > > > > > > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > > > > >
> > > > > > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > > > > > +
> > > > > > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > > > +            vdev->started = true;
> > > > > > > > > > > +            vdev->start_on_kick = false;
> > > > > > > > > > > +        }
> > > > > > > > > > >      }
> > > > > > > > > > >  }
> > > > > > > > > > >
> > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > > > > > > > > > >      } else if (vq->handle_output) {
> > > > > > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > > > > >      }
> > > > > > > > > > > +
> > > > > > > > > > > +    if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > > > +        vdev->started = true;
> > > > > > > > > > > +        vdev->start_on_kick = false;
> > > > > > > > > > > +    }
> > > > > > > > > > >  }
> > > > > > > > > > >
> > > > > > > > > > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > > > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > > > > > > > > >      return vdev->broken;
> > > > > > > > > > >  }
> > > > > > > > > > >
> > > > > > > > > > > +static bool virtio_started_needed(void *opaque)
> > > > > > > > > > > +{
> > > > > > > > > > > +    VirtIODevice *vdev = opaque;
> > > > > > > > > > > +
> > > > > > > > > > > +    return vdev->started;
> > > > > > > > > >
> > > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This
> > > > > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > > > > > > > > probably always when it comes to live migration.
> > > > > > > > > >
> > > > > > > > > > My understanding is that we do try to support backward migration though. It
> > > > > > > > > > is a regular practice in datacenters to migrate workloads without having to
> > > > > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > > > > > > > > many times in the past because customers had filed bugs.
> > > > > > > > > >
> > > > > > > > > > Cc'ing David for his opinion.
> > > > > > > > >
> > > > > > > > > Uh.. did you mean to CC me, or Dave Gilbert?
> > > > > > > > >
> > > > > > > >
> > > > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable
> > > > > > > > as well. I remember being involved in backward migration fixes for spapr
> > > > > > > > several times.
> > > > > > > >
> > > > > > > > > I mean, I think you're right that we should try to maintain backwards
> > > > > > > > > migration, but this isn't really my area of authority.
> > > > > > > > >
> > > > > > > >
> > > > > > > > Cc'ing Dave Gilbert :)
> > > > > > >
> > > > > > > Right, I need to maintain backwards migration compatibility; tie the
> > > > > > > feature to a machine type so it's only used on newer machine types and
> > > > > > > then we'll be safe.
> > > > > > >
> > > > > > > Having said that, what's the symptom when this goes wrong?
> > > > > > >
> > > > > >
> > > > > > Since the started flag is set as soon as the guest driver begins to use
> > > > > > the device and remains so until next reset, the associated subsection is
> > > > > > basically always emitted when migrating a booted guest. This causes
> > > > > > migration to always fail on the target in this case:
> > > > >
> > > > > I meant what's the symptom without this patch series at all?
> > > > >
> > > >
> > > > Oh sorry... if I got it right, migrating when the guest first kicked the
> > > > device but not set the DRIVE_OK bit yet would result in a guest hang on
> > > > the destination. Yongji might elaborate a bit more on that.
> > >
> > > Hmm - the only thing worse than a migration failing with an error is a
> > > migration that fails with a hung guest.
> > >
> > > If you were sending a new subsection in *only* the case where the guest
> > > would definitely hang, then I think it would be worth sending the
> > > subsection and allowing the backwards migration to fail with the error
> > > because it's a bit better than a hung VM.
> > >
> >
> > I considered this approach before. But it needs to add some complex
> > logic in virtio code to make sure migration works well between
> > different qemu versions. Since it's hardly to trigger the hung VM bug.
> > So I still try to use the general way (tie the feature to a machine
> > type) to fix this migration issue. It's easily to ensure the
> > compatibility
>
> OK, great.  For reference could you give me a bit more info on how
> exactly the failure happens, how you spot it and what the symptoms are;
> so that when someone comes to me with a hung VM I know what to look for?
>

Oh, sure. This failure would only happen on an old guest (e.g. centos
6u5, centos 6u3) with virtio-blk/vhost-user-blk device. This kind of
guest might be hung when migration completes at the point that guest
is booting (loading virtio-blk driver) because its first I/O would be
hung.

Thanks,
Yongji
Dr. David Alan Gilbert May 30, 2019, 9:06 a.m. UTC | #19
* Yongji Xie (elohimes@gmail.com) wrote:
> On Wed, 29 May 2019 at 22:42, Dr. David Alan Gilbert
> <dgilbert@redhat.com> wrote:
> >
> > * Yongji Xie (elohimes@gmail.com) wrote:
> > > On Wed, 29 May 2019 at 21:43, Dr. David Alan Gilbert
> > > <dgilbert@redhat.com> wrote:
> > > >
> > > > * Greg Kurz (groug@kaod.org) wrote:
> > > > > On Wed, 29 May 2019 13:38:19 +0100
> > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > >
> > > > > > * Greg Kurz (groug@kaod.org) wrote:
> > > > > > > On Wed, 29 May 2019 12:18:50 +0100
> > > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > > > >
> > > > > > > > * Greg Kurz (groug@kaod.org) wrote:
> > > > > > > > > On Tue, 28 May 2019 10:08:54 +1000
> > > > > > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > > > > > >
> > > > > > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote:
> > > > > > > > > > > On Mon, 20 May 2019 19:10:35 -0400
> > > > > > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > > > > > > > > >
> > > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device
> > > > > > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started
> > > > > > > > > > > > flag to indicate whether driver has started the device or not.
> > > > > > > > > > > >
> > > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > > > > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > > > > > ---
> > > > > > > > > > > >  include/hw/virtio/virtio.h |  2 ++
> > > > > > > > > > > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > > > > > > > > > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > > > > > > > > > >
> > > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > > > > > > > > > index 7140381e3a..27c0efc3d0 100644
> > > > > > > > > > > > --- a/include/hw/virtio/virtio.h
> > > > > > > > > > > > +++ b/include/hw/virtio/virtio.h
> > > > > > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > > > > > > > > > > >      uint16_t device_id;
> > > > > > > > > > > >      bool vm_running;
> > > > > > > > > > > >      bool broken; /* device in invalid state, needs reset */
> > > > > > > > > > > > +    bool started;
> > > > > > > > > > > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > > > > > > > > > > >      VMChangeStateEntry *vmstate;
> > > > > > > > > > > >      char *bus_name;
> > > > > > > > > > > >      uint8_t device_endian;
> > > > > > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > > > > > > > > > index 28056a7ef7..5d533ac74e 100644
> > > > > > > > > > > > --- a/hw/virtio/virtio.c
> > > > > > > > > > > > +++ b/hw/virtio/virtio.c
> > > > > > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > > > > > > > > > > >              }
> > > > > > > > > > > >          }
> > > > > > > > > > > >      }
> > > > > > > > > > > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > > > > > > > > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > > > > > > > > > > +        vdev->start_on_kick = false;
> > > > > > > > > > > > +    }
> > > > > > > > > > > > +
> > > > > > > > > > > >      if (k->set_status) {
> > > > > > > > > > > >          k->set_status(vdev, val);
> > > > > > > > > > > >      }
> > > > > > > > > > > >      vdev->status = val;
> > > > > > > > > > > > +
> > > > > > > > > > > >      return 0;
> > > > > > > > > > > >  }
> > > > > > > > > > > >
> > > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > > > > > > > > > > >          k->reset(vdev);
> > > > > > > > > > > >      }
> > > > > > > > > > > >
> > > > > > > > > > > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > > > > > > > > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > > > > > > > > > > +    vdev->started = false;
> > > > > > > > > > > >      vdev->broken = false;
> > > > > > > > > > > >      vdev->guest_features = 0;
> > > > > > > > > > > >      vdev->queue_sel = 0;
> > > > > > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > > > > > > > > > > >
> > > > > > > > > > > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > > > > > > > > > > >  {
> > > > > > > > > > > > +    bool ret = false;
> > > > > > > > > > > > +
> > > > > > > > > > > >      if (vq->vring.desc && vq->handle_aio_output) {
> > > > > > > > > > > >          VirtIODevice *vdev = vq->vdev;
> > > > > > > > > > > >
> > > > > > > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > > > > > > -        return vq->handle_aio_output(vdev, vq);
> > > > > > > > > > > > +        ret = vq->handle_aio_output(vdev, vq);
> > > > > > > > > > > > +
> > > > > > > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > > > > +            vdev->started = true;
> > > > > > > > > > > > +            vdev->start_on_kick = false;
> > > > > > > > > > > > +        }
> > > > > > > > > > > >      }
> > > > > > > > > > > >
> > > > > > > > > > > > -    return false;
> > > > > > > > > > > > +    return ret;
> > > > > > > > > > > >  }
> > > > > > > > > > > >
> > > > > > > > > > > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > > > > > >
> > > > > > > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > > > > > > +
> > > > > > > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > > > > +            vdev->started = true;
> > > > > > > > > > > > +            vdev->start_on_kick = false;
> > > > > > > > > > > > +        }
> > > > > > > > > > > >      }
> > > > > > > > > > > >  }
> > > > > > > > > > > >
> > > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > > > > > > > > > > >      } else if (vq->handle_output) {
> > > > > > > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > > > > > >      }
> > > > > > > > > > > > +
> > > > > > > > > > > > +    if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > > > > +        vdev->started = true;
> > > > > > > > > > > > +        vdev->start_on_kick = false;
> > > > > > > > > > > > +    }
> > > > > > > > > > > >  }
> > > > > > > > > > > >
> > > > > > > > > > > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > > > > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > > > > > > > > > >      return vdev->broken;
> > > > > > > > > > > >  }
> > > > > > > > > > > >
> > > > > > > > > > > > +static bool virtio_started_needed(void *opaque)
> > > > > > > > > > > > +{
> > > > > > > > > > > > +    VirtIODevice *vdev = opaque;
> > > > > > > > > > > > +
> > > > > > > > > > > > +    return vdev->started;
> > > > > > > > > > >
> > > > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This
> > > > > > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > > > > > > > > > probably always when it comes to live migration.
> > > > > > > > > > >
> > > > > > > > > > > My understanding is that we do try to support backward migration though. It
> > > > > > > > > > > is a regular practice in datacenters to migrate workloads without having to
> > > > > > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > > > > > > > > > many times in the past because customers had filed bugs.
> > > > > > > > > > >
> > > > > > > > > > > Cc'ing David for his opinion.
> > > > > > > > > >
> > > > > > > > > > Uh.. did you mean to CC me, or Dave Gilbert?
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable
> > > > > > > > > as well. I remember being involved in backward migration fixes for spapr
> > > > > > > > > several times.
> > > > > > > > >
> > > > > > > > > > I mean, I think you're right that we should try to maintain backwards
> > > > > > > > > > migration, but this isn't really my area of authority.
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > > Cc'ing Dave Gilbert :)
> > > > > > > >
> > > > > > > > Right, I need to maintain backwards migration compatibility; tie the
> > > > > > > > feature to a machine type so it's only used on newer machine types and
> > > > > > > > then we'll be safe.
> > > > > > > >
> > > > > > > > Having said that, what's the symptom when this goes wrong?
> > > > > > > >
> > > > > > >
> > > > > > > Since the started flag is set as soon as the guest driver begins to use
> > > > > > > the device and remains so until next reset, the associated subsection is
> > > > > > > basically always emitted when migrating a booted guest. This causes
> > > > > > > migration to always fail on the target in this case:
> > > > > >
> > > > > > I meant what's the symptom without this patch series at all?
> > > > > >
> > > > >
> > > > > Oh sorry... if I got it right, migrating when the guest first kicked the
> > > > > device but not set the DRIVE_OK bit yet would result in a guest hang on
> > > > > the destination. Yongji might elaborate a bit more on that.
> > > >
> > > > Hmm - the only thing worse than a migration failing with an error is a
> > > > migration that fails with a hung guest.
> > > >
> > > > If you were sending a new subsection in *only* the case where the guest
> > > > would definitely hang, then I think it would be worth sending the
> > > > subsection and allowing the backwards migration to fail with the error
> > > > because it's a bit better than a hung VM.
> > > >
> > >
> > > I considered this approach before. But it needs to add some complex
> > > logic in virtio code to make sure migration works well between
> > > different qemu versions. Since it's hardly to trigger the hung VM bug.
> > > So I still try to use the general way (tie the feature to a machine
> > > type) to fix this migration issue. It's easily to ensure the
> > > compatibility
> >
> > OK, great.  For reference could you give me a bit more info on how
> > exactly the failure happens, how you spot it and what the symptoms are;
> > so that when someone comes to me with a hung VM I know what to look for?
> >
> 
> Oh, sure. This failure would only happen on an old guest (e.g. centos
> 6u5, centos 6u3) with virtio-blk/vhost-user-blk device. This kind of
> guest might be hung when migration completes at the point that guest
> is booting (loading virtio-blk driver) because its first I/O would be
> hung.

OK, thanks.
But a migration when the VM is running is fine? - it's only a problem
if it happens during bootup, before the driver starts?

Dave

> Thanks,
> Yongji
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Yongji Xie May 30, 2019, 9:26 a.m. UTC | #20
On Thu, 30 May 2019 at 17:06, Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
>
> * Yongji Xie (elohimes@gmail.com) wrote:
> > On Wed, 29 May 2019 at 22:42, Dr. David Alan Gilbert
> > <dgilbert@redhat.com> wrote:
> > >
> > > * Yongji Xie (elohimes@gmail.com) wrote:
> > > > On Wed, 29 May 2019 at 21:43, Dr. David Alan Gilbert
> > > > <dgilbert@redhat.com> wrote:
> > > > >
> > > > > * Greg Kurz (groug@kaod.org) wrote:
> > > > > > On Wed, 29 May 2019 13:38:19 +0100
> > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > > >
> > > > > > > * Greg Kurz (groug@kaod.org) wrote:
> > > > > > > > On Wed, 29 May 2019 12:18:50 +0100
> > > > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > > > > >
> > > > > > > > > * Greg Kurz (groug@kaod.org) wrote:
> > > > > > > > > > On Tue, 28 May 2019 10:08:54 +1000
> > > > > > > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > > > > > > >
> > > > > > > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote:
> > > > > > > > > > > > On Mon, 20 May 2019 19:10:35 -0400
> > > > > > > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > > > > > > > > > >
> > > > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device
> > > > > > > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started
> > > > > > > > > > > > > flag to indicate whether driver has started the device or not.
> > > > > > > > > > > > >
> > > > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > > > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > > > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > > > > > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > > > > > > ---
> > > > > > > > > > > > >  include/hw/virtio/virtio.h |  2 ++
> > > > > > > > > > > > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > > > > > > > > > > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > > > > > > > > > > >
> > > > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > > > > > > > > > > index 7140381e3a..27c0efc3d0 100644
> > > > > > > > > > > > > --- a/include/hw/virtio/virtio.h
> > > > > > > > > > > > > +++ b/include/hw/virtio/virtio.h
> > > > > > > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > > > > > > > > > > > >      uint16_t device_id;
> > > > > > > > > > > > >      bool vm_running;
> > > > > > > > > > > > >      bool broken; /* device in invalid state, needs reset */
> > > > > > > > > > > > > +    bool started;
> > > > > > > > > > > > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > > > > > > > > > > > >      VMChangeStateEntry *vmstate;
> > > > > > > > > > > > >      char *bus_name;
> > > > > > > > > > > > >      uint8_t device_endian;
> > > > > > > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > > > > > > > > > > index 28056a7ef7..5d533ac74e 100644
> > > > > > > > > > > > > --- a/hw/virtio/virtio.c
> > > > > > > > > > > > > +++ b/hw/virtio/virtio.c
> > > > > > > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > > > > > > > > > > > >              }
> > > > > > > > > > > > >          }
> > > > > > > > > > > > >      }
> > > > > > > > > > > > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > > > > > > > > > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > > > > > > > > > > > +        vdev->start_on_kick = false;
> > > > > > > > > > > > > +    }
> > > > > > > > > > > > > +
> > > > > > > > > > > > >      if (k->set_status) {
> > > > > > > > > > > > >          k->set_status(vdev, val);
> > > > > > > > > > > > >      }
> > > > > > > > > > > > >      vdev->status = val;
> > > > > > > > > > > > > +
> > > > > > > > > > > > >      return 0;
> > > > > > > > > > > > >  }
> > > > > > > > > > > > >
> > > > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > > > > > > > > > > > >          k->reset(vdev);
> > > > > > > > > > > > >      }
> > > > > > > > > > > > >
> > > > > > > > > > > > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > > > > > > > > > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > > > > > > > > > > > +    vdev->started = false;
> > > > > > > > > > > > >      vdev->broken = false;
> > > > > > > > > > > > >      vdev->guest_features = 0;
> > > > > > > > > > > > >      vdev->queue_sel = 0;
> > > > > > > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > > > > > > > > > > > >
> > > > > > > > > > > > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > > > > > > > > > > > >  {
> > > > > > > > > > > > > +    bool ret = false;
> > > > > > > > > > > > > +
> > > > > > > > > > > > >      if (vq->vring.desc && vq->handle_aio_output) {
> > > > > > > > > > > > >          VirtIODevice *vdev = vq->vdev;
> > > > > > > > > > > > >
> > > > > > > > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > > > > > > > -        return vq->handle_aio_output(vdev, vq);
> > > > > > > > > > > > > +        ret = vq->handle_aio_output(vdev, vq);
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > > > > > +            vdev->started = true;
> > > > > > > > > > > > > +            vdev->start_on_kick = false;
> > > > > > > > > > > > > +        }
> > > > > > > > > > > > >      }
> > > > > > > > > > > > >
> > > > > > > > > > > > > -    return false;
> > > > > > > > > > > > > +    return ret;
> > > > > > > > > > > > >  }
> > > > > > > > > > > > >
> > > > > > > > > > > > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > > > > > > >
> > > > > > > > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > > > > > +            vdev->started = true;
> > > > > > > > > > > > > +            vdev->start_on_kick = false;
> > > > > > > > > > > > > +        }
> > > > > > > > > > > > >      }
> > > > > > > > > > > > >  }
> > > > > > > > > > > > >
> > > > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > > > > > > > > > > > >      } else if (vq->handle_output) {
> > > > > > > > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > > > > > > >      }
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +    if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > > > > > +        vdev->started = true;
> > > > > > > > > > > > > +        vdev->start_on_kick = false;
> > > > > > > > > > > > > +    }
> > > > > > > > > > > > >  }
> > > > > > > > > > > > >
> > > > > > > > > > > > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > > > > > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > > > > > > > > > > >      return vdev->broken;
> > > > > > > > > > > > >  }
> > > > > > > > > > > > >
> > > > > > > > > > > > > +static bool virtio_started_needed(void *opaque)
> > > > > > > > > > > > > +{
> > > > > > > > > > > > > +    VirtIODevice *vdev = opaque;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +    return vdev->started;
> > > > > > > > > > > >
> > > > > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This
> > > > > > > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > > > > > > > > > > probably always when it comes to live migration.
> > > > > > > > > > > >
> > > > > > > > > > > > My understanding is that we do try to support backward migration though. It
> > > > > > > > > > > > is a regular practice in datacenters to migrate workloads without having to
> > > > > > > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > > > > > > > > > > many times in the past because customers had filed bugs.
> > > > > > > > > > > >
> > > > > > > > > > > > Cc'ing David for his opinion.
> > > > > > > > > > >
> > > > > > > > > > > Uh.. did you mean to CC me, or Dave Gilbert?
> > > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable
> > > > > > > > > > as well. I remember being involved in backward migration fixes for spapr
> > > > > > > > > > several times.
> > > > > > > > > >
> > > > > > > > > > > I mean, I think you're right that we should try to maintain backwards
> > > > > > > > > > > migration, but this isn't really my area of authority.
> > > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Cc'ing Dave Gilbert :)
> > > > > > > > >
> > > > > > > > > Right, I need to maintain backwards migration compatibility; tie the
> > > > > > > > > feature to a machine type so it's only used on newer machine types and
> > > > > > > > > then we'll be safe.
> > > > > > > > >
> > > > > > > > > Having said that, what's the symptom when this goes wrong?
> > > > > > > > >
> > > > > > > >
> > > > > > > > Since the started flag is set as soon as the guest driver begins to use
> > > > > > > > the device and remains so until next reset, the associated subsection is
> > > > > > > > basically always emitted when migrating a booted guest. This causes
> > > > > > > > migration to always fail on the target in this case:
> > > > > > >
> > > > > > > I meant what's the symptom without this patch series at all?
> > > > > > >
> > > > > >
> > > > > > Oh sorry... if I got it right, migrating when the guest first kicked the
> > > > > > device but not set the DRIVE_OK bit yet would result in a guest hang on
> > > > > > the destination. Yongji might elaborate a bit more on that.
> > > > >
> > > > > Hmm - the only thing worse than a migration failing with an error is a
> > > > > migration that fails with a hung guest.
> > > > >
> > > > > If you were sending a new subsection in *only* the case where the guest
> > > > > would definitely hang, then I think it would be worth sending the
> > > > > subsection and allowing the backwards migration to fail with the error
> > > > > because it's a bit better than a hung VM.
> > > > >
> > > >
> > > > I considered this approach before. But it needs to add some complex
> > > > logic in virtio code to make sure migration works well between
> > > > different qemu versions. Since it's hardly to trigger the hung VM bug.
> > > > So I still try to use the general way (tie the feature to a machine
> > > > type) to fix this migration issue. It's easily to ensure the
> > > > compatibility
> > >
> > > OK, great.  For reference could you give me a bit more info on how
> > > exactly the failure happens, how you spot it and what the symptoms are;
> > > so that when someone comes to me with a hung VM I know what to look for?
> > >
> >
> > Oh, sure. This failure would only happen on an old guest (e.g. centos
> > 6u5, centos 6u3) with virtio-blk/vhost-user-blk device. This kind of
> > guest might be hung when migration completes at the point that guest
> > is booting (loading virtio-blk driver) because its first I/O would be
> > hung.
>
> OK, thanks.
> But a migration when the VM is running is fine? - it's only a problem
> if it happens during bootup, before the driver starts?
>

Yes. Running VM would be fine unless we reload the driver manually at
that point...

Thanks,
Yongji
Dr. David Alan Gilbert May 30, 2019, 9:34 a.m. UTC | #21
* Yongji Xie (elohimes@gmail.com) wrote:
> On Thu, 30 May 2019 at 17:06, Dr. David Alan Gilbert
> <dgilbert@redhat.com> wrote:
> >
> > * Yongji Xie (elohimes@gmail.com) wrote:
> > > On Wed, 29 May 2019 at 22:42, Dr. David Alan Gilbert
> > > <dgilbert@redhat.com> wrote:
> > > >
> > > > * Yongji Xie (elohimes@gmail.com) wrote:
> > > > > On Wed, 29 May 2019 at 21:43, Dr. David Alan Gilbert
> > > > > <dgilbert@redhat.com> wrote:
> > > > > >
> > > > > > * Greg Kurz (groug@kaod.org) wrote:
> > > > > > > On Wed, 29 May 2019 13:38:19 +0100
> > > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > > > >
> > > > > > > > * Greg Kurz (groug@kaod.org) wrote:
> > > > > > > > > On Wed, 29 May 2019 12:18:50 +0100
> > > > > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > > > * Greg Kurz (groug@kaod.org) wrote:
> > > > > > > > > > > On Tue, 28 May 2019 10:08:54 +1000
> > > > > > > > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > > > > > > > >
> > > > > > > > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote:
> > > > > > > > > > > > > On Mon, 20 May 2019 19:10:35 -0400
> > > > > > > > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > > > > > > > >
> > > > > > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com>
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device
> > > > > > > > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started
> > > > > > > > > > > > > > flag to indicate whether driver has started the device or not.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com>
> > > > > > > > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> > > > > > > > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com>
> > > > > > > > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > > > > > > > > > > ---
> > > > > > > > > > > > > >  include/hw/virtio/virtio.h |  2 ++
> > > > > > > > > > > > > >  hw/virtio/virtio.c         | 52 ++++++++++++++++++++++++++++++++++++--
> > > > > > > > > > > > > >  2 files changed, 52 insertions(+), 2 deletions(-)
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > > > > > > > > > > > > > index 7140381e3a..27c0efc3d0 100644
> > > > > > > > > > > > > > --- a/include/hw/virtio/virtio.h
> > > > > > > > > > > > > > +++ b/include/hw/virtio/virtio.h
> > > > > > > > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice
> > > > > > > > > > > > > >      uint16_t device_id;
> > > > > > > > > > > > > >      bool vm_running;
> > > > > > > > > > > > > >      bool broken; /* device in invalid state, needs reset */
> > > > > > > > > > > > > > +    bool started;
> > > > > > > > > > > > > > +    bool start_on_kick; /* virtio 1.0 transitional devices support that */
> > > > > > > > > > > > > >      VMChangeStateEntry *vmstate;
> > > > > > > > > > > > > >      char *bus_name;
> > > > > > > > > > > > > >      uint8_t device_endian;
> > > > > > > > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > > > > > > > > > > > > > index 28056a7ef7..5d533ac74e 100644
> > > > > > > > > > > > > > --- a/hw/virtio/virtio.c
> > > > > > > > > > > > > > +++ b/hw/virtio/virtio.c
> > > > > > > > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
> > > > > > > > > > > > > >              }
> > > > > > > > > > > > > >          }
> > > > > > > > > > > > > >      }
> > > > > > > > > > > > > > +    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > > > > > > > > > > > > > +    if (unlikely(vdev->start_on_kick && vdev->started)) {
> > > > > > > > > > > > > > +        vdev->start_on_kick = false;
> > > > > > > > > > > > > > +    }
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > >      if (k->set_status) {
> > > > > > > > > > > > > >          k->set_status(vdev, val);
> > > > > > > > > > > > > >      }
> > > > > > > > > > > > > >      vdev->status = val;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > >      return 0;
> > > > > > > > > > > > > >  }
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque)
> > > > > > > > > > > > > >          k->reset(vdev);
> > > > > > > > > > > > > >      }
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > +    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
> > > > > > > > > > > > > > +                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
> > > > > > > > > > > > > > +    vdev->started = false;
> > > > > > > > > > > > > >      vdev->broken = false;
> > > > > > > > > > > > > >      vdev->guest_features = 0;
> > > > > > > > > > > > > >      vdev->queue_sel = 0;
> > > > > > > > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >  static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
> > > > > > > > > > > > > >  {
> > > > > > > > > > > > > > +    bool ret = false;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > >      if (vq->vring.desc && vq->handle_aio_output) {
> > > > > > > > > > > > > >          VirtIODevice *vdev = vq->vdev;
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > > > > > > > > -        return vq->handle_aio_output(vdev, vq);
> > > > > > > > > > > > > > +        ret = vq->handle_aio_output(vdev, vq);
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > > > > > > +            vdev->started = true;
> > > > > > > > > > > > > > +            vdev->start_on_kick = false;
> > > > > > > > > > > > > > +        }
> > > > > > > > > > > > > >      }
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > -    return false;
> > > > > > > > > > > > > > +    return ret;
> > > > > > > > > > > > > >  }
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >  static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> > > > > > > > > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +        if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > > > > > > +            vdev->started = true;
> > > > > > > > > > > > > > +            vdev->start_on_kick = false;
> > > > > > > > > > > > > > +        }
> > > > > > > > > > > > > >      }
> > > > > > > > > > > > > >  }
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
> > > > > > > > > > > > > >      } else if (vq->handle_output) {
> > > > > > > > > > > > > >          vq->handle_output(vdev, vq);
> > > > > > > > > > > > > >      }
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +    if (unlikely(vdev->start_on_kick)) {
> > > > > > > > > > > > > > +        vdev->started = true;
> > > > > > > > > > > > > > +        vdev->start_on_kick = false;
> > > > > > > > > > > > > > +    }
> > > > > > > > > > > > > >  }
> > > > > > > > > > > > > >
> > > > > > > > > > > > > >  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
> > > > > > > > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > > > > > > > > > > > >      return vdev->broken;
> > > > > > > > > > > > > >  }
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > +static bool virtio_started_needed(void *opaque)
> > > > > > > > > > > > > > +{
> > > > > > > > > > > > > > +    VirtIODevice *vdev = opaque;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +    return vdev->started;
> > > > > > > > > > > > >
> > > > > > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This
> > > > > > > > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > > > > > > > > > > > probably always when it comes to live migration.
> > > > > > > > > > > > >
> > > > > > > > > > > > > My understanding is that we do try to support backward migration though. It
> > > > > > > > > > > > > is a regular practice in datacenters to migrate workloads without having to
> > > > > > > > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > > > > > > > > > > > many times in the past because customers had filed bugs.
> > > > > > > > > > > > >
> > > > > > > > > > > > > Cc'ing David for his opinion.
> > > > > > > > > > > >
> > > > > > > > > > > > Uh.. did you mean to CC me, or Dave Gilbert?
> > > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable
> > > > > > > > > > > as well. I remember being involved in backward migration fixes for spapr
> > > > > > > > > > > several times.
> > > > > > > > > > >
> > > > > > > > > > > > I mean, I think you're right that we should try to maintain backwards
> > > > > > > > > > > > migration, but this isn't really my area of authority.
> > > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > Cc'ing Dave Gilbert :)
> > > > > > > > > >
> > > > > > > > > > Right, I need to maintain backwards migration compatibility; tie the
> > > > > > > > > > feature to a machine type so it's only used on newer machine types and
> > > > > > > > > > then we'll be safe.
> > > > > > > > > >
> > > > > > > > > > Having said that, what's the symptom when this goes wrong?
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > > Since the started flag is set as soon as the guest driver begins to use
> > > > > > > > > the device and remains so until next reset, the associated subsection is
> > > > > > > > > basically always emitted when migrating a booted guest. This causes
> > > > > > > > > migration to always fail on the target in this case:
> > > > > > > >
> > > > > > > > I meant what's the symptom without this patch series at all?
> > > > > > > >
> > > > > > >
> > > > > > > Oh sorry... if I got it right, migrating when the guest first kicked the
> > > > > > > device but not set the DRIVE_OK bit yet would result in a guest hang on
> > > > > > > the destination. Yongji might elaborate a bit more on that.
> > > > > >
> > > > > > Hmm - the only thing worse than a migration failing with an error is a
> > > > > > migration that fails with a hung guest.
> > > > > >
> > > > > > If you were sending a new subsection in *only* the case where the guest
> > > > > > would definitely hang, then I think it would be worth sending the
> > > > > > subsection and allowing the backwards migration to fail with the error
> > > > > > because it's a bit better than a hung VM.
> > > > > >
> > > > >
> > > > > I considered this approach before. But it needs to add some complex
> > > > > logic in virtio code to make sure migration works well between
> > > > > different qemu versions. Since it's hardly to trigger the hung VM bug.
> > > > > So I still try to use the general way (tie the feature to a machine
> > > > > type) to fix this migration issue. It's easily to ensure the
> > > > > compatibility
> > > >
> > > > OK, great.  For reference could you give me a bit more info on how
> > > > exactly the failure happens, how you spot it and what the symptoms are;
> > > > so that when someone comes to me with a hung VM I know what to look for?
> > > >
> > >
> > > Oh, sure. This failure would only happen on an old guest (e.g. centos
> > > 6u5, centos 6u3) with virtio-blk/vhost-user-blk device. This kind of
> > > guest might be hung when migration completes at the point that guest
> > > is booting (loading virtio-blk driver) because its first I/O would be
> > > hung.
> >
> > OK, thanks.
> > But a migration when the VM is running is fine? - it's only a problem
> > if it happens during bootup, before the driver starts?
> >
> 
> Yes. Running VM would be fine unless we reload the driver manually at
> that point...

OK, thanks.

Dave

> Thanks,
> Yongji
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Eduardo Habkost May 31, 2019, 7:36 p.m. UTC | #22
On Tue, May 28, 2019 at 10:48:09AM +0800, Yongji Xie wrote:
> On Tue, 28 May 2019 at 02:54, Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Mon, May 27, 2019 at 12:44:46PM +0200, Greg Kurz wrote:
> > > On Fri, 24 May 2019 19:56:06 +0800
> > > Yongji Xie <elohimes@gmail.com> wrote:
> > >
> > > > On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote:
> > > > >
> > > > > On Mon, 20 May 2019 19:10:35 -0400
> > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > >
> > > > > > From: Xie Yongji <xieyongji@baidu.com>
[...]
> > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > > > >      return vdev->broken;
> > > > > >  }
> > > > > >
> > > > > > +static bool virtio_started_needed(void *opaque)
> > > > > > +{
> > > > > > +    VirtIODevice *vdev = opaque;
> > > > > > +
> > > > > > +    return vdev->started;
> > > > >
> > > > > Existing machine types don't know about the "virtio/started" subsection. This
> > > > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > > > probably always when it comes to live migration.
> > > > >
> > > > > My understanding is that we do try to support backward migration though. It
> > > > > is a regular practice in datacenters to migrate workloads without having to
> > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > > > many times in the past because customers had filed bugs.
> > > > >
> > > >
> > > > If we do need to support backward migration, for this patch, what I
> > > > can think of is to only migrate the flag in the case that guest kicks
> > > > but not set DRIVER_OK. This could fix backward migration in most case.
> > >
> > > You mean something like that ?
> > >
> > > static bool virtio_started_needed(void *opaque)
> > > {
> > >     VirtIODevice *vdev = opaque;
> > >
> > >     return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
> > > }
> > >
> > > > Not sure if there is a more general approach...
> > > >
> > >
> > > Another approach would be to only implement the started flag for
> > > machine version > 4.0. This can be achieved by adding a "use-started"
> > > property to the base virtio device, true by default and set to
> > > false by hw_compat_4_0.
> >
> > I think this is best frankly.
> >
> 
> Only implement the started flag for machine version > 4.0 might not be
> good because vhost-user-blk now need to use this flag. How about only
> migrating this flag for machine version > 4.0 instead?

Was this implemented?  Is migration from QEMU 4.1 to QEMU 4.0
currently broken?
Greg Kurz June 1, 2019, 3:49 p.m. UTC | #23
On Fri, 31 May 2019 16:36:33 -0300
Eduardo Habkost <ehabkost@redhat.com> wrote:

> On Tue, May 28, 2019 at 10:48:09AM +0800, Yongji Xie wrote:
> > On Tue, 28 May 2019 at 02:54, Michael S. Tsirkin <mst@redhat.com> wrote:  
> > >
> > > On Mon, May 27, 2019 at 12:44:46PM +0200, Greg Kurz wrote:  
> > > > On Fri, 24 May 2019 19:56:06 +0800
> > > > Yongji Xie <elohimes@gmail.com> wrote:
> > > >  
> > > > > On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote:  
> > > > > >
> > > > > > On Mon, 20 May 2019 19:10:35 -0400
> > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > >  
> > > > > > > From: Xie Yongji <xieyongji@baidu.com>  
> [...]
> > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> > > > > > >      return vdev->broken;
> > > > > > >  }
> > > > > > >
> > > > > > > +static bool virtio_started_needed(void *opaque)
> > > > > > > +{
> > > > > > > +    VirtIODevice *vdev = opaque;
> > > > > > > +
> > > > > > > +    return vdev->started;  
> > > > > >
> > > > > > Existing machine types don't know about the "virtio/started" subsection. This
> > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most
> > > > > > probably always when it comes to live migration.
> > > > > >
> > > > > > My understanding is that we do try to support backward migration though. It
> > > > > > is a regular practice in datacenters to migrate workloads without having to
> > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream
> > > > > > many times in the past because customers had filed bugs.
> > > > > >  
> > > > >
> > > > > If we do need to support backward migration, for this patch, what I
> > > > > can think of is to only migrate the flag in the case that guest kicks
> > > > > but not set DRIVER_OK. This could fix backward migration in most case.  
> > > >
> > > > You mean something like that ?
> > > >
> > > > static bool virtio_started_needed(void *opaque)
> > > > {
> > > >     VirtIODevice *vdev = opaque;
> > > >
> > > >     return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
> > > > }
> > > >  
> > > > > Not sure if there is a more general approach...
> > > > >  
> > > >
> > > > Another approach would be to only implement the started flag for
> > > > machine version > 4.0. This can be achieved by adding a "use-started"
> > > > property to the base virtio device, true by default and set to
> > > > false by hw_compat_4_0.  
> > >
> > > I think this is best frankly.
> > >  
> > 
> > Only implement the started flag for machine version > 4.0 might not be
> > good because vhost-user-blk now need to use this flag. How about only
> > migrating this flag for machine version > 4.0 instead?  
> 
> Was this implemented?  Is migration from QEMU 4.1 to QEMU 4.0
> currently broken?
> 

Answer is yes for both questions.
Laurent Vivier June 24, 2019, 5:54 p.m. UTC | #24
On 01/06/2019 17:49, Greg Kurz wrote:
> On Fri, 31 May 2019 16:36:33 -0300
> Eduardo Habkost <ehabkost@redhat.com> wrote:
> 
>> On Tue, May 28, 2019 at 10:48:09AM +0800, Yongji Xie wrote:
>>> On Tue, 28 May 2019 at 02:54, Michael S. Tsirkin <mst@redhat.com> wrote:  
>>>>
>>>> On Mon, May 27, 2019 at 12:44:46PM +0200, Greg Kurz wrote:  
>>>>> On Fri, 24 May 2019 19:56:06 +0800
>>>>> Yongji Xie <elohimes@gmail.com> wrote:
>>>>>  
>>>>>> On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote:  
>>>>>>>
>>>>>>> On Mon, 20 May 2019 19:10:35 -0400
>>>>>>> "Michael S. Tsirkin" <mst@redhat.com> wrote:
>>>>>>>  
>>>>>>>> From: Xie Yongji <xieyongji@baidu.com>  
>> [...]
>>>>>>>> @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
>>>>>>>>      return vdev->broken;
>>>>>>>>  }
>>>>>>>>
>>>>>>>> +static bool virtio_started_needed(void *opaque)
>>>>>>>> +{
>>>>>>>> +    VirtIODevice *vdev = opaque;
>>>>>>>> +
>>>>>>>> +    return vdev->started;  
>>>>>>>
>>>>>>> Existing machine types don't know about the "virtio/started" subsection. This
>>>>>>> breaks migration to older QEMUs if the driver has started the device, ie. most
>>>>>>> probably always when it comes to live migration.
>>>>>>>
>>>>>>> My understanding is that we do try to support backward migration though. It
>>>>>>> is a regular practice in datacenters to migrate workloads without having to
>>>>>>> take care of the QEMU version. FWIW I had to fix similar issues downstream
>>>>>>> many times in the past because customers had filed bugs.
>>>>>>>  
>>>>>>
>>>>>> If we do need to support backward migration, for this patch, what I
>>>>>> can think of is to only migrate the flag in the case that guest kicks
>>>>>> but not set DRIVER_OK. This could fix backward migration in most case.  
>>>>>
>>>>> You mean something like that ?
>>>>>
>>>>> static bool virtio_started_needed(void *opaque)
>>>>> {
>>>>>     VirtIODevice *vdev = opaque;
>>>>>
>>>>>     return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
>>>>> }
>>>>>  
>>>>>> Not sure if there is a more general approach...
>>>>>>  
>>>>>
>>>>> Another approach would be to only implement the started flag for
>>>>> machine version > 4.0. This can be achieved by adding a "use-started"
>>>>> property to the base virtio device, true by default and set to
>>>>> false by hw_compat_4_0.  
>>>>
>>>> I think this is best frankly.
>>>>  
>>>
>>> Only implement the started flag for machine version > 4.0 might not be
>>> good because vhost-user-blk now need to use this flag. How about only
>>> migrating this flag for machine version > 4.0 instead?  
>>
>> Was this implemented?  Is migration from QEMU 4.1 to QEMU 4.0
>> currently broken?
>>
> 
> Answer is yes for both questions.
> 

Is there a fix?

The problem is really easy to reproduce: start a guest with virtio-blk
and migrate once the driver has started.

Thanks,
Laurent
Greg Kurz July 5, 2019, 1:45 p.m. UTC | #25
On Mon, 24 Jun 2019 19:54:27 +0200
Laurent Vivier <lvivier@redhat.com> wrote:

> On 01/06/2019 17:49, Greg Kurz wrote:
> > On Fri, 31 May 2019 16:36:33 -0300
> > Eduardo Habkost <ehabkost@redhat.com> wrote:
> > 
> >> On Tue, May 28, 2019 at 10:48:09AM +0800, Yongji Xie wrote:
> >>> On Tue, 28 May 2019 at 02:54, Michael S. Tsirkin <mst@redhat.com> wrote:  
> >>>>
> >>>> On Mon, May 27, 2019 at 12:44:46PM +0200, Greg Kurz wrote:  
> >>>>> On Fri, 24 May 2019 19:56:06 +0800
> >>>>> Yongji Xie <elohimes@gmail.com> wrote:
> >>>>>  
> >>>>>> On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote:  
> >>>>>>>
> >>>>>>> On Mon, 20 May 2019 19:10:35 -0400
> >>>>>>> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> >>>>>>>  
> >>>>>>>> From: Xie Yongji <xieyongji@baidu.com>  
> >> [...]
> >>>>>>>> @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque)
> >>>>>>>>      return vdev->broken;
> >>>>>>>>  }
> >>>>>>>>
> >>>>>>>> +static bool virtio_started_needed(void *opaque)
> >>>>>>>> +{
> >>>>>>>> +    VirtIODevice *vdev = opaque;
> >>>>>>>> +
> >>>>>>>> +    return vdev->started;  
> >>>>>>>
> >>>>>>> Existing machine types don't know about the "virtio/started" subsection. This
> >>>>>>> breaks migration to older QEMUs if the driver has started the device, ie. most
> >>>>>>> probably always when it comes to live migration.
> >>>>>>>
> >>>>>>> My understanding is that we do try to support backward migration though. It
> >>>>>>> is a regular practice in datacenters to migrate workloads without having to
> >>>>>>> take care of the QEMU version. FWIW I had to fix similar issues downstream
> >>>>>>> many times in the past because customers had filed bugs.
> >>>>>>>  
> >>>>>>
> >>>>>> If we do need to support backward migration, for this patch, what I
> >>>>>> can think of is to only migrate the flag in the case that guest kicks
> >>>>>> but not set DRIVER_OK. This could fix backward migration in most case.  
> >>>>>
> >>>>> You mean something like that ?
> >>>>>
> >>>>> static bool virtio_started_needed(void *opaque)
> >>>>> {
> >>>>>     VirtIODevice *vdev = opaque;
> >>>>>
> >>>>>     return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
> >>>>> }
> >>>>>  
> >>>>>> Not sure if there is a more general approach...
> >>>>>>  
> >>>>>
> >>>>> Another approach would be to only implement the started flag for
> >>>>> machine version > 4.0. This can be achieved by adding a "use-started"
> >>>>> property to the base virtio device, true by default and set to
> >>>>> false by hw_compat_4_0.  
> >>>>
> >>>> I think this is best frankly.
> >>>>  
> >>>
> >>> Only implement the started flag for machine version > 4.0 might not be
> >>> good because vhost-user-blk now need to use this flag. How about only
> >>> migrating this flag for machine version > 4.0 instead?  
> >>
> >> Was this implemented?  Is migration from QEMU 4.1 to QEMU 4.0
> >> currently broken?
> >>
> > 
> > Answer is yes for both questions.
> > 
> 
> Is there a fix?
> 

The fix was merged with this PR:

https://lists.gnu.org/archive/html/qemu-devel/2019-07/msg01565.html

Cheers,

--
Greg

> The problem is really easy to reproduce: start a guest with virtio-blk
> and migrate once the driver has started.
> 
> Thanks,
> Laurent
diff mbox series

Patch

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 2626a895cb..af7b59b4ae 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1162,10 +1162,16 @@  int virtio_set_status(VirtIODevice *vdev, uint8_t val)
             }
         }
     }
+    vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
+    if (unlikely(vdev->start_on_kick && vdev->started)) {
+        vdev->start_on_kick = false;
+    }
+
     if (k->set_status) {
         k->set_status(vdev, val);
     }
     vdev->status = val;
+
     return 0;
 }
 
@@ -1208,6 +1214,9 @@  void virtio_reset(void *opaque)
         k->reset(vdev);
     }
 
+    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
+                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
+    vdev->started = false;
     vdev->broken = false;
     vdev->guest_features = 0;
     vdev->queue_sel = 0;
@@ -1518,14 +1527,21 @@  void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
 
 static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
 {
+    bool ret = false;
+
     if (vq->vring.desc && vq->handle_aio_output) {
         VirtIODevice *vdev = vq->vdev;
 
         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
-        return vq->handle_aio_output(vdev, vq);
+        ret = vq->handle_aio_output(vdev, vq);
+
+        if (unlikely(vdev->start_on_kick)) {
+            vdev->started = true;
+            vdev->start_on_kick = false;
+        }
     }
 
-    return false;
+    return ret;
 }
 
 static void virtio_queue_notify_vq(VirtQueue *vq)
@@ -1539,6 +1555,11 @@  static void virtio_queue_notify_vq(VirtQueue *vq)
 
         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
         vq->handle_output(vdev, vq);
+
+        if (unlikely(vdev->start_on_kick)) {
+            vdev->started = true;
+            vdev->start_on_kick = false;
+        }
     }
 }
 
@@ -1556,6 +1577,11 @@  void virtio_queue_notify(VirtIODevice *vdev, int n)
     } else if (vq->handle_output) {
         vq->handle_output(vdev, vq);
     }
+
+    if (unlikely(vdev->start_on_kick)) {
+        vdev->started = true;
+        vdev->start_on_kick = false;
+    }
 }
 
 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
@@ -1770,6 +1796,13 @@  static bool virtio_broken_needed(void *opaque)
     return vdev->broken;
 }
 
+static bool virtio_started_needed(void *opaque)
+{
+    VirtIODevice *vdev = opaque;
+
+    return vdev->started;
+}
+
 static const VMStateDescription vmstate_virtqueue = {
     .name = "virtqueue_state",
     .version_id = 1,
@@ -1898,6 +1931,17 @@  static const VMStateDescription vmstate_virtio_broken = {
     }
 };
 
+static const VMStateDescription vmstate_virtio_started = {
+    .name = "virtio/started",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = &virtio_started_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_BOOL(started, VirtIODevice),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_virtio = {
     .name = "virtio",
     .version_id = 1,
@@ -1913,6 +1957,7 @@  static const VMStateDescription vmstate_virtio = {
         &vmstate_virtio_ringsize,
         &vmstate_virtio_broken,
         &vmstate_virtio_extra_state,
+        &vmstate_virtio_started,
         NULL
     }
 };
@@ -2286,6 +2331,9 @@  void virtio_init(VirtIODevice *vdev, const char *name,
             g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
     }
 
+    vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) &&
+                          !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1));
+    vdev->started = false;
     vdev->device_id = device_id;
     vdev->status = 0;
     atomic_set(&vdev->isr, 0);
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index ce9516236a..fea08bcc44 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -105,6 +105,8 @@  struct VirtIODevice
     uint16_t device_id;
     bool vm_running;
     bool broken; /* device in invalid state, needs reset */
+    bool started;
+    bool start_on_kick; /* virtio 1.0 transitional devices support that */
     VMChangeStateEntry *vmstate;
     char *bus_name;
     uint8_t device_endian;