Message ID | 20190320112646.3712-2-xieyongji@baidu.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vhost-user-blk: Add support for backend reconnecting | expand |
On Mon, 20 May 2019 19:10:35 -0400 "Michael S. Tsirkin" <mst@redhat.com> wrote: > From: Xie Yongji <xieyongji@baidu.com> > > The virtio 1.0 transitional devices support driver uses the device > before setting the DRIVER_OK status bit. So we introduce a started > flag to indicate whether driver has started the device or not. > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > --- > include/hw/virtio/virtio.h | 2 ++ > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > 2 files changed, 52 insertions(+), 2 deletions(-) > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > index 7140381e3a..27c0efc3d0 100644 > --- a/include/hw/virtio/virtio.h > +++ b/include/hw/virtio/virtio.h > @@ -105,6 +105,8 @@ struct VirtIODevice > uint16_t device_id; > bool vm_running; > bool broken; /* device in invalid state, needs reset */ > + bool started; > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > VMChangeStateEntry *vmstate; > char *bus_name; > uint8_t device_endian; > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > index 28056a7ef7..5d533ac74e 100644 > --- a/hw/virtio/virtio.c > +++ b/hw/virtio/virtio.c > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > } > } > } > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > + if (unlikely(vdev->start_on_kick && vdev->started)) { > + vdev->start_on_kick = false; > + } > + > if (k->set_status) { > k->set_status(vdev, val); > } > vdev->status = val; > + > return 0; > } > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > k->reset(vdev); > } > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > + vdev->started = false; > vdev->broken = false; > vdev->guest_features = 0; > vdev->queue_sel = 0; > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > { > + bool ret = false; > + > if (vq->vring.desc && vq->handle_aio_output) { > VirtIODevice *vdev = vq->vdev; > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > - return vq->handle_aio_output(vdev, vq); > + ret = vq->handle_aio_output(vdev, vq); > + > + if (unlikely(vdev->start_on_kick)) { > + vdev->started = true; > + vdev->start_on_kick = false; > + } > } > > - return false; > + return ret; > } > > static void virtio_queue_notify_vq(VirtQueue *vq) > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > vq->handle_output(vdev, vq); > + > + if (unlikely(vdev->start_on_kick)) { > + vdev->started = true; > + vdev->start_on_kick = false; > + } > } > } > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > } else if (vq->handle_output) { > vq->handle_output(vdev, vq); > } > + > + if (unlikely(vdev->start_on_kick)) { > + vdev->started = true; > + vdev->start_on_kick = false; > + } > } > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > return vdev->broken; > } > > +static bool virtio_started_needed(void *opaque) > +{ > + VirtIODevice *vdev = opaque; > + > + return vdev->started; Existing machine types don't know about the "virtio/started" subsection. This breaks migration to older QEMUs if the driver has started the device, ie. most probably always when it comes to live migration. My understanding is that we do try to support backward migration though. It is a regular practice in datacenters to migrate workloads without having to take care of the QEMU version. FWIW I had to fix similar issues downstream many times in the past because customers had filed bugs. Cc'ing David for his opinion. > +} > + > static const VMStateDescription vmstate_virtqueue = { > .name = "virtqueue_state", > .version_id = 1, > @@ -1898,6 +1931,17 @@ static const VMStateDescription vmstate_virtio_broken = { > } > }; > > +static const VMStateDescription vmstate_virtio_started = { > + .name = "virtio/started", > + .version_id = 1, > + .minimum_version_id = 1, > + .needed = &virtio_started_needed, > + .fields = (VMStateField[]) { > + VMSTATE_BOOL(started, VirtIODevice), > + VMSTATE_END_OF_LIST() > + } > +}; > + > static const VMStateDescription vmstate_virtio = { > .name = "virtio", > .version_id = 1, > @@ -1913,6 +1957,7 @@ static const VMStateDescription vmstate_virtio = { > &vmstate_virtio_ringsize, > &vmstate_virtio_broken, > &vmstate_virtio_extra_state, > + &vmstate_virtio_started, > NULL > } > }; > @@ -2286,6 +2331,9 @@ void virtio_init(VirtIODevice *vdev, const char *name, > g_malloc0(sizeof(*vdev->vector_queues) * nvectors); > } > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > + vdev->started = false; > vdev->device_id = device_id; > vdev->status = 0; > atomic_set(&vdev->isr, 0);
On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote: > > On Mon, 20 May 2019 19:10:35 -0400 > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > From: Xie Yongji <xieyongji@baidu.com> > > > > The virtio 1.0 transitional devices support driver uses the device > > before setting the DRIVER_OK status bit. So we introduce a started > > flag to indicate whether driver has started the device or not. > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > --- > > include/hw/virtio/virtio.h | 2 ++ > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > index 7140381e3a..27c0efc3d0 100644 > > --- a/include/hw/virtio/virtio.h > > +++ b/include/hw/virtio/virtio.h > > @@ -105,6 +105,8 @@ struct VirtIODevice > > uint16_t device_id; > > bool vm_running; > > bool broken; /* device in invalid state, needs reset */ > > + bool started; > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > VMChangeStateEntry *vmstate; > > char *bus_name; > > uint8_t device_endian; > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > index 28056a7ef7..5d533ac74e 100644 > > --- a/hw/virtio/virtio.c > > +++ b/hw/virtio/virtio.c > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > } > > } > > } > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > + vdev->start_on_kick = false; > > + } > > + > > if (k->set_status) { > > k->set_status(vdev, val); > > } > > vdev->status = val; > > + > > return 0; > > } > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > k->reset(vdev); > > } > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > + vdev->started = false; > > vdev->broken = false; > > vdev->guest_features = 0; > > vdev->queue_sel = 0; > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > { > > + bool ret = false; > > + > > if (vq->vring.desc && vq->handle_aio_output) { > > VirtIODevice *vdev = vq->vdev; > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > - return vq->handle_aio_output(vdev, vq); > > + ret = vq->handle_aio_output(vdev, vq); > > + > > + if (unlikely(vdev->start_on_kick)) { > > + vdev->started = true; > > + vdev->start_on_kick = false; > > + } > > } > > > > - return false; > > + return ret; > > } > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > vq->handle_output(vdev, vq); > > + > > + if (unlikely(vdev->start_on_kick)) { > > + vdev->started = true; > > + vdev->start_on_kick = false; > > + } > > } > > } > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > } else if (vq->handle_output) { > > vq->handle_output(vdev, vq); > > } > > + > > + if (unlikely(vdev->start_on_kick)) { > > + vdev->started = true; > > + vdev->start_on_kick = false; > > + } > > } > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > return vdev->broken; > > } > > > > +static bool virtio_started_needed(void *opaque) > > +{ > > + VirtIODevice *vdev = opaque; > > + > > + return vdev->started; > > Existing machine types don't know about the "virtio/started" subsection. This > breaks migration to older QEMUs if the driver has started the device, ie. most > probably always when it comes to live migration. > > My understanding is that we do try to support backward migration though. It > is a regular practice in datacenters to migrate workloads without having to > take care of the QEMU version. FWIW I had to fix similar issues downstream > many times in the past because customers had filed bugs. > If we do need to support backward migration, for this patch, what I can think of is to only migrate the flag in the case that guest kicks but not set DRIVER_OK. This could fix backward migration in most case. Not sure if there is a more general approach... Thanks, Yongji
On Fri, 24 May 2019 19:56:06 +0800 Yongji Xie <elohimes@gmail.com> wrote: > On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote: > > > > On Mon, 20 May 2019 19:10:35 -0400 > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > before setting the DRIVER_OK status bit. So we introduce a started > > > flag to indicate whether driver has started the device or not. > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > --- > > > include/hw/virtio/virtio.h | 2 ++ > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > index 7140381e3a..27c0efc3d0 100644 > > > --- a/include/hw/virtio/virtio.h > > > +++ b/include/hw/virtio/virtio.h > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > uint16_t device_id; > > > bool vm_running; > > > bool broken; /* device in invalid state, needs reset */ > > > + bool started; > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > VMChangeStateEntry *vmstate; > > > char *bus_name; > > > uint8_t device_endian; > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > index 28056a7ef7..5d533ac74e 100644 > > > --- a/hw/virtio/virtio.c > > > +++ b/hw/virtio/virtio.c > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > } > > > } > > > } > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > + vdev->start_on_kick = false; > > > + } > > > + > > > if (k->set_status) { > > > k->set_status(vdev, val); > > > } > > > vdev->status = val; > > > + > > > return 0; > > > } > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > k->reset(vdev); > > > } > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > + vdev->started = false; > > > vdev->broken = false; > > > vdev->guest_features = 0; > > > vdev->queue_sel = 0; > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > { > > > + bool ret = false; > > > + > > > if (vq->vring.desc && vq->handle_aio_output) { > > > VirtIODevice *vdev = vq->vdev; > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > - return vq->handle_aio_output(vdev, vq); > > > + ret = vq->handle_aio_output(vdev, vq); > > > + > > > + if (unlikely(vdev->start_on_kick)) { > > > + vdev->started = true; > > > + vdev->start_on_kick = false; > > > + } > > > } > > > > > > - return false; > > > + return ret; > > > } > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > vq->handle_output(vdev, vq); > > > + > > > + if (unlikely(vdev->start_on_kick)) { > > > + vdev->started = true; > > > + vdev->start_on_kick = false; > > > + } > > > } > > > } > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > } else if (vq->handle_output) { > > > vq->handle_output(vdev, vq); > > > } > > > + > > > + if (unlikely(vdev->start_on_kick)) { > > > + vdev->started = true; > > > + vdev->start_on_kick = false; > > > + } > > > } > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > return vdev->broken; > > > } > > > > > > +static bool virtio_started_needed(void *opaque) > > > +{ > > > + VirtIODevice *vdev = opaque; > > > + > > > + return vdev->started; > > > > Existing machine types don't know about the "virtio/started" subsection. This > > breaks migration to older QEMUs if the driver has started the device, ie. most > > probably always when it comes to live migration. > > > > My understanding is that we do try to support backward migration though. It > > is a regular practice in datacenters to migrate workloads without having to > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > many times in the past because customers had filed bugs. > > > > If we do need to support backward migration, for this patch, what I > can think of is to only migrate the flag in the case that guest kicks > but not set DRIVER_OK. This could fix backward migration in most case. You mean something like that ? static bool virtio_started_needed(void *opaque) { VirtIODevice *vdev = opaque; return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); } > Not sure if there is a more general approach... > Another approach would be to only implement the started flag for machine version > 4.0. This can be achieved by adding a "use-started" property to the base virtio device, true by default and set to false by hw_compat_4_0. > Thanks, > Yongji
On Mon, 27 May 2019 at 18:44, Greg Kurz <groug@kaod.org> wrote: > > On Fri, 24 May 2019 19:56:06 +0800 > Yongji Xie <elohimes@gmail.com> wrote: > > > On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote: > > > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > > before setting the DRIVER_OK status bit. So we introduce a started > > > > flag to indicate whether driver has started the device or not. > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > --- > > > > include/hw/virtio/virtio.h | 2 ++ > > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > > index 7140381e3a..27c0efc3d0 100644 > > > > --- a/include/hw/virtio/virtio.h > > > > +++ b/include/hw/virtio/virtio.h > > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > > uint16_t device_id; > > > > bool vm_running; > > > > bool broken; /* device in invalid state, needs reset */ > > > > + bool started; > > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > > VMChangeStateEntry *vmstate; > > > > char *bus_name; > > > > uint8_t device_endian; > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > > index 28056a7ef7..5d533ac74e 100644 > > > > --- a/hw/virtio/virtio.c > > > > +++ b/hw/virtio/virtio.c > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > > } > > > > } > > > > } > > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > > + vdev->start_on_kick = false; > > > > + } > > > > + > > > > if (k->set_status) { > > > > k->set_status(vdev, val); > > > > } > > > > vdev->status = val; > > > > + > > > > return 0; > > > > } > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > > k->reset(vdev); > > > > } > > > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > > + vdev->started = false; > > > > vdev->broken = false; > > > > vdev->guest_features = 0; > > > > vdev->queue_sel = 0; > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > > { > > > > + bool ret = false; > > > > + > > > > if (vq->vring.desc && vq->handle_aio_output) { > > > > VirtIODevice *vdev = vq->vdev; > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > - return vq->handle_aio_output(vdev, vq); > > > > + ret = vq->handle_aio_output(vdev, vq); > > > > + > > > > + if (unlikely(vdev->start_on_kick)) { > > > > + vdev->started = true; > > > > + vdev->start_on_kick = false; > > > > + } > > > > } > > > > > > > > - return false; > > > > + return ret; > > > > } > > > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > vq->handle_output(vdev, vq); > > > > + > > > > + if (unlikely(vdev->start_on_kick)) { > > > > + vdev->started = true; > > > > + vdev->start_on_kick = false; > > > > + } > > > > } > > > > } > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > > } else if (vq->handle_output) { > > > > vq->handle_output(vdev, vq); > > > > } > > > > + > > > > + if (unlikely(vdev->start_on_kick)) { > > > > + vdev->started = true; > > > > + vdev->start_on_kick = false; > > > > + } > > > > } > > > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > return vdev->broken; > > > > } > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > +{ > > > > + VirtIODevice *vdev = opaque; > > > > + > > > > + return vdev->started; > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > probably always when it comes to live migration. > > > > > > My understanding is that we do try to support backward migration though. It > > > is a regular practice in datacenters to migrate workloads without having to > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > many times in the past because customers had filed bugs. > > > > > > > If we do need to support backward migration, for this patch, what I > > can think of is to only migrate the flag in the case that guest kicks > > but not set DRIVER_OK. This could fix backward migration in most case. > > You mean something like that ? > > static bool virtio_started_needed(void *opaque) > { > VirtIODevice *vdev = opaque; > > return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); > } > Yes. > > Not sure if there is a more general approach... > > > > Another approach would be to only implement the started flag for > machine version > 4.0. This can be achieved by adding a "use-started" > property to the base virtio device, true by default and set to > false by hw_compat_4_0. > If so, the problem that I tried to fix in this patch would still be exist in old machine type, right? Now I think the first approach may be more reasonable. Blocking a migration that will cause guest hang instead of allowing that. Thanks, Yongji
On Mon, 27 May 2019 21:04:38 +0800 Yongji Xie <elohimes@gmail.com> wrote: > On Mon, 27 May 2019 at 18:44, Greg Kurz <groug@kaod.org> wrote: > > > > On Fri, 24 May 2019 19:56:06 +0800 > > Yongji Xie <elohimes@gmail.com> wrote: > > > > > On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote: > > > > > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > > > before setting the DRIVER_OK status bit. So we introduce a started > > > > > flag to indicate whether driver has started the device or not. > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > --- > > > > > include/hw/virtio/virtio.h | 2 ++ > > > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > > > index 7140381e3a..27c0efc3d0 100644 > > > > > --- a/include/hw/virtio/virtio.h > > > > > +++ b/include/hw/virtio/virtio.h > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > > > uint16_t device_id; > > > > > bool vm_running; > > > > > bool broken; /* device in invalid state, needs reset */ > > > > > + bool started; > > > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > > > VMChangeStateEntry *vmstate; > > > > > char *bus_name; > > > > > uint8_t device_endian; > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > > > index 28056a7ef7..5d533ac74e 100644 > > > > > --- a/hw/virtio/virtio.c > > > > > +++ b/hw/virtio/virtio.c > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > > > } > > > > > } > > > > > } > > > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > > > + vdev->start_on_kick = false; > > > > > + } > > > > > + > > > > > if (k->set_status) { > > > > > k->set_status(vdev, val); > > > > > } > > > > > vdev->status = val; > > > > > + > > > > > return 0; > > > > > } > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > > > k->reset(vdev); > > > > > } > > > > > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > > > + vdev->started = false; > > > > > vdev->broken = false; > > > > > vdev->guest_features = 0; > > > > > vdev->queue_sel = 0; > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > > > { > > > > > + bool ret = false; > > > > > + > > > > > if (vq->vring.desc && vq->handle_aio_output) { > > > > > VirtIODevice *vdev = vq->vdev; > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > - return vq->handle_aio_output(vdev, vq); > > > > > + ret = vq->handle_aio_output(vdev, vq); > > > > > + > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > + vdev->started = true; > > > > > + vdev->start_on_kick = false; > > > > > + } > > > > > } > > > > > > > > > > - return false; > > > > > + return ret; > > > > > } > > > > > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > vq->handle_output(vdev, vq); > > > > > + > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > + vdev->started = true; > > > > > + vdev->start_on_kick = false; > > > > > + } > > > > > } > > > > > } > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > > > } else if (vq->handle_output) { > > > > > vq->handle_output(vdev, vq); > > > > > } > > > > > + > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > + vdev->started = true; > > > > > + vdev->start_on_kick = false; > > > > > + } > > > > > } > > > > > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > > return vdev->broken; > > > > > } > > > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > > +{ > > > > > + VirtIODevice *vdev = opaque; > > > > > + > > > > > + return vdev->started; > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > > probably always when it comes to live migration. > > > > > > > > My understanding is that we do try to support backward migration though. It > > > > is a regular practice in datacenters to migrate workloads without having to > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > > many times in the past because customers had filed bugs. > > > > > > > > > > If we do need to support backward migration, for this patch, what I > > > can think of is to only migrate the flag in the case that guest kicks > > > but not set DRIVER_OK. This could fix backward migration in most case. > > > > You mean something like that ? > > > > static bool virtio_started_needed(void *opaque) > > { > > VirtIODevice *vdev = opaque; > > > > return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); > > } > > > > Yes. > > > > Not sure if there is a more general approach... > > > > > > > Another approach would be to only implement the started flag for > > machine version > 4.0. This can be achieved by adding a "use-started" > > property to the base virtio device, true by default and set to > > false by hw_compat_4_0. > > > > If so, the problem that I tried to fix in this patch would still be > exist in old machine type, right? > Yes. > Now I think the first approach may be more reasonable. Blocking a > migration that will cause guest hang instead of allowing that. > Fair enough but it may be difficult for the end user to understand why the migration failed in the first place... What about detecting the hang condition (kick before DRIVER_OK) in the source, add a migration blocker with an explicit message and remove the blocker once DRIVER_OK is set ? > Thanks, > Yongji
On Mon, May 27, 2019 at 12:44:46PM +0200, Greg Kurz wrote: > On Fri, 24 May 2019 19:56:06 +0800 > Yongji Xie <elohimes@gmail.com> wrote: > > > On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote: > > > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > > before setting the DRIVER_OK status bit. So we introduce a started > > > > flag to indicate whether driver has started the device or not. > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > --- > > > > include/hw/virtio/virtio.h | 2 ++ > > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > > index 7140381e3a..27c0efc3d0 100644 > > > > --- a/include/hw/virtio/virtio.h > > > > +++ b/include/hw/virtio/virtio.h > > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > > uint16_t device_id; > > > > bool vm_running; > > > > bool broken; /* device in invalid state, needs reset */ > > > > + bool started; > > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > > VMChangeStateEntry *vmstate; > > > > char *bus_name; > > > > uint8_t device_endian; > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > > index 28056a7ef7..5d533ac74e 100644 > > > > --- a/hw/virtio/virtio.c > > > > +++ b/hw/virtio/virtio.c > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > > } > > > > } > > > > } > > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > > + vdev->start_on_kick = false; > > > > + } > > > > + > > > > if (k->set_status) { > > > > k->set_status(vdev, val); > > > > } > > > > vdev->status = val; > > > > + > > > > return 0; > > > > } > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > > k->reset(vdev); > > > > } > > > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > > + vdev->started = false; > > > > vdev->broken = false; > > > > vdev->guest_features = 0; > > > > vdev->queue_sel = 0; > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > > { > > > > + bool ret = false; > > > > + > > > > if (vq->vring.desc && vq->handle_aio_output) { > > > > VirtIODevice *vdev = vq->vdev; > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > - return vq->handle_aio_output(vdev, vq); > > > > + ret = vq->handle_aio_output(vdev, vq); > > > > + > > > > + if (unlikely(vdev->start_on_kick)) { > > > > + vdev->started = true; > > > > + vdev->start_on_kick = false; > > > > + } > > > > } > > > > > > > > - return false; > > > > + return ret; > > > > } > > > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > vq->handle_output(vdev, vq); > > > > + > > > > + if (unlikely(vdev->start_on_kick)) { > > > > + vdev->started = true; > > > > + vdev->start_on_kick = false; > > > > + } > > > > } > > > > } > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > > } else if (vq->handle_output) { > > > > vq->handle_output(vdev, vq); > > > > } > > > > + > > > > + if (unlikely(vdev->start_on_kick)) { > > > > + vdev->started = true; > > > > + vdev->start_on_kick = false; > > > > + } > > > > } > > > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > return vdev->broken; > > > > } > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > +{ > > > > + VirtIODevice *vdev = opaque; > > > > + > > > > + return vdev->started; > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > probably always when it comes to live migration. > > > > > > My understanding is that we do try to support backward migration though. It > > > is a regular practice in datacenters to migrate workloads without having to > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > many times in the past because customers had filed bugs. > > > > > > > If we do need to support backward migration, for this patch, what I > > can think of is to only migrate the flag in the case that guest kicks > > but not set DRIVER_OK. This could fix backward migration in most case. > > You mean something like that ? > > static bool virtio_started_needed(void *opaque) > { > VirtIODevice *vdev = opaque; > > return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); > } > > > Not sure if there is a more general approach... > > > > Another approach would be to only implement the started flag for > machine version > 4.0. This can be achieved by adding a "use-started" > property to the base virtio device, true by default and set to > false by hw_compat_4_0. I think this is best frankly. > > Thanks, > > Yongji
On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote: > On Mon, 20 May 2019 19:10:35 -0400 > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > From: Xie Yongji <xieyongji@baidu.com> > > > > The virtio 1.0 transitional devices support driver uses the device > > before setting the DRIVER_OK status bit. So we introduce a started > > flag to indicate whether driver has started the device or not. > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > --- > > include/hw/virtio/virtio.h | 2 ++ > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > index 7140381e3a..27c0efc3d0 100644 > > --- a/include/hw/virtio/virtio.h > > +++ b/include/hw/virtio/virtio.h > > @@ -105,6 +105,8 @@ struct VirtIODevice > > uint16_t device_id; > > bool vm_running; > > bool broken; /* device in invalid state, needs reset */ > > + bool started; > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > VMChangeStateEntry *vmstate; > > char *bus_name; > > uint8_t device_endian; > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > index 28056a7ef7..5d533ac74e 100644 > > --- a/hw/virtio/virtio.c > > +++ b/hw/virtio/virtio.c > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > } > > } > > } > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > + vdev->start_on_kick = false; > > + } > > + > > if (k->set_status) { > > k->set_status(vdev, val); > > } > > vdev->status = val; > > + > > return 0; > > } > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > k->reset(vdev); > > } > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > + vdev->started = false; > > vdev->broken = false; > > vdev->guest_features = 0; > > vdev->queue_sel = 0; > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > { > > + bool ret = false; > > + > > if (vq->vring.desc && vq->handle_aio_output) { > > VirtIODevice *vdev = vq->vdev; > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > - return vq->handle_aio_output(vdev, vq); > > + ret = vq->handle_aio_output(vdev, vq); > > + > > + if (unlikely(vdev->start_on_kick)) { > > + vdev->started = true; > > + vdev->start_on_kick = false; > > + } > > } > > > > - return false; > > + return ret; > > } > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > vq->handle_output(vdev, vq); > > + > > + if (unlikely(vdev->start_on_kick)) { > > + vdev->started = true; > > + vdev->start_on_kick = false; > > + } > > } > > } > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > } else if (vq->handle_output) { > > vq->handle_output(vdev, vq); > > } > > + > > + if (unlikely(vdev->start_on_kick)) { > > + vdev->started = true; > > + vdev->start_on_kick = false; > > + } > > } > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > return vdev->broken; > > } > > > > +static bool virtio_started_needed(void *opaque) > > +{ > > + VirtIODevice *vdev = opaque; > > + > > + return vdev->started; > > Existing machine types don't know about the "virtio/started" subsection. This > breaks migration to older QEMUs if the driver has started the device, ie. most > probably always when it comes to live migration. > > My understanding is that we do try to support backward migration though. It > is a regular practice in datacenters to migrate workloads without having to > take care of the QEMU version. FWIW I had to fix similar issues downstream > many times in the past because customers had filed bugs. > > Cc'ing David for his opinion. Uh.. did you mean to CC me, or Dave Gilbert? I mean, I think you're right that we should try to maintain backwards migration, but this isn't really my area of authority.
On Tue, 28 May 2019 at 02:54, Michael S. Tsirkin <mst@redhat.com> wrote: > > On Mon, May 27, 2019 at 12:44:46PM +0200, Greg Kurz wrote: > > On Fri, 24 May 2019 19:56:06 +0800 > > Yongji Xie <elohimes@gmail.com> wrote: > > > > > On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote: > > > > > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > > > before setting the DRIVER_OK status bit. So we introduce a started > > > > > flag to indicate whether driver has started the device or not. > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > --- > > > > > include/hw/virtio/virtio.h | 2 ++ > > > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > > > index 7140381e3a..27c0efc3d0 100644 > > > > > --- a/include/hw/virtio/virtio.h > > > > > +++ b/include/hw/virtio/virtio.h > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > > > uint16_t device_id; > > > > > bool vm_running; > > > > > bool broken; /* device in invalid state, needs reset */ > > > > > + bool started; > > > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > > > VMChangeStateEntry *vmstate; > > > > > char *bus_name; > > > > > uint8_t device_endian; > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > > > index 28056a7ef7..5d533ac74e 100644 > > > > > --- a/hw/virtio/virtio.c > > > > > +++ b/hw/virtio/virtio.c > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > > > } > > > > > } > > > > > } > > > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > > > + vdev->start_on_kick = false; > > > > > + } > > > > > + > > > > > if (k->set_status) { > > > > > k->set_status(vdev, val); > > > > > } > > > > > vdev->status = val; > > > > > + > > > > > return 0; > > > > > } > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > > > k->reset(vdev); > > > > > } > > > > > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > > > + vdev->started = false; > > > > > vdev->broken = false; > > > > > vdev->guest_features = 0; > > > > > vdev->queue_sel = 0; > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > > > { > > > > > + bool ret = false; > > > > > + > > > > > if (vq->vring.desc && vq->handle_aio_output) { > > > > > VirtIODevice *vdev = vq->vdev; > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > - return vq->handle_aio_output(vdev, vq); > > > > > + ret = vq->handle_aio_output(vdev, vq); > > > > > + > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > + vdev->started = true; > > > > > + vdev->start_on_kick = false; > > > > > + } > > > > > } > > > > > > > > > > - return false; > > > > > + return ret; > > > > > } > > > > > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > vq->handle_output(vdev, vq); > > > > > + > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > + vdev->started = true; > > > > > + vdev->start_on_kick = false; > > > > > + } > > > > > } > > > > > } > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > > > } else if (vq->handle_output) { > > > > > vq->handle_output(vdev, vq); > > > > > } > > > > > + > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > + vdev->started = true; > > > > > + vdev->start_on_kick = false; > > > > > + } > > > > > } > > > > > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > > return vdev->broken; > > > > > } > > > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > > +{ > > > > > + VirtIODevice *vdev = opaque; > > > > > + > > > > > + return vdev->started; > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > > probably always when it comes to live migration. > > > > > > > > My understanding is that we do try to support backward migration though. It > > > > is a regular practice in datacenters to migrate workloads without having to > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > > many times in the past because customers had filed bugs. > > > > > > > > > > If we do need to support backward migration, for this patch, what I > > > can think of is to only migrate the flag in the case that guest kicks > > > but not set DRIVER_OK. This could fix backward migration in most case. > > > > You mean something like that ? > > > > static bool virtio_started_needed(void *opaque) > > { > > VirtIODevice *vdev = opaque; > > > > return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); > > } > > > > > Not sure if there is a more general approach... > > > > > > > Another approach would be to only implement the started flag for > > machine version > 4.0. This can be achieved by adding a "use-started" > > property to the base virtio device, true by default and set to > > false by hw_compat_4_0. > > I think this is best frankly. > Only implement the started flag for machine version > 4.0 might not be good because vhost-user-blk now need to use this flag. How about only migrating this flag for machine version > 4.0 instead? Thanks, Yongji
On Tue, 28 May 2019 10:08:54 +1000 David Gibson <david@gibson.dropbear.id.au> wrote: > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote: > > On Mon, 20 May 2019 19:10:35 -0400 > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > before setting the DRIVER_OK status bit. So we introduce a started > > > flag to indicate whether driver has started the device or not. > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > --- > > > include/hw/virtio/virtio.h | 2 ++ > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > index 7140381e3a..27c0efc3d0 100644 > > > --- a/include/hw/virtio/virtio.h > > > +++ b/include/hw/virtio/virtio.h > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > uint16_t device_id; > > > bool vm_running; > > > bool broken; /* device in invalid state, needs reset */ > > > + bool started; > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > VMChangeStateEntry *vmstate; > > > char *bus_name; > > > uint8_t device_endian; > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > index 28056a7ef7..5d533ac74e 100644 > > > --- a/hw/virtio/virtio.c > > > +++ b/hw/virtio/virtio.c > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > } > > > } > > > } > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > + vdev->start_on_kick = false; > > > + } > > > + > > > if (k->set_status) { > > > k->set_status(vdev, val); > > > } > > > vdev->status = val; > > > + > > > return 0; > > > } > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > k->reset(vdev); > > > } > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > + vdev->started = false; > > > vdev->broken = false; > > > vdev->guest_features = 0; > > > vdev->queue_sel = 0; > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > { > > > + bool ret = false; > > > + > > > if (vq->vring.desc && vq->handle_aio_output) { > > > VirtIODevice *vdev = vq->vdev; > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > - return vq->handle_aio_output(vdev, vq); > > > + ret = vq->handle_aio_output(vdev, vq); > > > + > > > + if (unlikely(vdev->start_on_kick)) { > > > + vdev->started = true; > > > + vdev->start_on_kick = false; > > > + } > > > } > > > > > > - return false; > > > + return ret; > > > } > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > vq->handle_output(vdev, vq); > > > + > > > + if (unlikely(vdev->start_on_kick)) { > > > + vdev->started = true; > > > + vdev->start_on_kick = false; > > > + } > > > } > > > } > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > } else if (vq->handle_output) { > > > vq->handle_output(vdev, vq); > > > } > > > + > > > + if (unlikely(vdev->start_on_kick)) { > > > + vdev->started = true; > > > + vdev->start_on_kick = false; > > > + } > > > } > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > return vdev->broken; > > > } > > > > > > +static bool virtio_started_needed(void *opaque) > > > +{ > > > + VirtIODevice *vdev = opaque; > > > + > > > + return vdev->started; > > > > Existing machine types don't know about the "virtio/started" subsection. This > > breaks migration to older QEMUs if the driver has started the device, ie. most > > probably always when it comes to live migration. > > > > My understanding is that we do try to support backward migration though. It > > is a regular practice in datacenters to migrate workloads without having to > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > many times in the past because customers had filed bugs. > > > > Cc'ing David for his opinion. > > Uh.. did you mean to CC me, or Dave Gilbert? > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable as well. I remember being involved in backward migration fixes for spapr several times. > I mean, I think you're right that we should try to maintain backwards > migration, but this isn't really my area of authority. > Cc'ing Dave Gilbert :) Cheers, -- Greg
* Greg Kurz (groug@kaod.org) wrote: > On Tue, 28 May 2019 10:08:54 +1000 > David Gibson <david@gibson.dropbear.id.au> wrote: > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote: > > > On Mon, 20 May 2019 19:10:35 -0400 > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > > before setting the DRIVER_OK status bit. So we introduce a started > > > > flag to indicate whether driver has started the device or not. > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > --- > > > > include/hw/virtio/virtio.h | 2 ++ > > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > > index 7140381e3a..27c0efc3d0 100644 > > > > --- a/include/hw/virtio/virtio.h > > > > +++ b/include/hw/virtio/virtio.h > > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > > uint16_t device_id; > > > > bool vm_running; > > > > bool broken; /* device in invalid state, needs reset */ > > > > + bool started; > > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > > VMChangeStateEntry *vmstate; > > > > char *bus_name; > > > > uint8_t device_endian; > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > > index 28056a7ef7..5d533ac74e 100644 > > > > --- a/hw/virtio/virtio.c > > > > +++ b/hw/virtio/virtio.c > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > > } > > > > } > > > > } > > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > > + vdev->start_on_kick = false; > > > > + } > > > > + > > > > if (k->set_status) { > > > > k->set_status(vdev, val); > > > > } > > > > vdev->status = val; > > > > + > > > > return 0; > > > > } > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > > k->reset(vdev); > > > > } > > > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > > + vdev->started = false; > > > > vdev->broken = false; > > > > vdev->guest_features = 0; > > > > vdev->queue_sel = 0; > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > > { > > > > + bool ret = false; > > > > + > > > > if (vq->vring.desc && vq->handle_aio_output) { > > > > VirtIODevice *vdev = vq->vdev; > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > - return vq->handle_aio_output(vdev, vq); > > > > + ret = vq->handle_aio_output(vdev, vq); > > > > + > > > > + if (unlikely(vdev->start_on_kick)) { > > > > + vdev->started = true; > > > > + vdev->start_on_kick = false; > > > > + } > > > > } > > > > > > > > - return false; > > > > + return ret; > > > > } > > > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > vq->handle_output(vdev, vq); > > > > + > > > > + if (unlikely(vdev->start_on_kick)) { > > > > + vdev->started = true; > > > > + vdev->start_on_kick = false; > > > > + } > > > > } > > > > } > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > > } else if (vq->handle_output) { > > > > vq->handle_output(vdev, vq); > > > > } > > > > + > > > > + if (unlikely(vdev->start_on_kick)) { > > > > + vdev->started = true; > > > > + vdev->start_on_kick = false; > > > > + } > > > > } > > > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > return vdev->broken; > > > > } > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > +{ > > > > + VirtIODevice *vdev = opaque; > > > > + > > > > + return vdev->started; > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > probably always when it comes to live migration. > > > > > > My understanding is that we do try to support backward migration though. It > > > is a regular practice in datacenters to migrate workloads without having to > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > many times in the past because customers had filed bugs. > > > > > > Cc'ing David for his opinion. > > > > Uh.. did you mean to CC me, or Dave Gilbert? > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable > as well. I remember being involved in backward migration fixes for spapr > several times. > > > I mean, I think you're right that we should try to maintain backwards > > migration, but this isn't really my area of authority. > > > > Cc'ing Dave Gilbert :) Right, I need to maintain backwards migration compatibility; tie the feature to a machine type so it's only used on newer machine types and then we'll be safe. Having said that, what's the symptom when this goes wrong? Dave > Cheers, > > -- > Greg -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Wed, 29 May 2019 12:18:50 +0100 "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > * Greg Kurz (groug@kaod.org) wrote: > > On Tue, 28 May 2019 10:08:54 +1000 > > David Gibson <david@gibson.dropbear.id.au> wrote: > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote: > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > > > before setting the DRIVER_OK status bit. So we introduce a started > > > > > flag to indicate whether driver has started the device or not. > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > --- > > > > > include/hw/virtio/virtio.h | 2 ++ > > > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > > > index 7140381e3a..27c0efc3d0 100644 > > > > > --- a/include/hw/virtio/virtio.h > > > > > +++ b/include/hw/virtio/virtio.h > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > > > uint16_t device_id; > > > > > bool vm_running; > > > > > bool broken; /* device in invalid state, needs reset */ > > > > > + bool started; > > > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > > > VMChangeStateEntry *vmstate; > > > > > char *bus_name; > > > > > uint8_t device_endian; > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > > > index 28056a7ef7..5d533ac74e 100644 > > > > > --- a/hw/virtio/virtio.c > > > > > +++ b/hw/virtio/virtio.c > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > > > } > > > > > } > > > > > } > > > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > > > + vdev->start_on_kick = false; > > > > > + } > > > > > + > > > > > if (k->set_status) { > > > > > k->set_status(vdev, val); > > > > > } > > > > > vdev->status = val; > > > > > + > > > > > return 0; > > > > > } > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > > > k->reset(vdev); > > > > > } > > > > > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > > > + vdev->started = false; > > > > > vdev->broken = false; > > > > > vdev->guest_features = 0; > > > > > vdev->queue_sel = 0; > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > > > { > > > > > + bool ret = false; > > > > > + > > > > > if (vq->vring.desc && vq->handle_aio_output) { > > > > > VirtIODevice *vdev = vq->vdev; > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > - return vq->handle_aio_output(vdev, vq); > > > > > + ret = vq->handle_aio_output(vdev, vq); > > > > > + > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > + vdev->started = true; > > > > > + vdev->start_on_kick = false; > > > > > + } > > > > > } > > > > > > > > > > - return false; > > > > > + return ret; > > > > > } > > > > > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > vq->handle_output(vdev, vq); > > > > > + > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > + vdev->started = true; > > > > > + vdev->start_on_kick = false; > > > > > + } > > > > > } > > > > > } > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > > > } else if (vq->handle_output) { > > > > > vq->handle_output(vdev, vq); > > > > > } > > > > > + > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > + vdev->started = true; > > > > > + vdev->start_on_kick = false; > > > > > + } > > > > > } > > > > > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > > return vdev->broken; > > > > > } > > > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > > +{ > > > > > + VirtIODevice *vdev = opaque; > > > > > + > > > > > + return vdev->started; > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > > probably always when it comes to live migration. > > > > > > > > My understanding is that we do try to support backward migration though. It > > > > is a regular practice in datacenters to migrate workloads without having to > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > > many times in the past because customers had filed bugs. > > > > > > > > Cc'ing David for his opinion. > > > > > > Uh.. did you mean to CC me, or Dave Gilbert? > > > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable > > as well. I remember being involved in backward migration fixes for spapr > > several times. > > > > > I mean, I think you're right that we should try to maintain backwards > > > migration, but this isn't really my area of authority. > > > > > > > Cc'ing Dave Gilbert :) > > Right, I need to maintain backwards migration compatibility; tie the > feature to a machine type so it's only used on newer machine types and > then we'll be safe. > > Having said that, what's the symptom when this goes wrong? > Since the started flag is set as soon as the guest driver begins to use the device and remains so until next reset, the associated subsection is basically always emitted when migrating a booted guest. This causes migration to always fail on the target in this case: qemu-system-ppc64: Failed to load virtio-net:virtio qemu-system-ppc64: error while loading state for instance 0x0 of device 'pci@800000020000000:00.0/virtio-net' qemu-system-ppc64: load of migration failed: No such file or directory Xie Yongji has just sent a series to fix that, with you in Cc: Cheers, -- Greg > Dave > > > Cheers, > > > > -- > > Greg > > > -- > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
* Greg Kurz (groug@kaod.org) wrote: > On Wed, 29 May 2019 12:18:50 +0100 > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > * Greg Kurz (groug@kaod.org) wrote: > > > On Tue, 28 May 2019 10:08:54 +1000 > > > David Gibson <david@gibson.dropbear.id.au> wrote: > > > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote: > > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > > > > before setting the DRIVER_OK status bit. So we introduce a started > > > > > > flag to indicate whether driver has started the device or not. > > > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > --- > > > > > > include/hw/virtio/virtio.h | 2 ++ > > > > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > > > > index 7140381e3a..27c0efc3d0 100644 > > > > > > --- a/include/hw/virtio/virtio.h > > > > > > +++ b/include/hw/virtio/virtio.h > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > > > > uint16_t device_id; > > > > > > bool vm_running; > > > > > > bool broken; /* device in invalid state, needs reset */ > > > > > > + bool started; > > > > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > > > > VMChangeStateEntry *vmstate; > > > > > > char *bus_name; > > > > > > uint8_t device_endian; > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > > > > index 28056a7ef7..5d533ac74e 100644 > > > > > > --- a/hw/virtio/virtio.c > > > > > > +++ b/hw/virtio/virtio.c > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > > > > } > > > > > > } > > > > > > } > > > > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > > > > + vdev->start_on_kick = false; > > > > > > + } > > > > > > + > > > > > > if (k->set_status) { > > > > > > k->set_status(vdev, val); > > > > > > } > > > > > > vdev->status = val; > > > > > > + > > > > > > return 0; > > > > > > } > > > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > > > > k->reset(vdev); > > > > > > } > > > > > > > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > > > > + vdev->started = false; > > > > > > vdev->broken = false; > > > > > > vdev->guest_features = 0; > > > > > > vdev->queue_sel = 0; > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > > > > { > > > > > > + bool ret = false; > > > > > > + > > > > > > if (vq->vring.desc && vq->handle_aio_output) { > > > > > > VirtIODevice *vdev = vq->vdev; > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > - return vq->handle_aio_output(vdev, vq); > > > > > > + ret = vq->handle_aio_output(vdev, vq); > > > > > > + > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > + vdev->started = true; > > > > > > + vdev->start_on_kick = false; > > > > > > + } > > > > > > } > > > > > > > > > > > > - return false; > > > > > > + return ret; > > > > > > } > > > > > > > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > vq->handle_output(vdev, vq); > > > > > > + > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > + vdev->started = true; > > > > > > + vdev->start_on_kick = false; > > > > > > + } > > > > > > } > > > > > > } > > > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > > > > } else if (vq->handle_output) { > > > > > > vq->handle_output(vdev, vq); > > > > > > } > > > > > > + > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > + vdev->started = true; > > > > > > + vdev->start_on_kick = false; > > > > > > + } > > > > > > } > > > > > > > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > > > return vdev->broken; > > > > > > } > > > > > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > > > +{ > > > > > > + VirtIODevice *vdev = opaque; > > > > > > + > > > > > > + return vdev->started; > > > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > > > probably always when it comes to live migration. > > > > > > > > > > My understanding is that we do try to support backward migration though. It > > > > > is a regular practice in datacenters to migrate workloads without having to > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > > > many times in the past because customers had filed bugs. > > > > > > > > > > Cc'ing David for his opinion. > > > > > > > > Uh.. did you mean to CC me, or Dave Gilbert? > > > > > > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable > > > as well. I remember being involved in backward migration fixes for spapr > > > several times. > > > > > > > I mean, I think you're right that we should try to maintain backwards > > > > migration, but this isn't really my area of authority. > > > > > > > > > > Cc'ing Dave Gilbert :) > > > > Right, I need to maintain backwards migration compatibility; tie the > > feature to a machine type so it's only used on newer machine types and > > then we'll be safe. > > > > Having said that, what's the symptom when this goes wrong? > > > > Since the started flag is set as soon as the guest driver begins to use > the device and remains so until next reset, the associated subsection is > basically always emitted when migrating a booted guest. This causes > migration to always fail on the target in this case: I meant what's the symptom without this patch series at all? Dave > qemu-system-ppc64: Failed to load virtio-net:virtio > qemu-system-ppc64: error while loading state for instance 0x0 of device 'pci@800000020000000:00.0/virtio-net' > qemu-system-ppc64: load of migration failed: No such file or directory > > Xie Yongji has just sent a series to fix that, with you in Cc: > > Cheers, > > -- > Greg > > > Dave > > > > > Cheers, > > > > > > -- > > > Greg > > > > > > -- > > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Wed, 29 May 2019 13:38:19 +0100 "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > * Greg Kurz (groug@kaod.org) wrote: > > On Wed, 29 May 2019 12:18:50 +0100 > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > On Tue, 28 May 2019 10:08:54 +1000 > > > > David Gibson <david@gibson.dropbear.id.au> wrote: > > > > > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote: > > > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started > > > > > > > flag to indicate whether driver has started the device or not. > > > > > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > --- > > > > > > > include/hw/virtio/virtio.h | 2 ++ > > > > > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > > > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > > > > > index 7140381e3a..27c0efc3d0 100644 > > > > > > > --- a/include/hw/virtio/virtio.h > > > > > > > +++ b/include/hw/virtio/virtio.h > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > > > > > uint16_t device_id; > > > > > > > bool vm_running; > > > > > > > bool broken; /* device in invalid state, needs reset */ > > > > > > > + bool started; > > > > > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > > > > > VMChangeStateEntry *vmstate; > > > > > > > char *bus_name; > > > > > > > uint8_t device_endian; > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > > > > > index 28056a7ef7..5d533ac74e 100644 > > > > > > > --- a/hw/virtio/virtio.c > > > > > > > +++ b/hw/virtio/virtio.c > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > > > > > } > > > > > > > } > > > > > > > } > > > > > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > > > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > > > > > + vdev->start_on_kick = false; > > > > > > > + } > > > > > > > + > > > > > > > if (k->set_status) { > > > > > > > k->set_status(vdev, val); > > > > > > > } > > > > > > > vdev->status = val; > > > > > > > + > > > > > > > return 0; > > > > > > > } > > > > > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > > > > > k->reset(vdev); > > > > > > > } > > > > > > > > > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > > > > > + vdev->started = false; > > > > > > > vdev->broken = false; > > > > > > > vdev->guest_features = 0; > > > > > > > vdev->queue_sel = 0; > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > > > > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > > > > > { > > > > > > > + bool ret = false; > > > > > > > + > > > > > > > if (vq->vring.desc && vq->handle_aio_output) { > > > > > > > VirtIODevice *vdev = vq->vdev; > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > - return vq->handle_aio_output(vdev, vq); > > > > > > > + ret = vq->handle_aio_output(vdev, vq); > > > > > > > + > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > + vdev->started = true; > > > > > > > + vdev->start_on_kick = false; > > > > > > > + } > > > > > > > } > > > > > > > > > > > > > > - return false; > > > > > > > + return ret; > > > > > > > } > > > > > > > > > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > vq->handle_output(vdev, vq); > > > > > > > + > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > + vdev->started = true; > > > > > > > + vdev->start_on_kick = false; > > > > > > > + } > > > > > > > } > > > > > > > } > > > > > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > > > > > } else if (vq->handle_output) { > > > > > > > vq->handle_output(vdev, vq); > > > > > > > } > > > > > > > + > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > + vdev->started = true; > > > > > > > + vdev->start_on_kick = false; > > > > > > > + } > > > > > > > } > > > > > > > > > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > > > > return vdev->broken; > > > > > > > } > > > > > > > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > > > > +{ > > > > > > > + VirtIODevice *vdev = opaque; > > > > > > > + > > > > > > > + return vdev->started; > > > > > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > > > > probably always when it comes to live migration. > > > > > > > > > > > > My understanding is that we do try to support backward migration though. It > > > > > > is a regular practice in datacenters to migrate workloads without having to > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > > > > many times in the past because customers had filed bugs. > > > > > > > > > > > > Cc'ing David for his opinion. > > > > > > > > > > Uh.. did you mean to CC me, or Dave Gilbert? > > > > > > > > > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable > > > > as well. I remember being involved in backward migration fixes for spapr > > > > several times. > > > > > > > > > I mean, I think you're right that we should try to maintain backwards > > > > > migration, but this isn't really my area of authority. > > > > > > > > > > > > > Cc'ing Dave Gilbert :) > > > > > > Right, I need to maintain backwards migration compatibility; tie the > > > feature to a machine type so it's only used on newer machine types and > > > then we'll be safe. > > > > > > Having said that, what's the symptom when this goes wrong? > > > > > > > Since the started flag is set as soon as the guest driver begins to use > > the device and remains so until next reset, the associated subsection is > > basically always emitted when migrating a booted guest. This causes > > migration to always fail on the target in this case: > > I meant what's the symptom without this patch series at all? > Oh sorry... if I got it right, migrating when the guest first kicked the device but not set the DRIVE_OK bit yet would result in a guest hang on the destination. Yongji might elaborate a bit more on that. > Dave > > > qemu-system-ppc64: Failed to load virtio-net:virtio > > qemu-system-ppc64: error while loading state for instance 0x0 of device 'pci@800000020000000:00.0/virtio-net' > > qemu-system-ppc64: load of migration failed: No such file or directory > > > > Xie Yongji has just sent a series to fix that, with you in Cc: > > > > Cheers, > > > > -- > > Greg > > > > > Dave > > > > > > > Cheers, > > > > > > > > -- > > > > Greg > > > > > > > > > -- > > > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK > > > -- > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
* Greg Kurz (groug@kaod.org) wrote: > On Wed, 29 May 2019 13:38:19 +0100 > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > * Greg Kurz (groug@kaod.org) wrote: > > > On Wed, 29 May 2019 12:18:50 +0100 > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > > On Tue, 28 May 2019 10:08:54 +1000 > > > > > David Gibson <david@gibson.dropbear.id.au> wrote: > > > > > > > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote: > > > > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started > > > > > > > > flag to indicate whether driver has started the device or not. > > > > > > > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > --- > > > > > > > > include/hw/virtio/virtio.h | 2 ++ > > > > > > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > > > > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > > > > > > index 7140381e3a..27c0efc3d0 100644 > > > > > > > > --- a/include/hw/virtio/virtio.h > > > > > > > > +++ b/include/hw/virtio/virtio.h > > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > > > > > > uint16_t device_id; > > > > > > > > bool vm_running; > > > > > > > > bool broken; /* device in invalid state, needs reset */ > > > > > > > > + bool started; > > > > > > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > > > > > > VMChangeStateEntry *vmstate; > > > > > > > > char *bus_name; > > > > > > > > uint8_t device_endian; > > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > > > > > > index 28056a7ef7..5d533ac74e 100644 > > > > > > > > --- a/hw/virtio/virtio.c > > > > > > > > +++ b/hw/virtio/virtio.c > > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > > > > > > } > > > > > > > > } > > > > > > > > } > > > > > > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > > > > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > + } > > > > > > > > + > > > > > > > > if (k->set_status) { > > > > > > > > k->set_status(vdev, val); > > > > > > > > } > > > > > > > > vdev->status = val; > > > > > > > > + > > > > > > > > return 0; > > > > > > > > } > > > > > > > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > > > > > > k->reset(vdev); > > > > > > > > } > > > > > > > > > > > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > > > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > > > > > > + vdev->started = false; > > > > > > > > vdev->broken = false; > > > > > > > > vdev->guest_features = 0; > > > > > > > > vdev->queue_sel = 0; > > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > > > > > > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > > > > > > { > > > > > > > > + bool ret = false; > > > > > > > > + > > > > > > > > if (vq->vring.desc && vq->handle_aio_output) { > > > > > > > > VirtIODevice *vdev = vq->vdev; > > > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > > - return vq->handle_aio_output(vdev, vq); > > > > > > > > + ret = vq->handle_aio_output(vdev, vq); > > > > > > > > + > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > + vdev->started = true; > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > + } > > > > > > > > } > > > > > > > > > > > > > > > > - return false; > > > > > > > > + return ret; > > > > > > > > } > > > > > > > > > > > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > > vq->handle_output(vdev, vq); > > > > > > > > + > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > + vdev->started = true; > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > + } > > > > > > > > } > > > > > > > > } > > > > > > > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > > > > > > } else if (vq->handle_output) { > > > > > > > > vq->handle_output(vdev, vq); > > > > > > > > } > > > > > > > > + > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > + vdev->started = true; > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > + } > > > > > > > > } > > > > > > > > > > > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > > > > > return vdev->broken; > > > > > > > > } > > > > > > > > > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > > > > > +{ > > > > > > > > + VirtIODevice *vdev = opaque; > > > > > > > > + > > > > > > > > + return vdev->started; > > > > > > > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > > > > > probably always when it comes to live migration. > > > > > > > > > > > > > > My understanding is that we do try to support backward migration though. It > > > > > > > is a regular practice in datacenters to migrate workloads without having to > > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > > > > > many times in the past because customers had filed bugs. > > > > > > > > > > > > > > Cc'ing David for his opinion. > > > > > > > > > > > > Uh.. did you mean to CC me, or Dave Gilbert? > > > > > > > > > > > > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable > > > > > as well. I remember being involved in backward migration fixes for spapr > > > > > several times. > > > > > > > > > > > I mean, I think you're right that we should try to maintain backwards > > > > > > migration, but this isn't really my area of authority. > > > > > > > > > > > > > > > > Cc'ing Dave Gilbert :) > > > > > > > > Right, I need to maintain backwards migration compatibility; tie the > > > > feature to a machine type so it's only used on newer machine types and > > > > then we'll be safe. > > > > > > > > Having said that, what's the symptom when this goes wrong? > > > > > > > > > > Since the started flag is set as soon as the guest driver begins to use > > > the device and remains so until next reset, the associated subsection is > > > basically always emitted when migrating a booted guest. This causes > > > migration to always fail on the target in this case: > > > > I meant what's the symptom without this patch series at all? > > > > Oh sorry... if I got it right, migrating when the guest first kicked the > device but not set the DRIVE_OK bit yet would result in a guest hang on > the destination. Yongji might elaborate a bit more on that. Hmm - the only thing worse than a migration failing with an error is a migration that fails with a hung guest. If you were sending a new subsection in *only* the case where the guest would definitely hang, then I think it would be worth sending the subsection and allowing the backwards migration to fail with the error because it's a bit better than a hung VM. What we cant have though is most backwards migrations failing. Dave > > Dave > > > > > qemu-system-ppc64: Failed to load virtio-net:virtio > > > qemu-system-ppc64: error while loading state for instance 0x0 of device 'pci@800000020000000:00.0/virtio-net' > > > qemu-system-ppc64: load of migration failed: No such file or directory > > > > > > Xie Yongji has just sent a series to fix that, with you in Cc: > > > > > > Cheers, > > > > > > -- > > > Greg > > > > > > > Dave > > > > > > > > > Cheers, > > > > > > > > > > -- > > > > > Greg > > > > > > > > > > > > -- > > > > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK > > > > > -- > > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Wed, 29 May 2019 at 21:02, Greg Kurz <groug@kaod.org> wrote: > > On Wed, 29 May 2019 13:38:19 +0100 > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > * Greg Kurz (groug@kaod.org) wrote: > > > On Wed, 29 May 2019 12:18:50 +0100 > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > > On Tue, 28 May 2019 10:08:54 +1000 > > > > > David Gibson <david@gibson.dropbear.id.au> wrote: > > > > > > > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote: > > > > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started > > > > > > > > flag to indicate whether driver has started the device or not. > > > > > > > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > --- > > > > > > > > include/hw/virtio/virtio.h | 2 ++ > > > > > > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > > > > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > > > > > > index 7140381e3a..27c0efc3d0 100644 > > > > > > > > --- a/include/hw/virtio/virtio.h > > > > > > > > +++ b/include/hw/virtio/virtio.h > > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > > > > > > uint16_t device_id; > > > > > > > > bool vm_running; > > > > > > > > bool broken; /* device in invalid state, needs reset */ > > > > > > > > + bool started; > > > > > > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > > > > > > VMChangeStateEntry *vmstate; > > > > > > > > char *bus_name; > > > > > > > > uint8_t device_endian; > > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > > > > > > index 28056a7ef7..5d533ac74e 100644 > > > > > > > > --- a/hw/virtio/virtio.c > > > > > > > > +++ b/hw/virtio/virtio.c > > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > > > > > > } > > > > > > > > } > > > > > > > > } > > > > > > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > > > > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > + } > > > > > > > > + > > > > > > > > if (k->set_status) { > > > > > > > > k->set_status(vdev, val); > > > > > > > > } > > > > > > > > vdev->status = val; > > > > > > > > + > > > > > > > > return 0; > > > > > > > > } > > > > > > > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > > > > > > k->reset(vdev); > > > > > > > > } > > > > > > > > > > > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > > > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > > > > > > + vdev->started = false; > > > > > > > > vdev->broken = false; > > > > > > > > vdev->guest_features = 0; > > > > > > > > vdev->queue_sel = 0; > > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > > > > > > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > > > > > > { > > > > > > > > + bool ret = false; > > > > > > > > + > > > > > > > > if (vq->vring.desc && vq->handle_aio_output) { > > > > > > > > VirtIODevice *vdev = vq->vdev; > > > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > > - return vq->handle_aio_output(vdev, vq); > > > > > > > > + ret = vq->handle_aio_output(vdev, vq); > > > > > > > > + > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > + vdev->started = true; > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > + } > > > > > > > > } > > > > > > > > > > > > > > > > - return false; > > > > > > > > + return ret; > > > > > > > > } > > > > > > > > > > > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > > vq->handle_output(vdev, vq); > > > > > > > > + > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > + vdev->started = true; > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > + } > > > > > > > > } > > > > > > > > } > > > > > > > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > > > > > > } else if (vq->handle_output) { > > > > > > > > vq->handle_output(vdev, vq); > > > > > > > > } > > > > > > > > + > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > + vdev->started = true; > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > + } > > > > > > > > } > > > > > > > > > > > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > > > > > return vdev->broken; > > > > > > > > } > > > > > > > > > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > > > > > +{ > > > > > > > > + VirtIODevice *vdev = opaque; > > > > > > > > + > > > > > > > > + return vdev->started; > > > > > > > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > > > > > probably always when it comes to live migration. > > > > > > > > > > > > > > My understanding is that we do try to support backward migration though. It > > > > > > > is a regular practice in datacenters to migrate workloads without having to > > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > > > > > many times in the past because customers had filed bugs. > > > > > > > > > > > > > > Cc'ing David for his opinion. > > > > > > > > > > > > Uh.. did you mean to CC me, or Dave Gilbert? > > > > > > > > > > > > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable > > > > > as well. I remember being involved in backward migration fixes for spapr > > > > > several times. > > > > > > > > > > > I mean, I think you're right that we should try to maintain backwards > > > > > > migration, but this isn't really my area of authority. > > > > > > > > > > > > > > > > Cc'ing Dave Gilbert :) > > > > > > > > Right, I need to maintain backwards migration compatibility; tie the > > > > feature to a machine type so it's only used on newer machine types and > > > > then we'll be safe. > > > > > > > > Having said that, what's the symptom when this goes wrong? > > > > > > > > > > Since the started flag is set as soon as the guest driver begins to use > > > the device and remains so until next reset, the associated subsection is > > > basically always emitted when migrating a booted guest. This causes > > > migration to always fail on the target in this case: > > > > I meant what's the symptom without this patch series at all? > > > > Oh sorry... if I got it right, migrating when the guest first kicked the > device but not set the DRIVE_OK bit yet would result in a guest hang on > the destination. Yongji might elaborate a bit more on that. > Yes, that's the issue that my patch tries to fix. It only happens in a short window for virtio 1.0 transitional devices. Thanks, Yongji
On Wed, 29 May 2019 at 21:43, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote: > > * Greg Kurz (groug@kaod.org) wrote: > > On Wed, 29 May 2019 13:38:19 +0100 > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > On Wed, 29 May 2019 12:18:50 +0100 > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > > > On Tue, 28 May 2019 10:08:54 +1000 > > > > > > David Gibson <david@gibson.dropbear.id.au> wrote: > > > > > > > > > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote: > > > > > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > > > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started > > > > > > > > > flag to indicate whether driver has started the device or not. > > > > > > > > > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > > --- > > > > > > > > > include/hw/virtio/virtio.h | 2 ++ > > > > > > > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > > > > > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > > > > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > > > > > > > index 7140381e3a..27c0efc3d0 100644 > > > > > > > > > --- a/include/hw/virtio/virtio.h > > > > > > > > > +++ b/include/hw/virtio/virtio.h > > > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > > > > > > > uint16_t device_id; > > > > > > > > > bool vm_running; > > > > > > > > > bool broken; /* device in invalid state, needs reset */ > > > > > > > > > + bool started; > > > > > > > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > > > > > > > VMChangeStateEntry *vmstate; > > > > > > > > > char *bus_name; > > > > > > > > > uint8_t device_endian; > > > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > > > > > > > index 28056a7ef7..5d533ac74e 100644 > > > > > > > > > --- a/hw/virtio/virtio.c > > > > > > > > > +++ b/hw/virtio/virtio.c > > > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > > > > > > > } > > > > > > > > > } > > > > > > > > > } > > > > > > > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > > > > > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > + } > > > > > > > > > + > > > > > > > > > if (k->set_status) { > > > > > > > > > k->set_status(vdev, val); > > > > > > > > > } > > > > > > > > > vdev->status = val; > > > > > > > > > + > > > > > > > > > return 0; > > > > > > > > > } > > > > > > > > > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > > > > > > > k->reset(vdev); > > > > > > > > > } > > > > > > > > > > > > > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > > > > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > > > > > > > + vdev->started = false; > > > > > > > > > vdev->broken = false; > > > > > > > > > vdev->guest_features = 0; > > > > > > > > > vdev->queue_sel = 0; > > > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > > > > > > > > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > > > > > > > { > > > > > > > > > + bool ret = false; > > > > > > > > > + > > > > > > > > > if (vq->vring.desc && vq->handle_aio_output) { > > > > > > > > > VirtIODevice *vdev = vq->vdev; > > > > > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > > > - return vq->handle_aio_output(vdev, vq); > > > > > > > > > + ret = vq->handle_aio_output(vdev, vq); > > > > > > > > > + > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > + vdev->started = true; > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > + } > > > > > > > > > } > > > > > > > > > > > > > > > > > > - return false; > > > > > > > > > + return ret; > > > > > > > > > } > > > > > > > > > > > > > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > > > vq->handle_output(vdev, vq); > > > > > > > > > + > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > + vdev->started = true; > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > + } > > > > > > > > > } > > > > > > > > > } > > > > > > > > > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > > > > > > > } else if (vq->handle_output) { > > > > > > > > > vq->handle_output(vdev, vq); > > > > > > > > > } > > > > > > > > > + > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > + vdev->started = true; > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > + } > > > > > > > > > } > > > > > > > > > > > > > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > > > > > > return vdev->broken; > > > > > > > > > } > > > > > > > > > > > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > > > > > > +{ > > > > > > > > > + VirtIODevice *vdev = opaque; > > > > > > > > > + > > > > > > > > > + return vdev->started; > > > > > > > > > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > > > > > > probably always when it comes to live migration. > > > > > > > > > > > > > > > > My understanding is that we do try to support backward migration though. It > > > > > > > > is a regular practice in datacenters to migrate workloads without having to > > > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > > > > > > many times in the past because customers had filed bugs. > > > > > > > > > > > > > > > > Cc'ing David for his opinion. > > > > > > > > > > > > > > Uh.. did you mean to CC me, or Dave Gilbert? > > > > > > > > > > > > > > > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable > > > > > > as well. I remember being involved in backward migration fixes for spapr > > > > > > several times. > > > > > > > > > > > > > I mean, I think you're right that we should try to maintain backwards > > > > > > > migration, but this isn't really my area of authority. > > > > > > > > > > > > > > > > > > > Cc'ing Dave Gilbert :) > > > > > > > > > > Right, I need to maintain backwards migration compatibility; tie the > > > > > feature to a machine type so it's only used on newer machine types and > > > > > then we'll be safe. > > > > > > > > > > Having said that, what's the symptom when this goes wrong? > > > > > > > > > > > > > Since the started flag is set as soon as the guest driver begins to use > > > > the device and remains so until next reset, the associated subsection is > > > > basically always emitted when migrating a booted guest. This causes > > > > migration to always fail on the target in this case: > > > > > > I meant what's the symptom without this patch series at all? > > > > > > > Oh sorry... if I got it right, migrating when the guest first kicked the > > device but not set the DRIVE_OK bit yet would result in a guest hang on > > the destination. Yongji might elaborate a bit more on that. > > Hmm - the only thing worse than a migration failing with an error is a > migration that fails with a hung guest. > > If you were sending a new subsection in *only* the case where the guest > would definitely hang, then I think it would be worth sending the > subsection and allowing the backwards migration to fail with the error > because it's a bit better than a hung VM. > I considered this approach before. But it needs to add some complex logic in virtio code to make sure migration works well between different qemu versions. Since it's hardly to trigger the hung VM bug. So I still try to use the general way (tie the feature to a machine type) to fix this migration issue. It's easily to ensure the compatibility Thanks, Yongji
* Yongji Xie (elohimes@gmail.com) wrote: > On Wed, 29 May 2019 at 21:43, Dr. David Alan Gilbert > <dgilbert@redhat.com> wrote: > > > > * Greg Kurz (groug@kaod.org) wrote: > > > On Wed, 29 May 2019 13:38:19 +0100 > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > > On Wed, 29 May 2019 12:18:50 +0100 > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > > > > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > > > > On Tue, 28 May 2019 10:08:54 +1000 > > > > > > > David Gibson <david@gibson.dropbear.id.au> wrote: > > > > > > > > > > > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote: > > > > > > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > > > > > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started > > > > > > > > > > flag to indicate whether driver has started the device or not. > > > > > > > > > > > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > > > --- > > > > > > > > > > include/hw/virtio/virtio.h | 2 ++ > > > > > > > > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > > > > > > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > > > > > > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > > > > > > > > index 7140381e3a..27c0efc3d0 100644 > > > > > > > > > > --- a/include/hw/virtio/virtio.h > > > > > > > > > > +++ b/include/hw/virtio/virtio.h > > > > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > > > > > > > > uint16_t device_id; > > > > > > > > > > bool vm_running; > > > > > > > > > > bool broken; /* device in invalid state, needs reset */ > > > > > > > > > > + bool started; > > > > > > > > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > > > > > > > > VMChangeStateEntry *vmstate; > > > > > > > > > > char *bus_name; > > > > > > > > > > uint8_t device_endian; > > > > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > > > > > > > > index 28056a7ef7..5d533ac74e 100644 > > > > > > > > > > --- a/hw/virtio/virtio.c > > > > > > > > > > +++ b/hw/virtio/virtio.c > > > > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > > > > > > > > } > > > > > > > > > > } > > > > > > > > > > } > > > > > > > > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > > > > > > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > + } > > > > > > > > > > + > > > > > > > > > > if (k->set_status) { > > > > > > > > > > k->set_status(vdev, val); > > > > > > > > > > } > > > > > > > > > > vdev->status = val; > > > > > > > > > > + > > > > > > > > > > return 0; > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > > > > > > > > k->reset(vdev); > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > > > > > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > > > > > > > > + vdev->started = false; > > > > > > > > > > vdev->broken = false; > > > > > > > > > > vdev->guest_features = 0; > > > > > > > > > > vdev->queue_sel = 0; > > > > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > > > > > > > > > > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > > > > > > > > { > > > > > > > > > > + bool ret = false; > > > > > > > > > > + > > > > > > > > > > if (vq->vring.desc && vq->handle_aio_output) { > > > > > > > > > > VirtIODevice *vdev = vq->vdev; > > > > > > > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > > > > - return vq->handle_aio_output(vdev, vq); > > > > > > > > > > + ret = vq->handle_aio_output(vdev, vq); > > > > > > > > > > + > > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > > + vdev->started = true; > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > + } > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > - return false; > > > > > > > > > > + return ret; > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > > > > vq->handle_output(vdev, vq); > > > > > > > > > > + > > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > > + vdev->started = true; > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > + } > > > > > > > > > > } > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > > > > > > > > } else if (vq->handle_output) { > > > > > > > > > > vq->handle_output(vdev, vq); > > > > > > > > > > } > > > > > > > > > > + > > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > > + vdev->started = true; > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > + } > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > > > > > > > return vdev->broken; > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > > > > > > > +{ > > > > > > > > > > + VirtIODevice *vdev = opaque; > > > > > > > > > > + > > > > > > > > > > + return vdev->started; > > > > > > > > > > > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > > > > > > > probably always when it comes to live migration. > > > > > > > > > > > > > > > > > > My understanding is that we do try to support backward migration though. It > > > > > > > > > is a regular practice in datacenters to migrate workloads without having to > > > > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > > > > > > > many times in the past because customers had filed bugs. > > > > > > > > > > > > > > > > > > Cc'ing David for his opinion. > > > > > > > > > > > > > > > > Uh.. did you mean to CC me, or Dave Gilbert? > > > > > > > > > > > > > > > > > > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable > > > > > > > as well. I remember being involved in backward migration fixes for spapr > > > > > > > several times. > > > > > > > > > > > > > > > I mean, I think you're right that we should try to maintain backwards > > > > > > > > migration, but this isn't really my area of authority. > > > > > > > > > > > > > > > > > > > > > > Cc'ing Dave Gilbert :) > > > > > > > > > > > > Right, I need to maintain backwards migration compatibility; tie the > > > > > > feature to a machine type so it's only used on newer machine types and > > > > > > then we'll be safe. > > > > > > > > > > > > Having said that, what's the symptom when this goes wrong? > > > > > > > > > > > > > > > > Since the started flag is set as soon as the guest driver begins to use > > > > > the device and remains so until next reset, the associated subsection is > > > > > basically always emitted when migrating a booted guest. This causes > > > > > migration to always fail on the target in this case: > > > > > > > > I meant what's the symptom without this patch series at all? > > > > > > > > > > Oh sorry... if I got it right, migrating when the guest first kicked the > > > device but not set the DRIVE_OK bit yet would result in a guest hang on > > > the destination. Yongji might elaborate a bit more on that. > > > > Hmm - the only thing worse than a migration failing with an error is a > > migration that fails with a hung guest. > > > > If you were sending a new subsection in *only* the case where the guest > > would definitely hang, then I think it would be worth sending the > > subsection and allowing the backwards migration to fail with the error > > because it's a bit better than a hung VM. > > > > I considered this approach before. But it needs to add some complex > logic in virtio code to make sure migration works well between > different qemu versions. Since it's hardly to trigger the hung VM bug. > So I still try to use the general way (tie the feature to a machine > type) to fix this migration issue. It's easily to ensure the > compatibility OK, great. For reference could you give me a bit more info on how exactly the failure happens, how you spot it and what the symptoms are; so that when someone comes to me with a hung VM I know what to look for? Dave > Thanks, > Yongji -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Wed, 29 May 2019 at 22:42, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote: > > * Yongji Xie (elohimes@gmail.com) wrote: > > On Wed, 29 May 2019 at 21:43, Dr. David Alan Gilbert > > <dgilbert@redhat.com> wrote: > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > On Wed, 29 May 2019 13:38:19 +0100 > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > > > On Wed, 29 May 2019 12:18:50 +0100 > > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > > > > > > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > > > > > On Tue, 28 May 2019 10:08:54 +1000 > > > > > > > > David Gibson <david@gibson.dropbear.id.au> wrote: > > > > > > > > > > > > > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote: > > > > > > > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > > > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > > > > > > > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > > > > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > > > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started > > > > > > > > > > > flag to indicate whether driver has started the device or not. > > > > > > > > > > > > > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > > > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > > > > --- > > > > > > > > > > > include/hw/virtio/virtio.h | 2 ++ > > > > > > > > > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > > > > > > > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > > > > > > > > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > > > > > > > > > index 7140381e3a..27c0efc3d0 100644 > > > > > > > > > > > --- a/include/hw/virtio/virtio.h > > > > > > > > > > > +++ b/include/hw/virtio/virtio.h > > > > > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > > > > > > > > > uint16_t device_id; > > > > > > > > > > > bool vm_running; > > > > > > > > > > > bool broken; /* device in invalid state, needs reset */ > > > > > > > > > > > + bool started; > > > > > > > > > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > > > > > > > > > VMChangeStateEntry *vmstate; > > > > > > > > > > > char *bus_name; > > > > > > > > > > > uint8_t device_endian; > > > > > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > > > > > > > > > index 28056a7ef7..5d533ac74e 100644 > > > > > > > > > > > --- a/hw/virtio/virtio.c > > > > > > > > > > > +++ b/hw/virtio/virtio.c > > > > > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > > > > > > > > > } > > > > > > > > > > > } > > > > > > > > > > > } > > > > > > > > > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > > > > > > > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > > + } > > > > > > > > > > > + > > > > > > > > > > > if (k->set_status) { > > > > > > > > > > > k->set_status(vdev, val); > > > > > > > > > > > } > > > > > > > > > > > vdev->status = val; > > > > > > > > > > > + > > > > > > > > > > > return 0; > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > > > > > > > > > k->reset(vdev); > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > > > > > > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > > > > > > > > > + vdev->started = false; > > > > > > > > > > > vdev->broken = false; > > > > > > > > > > > vdev->guest_features = 0; > > > > > > > > > > > vdev->queue_sel = 0; > > > > > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > > > > > > > > > > > > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > > > > > > > > > { > > > > > > > > > > > + bool ret = false; > > > > > > > > > > > + > > > > > > > > > > > if (vq->vring.desc && vq->handle_aio_output) { > > > > > > > > > > > VirtIODevice *vdev = vq->vdev; > > > > > > > > > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > > > > > - return vq->handle_aio_output(vdev, vq); > > > > > > > > > > > + ret = vq->handle_aio_output(vdev, vq); > > > > > > > > > > > + > > > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > > > + vdev->started = true; > > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > > + } > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > - return false; > > > > > > > > > > > + return ret; > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > > > > > vq->handle_output(vdev, vq); > > > > > > > > > > > + > > > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > > > + vdev->started = true; > > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > > + } > > > > > > > > > > > } > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > > > > > > > > > } else if (vq->handle_output) { > > > > > > > > > > > vq->handle_output(vdev, vq); > > > > > > > > > > > } > > > > > > > > > > > + > > > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > > > + vdev->started = true; > > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > > + } > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > > > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > > > > > > > > return vdev->broken; > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > > > > > > > > +{ > > > > > > > > > > > + VirtIODevice *vdev = opaque; > > > > > > > > > > > + > > > > > > > > > > > + return vdev->started; > > > > > > > > > > > > > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > > > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > > > > > > > > probably always when it comes to live migration. > > > > > > > > > > > > > > > > > > > > My understanding is that we do try to support backward migration though. It > > > > > > > > > > is a regular practice in datacenters to migrate workloads without having to > > > > > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > > > > > > > > many times in the past because customers had filed bugs. > > > > > > > > > > > > > > > > > > > > Cc'ing David for his opinion. > > > > > > > > > > > > > > > > > > Uh.. did you mean to CC me, or Dave Gilbert? > > > > > > > > > > > > > > > > > > > > > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable > > > > > > > > as well. I remember being involved in backward migration fixes for spapr > > > > > > > > several times. > > > > > > > > > > > > > > > > > I mean, I think you're right that we should try to maintain backwards > > > > > > > > > migration, but this isn't really my area of authority. > > > > > > > > > > > > > > > > > > > > > > > > > Cc'ing Dave Gilbert :) > > > > > > > > > > > > > > Right, I need to maintain backwards migration compatibility; tie the > > > > > > > feature to a machine type so it's only used on newer machine types and > > > > > > > then we'll be safe. > > > > > > > > > > > > > > Having said that, what's the symptom when this goes wrong? > > > > > > > > > > > > > > > > > > > Since the started flag is set as soon as the guest driver begins to use > > > > > > the device and remains so until next reset, the associated subsection is > > > > > > basically always emitted when migrating a booted guest. This causes > > > > > > migration to always fail on the target in this case: > > > > > > > > > > I meant what's the symptom without this patch series at all? > > > > > > > > > > > > > Oh sorry... if I got it right, migrating when the guest first kicked the > > > > device but not set the DRIVE_OK bit yet would result in a guest hang on > > > > the destination. Yongji might elaborate a bit more on that. > > > > > > Hmm - the only thing worse than a migration failing with an error is a > > > migration that fails with a hung guest. > > > > > > If you were sending a new subsection in *only* the case where the guest > > > would definitely hang, then I think it would be worth sending the > > > subsection and allowing the backwards migration to fail with the error > > > because it's a bit better than a hung VM. > > > > > > > I considered this approach before. But it needs to add some complex > > logic in virtio code to make sure migration works well between > > different qemu versions. Since it's hardly to trigger the hung VM bug. > > So I still try to use the general way (tie the feature to a machine > > type) to fix this migration issue. It's easily to ensure the > > compatibility > > OK, great. For reference could you give me a bit more info on how > exactly the failure happens, how you spot it and what the symptoms are; > so that when someone comes to me with a hung VM I know what to look for? > Oh, sure. This failure would only happen on an old guest (e.g. centos 6u5, centos 6u3) with virtio-blk/vhost-user-blk device. This kind of guest might be hung when migration completes at the point that guest is booting (loading virtio-blk driver) because its first I/O would be hung. Thanks, Yongji
* Yongji Xie (elohimes@gmail.com) wrote: > On Wed, 29 May 2019 at 22:42, Dr. David Alan Gilbert > <dgilbert@redhat.com> wrote: > > > > * Yongji Xie (elohimes@gmail.com) wrote: > > > On Wed, 29 May 2019 at 21:43, Dr. David Alan Gilbert > > > <dgilbert@redhat.com> wrote: > > > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > > On Wed, 29 May 2019 13:38:19 +0100 > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > > > > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > > > > On Wed, 29 May 2019 12:18:50 +0100 > > > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > > > > > > > > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > > > > > > On Tue, 28 May 2019 10:08:54 +1000 > > > > > > > > > David Gibson <david@gibson.dropbear.id.au> wrote: > > > > > > > > > > > > > > > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote: > > > > > > > > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > > > > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > > > > > > > > > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > > > > > > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > > > > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started > > > > > > > > > > > > flag to indicate whether driver has started the device or not. > > > > > > > > > > > > > > > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > > > > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > > > > > --- > > > > > > > > > > > > include/hw/virtio/virtio.h | 2 ++ > > > > > > > > > > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > > > > > > > > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > > > > > > > > > > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > > > > > > > > > > index 7140381e3a..27c0efc3d0 100644 > > > > > > > > > > > > --- a/include/hw/virtio/virtio.h > > > > > > > > > > > > +++ b/include/hw/virtio/virtio.h > > > > > > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > > > > > > > > > > uint16_t device_id; > > > > > > > > > > > > bool vm_running; > > > > > > > > > > > > bool broken; /* device in invalid state, needs reset */ > > > > > > > > > > > > + bool started; > > > > > > > > > > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > > > > > > > > > > VMChangeStateEntry *vmstate; > > > > > > > > > > > > char *bus_name; > > > > > > > > > > > > uint8_t device_endian; > > > > > > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > > > > > > > > > > index 28056a7ef7..5d533ac74e 100644 > > > > > > > > > > > > --- a/hw/virtio/virtio.c > > > > > > > > > > > > +++ b/hw/virtio/virtio.c > > > > > > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > > > > > > > > > > } > > > > > > > > > > > > } > > > > > > > > > > > > } > > > > > > > > > > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > > > > > > > > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > > > + } > > > > > > > > > > > > + > > > > > > > > > > > > if (k->set_status) { > > > > > > > > > > > > k->set_status(vdev, val); > > > > > > > > > > > > } > > > > > > > > > > > > vdev->status = val; > > > > > > > > > > > > + > > > > > > > > > > > > return 0; > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > > > > > > > > > > k->reset(vdev); > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > > > > > > > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > > > > > > > > > > + vdev->started = false; > > > > > > > > > > > > vdev->broken = false; > > > > > > > > > > > > vdev->guest_features = 0; > > > > > > > > > > > > vdev->queue_sel = 0; > > > > > > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > > > > > > > > > > > > > > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > > > > > > > > > > { > > > > > > > > > > > > + bool ret = false; > > > > > > > > > > > > + > > > > > > > > > > > > if (vq->vring.desc && vq->handle_aio_output) { > > > > > > > > > > > > VirtIODevice *vdev = vq->vdev; > > > > > > > > > > > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > > > > > > - return vq->handle_aio_output(vdev, vq); > > > > > > > > > > > > + ret = vq->handle_aio_output(vdev, vq); > > > > > > > > > > > > + > > > > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > > > > + vdev->started = true; > > > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > > > + } > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > - return false; > > > > > > > > > > > > + return ret; > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > > > > > > vq->handle_output(vdev, vq); > > > > > > > > > > > > + > > > > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > > > > + vdev->started = true; > > > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > > > + } > > > > > > > > > > > > } > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > > > > > > > > > > } else if (vq->handle_output) { > > > > > > > > > > > > vq->handle_output(vdev, vq); > > > > > > > > > > > > } > > > > > > > > > > > > + > > > > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > > > > + vdev->started = true; > > > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > > > + } > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > > > > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > > > > > > > > > return vdev->broken; > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > > > > > > > > > +{ > > > > > > > > > > > > + VirtIODevice *vdev = opaque; > > > > > > > > > > > > + > > > > > > > > > > > > + return vdev->started; > > > > > > > > > > > > > > > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > > > > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > > > > > > > > > probably always when it comes to live migration. > > > > > > > > > > > > > > > > > > > > > > My understanding is that we do try to support backward migration though. It > > > > > > > > > > > is a regular practice in datacenters to migrate workloads without having to > > > > > > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > > > > > > > > > many times in the past because customers had filed bugs. > > > > > > > > > > > > > > > > > > > > > > Cc'ing David for his opinion. > > > > > > > > > > > > > > > > > > > > Uh.. did you mean to CC me, or Dave Gilbert? > > > > > > > > > > > > > > > > > > > > > > > > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable > > > > > > > > > as well. I remember being involved in backward migration fixes for spapr > > > > > > > > > several times. > > > > > > > > > > > > > > > > > > > I mean, I think you're right that we should try to maintain backwards > > > > > > > > > > migration, but this isn't really my area of authority. > > > > > > > > > > > > > > > > > > > > > > > > > > > > Cc'ing Dave Gilbert :) > > > > > > > > > > > > > > > > Right, I need to maintain backwards migration compatibility; tie the > > > > > > > > feature to a machine type so it's only used on newer machine types and > > > > > > > > then we'll be safe. > > > > > > > > > > > > > > > > Having said that, what's the symptom when this goes wrong? > > > > > > > > > > > > > > > > > > > > > > Since the started flag is set as soon as the guest driver begins to use > > > > > > > the device and remains so until next reset, the associated subsection is > > > > > > > basically always emitted when migrating a booted guest. This causes > > > > > > > migration to always fail on the target in this case: > > > > > > > > > > > > I meant what's the symptom without this patch series at all? > > > > > > > > > > > > > > > > Oh sorry... if I got it right, migrating when the guest first kicked the > > > > > device but not set the DRIVE_OK bit yet would result in a guest hang on > > > > > the destination. Yongji might elaborate a bit more on that. > > > > > > > > Hmm - the only thing worse than a migration failing with an error is a > > > > migration that fails with a hung guest. > > > > > > > > If you were sending a new subsection in *only* the case where the guest > > > > would definitely hang, then I think it would be worth sending the > > > > subsection and allowing the backwards migration to fail with the error > > > > because it's a bit better than a hung VM. > > > > > > > > > > I considered this approach before. But it needs to add some complex > > > logic in virtio code to make sure migration works well between > > > different qemu versions. Since it's hardly to trigger the hung VM bug. > > > So I still try to use the general way (tie the feature to a machine > > > type) to fix this migration issue. It's easily to ensure the > > > compatibility > > > > OK, great. For reference could you give me a bit more info on how > > exactly the failure happens, how you spot it and what the symptoms are; > > so that when someone comes to me with a hung VM I know what to look for? > > > > Oh, sure. This failure would only happen on an old guest (e.g. centos > 6u5, centos 6u3) with virtio-blk/vhost-user-blk device. This kind of > guest might be hung when migration completes at the point that guest > is booting (loading virtio-blk driver) because its first I/O would be > hung. OK, thanks. But a migration when the VM is running is fine? - it's only a problem if it happens during bootup, before the driver starts? Dave > Thanks, > Yongji -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Thu, 30 May 2019 at 17:06, Dr. David Alan Gilbert <dgilbert@redhat.com> wrote: > > * Yongji Xie (elohimes@gmail.com) wrote: > > On Wed, 29 May 2019 at 22:42, Dr. David Alan Gilbert > > <dgilbert@redhat.com> wrote: > > > > > > * Yongji Xie (elohimes@gmail.com) wrote: > > > > On Wed, 29 May 2019 at 21:43, Dr. David Alan Gilbert > > > > <dgilbert@redhat.com> wrote: > > > > > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > > > On Wed, 29 May 2019 13:38:19 +0100 > > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > > > > > > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > > > > > On Wed, 29 May 2019 12:18:50 +0100 > > > > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > > > > > > > > > > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > > > > > > > On Tue, 28 May 2019 10:08:54 +1000 > > > > > > > > > > David Gibson <david@gibson.dropbear.id.au> wrote: > > > > > > > > > > > > > > > > > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote: > > > > > > > > > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > > > > > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > > > > > > > > > > > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > > > > > > > > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > > > > > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started > > > > > > > > > > > > > flag to indicate whether driver has started the device or not. > > > > > > > > > > > > > > > > > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > > > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > > > > > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > > > > > > --- > > > > > > > > > > > > > include/hw/virtio/virtio.h | 2 ++ > > > > > > > > > > > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > > > > > > > > > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > > > > > > > > > > > > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > > > > > > > > > > > index 7140381e3a..27c0efc3d0 100644 > > > > > > > > > > > > > --- a/include/hw/virtio/virtio.h > > > > > > > > > > > > > +++ b/include/hw/virtio/virtio.h > > > > > > > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > > > > > > > > > > > uint16_t device_id; > > > > > > > > > > > > > bool vm_running; > > > > > > > > > > > > > bool broken; /* device in invalid state, needs reset */ > > > > > > > > > > > > > + bool started; > > > > > > > > > > > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > > > > > > > > > > > VMChangeStateEntry *vmstate; > > > > > > > > > > > > > char *bus_name; > > > > > > > > > > > > > uint8_t device_endian; > > > > > > > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > > > > > > > > > > > index 28056a7ef7..5d533ac74e 100644 > > > > > > > > > > > > > --- a/hw/virtio/virtio.c > > > > > > > > > > > > > +++ b/hw/virtio/virtio.c > > > > > > > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > > > > > > > > > > > } > > > > > > > > > > > > > } > > > > > > > > > > > > > } > > > > > > > > > > > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > > > > > > > > > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > > > > + } > > > > > > > > > > > > > + > > > > > > > > > > > > > if (k->set_status) { > > > > > > > > > > > > > k->set_status(vdev, val); > > > > > > > > > > > > > } > > > > > > > > > > > > > vdev->status = val; > > > > > > > > > > > > > + > > > > > > > > > > > > > return 0; > > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > > > > > > > > > > > k->reset(vdev); > > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > > > > > > > > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > > > > > > > > > > > + vdev->started = false; > > > > > > > > > > > > > vdev->broken = false; > > > > > > > > > > > > > vdev->guest_features = 0; > > > > > > > > > > > > > vdev->queue_sel = 0; > > > > > > > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > > > > > > > > > > > > > > > > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > > > > > > > > > > > { > > > > > > > > > > > > > + bool ret = false; > > > > > > > > > > > > > + > > > > > > > > > > > > > if (vq->vring.desc && vq->handle_aio_output) { > > > > > > > > > > > > > VirtIODevice *vdev = vq->vdev; > > > > > > > > > > > > > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > > > > > > > - return vq->handle_aio_output(vdev, vq); > > > > > > > > > > > > > + ret = vq->handle_aio_output(vdev, vq); > > > > > > > > > > > > > + > > > > > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > > > > > + vdev->started = true; > > > > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > > > > + } > > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > > > - return false; > > > > > > > > > > > > > + return ret; > > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > > > > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > > > > > > > vq->handle_output(vdev, vq); > > > > > > > > > > > > > + > > > > > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > > > > > + vdev->started = true; > > > > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > > > > + } > > > > > > > > > > > > > } > > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > > > > > > > > > > > } else if (vq->handle_output) { > > > > > > > > > > > > > vq->handle_output(vdev, vq); > > > > > > > > > > > > > } > > > > > > > > > > > > > + > > > > > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > > > > > + vdev->started = true; > > > > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > > > > + } > > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > > > > > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > > > > > > > > > > return vdev->broken; > > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > > > > > > > > > > +{ > > > > > > > > > > > > > + VirtIODevice *vdev = opaque; > > > > > > > > > > > > > + > > > > > > > > > > > > > + return vdev->started; > > > > > > > > > > > > > > > > > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > > > > > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > > > > > > > > > > probably always when it comes to live migration. > > > > > > > > > > > > > > > > > > > > > > > > My understanding is that we do try to support backward migration though. It > > > > > > > > > > > > is a regular practice in datacenters to migrate workloads without having to > > > > > > > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > > > > > > > > > > many times in the past because customers had filed bugs. > > > > > > > > > > > > > > > > > > > > > > > > Cc'ing David for his opinion. > > > > > > > > > > > > > > > > > > > > > > Uh.. did you mean to CC me, or Dave Gilbert? > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable > > > > > > > > > > as well. I remember being involved in backward migration fixes for spapr > > > > > > > > > > several times. > > > > > > > > > > > > > > > > > > > > > I mean, I think you're right that we should try to maintain backwards > > > > > > > > > > > migration, but this isn't really my area of authority. > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > Cc'ing Dave Gilbert :) > > > > > > > > > > > > > > > > > > Right, I need to maintain backwards migration compatibility; tie the > > > > > > > > > feature to a machine type so it's only used on newer machine types and > > > > > > > > > then we'll be safe. > > > > > > > > > > > > > > > > > > Having said that, what's the symptom when this goes wrong? > > > > > > > > > > > > > > > > > > > > > > > > > Since the started flag is set as soon as the guest driver begins to use > > > > > > > > the device and remains so until next reset, the associated subsection is > > > > > > > > basically always emitted when migrating a booted guest. This causes > > > > > > > > migration to always fail on the target in this case: > > > > > > > > > > > > > > I meant what's the symptom without this patch series at all? > > > > > > > > > > > > > > > > > > > Oh sorry... if I got it right, migrating when the guest first kicked the > > > > > > device but not set the DRIVE_OK bit yet would result in a guest hang on > > > > > > the destination. Yongji might elaborate a bit more on that. > > > > > > > > > > Hmm - the only thing worse than a migration failing with an error is a > > > > > migration that fails with a hung guest. > > > > > > > > > > If you were sending a new subsection in *only* the case where the guest > > > > > would definitely hang, then I think it would be worth sending the > > > > > subsection and allowing the backwards migration to fail with the error > > > > > because it's a bit better than a hung VM. > > > > > > > > > > > > > I considered this approach before. But it needs to add some complex > > > > logic in virtio code to make sure migration works well between > > > > different qemu versions. Since it's hardly to trigger the hung VM bug. > > > > So I still try to use the general way (tie the feature to a machine > > > > type) to fix this migration issue. It's easily to ensure the > > > > compatibility > > > > > > OK, great. For reference could you give me a bit more info on how > > > exactly the failure happens, how you spot it and what the symptoms are; > > > so that when someone comes to me with a hung VM I know what to look for? > > > > > > > Oh, sure. This failure would only happen on an old guest (e.g. centos > > 6u5, centos 6u3) with virtio-blk/vhost-user-blk device. This kind of > > guest might be hung when migration completes at the point that guest > > is booting (loading virtio-blk driver) because its first I/O would be > > hung. > > OK, thanks. > But a migration when the VM is running is fine? - it's only a problem > if it happens during bootup, before the driver starts? > Yes. Running VM would be fine unless we reload the driver manually at that point... Thanks, Yongji
* Yongji Xie (elohimes@gmail.com) wrote: > On Thu, 30 May 2019 at 17:06, Dr. David Alan Gilbert > <dgilbert@redhat.com> wrote: > > > > * Yongji Xie (elohimes@gmail.com) wrote: > > > On Wed, 29 May 2019 at 22:42, Dr. David Alan Gilbert > > > <dgilbert@redhat.com> wrote: > > > > > > > > * Yongji Xie (elohimes@gmail.com) wrote: > > > > > On Wed, 29 May 2019 at 21:43, Dr. David Alan Gilbert > > > > > <dgilbert@redhat.com> wrote: > > > > > > > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > > > > On Wed, 29 May 2019 13:38:19 +0100 > > > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > > > > > > > > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > > > > > > On Wed, 29 May 2019 12:18:50 +0100 > > > > > > > > > "Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > > > > > > > > > > > > > > > > > > > * Greg Kurz (groug@kaod.org) wrote: > > > > > > > > > > > On Tue, 28 May 2019 10:08:54 +1000 > > > > > > > > > > > David Gibson <david@gibson.dropbear.id.au> wrote: > > > > > > > > > > > > > > > > > > > > > > > On Fri, May 24, 2019 at 12:19:09PM +0200, Greg Kurz wrote: > > > > > > > > > > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > > > > > > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > > > > > > > > > > > > > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > > > > > > > > > > > > > > > > > > > The virtio 1.0 transitional devices support driver uses the device > > > > > > > > > > > > > > before setting the DRIVER_OK status bit. So we introduce a started > > > > > > > > > > > > > > flag to indicate whether driver has started the device or not. > > > > > > > > > > > > > > > > > > > > > > > > > > > > Signed-off-by: Xie Yongji <xieyongji@baidu.com> > > > > > > > > > > > > > > Signed-off-by: Zhang Yu <zhangyu31@baidu.com> > > > > > > > > > > > > > > Message-Id: <20190320112646.3712-2-xieyongji@baidu.com> > > > > > > > > > > > > > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > > > > > > > --- > > > > > > > > > > > > > > include/hw/virtio/virtio.h | 2 ++ > > > > > > > > > > > > > > hw/virtio/virtio.c | 52 ++++++++++++++++++++++++++++++++++++-- > > > > > > > > > > > > > > 2 files changed, 52 insertions(+), 2 deletions(-) > > > > > > > > > > > > > > > > > > > > > > > > > > > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > > > > > > > > > > > > > index 7140381e3a..27c0efc3d0 100644 > > > > > > > > > > > > > > --- a/include/hw/virtio/virtio.h > > > > > > > > > > > > > > +++ b/include/hw/virtio/virtio.h > > > > > > > > > > > > > > @@ -105,6 +105,8 @@ struct VirtIODevice > > > > > > > > > > > > > > uint16_t device_id; > > > > > > > > > > > > > > bool vm_running; > > > > > > > > > > > > > > bool broken; /* device in invalid state, needs reset */ > > > > > > > > > > > > > > + bool started; > > > > > > > > > > > > > > + bool start_on_kick; /* virtio 1.0 transitional devices support that */ > > > > > > > > > > > > > > VMChangeStateEntry *vmstate; > > > > > > > > > > > > > > char *bus_name; > > > > > > > > > > > > > > uint8_t device_endian; > > > > > > > > > > > > > > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > > > > > > > > > > > > > > index 28056a7ef7..5d533ac74e 100644 > > > > > > > > > > > > > > --- a/hw/virtio/virtio.c > > > > > > > > > > > > > > +++ b/hw/virtio/virtio.c > > > > > > > > > > > > > > @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) > > > > > > > > > > > > > > } > > > > > > > > > > > > > > } > > > > > > > > > > > > > > } > > > > > > > > > > > > > > + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; > > > > > > > > > > > > > > + if (unlikely(vdev->start_on_kick && vdev->started)) { > > > > > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > > > > > + } > > > > > > > > > > > > > > + > > > > > > > > > > > > > > if (k->set_status) { > > > > > > > > > > > > > > k->set_status(vdev, val); > > > > > > > > > > > > > > } > > > > > > > > > > > > > > vdev->status = val; > > > > > > > > > > > > > > + > > > > > > > > > > > > > > return 0; > > > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > > > > > @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) > > > > > > > > > > > > > > k->reset(vdev); > > > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > > > > > + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && > > > > > > > > > > > > > > + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); > > > > > > > > > > > > > > + vdev->started = false; > > > > > > > > > > > > > > vdev->broken = false; > > > > > > > > > > > > > > vdev->guest_features = 0; > > > > > > > > > > > > > > vdev->queue_sel = 0; > > > > > > > > > > > > > > @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) > > > > > > > > > > > > > > > > > > > > > > > > > > > > static bool virtio_queue_notify_aio_vq(VirtQueue *vq) > > > > > > > > > > > > > > { > > > > > > > > > > > > > > + bool ret = false; > > > > > > > > > > > > > > + > > > > > > > > > > > > > > if (vq->vring.desc && vq->handle_aio_output) { > > > > > > > > > > > > > > VirtIODevice *vdev = vq->vdev; > > > > > > > > > > > > > > > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > > > > > > > > - return vq->handle_aio_output(vdev, vq); > > > > > > > > > > > > > > + ret = vq->handle_aio_output(vdev, vq); > > > > > > > > > > > > > > + > > > > > > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > > > > > > + vdev->started = true; > > > > > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > > > > > + } > > > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > > > > > - return false; > > > > > > > > > > > > > > + return ret; > > > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > > > > > static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > > > > > @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) > > > > > > > > > > > > > > > > > > > > > > > > > > > > trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); > > > > > > > > > > > > > > vq->handle_output(vdev, vq); > > > > > > > > > > > > > > + > > > > > > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > > > > > > + vdev->started = true; > > > > > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > > > > > + } > > > > > > > > > > > > > > } > > > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > > > > > @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) > > > > > > > > > > > > > > } else if (vq->handle_output) { > > > > > > > > > > > > > > vq->handle_output(vdev, vq); > > > > > > > > > > > > > > } > > > > > > > > > > > > > > + > > > > > > > > > > > > > > + if (unlikely(vdev->start_on_kick)) { > > > > > > > > > > > > > > + vdev->started = true; > > > > > > > > > > > > > > + vdev->start_on_kick = false; > > > > > > > > > > > > > > + } > > > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > > > > > uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) > > > > > > > > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > > > > > > > > > > > return vdev->broken; > > > > > > > > > > > > > > } > > > > > > > > > > > > > > > > > > > > > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > > > > > > > > > > > +{ > > > > > > > > > > > > > > + VirtIODevice *vdev = opaque; > > > > > > > > > > > > > > + > > > > > > > > > > > > > > + return vdev->started; > > > > > > > > > > > > > > > > > > > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > > > > > > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > > > > > > > > > > > probably always when it comes to live migration. > > > > > > > > > > > > > > > > > > > > > > > > > > My understanding is that we do try to support backward migration though. It > > > > > > > > > > > > > is a regular practice in datacenters to migrate workloads without having to > > > > > > > > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > > > > > > > > > > > many times in the past because customers had filed bugs. > > > > > > > > > > > > > > > > > > > > > > > > > > Cc'ing David for his opinion. > > > > > > > > > > > > > > > > > > > > > > > > Uh.. did you mean to CC me, or Dave Gilbert? > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > Oops... Dave Gilbert indeed, but you're thoughts on that matter are valuable > > > > > > > > > > > as well. I remember being involved in backward migration fixes for spapr > > > > > > > > > > > several times. > > > > > > > > > > > > > > > > > > > > > > > I mean, I think you're right that we should try to maintain backwards > > > > > > > > > > > > migration, but this isn't really my area of authority. > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > Cc'ing Dave Gilbert :) > > > > > > > > > > > > > > > > > > > > Right, I need to maintain backwards migration compatibility; tie the > > > > > > > > > > feature to a machine type so it's only used on newer machine types and > > > > > > > > > > then we'll be safe. > > > > > > > > > > > > > > > > > > > > Having said that, what's the symptom when this goes wrong? > > > > > > > > > > > > > > > > > > > > > > > > > > > > Since the started flag is set as soon as the guest driver begins to use > > > > > > > > > the device and remains so until next reset, the associated subsection is > > > > > > > > > basically always emitted when migrating a booted guest. This causes > > > > > > > > > migration to always fail on the target in this case: > > > > > > > > > > > > > > > > I meant what's the symptom without this patch series at all? > > > > > > > > > > > > > > > > > > > > > > Oh sorry... if I got it right, migrating when the guest first kicked the > > > > > > > device but not set the DRIVE_OK bit yet would result in a guest hang on > > > > > > > the destination. Yongji might elaborate a bit more on that. > > > > > > > > > > > > Hmm - the only thing worse than a migration failing with an error is a > > > > > > migration that fails with a hung guest. > > > > > > > > > > > > If you were sending a new subsection in *only* the case where the guest > > > > > > would definitely hang, then I think it would be worth sending the > > > > > > subsection and allowing the backwards migration to fail with the error > > > > > > because it's a bit better than a hung VM. > > > > > > > > > > > > > > > > I considered this approach before. But it needs to add some complex > > > > > logic in virtio code to make sure migration works well between > > > > > different qemu versions. Since it's hardly to trigger the hung VM bug. > > > > > So I still try to use the general way (tie the feature to a machine > > > > > type) to fix this migration issue. It's easily to ensure the > > > > > compatibility > > > > > > > > OK, great. For reference could you give me a bit more info on how > > > > exactly the failure happens, how you spot it and what the symptoms are; > > > > so that when someone comes to me with a hung VM I know what to look for? > > > > > > > > > > Oh, sure. This failure would only happen on an old guest (e.g. centos > > > 6u5, centos 6u3) with virtio-blk/vhost-user-blk device. This kind of > > > guest might be hung when migration completes at the point that guest > > > is booting (loading virtio-blk driver) because its first I/O would be > > > hung. > > > > OK, thanks. > > But a migration when the VM is running is fine? - it's only a problem > > if it happens during bootup, before the driver starts? > > > > Yes. Running VM would be fine unless we reload the driver manually at > that point... OK, thanks. Dave > Thanks, > Yongji -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Tue, May 28, 2019 at 10:48:09AM +0800, Yongji Xie wrote: > On Tue, 28 May 2019 at 02:54, Michael S. Tsirkin <mst@redhat.com> wrote: > > > > On Mon, May 27, 2019 at 12:44:46PM +0200, Greg Kurz wrote: > > > On Fri, 24 May 2019 19:56:06 +0800 > > > Yongji Xie <elohimes@gmail.com> wrote: > > > > > > > On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote: > > > > > > > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com> [...] > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > > > return vdev->broken; > > > > > > } > > > > > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > > > +{ > > > > > > + VirtIODevice *vdev = opaque; > > > > > > + > > > > > > + return vdev->started; > > > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > > > probably always when it comes to live migration. > > > > > > > > > > My understanding is that we do try to support backward migration though. It > > > > > is a regular practice in datacenters to migrate workloads without having to > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > > > many times in the past because customers had filed bugs. > > > > > > > > > > > > > If we do need to support backward migration, for this patch, what I > > > > can think of is to only migrate the flag in the case that guest kicks > > > > but not set DRIVER_OK. This could fix backward migration in most case. > > > > > > You mean something like that ? > > > > > > static bool virtio_started_needed(void *opaque) > > > { > > > VirtIODevice *vdev = opaque; > > > > > > return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); > > > } > > > > > > > Not sure if there is a more general approach... > > > > > > > > > > Another approach would be to only implement the started flag for > > > machine version > 4.0. This can be achieved by adding a "use-started" > > > property to the base virtio device, true by default and set to > > > false by hw_compat_4_0. > > > > I think this is best frankly. > > > > Only implement the started flag for machine version > 4.0 might not be > good because vhost-user-blk now need to use this flag. How about only > migrating this flag for machine version > 4.0 instead? Was this implemented? Is migration from QEMU 4.1 to QEMU 4.0 currently broken?
On Fri, 31 May 2019 16:36:33 -0300 Eduardo Habkost <ehabkost@redhat.com> wrote: > On Tue, May 28, 2019 at 10:48:09AM +0800, Yongji Xie wrote: > > On Tue, 28 May 2019 at 02:54, Michael S. Tsirkin <mst@redhat.com> wrote: > > > > > > On Mon, May 27, 2019 at 12:44:46PM +0200, Greg Kurz wrote: > > > > On Fri, 24 May 2019 19:56:06 +0800 > > > > Yongji Xie <elohimes@gmail.com> wrote: > > > > > > > > > On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote: > > > > > > > > > > > > On Mon, 20 May 2019 19:10:35 -0400 > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote: > > > > > > > > > > > > > From: Xie Yongji <xieyongji@baidu.com> > [...] > > > > > > > @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > > > > > > > return vdev->broken; > > > > > > > } > > > > > > > > > > > > > > +static bool virtio_started_needed(void *opaque) > > > > > > > +{ > > > > > > > + VirtIODevice *vdev = opaque; > > > > > > > + > > > > > > > + return vdev->started; > > > > > > > > > > > > Existing machine types don't know about the "virtio/started" subsection. This > > > > > > breaks migration to older QEMUs if the driver has started the device, ie. most > > > > > > probably always when it comes to live migration. > > > > > > > > > > > > My understanding is that we do try to support backward migration though. It > > > > > > is a regular practice in datacenters to migrate workloads without having to > > > > > > take care of the QEMU version. FWIW I had to fix similar issues downstream > > > > > > many times in the past because customers had filed bugs. > > > > > > > > > > > > > > > > If we do need to support backward migration, for this patch, what I > > > > > can think of is to only migrate the flag in the case that guest kicks > > > > > but not set DRIVER_OK. This could fix backward migration in most case. > > > > > > > > You mean something like that ? > > > > > > > > static bool virtio_started_needed(void *opaque) > > > > { > > > > VirtIODevice *vdev = opaque; > > > > > > > > return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); > > > > } > > > > > > > > > Not sure if there is a more general approach... > > > > > > > > > > > > > Another approach would be to only implement the started flag for > > > > machine version > 4.0. This can be achieved by adding a "use-started" > > > > property to the base virtio device, true by default and set to > > > > false by hw_compat_4_0. > > > > > > I think this is best frankly. > > > > > > > Only implement the started flag for machine version > 4.0 might not be > > good because vhost-user-blk now need to use this flag. How about only > > migrating this flag for machine version > 4.0 instead? > > Was this implemented? Is migration from QEMU 4.1 to QEMU 4.0 > currently broken? > Answer is yes for both questions.
On 01/06/2019 17:49, Greg Kurz wrote: > On Fri, 31 May 2019 16:36:33 -0300 > Eduardo Habkost <ehabkost@redhat.com> wrote: > >> On Tue, May 28, 2019 at 10:48:09AM +0800, Yongji Xie wrote: >>> On Tue, 28 May 2019 at 02:54, Michael S. Tsirkin <mst@redhat.com> wrote: >>>> >>>> On Mon, May 27, 2019 at 12:44:46PM +0200, Greg Kurz wrote: >>>>> On Fri, 24 May 2019 19:56:06 +0800 >>>>> Yongji Xie <elohimes@gmail.com> wrote: >>>>> >>>>>> On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote: >>>>>>> >>>>>>> On Mon, 20 May 2019 19:10:35 -0400 >>>>>>> "Michael S. Tsirkin" <mst@redhat.com> wrote: >>>>>>> >>>>>>>> From: Xie Yongji <xieyongji@baidu.com> >> [...] >>>>>>>> @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) >>>>>>>> return vdev->broken; >>>>>>>> } >>>>>>>> >>>>>>>> +static bool virtio_started_needed(void *opaque) >>>>>>>> +{ >>>>>>>> + VirtIODevice *vdev = opaque; >>>>>>>> + >>>>>>>> + return vdev->started; >>>>>>> >>>>>>> Existing machine types don't know about the "virtio/started" subsection. This >>>>>>> breaks migration to older QEMUs if the driver has started the device, ie. most >>>>>>> probably always when it comes to live migration. >>>>>>> >>>>>>> My understanding is that we do try to support backward migration though. It >>>>>>> is a regular practice in datacenters to migrate workloads without having to >>>>>>> take care of the QEMU version. FWIW I had to fix similar issues downstream >>>>>>> many times in the past because customers had filed bugs. >>>>>>> >>>>>> >>>>>> If we do need to support backward migration, for this patch, what I >>>>>> can think of is to only migrate the flag in the case that guest kicks >>>>>> but not set DRIVER_OK. This could fix backward migration in most case. >>>>> >>>>> You mean something like that ? >>>>> >>>>> static bool virtio_started_needed(void *opaque) >>>>> { >>>>> VirtIODevice *vdev = opaque; >>>>> >>>>> return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); >>>>> } >>>>> >>>>>> Not sure if there is a more general approach... >>>>>> >>>>> >>>>> Another approach would be to only implement the started flag for >>>>> machine version > 4.0. This can be achieved by adding a "use-started" >>>>> property to the base virtio device, true by default and set to >>>>> false by hw_compat_4_0. >>>> >>>> I think this is best frankly. >>>> >>> >>> Only implement the started flag for machine version > 4.0 might not be >>> good because vhost-user-blk now need to use this flag. How about only >>> migrating this flag for machine version > 4.0 instead? >> >> Was this implemented? Is migration from QEMU 4.1 to QEMU 4.0 >> currently broken? >> > > Answer is yes for both questions. > Is there a fix? The problem is really easy to reproduce: start a guest with virtio-blk and migrate once the driver has started. Thanks, Laurent
On Mon, 24 Jun 2019 19:54:27 +0200 Laurent Vivier <lvivier@redhat.com> wrote: > On 01/06/2019 17:49, Greg Kurz wrote: > > On Fri, 31 May 2019 16:36:33 -0300 > > Eduardo Habkost <ehabkost@redhat.com> wrote: > > > >> On Tue, May 28, 2019 at 10:48:09AM +0800, Yongji Xie wrote: > >>> On Tue, 28 May 2019 at 02:54, Michael S. Tsirkin <mst@redhat.com> wrote: > >>>> > >>>> On Mon, May 27, 2019 at 12:44:46PM +0200, Greg Kurz wrote: > >>>>> On Fri, 24 May 2019 19:56:06 +0800 > >>>>> Yongji Xie <elohimes@gmail.com> wrote: > >>>>> > >>>>>> On Fri, 24 May 2019 at 18:20, Greg Kurz <groug@kaod.org> wrote: > >>>>>>> > >>>>>>> On Mon, 20 May 2019 19:10:35 -0400 > >>>>>>> "Michael S. Tsirkin" <mst@redhat.com> wrote: > >>>>>>> > >>>>>>>> From: Xie Yongji <xieyongji@baidu.com> > >> [...] > >>>>>>>> @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) > >>>>>>>> return vdev->broken; > >>>>>>>> } > >>>>>>>> > >>>>>>>> +static bool virtio_started_needed(void *opaque) > >>>>>>>> +{ > >>>>>>>> + VirtIODevice *vdev = opaque; > >>>>>>>> + > >>>>>>>> + return vdev->started; > >>>>>>> > >>>>>>> Existing machine types don't know about the "virtio/started" subsection. This > >>>>>>> breaks migration to older QEMUs if the driver has started the device, ie. most > >>>>>>> probably always when it comes to live migration. > >>>>>>> > >>>>>>> My understanding is that we do try to support backward migration though. It > >>>>>>> is a regular practice in datacenters to migrate workloads without having to > >>>>>>> take care of the QEMU version. FWIW I had to fix similar issues downstream > >>>>>>> many times in the past because customers had filed bugs. > >>>>>>> > >>>>>> > >>>>>> If we do need to support backward migration, for this patch, what I > >>>>>> can think of is to only migrate the flag in the case that guest kicks > >>>>>> but not set DRIVER_OK. This could fix backward migration in most case. > >>>>> > >>>>> You mean something like that ? > >>>>> > >>>>> static bool virtio_started_needed(void *opaque) > >>>>> { > >>>>> VirtIODevice *vdev = opaque; > >>>>> > >>>>> return vdev->started && !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); > >>>>> } > >>>>> > >>>>>> Not sure if there is a more general approach... > >>>>>> > >>>>> > >>>>> Another approach would be to only implement the started flag for > >>>>> machine version > 4.0. This can be achieved by adding a "use-started" > >>>>> property to the base virtio device, true by default and set to > >>>>> false by hw_compat_4_0. > >>>> > >>>> I think this is best frankly. > >>>> > >>> > >>> Only implement the started flag for machine version > 4.0 might not be > >>> good because vhost-user-blk now need to use this flag. How about only > >>> migrating this flag for machine version > 4.0 instead? > >> > >> Was this implemented? Is migration from QEMU 4.1 to QEMU 4.0 > >> currently broken? > >> > > > > Answer is yes for both questions. > > > > Is there a fix? > The fix was merged with this PR: https://lists.gnu.org/archive/html/qemu-devel/2019-07/msg01565.html Cheers, -- Greg > The problem is really easy to reproduce: start a guest with virtio-blk > and migrate once the driver has started. > > Thanks, > Laurent
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 2626a895cb..af7b59b4ae 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1162,10 +1162,16 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) } } } + vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; + if (unlikely(vdev->start_on_kick && vdev->started)) { + vdev->start_on_kick = false; + } + if (k->set_status) { k->set_status(vdev, val); } vdev->status = val; + return 0; } @@ -1208,6 +1214,9 @@ void virtio_reset(void *opaque) k->reset(vdev); } + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); + vdev->started = false; vdev->broken = false; vdev->guest_features = 0; vdev->queue_sel = 0; @@ -1518,14 +1527,21 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) static bool virtio_queue_notify_aio_vq(VirtQueue *vq) { + bool ret = false; + if (vq->vring.desc && vq->handle_aio_output) { VirtIODevice *vdev = vq->vdev; trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); - return vq->handle_aio_output(vdev, vq); + ret = vq->handle_aio_output(vdev, vq); + + if (unlikely(vdev->start_on_kick)) { + vdev->started = true; + vdev->start_on_kick = false; + } } - return false; + return ret; } static void virtio_queue_notify_vq(VirtQueue *vq) @@ -1539,6 +1555,11 @@ static void virtio_queue_notify_vq(VirtQueue *vq) trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); vq->handle_output(vdev, vq); + + if (unlikely(vdev->start_on_kick)) { + vdev->started = true; + vdev->start_on_kick = false; + } } } @@ -1556,6 +1577,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) } else if (vq->handle_output) { vq->handle_output(vdev, vq); } + + if (unlikely(vdev->start_on_kick)) { + vdev->started = true; + vdev->start_on_kick = false; + } } uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) @@ -1770,6 +1796,13 @@ static bool virtio_broken_needed(void *opaque) return vdev->broken; } +static bool virtio_started_needed(void *opaque) +{ + VirtIODevice *vdev = opaque; + + return vdev->started; +} + static const VMStateDescription vmstate_virtqueue = { .name = "virtqueue_state", .version_id = 1, @@ -1898,6 +1931,17 @@ static const VMStateDescription vmstate_virtio_broken = { } }; +static const VMStateDescription vmstate_virtio_started = { + .name = "virtio/started", + .version_id = 1, + .minimum_version_id = 1, + .needed = &virtio_started_needed, + .fields = (VMStateField[]) { + VMSTATE_BOOL(started, VirtIODevice), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_virtio = { .name = "virtio", .version_id = 1, @@ -1913,6 +1957,7 @@ static const VMStateDescription vmstate_virtio = { &vmstate_virtio_ringsize, &vmstate_virtio_broken, &vmstate_virtio_extra_state, + &vmstate_virtio_started, NULL } }; @@ -2286,6 +2331,9 @@ void virtio_init(VirtIODevice *vdev, const char *name, g_malloc0(sizeof(*vdev->vector_queues) * nvectors); } + vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); + vdev->started = false; vdev->device_id = device_id; vdev->status = 0; atomic_set(&vdev->isr, 0); diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index ce9516236a..fea08bcc44 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -105,6 +105,8 @@ struct VirtIODevice uint16_t device_id; bool vm_running; bool broken; /* device in invalid state, needs reset */ + bool started; + bool start_on_kick; /* virtio 1.0 transitional devices support that */ VMChangeStateEntry *vmstate; char *bus_name; uint8_t device_endian;