diff mbox

[RFC,v2,3/9] virtio: stop virtqueue processing if device is broken

Message ID 1459267981-23408-4-git-send-email-stefanha@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Stefan Hajnoczi March 29, 2016, 4:12 p.m. UTC
QEMU prints an error message and exits when the device enters an invalid
state.  Terminating the process is heavy-handed.  The guest may still be
able to function even if there is a bug in a virtio guest driver.

Moreover, exiting is a bug in nested virtualization where a nested guest
could DoS other nested guests by killing a pass-through virtio device.
I don't think this configuration is possible today but it is likely in
the future.

If the broken flag is set, do not process virtqueues or write back used
descriptors.  The broken flag can be cleared again by resetting the
device.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/virtio/virtio.c         | 39 +++++++++++++++++++++++++++++++++++++++
 include/hw/virtio/virtio.h |  3 +++
 2 files changed, 42 insertions(+)

Comments

Cornelia Huck March 29, 2016, 4:58 p.m. UTC | #1
On Tue, 29 Mar 2016 17:12:55 +0100
Stefan Hajnoczi <stefanha@redhat.com> wrote:

> QEMU prints an error message and exits when the device enters an invalid
> state.  Terminating the process is heavy-handed.  The guest may still be
> able to function even if there is a bug in a virtio guest driver.
> 
> Moreover, exiting is a bug in nested virtualization where a nested guest
> could DoS other nested guests by killing a pass-through virtio device.
> I don't think this configuration is possible today but it is likely in
> the future.
> 
> If the broken flag is set, do not process virtqueues or write back used
> descriptors.  The broken flag can be cleared again by resetting the
> device.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  hw/virtio/virtio.c         | 39 +++++++++++++++++++++++++++++++++++++++
>  include/hw/virtio/virtio.h |  3 +++
>  2 files changed, 42 insertions(+)

> +void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
> +{
> +    va_list ap;
> +
> +    va_start(ap, fmt);
> +    error_vreport(fmt, ap);
> +    va_end(ap);
> +
> +    vdev->broken = true;
> +
> +    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
> +        vdev->status |= VIRTIO_CONFIG_S_NEEDS_RESET;

virtio_set_status()?

> +        virtio_notify_config(vdev);
> +    }
> +}
> +
>  static void virtio_device_realize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 2b5b248..1565e53 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -87,6 +87,7 @@ struct VirtIODevice
>      VirtQueue *vq;
>      uint16_t device_id;
>      bool vm_running;
> +    bool broken; /* device in invalid state, needs reset */

I'm wondering whether there's a sane way to track the broken state via
the NEEDS_RESET status bit instead. We'd probably want to filter out
this bit and not expose it to legacy drivers; but as the status field
is migrated anyway, we might be able to avoid a subsection for
migration.
Stefan Hajnoczi March 30, 2016, 12:19 p.m. UTC | #2
On Tue, Mar 29, 2016 at 06:58:03PM +0200, Cornelia Huck wrote:
> On Tue, 29 Mar 2016 17:12:55 +0100
> Stefan Hajnoczi <stefanha@redhat.com> wrote:
> > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> > index 2b5b248..1565e53 100644
> > --- a/include/hw/virtio/virtio.h
> > +++ b/include/hw/virtio/virtio.h
> > @@ -87,6 +87,7 @@ struct VirtIODevice
> >      VirtQueue *vq;
> >      uint16_t device_id;
> >      bool vm_running;
> > +    bool broken; /* device in invalid state, needs reset */
> 
> I'm wondering whether there's a sane way to track the broken state via
> the NEEDS_RESET status bit instead. We'd probably want to filter out
> this bit and not expose it to legacy drivers; but as the status field
> is migrated anyway, we might be able to avoid a subsection for
> migration.

If we set the bit for a non-VIRTIO 1.0 device and migrate to an old QEMU that
doesn't filter the bit, then the guest will see it.

Therefore I'm in favor of keeping vdev->broken separate from the status
bit.  The subsection only needs to be sent when the bit is set.  Only
migration of a broken device to an old QEMU will fail.  All other cases
continue to work so the subsection doesn't impose much incompatibility.

Stefan
Stefan Hajnoczi March 30, 2016, 12:21 p.m. UTC | #3
On Tue, Mar 29, 2016 at 06:58:03PM +0200, Cornelia Huck wrote:
> On Tue, 29 Mar 2016 17:12:55 +0100
> Stefan Hajnoczi <stefanha@redhat.com> wrote:
> 
> > QEMU prints an error message and exits when the device enters an invalid
> > state.  Terminating the process is heavy-handed.  The guest may still be
> > able to function even if there is a bug in a virtio guest driver.
> > 
> > Moreover, exiting is a bug in nested virtualization where a nested guest
> > could DoS other nested guests by killing a pass-through virtio device.
> > I don't think this configuration is possible today but it is likely in
> > the future.
> > 
> > If the broken flag is set, do not process virtqueues or write back used
> > descriptors.  The broken flag can be cleared again by resetting the
> > device.
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  hw/virtio/virtio.c         | 39 +++++++++++++++++++++++++++++++++++++++
> >  include/hw/virtio/virtio.h |  3 +++
> >  2 files changed, 42 insertions(+)
> 
> > +void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
> > +{
> > +    va_list ap;
> > +
> > +    va_start(ap, fmt);
> > +    error_vreport(fmt, ap);
> > +    va_end(ap);
> > +
> > +    vdev->broken = true;
> > +
> > +    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
> > +        vdev->status |= VIRTIO_CONFIG_S_NEEDS_RESET;
> 
> virtio_set_status()?

Will fix in next revision
diff mbox

Patch

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index de8a3b3..ff2c736 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -276,6 +276,10 @@  void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 
     virtqueue_unmap_sg(vq, elem, len);
 
+    if (unlikely(vq->vdev->broken)) {
+        return;
+    }
+
     idx = (idx + vq->used_idx) % vq->vring.num;
 
     uelem.id = elem->index;
@@ -286,6 +290,12 @@  void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 void virtqueue_flush(VirtQueue *vq, unsigned int count)
 {
     uint16_t old, new;
+
+    if (unlikely(vq->vdev->broken)) {
+        vq->inuse -= count;
+        return;
+    }
+
     /* Make sure buffer is written before we update index. */
     smp_wmb();
     trace_virtqueue_flush(vq, count);
@@ -546,6 +556,9 @@  void *virtqueue_pop(VirtQueue *vq, size_t sz)
     struct iovec iov[VIRTQUEUE_MAX_SIZE];
     VRingDesc desc;
 
+    if (unlikely(vdev->broken)) {
+        return NULL;
+    }
     if (virtio_queue_empty(vq)) {
         return NULL;
     }
@@ -705,6 +718,10 @@  static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
 
+    if (unlikely(vdev->broken)) {
+        return;
+    }
+
     if (k->notify) {
         k->notify(qbus->parent, vector);
     }
@@ -788,6 +805,7 @@  void virtio_reset(void *opaque)
         k->reset(vdev);
     }
 
+    vdev->broken = false;
     vdev->guest_features = 0;
     vdev->queue_sel = 0;
     vdev->status = 0;
@@ -1091,6 +1109,10 @@  void virtio_queue_notify_vq(VirtQueue *vq)
     if (vq->vring.desc && vq->handle_output) {
         VirtIODevice *vdev = vq->vdev;
 
+        if (unlikely(vdev->broken)) {
+            return;
+        }
+
         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
         vq->handle_output(vdev, vq);
     }
@@ -1661,6 +1683,7 @@  void virtio_init(VirtIODevice *vdev, const char *name,
     vdev->config_vector = VIRTIO_NO_VECTOR;
     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
     vdev->vm_running = runstate_is_running();
+    vdev->broken = false;
     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
         vdev->vq[i].vdev = vdev;
@@ -1829,6 +1852,22 @@  void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
     vdev->bus_name = g_strdup(bus_name);
 }
 
+void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    error_vreport(fmt, ap);
+    va_end(ap);
+
+    vdev->broken = true;
+
+    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+        vdev->status |= VIRTIO_CONFIG_S_NEEDS_RESET;
+        virtio_notify_config(vdev);
+    }
+}
+
 static void virtio_device_realize(DeviceState *dev, Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 2b5b248..1565e53 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -87,6 +87,7 @@  struct VirtIODevice
     VirtQueue *vq;
     uint16_t device_id;
     bool vm_running;
+    bool broken; /* device in invalid state, needs reset */
     VMChangeStateEntry *vmstate;
     char *bus_name;
     uint8_t device_endian;
@@ -135,6 +136,8 @@  void virtio_init(VirtIODevice *vdev, const char *name,
                          uint16_t device_id, size_t config_size);
 void virtio_cleanup(VirtIODevice *vdev);
 
+void virtio_error(VirtIODevice *vdev, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
+
 /* Set the child bus name. */
 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name);