diff mbox

[RFC,2/7] virtio: stop virtqueue processing if device is broken

Message ID 1458842214-11450-3-git-send-email-stefanha@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Stefan Hajnoczi March 24, 2016, 5:56 p.m. UTC
QEMU prints an error message and exits when the device enters an invalid
state.  Terminating the process is heavy-handed.  The guest may still be
able to function even if there is a bug in a virtio guest driver.

Moreover, exiting is a bug in nested virtualization where a nested guest
could DoS other nested guests by killing a pass-through virtio device.
I don't think this configuration is possible today but it is likely in
the future.

If the broken flag is set, do not process virtqueues or write back used
descriptors.  The broken flag can be cleared again by resetting the
device.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/virtio/virtio.c         | 34 ++++++++++++++++++++++++++++++++++
 include/hw/virtio/virtio.h |  3 +++
 2 files changed, 37 insertions(+)

Comments

Fam Zheng March 25, 2016, 6:48 a.m. UTC | #1
On Thu, 03/24 17:56, Stefan Hajnoczi wrote:
> QEMU prints an error message and exits when the device enters an invalid
> state.  Terminating the process is heavy-handed.  The guest may still be
> able to function even if there is a bug in a virtio guest driver.
> 
> Moreover, exiting is a bug in nested virtualization where a nested guest
> could DoS other nested guests by killing a pass-through virtio device.
> I don't think this configuration is possible today but it is likely in
> the future.
> 
> If the broken flag is set, do not process virtqueues or write back used
> descriptors.  The broken flag can be cleared again by resetting the
> device.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  hw/virtio/virtio.c         | 34 ++++++++++++++++++++++++++++++++++
>  include/hw/virtio/virtio.h |  3 +++
>  2 files changed, 37 insertions(+)
> 
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index de8a3b3..8fac47c 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -276,6 +276,10 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
>  
>      virtqueue_unmap_sg(vq, elem, len);
>  
> +    if (unlikely(vq->vdev->broken)) {
> +        return;
> +    }
> +
>      idx = (idx + vq->used_idx) % vq->vring.num;
>  
>      uelem.id = elem->index;
> @@ -286,6 +290,12 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
>  void virtqueue_flush(VirtQueue *vq, unsigned int count)
>  {
>      uint16_t old, new;
> +
> +    if (unlikely(vq->vdev->broken)) {
> +        vq->inuse -= count;
> +        return;
> +    }
> +
>      /* Make sure buffer is written before we update index. */
>      smp_wmb();
>      trace_virtqueue_flush(vq, count);
> @@ -546,6 +556,9 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
>      struct iovec iov[VIRTQUEUE_MAX_SIZE];
>      VRingDesc desc;
>  
> +    if (unlikely(vdev->broken)) {
> +        return NULL;
> +    }
>      if (virtio_queue_empty(vq)) {
>          return NULL;
>      }
> @@ -705,6 +718,10 @@ static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
>      BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
>      VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
>  
> +    if (unlikely(vdev->broken)) {
> +        return;
> +    }
> +
>      if (k->notify) {
>          k->notify(qbus->parent, vector);
>      }
> @@ -788,6 +805,7 @@ void virtio_reset(void *opaque)
>          k->reset(vdev);
>      }
>  
> +    vdev->broken = false;
>      vdev->guest_features = 0;
>      vdev->queue_sel = 0;
>      vdev->status = 0;
> @@ -1091,6 +1109,10 @@ void virtio_queue_notify_vq(VirtQueue *vq)
>      if (vq->vring.desc && vq->handle_output) {
>          VirtIODevice *vdev = vq->vdev;
>  
> +        if (unlikely(vdev->broken)) {
> +            return;
> +        }
> +
>          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
>          vq->handle_output(vdev, vq);
>      }
> @@ -1661,6 +1683,7 @@ void virtio_init(VirtIODevice *vdev, const char *name,
>      vdev->config_vector = VIRTIO_NO_VECTOR;
>      vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
>      vdev->vm_running = runstate_is_running();
> +    vdev->broken = false;
>      for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
>          vdev->vq[i].vector = VIRTIO_NO_VECTOR;
>          vdev->vq[i].vdev = vdev;
> @@ -1829,6 +1852,17 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
>      vdev->bus_name = g_strdup(bus_name);
>  }
>  
> +void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
> +{
> +    va_list ap;
> +
> +    va_start(ap, fmt);
> +    error_vreport(fmt, ap);
> +    va_end(ap);
> +
> +    vdev->broken = true;
> +}
> +

Since it's not used by the rest of the patch and it is independent, Probably
split this change to a separate patch?

>  static void virtio_device_realize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 2b5b248..1565e53 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -87,6 +87,7 @@ struct VirtIODevice
>      VirtQueue *vq;
>      uint16_t device_id;
>      bool vm_running;
> +    bool broken; /* device in invalid state, needs reset */
>      VMChangeStateEntry *vmstate;
>      char *bus_name;
>      uint8_t device_endian;
> @@ -135,6 +136,8 @@ void virtio_init(VirtIODevice *vdev, const char *name,
>                           uint16_t device_id, size_t config_size);
>  void virtio_cleanup(VirtIODevice *vdev);
>  
> +void virtio_error(VirtIODevice *vdev, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
> +
>  /* Set the child bus name. */
>  void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name);
>  
> -- 
> 2.5.5
>
Cornelia Huck March 29, 2016, 7:52 a.m. UTC | #2
On Thu, 24 Mar 2016 17:56:49 +0000
Stefan Hajnoczi <stefanha@redhat.com> wrote:

> QEMU prints an error message and exits when the device enters an invalid
> state.  Terminating the process is heavy-handed.  The guest may still be
> able to function even if there is a bug in a virtio guest driver.
> 
> Moreover, exiting is a bug in nested virtualization where a nested guest
> could DoS other nested guests by killing a pass-through virtio device.
> I don't think this configuration is possible today but it is likely in
> the future.
> 
> If the broken flag is set, do not process virtqueues or write back used
> descriptors.  The broken flag can be cleared again by resetting the
> device.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  hw/virtio/virtio.c         | 34 ++++++++++++++++++++++++++++++++++
>  include/hw/virtio/virtio.h |  3 +++
>  2 files changed, 37 insertions(+)

> +void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
> +{
> +    va_list ap;
> +
> +    va_start(ap, fmt);
> +    error_vreport(fmt, ap);
> +    va_end(ap);
> +
> +    vdev->broken = true;

We should set the NEEDS_RESET status flag for virtio-1 devices here, I
think.

> +}
> +
Stefan Hajnoczi March 29, 2016, 11:12 a.m. UTC | #3
On Fri, Mar 25, 2016 at 02:48:37PM +0800, Fam Zheng wrote:
> On Thu, 03/24 17:56, Stefan Hajnoczi wrote:
> > QEMU prints an error message and exits when the device enters an invalid
> > state.  Terminating the process is heavy-handed.  The guest may still be
> > able to function even if there is a bug in a virtio guest driver.
> > 
> > Moreover, exiting is a bug in nested virtualization where a nested guest
> > could DoS other nested guests by killing a pass-through virtio device.
> > I don't think this configuration is possible today but it is likely in
> > the future.
> > 
> > If the broken flag is set, do not process virtqueues or write back used
> > descriptors.  The broken flag can be cleared again by resetting the
> > device.
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  hw/virtio/virtio.c         | 34 ++++++++++++++++++++++++++++++++++
> >  include/hw/virtio/virtio.h |  3 +++
> >  2 files changed, 37 insertions(+)
> > 
> > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > index de8a3b3..8fac47c 100644
> > --- a/hw/virtio/virtio.c
> > +++ b/hw/virtio/virtio.c
> > @@ -276,6 +276,10 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
> >  
> >      virtqueue_unmap_sg(vq, elem, len);
> >  
> > +    if (unlikely(vq->vdev->broken)) {
> > +        return;
> > +    }
> > +
> >      idx = (idx + vq->used_idx) % vq->vring.num;
> >  
> >      uelem.id = elem->index;
> > @@ -286,6 +290,12 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
> >  void virtqueue_flush(VirtQueue *vq, unsigned int count)
> >  {
> >      uint16_t old, new;
> > +
> > +    if (unlikely(vq->vdev->broken)) {
> > +        vq->inuse -= count;
> > +        return;
> > +    }
> > +
> >      /* Make sure buffer is written before we update index. */
> >      smp_wmb();
> >      trace_virtqueue_flush(vq, count);
> > @@ -546,6 +556,9 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
> >      struct iovec iov[VIRTQUEUE_MAX_SIZE];
> >      VRingDesc desc;
> >  
> > +    if (unlikely(vdev->broken)) {
> > +        return NULL;
> > +    }
> >      if (virtio_queue_empty(vq)) {
> >          return NULL;
> >      }
> > @@ -705,6 +718,10 @@ static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
> >      BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
> >      VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
> >  
> > +    if (unlikely(vdev->broken)) {
> > +        return;
> > +    }
> > +
> >      if (k->notify) {
> >          k->notify(qbus->parent, vector);
> >      }
> > @@ -788,6 +805,7 @@ void virtio_reset(void *opaque)
> >          k->reset(vdev);
> >      }
> >  
> > +    vdev->broken = false;
> >      vdev->guest_features = 0;
> >      vdev->queue_sel = 0;
> >      vdev->status = 0;
> > @@ -1091,6 +1109,10 @@ void virtio_queue_notify_vq(VirtQueue *vq)
> >      if (vq->vring.desc && vq->handle_output) {
> >          VirtIODevice *vdev = vq->vdev;
> >  
> > +        if (unlikely(vdev->broken)) {
> > +            return;
> > +        }
> > +
> >          trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
> >          vq->handle_output(vdev, vq);
> >      }
> > @@ -1661,6 +1683,7 @@ void virtio_init(VirtIODevice *vdev, const char *name,
> >      vdev->config_vector = VIRTIO_NO_VECTOR;
> >      vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
> >      vdev->vm_running = runstate_is_running();
> > +    vdev->broken = false;
> >      for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
> >          vdev->vq[i].vector = VIRTIO_NO_VECTOR;
> >          vdev->vq[i].vdev = vdev;
> > @@ -1829,6 +1852,17 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
> >      vdev->bus_name = g_strdup(bus_name);
> >  }
> >  
> > +void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
> > +{
> > +    va_list ap;
> > +
> > +    va_start(ap, fmt);
> > +    error_vreport(fmt, ap);
> > +    va_end(ap);
> > +
> > +    vdev->broken = true;
> > +}
> > +
> 
> Since it's not used by the rest of the patch and it is independent, Probably
> split this change to a separate patch?

The broken flag is introduced here and without this hunk nothing ever
writes to it.  If you had a static analysis tool it might even warn
about the unused field without virtoi_error().

I did consider introducing virtio_error() in another patch before
sending but I think having it here makes this patch more
self-explanatory.  It doesn't keep the reviewer guessing about how the
broken flag will be set in later patches.

Stefan
Stefan Hajnoczi March 29, 2016, 11:14 a.m. UTC | #4
On Tue, Mar 29, 2016 at 09:52:11AM +0200, Cornelia Huck wrote:
> On Thu, 24 Mar 2016 17:56:49 +0000
> Stefan Hajnoczi <stefanha@redhat.com> wrote:
> > +void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
> > +{
> > +    va_list ap;
> > +
> > +    va_start(ap, fmt);
> > +    error_vreport(fmt, ap);
> > +    va_end(ap);
> > +
> > +    vdev->broken = true;
> 
> We should set the NEEDS_RESET status flag for virtio-1 devices here, I
> think.

Will do, thanks.

Stefan
diff mbox

Patch

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index de8a3b3..8fac47c 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -276,6 +276,10 @@  void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 
     virtqueue_unmap_sg(vq, elem, len);
 
+    if (unlikely(vq->vdev->broken)) {
+        return;
+    }
+
     idx = (idx + vq->used_idx) % vq->vring.num;
 
     uelem.id = elem->index;
@@ -286,6 +290,12 @@  void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 void virtqueue_flush(VirtQueue *vq, unsigned int count)
 {
     uint16_t old, new;
+
+    if (unlikely(vq->vdev->broken)) {
+        vq->inuse -= count;
+        return;
+    }
+
     /* Make sure buffer is written before we update index. */
     smp_wmb();
     trace_virtqueue_flush(vq, count);
@@ -546,6 +556,9 @@  void *virtqueue_pop(VirtQueue *vq, size_t sz)
     struct iovec iov[VIRTQUEUE_MAX_SIZE];
     VRingDesc desc;
 
+    if (unlikely(vdev->broken)) {
+        return NULL;
+    }
     if (virtio_queue_empty(vq)) {
         return NULL;
     }
@@ -705,6 +718,10 @@  static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
 
+    if (unlikely(vdev->broken)) {
+        return;
+    }
+
     if (k->notify) {
         k->notify(qbus->parent, vector);
     }
@@ -788,6 +805,7 @@  void virtio_reset(void *opaque)
         k->reset(vdev);
     }
 
+    vdev->broken = false;
     vdev->guest_features = 0;
     vdev->queue_sel = 0;
     vdev->status = 0;
@@ -1091,6 +1109,10 @@  void virtio_queue_notify_vq(VirtQueue *vq)
     if (vq->vring.desc && vq->handle_output) {
         VirtIODevice *vdev = vq->vdev;
 
+        if (unlikely(vdev->broken)) {
+            return;
+        }
+
         trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
         vq->handle_output(vdev, vq);
     }
@@ -1661,6 +1683,7 @@  void virtio_init(VirtIODevice *vdev, const char *name,
     vdev->config_vector = VIRTIO_NO_VECTOR;
     vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
     vdev->vm_running = runstate_is_running();
+    vdev->broken = false;
     for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
         vdev->vq[i].vdev = vdev;
@@ -1829,6 +1852,17 @@  void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
     vdev->bus_name = g_strdup(bus_name);
 }
 
+void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    error_vreport(fmt, ap);
+    va_end(ap);
+
+    vdev->broken = true;
+}
+
 static void virtio_device_realize(DeviceState *dev, Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 2b5b248..1565e53 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -87,6 +87,7 @@  struct VirtIODevice
     VirtQueue *vq;
     uint16_t device_id;
     bool vm_running;
+    bool broken; /* device in invalid state, needs reset */
     VMChangeStateEntry *vmstate;
     char *bus_name;
     uint8_t device_endian;
@@ -135,6 +136,8 @@  void virtio_init(VirtIODevice *vdev, const char *name,
                          uint16_t device_id, size_t config_size);
 void virtio_cleanup(VirtIODevice *vdev);
 
+void virtio_error(VirtIODevice *vdev, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
+
 /* Set the child bus name. */
 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name);