diff mbox

[1/3] virtio: add virtio_detach_element()

Message ID 1474291685-24226-2-git-send-email-stefanha@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Stefan Hajnoczi Sept. 19, 2016, 1:28 p.m. UTC
During device reset or similar situations a VirtQueueElement needs to be
freed without pushing it onto the used ring or rewinding the virtqueue.
Extract a new function to do this.

Later patches add virtio_detach_element() calls to existing device so
that scatter-gather lists are unmapped and vq->inuse goes back to zero
during device reset.  Currently some devices don't bother and simply
call g_free(elem) which is not a clean way to throw away a
VirtQueueElement.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/virtio/virtio.c         | 27 +++++++++++++++++++++++++--
 include/hw/virtio/virtio.h |  2 ++
 2 files changed, 27 insertions(+), 2 deletions(-)

Comments

Greg Kurz Sept. 26, 2016, 8:43 a.m. UTC | #1
On Mon, 19 Sep 2016 14:28:03 +0100
Stefan Hajnoczi <stefanha@redhat.com> wrote:

> During device reset or similar situations a VirtQueueElement needs to be
> freed without pushing it onto the used ring or rewinding the virtqueue.
> Extract a new function to do this.
> 
> Later patches add virtio_detach_element() calls to existing device so
> that scatter-gather lists are unmapped and vq->inuse goes back to zero
> during device reset.  Currently some devices don't bother and simply
> call g_free(elem) which is not a clean way to throw away a
> VirtQueueElement.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---

FWIW

Acked-by: Greg Kurz <groug@kaod.org>

>  hw/virtio/virtio.c         | 27 +++++++++++++++++++++++++--
>  include/hw/virtio/virtio.h |  2 ++
>  2 files changed, 27 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index fcf3358..adcef45 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -264,12 +264,35 @@ static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
>                                    0, elem->out_sg[i].iov_len);
>  }
>  
> +/* virtqueue_detach_element:
> + * @vq: The #VirtQueue
> + * @elem: The #VirtQueueElement
> + * @len: number of bytes written
> + *
> + * Detach the element from the virtqueue.  This function is suitable for device
> + * reset or other situations where a #VirtQueueElement is simply freed and will
> + * not be pushed or discarded.
> + */
> +void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
> +                              unsigned int len)
> +{
> +    vq->inuse--;
> +    virtqueue_unmap_sg(vq, elem, len);
> +}
> +
> +/* virtqueue_discard:
> + * @vq: The #VirtQueue
> + * @elem: The #VirtQueueElement
> + * @len: number of bytes written
> + *
> + * Pretend the most recent element wasn't popped from the virtqueue.  The next
> + * call to virtqueue_pop() will refetch the element.
> + */
>  void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
>                         unsigned int len)
>  {
>      vq->last_avail_idx--;
> -    vq->inuse--;
> -    virtqueue_unmap_sg(vq, elem, len);
> +    virtqueue_detach_element(vq, elem, len);
>  }
>  
>  /* virtqueue_rewind:
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index f05559d..ad1e2d6 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -152,6 +152,8 @@ void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num);
>  void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
>                      unsigned int len);
>  void virtqueue_flush(VirtQueue *vq, unsigned int count);
> +void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
> +                              unsigned int len);
>  void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
>                         unsigned int len);
>  bool virtqueue_rewind(VirtQueue *vq, unsigned int num);
Ladi Prosek Sept. 27, 2016, 7:32 a.m. UTC | #2
On Mon, Sep 19, 2016 at 3:28 PM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> During device reset or similar situations a VirtQueueElement needs to be
> freed without pushing it onto the used ring or rewinding the virtqueue.
> Extract a new function to do this.
>
> Later patches add virtio_detach_element() calls to existing device so
> that scatter-gather lists are unmapped and vq->inuse goes back to zero
> during device reset.  Currently some devices don't bother and simply
> call g_free(elem) which is not a clean way to throw away a
> VirtQueueElement.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  hw/virtio/virtio.c         | 27 +++++++++++++++++++++++++--
>  include/hw/virtio/virtio.h |  2 ++
>  2 files changed, 27 insertions(+), 2 deletions(-)
>
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index fcf3358..adcef45 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -264,12 +264,35 @@ static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
>                                    0, elem->out_sg[i].iov_len);
>  }
>
> +/* virtqueue_detach_element:
> + * @vq: The #VirtQueue
> + * @elem: The #VirtQueueElement
> + * @len: number of bytes written
> + *
> + * Detach the element from the virtqueue.  This function is suitable for device
> + * reset or other situations where a #VirtQueueElement is simply freed and will
> + * not be pushed or discarded.
> + */
> +void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
> +                              unsigned int len)
> +{
> +    vq->inuse--;
> +    virtqueue_unmap_sg(vq, elem, len);
> +}
> +
> +/* virtqueue_discard:
> + * @vq: The #VirtQueue
> + * @elem: The #VirtQueueElement
> + * @len: number of bytes written
> + *
> + * Pretend the most recent element wasn't popped from the virtqueue.  The next
> + * call to virtqueue_pop() will refetch the element.
> + */
>  void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
>                         unsigned int len)
>  {
>      vq->last_avail_idx--;
> -    vq->inuse--;
> -    virtqueue_unmap_sg(vq, elem, len);
> +    virtqueue_detach_element(vq, elem, len);

Random comment, not directly related to this change. Would it be worth
adding an assert to this function that elem->index and
vq->last_avail_idx match? In other words, enforce the "most recent"
qualifier mentioned in the comment. As more virtqueue_* functions are
added and the complexity goes up, it is easy to get confused. Also, I
think that naming this function virtqueue_unpop instead of
virtqueue_discard would help.

>  }
>
>  /* virtqueue_rewind:
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index f05559d..ad1e2d6 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -152,6 +152,8 @@ void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num);
>  void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
>                      unsigned int len);
>  void virtqueue_flush(VirtQueue *vq, unsigned int count);
> +void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
> +                              unsigned int len);
>  void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
>                         unsigned int len);
>  bool virtqueue_rewind(VirtQueue *vq, unsigned int num);
> --
> 2.7.4
>

Reviewed-by: Ladi Prosek <lprosek@redhat.com>

Thanks!
Ladi
Stefan Hajnoczi Sept. 27, 2016, 10:08 a.m. UTC | #3
On Tue, Sep 27, 2016 at 09:32:40AM +0200, Ladi Prosek wrote:
> On Mon, Sep 19, 2016 at 3:28 PM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> > During device res> > +/* virtqueue_discard:
> > + * @vq: The #VirtQueue
> > + * @elem: The #VirtQueueElement
> > + * @len: number of bytes written
> > + *
> > + * Pretend the most recent element wasn't popped from the virtqueue.  The next
> > + * call to virtqueue_pop() will refetch the element.
> > + */
> >  void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
> >                         unsigned int len)
> >  {
> >      vq->last_avail_idx--;
> > -    vq->inuse--;
> > -    virtqueue_unmap_sg(vq, elem, len);
> > +    virtqueue_detach_element(vq, elem, len);
> 
> Random comment, not directly related to this change. Would it be worth
> adding an assert to this function that elem->index and
> vq->last_avail_idx match? In other words, enforce the "most recent"
> qualifier mentioned in the comment. As more virtqueue_* functions are
> added and the complexity goes up, it is easy to get confused. Also, I
> think that naming this function virtqueue_unpop instead of
> virtqueue_discard would help.

elem->index is a descriptor ring index.  vq->last_avail_idx is an index
into the available ring.  They are different but your suggestion makes
sense in general.

We shouldn't read from vring memory again for an assertion so
deferencing the available ring isn't possible (because we cannot rely on
vring memory contents after processing the request).  One way to
implement the assertion is to put VirtQueueElements on a linked list
(vq->inuse_elems) but that probably needs live migration support.

I agree that renaming to unpop makes the semantics clearer.

Would you like to submit a patch for either or both?
Ladi Prosek Sept. 27, 2016, 12:12 p.m. UTC | #4
On Tue, Sep 27, 2016 at 12:08 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Tue, Sep 27, 2016 at 09:32:40AM +0200, Ladi Prosek wrote:
>> On Mon, Sep 19, 2016 at 3:28 PM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>> > During device res> > +/* virtqueue_discard:
>> > + * @vq: The #VirtQueue
>> > + * @elem: The #VirtQueueElement
>> > + * @len: number of bytes written
>> > + *
>> > + * Pretend the most recent element wasn't popped from the virtqueue.  The next
>> > + * call to virtqueue_pop() will refetch the element.
>> > + */
>> >  void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
>> >                         unsigned int len)
>> >  {
>> >      vq->last_avail_idx--;
>> > -    vq->inuse--;
>> > -    virtqueue_unmap_sg(vq, elem, len);
>> > +    virtqueue_detach_element(vq, elem, len);
>>
>> Random comment, not directly related to this change. Would it be worth
>> adding an assert to this function that elem->index and
>> vq->last_avail_idx match? In other words, enforce the "most recent"
>> qualifier mentioned in the comment. As more virtqueue_* functions are
>> added and the complexity goes up, it is easy to get confused. Also, I
>> think that naming this function virtqueue_unpop instead of
>> virtqueue_discard would help.
>
> elem->index is a descriptor ring index.  vq->last_avail_idx is an index
> into the available ring.  They are different but your suggestion makes
> sense in general.

Oh, right, I didn't mean they would be identical but something like this:

  g_assert(elem->index == virtqueue_get_head(vq, vq->last_avail_idx));

> We shouldn't read from vring memory again for an assertion so
> deferencing the available ring isn't possible (because we cannot rely on
> vring memory contents after processing the request).

Not sure I follow, shouldn't available ring memory at that index still
be the same? Basically I'd like to assert that the next virtqueue_pop
would return the same element.

> One way to
> implement the assertion is to put VirtQueueElements on a linked list
> (vq->inuse_elems) but that probably needs live migration support.
>
> I agree that renaming to unpop makes the semantics clearer.
>
> Would you like to submit a patch for either or both?

Yes, I'll do both.
Stefan Hajnoczi Sept. 27, 2016, 3:03 p.m. UTC | #5
On Tue, Sep 27, 2016 at 02:12:09PM +0200, Ladi Prosek wrote:
> On Tue, Sep 27, 2016 at 12:08 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> > On Tue, Sep 27, 2016 at 09:32:40AM +0200, Ladi Prosek wrote:
> >> On Mon, Sep 19, 2016 at 3:28 PM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> >> > During device res> > +/* virtqueue_discard:
> >> > + * @vq: The #VirtQueue
> >> > + * @elem: The #VirtQueueElement
> >> > + * @len: number of bytes written
> >> > + *
> >> > + * Pretend the most recent element wasn't popped from the virtqueue.  The next
> >> > + * call to virtqueue_pop() will refetch the element.
> >> > + */
> >> >  void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
> >> >                         unsigned int len)
> >> >  {
> >> >      vq->last_avail_idx--;
> >> > -    vq->inuse--;
> >> > -    virtqueue_unmap_sg(vq, elem, len);
> >> > +    virtqueue_detach_element(vq, elem, len);
> >>
> >> Random comment, not directly related to this change. Would it be worth
> >> adding an assert to this function that elem->index and
> >> vq->last_avail_idx match? In other words, enforce the "most recent"
> >> qualifier mentioned in the comment. As more virtqueue_* functions are
> >> added and the complexity goes up, it is easy to get confused. Also, I
> >> think that naming this function virtqueue_unpop instead of
> >> virtqueue_discard would help.
> >
> > elem->index is a descriptor ring index.  vq->last_avail_idx is an index
> > into the available ring.  They are different but your suggestion makes
> > sense in general.
> 
> Oh, right, I didn't mean they would be identical but something like this:
> 
>   g_assert(elem->index == virtqueue_get_head(vq, vq->last_avail_idx));
> 
> > We shouldn't read from vring memory again for an assertion so
> > deferencing the available ring isn't possible (because we cannot rely on
> > vring memory contents after processing the request).
> 
> Not sure I follow, shouldn't available ring memory at that index still
> be the same? Basically I'd like to assert that the next virtqueue_pop
> would return the same element.

Assertions cannot be guest-triggerable.  The guest can make the
assertion fail by writing a new value to the available ring.

That might not sound like an issue but consider a scenario where the
virtio PCI device is passed through to a nested guest.  Now the nested
guest can kill the parent hypervisor and all sibling VMs.

Stefan
Ladi Prosek Sept. 27, 2016, 3:24 p.m. UTC | #6
On Tue, Sep 27, 2016 at 5:03 PM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> On Tue, Sep 27, 2016 at 02:12:09PM +0200, Ladi Prosek wrote:
>> On Tue, Sep 27, 2016 at 12:08 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> > On Tue, Sep 27, 2016 at 09:32:40AM +0200, Ladi Prosek wrote:
>> >> On Mon, Sep 19, 2016 at 3:28 PM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>> >> > During device res> > +/* virtqueue_discard:
>> >> > + * @vq: The #VirtQueue
>> >> > + * @elem: The #VirtQueueElement
>> >> > + * @len: number of bytes written
>> >> > + *
>> >> > + * Pretend the most recent element wasn't popped from the virtqueue.  The next
>> >> > + * call to virtqueue_pop() will refetch the element.
>> >> > + */
>> >> >  void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
>> >> >                         unsigned int len)
>> >> >  {
>> >> >      vq->last_avail_idx--;
>> >> > -    vq->inuse--;
>> >> > -    virtqueue_unmap_sg(vq, elem, len);
>> >> > +    virtqueue_detach_element(vq, elem, len);
>> >>
>> >> Random comment, not directly related to this change. Would it be worth
>> >> adding an assert to this function that elem->index and
>> >> vq->last_avail_idx match? In other words, enforce the "most recent"
>> >> qualifier mentioned in the comment. As more virtqueue_* functions are
>> >> added and the complexity goes up, it is easy to get confused. Also, I
>> >> think that naming this function virtqueue_unpop instead of
>> >> virtqueue_discard would help.
>> >
>> > elem->index is a descriptor ring index.  vq->last_avail_idx is an index
>> > into the available ring.  They are different but your suggestion makes
>> > sense in general.
>>
>> Oh, right, I didn't mean they would be identical but something like this:
>>
>>   g_assert(elem->index == virtqueue_get_head(vq, vq->last_avail_idx));
>>
>> > We shouldn't read from vring memory again for an assertion so
>> > deferencing the available ring isn't possible (because we cannot rely on
>> > vring memory contents after processing the request).
>>
>> Not sure I follow, shouldn't available ring memory at that index still
>> be the same? Basically I'd like to assert that the next virtqueue_pop
>> would return the same element.
>
> Assertions cannot be guest-triggerable.  The guest can make the
> assertion fail by writing a new value to the available ring.
>
> That might not sound like an issue but consider a scenario where the
> virtio PCI device is passed through to a nested guest.  Now the nested
> guest can kill the parent hypervisor and all sibling VMs.

Got it, all clear now, thanks!

> Stefan
diff mbox

Patch

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index fcf3358..adcef45 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -264,12 +264,35 @@  static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                                   0, elem->out_sg[i].iov_len);
 }
 
+/* virtqueue_detach_element:
+ * @vq: The #VirtQueue
+ * @elem: The #VirtQueueElement
+ * @len: number of bytes written
+ *
+ * Detach the element from the virtqueue.  This function is suitable for device
+ * reset or other situations where a #VirtQueueElement is simply freed and will
+ * not be pushed or discarded.
+ */
+void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
+                              unsigned int len)
+{
+    vq->inuse--;
+    virtqueue_unmap_sg(vq, elem, len);
+}
+
+/* virtqueue_discard:
+ * @vq: The #VirtQueue
+ * @elem: The #VirtQueueElement
+ * @len: number of bytes written
+ *
+ * Pretend the most recent element wasn't popped from the virtqueue.  The next
+ * call to virtqueue_pop() will refetch the element.
+ */
 void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
                        unsigned int len)
 {
     vq->last_avail_idx--;
-    vq->inuse--;
-    virtqueue_unmap_sg(vq, elem, len);
+    virtqueue_detach_element(vq, elem, len);
 }
 
 /* virtqueue_rewind:
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index f05559d..ad1e2d6 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -152,6 +152,8 @@  void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num);
 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len);
 void virtqueue_flush(VirtQueue *vq, unsigned int count);
+void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
+                              unsigned int len);
 void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
                        unsigned int len);
 bool virtqueue_rewind(VirtQueue *vq, unsigned int num);