diff mbox

[1/9] virtio: add functions for piecewise addition of buffers

Message ID 1360671815-2135-2-git-send-email-pbonzini@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Paolo Bonzini Feb. 12, 2013, 12:23 p.m. UTC
virtio device drivers translate requests from higher layer in two steps:
a device-specific step in the device driver, and generic preparation
of virtio direct or indirect buffers in virtqueue_add_buf.  Because
virtqueue_add_buf also accepts the outcome of the first step as a single
struct scatterlist, drivers may need to put additional items at the
front or back of the data scatterlists before handing it to virtqueue_add_buf.
Because of this, virtio-scsi has to copy each request into a scatterlist
internal to the driver.  It cannot just use the one that was prepared
by the upper SCSI layers. 

On top of this, virtqueue_add_buf also has the limitation of not
supporting chained scatterlists: the buffers must be provided as an
array of struct scatterlist.  Chained scatterlist, though not supported
on all architectures, would help for virtio-scsi where all additional
items are placed at the front.

This patch adds a different set of APIs for adding a buffer to a virtqueue.
The new API lets you pass the buffers piecewise, wrapping multiple calls
to virtqueue_add_sg between virtqueue_start_buf and virtqueue_end_buf.
virtio-scsi can then call virtqueue_add_sg 3/4 times: for the request
header, for the write buffer (if present), for the response header, and
finally for the read buffer (again if present).  It saves the copying
and the related locking.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 drivers/virtio/virtio_ring.c |  211 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |   14 +++
 2 files changed, 225 insertions(+), 0 deletions(-)

Comments

Michael S. Tsirkin Feb. 12, 2013, 2:56 p.m. UTC | #1
On Tue, Feb 12, 2013 at 01:23:27PM +0100, Paolo Bonzini wrote:
> virtio device drivers translate requests from higher layer in two steps:
> a device-specific step in the device driver, and generic preparation
> of virtio direct or indirect buffers in virtqueue_add_buf.  Because
> virtqueue_add_buf also accepts the outcome of the first step as a single
> struct scatterlist, drivers may need to put additional items at the
> front or back of the data scatterlists before handing it to virtqueue_add_buf.
> Because of this, virtio-scsi has to copy each request into a scatterlist
> internal to the driver.  It cannot just use the one that was prepared
> by the upper SCSI layers. 
> 
> On top of this, virtqueue_add_buf also has the limitation of not
> supporting chained scatterlists: the buffers must be provided as an
> array of struct scatterlist.  Chained scatterlist, though not supported
> on all architectures, would help for virtio-scsi where all additional
> items are placed at the front.
> 
> This patch adds a different set of APIs for adding a buffer to a virtqueue.
> The new API lets you pass the buffers piecewise, wrapping multiple calls
> to virtqueue_add_sg between virtqueue_start_buf and virtqueue_end_buf.
> virtio-scsi can then call virtqueue_add_sg 3/4 times: for the request
> header, for the write buffer (if present), for the response header, and
> finally for the read buffer (again if present).  It saves the copying
> and the related locking.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  drivers/virtio/virtio_ring.c |  211 ++++++++++++++++++++++++++++++++++++++++++
>  include/linux/virtio.h       |   14 +++
>  2 files changed, 225 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index ffd7e7d..64184e5 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -101,6 +101,10 @@ struct vring_virtqueue
>  	/* Last used index we've seen. */
>  	u16 last_used_idx;
>  
> +	/* State between virtqueue_start_buf and virtqueue_end_buf.  */
> +	int head;
> +	struct vring_desc *indirect_base, *tail;
> +
>  	/* How to notify other side. FIXME: commonalize hcalls! */
>  	void (*notify)(struct virtqueue *vq);
>  
> @@ -394,6 +398,213 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
>  	vq->vq.num_free++;
>  }
>  
> +/**
> + * virtqueue_start_buf - start building buffer for the other end
> + * @vq: the struct virtqueue we're talking about.
> + * @data: the token identifying the buffer.
> + * @nents: the number of buffers that will be added

This function starts building one buffer, number of buffers
is a bit weird here.

> + * @nsg: the number of sg lists that will be added

This means number of calls to add_sg ? Not sure why this matters.
How about we pass in in_num/out_num - that is total # of sg,
same as add_buf?


> + * @gfp: how to do memory allocations (if necessary).
> + *
> + * Caller must ensure we don't call this with other virtqueue operations
> + * at the same time (except where noted), and that a successful call is
> + * followed by one or more calls to virtqueue_add_sg, and finally a call
> + * to virtqueue_end_buf.
> + *
> + * Returns zero or a negative error (ie. ENOSPC).
> + */
> +int virtqueue_start_buf(struct virtqueue *_vq,
> +			void *data,
> +			unsigned int nents,
> +			unsigned int nsg,
> +			gfp_t gfp)
> +{
> +	struct vring_virtqueue *vq = to_vvq(_vq);
> +	struct vring_desc *desc = NULL;
> +	int head;
> +	int ret = -ENOMEM;
> +
> +	START_USE(vq);
> +
> +	BUG_ON(data == NULL);
> +
> +#ifdef DEBUG
> +	{
> +		ktime_t now = ktime_get();
> +
> +		/* No kick or get, with .1 second between?  Warn. */
> +		if (vq->last_add_time_valid)
> +			WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
> +					    > 100);
> +		vq->last_add_time = now;
> +		vq->last_add_time_valid = true;
> +	}
> +#endif
> +
> +	BUG_ON(nents < nsg);
> +	BUG_ON(nsg == 0);
> +
> +	/*
> +	 * If the host supports indirect descriptor tables, and there is
> +	 * no space for direct buffers or there are multi-item scatterlists,
> +	 * go indirect.
> +	 */
> +	head = vq->free_head;
> +	if (vq->indirect && (nents > nsg || vq->vq.num_free < nents)) {
> +		if (vq->vq.num_free == 0)
> +			goto no_space;
> +
> +		desc = kmalloc(nents * sizeof(struct vring_desc), gfp);
> +		if (!desc)
> +			goto error;
> +
> +		/* We're about to use a buffer */
> +		vq->vq.num_free--;
> +
> +		/* Use a single buffer which doesn't continue */
> +		vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
> +		vq->vring.desc[head].addr = virt_to_phys(desc);
> +		vq->vring.desc[head].len = nents * sizeof(struct vring_desc);
> +
> +		/* Update free pointer */
> +		vq->free_head = vq->vring.desc[head].next;
> +	}
> +
> +	/* Set token. */
> +	vq->data[head] = data;
> +
> +	pr_debug("Started buffer head %i for %p\n", head, vq);
> +
> +	vq->indirect_base = desc;
> +	vq->tail = NULL;
> +	vq->head = head;
> +	return 0;
> +
> +no_space:
> +	ret = -ENOSPC;
> +error:
> +	pr_debug("Can't add buf (%d) - nents = %i, avail = %i\n",
> +		 ret, nents, vq->vq.num_free);
> +	END_USE(vq);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_start_buf);
> +
> +/**
> + * virtqueue_add_sg - add sglist to buffer being built
> + * @_vq: the virtqueue for which the buffer is being built
> + * @sgl: the description of the buffer(s).
> + * @nents: the number of items to process in sgl
> + * @dir: whether the sgl is read or written (DMA_TO_DEVICE/DMA_FROM_DEVICE only)
> + *
> + * Note that, unlike virtqueue_add_buf, this function follows chained
> + * scatterlists, and stops before the @nents-th item if a scatterlist item
> + * has a marker.
> + *
> + * Caller must ensure we don't call this with other virtqueue operations
> + * at the same time (except where noted).

Hmm so if you want to add in and out, need separate calls?
in_num/out_num would be nicer?


> + */
> +void virtqueue_add_sg(struct virtqueue *_vq,
> +		      struct scatterlist sgl[],
> +		      unsigned int nents,
> +		      enum dma_data_direction dir)
> +{
> +	struct vring_virtqueue *vq = to_vvq(_vq);
> +	unsigned int i, n;
> +	struct scatterlist *sg;
> +	struct vring_desc *tail;
> +	u32 flags;
> +
> +#ifdef DEBUG
> +	BUG_ON(!vq->in_use);
> +#endif
> +
> +	BUG_ON(dir != DMA_FROM_DEVICE && dir != DMA_TO_DEVICE);
> +	BUG_ON(nents == 0);
> +
> +	flags = dir == DMA_FROM_DEVICE ? VRING_DESC_F_WRITE : 0;
> +	flags |= VRING_DESC_F_NEXT;
> +
> +	/*
> +	 * If using indirect descriptor tables, fill in the buffers
> +	 * at vq->indirect_base.
> +	 */
> +	if (vq->indirect_base) {
> +		i = 0;
> +		if (likely(vq->tail))
> +			i = vq->tail - vq->indirect_base + 1;
> +
> +		for_each_sg(sgl, sg, nents, n) {
> +			tail = &vq->indirect_base[i];
> +			tail->flags = flags;
> +			tail->addr = sg_phys(sg);
> +			tail->len = sg->length;
> +			tail->next = ++i;
> +		}
> +	} else {
> +		BUG_ON(vq->vq.num_free < nents);
> +
> +		i = vq->free_head;
> +		for_each_sg(sgl, sg, nents, n) {
> +			tail = &vq->vring.desc[i];
> +			tail->flags = flags;
> +			tail->addr = sg_phys(sg);
> +			tail->len = sg->length;
> +			i = tail->next;
> +			vq->vq.num_free--;
> +		}
> +
> +		vq->free_head = i;
> +	}
> +	vq->tail = tail;
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_add_sg);
> +
> +/**
> + * virtqueue_end_buf - expose buffer to other end
> + * @_vq: the virtqueue for which the buffer was built
> + *
> + * Caller must ensure we don't call this with other virtqueue operations
> + * at the same time (except where noted).
> + */
> +void virtqueue_end_buf(struct virtqueue *_vq)
> +{
> +	struct vring_virtqueue *vq = to_vvq(_vq);
> +	unsigned int avail;
> +	int head = vq->head;
> +	struct vring_desc *tail = vq->tail;
> +
> +#ifdef DEBUG
> +	BUG_ON(!vq->in_use);
> +#endif
> +	BUG_ON(tail == NULL);
> +
> +	/* The last one does not have the next flag set.  */
> +	tail->flags &= ~VRING_DESC_F_NEXT;
> +
> +	/*
> +	 * Put entry in available array.  Descriptors and available array
> +	 * need to be set before we expose the new available array entries.
> +	 */
> +	avail = vq->vring.avail->idx & (vq->vring.num-1);
> +	vq->vring.avail->ring[avail] = head;
> +	virtio_wmb(vq);
> +
> +	vq->vring.avail->idx++;
> +	vq->num_added++;
> +
> +	/*
> +	 * This is very unlikely, but theoretically possible.  Kick
> +	 * just in case.
> +	 */
> +	if (unlikely(vq->num_added == (1 << 16) - 1))
> +		virtqueue_kick(&vq->vq);
> +
> +	pr_debug("Added buffer head %i to %p\n", head, vq);
> +	END_USE(vq);
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_end_buf);
> +
>  static inline bool more_used(const struct vring_virtqueue *vq)
>  {
>  	return vq->last_used_idx != vq->vring.used->idx;
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index cf8adb1..43d6bc3 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -7,6 +7,7 @@
>  #include <linux/spinlock.h>
>  #include <linux/device.h>
>  #include <linux/mod_devicetable.h>
> +#include <linux/dma-direction.h>
>  #include <linux/gfp.h>
>  
>  /**
> @@ -40,6 +41,19 @@ int virtqueue_add_buf(struct virtqueue *vq,
>  		      void *data,
>  		      gfp_t gfp);
>  
> +int virtqueue_start_buf(struct virtqueue *_vq,
> +			void *data,
> +			unsigned int nents,
> +			unsigned int nsg,
> +			gfp_t gfp);
> +
> +void virtqueue_add_sg(struct virtqueue *_vq,
> +		      struct scatterlist sgl[],
> +		      unsigned int nents,
> +		      enum dma_data_direction dir);
> +
> +void virtqueue_end_buf(struct virtqueue *_vq);
> +
>  void virtqueue_kick(struct virtqueue *vq);
>  
>  bool virtqueue_kick_prepare(struct virtqueue *vq);
> -- 
> 1.7.1
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Feb. 12, 2013, 3:32 p.m. UTC | #2
Il 12/02/2013 15:56, Michael S. Tsirkin ha scritto:
>> > +/**
>> > + * virtqueue_start_buf - start building buffer for the other end
>> > + * @vq: the struct virtqueue we're talking about.
>> > + * @data: the token identifying the buffer.
>> > + * @nents: the number of buffers that will be added
> This function starts building one buffer, number of buffers
> is a bit weird here.

Ok.

>> > + * @nsg: the number of sg lists that will be added
> This means number of calls to add_sg ? Not sure why this matters.
> How about we pass in in_num/out_num - that is total # of sg,
> same as add_buf?

It is used to choose between direct and indirect.

>> > +/**
>> > + * virtqueue_add_sg - add sglist to buffer being built
>> > + * @_vq: the virtqueue for which the buffer is being built
>> > + * @sgl: the description of the buffer(s).
>> > + * @nents: the number of items to process in sgl
>> > + * @dir: whether the sgl is read or written (DMA_TO_DEVICE/DMA_FROM_DEVICE only)
>> > + *
>> > + * Note that, unlike virtqueue_add_buf, this function follows chained
>> > + * scatterlists, and stops before the @nents-th item if a scatterlist item
>> > + * has a marker.
>> > + *
>> > + * Caller must ensure we don't call this with other virtqueue operations
>> > + * at the same time (except where noted).
> Hmm so if you want to add in and out, need separate calls?
> in_num/out_num would be nicer?

If you want to add in and out just use virtqueue_add_buf...

Paolo

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Feb. 12, 2013, 3:43 p.m. UTC | #3
On Tue, Feb 12, 2013 at 04:32:27PM +0100, Paolo Bonzini wrote:
> Il 12/02/2013 15:56, Michael S. Tsirkin ha scritto:
> >> > +/**
> >> > + * virtqueue_start_buf - start building buffer for the other end
> >> > + * @vq: the struct virtqueue we're talking about.
> >> > + * @data: the token identifying the buffer.
> >> > + * @nents: the number of buffers that will be added
> > This function starts building one buffer, number of buffers
> > is a bit weird here.
> 
> Ok.
> 
> >> > + * @nsg: the number of sg lists that will be added
> > This means number of calls to add_sg ? Not sure why this matters.
> > How about we pass in in_num/out_num - that is total # of sg,
> > same as add_buf?
> 
> It is used to choose between direct and indirect.

total number of in and out should be enough for this, no?

> >> > +/**
> >> > + * virtqueue_add_sg - add sglist to buffer being built
> >> > + * @_vq: the virtqueue for which the buffer is being built
> >> > + * @sgl: the description of the buffer(s).
> >> > + * @nents: the number of items to process in sgl
> >> > + * @dir: whether the sgl is read or written (DMA_TO_DEVICE/DMA_FROM_DEVICE only)
> >> > + *
> >> > + * Note that, unlike virtqueue_add_buf, this function follows chained
> >> > + * scatterlists, and stops before the @nents-th item if a scatterlist item
> >> > + * has a marker.
> >> > + *
> >> > + * Caller must ensure we don't call this with other virtqueue operations
> >> > + * at the same time (except where noted).
> > Hmm so if you want to add in and out, need separate calls?
> > in_num/out_num would be nicer?
> 
> If you want to add in and out just use virtqueue_add_buf...
> 
> Paolo

I thought the point of this one is maximum flexibility.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Feb. 12, 2013, 3:48 p.m. UTC | #4
Il 12/02/2013 16:43, Michael S. Tsirkin ha scritto:
> On Tue, Feb 12, 2013 at 04:32:27PM +0100, Paolo Bonzini wrote:
>> Il 12/02/2013 15:56, Michael S. Tsirkin ha scritto:
>>>>> +/**
>>>>> + * virtqueue_start_buf - start building buffer for the other end
>>>>> + * @vq: the struct virtqueue we're talking about.
>>>>> + * @data: the token identifying the buffer.
>>>>> + * @nents: the number of buffers that will be added
>>> This function starts building one buffer, number of buffers
>>> is a bit weird here.
>>
>> Ok.
>>
>>>>> + * @nsg: the number of sg lists that will be added
>>> This means number of calls to add_sg ? Not sure why this matters.
>>> How about we pass in in_num/out_num - that is total # of sg,
>>> same as add_buf?
>>
>> It is used to choose between direct and indirect.
> 
> total number of in and out should be enough for this, no?

Originally, I used nsg/nents because I wanted to use mixed direct and
indirect buffers.  nsg/nents let me choose between full direct (nsg ==
nents), mixed (num_free >= nsg), full indirect (num_free < nsg).  Then I
had to give up because QEMU does not support it, but I still would like
to keep that open in the API.

In this series, however, I am still using nsg to choose between direct
and indirect.  I would like to use dirtect for small scatterlists, even
if they are surrounded by a request/response headers/footers.

>>>>> +/**
>>>>> + * virtqueue_add_sg - add sglist to buffer being built
>>>>> + * @_vq: the virtqueue for which the buffer is being built
>>>>> + * @sgl: the description of the buffer(s).
>>>>> + * @nents: the number of items to process in sgl
>>>>> + * @dir: whether the sgl is read or written (DMA_TO_DEVICE/DMA_FROM_DEVICE only)
>>>>> + *
>>>>> + * Note that, unlike virtqueue_add_buf, this function follows chained
>>>>> + * scatterlists, and stops before the @nents-th item if a scatterlist item
>>>>> + * has a marker.
>>>>> + *
>>>>> + * Caller must ensure we don't call this with other virtqueue operations
>>>>> + * at the same time (except where noted).
>>> Hmm so if you want to add in and out, need separate calls?
>>> in_num/out_num would be nicer?
>>
>> If you want to add in and out just use virtqueue_add_buf...
> 
> I thought the point of this one is maximum flexibility.

Maximum flexibility does not include doing everything in one call (the
other way round in fact: you already need to wrap with start/end, hence
doing one or two extra add_sg calls is not important).

Paolo

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Feb. 12, 2013, 4:13 p.m. UTC | #5
On Tue, Feb 12, 2013 at 04:48:39PM +0100, Paolo Bonzini wrote:
> Il 12/02/2013 16:43, Michael S. Tsirkin ha scritto:
> > On Tue, Feb 12, 2013 at 04:32:27PM +0100, Paolo Bonzini wrote:
> >> Il 12/02/2013 15:56, Michael S. Tsirkin ha scritto:
> >>>>> +/**
> >>>>> + * virtqueue_start_buf - start building buffer for the other end
> >>>>> + * @vq: the struct virtqueue we're talking about.
> >>>>> + * @data: the token identifying the buffer.
> >>>>> + * @nents: the number of buffers that will be added
> >>> This function starts building one buffer, number of buffers
> >>> is a bit weird here.
> >>
> >> Ok.
> >>
> >>>>> + * @nsg: the number of sg lists that will be added
> >>> This means number of calls to add_sg ? Not sure why this matters.
> >>> How about we pass in in_num/out_num - that is total # of sg,
> >>> same as add_buf?
> >>
> >> It is used to choose between direct and indirect.
> > 
> > total number of in and out should be enough for this, no?
> 
> Originally, I used nsg/nents because I wanted to use mixed direct and
> indirect buffers.  nsg/nents let me choose between full direct (nsg ==
> nents), mixed (num_free >= nsg), full indirect (num_free < nsg).  Then I
> had to give up because QEMU does not support it, but I still would like
> to keep that open in the API.

Problem is it does not seem to make sense in the API.

> In this series, however, I am still using nsg to choose between direct
> and indirect.  I would like to use dirtect for small scatterlists, even
> if they are surrounded by a request/response headers/footers.

Shouldn't we base this on total number of s/g entries?
I don't see why does it matter how many calls you use
to build up the list.

> >>>>> +/**
> >>>>> + * virtqueue_add_sg - add sglist to buffer being built
> >>>>> + * @_vq: the virtqueue for which the buffer is being built
> >>>>> + * @sgl: the description of the buffer(s).
> >>>>> + * @nents: the number of items to process in sgl
> >>>>> + * @dir: whether the sgl is read or written (DMA_TO_DEVICE/DMA_FROM_DEVICE only)
> >>>>> + *
> >>>>> + * Note that, unlike virtqueue_add_buf, this function follows chained
> >>>>> + * scatterlists, and stops before the @nents-th item if a scatterlist item
> >>>>> + * has a marker.
> >>>>> + *
> >>>>> + * Caller must ensure we don't call this with other virtqueue operations
> >>>>> + * at the same time (except where noted).
> >>> Hmm so if you want to add in and out, need separate calls?
> >>> in_num/out_num would be nicer?
> >>
> >> If you want to add in and out just use virtqueue_add_buf...
> > 
> > I thought the point of this one is maximum flexibility.
> 
> Maximum flexibility does not include doing everything in one call (the
> other way round in fact: you already need to wrap with start/end, hence
> doing one or two extra add_sg calls is not important).
> 
> Paolo

My point is, we have exactly same number of parameters:
in + out instead of nsg + direction, and we get more
functionality.
Paolo Bonzini Feb. 12, 2013, 4:17 p.m. UTC | #6
Il 12/02/2013 17:13, Michael S. Tsirkin ha scritto:
>>>>>>> + * @nsg: the number of sg lists that will be added
>>>>> This means number of calls to add_sg ? Not sure why this matters.
>>>>> How about we pass in in_num/out_num - that is total # of sg,
>>>>> same as add_buf?
>>>>
>>>> It is used to choose between direct and indirect.
>>>
>>> total number of in and out should be enough for this, no?
>>
>> Originally, I used nsg/nents because I wanted to use mixed direct and
>> indirect buffers.  nsg/nents let me choose between full direct (nsg ==
>> nents), mixed (num_free >= nsg), full indirect (num_free < nsg).  Then I
>> had to give up because QEMU does not support it, but I still would like
>> to keep that open in the API.
> 
> Problem is it does not seem to make sense in the API.

Why not?  Perhaps in the idea you have of the implementation, but in the
API it definitely makes sense.  It's a fast-path API, it makes sense to
provide as much information as possible upfront.

>> In this series, however, I am still using nsg to choose between direct
>> and indirect.  I would like to use dirtect for small scatterlists, even
>> if they are surrounded by a request/response headers/footers.
> 
> Shouldn't we base this on total number of s/g entries?
> I don't see why does it matter how many calls you use
> to build up the list.

The idea is that in general the headers/footers are few (so their number
doesn't really matter) and are in singleton scatterlists.  Hence, the
heuristic checks at the data part of the request, and chooses
direct/indirect depending on the size of that part.

>>>>>>> +/**
>>>>>>> + * virtqueue_add_sg - add sglist to buffer being built
>>>>>>> + * @_vq: the virtqueue for which the buffer is being built
>>>>>>> + * @sgl: the description of the buffer(s).
>>>>>>> + * @nents: the number of items to process in sgl
>>>>>>> + * @dir: whether the sgl is read or written (DMA_TO_DEVICE/DMA_FROM_DEVICE only)
>>>>>>> + *
>>>>>>> + * Note that, unlike virtqueue_add_buf, this function follows chained
>>>>>>> + * scatterlists, and stops before the @nents-th item if a scatterlist item
>>>>>>> + * has a marker.
>>>>>>> + *
>>>>>>> + * Caller must ensure we don't call this with other virtqueue operations
>>>>>>> + * at the same time (except where noted).
>>>>> Hmm so if you want to add in and out, need separate calls?
>>>>> in_num/out_num would be nicer?
>>>>
>>>> If you want to add in and out just use virtqueue_add_buf...
>>>
>>> I thought the point of this one is maximum flexibility.
>>
>> Maximum flexibility does not include doing everything in one call (the
>> other way round in fact: you already need to wrap with start/end, hence
>> doing one or two extra add_sg calls is not important).
> 
> My point is, we have exactly same number of parameters:
> in + out instead of nsg + direction, and we get more
> functionality.

And we also have more complex (and slower) code, that would never be
used.  You would never save more than one call, because you cannot
alternate out and in buffers arbitrarily.  And if you look at how the
API is actually used in virtio-blk and virtio-scsi, building the buffers
"two-piecewise" doesn't fit the flow of the code.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Feb. 12, 2013, 4:35 p.m. UTC | #7
On Tue, Feb 12, 2013 at 05:17:47PM +0100, Paolo Bonzini wrote:
> Il 12/02/2013 17:13, Michael S. Tsirkin ha scritto:
> >>>>>>> + * @nsg: the number of sg lists that will be added
> >>>>> This means number of calls to add_sg ? Not sure why this matters.
> >>>>> How about we pass in in_num/out_num - that is total # of sg,
> >>>>> same as add_buf?
> >>>>
> >>>> It is used to choose between direct and indirect.
> >>>
> >>> total number of in and out should be enough for this, no?
> >>
> >> Originally, I used nsg/nents because I wanted to use mixed direct and
> >> indirect buffers.  nsg/nents let me choose between full direct (nsg ==
> >> nents), mixed (num_free >= nsg), full indirect (num_free < nsg).  Then I
> >> had to give up because QEMU does not support it, but I still would like
> >> to keep that open in the API.
> > 
> > Problem is it does not seem to make sense in the API.
> 
> Why not?  Perhaps in the idea you have of the implementation, but in the
> API it definitely makes sense.  It's a fast-path API, it makes sense to
> provide as much information as possible upfront.

If we are ignoring some information, I think we are better off
without asking for it.

> >> In this series, however, I am still using nsg to choose between direct
> >> and indirect.  I would like to use dirtect for small scatterlists, even
> >> if they are surrounded by a request/response headers/footers.
> > 
> > Shouldn't we base this on total number of s/g entries?
> > I don't see why does it matter how many calls you use
> > to build up the list.
> 
> The idea is that in general the headers/footers are few (so their number
> doesn't really matter) and are in singleton scatterlists.  Hence, the
> heuristic checks at the data part of the request, and chooses
> direct/indirect depending on the size of that part.

Why? Why not the total size as we did before?

> >>>>>>> +/**
> >>>>>>> + * virtqueue_add_sg - add sglist to buffer being built
> >>>>>>> + * @_vq: the virtqueue for which the buffer is being built
> >>>>>>> + * @sgl: the description of the buffer(s).
> >>>>>>> + * @nents: the number of items to process in sgl
> >>>>>>> + * @dir: whether the sgl is read or written (DMA_TO_DEVICE/DMA_FROM_DEVICE only)
> >>>>>>> + *
> >>>>>>> + * Note that, unlike virtqueue_add_buf, this function follows chained
> >>>>>>> + * scatterlists, and stops before the @nents-th item if a scatterlist item
> >>>>>>> + * has a marker.
> >>>>>>> + *
> >>>>>>> + * Caller must ensure we don't call this with other virtqueue operations
> >>>>>>> + * at the same time (except where noted).
> >>>>> Hmm so if you want to add in and out, need separate calls?
> >>>>> in_num/out_num would be nicer?
> >>>>
> >>>> If you want to add in and out just use virtqueue_add_buf...
> >>>
> >>> I thought the point of this one is maximum flexibility.
> >>
> >> Maximum flexibility does not include doing everything in one call (the
> >> other way round in fact: you already need to wrap with start/end, hence
> >> doing one or two extra add_sg calls is not important).
> > 
> > My point is, we have exactly same number of parameters:
> > in + out instead of nsg + direction, and we get more
> > functionality.
> 
> And we also have more complex (and slower) code, that would never be
> used.

Instead of 
	flags = (directon == from_device) ? out : in;

you would do

	flags = idx > in ? out : in;

why is this slower?


> You would never save more than one call, because you cannot
> alternate out and in buffers arbitrarily.

That's the problem with the API, it apparently let you do this, and
if you do it will fail at run time.  If we specify in/out upfront in
start, there's no way to misuse the API.

>  And if you look at how the
> API is actually used in virtio-blk and virtio-scsi, building the buffers
> "two-piecewise" doesn't fit the flow of the code.
> 
> Paolo
Paolo Bonzini Feb. 12, 2013, 4:57 p.m. UTC | #8
Il 12/02/2013 17:35, Michael S. Tsirkin ha scritto:
> On Tue, Feb 12, 2013 at 05:17:47PM +0100, Paolo Bonzini wrote:
>> Il 12/02/2013 17:13, Michael S. Tsirkin ha scritto:
>>>>>>>>> + * @nsg: the number of sg lists that will be added
>>>>>>> This means number of calls to add_sg ? Not sure why this matters.
>>>>>>> How about we pass in in_num/out_num - that is total # of sg,
>>>>>>> same as add_buf?
>>>>>>
>>>>>> It is used to choose between direct and indirect.
>>>>>
>>>>> total number of in and out should be enough for this, no?
>>>>
>>>> Originally, I used nsg/nents because I wanted to use mixed direct and
>>>> indirect buffers.  nsg/nents let me choose between full direct (nsg ==
>>>> nents), mixed (num_free >= nsg), full indirect (num_free < nsg).  Then I
>>>> had to give up because QEMU does not support it, but I still would like
>>>> to keep that open in the API.
>>>
>>> Problem is it does not seem to make sense in the API.
>>
>> Why not?  Perhaps in the idea you have of the implementation, but in the
>> API it definitely makes sense.  It's a fast-path API, it makes sense to
>> provide as much information as possible upfront.
> 
> If we are ignoring some information, I think we are better off
> without asking for it.

We're not ignoring it.  virtqueue_start_buf uses both nents and nsg:

        if (vq->indirect && (nents > nsg || vq->vq.num_free < nents)) {
		/* indirect */
	}

>>>> In this series, however, I am still using nsg to choose between direct
>>>> and indirect.  I would like to use dirtect for small scatterlists, even
>>>> if they are surrounded by a request/response headers/footers.
>>>
>>> Shouldn't we base this on total number of s/g entries?
>>> I don't see why does it matter how many calls you use
>>> to build up the list.
>>
>> The idea is that in general the headers/footers are few (so their number
>> doesn't really matter) and are in singleton scatterlists.  Hence, the
>> heuristic checks at the data part of the request, and chooses
>> direct/indirect depending on the size of that part.
> 
> Why? Why not the total size as we did before?

"More than one buffer" is not a great heuristic.  In particular, it
causes all virtio-blk and virtio-scsi requests to go indirect.

More than three buffers, or more than five buffers, is just an ad-hoc
hack, and similarly not great.

>>>>>>>>> +/**
>>>>>>>>> + * virtqueue_add_sg - add sglist to buffer being built
>>>>>>>>> + * @_vq: the virtqueue for which the buffer is being built
>>>>>>>>> + * @sgl: the description of the buffer(s).
>>>>>>>>> + * @nents: the number of items to process in sgl
>>>>>>>>> + * @dir: whether the sgl is read or written (DMA_TO_DEVICE/DMA_FROM_DEVICE only)
>>>>>>>>> + *
>>>>>>>>> + * Note that, unlike virtqueue_add_buf, this function follows chained
>>>>>>>>> + * scatterlists, and stops before the @nents-th item if a scatterlist item
>>>>>>>>> + * has a marker.
>>>>>>>>> + *
>>>>>>>>> + * Caller must ensure we don't call this with other virtqueue operations
>>>>>>>>> + * at the same time (except where noted).
>>>>>>> Hmm so if you want to add in and out, need separate calls?
>>>>>>> in_num/out_num would be nicer?
>>>>>>
>>>>>> If you want to add in and out just use virtqueue_add_buf...
>>>>>
>>>>> I thought the point of this one is maximum flexibility.
>>>>
>>>> Maximum flexibility does not include doing everything in one call (the
>>>> other way round in fact: you already need to wrap with start/end, hence
>>>> doing one or two extra add_sg calls is not important).
>>>
>>> My point is, we have exactly same number of parameters:
>>> in + out instead of nsg + direction, and we get more
>>> functionality.
>>
>> And we also have more complex (and slower) code, that would never be
>> used.
> 
> Instead of 
> 	flags = (directon == from_device) ? out : in;
> 
> you would do
> 
> 	flags = idx > in ? out : in;
> 
> why is this slower?

You said "in + out instead of nsg + direction", but now instead you're
talking about specifying in/out upfront in virtqueue_start_buf.

Specifying in/out in virtqueue_add_sg would have two loops instead of
one, one of them (you don't know which) unused on every call, and
wouldn't fix the problem of possibly misusing the API.

Specifying in/out upfront would look something like

	flags = vq->idx > vq->in ? VRING_DESC_F_WRITE : 0;

or with some optimization

	flags = vq->something > 0 ? VRING_DESC_F_WRITE : 0;

It is not clear for me whether you'd allow a single virtqueue_add_sg to
cover both out and in elements.  If so, the function would become much
more complex because the flags could change in the middle, and that's
what I was referring to.  If not, you traded one possible misuse with
another.

>> You would never save more than one call, because you cannot
>> alternate out and in buffers arbitrarily.
> 
> That's the problem with the API, it apparently let you do this, and
> if you do it will fail at run time.  If we specify in/out upfront in
> start, there's no way to misuse the API.

Perhaps, but 3 or 4 arguments (in/out/nsg or in/out/nsg_in/nsg_out) just
for this are definitely too many and make the API harder to use.

You have to find a balance.  Having actually used the API, the
possibility of mixing in/out buffers by mistake never even occurred to
me, much less happened in practice, so I didn't consider it a problem.
Mixing in/out buffers in a single call wasn't a necessity, either.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Feb. 12, 2013, 5:34 p.m. UTC | #9
On Tue, Feb 12, 2013 at 05:57:55PM +0100, Paolo Bonzini wrote:
> Il 12/02/2013 17:35, Michael S. Tsirkin ha scritto:
> > On Tue, Feb 12, 2013 at 05:17:47PM +0100, Paolo Bonzini wrote:
> >> Il 12/02/2013 17:13, Michael S. Tsirkin ha scritto:
> >>>>>>>>> + * @nsg: the number of sg lists that will be added
> >>>>>>> This means number of calls to add_sg ? Not sure why this matters.
> >>>>>>> How about we pass in in_num/out_num - that is total # of sg,
> >>>>>>> same as add_buf?
> >>>>>>
> >>>>>> It is used to choose between direct and indirect.
> >>>>>
> >>>>> total number of in and out should be enough for this, no?
> >>>>
> >>>> Originally, I used nsg/nents because I wanted to use mixed direct and
> >>>> indirect buffers.  nsg/nents let me choose between full direct (nsg ==
> >>>> nents), mixed (num_free >= nsg), full indirect (num_free < nsg).  Then I
> >>>> had to give up because QEMU does not support it, but I still would like
> >>>> to keep that open in the API.
> >>>
> >>> Problem is it does not seem to make sense in the API.
> >>
> >> Why not?  Perhaps in the idea you have of the implementation, but in the
> >> API it definitely makes sense.  It's a fast-path API, it makes sense to
> >> provide as much information as possible upfront.
> > 
> > If we are ignoring some information, I think we are better off
> > without asking for it.
> 
> We're not ignoring it.  virtqueue_start_buf uses both nents and nsg:
> 
>         if (vq->indirect && (nents > nsg || vq->vq.num_free < nents)) {
> 		/* indirect */
> 	}
> >>>> In this series, however, I am still using nsg to choose between direct
> >>>> and indirect.  I would like to use dirtect for small scatterlists, even
> >>>> if they are surrounded by a request/response headers/footers.
> >>>
> >>> Shouldn't we base this on total number of s/g entries?
> >>> I don't see why does it matter how many calls you use
> >>> to build up the list.
> >>
> >> The idea is that in general the headers/footers are few (so their number
> >> doesn't really matter) and are in singleton scatterlists.  Hence, the
> >> heuristic checks at the data part of the request, and chooses
> >> direct/indirect depending on the size of that part.
> > 
> > Why? Why not the total size as we did before?
> 
> "More than one buffer" is not a great heuristic.  In particular, it
> causes all virtio-blk and virtio-scsi requests to go indirect.

If you don't do indirect you get at least 2x less space in the ring.
For blk there were workloads where we always were out of buffers.
Similarly for net, switching heuristics degrades some workloads.
Let's not change these things as part of unrelated API work,
it should be a separate patch with benchmarking showing this
is not a problem.

> More than three buffers, or more than five buffers, is just an ad-hoc
> hack, and similarly not great.

If you want to expose control over indirect buffer to drivers,
we can do this. There were patches on list. How about doing that
and posting actual performance results?  In particular maybe this is
where all the performance wins come from?  This nsgs/nents hack just
seems to rely on how one specific driver uses the API.

> >>>>>>>>> +/**
> >>>>>>>>> + * virtqueue_add_sg - add sglist to buffer being built
> >>>>>>>>> + * @_vq: the virtqueue for which the buffer is being built
> >>>>>>>>> + * @sgl: the description of the buffer(s).
> >>>>>>>>> + * @nents: the number of items to process in sgl
> >>>>>>>>> + * @dir: whether the sgl is read or written (DMA_TO_DEVICE/DMA_FROM_DEVICE only)
> >>>>>>>>> + *
> >>>>>>>>> + * Note that, unlike virtqueue_add_buf, this function follows chained
> >>>>>>>>> + * scatterlists, and stops before the @nents-th item if a scatterlist item
> >>>>>>>>> + * has a marker.
> >>>>>>>>> + *
> >>>>>>>>> + * Caller must ensure we don't call this with other virtqueue operations
> >>>>>>>>> + * at the same time (except where noted).
> >>>>>>> Hmm so if you want to add in and out, need separate calls?
> >>>>>>> in_num/out_num would be nicer?
> >>>>>>
> >>>>>> If you want to add in and out just use virtqueue_add_buf...
> >>>>>
> >>>>> I thought the point of this one is maximum flexibility.
> >>>>
> >>>> Maximum flexibility does not include doing everything in one call (the
> >>>> other way round in fact: you already need to wrap with start/end, hence
> >>>> doing one or two extra add_sg calls is not important).
> >>>
> >>> My point is, we have exactly same number of parameters:
> >>> in + out instead of nsg + direction, and we get more
> >>> functionality.
> >>
> >> And we also have more complex (and slower) code, that would never be
> >> used.
> > 
> > Instead of 
> > 	flags = (directon == from_device) ? out : in;
> > 
> > you would do
> > 
> > 	flags = idx > in ? out : in;
> > 
> > why is this slower?
> 
> You said "in + out instead of nsg + direction", but now instead you're
> talking about specifying in/out upfront in virtqueue_start_buf.
> 
> Specifying in/out in virtqueue_add_sg would have two loops instead of
> one, one of them (you don't know which) unused on every call, and
> wouldn't fix the problem of possibly misusing the API.

One loop, and it also let us avoid setting VRING_DESC_F_NEXT
instead of set then later clear:

+               for_each_sg(sgl, sg, nents, n) {

+       		flags = idx > in_sg ? VRING_DESC_F_WRITE : 0;
+       		flags |= idx < (in_sg + out_sg - 1) ? VRING_DESC_F_NEXT : 0;
+                       tail = &vq->indirect_base[i];
+                       tail->flags = flags;
+                       tail->addr = sg_phys(sg);
+                       tail->len = sg->length;
+                       tail->next = ++i;
+               }                           



> Specifying in/_ut upfront would look something like
> 
> 	flags = vq->idx > vq->in ? VRING_DESC_F_WRITE : 0;
> 
> or with some optimization
> 
> 	flags = vq->something > 0 ? VRING_DESC_F_WRITE : 0;
> 
> It is not clear for me whether you'd allow a single virtqueue_add_sg to
> cover both out and in elements.  If so, the function would become much
> more complex because the flags could change in the middle, and that's
> what I was referring to.

You just move the flag assignment within the loop.
Does not seem more complex at all.

>  If not, you traded one possible misuse with another.
> 
> >> You would never save more than one call, because you cannot
> >> alternate out and in buffers arbitrarily.
> > 
> > That's the problem with the API, it apparently let you do this, and
> > if you do it will fail at run time.  If we specify in/out upfront in
> > start, there's no way to misuse the API.
> 
> Perhaps, but 3 or 4 arguments (in/out/nsg or in/out/nsg_in/nsg_out) just
> for this are definitely too many and make the API harder to use.
> 
> You have to find a balance.  Having actually used the API, the
> possibility of mixing in/out buffers by mistake never even occurred to
> me, much less happened in practice, so I didn't consider it a problem.
> Mixing in/out buffers in a single call wasn't a necessity, either.
> 
> Paolo

It is useful for virtqueue_add_buf implementation.
Basically the more consistent the interface is with virtqueue_add_buf,
the better.

I'm not against changing virtqueue_add_buf if you like but let's keep
it all consistent.
Paolo Bonzini Feb. 12, 2013, 6:04 p.m. UTC | #10
Il 12/02/2013 18:34, Michael S. Tsirkin ha scritto:
> On Tue, Feb 12, 2013 at 05:57:55PM +0100, Paolo Bonzini wrote:
>> Il 12/02/2013 17:35, Michael S. Tsirkin ha scritto:
>>> On Tue, Feb 12, 2013 at 05:17:47PM +0100, Paolo Bonzini wrote:
>>>> Il 12/02/2013 17:13, Michael S. Tsirkin ha scritto:
>>>>>> In this series, however, I am still using nsg to choose between direct
>>>>>> and indirect.  I would like to use dirtect for small scatterlists, even
>>>>>> if they are surrounded by a request/response headers/footers.
>>>>>
>>>>> Shouldn't we base this on total number of s/g entries?
>>>>> I don't see why does it matter how many calls you use
>>>>> to build up the list.
>>>>
>>>> The idea is that in general the headers/footers are few (so their number
>>>> doesn't really matter) and are in singleton scatterlists.  Hence, the
>>>> heuristic checks at the data part of the request, and chooses
>>>> direct/indirect depending on the size of that part.
>>>
>>> Why? Why not the total size as we did before?
>>
>> "More than one buffer" is not a great heuristic.  In particular, it
>> causes all virtio-blk and virtio-scsi requests to go indirect.
> 
> If you don't do indirect you get at least 2x less space in the ring.
> For blk there were workloads where we always were out of buffers.

The heuristic is very conservative actually, and doesn't really get even
close to out-of-buffers.  You can see that in the single-queue results:

# of targets    single-queue
1                  540
2                  795
4                  997
8                 1136
16                1440
24                1408
32                1515

These are with the patched code; if queue space was a problem, you would
see much worse performance as you increase the number of targets (and
benchmark threads).  These are for virtio-scsi, that puts all disks on a
single request queue.

> Similarly for net, switching heuristics degrades some workloads.

Net is not affected.  Obviously for the mergeable buffers case that
always uses direct, but also for the others you can check that it will
always use direct/indirect as in the old scheme.  (Which is why I liked
this heuristic, too).

> Let's not change these things as part of unrelated API work,
> it should be a separate patch with benchmarking showing this
> is not a problem.

The change only happens for virtio-blk and virtio-scsi.  I benchmarked
it, if somebody found different results it would be easily bisectable.

>> More than three buffers, or more than five buffers, is just an ad-hoc
>> hack, and similarly not great.
> 
> If you want to expose control over indirect buffer to drivers,
> we can do this. There were patches on list. How about doing that
> and posting actual performance results?  In particular maybe this is
> where all the performance wins come from?

No, it's not.  Code that was high-ish in the profile disappears
(completely, not just from the profile :)).  But it's not just
performance wins, it's also simplified code and locking, so it would be
worthwhile even with no performance win.

Anyhow, I've sent v2 of this patch with the old heuristic.  It's a
one-line change, it's fine.

>>>> And we also have more complex (and slower) code, that would never be
>>>> used.
>>>
>>> Instead of 
>>> 	flags = (directon == from_device) ? out : in;
>>>
>>> you would do
>>>
>>> 	flags = idx > in ? out : in;
>>>
>>> why is this slower?
>>
>> You said "in + out instead of nsg + direction", but now instead you're
>> talking about specifying in/out upfront in virtqueue_start_buf.
>>
>> Specifying in/out in virtqueue_add_sg would have two loops instead of
>> one, one of them (you don't know which) unused on every call, and
>> wouldn't fix the problem of possibly misusing the API.
> 
> One loop, and it also let us avoid setting VRING_DESC_F_NEXT
> instead of set then later clear:
> 
> +               for_each_sg(sgl, sg, nents, n) {
> 
> +       		flags = idx > in_sg ? VRING_DESC_F_WRITE : 0;
> +       		flags |= idx < (in_sg + out_sg - 1) ? VRING_DESC_F_NEXT : 0;
> +                       tail = &vq->indirect_base[i];
> +                       tail->flags = flags;
> +                       tail->addr = sg_phys(sg);
> +                       tail->len = sg->length;
> +                       tail->next = ++i;
> +               }                           
> 

And slower it becomes.

>>  If not, you traded one possible misuse with another.
>>
>>>> You would never save more than one call, because you cannot
>>>> alternate out and in buffers arbitrarily.
>>>
>>> That's the problem with the API, it apparently let you do this, and
>>> if you do it will fail at run time.  If we specify in/out upfront in
>>> start, there's no way to misuse the API.
>>
>> Perhaps, but 3 or 4 arguments (in/out/nsg or in/out/nsg_in/nsg_out) just
>> for this are definitely too many and make the API harder to use.
>>
>> You have to find a balance.  Having actually used the API, the
>> possibility of mixing in/out buffers by mistake never even occurred to
>> me, much less happened in practice, so I didn't consider it a problem.
>> Mixing in/out buffers in a single call wasn't a necessity, either.
> 
> It is useful for virtqueue_add_buf implementation.

        ret = virtqueue_start_buf(vq, data, out + in, !!out + !!in,
				  gfp);
        if (ret < 0)
                return ret;

        if (out)
                virtqueue_add_sg(vq, sg, out, DMA_TO_DEVICE);
        if (in)
                virtqueue_add_sg(vq, sg + out, in, DMA_FROM_DEVICE);

        virtqueue_end_buf(vq);
	return 0;

How can it be simpler and easier to understand than that?

> Basically the more consistent the interface is with virtqueue_add_buf,
> the better.

The interface is consistent with virtqueue_add_buf_single, where out/in
clearly doesn't make sense.

virtqueue_add_buf and virtqueue_add_sg are very different, despite the
similar name.

> I'm not against changing virtqueue_add_buf if you like but let's keep
> it all consistent.

How can you change virtqueue_add_buf?

Paolo

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Feb. 12, 2013, 6:23 p.m. UTC | #11
On Tue, Feb 12, 2013 at 07:04:27PM +0100, Paolo Bonzini wrote:
> >> Perhaps, but 3 or 4 arguments (in/out/nsg or in/out/nsg_in/nsg_out) just
> >> for this are definitely too many and make the API harder to use.
> >>
> >> You have to find a balance.  Having actually used the API, the
> >> possibility of mixing in/out buffers by mistake never even occurred to
> >> me, much less happened in practice, so I didn't consider it a problem.
> >> Mixing in/out buffers in a single call wasn't a necessity, either.
> > 
> > It is useful for virtqueue_add_buf implementation.
> 
>         ret = virtqueue_start_buf(vq, data, out + in, !!out + !!in,
> 				  gfp);
>         if (ret < 0)
>                 return ret;
> 
>         if (out)
>                 virtqueue_add_sg(vq, sg, out, DMA_TO_DEVICE);
>         if (in)
>                 virtqueue_add_sg(vq, sg + out, in, DMA_FROM_DEVICE);
> 
>         virtqueue_end_buf(vq);
> 	return 0;
> 
> How can it be simpler and easier to understand than that?

Like this:

         ret = virtqueue_start_buf(vq, data, in, out, gfp);
         if (ret < 0)
                 return ret;
 
         virtqueue_add_sg(vq, sg, in, out);
 
         virtqueue_end_buf(vq);


> > Basically the more consistent the interface is with virtqueue_add_buf,
> > the better.
> 
> The interface is consistent with virtqueue_add_buf_single, where out/in
> clearly doesn't make sense.

Hmm, we could make virtqueue_add_buf_single consistent by giving it 'bool in'.

> virtqueue_add_buf and virtqueue_add_sg are very different, despite the
> similar name.

True. The similarity is between _start and _add_buf.
And this is confusing too. Maybe this means
_start and _add_sg should be renamed.

> > I'm not against changing virtqueue_add_buf if you like but let's keep
> > it all consistent.
> 
> How can you change virtqueue_add_buf?

Donnu.
Paolo Bonzini Feb. 12, 2013, 8:08 p.m. UTC | #12
Il 12/02/2013 19:23, Michael S. Tsirkin ha scritto:
> On Tue, Feb 12, 2013 at 07:04:27PM +0100, Paolo Bonzini wrote:
>>>> Perhaps, but 3 or 4 arguments (in/out/nsg or in/out/nsg_in/nsg_out) just
>>>> for this are definitely too many and make the API harder to use.
>>>>
>>>> You have to find a balance.  Having actually used the API, the
>>>> possibility of mixing in/out buffers by mistake never even occurred to
>>>> me, much less happened in practice, so I didn't consider it a problem.
>>>> Mixing in/out buffers in a single call wasn't a necessity, either.
>>>
>>> It is useful for virtqueue_add_buf implementation.
>>
>>         ret = virtqueue_start_buf(vq, data, out + in, !!out + !!in,
>> 				  gfp);
>>         if (ret < 0)
>>                 return ret;
>>
>>         if (out)
>>                 virtqueue_add_sg(vq, sg, out, DMA_TO_DEVICE);
>>         if (in)
>>                 virtqueue_add_sg(vq, sg + out, in, DMA_FROM_DEVICE);
>>
>>         virtqueue_end_buf(vq);
>> 	return 0;
>>
>> How can it be simpler and easier to understand than that?
> 
> Like this:
> 
>          ret = virtqueue_start_buf(vq, data, in, out, gfp);
>          if (ret < 0)
>                  return ret;
>  
>          virtqueue_add_sg(vq, sg, in, out);
>  
>          virtqueue_end_buf(vq);

It's out/in, not in/out... I know you wrote it in a hurry, but it kind
of shows that the new API is easier to use.  Check out patch 8, it's  a
real improvement in readability.

Plus you haven't solved the problem of alternating to/from-device
elements (which is also harder to spot with in/out than with the enum).

And no one else would use add_sg with in != 0 && out != 0, so you'd be
favoring one caller over all the others.  If you did this instead:

         virtqueue_add_sg(vq, sg, in + out);

it would really look like a hack IMHO.

>>> Basically the more consistent the interface is with virtqueue_add_buf,
>>> the better.
>>
>> The interface is consistent with virtqueue_add_buf_single, where out/in
>> clearly doesn't make sense.
> 
> Hmm, we could make virtqueue_add_buf_single consistent by giving it 'bool in'.

But is it "bool in" or "bool out"?

>> virtqueue_add_buf and virtqueue_add_sg are very different, despite the
>> similar name.
> 
> True. The similarity is between _start and _add_buf.
> And this is confusing too. Maybe this means
> _start and _add_sg should be renamed.

Maybe.  If you have any suggestions it's fine.

BTW I tried using out/in for start_buf, and the code in virtio-blk gets
messier, it has to do all the math twice.  Perhaps we just need to
acknowledge that the API is different and thus the optimal choice of
arguments is different.  C doesn't have keyword arguments, there not
much that we can do.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Feb. 12, 2013, 8:49 p.m. UTC | #13
On Tue, Feb 12, 2013 at 09:08:27PM +0100, Paolo Bonzini wrote:
> Il 12/02/2013 19:23, Michael S. Tsirkin ha scritto:
> > On Tue, Feb 12, 2013 at 07:04:27PM +0100, Paolo Bonzini wrote:
> >>>> Perhaps, but 3 or 4 arguments (in/out/nsg or in/out/nsg_in/nsg_out) just
> >>>> for this are definitely too many and make the API harder to use.
> >>>>
> >>>> You have to find a balance.  Having actually used the API, the
> >>>> possibility of mixing in/out buffers by mistake never even occurred to
> >>>> me, much less happened in practice, so I didn't consider it a problem.
> >>>> Mixing in/out buffers in a single call wasn't a necessity, either.
> >>>
> >>> It is useful for virtqueue_add_buf implementation.
> >>
> >>         ret = virtqueue_start_buf(vq, data, out + in, !!out + !!in,
> >> 				  gfp);
> >>         if (ret < 0)
> >>                 return ret;
> >>
> >>         if (out)
> >>                 virtqueue_add_sg(vq, sg, out, DMA_TO_DEVICE);
> >>         if (in)
> >>                 virtqueue_add_sg(vq, sg + out, in, DMA_FROM_DEVICE);
> >>
> >>         virtqueue_end_buf(vq);
> >> 	return 0;
> >>
> >> How can it be simpler and easier to understand than that?
> > 
> > Like this:
> > 
> >          ret = virtqueue_start_buf(vq, data, in, out, gfp);
> >          if (ret < 0)
> >                  return ret;
> >  
> >          virtqueue_add_sg(vq, sg, in, out);
> >  
> >          virtqueue_end_buf(vq);
> 
> It's out/in, not in/out... I know you wrote it in a hurry, but it kind
> of shows that the new API is easier to use.  Check out patch 8, it's  a
> real improvement in readability.

That's virtqueue_add_buf_single, that's a separate matter.
Another option for _single is just two wrappers:
virtqueue_add_buf_in
virtqueue_add_buf_out

> Plus you haven't solved the problem of alternating to/from-device
> elements (which is also harder to spot with in/out than with the enum).

Yes it does, if add_sg does not have in/out at all there's no way to
request the impossible to/from mix.

> And no one else would use add_sg with in != 0 && out != 0, so you'd be
> favoring one caller over all the others.

Yes but it's an important caller as all drivers besides storage use this
path.

>  If you did this instead:
> 
>          virtqueue_add_sg(vq, sg, in + out);
> 
> it would really look like a hack IMHO.
> 
> >>> Basically the more consistent the interface is with virtqueue_add_buf,
> >>> the better.
> >>
> >> The interface is consistent with virtqueue_add_buf_single, where out/in
> >> clearly doesn't make sense.
> > 
> > Hmm, we could make virtqueue_add_buf_single consistent by giving it 'bool in'.
> 
> But is it "bool in" or "bool out"?

Agree, bool is a bit ugly anyway.

> >> virtqueue_add_buf and virtqueue_add_sg are very different, despite the
> >> similar name.
> > 
> > True. The similarity is between _start and _add_buf.
> > And this is confusing too. Maybe this means
> > _start and _add_sg should be renamed.
> 
> Maybe.  If you have any suggestions it's fine.
> 
> BTW I tried using out/in for start_buf, and the code in virtio-blk gets
> messier, it has to do all the math twice.

I'm pretty sure we can do this without duplication, if we want to.

> Perhaps we just need to
> acknowledge that the API is different and thus the optimal choice of
> arguments is different.  C doesn't have keyword arguments, there not
> much that we can do.
> 
> Paolo

Yea, maybe. I'm not the API guru here anyway, it's Rusty's street.
Paolo Bonzini Feb. 13, 2013, 8:06 a.m. UTC | #14
Il 12/02/2013 21:49, Michael S. Tsirkin ha scritto:
> On Tue, Feb 12, 2013 at 09:08:27PM +0100, Paolo Bonzini wrote:
>> Il 12/02/2013 19:23, Michael S. Tsirkin ha scritto:
>>> On Tue, Feb 12, 2013 at 07:04:27PM +0100, Paolo Bonzini wrote:
>>>>>> Perhaps, but 3 or 4 arguments (in/out/nsg or in/out/nsg_in/nsg_out) just
>>>>>> for this are definitely too many and make the API harder to use.
>>>>>>
>>>>>> You have to find a balance.  Having actually used the API, the
>>>>>> possibility of mixing in/out buffers by mistake never even occurred to
>>>>>> me, much less happened in practice, so I didn't consider it a problem.
>>>>>> Mixing in/out buffers in a single call wasn't a necessity, either.
>>>>>
>>>>> It is useful for virtqueue_add_buf implementation.
>>>>
>>>>         ret = virtqueue_start_buf(vq, data, out + in, !!out + !!in,
>>>> 				  gfp);
>>>>         if (ret < 0)
>>>>                 return ret;
>>>>
>>>>         if (out)
>>>>                 virtqueue_add_sg(vq, sg, out, DMA_TO_DEVICE);
>>>>         if (in)
>>>>                 virtqueue_add_sg(vq, sg + out, in, DMA_FROM_DEVICE);
>>>>
>>>>         virtqueue_end_buf(vq);
>>>> 	return 0;
>>>>
>>>> How can it be simpler and easier to understand than that?
>>>
>>> Like this:
>>>
>>>          ret = virtqueue_start_buf(vq, data, in, out, gfp);
>>>          if (ret < 0)
>>>                  return ret;
>>>  
>>>          virtqueue_add_sg(vq, sg, in, out);
>>>  
>>>          virtqueue_end_buf(vq);
>>
>> It's out/in, not in/out... I know you wrote it in a hurry, but it kind
>> of shows that the new API is easier to use.  Check out patch 8, it's  a
>> real improvement in readability.
> 
> That's virtqueue_add_buf_single, that's a separate matter.
> Another option for _single is just two wrappers:
> virtqueue_add_buf_in
> virtqueue_add_buf_out

I like it less, but yes this one would be ok (no driver uses a variable
for the enum parameter).

>> Plus you haven't solved the problem of alternating to/from-device
>> elements (which is also harder to spot with in/out than with the enum).
> 
> Yes it does, if add_sg does not have in/out at all there's no way to
> request the impossible to/from mix.

In your example above it does have it.  I assume you meant

         ret = virtqueue_start_buf(vq, data, out, in, gfp);
         if (ret < 0)
                 return ret;

          virtqueue_add_sg(vq, sg, out + in);
          virtqueue_end_buf(vq);

>>>> virtqueue_add_buf and virtqueue_add_sg are very different, despite the
>>>> similar name.
>>>
>>> True. The similarity is between _start and _add_buf.
>>> And this is confusing too. Maybe this means
>>> _start and _add_sg should be renamed.
>>
>> Maybe.  If you have any suggestions it's fine.
>>
>> BTW I tried using out/in for start_buf, and the code in virtio-blk gets
>> messier, it has to do all the math twice.
> 
> I'm pretty sure we can do this without duplication, if we want to.

Indeed, if you remove the out/in arguments from _sg there is no
duplication in virtio-blk.  That's because it places data-out at the end
and data-in at the beginning (so data is always after the request header
and before the response footer).

>> Perhaps we just need to
>> acknowledge that the API is different and thus the optimal choice of
>> arguments is different.  C doesn't have keyword arguments, there not
>> much that we can do.
> 
> Yea, maybe. I'm not the API guru here anyway, it's Rusty's street.

Let's wait for him.

Paolo

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Feb. 13, 2013, 10:33 a.m. UTC | #15
On Wed, Feb 13, 2013 at 09:06:27AM +0100, Paolo Bonzini wrote:
> Il 12/02/2013 21:49, Michael S. Tsirkin ha scritto:
> > On Tue, Feb 12, 2013 at 09:08:27PM +0100, Paolo Bonzini wrote:
> >> Il 12/02/2013 19:23, Michael S. Tsirkin ha scritto:
> >>> On Tue, Feb 12, 2013 at 07:04:27PM +0100, Paolo Bonzini wrote:
> >>>>>> Perhaps, but 3 or 4 arguments (in/out/nsg or in/out/nsg_in/nsg_out) just
> >>>>>> for this are definitely too many and make the API harder to use.
> >>>>>>
> >>>>>> You have to find a balance.  Having actually used the API, the
> >>>>>> possibility of mixing in/out buffers by mistake never even occurred to
> >>>>>> me, much less happened in practice, so I didn't consider it a problem.
> >>>>>> Mixing in/out buffers in a single call wasn't a necessity, either.
> >>>>>
> >>>>> It is useful for virtqueue_add_buf implementation.
> >>>>
> >>>>         ret = virtqueue_start_buf(vq, data, out + in, !!out + !!in,
> >>>> 				  gfp);
> >>>>         if (ret < 0)
> >>>>                 return ret;
> >>>>
> >>>>         if (out)
> >>>>                 virtqueue_add_sg(vq, sg, out, DMA_TO_DEVICE);
> >>>>         if (in)
> >>>>                 virtqueue_add_sg(vq, sg + out, in, DMA_FROM_DEVICE);
> >>>>
> >>>>         virtqueue_end_buf(vq);
> >>>> 	return 0;
> >>>>
> >>>> How can it be simpler and easier to understand than that?
> >>>
> >>> Like this:
> >>>
> >>>          ret = virtqueue_start_buf(vq, data, in, out, gfp);
> >>>          if (ret < 0)
> >>>                  return ret;
> >>>  
> >>>          virtqueue_add_sg(vq, sg, in, out);
> >>>  
> >>>          virtqueue_end_buf(vq);
> >>
> >> It's out/in, not in/out... I know you wrote it in a hurry, but it kind
> >> of shows that the new API is easier to use.  Check out patch 8, it's  a
> >> real improvement in readability.
> > 
> > That's virtqueue_add_buf_single, that's a separate matter.
> > Another option for _single is just two wrappers:
> > virtqueue_add_buf_in
> > virtqueue_add_buf_out
> 
> I like it less, but yes this one would be ok (no driver uses a variable
> for the enum parameter).

OK, this has the advantage of being even shorter.

> >> Plus you haven't solved the problem of alternating to/from-device
> >> elements (which is also harder to spot with in/out than with the enum).
> > 
> > Yes it does, if add_sg does not have in/out at all there's no way to
> > request the impossible to/from mix.
> 
> In your example above it does have it.  I assume you meant
> 
>          ret = virtqueue_start_buf(vq, data, out, in, gfp);
>          if (ret < 0)
>                  return ret;
> 
>           virtqueue_add_sg(vq, sg, out + in);
>           virtqueue_end_buf(vq);
> 
> >>>> virtqueue_add_buf and virtqueue_add_sg are very different, despite the
> >>>> similar name.
> >>>
> >>> True. The similarity is between _start and _add_buf.
> >>> And this is confusing too. Maybe this means
> >>> _start and _add_sg should be renamed.
> >>
> >> Maybe.  If you have any suggestions it's fine.
> >>
> >> BTW I tried using out/in for start_buf, and the code in virtio-blk gets
> >> messier, it has to do all the math twice.
> > 
> > I'm pretty sure we can do this without duplication, if we want to.
> 
> Indeed, if you remove the out/in arguments from _sg there is no
> duplication in virtio-blk.  That's because it places data-out at the end
> and data-in at the beginning (so data is always after the request header
> and before the response footer).

Yes, it's not a virtio-blk thing, virtio spec and interface require that
we have in after out.
So yes, to me it seems cleaner to drop out/in arguments from _sg.

> >> acknowledge that the API is different and thus the optimal choice of
> >> arguments is different.  C doesn't have keyword arguments, there not
> >> much that we can do.
> > 
> > Yea, maybe. I'm not the API guru here anyway, it's Rusty's street.
> 
> Let's wait for him.
> 
> Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index ffd7e7d..64184e5 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -101,6 +101,10 @@  struct vring_virtqueue
 	/* Last used index we've seen. */
 	u16 last_used_idx;
 
+	/* State between virtqueue_start_buf and virtqueue_end_buf.  */
+	int head;
+	struct vring_desc *indirect_base, *tail;
+
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	void (*notify)(struct virtqueue *vq);
 
@@ -394,6 +398,213 @@  static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 	vq->vq.num_free++;
 }
 
+/**
+ * virtqueue_start_buf - start building buffer for the other end
+ * @vq: the struct virtqueue we're talking about.
+ * @data: the token identifying the buffer.
+ * @nents: the number of buffers that will be added
+ * @nsg: the number of sg lists that will be added
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted), and that a successful call is
+ * followed by one or more calls to virtqueue_add_sg, and finally a call
+ * to virtqueue_end_buf.
+ *
+ * Returns zero or a negative error (ie. ENOSPC).
+ */
+int virtqueue_start_buf(struct virtqueue *_vq,
+			void *data,
+			unsigned int nents,
+			unsigned int nsg,
+			gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_desc *desc = NULL;
+	int head;
+	int ret = -ENOMEM;
+
+	START_USE(vq);
+
+	BUG_ON(data == NULL);
+
+#ifdef DEBUG
+	{
+		ktime_t now = ktime_get();
+
+		/* No kick or get, with .1 second between?  Warn. */
+		if (vq->last_add_time_valid)
+			WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
+					    > 100);
+		vq->last_add_time = now;
+		vq->last_add_time_valid = true;
+	}
+#endif
+
+	BUG_ON(nents < nsg);
+	BUG_ON(nsg == 0);
+
+	/*
+	 * If the host supports indirect descriptor tables, and there is
+	 * no space for direct buffers or there are multi-item scatterlists,
+	 * go indirect.
+	 */
+	head = vq->free_head;
+	if (vq->indirect && (nents > nsg || vq->vq.num_free < nents)) {
+		if (vq->vq.num_free == 0)
+			goto no_space;
+
+		desc = kmalloc(nents * sizeof(struct vring_desc), gfp);
+		if (!desc)
+			goto error;
+
+		/* We're about to use a buffer */
+		vq->vq.num_free--;
+
+		/* Use a single buffer which doesn't continue */
+		vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
+		vq->vring.desc[head].addr = virt_to_phys(desc);
+		vq->vring.desc[head].len = nents * sizeof(struct vring_desc);
+
+		/* Update free pointer */
+		vq->free_head = vq->vring.desc[head].next;
+	}
+
+	/* Set token. */
+	vq->data[head] = data;
+
+	pr_debug("Started buffer head %i for %p\n", head, vq);
+
+	vq->indirect_base = desc;
+	vq->tail = NULL;
+	vq->head = head;
+	return 0;
+
+no_space:
+	ret = -ENOSPC;
+error:
+	pr_debug("Can't add buf (%d) - nents = %i, avail = %i\n",
+		 ret, nents, vq->vq.num_free);
+	END_USE(vq);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(virtqueue_start_buf);
+
+/**
+ * virtqueue_add_sg - add sglist to buffer being built
+ * @_vq: the virtqueue for which the buffer is being built
+ * @sgl: the description of the buffer(s).
+ * @nents: the number of items to process in sgl
+ * @dir: whether the sgl is read or written (DMA_TO_DEVICE/DMA_FROM_DEVICE only)
+ *
+ * Note that, unlike virtqueue_add_buf, this function follows chained
+ * scatterlists, and stops before the @nents-th item if a scatterlist item
+ * has a marker.
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ */
+void virtqueue_add_sg(struct virtqueue *_vq,
+		      struct scatterlist sgl[],
+		      unsigned int nents,
+		      enum dma_data_direction dir)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	unsigned int i, n;
+	struct scatterlist *sg;
+	struct vring_desc *tail;
+	u32 flags;
+
+#ifdef DEBUG
+	BUG_ON(!vq->in_use);
+#endif
+
+	BUG_ON(dir != DMA_FROM_DEVICE && dir != DMA_TO_DEVICE);
+	BUG_ON(nents == 0);
+
+	flags = dir == DMA_FROM_DEVICE ? VRING_DESC_F_WRITE : 0;
+	flags |= VRING_DESC_F_NEXT;
+
+	/*
+	 * If using indirect descriptor tables, fill in the buffers
+	 * at vq->indirect_base.
+	 */
+	if (vq->indirect_base) {
+		i = 0;
+		if (likely(vq->tail))
+			i = vq->tail - vq->indirect_base + 1;
+
+		for_each_sg(sgl, sg, nents, n) {
+			tail = &vq->indirect_base[i];
+			tail->flags = flags;
+			tail->addr = sg_phys(sg);
+			tail->len = sg->length;
+			tail->next = ++i;
+		}
+	} else {
+		BUG_ON(vq->vq.num_free < nents);
+
+		i = vq->free_head;
+		for_each_sg(sgl, sg, nents, n) {
+			tail = &vq->vring.desc[i];
+			tail->flags = flags;
+			tail->addr = sg_phys(sg);
+			tail->len = sg->length;
+			i = tail->next;
+			vq->vq.num_free--;
+		}
+
+		vq->free_head = i;
+	}
+	vq->tail = tail;
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_sg);
+
+/**
+ * virtqueue_end_buf - expose buffer to other end
+ * @_vq: the virtqueue for which the buffer was built
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ */
+void virtqueue_end_buf(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	unsigned int avail;
+	int head = vq->head;
+	struct vring_desc *tail = vq->tail;
+
+#ifdef DEBUG
+	BUG_ON(!vq->in_use);
+#endif
+	BUG_ON(tail == NULL);
+
+	/* The last one does not have the next flag set.  */
+	tail->flags &= ~VRING_DESC_F_NEXT;
+
+	/*
+	 * Put entry in available array.  Descriptors and available array
+	 * need to be set before we expose the new available array entries.
+	 */
+	avail = vq->vring.avail->idx & (vq->vring.num-1);
+	vq->vring.avail->ring[avail] = head;
+	virtio_wmb(vq);
+
+	vq->vring.avail->idx++;
+	vq->num_added++;
+
+	/*
+	 * This is very unlikely, but theoretically possible.  Kick
+	 * just in case.
+	 */
+	if (unlikely(vq->num_added == (1 << 16) - 1))
+		virtqueue_kick(&vq->vq);
+
+	pr_debug("Added buffer head %i to %p\n", head, vq);
+	END_USE(vq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_end_buf);
+
 static inline bool more_used(const struct vring_virtqueue *vq)
 {
 	return vq->last_used_idx != vq->vring.used->idx;
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index cf8adb1..43d6bc3 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -7,6 +7,7 @@ 
 #include <linux/spinlock.h>
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
+#include <linux/dma-direction.h>
 #include <linux/gfp.h>
 
 /**
@@ -40,6 +41,19 @@  int virtqueue_add_buf(struct virtqueue *vq,
 		      void *data,
 		      gfp_t gfp);
 
+int virtqueue_start_buf(struct virtqueue *_vq,
+			void *data,
+			unsigned int nents,
+			unsigned int nsg,
+			gfp_t gfp);
+
+void virtqueue_add_sg(struct virtqueue *_vq,
+		      struct scatterlist sgl[],
+		      unsigned int nents,
+		      enum dma_data_direction dir);
+
+void virtqueue_end_buf(struct virtqueue *_vq);
+
 void virtqueue_kick(struct virtqueue *vq);
 
 bool virtqueue_kick_prepare(struct virtqueue *vq);