
[v2,1/5] virtio: add functions for piecewise addition of buffers

Message ID: 1355833972-20319-2-git-send-email-pbonzini@redhat.com
State: New, archived

Commit Message

Paolo Bonzini Dec. 18, 2012, 12:32 p.m. UTC
The virtqueue_add_buf function has two limitations:

1) it requires the caller to provide all the buffers in a single call;

2) it does not support chained scatterlists: the buffers must be
provided as an array of struct scatterlist.

Because of these limitations, virtio-scsi has to copy each request into
a scatterlist internal to the driver.  It cannot just use the one that
was prepared by the upper SCSI layers.

This patch adds a different set of APIs for adding a buffer to a virtqueue.
The new API lets you pass the buffers piecewise, wrapping multiple calls
to virtqueue_add_sg between virtqueue_start_buf and virtqueue_end_buf.
virtio-scsi can then call virtqueue_add_sg three or four times: for the
request header, for the write buffer (if present), for the response
header, and finally for the read buffer (again, if present).  This saves
the copying and the related locking.

Note that this API is not needed in virtio-blk, because it does all the
work of the upper SCSI layers itself in the blk_rq_map_sg call.  Then
it simply hands the resulting scatterlist to virtqueue_add_buf.
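
For illustration, a virtio-scsi write request would be queued roughly
like this (a sketch only; vq, token, req_hdr, resp_hdr, data_sgl and
data_ents are placeholder names, not code from this series):

	struct virtqueue_buf vb;
	struct scatterlist hdr, resp;
	int err;

	sg_init_one(&hdr, &req_hdr, sizeof(req_hdr));
	sg_init_one(&resp, &resp_hdr, sizeof(resp_hdr));

	/* Two single-element headers plus the data scatterlist:
	 * 2 + data_ents buffers spread over 3 sg lists. */
	err = virtqueue_start_buf(vq, &vb, token, 2 + data_ents, 3,
				  GFP_ATOMIC);
	if (err)
		return err;

	virtqueue_add_sg(&vb, &hdr, 1, DMA_TO_DEVICE);
	virtqueue_add_sg(&vb, data_sgl, data_ents, DMA_TO_DEVICE);
	virtqueue_add_sg(&vb, &resp, 1, DMA_FROM_DEVICE);
	virtqueue_end_buf(&vb);

	virtqueue_kick(vq);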

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
	v1->v2: new

 drivers/virtio/virtio_ring.c |  205 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |   21 ++++
 2 files changed, 226 insertions(+), 0 deletions(-)

Comments

Michael S. Tsirkin Dec. 18, 2012, 1:36 p.m. UTC | #1
Some comments without arguing about whether the performance
benefit is worth it.

On Tue, Dec 18, 2012 at 01:32:48PM +0100, Paolo Bonzini wrote:
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index cf8adb1..39d56c4 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -7,6 +7,7 @@
>  #include <linux/spinlock.h>
>  #include <linux/device.h>
>  #include <linux/mod_devicetable.h>
> +#include <linux/dma-direction.h>
>  #include <linux/gfp.h>
>  
>  /**
> @@ -40,6 +41,26 @@ int virtqueue_add_buf(struct virtqueue *vq,
>  		      void *data,
>  		      gfp_t gfp);
>  
> +struct virtqueue_buf {
> +	struct virtqueue *vq;
> +	struct vring_desc *indirect, *tail;

This is wrong: virtio.h does not include virtio_ring.h,
and it shouldn't by design depend on it.

> +	int head;
> +};
> +

Can't we track state internally to the virtqueue?
Exposing it seems to buy us nothing since you can't
call add_buf between start and end anyway.


> +int virtqueue_start_buf(struct virtqueue *_vq,
> +			struct virtqueue_buf *buf,
> +			void *data,
> +			unsigned int count,
> +			unsigned int count_sg,
> +			gfp_t gfp);
> +
> +void virtqueue_add_sg(struct virtqueue_buf *buf,
> +		      struct scatterlist sgl[],
> +		      unsigned int count,
> +		      enum dma_data_direction dir);
> +

An idea: in practice virtio scsi seems to always call sg_init_one, no?
So how about we pass in void* or something and avoid using sg and count?
This would make it useful for -net BTW.
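
(One possible shape for that, as a hypothetical sketch:

	void virtqueue_add_single(struct virtqueue_buf *buf,
				  void *addr, unsigned int len,
				  enum dma_data_direction dir);

i.e. one contiguous buffer per call, with no scatterlist or count.)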

> +void virtqueue_end_buf(struct virtqueue_buf *buf);
> +
>  void virtqueue_kick(struct virtqueue *vq);
>  
>  bool virtqueue_kick_prepare(struct virtqueue *vq);
> -- 
> 1.7.1
> 
Paolo Bonzini Dec. 18, 2012, 1:43 p.m. UTC | #2
On 18/12/2012 14:36, Michael S. Tsirkin wrote:
> Some comments without arguing about whether the performance
> benefit is worth it.
> 
> On Tue, Dec 18, 2012 at 01:32:48PM +0100, Paolo Bonzini wrote:
>> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
>> index cf8adb1..39d56c4 100644
>> --- a/include/linux/virtio.h
>> +++ b/include/linux/virtio.h
>> @@ -7,6 +7,7 @@
>>  #include <linux/spinlock.h>
>>  #include <linux/device.h>
>>  #include <linux/mod_devicetable.h>
>> +#include <linux/dma-direction.h>
>>  #include <linux/gfp.h>
>>  
>>  /**
>> @@ -40,6 +41,26 @@ int virtqueue_add_buf(struct virtqueue *vq,
>>  		      void *data,
>>  		      gfp_t gfp);
>>  
>> +struct virtqueue_buf {
>> +	struct virtqueue *vq;
>> +	struct vring_desc *indirect, *tail;
> 
> This is wrong: virtio.h does not include virtio_ring.h,
> and it shouldn't by design depend on it.
> 
>> +	int head;
>> +};
>> +
> 
> Can't we track state internally to the virtqueue?
> Exposing it seems to buy us nothing since you can't
> call add_buf between start and end anyway.

I wanted to keep the state for these functions separate from the rest.
I don't think it makes much sense to move it to struct virtqueue unless
virtqueue_add_buf is converted to use the new API (doesn't make much
sense, could even be a tad slower).

On the other hand moving it there would eliminate the dependency on
virtio_ring.h.  Rusty, what do you think?

>> +int virtqueue_start_buf(struct virtqueue *_vq,
>> +			struct virtqueue_buf *buf,
>> +			void *data,
>> +			unsigned int count,
>> +			unsigned int count_sg,
>> +			gfp_t gfp);
>> +
>> +void virtqueue_add_sg(struct virtqueue_buf *buf,
>> +		      struct scatterlist sgl[],
>> +		      unsigned int count,
>> +		      enum dma_data_direction dir);
>> +
> 
> An idea: in practice virtio scsi seems to always call sg_init_one, no?
> So how about we pass in void* or something and avoid using sg and count?
> This would make it useful for -net BTW.

It also passes the scatterlist from the LLD.  It calls sg_init_one for
the request/response headers.

Paolo

>> +void virtqueue_end_buf(struct virtqueue_buf *buf);
>> +
>>  void virtqueue_kick(struct virtqueue *vq);
>>  
>>  bool virtqueue_kick_prepare(struct virtqueue *vq);
>> -- 
>> 1.7.1
>>

Michael S. Tsirkin Dec. 18, 2012, 1:59 p.m. UTC | #3
On Tue, Dec 18, 2012 at 02:43:51PM +0100, Paolo Bonzini wrote:
> On 18/12/2012 14:36, Michael S. Tsirkin wrote:
> > Some comments without arguing about whether the performance
> > benefit is worth it.
> > 
> > On Tue, Dec 18, 2012 at 01:32:48PM +0100, Paolo Bonzini wrote:
> >> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> >> index cf8adb1..39d56c4 100644
> >> --- a/include/linux/virtio.h
> >> +++ b/include/linux/virtio.h
> >> @@ -7,6 +7,7 @@
> >>  #include <linux/spinlock.h>
> >>  #include <linux/device.h>
> >>  #include <linux/mod_devicetable.h>
> >> +#include <linux/dma-direction.h>
> >>  #include <linux/gfp.h>
> >>  
> >>  /**
> >> @@ -40,6 +41,26 @@ int virtqueue_add_buf(struct virtqueue *vq,
> >>  		      void *data,
> >>  		      gfp_t gfp);
> >>  
> >> +struct virtqueue_buf {
> >> +	struct virtqueue *vq;
> >> +	struct vring_desc *indirect, *tail;
> > 
> > This is wrong: virtio.h does not include virtio_ring.h,
> > and it shouldn't by design depend on it.
> > 
> >> +	int head;
> >> +};
> >> +
> > 
> > Can't we track state internally to the virtqueue?
> > Exposing it seems to buy us nothing since you can't
> > call add_buf between start and end anyway.
> 
> I wanted to keep the state for these functions separate from the rest.
> I don't think it makes much sense to move it to struct virtqueue unless
> virtqueue_add_buf is converted to use the new API (doesn't make much
> sense, could even be a tad slower).

Why would it be slower?

> On the other hand moving it there would eliminate the dependency on
> virtio_ring.h.  Rusty, what do you think?
> 
> >> +int virtqueue_start_buf(struct virtqueue *_vq,
> >> +			struct virtqueue_buf *buf,
> >> +			void *data,
> >> +			unsigned int count,
> >> +			unsigned int count_sg,
> >> +			gfp_t gfp);
> >> +
> >> +void virtqueue_add_sg(struct virtqueue_buf *buf,
> >> +		      struct scatterlist sgl[],
> >> +		      unsigned int count,
> >> +		      enum dma_data_direction dir);
> >> +
> > 
> > An idea: in practice virtio scsi seems to always call sg_init_one, no?
> > So how about we pass in void* or something and avoid using sg and count?
> > This would make it useful for -net BTW.
> 
> It also passes the scatterlist from the LLD.  It calls sg_init_one for
> the request/response headers.
> 
> Paolo

Try adding a _single variant. You might see unrolling a loop
gives more of a benefit than this whole optimization.

> >> +void virtqueue_end_buf(struct virtqueue_buf *buf);
> >> +
> >>  void virtqueue_kick(struct virtqueue *vq);
> >>  
> >>  bool virtqueue_kick_prepare(struct virtqueue *vq);
> >> -- 
> >> 1.7.1
> >>
Paolo Bonzini Dec. 18, 2012, 2:32 p.m. UTC | #4
On 18/12/2012 14:59, Michael S. Tsirkin wrote:
>>> Can't we track state internally to the virtqueue? Exposing it
>>> seems to buy us nothing since you can't call add_buf between
>>> start and end anyway.
>> 
>> I wanted to keep the state for these functions separate from the
>> rest. I don't think it makes much sense to move it to struct
>> virtqueue unless virtqueue_add_buf is converted to use the new API
>> (doesn't make much sense, could even be a tad slower).
> 
> Why would it be slower?

virtqueue_add_buf could be slower if it used the new API.  That's
because of the overhead of writing and reading from struct
virtqueue_buf, instead of using variables in registers.

>> On the other hand moving it there would eliminate the dependency
>> on virtio_ring.h.  Rusty, what do you think?
>> 
>>> An idea: in practice virtio scsi seems to always call
>>> sg_init_one, no? So how about we pass in void* or something and
>>> avoid using sg and count? This would make it useful for -net
>>> BTW.
>> 
>> It also passes the scatterlist from the LLD.  It calls sg_init_one
>> for the request/response headers.
> 
> Try adding a _single variant. You might see unrolling a loop gives
> more of a benefit than this whole optimization.

Makes sense, I'll try.  However, note that I *do* need the
infrastructure in this patch because virtio-scsi could never use a
hypothetical virtqueue_add_buf_single; requests always have at least 2
buffers for the headers.

But I could add virtqueue_add_sg_single and use it for those
headers.  The I/O buffer can keep using virtqueue_add_sg.
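
For illustration, such a helper could be as thin as this sketch (not
part of the patch; a real version would probably write the descriptor
directly instead of building a one-element sg list):

	static void virtqueue_add_sg_single(struct virtqueue_buf *buf,
					    void *addr, unsigned int len,
					    enum dma_data_direction dir)
	{
		struct scatterlist sg;

		sg_init_one(&sg, addr, len);
		virtqueue_add_sg(buf, &sg, 1, dir);
	}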

Paolo
Michael S. Tsirkin Dec. 18, 2012, 3:06 p.m. UTC | #5
On Tue, Dec 18, 2012 at 03:32:15PM +0100, Paolo Bonzini wrote:
> On 18/12/2012 14:59, Michael S. Tsirkin wrote:
> >>> Can't we track state internally to the virtqueue? Exposing it
> >>> seems to buy us nothing since you can't call add_buf between
> >>> start and end anyway.
> >> 
> >> I wanted to keep the state for these functions separate from the
> >> rest. I don't think it makes much sense to move it to struct
> >> virtqueue unless virtqueue_add_buf is converted to use the new API
> >> (doesn't make much sense, could even be a tad slower).
> > 
> > Why would it be slower?
> 
> virtqueue_add_buf could be slower if it used the new API.  That's
> because of the overhead of writing and reading from struct
> virtqueue_buf, instead of using variables in registers.

Yes but we'll get rid of virtqueue_buf.

> >> On the other hand moving it there would eliminate the dependency
> >> on virtio_ring.h.  Rusty, what do you think?
> >> 
> >>> An idea: in practice virtio scsi seems to always call
> >>> sg_init_one, no? So how about we pass in void* or something and
> >>> avoid using sg and count? This would make it useful for -net
> >>> BTW.
> >> 
> >> It also passes the scatterlist from the LLD.  It calls sg_init_one
> >> for the request/response headers.
> > 
> > Try adding a _single variant. You might see unrolling a loop gives
> > more of a benefit than this whole optimization.
> 
> Makes sense, I'll try.  However, note that I *do* need the
> infrastructure in this patch because virtio-scsi could never use a
> hypothetical virtqueue_add_buf_single; requests always have at least 2
> buffers for the headers.
> 
> But I could add virtqueue_add_sg_single and use it for those
> headers.

Right.

>  The I/O buffer can keep using virtqueue_add_sg.
> 
> Paolo
Stefan Hajnoczi Dec. 19, 2012, 10:47 a.m. UTC | #6
On Tue, Dec 18, 2012 at 01:32:48PM +0100, Paolo Bonzini wrote:
> +/**
> + * virtqueue_start_buf - start building buffer for the other end
> + * @vq: the struct virtqueue we're talking about.
> + * @buf: a struct keeping the state of the buffer
> + * @data: the token identifying the buffer.
> + * @count: the number of buffers that will be added

Perhaps count should be named count_bufs or num_bufs.

> + * @count_sg: the number of sg lists that will be added

What is the purpose of count_sg?

Stefan
Paolo Bonzini Dec. 19, 2012, 12:04 p.m. UTC | #7
On 19/12/2012 11:47, Stefan Hajnoczi wrote:
> On Tue, Dec 18, 2012 at 01:32:48PM +0100, Paolo Bonzini wrote:
>> +/**
>> + * virtqueue_start_buf - start building buffer for the other end
>> + * @vq: the struct virtqueue we're talking about.
>> + * @buf: a struct keeping the state of the buffer
>> + * @data: the token identifying the buffer.
>> + * @count: the number of buffers that will be added
> 
> Perhaps count should be named count_bufs or num_bufs.

Ok.

>> + * @count_sg: the number of sg lists that will be added
> 
> What is the purpose of count_sg?

It is needed to decide whether to use an indirect or a direct buffer.
The idea is to avoid a memory allocation if the driver is providing us
with separate sg elements (under the assumption that they will be few).

Originally I wanted to use a mix of direct and indirect buffers (direct
if add_buf received a one-element scatterlist, otherwise indirect).  It
would have had the same effect, without having to specify count_sg in
advance.  The spec is not clear on whether that is allowed, but in the
end mixed buffers do not work with either QEMU or vhost, so I chose
this alternative instead.
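
Concretely, this is the check in virtqueue_start_buf below:

	if (vq->indirect && (count > count_sg || vq->vq.num_free < count))
		/* ... allocate an indirect table ... */

so a request whose sg lists are all single-element (count == count_sg)
keeps using direct descriptors as long as enough of them are free;
only a multi-element sg list, or a nearly full ring, forces the
kmalloc of an indirect table.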

Paolo


> Stefan
> 

Stefan Hajnoczi Dec. 19, 2012, 12:40 p.m. UTC | #8
On Wed, Dec 19, 2012 at 1:04 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 19/12/2012 11:47, Stefan Hajnoczi wrote:
>> On Tue, Dec 18, 2012 at 01:32:48PM +0100, Paolo Bonzini wrote:
>> What is the purpose of count_sg?
>
> It is needed to decide whether to use an indirect or a direct buffer.
> The idea is to avoid a memory allocation if the driver is providing us
> with separate sg elements (under the assumption that they will be few).

Ah, this makes sense now.  I saw that it affects the decision whether
to go indirect or not, but it wasn't obvious why.

> Originally I wanted to use a mix of direct and indirect buffers (direct
> if add_buf received a one-element scatterlist, otherwise indirect).  It
> would have had the same effect, without having to specify count_sg in
> advance.  The spec is not clear on whether that is allowed, but in the
> end mixed buffers do not work with either QEMU or vhost, so I chose
> this alternative instead.

Okay.

Stefan
Michael S. Tsirkin Dec. 19, 2012, 4:51 p.m. UTC | #9
On Wed, Dec 19, 2012 at 01:04:08PM +0100, Paolo Bonzini wrote:
> On 19/12/2012 11:47, Stefan Hajnoczi wrote:
> > On Tue, Dec 18, 2012 at 01:32:48PM +0100, Paolo Bonzini wrote:
> >> +/**
> >> + * virtqueue_start_buf - start building buffer for the other end
> >> + * @vq: the struct virtqueue we're talking about.
> >> + * @buf: a struct keeping the state of the buffer
> >> + * @data: the token identifying the buffer.
> >> + * @count: the number of buffers that will be added
> > 
> > Perhaps count should be named count_bufs or num_bufs.
> 
> Ok.
> 
> >> + * @count_sg: the number of sg lists that will be added
> > 
> > What is the purpose of count_sg?
> 
> It is needed to decide whether to use an indirect or a direct buffer.
> The idea is to avoid a memory allocation if the driver is providing us
> with separate sg elements (under the assumption that they will be few).
> 
> Originally I wanted to use a mix of direct and indirect buffers (direct
> if add_buf received a one-element scatterlist, otherwise indirect).  It
> would have had the same effect, without having to specify count_sg in
> advance.  The spec is not clear on whether that is allowed, but in the
> end mixed buffers do not work with either QEMU or vhost, so I chose
> this alternative instead.
> 
> Paolo

Hmm, it should work with vhost.

> 
> > Stefan
> > 
Michael S. Tsirkin Dec. 19, 2012, 4:52 p.m. UTC | #10
On Wed, Dec 19, 2012 at 06:51:30PM +0200, Michael S. Tsirkin wrote:
> On Wed, Dec 19, 2012 at 01:04:08PM +0100, Paolo Bonzini wrote:
> > On 19/12/2012 11:47, Stefan Hajnoczi wrote:
> > > On Tue, Dec 18, 2012 at 01:32:48PM +0100, Paolo Bonzini wrote:
> > >> +/**
> > >> + * virtqueue_start_buf - start building buffer for the other end
> > >> + * @vq: the struct virtqueue we're talking about.
> > >> + * @buf: a struct keeping the state of the buffer
> > >> + * @data: the token identifying the buffer.
> > >> + * @count: the number of buffers that will be added
> > > 
> > > Perhaps count should be named count_bufs or num_bufs.
> > 
> > Ok.
> > 
> > >> + * @count_sg: the number of sg lists that will be added
> > > 
> > > What is the purpose of count_sg?
> > 
> > It is needed to decide whether to use an indirect or a direct buffer.
> > The idea is to avoid a memory allocation if the driver is providing us
> > with separate sg elements (under the assumption that they will be few).
> > 
> > Originally I wanted to use a mix of direct and indirect buffers (direct
> > if add_buf received a one-element scatterlist, otherwise indirect).  It
> > would have had the same effect, without having to specify count_sg in
> > advance.  The spec is not clear on whether that is allowed, but in the
> > end mixed buffers do not work with either QEMU or vhost, so I chose
> > this alternative instead.
> > 
> > Paolo
> 
> Hmm, it should work with vhost.

BTW, passing in num_in + num_out would be nicer than an explicit
direction: it's closer to the current add_buf.
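
One possible reading of that suggestion (a sketch, not a proposed
interface):

	void virtqueue_add_sg(struct virtqueue_buf *buf,
			      struct scatterlist sgl[],
			      unsigned int num_out,
			      unsigned int num_in);

where the first num_out elements are read by the device and the
remaining num_in elements are written by it, mirroring the out/in
split of the current virtqueue_add_buf.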

> > 
> > > Stefan
> > > 

Patch

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index ffd7e7d..ccfa97c 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -394,6 +394,211 @@  static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 	vq->vq.num_free++;
 }
 
+/**
+ * virtqueue_start_buf - start building buffer for the other end
+ * @vq: the struct virtqueue we're talking about.
+ * @buf: a struct keeping the state of the buffer
+ * @data: the token identifying the buffer.
+ * @count: the number of buffers that will be added
+ * @count_sg: the number of sg lists that will be added
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted), and that a successful call is
+ * followed by one or more calls to virtqueue_add_sg, and finally a call
+ * to virtqueue_end_buf.
+ *
+ * Returns zero or a negative error (ie. ENOSPC).
+ */
+int virtqueue_start_buf(struct virtqueue *_vq,
+			struct virtqueue_buf *buf,
+			void *data,
+			unsigned int count,
+			unsigned int count_sg,
+			gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_desc *desc = NULL;
+	int head;
+	int ret = -ENOMEM;
+
+	START_USE(vq);
+
+	BUG_ON(data == NULL);
+
+#ifdef DEBUG
+	{
+		ktime_t now = ktime_get();
+
+		/* No kick or get, with .1 second between?  Warn. */
+		if (vq->last_add_time_valid)
+			WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
+					    > 100);
+		vq->last_add_time = now;
+		vq->last_add_time_valid = true;
+	}
+#endif
+
+	BUG_ON(count < count_sg);
+	BUG_ON(count_sg == 0);
+
+	/* If the host supports indirect descriptor tables, and there is
+	 * no space for direct buffers or there are multi-item scatterlists,
+	 * go indirect.
+	 */
+	head = vq->free_head;
+	if (vq->indirect && (count > count_sg || vq->vq.num_free < count)) {
+		if (vq->vq.num_free == 0)
+			goto no_space;
+
+		desc = kmalloc(count * sizeof(struct vring_desc), gfp);
+		if (!desc)
+			goto error;
+
+		/* We're about to use a buffer */
+		vq->vq.num_free--;
+
+		/* Use a single buffer which doesn't continue */
+		vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
+		vq->vring.desc[head].addr = virt_to_phys(desc);
+		vq->vring.desc[head].len = count * sizeof(struct vring_desc);
+
+		/* Update free pointer */
+		vq->free_head = vq->vring.desc[head].next;
+	}
+
+	/* Set token. */
+	vq->data[head] = data;
+
+	pr_debug("Started buffer head %i for %p\n", head, vq);
+
+	buf->vq = _vq;
+	buf->indirect = desc;
+	buf->tail = NULL;
+	buf->head = head;
+	return 0;
+
+no_space:
+	ret = -ENOSPC;
+error:
+	pr_debug("Can't add buf (%d) - count = %i, avail = %i\n",
+		 ret, count, vq->vq.num_free);
+	END_USE(vq);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(virtqueue_start_buf);
+
+/**
+ * virtqueue_add_sg - add sglist to buffer
+ * @buf: the struct that was passed to virtqueue_start_buf
+ * @sgl: the description of the buffer(s).
+ * @count: the number of items to process in sgl
+ * @dir: whether the sgl is read or written (DMA_TO_DEVICE/DMA_FROM_DEVICE only)
+ *
+ * Note that, unlike virtqueue_add_buf, this function follows chained
+ * scatterlists, and stops before the @count-th item if a scatterlist item
+ * has a marker.
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ */
+void virtqueue_add_sg(struct virtqueue_buf *buf,
+		      struct scatterlist sgl[],
+		      unsigned int count,
+		      enum dma_data_direction dir)
+{
+	struct vring_virtqueue *vq = to_vvq(buf->vq);
+	unsigned int i, uninitialized_var(prev), n;
+	struct scatterlist *sg;
+	struct vring_desc *tail;
+	u32 flags;
+
+#ifdef DEBUG
+	BUG_ON(!vq->in_use);
+#endif
+
+	BUG_ON(dir != DMA_FROM_DEVICE && dir != DMA_TO_DEVICE);
+	BUG_ON(count == 0);
+
+	flags = (dir == DMA_FROM_DEVICE ? VRING_DESC_F_WRITE : 0);
+	flags |= VRING_DESC_F_NEXT;
+
+	/* If using indirect descriptor tables, fill in the buffers
+	 * at buf->indirect.  */
+	if (buf->indirect != NULL) {
+		i = 0;
+		if (likely(buf->tail != NULL))
+			i = buf->tail - buf->indirect + 1;
+
+		for_each_sg(sgl, sg, count, n) {
+			tail = &buf->indirect[i];
+			tail->flags = flags;
+			tail->addr = sg_phys(sg);
+			tail->len = sg->length;
+			tail->next = ++i;
+		}
+	} else {
+		BUG_ON(vq->vq.num_free < count);
+
+		i = vq->free_head;
+		for_each_sg(sgl, sg, count, n) {
+			tail = &vq->vring.desc[i];
+			tail->flags = flags;
+			tail->addr = sg_phys(sg);
+			tail->len = sg->length;
+			i = vq->vring.desc[i].next;
+			vq->vq.num_free--;
+		}
+
+		vq->free_head = i;
+	}
+	buf->tail = tail;
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_sg);
+
+/**
+ * virtqueue_end_buf - expose buffer to other end
+ * @buf: the struct that was passed to virtqueue_start_buf
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ */
+void virtqueue_end_buf(struct virtqueue_buf *buf)
+{
+	struct vring_virtqueue *vq = to_vvq(buf->vq);
+	unsigned int avail;
+	int head = buf->head;
+	struct vring_desc *tail = buf->tail;
+
+#ifdef DEBUG
+	BUG_ON(!vq->in_use);
+#endif
+	BUG_ON(tail == NULL);
+
+	/* The last one does not have the next flag set.  */
+	tail->flags &= ~VRING_DESC_F_NEXT;
+
+	/* Put entry in available array; avail->idx is only updated
+	 * below, after the write barrier. */
+	avail = (vq->vring.avail->idx & (vq->vring.num-1));
+	vq->vring.avail->ring[avail] = head;
+
+	/* Descriptors and available array need to be set before we expose the
+	 * new available array entries. */
+	virtio_wmb(vq);
+	vq->vring.avail->idx++;
+	vq->num_added++;
+
+	/* This is very unlikely, but theoretically possible.  Kick
+	 * just in case. */
+	if (unlikely(vq->num_added == (1 << 16) - 1))
+		virtqueue_kick(buf->vq);
+
+	pr_debug("Added buffer head %i to %p\n", head, vq);
+	END_USE(vq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_end_buf);
+
 static inline bool more_used(const struct vring_virtqueue *vq)
 {
 	return vq->last_used_idx != vq->vring.used->idx;
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index cf8adb1..39d56c4 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -7,6 +7,7 @@ 
 #include <linux/spinlock.h>
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
+#include <linux/dma-direction.h>
 #include <linux/gfp.h>
 
 /**
@@ -40,6 +41,26 @@  int virtqueue_add_buf(struct virtqueue *vq,
 		      void *data,
 		      gfp_t gfp);
 
+struct virtqueue_buf {
+	struct virtqueue *vq;
+	struct vring_desc *indirect, *tail;
+	int head;
+};
+
+int virtqueue_start_buf(struct virtqueue *_vq,
+			struct virtqueue_buf *buf,
+			void *data,
+			unsigned int count,
+			unsigned int count_sg,
+			gfp_t gfp);
+
+void virtqueue_add_sg(struct virtqueue_buf *buf,
+		      struct scatterlist sgl[],
+		      unsigned int count,
+		      enum dma_data_direction dir);
+
+void virtqueue_end_buf(struct virtqueue_buf *buf);
+
 void virtqueue_kick(struct virtqueue *vq);
 
 bool virtqueue_kick_prepare(struct virtqueue *vq);