diff mbox

Network performance with small packets

Message ID 87fwqv4udl.fsf@rustcorp.com.au (mailing list archive)
State New, archived
Headers show

Commit Message

Rusty Russell March 10, 2011, 1:49 a.m. UTC
None

Comments

Michael S. Tsirkin April 12, 2011, 8:01 p.m. UTC | #1
On Thu, Mar 10, 2011 at 12:19:42PM +1030, Rusty Russell wrote:
> Here's an old patch where I played with implementing this:

...

> 
> virtio: put last_used and last_avail index into ring itself.
> 
> Generally, the other end of the virtio ring doesn't need to see where
> you're up to in consuming the ring.  However, to completely understand
> what's going on from the outside, this information must be exposed.
> For example, if you want to save and restore a virtio_ring, but you're
> not the consumer because the kernel is using it directly.
> 
> Fortunately, we have room to expand:

This seems to be true for x86 kvm and lguest but is it true
for s390?

        err = vmem_add_mapping(config->address,
                               vring_size(config->num,
                                          KVM_S390_VIRTIO_RING_ALIGN));
        if (err)
                goto out;
                
        vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN,
                                 vdev, (void *) config->address,
                                 kvm_notify, callback, name);
        if (!vq) {
                err = -ENOMEM;
                goto unmap;
        }
        


> the ring is always a whole number
> of pages and there's hundreds of bytes of padding after the avail ring
> and the used ring, whatever the number of descriptors (which must be a
> power of 2).
> 
> We add a feature bit so the guest can tell the host that it's writing
> out the current value there, if it wants to use that.
> 
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> ---

....

> --- a/include/linux/virtio_ring.h
> +++ b/include/linux/virtio_ring.h
> @@ -29,6 +29,9 @@
>  /* We support indirect buffer descriptors */
>  #define VIRTIO_RING_F_INDIRECT_DESC	28
>  
> +/* We publish our last-seen used index at the end of the avail ring. */
> +#define VIRTIO_RING_F_PUBLISH_INDICES	29
> +
>  /* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
>  struct vring_desc
>  {
> @@ -87,6 +90,7 @@ struct vring {
>   *	__u16 avail_flags;
>   *	__u16 avail_idx;
>   *	__u16 available[num];
> + *	__u16 last_used_idx;
>   *
>   *	// Padding to the next align boundary.
>   *	char pad[];
> @@ -95,6 +99,7 @@ struct vring {
>   *	__u16 used_flags;
>   *	__u16 used_idx;
>   *	struct vring_used_elem used[num];
> + *	__u16 last_avail_idx;
>   * };
>   */
>  static inline void vring_init(struct vring *vr, unsigned int num, void *p,
> @@ -111,9 +116,14 @@ static inline unsigned vring_size(unsign
>  {
>  	return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num)
>  		 + align - 1) & ~(align - 1))
> -		+ sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num;
> +		+ sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num + 2;
>  }
>  
> +/* We publish the last-seen used index at the end of the available ring, and
> + * vice-versa.  These are at the end for backwards compatibility. */
> +#define vring_last_used(vr) ((vr)->avail->ring[(vr)->num])
> +#define vring_last_avail(vr) (*(__u16 *)&(vr)->used->ring[(vr)->num])
> +

Will this last bit work on s390?
If I understand correctly the memory is allocated by host there?

>  #ifdef __KERNEL__
>  #include <linux/irqreturn.h>
>  struct virtio_device;
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rusty Russell April 14, 2011, 11:28 a.m. UTC | #2
On Tue, 12 Apr 2011 23:01:12 +0300, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Thu, Mar 10, 2011 at 12:19:42PM +1030, Rusty Russell wrote:
> > Here's an old patch where I played with implementing this:
> 
> ...
> 
> > 
> > virtio: put last_used and last_avail index into ring itself.
> > 
> > Generally, the other end of the virtio ring doesn't need to see where
> > you're up to in consuming the ring.  However, to completely understand
> > what's going on from the outside, this information must be exposed.
> > For example, if you want to save and restore a virtio_ring, but you're
> > not the consumer because the kernel is using it directly.
> > 
> > Fortunately, we have room to expand:
> 
> This seems to be true for x86 kvm and lguest but is it true
> for s390?

Yes, as the ring is page aligned so there's always room.

> Will this last bit work on s390?
> If I understand correctly the memory is allocated by host there?

They have to offer the feature, so if the have some way of allocating
non-page-aligned amounts of memory, they'll have to add those extra 2
bytes.

So I think it's OK...
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin April 14, 2011, 12:40 p.m. UTC | #3
On Thu, Apr 14, 2011 at 08:58:41PM +0930, Rusty Russell wrote:
> On Tue, 12 Apr 2011 23:01:12 +0300, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > On Thu, Mar 10, 2011 at 12:19:42PM +1030, Rusty Russell wrote:
> > > Here's an old patch where I played with implementing this:
> > 
> > ...
> > 
> > > 
> > > virtio: put last_used and last_avail index into ring itself.
> > > 
> > > Generally, the other end of the virtio ring doesn't need to see where
> > > you're up to in consuming the ring.  However, to completely understand
> > > what's going on from the outside, this information must be exposed.
> > > For example, if you want to save and restore a virtio_ring, but you're
> > > not the consumer because the kernel is using it directly.
> > > 
> > > Fortunately, we have room to expand:
> > 
> > This seems to be true for x86 kvm and lguest but is it true
> > for s390?
> 
> Yes, as the ring is page aligned so there's always room.
> 
> > Will this last bit work on s390?
> > If I understand correctly the memory is allocated by host there?
> 
> They have to offer the feature, so if the have some way of allocating
> non-page-aligned amounts of memory, they'll have to add those extra 2
> bytes.
> 
> So I think it's OK...
> Rusty.

Correct. I wonder whether we need to pass the relevant flag
to vring_size. If we do we'll need to add a new function
for that though as vring_size is exported to userspace.
Michael S. Tsirkin April 14, 2011, 4:03 p.m. UTC | #4
On Thu, Apr 14, 2011 at 08:58:41PM +0930, Rusty Russell wrote:
> On Tue, 12 Apr 2011 23:01:12 +0300, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > On Thu, Mar 10, 2011 at 12:19:42PM +1030, Rusty Russell wrote:
> > > Here's an old patch where I played with implementing this:
> > 
> > ...
> > 
> > > 
> > > virtio: put last_used and last_avail index into ring itself.
> > > 
> > > Generally, the other end of the virtio ring doesn't need to see where
> > > you're up to in consuming the ring.  However, to completely understand
> > > what's going on from the outside, this information must be exposed.
> > > For example, if you want to save and restore a virtio_ring, but you're
> > > not the consumer because the kernel is using it directly.
> > > 
> > > Fortunately, we have room to expand:
> > 
> > This seems to be true for x86 kvm and lguest but is it true
> > for s390?
> 
> Yes, as the ring is page aligned so there's always room.
> 
> > Will this last bit work on s390?
> > If I understand correctly the memory is allocated by host there?
> 
> They have to offer the feature, so if the have some way of allocating
> non-page-aligned amounts of memory, they'll have to add those extra 2
> bytes.
> 
> So I think it's OK...
> Rusty.

To clarify, my concern is that we always seem to try to map
these extra 2 bytes, which thinkably might fail?
diff mbox

Patch

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -71,9 +71,6 @@  struct vring_virtqueue
 	/* Number we've added since last sync. */
 	unsigned int num_added;
 
-	/* Last used index we've seen. */
-	u16 last_used_idx;
-
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	void (*notify)(struct virtqueue *vq);
 
@@ -278,12 +275,13 @@  static void detach_buf(struct vring_virt
 
 static inline bool more_used(const struct vring_virtqueue *vq)
 {
-	return vq->last_used_idx != vq->vring.used->idx;
+	return vring_last_used(&vq->vring) != vq->vring.used->idx;
 }
 
 static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_used_elem *u;
 	void *ret;
 	unsigned int i;
 
@@ -300,8 +298,11 @@  static void *vring_get_buf(struct virtqu
 		return NULL;
 	}
 
-	i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id;
-	*len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len;
+	u = &vq->vring.used->ring[vring_last_used(&vq->vring) % vq->vring.num];
+	i = u->id;
+	*len = u->len;
+	/* Make sure we don't reload i after doing checks. */
+	rmb();
 
 	if (unlikely(i >= vq->vring.num)) {
 		BAD_RING(vq, "id %u out of range\n", i);
@@ -315,7 +316,8 @@  static void *vring_get_buf(struct virtqu
 	/* detach_buf clears data, so grab it now. */
 	ret = vq->data[i];
 	detach_buf(vq, i);
-	vq->last_used_idx++;
+	vring_last_used(&vq->vring)++;
+
 	END_USE(vq);
 	return ret;
 }
@@ -402,7 +404,6 @@  struct virtqueue *vring_new_virtqueue(un
 	vq->vq.name = name;
 	vq->notify = notify;
 	vq->broken = false;
-	vq->last_used_idx = 0;
 	vq->num_added = 0;
 	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
@@ -413,6 +414,10 @@  struct virtqueue *vring_new_virtqueue(un
 
 	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
 
+	/* We publish indices whether they offer it or not: if not, it's junk
+	 * space anyway.  But calling this acknowledges the feature. */
+	virtio_has_feature(vdev, VIRTIO_RING_F_PUBLISH_INDICES);
+
 	/* No callback?  Tell other side not to bother us. */
 	if (!callback)
 		vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
@@ -443,6 +448,8 @@  void vring_transport_features(struct vir
 		switch (i) {
 		case VIRTIO_RING_F_INDIRECT_DESC:
 			break;
+		case VIRTIO_RING_F_PUBLISH_INDICES:
+			break;
 		default:
 			/* We don't understand this bit. */
 			clear_bit(i, vdev->features);
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -29,6 +29,9 @@ 
 /* We support indirect buffer descriptors */
 #define VIRTIO_RING_F_INDIRECT_DESC	28
 
+/* We publish our last-seen used index at the end of the avail ring. */
+#define VIRTIO_RING_F_PUBLISH_INDICES	29
+
 /* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
 struct vring_desc
 {
@@ -87,6 +90,7 @@  struct vring {
  *	__u16 avail_flags;
  *	__u16 avail_idx;
  *	__u16 available[num];
+ *	__u16 last_used_idx;
  *
  *	// Padding to the next align boundary.
  *	char pad[];
@@ -95,6 +99,7 @@  struct vring {
  *	__u16 used_flags;
  *	__u16 used_idx;
  *	struct vring_used_elem used[num];
+ *	__u16 last_avail_idx;
  * };
  */
 static inline void vring_init(struct vring *vr, unsigned int num, void *p,
@@ -111,9 +116,14 @@  static inline unsigned vring_size(unsign
 {
 	return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num)
 		 + align - 1) & ~(align - 1))
-		+ sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num;
+		+ sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num + 2;
 }
 
+/* We publish the last-seen used index at the end of the available ring, and
+ * vice-versa.  These are at the end for backwards compatibility. */
+#define vring_last_used(vr) ((vr)->avail->ring[(vr)->num])
+#define vring_last_avail(vr) (*(__u16 *)&(vr)->used->ring[(vr)->num])
+
 #ifdef __KERNEL__
 #include <linux/irqreturn.h>
 struct virtio_device;