diff mbox

[v2,2/2] virtio-ring: Allocate indirect buffers from cache when possible

Message ID 1346159043-16446-2-git-send-email-levinsasha928@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Sasha Levin Aug. 28, 2012, 1:04 p.m. UTC
Currently if VIRTIO_RING_F_INDIRECT_DESC is enabled we will
use indirect descriptors and allocate them using a simple
kmalloc().

This patch adds a cache which will allow indirect buffers under
a configurable size to be allocated from that cache instead.

Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
---

Changes in v2:

 - Free indirect buffers correctly.

 drivers/block/virtio_blk.c          |  4 ++++
 drivers/char/hw_random/virtio-rng.c |  4 ++++
 drivers/char/virtio_console.c       |  4 ++++
 drivers/net/virtio_net.c            |  4 ++++
 drivers/virtio/virtio_balloon.c     |  4 ++++
 drivers/virtio/virtio_ring.c        | 28 ++++++++++++++++++++++++----
 include/linux/virtio.h              |  1 +
 net/9p/trans_virtio.c               |  5 +++++
 8 files changed, 50 insertions(+), 4 deletions(-)

Comments

Michael S. Tsirkin Aug. 28, 2012, 1:20 p.m. UTC | #1
On Tue, Aug 28, 2012 at 03:04:03PM +0200, Sasha Levin wrote:
> Currently if VIRTIO_RING_F_INDIRECT_DESC is enabled we will
> use indirect descriptors and allocate them using a simple
> kmalloc().
> 
> This patch adds a cache which will allow indirect buffers under
> a configurable size to be allocated from that cache instead.
> 
> Signed-off-by: Sasha Levin <levinsasha928@gmail.com>

I imagine this helps performance? Any numbers?

> ---
> 
> Changes in v2:
> 
>  - Free correctly indirect buffers.
> 
>  drivers/block/virtio_blk.c          |  4 ++++
>  drivers/char/hw_random/virtio-rng.c |  4 ++++
>  drivers/char/virtio_console.c       |  4 ++++
>  drivers/net/virtio_net.c            |  4 ++++
>  drivers/virtio/virtio_balloon.c     |  4 ++++
>  drivers/virtio/virtio_ring.c        | 28 ++++++++++++++++++++++++----
>  include/linux/virtio.h              |  1 +
>  net/9p/trans_virtio.c               |  5 +++++
>  8 files changed, 50 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 13b8ae9..7f670af 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -25,6 +25,9 @@ struct workqueue_struct *virtblk_wq;
>  static unsigned int indirect_thresh;
>  module_param(indirect_thresh, uint, S_IRUGO);
>  
> +static unsigned int indirect_alloc_thresh;
> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> +
>  struct virtio_blk
>  {
>  	struct virtio_device *vdev;
> @@ -739,6 +742,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>  	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
>  	vblk->config_enable = true;
>  	vdev->indirect_thresh = indirect_thresh;
> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
>  
>  	err = init_vq(vblk);
>  	if (err)
> diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
> index 02d8421..8475ece 100644
> --- a/drivers/char/hw_random/virtio-rng.c
> +++ b/drivers/char/hw_random/virtio-rng.c
> @@ -28,6 +28,9 @@
>  static unsigned int indirect_thresh;
>  module_param(indirect_thresh, uint, S_IRUGO);
>  
> +static unsigned int indirect_alloc_thresh;
> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> +
>  static struct virtqueue *vq;
>  static unsigned int data_avail;
>  static DECLARE_COMPLETION(have_data);
> @@ -97,6 +100,7 @@ static int probe_common(struct virtio_device *vdev)
>  
>  	/* We expect a single virtqueue. */
>  	vdev->indirect_thresh = indirect_thresh;
> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
>  	vq = virtio_find_single_vq(vdev, random_recv_done, "input");
>  	if (IS_ERR(vq))
>  		return PTR_ERR(vq);
> diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
> index fc14e7f..c6f24a7 100644
> --- a/drivers/char/virtio_console.c
> +++ b/drivers/char/virtio_console.c
> @@ -42,6 +42,9 @@
>  static unsigned int indirect_thresh;
>  module_param(indirect_thresh, uint, S_IRUGO);
>  
> +static unsigned int indirect_alloc_thresh;
> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> +
>  /*
>   * This is a global struct for storing common data for all the devices
>   * this driver handles.
> @@ -1891,6 +1894,7 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
>  			      &portdev->config.max_nr_ports) == 0)
>  		multiport = true;
>  	vdev->indirect_thresh = indirect_thresh;
> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
>  
>  	err = init_vqs(portdev);
>  	if (err < 0) {
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 64a8321..c091efd 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -37,6 +37,9 @@ module_param(gso, bool, 0444);
>  static unsigned int indirect_thresh;
>  module_param(indirect_thresh, uint, S_IRUGO);
>  
> +static unsigned int indirect_alloc_thresh;
> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> +
>  /* FIXME: MTU in config. */
>  #define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
>  #define GOOD_COPY_LEN	128
> @@ -1132,6 +1135,7 @@ static int virtnet_probe(struct virtio_device *vdev)
>  	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
>  		vi->mergeable_rx_bufs = true;
>  	vdev->indirect_thresh = indirect_thresh;
> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
>  
>  	err = init_vqs(vi);
>  	if (err)
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index fce7347..ccf87db 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -38,6 +38,9 @@
>  static unsigned int indirect_thresh;
>  module_param(indirect_thresh, uint, S_IRUGO);
>  
> +static unsigned int indirect_alloc_thresh;
> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> +
>  struct virtio_balloon
>  {
>  	struct virtio_device *vdev;
> @@ -360,6 +363,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
>  	vb->vdev = vdev;
>  	vb->need_stats_update = 0;
>  	vdev->indirect_thresh = indirect_thresh;
> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
>  
>  	err = init_vqs(vb);
>  	if (err)
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 99a64a7..e8b9c54 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -93,6 +93,10 @@ struct vring_virtqueue
>  	 */
>  	unsigned int indirect_thresh;
>  
> +	/* Buffers below this size will be allocated from cache */
> +	unsigned int indirect_alloc_thresh;
> +	struct kmem_cache *indirect_cache;
> +
>  	/* Host publishes avail event idx */
>  	bool event;
>  
> @@ -135,7 +139,10 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
>  	unsigned head;
>  	int i;
>  
> -	desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
> +	if ((out + in) <= vq->indirect_alloc_thresh)
> +		desc = kmem_cache_alloc(vq->indirect_cache, gfp);
> +	else
> +		desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
>  	if (!desc)
>  		return -ENOMEM;
>  
> @@ -384,8 +391,13 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
>  	i = head;
>  
>  	/* Free the indirect table */
> -	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
> -		kfree(phys_to_virt(vq->vring.desc[i].addr));
> +	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT) {
> +		if (vq->vring.desc[i].len > vq->indirect_alloc_thresh)
> +			kfree(phys_to_virt(vq->vring.desc[i].addr));
> +		else
> +			kmem_cache_free(vq->indirect_cache,
> +					phys_to_virt(vq->vring.desc[i].addr));
> +	}
>  
>  	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
>  		i = vq->vring.desc[i].next;
> @@ -654,14 +666,20 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
>  	vq->last_used_idx = 0;
>  	vq->num_added = 0;
>  	vq->indirect_thresh = 0;
> +	vq->indirect_alloc_thresh = 0;
> +	vq->indirect_cache = NULL;
>  	list_add_tail(&vq->vq.list, &vdev->vqs);
>  #ifdef DEBUG
>  	vq->in_use = false;
>  	vq->last_add_time_valid = false;
>  #endif
>  
> -	if (virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
> +	if (virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
>  		vq->indirect_thresh = vdev->indirect_thresh;
> +		vq->indirect_alloc_thresh = vdev->indirect_alloc_thresh;
> +		if (vq->indirect_alloc_thresh)
> +			vq->indirect_cache = KMEM_CACHE(vring_desc[vq->indirect_alloc_thresh], 0);
> +	}
>  
>  	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
>  
> @@ -685,6 +703,8 @@ EXPORT_SYMBOL_GPL(vring_new_virtqueue);
>  void vring_del_virtqueue(struct virtqueue *vq)
>  {
>  	list_del(&vq->list);
> +	if (to_vvq(vq)->indirect_cache)
> +		kmem_cache_destroy(to_vvq(vq)->indirect_cache);
>  	kfree(to_vvq(vq));
>  }
>  EXPORT_SYMBOL_GPL(vring_del_virtqueue);
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index 48bc457..3261c02 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -70,6 +70,7 @@ struct virtio_device {
>  	unsigned long features[1];
>  	void *priv;
>  	unsigned int indirect_thresh;
> +	unsigned int indirect_alloc_thresh;
>  };
>  
>  #define dev_to_virtio(dev) container_of(dev, struct virtio_device, dev)
> diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
> index fc93962..3044c86 100644
> --- a/net/9p/trans_virtio.c
> +++ b/net/9p/trans_virtio.c
> @@ -55,6 +55,9 @@
>  static unsigned int indirect_thresh;
>  module_param(indirect_thresh, uint, S_IRUGO);
>  
> +static unsigned int indirect_alloc_thresh;
> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> +
>  /* a single mutex to manage channel initialization and attachment */
>  static DEFINE_MUTEX(virtio_9p_lock);
>  static DECLARE_WAIT_QUEUE_HEAD(vp_wq);
> @@ -505,6 +508,8 @@ static int p9_virtio_probe(struct virtio_device *vdev)
>  
>  	/* We expect one virtqueue, for requests. */
>  	vdev->indirect_thresh = indirect_thresh;
> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
> +
>  	chan->vq = virtio_find_single_vq(vdev, req_done, "requests");
>  	if (IS_ERR(chan->vq)) {
>  		err = PTR_ERR(chan->vq);
> -- 
> 1.7.12
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sasha Levin Aug. 28, 2012, 1:35 p.m. UTC | #2
On 08/28/2012 03:20 PM, Michael S. Tsirkin wrote:
> On Tue, Aug 28, 2012 at 03:04:03PM +0200, Sasha Levin wrote:
>> Currently if VIRTIO_RING_F_INDIRECT_DESC is enabled we will
>> use indirect descriptors and allocate them using a simple
>> kmalloc().
>>
>> This patch adds a cache which will allow indirect buffers under
>> a configurable size to be allocated from that cache instead.
>>
>> Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
> 
> I imagine this helps performance? Any numbers?

I ran benchmarks on the original RFC, I've re-tested it now and got similar
numbers to the original ones (virtio-net using vhost-net, thresh=16):

Before:
	Recv   Send    Send
	Socket Socket  Message  Elapsed
	Size   Size    Size     Time     Throughput
	bytes  bytes   bytes    secs.    10^6bits/sec

	 87380  16384  16384    10.00    4512.12

After:
	Recv   Send    Send
	Socket Socket  Message  Elapsed
	Size   Size    Size     Time     Throughput
	bytes  bytes   bytes    secs.    10^6bits/sec

	 87380  16384  16384    10.00    5399.18


Thanks,
Sasha
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Aug. 29, 2012, 11:07 a.m. UTC | #3
On Tue, Aug 28, 2012 at 03:35:00PM +0200, Sasha Levin wrote:
> On 08/28/2012 03:20 PM, Michael S. Tsirkin wrote:
> > On Tue, Aug 28, 2012 at 03:04:03PM +0200, Sasha Levin wrote:
> >> Currently if VIRTIO_RING_F_INDIRECT_DESC is enabled we will
> >> use indirect descriptors and allocate them using a simple
> >> kmalloc().
> >>
> >> This patch adds a cache which will allow indirect buffers under
> >> a configurable size to be allocated from that cache instead.
> >>
> >> Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
> > 
> > I imagine this helps performance? Any numbers?
> 
> I ran benchmarks on the original RFC, I've re-tested it now and got similar
> numbers to the original ones (virtio-net using vhost-net, thresh=16):
> 
> Before:
> 	Recv   Send    Send
> 	Socket Socket  Message  Elapsed
> 	Size   Size    Size     Time     Throughput
> 	bytes  bytes   bytes    secs.    10^6bits/sec
> 
> 	 87380  16384  16384    10.00    4512.12
> 
> After:
> 	Recv   Send    Send
> 	Socket Socket  Message  Elapsed
> 	Size   Size    Size     Time     Throughput
> 	bytes  bytes   bytes    secs.    10^6bits/sec
> 
> 	 87380  16384  16384    10.00    5399.18
> 
> 
> Thanks,
> Sasha

This is with both patches 1 + 2?
Sorry, could you please also test what happens if you apply
- just patch 1
- just patch 2

Thanks!
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sasha Levin Aug. 29, 2012, 3:03 p.m. UTC | #4
On 08/29/2012 01:07 PM, Michael S. Tsirkin wrote:
> On Tue, Aug 28, 2012 at 03:35:00PM +0200, Sasha Levin wrote:
>> On 08/28/2012 03:20 PM, Michael S. Tsirkin wrote:
>>> On Tue, Aug 28, 2012 at 03:04:03PM +0200, Sasha Levin wrote:
>>>> Currently if VIRTIO_RING_F_INDIRECT_DESC is enabled we will
>>>> use indirect descriptors and allocate them using a simple
>>>> kmalloc().
>>>>
>>>> This patch adds a cache which will allow indirect buffers under
>>>> a configurable size to be allocated from that cache instead.
>>>>
>>>> Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
>>>
>>> I imagine this helps performance? Any numbers?
>>
>> I ran benchmarks on the original RFC, I've re-tested it now and got similar
>> numbers to the original ones (virtio-net using vhost-net, thresh=16):
>>
>> Before:
>> 	Recv   Send    Send
>> 	Socket Socket  Message  Elapsed
>> 	Size   Size    Size     Time     Throughput
>> 	bytes  bytes   bytes    secs.    10^6bits/sec
>>
>> 	 87380  16384  16384    10.00    4512.12
>>
>> After:
>> 	Recv   Send    Send
>> 	Socket Socket  Message  Elapsed
>> 	Size   Size    Size     Time     Throughput
>> 	bytes  bytes   bytes    secs.    10^6bits/sec
>>
>> 	 87380  16384  16384    10.00    5399.18
>>
>>
>> Thanks,
>> Sasha
> 
> This is with both patches 1 + 2?
> Sorry could you please also test what happens if you apply
> - just patch 1
> - just patch 2
> 
> Thanks!

Sure thing!

I've also re-run it on an IBM server type host instead of my laptop. Here are the
results:

Vanilla kernel:

MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.33.1
() port 0 AF_INET
enable_enobufs failed: getprotobyname
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    7922.72

Patch 1, with threshold=16:

MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.33.1
() port 0 AF_INET
enable_enobufs failed: getprotobyname
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    8415.07

Patch 2:

MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.33.1
() port 0 AF_INET
enable_enobufs failed: getprotobyname
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    8931.05


Note that these are simple tests with netperf listening on one end and a simple
'netperf -H [host]' within the guest. If there are other tests which may be
interesting please let me know.


Thanks,
Sasha

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Aug. 29, 2012, 3:14 p.m. UTC | #5
On Wed, Aug 29, 2012 at 05:03:03PM +0200, Sasha Levin wrote:
> On 08/29/2012 01:07 PM, Michael S. Tsirkin wrote:
> > On Tue, Aug 28, 2012 at 03:35:00PM +0200, Sasha Levin wrote:
> >> On 08/28/2012 03:20 PM, Michael S. Tsirkin wrote:
> >>> On Tue, Aug 28, 2012 at 03:04:03PM +0200, Sasha Levin wrote:
> >>>> Currently if VIRTIO_RING_F_INDIRECT_DESC is enabled we will
> >>>> use indirect descriptors and allocate them using a simple
> >>>> kmalloc().
> >>>>
> >>>> This patch adds a cache which will allow indirect buffers under
> >>>> a configurable size to be allocated from that cache instead.
> >>>>
> >>>> Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
> >>>
> >>> I imagine this helps performance? Any numbers?
> >>
> >> I ran benchmarks on the original RFC, I've re-tested it now and got similar
> >> numbers to the original ones (virtio-net using vhost-net, thresh=16):
> >>
> >> Before:
> >> 	Recv   Send    Send
> >> 	Socket Socket  Message  Elapsed
> >> 	Size   Size    Size     Time     Throughput
> >> 	bytes  bytes   bytes    secs.    10^6bits/sec
> >>
> >> 	 87380  16384  16384    10.00    4512.12
> >>
> >> After:
> >> 	Recv   Send    Send
> >> 	Socket Socket  Message  Elapsed
> >> 	Size   Size    Size     Time     Throughput
> >> 	bytes  bytes   bytes    secs.    10^6bits/sec
> >>
> >> 	 87380  16384  16384    10.00    5399.18
> >>
> >>
> >> Thanks,
> >> Sasha
> > 
> > This is with both patches 1 + 2?
> > Sorry could you please also test what happens if you apply
> > - just patch 1
> > - just patch 2
> > 
> > Thanks!
> 
> Sure thing!
> 
> I've also re-ran it on a IBM server type host instead of my laptop. Here are the
> results:
> 
> Vanilla kernel:
> 
> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.33.1
> () port 0 AF_INET
> enable_enobufs failed: getprotobyname
> Recv   Send    Send
> Socket Socket  Message  Elapsed
> Size   Size    Size     Time     Throughput
> bytes  bytes   bytes    secs.    10^6bits/sec
> 
>  87380  16384  16384    10.00    7922.72
> 
> Patch 1, with threshold=16:

OK so let us set it to 16 for virtio-net by default then?


> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.33.1
> () port 0 AF_INET
> enable_enobufs failed: getprotobyname
> Recv   Send    Send
> Socket Socket  Message  Elapsed
> Size   Size    Size     Time     Throughput
> bytes  bytes   bytes    secs.    10^6bits/sec
> 
>  87380  16384  16384    10.00    8415.07
> 
> Patch 2:
> 
> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.33.1
> () port 0 AF_INET
> enable_enobufs failed: getprotobyname
> Recv   Send    Send
> Socket Socket  Message  Elapsed
> Size   Size    Size     Time     Throughput
> bytes  bytes   bytes    secs.    10^6bits/sec
> 
>  87380  16384  16384    10.00    8931.05
> 
> 
> Note that these are simple tests with netperf listening on one end and a simple
> 'netperf -H [host]' within the guest. If there are other tests which may be
> interesting please let me know.
> 
> 
> Thanks,
> Sasha

Checking that host CPU utilization did not jump would be nice.
E.g. measure BW/host CPU.
Michael S. Tsirkin Aug. 29, 2012, 3:38 p.m. UTC | #6
On Tue, Aug 28, 2012 at 03:04:03PM +0200, Sasha Levin wrote:
> Currently if VIRTIO_RING_F_INDIRECT_DESC is enabled we will
> use indirect descriptors and allocate them using a simple
> kmalloc().
> 
> This patch adds a cache which will allow indirect buffers under
> a configurable size to be allocated from that cache instead.
> 
> Signed-off-by: Sasha Levin <levinsasha928@gmail.com>

The API is ugly - how does the driver know what to set?
Is this a typical request size? Then let's call it that.

Also this is really per VQ, right?

What is a good default for net? I guess max sg?

> ---
> 
> Changes in v2:
> 
>  - Free correctly indirect buffers.
> 
>  drivers/block/virtio_blk.c          |  4 ++++
>  drivers/char/hw_random/virtio-rng.c |  4 ++++
>  drivers/char/virtio_console.c       |  4 ++++
>  drivers/net/virtio_net.c            |  4 ++++
>  drivers/virtio/virtio_balloon.c     |  4 ++++
>  drivers/virtio/virtio_ring.c        | 28 ++++++++++++++++++++++++----
>  include/linux/virtio.h              |  1 +
>  net/9p/trans_virtio.c               |  5 +++++
>  8 files changed, 50 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 13b8ae9..7f670af 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -25,6 +25,9 @@ struct workqueue_struct *virtblk_wq;
>  static unsigned int indirect_thresh;
>  module_param(indirect_thresh, uint, S_IRUGO);
>  
> +static unsigned int indirect_alloc_thresh;
> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> +
>  struct virtio_blk
>  {
>  	struct virtio_device *vdev;
> @@ -739,6 +742,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>  	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
>  	vblk->config_enable = true;
>  	vdev->indirect_thresh = indirect_thresh;
> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
>  
>  	err = init_vq(vblk);
>  	if (err)
> diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
> index 02d8421..8475ece 100644
> --- a/drivers/char/hw_random/virtio-rng.c
> +++ b/drivers/char/hw_random/virtio-rng.c
> @@ -28,6 +28,9 @@
>  static unsigned int indirect_thresh;
>  module_param(indirect_thresh, uint, S_IRUGO);
>  
> +static unsigned int indirect_alloc_thresh;
> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> +
>  static struct virtqueue *vq;
>  static unsigned int data_avail;
>  static DECLARE_COMPLETION(have_data);
> @@ -97,6 +100,7 @@ static int probe_common(struct virtio_device *vdev)
>  
>  	/* We expect a single virtqueue. */
>  	vdev->indirect_thresh = indirect_thresh;
> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
>  	vq = virtio_find_single_vq(vdev, random_recv_done, "input");
>  	if (IS_ERR(vq))
>  		return PTR_ERR(vq);
> diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
> index fc14e7f..c6f24a7 100644
> --- a/drivers/char/virtio_console.c
> +++ b/drivers/char/virtio_console.c
> @@ -42,6 +42,9 @@
>  static unsigned int indirect_thresh;
>  module_param(indirect_thresh, uint, S_IRUGO);
>  
> +static unsigned int indirect_alloc_thresh;
> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> +
>  /*
>   * This is a global struct for storing common data for all the devices
>   * this driver handles.
> @@ -1891,6 +1894,7 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
>  			      &portdev->config.max_nr_ports) == 0)
>  		multiport = true;
>  	vdev->indirect_thresh = indirect_thresh;
> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
>  
>  	err = init_vqs(portdev);
>  	if (err < 0) {
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 64a8321..c091efd 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -37,6 +37,9 @@ module_param(gso, bool, 0444);
>  static unsigned int indirect_thresh;
>  module_param(indirect_thresh, uint, S_IRUGO);
>  
> +static unsigned int indirect_alloc_thresh;
> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> +
>  /* FIXME: MTU in config. */
>  #define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
>  #define GOOD_COPY_LEN	128
> @@ -1132,6 +1135,7 @@ static int virtnet_probe(struct virtio_device *vdev)
>  	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
>  		vi->mergeable_rx_bufs = true;
>  	vdev->indirect_thresh = indirect_thresh;
> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
>  
>  	err = init_vqs(vi);
>  	if (err)
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index fce7347..ccf87db 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -38,6 +38,9 @@
>  static unsigned int indirect_thresh;
>  module_param(indirect_thresh, uint, S_IRUGO);
>  
> +static unsigned int indirect_alloc_thresh;
> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> +
>  struct virtio_balloon
>  {
>  	struct virtio_device *vdev;
> @@ -360,6 +363,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
>  	vb->vdev = vdev;
>  	vb->need_stats_update = 0;
>  	vdev->indirect_thresh = indirect_thresh;
> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
>  
>  	err = init_vqs(vb);
>  	if (err)
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 99a64a7..e8b9c54 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -93,6 +93,10 @@ struct vring_virtqueue
>  	 */
>  	unsigned int indirect_thresh;
>  
> +	/* Buffers below this size will be allocated from cache */
> +	unsigned int indirect_alloc_thresh;
> +	struct kmem_cache *indirect_cache;
> +
>  	/* Host publishes avail event idx */
>  	bool event;
>  
> @@ -135,7 +139,10 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
>  	unsigned head;
>  	int i;
>  
> -	desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
> +	if ((out + in) <= vq->indirect_alloc_thresh)
> +		desc = kmem_cache_alloc(vq->indirect_cache, gfp);
> +	else
> +		desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
>  	if (!desc)
>  		return -ENOMEM;

So this means that drivers need to know how large each
descriptor is to avoid trying to allocate 32Mbyte chunks :)

>  
> @@ -384,8 +391,13 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
>  	i = head;
>  
>  	/* Free the indirect table */
> -	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
> -		kfree(phys_to_virt(vq->vring.desc[i].addr));
> +	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT) {
> +		if (vq->vring.desc[i].len > vq->indirect_alloc_thresh)

This looks wrong. indirect_alloc_thresh is in entries but len is in
bytes, isn't it?


> +			kfree(phys_to_virt(vq->vring.desc[i].addr));
> +		else
> +			kmem_cache_free(vq->indirect_cache,
> +					phys_to_virt(vq->vring.desc[i].addr));
> +	}
>  
>  	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
>  		i = vq->vring.desc[i].next;
> @@ -654,14 +666,20 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
>  	vq->last_used_idx = 0;
>  	vq->num_added = 0;
>  	vq->indirect_thresh = 0;
> +	vq->indirect_alloc_thresh = 0;
> +	vq->indirect_cache = NULL;
>  	list_add_tail(&vq->vq.list, &vdev->vqs);
>  #ifdef DEBUG
>  	vq->in_use = false;
>  	vq->last_add_time_valid = false;
>  #endif
>  
> -	if (virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
> +	if (virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
>  		vq->indirect_thresh = vdev->indirect_thresh;
> +		vq->indirect_alloc_thresh = vdev->indirect_alloc_thresh;
> +		if (vq->indirect_alloc_thresh)
> +			vq->indirect_cache = KMEM_CACHE(vring_desc[vq->indirect_alloc_thresh],
> 0);

I am not a purist but this line looks way too long.
Also - no need to check cache creation succeeded?
On failure - disable caching?

Also - let's check that values are sane before passing them on?
They come from the user, after all.

Also - shouldn't the threshold be per VQ? E.g. for -net we do not
need the cache for RX unless in legacy big packet mode.

> +	}
>  
>  	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
>  
> @@ -685,6 +703,8 @@ EXPORT_SYMBOL_GPL(vring_new_virtqueue);
>  void vring_del_virtqueue(struct virtqueue *vq)
>  {
>  	list_del(&vq->list);
> +	if (to_vvq(vq)->indirect_cache)
> +		kmem_cache_destroy(to_vvq(vq)->indirect_cache);
>  	kfree(to_vvq(vq));
>  }
>  EXPORT_SYMBOL_GPL(vring_del_virtqueue);
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index 48bc457..3261c02 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -70,6 +70,7 @@ struct virtio_device {
>  	unsigned long features[1];
>  	void *priv;
>  	unsigned int indirect_thresh;
> +	unsigned int indirect_alloc_thresh;
>  };
>  
>  #define dev_to_virtio(dev) container_of(dev, struct virtio_device, dev)
> diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
> index fc93962..3044c86 100644
> --- a/net/9p/trans_virtio.c
> +++ b/net/9p/trans_virtio.c
> @@ -55,6 +55,9 @@
>  static unsigned int indirect_thresh;
>  module_param(indirect_thresh, uint, S_IRUGO);
>  
> +static unsigned int indirect_alloc_thresh;
> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> +
>  /* a single mutex to manage channel initialization and attachment */
>  static DEFINE_MUTEX(virtio_9p_lock);
>  static DECLARE_WAIT_QUEUE_HEAD(vp_wq);
> @@ -505,6 +508,8 @@ static int p9_virtio_probe(struct virtio_device *vdev)
>  
>  	/* We expect one virtqueue, for requests. */
>  	vdev->indirect_thresh = indirect_thresh;
> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
> +
>  	chan->vq = virtio_find_single_vq(vdev, req_done, "requests");
>  	if (IS_ERR(chan->vq)) {
>  		err = PTR_ERR(chan->vq);
> -- 
> 1.7.12
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Aug. 29, 2012, 3:38 p.m. UTC | #7
On Wed, Aug 29, 2012 at 05:03:03PM +0200, Sasha Levin wrote:
> On 08/29/2012 01:07 PM, Michael S. Tsirkin wrote:
> > On Tue, Aug 28, 2012 at 03:35:00PM +0200, Sasha Levin wrote:
> >> On 08/28/2012 03:20 PM, Michael S. Tsirkin wrote:
> >>> On Tue, Aug 28, 2012 at 03:04:03PM +0200, Sasha Levin wrote:
> >>>> Currently if VIRTIO_RING_F_INDIRECT_DESC is enabled we will
> >>>> use indirect descriptors and allocate them using a simple
> >>>> kmalloc().
> >>>>
> >>>> This patch adds a cache which will allow indirect buffers under
> >>>> a configurable size to be allocated from that cache instead.
> >>>>
> >>>> Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
> >>>
> >>> I imagine this helps performance? Any numbers?
> >>
> >> I ran benchmarks on the original RFC, I've re-tested it now and got similar
> >> numbers to the original ones (virtio-net using vhost-net, thresh=16):
> >>
> >> Before:
> >> 	Recv   Send    Send
> >> 	Socket Socket  Message  Elapsed
> >> 	Size   Size    Size     Time     Throughput
> >> 	bytes  bytes   bytes    secs.    10^6bits/sec
> >>
> >> 	 87380  16384  16384    10.00    4512.12
> >>
> >> After:
> >> 	Recv   Send    Send
> >> 	Socket Socket  Message  Elapsed
> >> 	Size   Size    Size     Time     Throughput
> >> 	bytes  bytes   bytes    secs.    10^6bits/sec
> >>
> >> 	 87380  16384  16384    10.00    5399.18
> >>
> >>
> >> Thanks,
> >> Sasha
> > 
> > This is with both patches 1 + 2?
> > Sorry could you please also test what happens if you apply
> > - just patch 1
> > - just patch 2
> > 
> > Thanks!
> 
> Sure thing!
> 
> I've also re-ran it on a IBM server type host instead of my laptop. Here are the
> results:
> 
> Vanilla kernel:
> 
> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.33.1
> () port 0 AF_INET
> enable_enobufs failed: getprotobyname
> Recv   Send    Send
> Socket Socket  Message  Elapsed
> Size   Size    Size     Time     Throughput
> bytes  bytes   bytes    secs.    10^6bits/sec
> 
>  87380  16384  16384    10.00    7922.72
> 
> Patch 1, with threshold=16:
> 
> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.33.1
> () port 0 AF_INET
> enable_enobufs failed: getprotobyname
> Recv   Send    Send
> Socket Socket  Message  Elapsed
> Size   Size    Size     Time     Throughput
> bytes  bytes   bytes    secs.    10^6bits/sec
> 
>  87380  16384  16384    10.00    8415.07
> 
> Patch 2:
> 
> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.33.1
> () port 0 AF_INET
> enable_enobufs failed: getprotobyname
> Recv   Send    Send
> Socket Socket  Message  Elapsed
> Size   Size    Size     Time     Throughput
> bytes  bytes   bytes    secs.    10^6bits/sec
> 
>  87380  16384  16384    10.00    8931.05
> 
> 
> Note that these are simple tests with netperf listening on one end and a simple
> 'netperf -H [host]' within the guest. If there are other tests which may be
> interesting please let me know.
> 
> 
> Thanks,
> Sasha


And which parameter did you use for patch 2?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sasha Levin Aug. 29, 2012, 4:50 p.m. UTC | #8
On 08/29/2012 05:38 PM, Michael S. Tsirkin wrote:
> On Wed, Aug 29, 2012 at 05:03:03PM +0200, Sasha Levin wrote:
>> On 08/29/2012 01:07 PM, Michael S. Tsirkin wrote:
>>> On Tue, Aug 28, 2012 at 03:35:00PM +0200, Sasha Levin wrote:
>>>> On 08/28/2012 03:20 PM, Michael S. Tsirkin wrote:
>>>>> On Tue, Aug 28, 2012 at 03:04:03PM +0200, Sasha Levin wrote:
>>>>>> Currently if VIRTIO_RING_F_INDIRECT_DESC is enabled we will
>>>>>> use indirect descriptors and allocate them using a simple
>>>>>> kmalloc().
>>>>>>
>>>>>> This patch adds a cache which will allow indirect buffers under
>>>>>> a configurable size to be allocated from that cache instead.
>>>>>>
>>>>>> Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
>>>>>
>>>>> I imagine this helps performance? Any numbers?
>>>>
>>>> I ran benchmarks on the original RFC, I've re-tested it now and got similar
>>>> numbers to the original ones (virtio-net using vhost-net, thresh=16):
>>>>
>>>> Before:
>>>> 	Recv   Send    Send
>>>> 	Socket Socket  Message  Elapsed
>>>> 	Size   Size    Size     Time     Throughput
>>>> 	bytes  bytes   bytes    secs.    10^6bits/sec
>>>>
>>>> 	 87380  16384  16384    10.00    4512.12
>>>>
>>>> After:
>>>> 	Recv   Send    Send
>>>> 	Socket Socket  Message  Elapsed
>>>> 	Size   Size    Size     Time     Throughput
>>>> 	bytes  bytes   bytes    secs.    10^6bits/sec
>>>>
>>>> 	 87380  16384  16384    10.00    5399.18
>>>>
>>>>
>>>> Thanks,
>>>> Sasha
>>>
>>> This is with both patches 1 + 2?
>>> Sorry could you please also test what happens if you apply
>>> - just patch 1
>>> - just patch 2
>>>
>>> Thanks!
>>
>> Sure thing!
>>
>> I've also re-ran it on a IBM server type host instead of my laptop. Here are the
>> results:
>>
>> Vanilla kernel:
>>
>> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.33.1
>> () port 0 AF_INET
>> enable_enobufs failed: getprotobyname
>> Recv   Send    Send
>> Socket Socket  Message  Elapsed
>> Size   Size    Size     Time     Throughput
>> bytes  bytes   bytes    secs.    10^6bits/sec
>>
>>  87380  16384  16384    10.00    7922.72
>>
>> Patch 1, with threshold=16:
>>
>> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.33.1
>> () port 0 AF_INET
>> enable_enobufs failed: getprotobyname
>> Recv   Send    Send
>> Socket Socket  Message  Elapsed
>> Size   Size    Size     Time     Throughput
>> bytes  bytes   bytes    secs.    10^6bits/sec
>>
>>  87380  16384  16384    10.00    8415.07
>>
>> Patch 2:
>>
>> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.33.1
>> () port 0 AF_INET
>> enable_enobufs failed: getprotobyname
>> Recv   Send    Send
>> Socket Socket  Message  Elapsed
>> Size   Size    Size     Time     Throughput
>> bytes  bytes   bytes    secs.    10^6bits/sec
>>
>>  87380  16384  16384    10.00    8931.05
>>
>>
>> Note that these are simple tests with netperf listening on one end and a simple
>> 'netperf -H [host]' within the guest. If there are other tests which may be
>> interesting please let me know.
>>
>>
>> Thanks,
>> Sasha
> 
> 
> And which parameter did you use for patch 2?
> 

Same as in the first one: 16. The only difference in patch 2 is that we use a
kmem_cache, so there's no point in changing the threshold vs patch 1.


Thanks,
Sasha
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sasha Levin Aug. 29, 2012, 5:14 p.m. UTC | #9
On 08/29/2012 05:38 PM, Michael S. Tsirkin wrote:
> On Tue, Aug 28, 2012 at 03:04:03PM +0200, Sasha Levin wrote:
>> Currently if VIRTIO_RING_F_INDIRECT_DESC is enabled we will
>> use indirect descriptors and allocate them using a simple
>> kmalloc().
>>
>> This patch adds a cache which will allow indirect buffers under
>> a configurable size to be allocated from that cache instead.
>>
>> Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
> 
> The API is ugly - how does driver know what to set?
> Is this a typical request size? Then let's call it that.

We discussed it during the RFC phase; the idea is that we don't know what
would be a good number to use as the threshold - which is why I'd like to keep
it disabled by default until it gets more serious testing.

The driver doesn't know what to set; the plan was to make it a dynamic
algorithm which would change it based on current load. Since I can't really do
testing which will provide the correct values for that, the decision was to do
it this way as the first stage and modify it later.

> Also this is really per VQ, right?

Right, we keep it per-device at this stage to keep it simple.

> What is a good default for net? I guess max sg?

I think that it depends on the workload. I'd say we should keep the default to 0
(disabled) unless we can have a good way to adjust it to the load.

>> ---
>>
>> Changes in v2:
>>
>>  - Free correctly indirect buffers.
>>
>>  drivers/block/virtio_blk.c          |  4 ++++
>>  drivers/char/hw_random/virtio-rng.c |  4 ++++
>>  drivers/char/virtio_console.c       |  4 ++++
>>  drivers/net/virtio_net.c            |  4 ++++
>>  drivers/virtio/virtio_balloon.c     |  4 ++++
>>  drivers/virtio/virtio_ring.c        | 28 ++++++++++++++++++++++++----
>>  include/linux/virtio.h              |  1 +
>>  net/9p/trans_virtio.c               |  5 +++++
>>  8 files changed, 50 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
>> index 13b8ae9..7f670af 100644
>> --- a/drivers/block/virtio_blk.c
>> +++ b/drivers/block/virtio_blk.c
>> @@ -25,6 +25,9 @@ struct workqueue_struct *virtblk_wq;
>>  static unsigned int indirect_thresh;
>>  module_param(indirect_thresh, uint, S_IRUGO);
>>  
>> +static unsigned int indirect_alloc_thresh;
>> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
>> +
>>  struct virtio_blk
>>  {
>>  	struct virtio_device *vdev;
>> @@ -739,6 +742,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>>  	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
>>  	vblk->config_enable = true;
>>  	vdev->indirect_thresh = indirect_thresh;
>> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
>>  
>>  	err = init_vq(vblk);
>>  	if (err)
>> diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
>> index 02d8421..8475ece 100644
>> --- a/drivers/char/hw_random/virtio-rng.c
>> +++ b/drivers/char/hw_random/virtio-rng.c
>> @@ -28,6 +28,9 @@
>>  static unsigned int indirect_thresh;
>>  module_param(indirect_thresh, uint, S_IRUGO);
>>  
>> +static unsigned int indirect_alloc_thresh;
>> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
>> +
>>  static struct virtqueue *vq;
>>  static unsigned int data_avail;
>>  static DECLARE_COMPLETION(have_data);
>> @@ -97,6 +100,7 @@ static int probe_common(struct virtio_device *vdev)
>>  
>>  	/* We expect a single virtqueue. */
>>  	vdev->indirect_thresh = indirect_thresh;
>> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
>>  	vq = virtio_find_single_vq(vdev, random_recv_done, "input");
>>  	if (IS_ERR(vq))
>>  		return PTR_ERR(vq);
>> diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
>> index fc14e7f..c6f24a7 100644
>> --- a/drivers/char/virtio_console.c
>> +++ b/drivers/char/virtio_console.c
>> @@ -42,6 +42,9 @@
>>  static unsigned int indirect_thresh;
>>  module_param(indirect_thresh, uint, S_IRUGO);
>>  
>> +static unsigned int indirect_alloc_thresh;
>> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
>> +
>>  /*
>>   * This is a global struct for storing common data for all the devices
>>   * this driver handles.
>> @@ -1891,6 +1894,7 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
>>  			      &portdev->config.max_nr_ports) == 0)
>>  		multiport = true;
>>  	vdev->indirect_thresh = indirect_thresh;
>> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
>>  
>>  	err = init_vqs(portdev);
>>  	if (err < 0) {
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 64a8321..c091efd 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -37,6 +37,9 @@ module_param(gso, bool, 0444);
>>  static unsigned int indirect_thresh;
>>  module_param(indirect_thresh, uint, S_IRUGO);
>>  
>> +static unsigned int indirect_alloc_thresh;
>> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
>> +
>>  /* FIXME: MTU in config. */
>>  #define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
>>  #define GOOD_COPY_LEN	128
>> @@ -1132,6 +1135,7 @@ static int virtnet_probe(struct virtio_device *vdev)
>>  	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
>>  		vi->mergeable_rx_bufs = true;
>>  	vdev->indirect_thresh = indirect_thresh;
>> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
>>  
>>  	err = init_vqs(vi);
>>  	if (err)
>> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
>> index fce7347..ccf87db 100644
>> --- a/drivers/virtio/virtio_balloon.c
>> +++ b/drivers/virtio/virtio_balloon.c
>> @@ -38,6 +38,9 @@
>>  static unsigned int indirect_thresh;
>>  module_param(indirect_thresh, uint, S_IRUGO);
>>  
>> +static unsigned int indirect_alloc_thresh;
>> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
>> +
>>  struct virtio_balloon
>>  {
>>  	struct virtio_device *vdev;
>> @@ -360,6 +363,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
>>  	vb->vdev = vdev;
>>  	vb->need_stats_update = 0;
>>  	vdev->indirect_thresh = indirect_thresh;
>> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
>>  
>>  	err = init_vqs(vb);
>>  	if (err)
>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>> index 99a64a7..e8b9c54 100644
>> --- a/drivers/virtio/virtio_ring.c
>> +++ b/drivers/virtio/virtio_ring.c
>> @@ -93,6 +93,10 @@ struct vring_virtqueue
>>  	 */
>>  	unsigned int indirect_thresh;
>>  
>> +	/* Buffers below this size will be allocated from cache */
>> +	unsigned int indirect_alloc_thresh;
>> +	struct kmem_cache *indirect_cache;
>> +
>>  	/* Host publishes avail event idx */
>>  	bool event;
>>  
>> @@ -135,7 +139,10 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
>>  	unsigned head;
>>  	int i;
>>  
>> -	desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
>> +	if ((out + in) <= vq->indirect_alloc_thresh)
>> +		desc = kmem_cache_alloc(vq->indirect_cache, gfp);
>> +	else
>> +		desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
>>  	if (!desc)
>>  		return -ENOMEM;
> 
> So this means that drivers need to know how large each
> descriptor is to avoid trying to allocate 32Mbyte chunks :)

Right, this interface isn't perfect and would hopefully be removed in favour of
a dynamic algorithm in the driver.

>>  
>> @@ -384,8 +391,13 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
>>  	i = head;
>>  
>>  	/* Free the indirect table */
>> -	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
>> -		kfree(phys_to_virt(vq->vring.desc[i].addr));
>> +	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT) {
>> +		if (vq->vring.desc[i].len > vq->indirect_alloc_thresh)
> 
> This looks wrong. indirect_alloc_thresh is in entries but len is in
> bytes, isn't it?

Uh, yes. It's supposed to be '.len/sizeof(struct vring_desc)'.

>> +			kfree(phys_to_virt(vq->vring.desc[i].addr));
>> +		else
>> +			kmem_cache_free(vq->indirect_cache,
>> +					phys_to_virt(vq->vring.desc[i].addr));
>> +	}
>>  
>>  	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
>>  		i = vq->vring.desc[i].next;
>> @@ -654,14 +666,20 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
>>  	vq->last_used_idx = 0;
>>  	vq->num_added = 0;
>>  	vq->indirect_thresh = 0;
>> +	vq->indirect_alloc_thresh = 0;
>> +	vq->indirect_cache = NULL;
>>  	list_add_tail(&vq->vq.list, &vdev->vqs);
>>  #ifdef DEBUG
>>  	vq->in_use = false;
>>  	vq->last_add_time_valid = false;
>>  #endif
>>  
>> -	if (virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
>> +	if (virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
>>  		vq->indirect_thresh = vdev->indirect_thresh;
>> +		vq->indirect_alloc_thresh = vdev->indirect_alloc_thresh;
>> +		if (vq->indirect_alloc_thresh)
>> +			vq->indirect_cache = KMEM_CACHE(vring_desc[vq->indirect_alloc_thresh],
>> 0);
> 
> I am not a purist but this line looks way too long.
> Also - no need to check cache creation succeeded?
> On failure - disable caching?
> 
> Also - let's check that values are sane before passing them on?
> They come from user after all.

will fix these two.

> Also - should not threshold be per VQ? E.g. for -net we do not
> need the cache for RX unless in legacy big packet mode.

We discussed it two months ago over IRC (at least that's what I have in my
notes) - the plan was to keep it simple per-device until something more advanced
to deal with the threshold shows up.


Thanks,
Sasha

> 
>> +	}
>>  
>>  	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
>>  
>> @@ -685,6 +703,8 @@ EXPORT_SYMBOL_GPL(vring_new_virtqueue);
>>  void vring_del_virtqueue(struct virtqueue *vq)
>>  {
>>  	list_del(&vq->list);
>> +	if (to_vvq(vq)->indirect_cache)
>> +		kmem_cache_destroy(to_vvq(vq)->indirect_cache);
>>  	kfree(to_vvq(vq));
>>  }
>>  EXPORT_SYMBOL_GPL(vring_del_virtqueue);
>> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
>> index 48bc457..3261c02 100644
>> --- a/include/linux/virtio.h
>> +++ b/include/linux/virtio.h
>> @@ -70,6 +70,7 @@ struct virtio_device {
>>  	unsigned long features[1];
>>  	void *priv;
>>  	unsigned int indirect_thresh;
>> +	unsigned int indirect_alloc_thresh;
>>  };
>>  
>>  #define dev_to_virtio(dev) container_of(dev, struct virtio_device, dev)
>> diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
>> index fc93962..3044c86 100644
>> --- a/net/9p/trans_virtio.c
>> +++ b/net/9p/trans_virtio.c
>> @@ -55,6 +55,9 @@
>>  static unsigned int indirect_thresh;
>>  module_param(indirect_thresh, uint, S_IRUGO);
>>  
>> +static unsigned int indirect_alloc_thresh;
>> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
>> +
>>  /* a single mutex to manage channel initialization and attachment */
>>  static DEFINE_MUTEX(virtio_9p_lock);
>>  static DECLARE_WAIT_QUEUE_HEAD(vp_wq);
>> @@ -505,6 +508,8 @@ static int p9_virtio_probe(struct virtio_device *vdev)
>>  
>>  	/* We expect one virtqueue, for requests. */
>>  	vdev->indirect_thresh = indirect_thresh;
>> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
>> +
>>  	chan->vq = virtio_find_single_vq(vdev, req_done, "requests");
>>  	if (IS_ERR(chan->vq)) {
>>  		err = PTR_ERR(chan->vq);
>> -- 
>> 1.7.12

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Aug. 29, 2012, 6:12 p.m. UTC | #10
On Wed, Aug 29, 2012 at 07:14:01PM +0200, Sasha Levin wrote:
> On 08/29/2012 05:38 PM, Michael S. Tsirkin wrote:
> > On Tue, Aug 28, 2012 at 03:04:03PM +0200, Sasha Levin wrote:
> >> Currently if VIRTIO_RING_F_INDIRECT_DESC is enabled we will
> >> use indirect descriptors and allocate them using a simple
> >> kmalloc().
> >>
> >> This patch adds a cache which will allow indirect buffers under
> >> a configurable size to be allocated from that cache instead.
> >>
> >> Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
> > 
> > The API is ugly - how does driver know what to set?
> > Is this a typical request size? Then let's call it that.
> 
> We've discussed it during the RFC phase, the idea is that we don't know what
> would be a good number to use as threshold - which is why I'd like to keep it
> disabled as default until it gets more serious tests.
> 
> The driver doesn't know what to set, the plan was to make it a dynamic
> algorithms which would change it based on current load. Since I can't really do
> testing which will provide the correct values for that, the decision was to do
> it this way as the first stage and modify it later.
> 
> > Also this is really per VQ, right?
> 
> Right, we keep it per-device at this stage to keep it simple.
> 
> > What is a good default for net? I guess max sg?
> 
> I think that it depends on the workload. I'd say we should keep the default to 0
> (disabled) unless we can have a good way to adjust it to the load.

For *all* drivers?

Then it is mostly useless. No one has the time to tweak module
parameters in real life.

For virtio-net, 16+1 is not too much and ensures we always
use the cache.

If that works better than 0 I would say run with 17.


> >> ---
> >>
> >> Changes in v2:
> >>
> >>  - Free correctly indirect buffers.
> >>
> >>  drivers/block/virtio_blk.c          |  4 ++++
> >>  drivers/char/hw_random/virtio-rng.c |  4 ++++
> >>  drivers/char/virtio_console.c       |  4 ++++
> >>  drivers/net/virtio_net.c            |  4 ++++
> >>  drivers/virtio/virtio_balloon.c     |  4 ++++
> >>  drivers/virtio/virtio_ring.c        | 28 ++++++++++++++++++++++++----
> >>  include/linux/virtio.h              |  1 +
> >>  net/9p/trans_virtio.c               |  5 +++++
> >>  8 files changed, 50 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> >> index 13b8ae9..7f670af 100644
> >> --- a/drivers/block/virtio_blk.c
> >> +++ b/drivers/block/virtio_blk.c
> >> @@ -25,6 +25,9 @@ struct workqueue_struct *virtblk_wq;
> >>  static unsigned int indirect_thresh;
> >>  module_param(indirect_thresh, uint, S_IRUGO);
> >>  
> >> +static unsigned int indirect_alloc_thresh;
> >> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> >> +
> >>  struct virtio_blk
> >>  {
> >>  	struct virtio_device *vdev;
> >> @@ -739,6 +742,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
> >>  	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
> >>  	vblk->config_enable = true;
> >>  	vdev->indirect_thresh = indirect_thresh;
> >> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
> >>  
> >>  	err = init_vq(vblk);
> >>  	if (err)
> >> diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
> >> index 02d8421..8475ece 100644
> >> --- a/drivers/char/hw_random/virtio-rng.c
> >> +++ b/drivers/char/hw_random/virtio-rng.c
> >> @@ -28,6 +28,9 @@
> >>  static unsigned int indirect_thresh;
> >>  module_param(indirect_thresh, uint, S_IRUGO);
> >>  
> >> +static unsigned int indirect_alloc_thresh;
> >> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> >> +
> >>  static struct virtqueue *vq;
> >>  static unsigned int data_avail;
> >>  static DECLARE_COMPLETION(have_data);
> >> @@ -97,6 +100,7 @@ static int probe_common(struct virtio_device *vdev)
> >>  
> >>  	/* We expect a single virtqueue. */
> >>  	vdev->indirect_thresh = indirect_thresh;
> >> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
> >>  	vq = virtio_find_single_vq(vdev, random_recv_done, "input");
> >>  	if (IS_ERR(vq))
> >>  		return PTR_ERR(vq);
> >> diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
> >> index fc14e7f..c6f24a7 100644
> >> --- a/drivers/char/virtio_console.c
> >> +++ b/drivers/char/virtio_console.c
> >> @@ -42,6 +42,9 @@
> >>  static unsigned int indirect_thresh;
> >>  module_param(indirect_thresh, uint, S_IRUGO);
> >>  
> >> +static unsigned int indirect_alloc_thresh;
> >> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> >> +
> >>  /*
> >>   * This is a global struct for storing common data for all the devices
> >>   * this driver handles.
> >> @@ -1891,6 +1894,7 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
> >>  			      &portdev->config.max_nr_ports) == 0)
> >>  		multiport = true;
> >>  	vdev->indirect_thresh = indirect_thresh;
> >> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
> >>  
> >>  	err = init_vqs(portdev);
> >>  	if (err < 0) {
> >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> >> index 64a8321..c091efd 100644
> >> --- a/drivers/net/virtio_net.c
> >> +++ b/drivers/net/virtio_net.c
> >> @@ -37,6 +37,9 @@ module_param(gso, bool, 0444);
> >>  static unsigned int indirect_thresh;
> >>  module_param(indirect_thresh, uint, S_IRUGO);
> >>  
> >> +static unsigned int indirect_alloc_thresh;
> >> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> >> +
> >>  /* FIXME: MTU in config. */
> >>  #define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
> >>  #define GOOD_COPY_LEN	128
> >> @@ -1132,6 +1135,7 @@ static int virtnet_probe(struct virtio_device *vdev)
> >>  	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
> >>  		vi->mergeable_rx_bufs = true;
> >>  	vdev->indirect_thresh = indirect_thresh;
> >> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
> >>  
> >>  	err = init_vqs(vi);
> >>  	if (err)
> >> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> >> index fce7347..ccf87db 100644
> >> --- a/drivers/virtio/virtio_balloon.c
> >> +++ b/drivers/virtio/virtio_balloon.c
> >> @@ -38,6 +38,9 @@
> >>  static unsigned int indirect_thresh;
> >>  module_param(indirect_thresh, uint, S_IRUGO);
> >>  
> >> +static unsigned int indirect_alloc_thresh;
> >> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> >> +
> >>  struct virtio_balloon
> >>  {
> >>  	struct virtio_device *vdev;
> >> @@ -360,6 +363,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
> >>  	vb->vdev = vdev;
> >>  	vb->need_stats_update = 0;
> >>  	vdev->indirect_thresh = indirect_thresh;
> >> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
> >>  
> >>  	err = init_vqs(vb);
> >>  	if (err)
> >> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> >> index 99a64a7..e8b9c54 100644
> >> --- a/drivers/virtio/virtio_ring.c
> >> +++ b/drivers/virtio/virtio_ring.c
> >> @@ -93,6 +93,10 @@ struct vring_virtqueue
> >>  	 */
> >>  	unsigned int indirect_thresh;
> >>  
> >> +	/* Buffers below this size will be allocated from cache */
> >> +	unsigned int indirect_alloc_thresh;
> >> +	struct kmem_cache *indirect_cache;
> >> +
> >>  	/* Host publishes avail event idx */
> >>  	bool event;
> >>  
> >> @@ -135,7 +139,10 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
> >>  	unsigned head;
> >>  	int i;
> >>  
> >> -	desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
> >> +	if ((out + in) <= vq->indirect_alloc_thresh)
> >> +		desc = kmem_cache_alloc(vq->indirect_cache, gfp);
> >> +	else
> >> +		desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
> >>  	if (!desc)
> >>  		return -ENOMEM;
> > 
> > So this means that drivers need to know how large each
> > descriptor is to avoid trying to allocate 32Mbyte chunks :)
> 
> Right, this interface isn't perfect and would hopefully be removed in favour of
> a dynamic algorithm in the driver.
> 
> >>  
> >> @@ -384,8 +391,13 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
> >>  	i = head;
> >>  
> >>  	/* Free the indirect table */
> >> -	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
> >> -		kfree(phys_to_virt(vq->vring.desc[i].addr));
> >> +	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT) {
> >> +		if (vq->vring.desc[i].len > vq->indirect_alloc_thresh)
> > 
> > This looks wrong. indirect_alloc_thresh is in entries but len is in
> > bytes, isn't it?
> 
> Uh, yes. It's supposed to be '.len/sizeof(struct vring_desc)'.
> 
> >> +			kfree(phys_to_virt(vq->vring.desc[i].addr));
> >> +		else
> >> +			kmem_cache_free(vq->indirect_cache,
> >> +					phys_to_virt(vq->vring.desc[i].addr));
> >> +	}
> >>  
> >>  	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
> >>  		i = vq->vring.desc[i].next;
> >> @@ -654,14 +666,20 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
> >>  	vq->last_used_idx = 0;
> >>  	vq->num_added = 0;
> >>  	vq->indirect_thresh = 0;
> >> +	vq->indirect_alloc_thresh = 0;
> >> +	vq->indirect_cache = NULL;
> >>  	list_add_tail(&vq->vq.list, &vdev->vqs);
> >>  #ifdef DEBUG
> >>  	vq->in_use = false;
> >>  	vq->last_add_time_valid = false;
> >>  #endif
> >>  
> >> -	if (virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
> >> +	if (virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
> >>  		vq->indirect_thresh = vdev->indirect_thresh;
> >> +		vq->indirect_alloc_thresh = vdev->indirect_alloc_thresh;
> >> +		if (vq->indirect_alloc_thresh)
> >> +			vq->indirect_cache = KMEM_CACHE(vring_desc[vq->indirect_alloc_thresh],
> >> 0);
> > 
> > I am not a purist but this line looks way too long.
> > Also - no need to check cache creation succeeded?
> > On failure - disable caching?
> > 
> > Also - let's check that values are sane before passing them on?
> > They come from user after all.
> 
> will fix these two.
> 
> > Also - should not threshold be per VQ? E.g. for -net we do not
> > need the cache for RX unless in legacy big packet mode.
> 
> We've discussed it two months ago over IRC (at least thats what I have in my
> notes) - the plan was to keep it simple per-device until something more advanced
> to deal with the threshold shows up.
> 
> 
> Thanks,
> Sasha
> 
> > 
> >> +	}
> >>  
> >>  	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
> >>  
> >> @@ -685,6 +703,8 @@ EXPORT_SYMBOL_GPL(vring_new_virtqueue);
> >>  void vring_del_virtqueue(struct virtqueue *vq)
> >>  {
> >>  	list_del(&vq->list);
> >> +	if (to_vvq(vq)->indirect_cache)
> >> +		kmem_cache_destroy(to_vvq(vq)->indirect_cache);
> >>  	kfree(to_vvq(vq));
> >>  }
> >>  EXPORT_SYMBOL_GPL(vring_del_virtqueue);
> >> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> >> index 48bc457..3261c02 100644
> >> --- a/include/linux/virtio.h
> >> +++ b/include/linux/virtio.h
> >> @@ -70,6 +70,7 @@ struct virtio_device {
> >>  	unsigned long features[1];
> >>  	void *priv;
> >>  	unsigned int indirect_thresh;
> >> +	unsigned int indirect_alloc_thresh;
> >>  };
> >>  
> >>  #define dev_to_virtio(dev) container_of(dev, struct virtio_device, dev)
> >> diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
> >> index fc93962..3044c86 100644
> >> --- a/net/9p/trans_virtio.c
> >> +++ b/net/9p/trans_virtio.c
> >> @@ -55,6 +55,9 @@
> >>  static unsigned int indirect_thresh;
> >>  module_param(indirect_thresh, uint, S_IRUGO);
> >>  
> >> +static unsigned int indirect_alloc_thresh;
> >> +module_param(indirect_alloc_thresh, uint, S_IRUGO);
> >> +
> >>  /* a single mutex to manage channel initialization and attachment */
> >>  static DEFINE_MUTEX(virtio_9p_lock);
> >>  static DECLARE_WAIT_QUEUE_HEAD(vp_wq);
> >> @@ -505,6 +508,8 @@ static int p9_virtio_probe(struct virtio_device *vdev)
> >>  
> >>  	/* We expect one virtqueue, for requests. */
> >>  	vdev->indirect_thresh = indirect_thresh;
> >> +	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
> >> +
> >>  	chan->vq = virtio_find_single_vq(vdev, req_done, "requests");
> >>  	if (IS_ERR(chan->vq)) {
> >>  		err = PTR_ERR(chan->vq);
> >> -- 
> >> 1.7.12
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sasha Levin Aug. 29, 2012, 8:46 p.m. UTC | #11
On 08/29/2012 08:12 PM, Michael S. Tsirkin wrote:
>>> > > What is a good default for net? I guess max sg?
>> > 
>> > I think that it depends on the workload. I'd say we should keep the default to 0
>> > (disabled) unless we can have a good way to adjust it to the load.
> For *all* drivers?
> 
> Then it is mostly useless. No one has the time to tweak module
> parameters in real life.
> 
> For virtio-net, 16+1 is not too much and ensures we always
> use the cache.
> 
> If that works better than 0 I would say run with 17.

I was being extra-cautious by leaving it disabled until specifically enabled
because I assumed that this would be one of the first comments I'd get if it
were enabled by default :)

If you're comfortable with setting it to a sane default like 17, I'm perfectly
fine with that as well.


Thanks,
Sasha

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin Aug. 29, 2012, 10:52 p.m. UTC | #12
On Wed, Aug 29, 2012 at 10:46:19PM +0200, Sasha Levin wrote:
> On 08/29/2012 08:12 PM, Michael S. Tsirkin wrote:
> >>> > > What is a good default for net? I guess max sg?
> >> > 
> >> > I think that it depends on the workload. I'd say we should keep the default to 0
> >> > (disabled) unless we can have a good way to adjust it to the load.
> > For *all* drivers?
> > 
> > Then it is mostly useless. No one has the time to tweak module
> > parameters in real life.
> > 
> > For virtio-net, 16+1 is not too much and ensures we always
> > use the cache.
> > 
> > If that works better than 0 I would say run with 17.
> 
> I was being extra-cautious with leaving it disabled until specifically enabled
> because I assumed that this would be one of the first comments I'll get if it
> was enabled by default :)
> 
> If you're comfortable with setting it to a sane default like 17, I'm perfectly
> fine with that as well.
> 
> 
> Thanks,
> Sasha

If our testing shows it helps and does not trigger regressions, then
why not? module params are mostly there for developers.
They are not all that helpful to users.

> _______________________________________________
> Virtualization mailing list
> Virtualization@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sasha Levin Aug. 30, 2012, 10:34 a.m. UTC | #13
On 08/29/2012 05:14 PM, Michael S. Tsirkin wrote:
>> > Note that these are simple tests with netperf listening on one end and a simple
>> > 'netperf -H [host]' within the guest. If there are other tests which may be
>> > interesting please let me know.
> Checking that host CPU utilization did not jump would be nice.
> E.g. measure BW/host CPU.

Tested it now, no change in CPU between the original, patch 1 and patch 2.


Thanks,
Sasha
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 13b8ae9..7f670af 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -25,6 +25,9 @@  struct workqueue_struct *virtblk_wq;
 static unsigned int indirect_thresh;
 module_param(indirect_thresh, uint, S_IRUGO);
 
+static unsigned int indirect_alloc_thresh;
+module_param(indirect_alloc_thresh, uint, S_IRUGO);
+
 struct virtio_blk
 {
 	struct virtio_device *vdev;
@@ -739,6 +742,7 @@  static int __devinit virtblk_probe(struct virtio_device *vdev)
 	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
 	vblk->config_enable = true;
 	vdev->indirect_thresh = indirect_thresh;
+	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
 
 	err = init_vq(vblk);
 	if (err)
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 02d8421..8475ece 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -28,6 +28,9 @@ 
 static unsigned int indirect_thresh;
 module_param(indirect_thresh, uint, S_IRUGO);
 
+static unsigned int indirect_alloc_thresh;
+module_param(indirect_alloc_thresh, uint, S_IRUGO);
+
 static struct virtqueue *vq;
 static unsigned int data_avail;
 static DECLARE_COMPLETION(have_data);
@@ -97,6 +100,7 @@  static int probe_common(struct virtio_device *vdev)
 
 	/* We expect a single virtqueue. */
 	vdev->indirect_thresh = indirect_thresh;
+	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
 	vq = virtio_find_single_vq(vdev, random_recv_done, "input");
 	if (IS_ERR(vq))
 		return PTR_ERR(vq);
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index fc14e7f..c6f24a7 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -42,6 +42,9 @@ 
 static unsigned int indirect_thresh;
 module_param(indirect_thresh, uint, S_IRUGO);
 
+static unsigned int indirect_alloc_thresh;
+module_param(indirect_alloc_thresh, uint, S_IRUGO);
+
 /*
  * This is a global struct for storing common data for all the devices
  * this driver handles.
@@ -1891,6 +1894,7 @@  static int __devinit virtcons_probe(struct virtio_device *vdev)
 			      &portdev->config.max_nr_ports) == 0)
 		multiport = true;
 	vdev->indirect_thresh = indirect_thresh;
+	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
 
 	err = init_vqs(portdev);
 	if (err < 0) {
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 64a8321..c091efd 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -37,6 +37,9 @@  module_param(gso, bool, 0444);
 static unsigned int indirect_thresh;
 module_param(indirect_thresh, uint, S_IRUGO);
 
+static unsigned int indirect_alloc_thresh;
+module_param(indirect_alloc_thresh, uint, S_IRUGO);
+
 /* FIXME: MTU in config. */
 #define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
 #define GOOD_COPY_LEN	128
@@ -1132,6 +1135,7 @@  static int virtnet_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 		vi->mergeable_rx_bufs = true;
 	vdev->indirect_thresh = indirect_thresh;
+	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
 
 	err = init_vqs(vi);
 	if (err)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index fce7347..ccf87db 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -38,6 +38,9 @@ 
 static unsigned int indirect_thresh;
 module_param(indirect_thresh, uint, S_IRUGO);
 
+static unsigned int indirect_alloc_thresh;
+module_param(indirect_alloc_thresh, uint, S_IRUGO);
+
 struct virtio_balloon
 {
 	struct virtio_device *vdev;
@@ -360,6 +363,7 @@  static int virtballoon_probe(struct virtio_device *vdev)
 	vb->vdev = vdev;
 	vb->need_stats_update = 0;
 	vdev->indirect_thresh = indirect_thresh;
+	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
 
 	err = init_vqs(vb);
 	if (err)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 99a64a7..e8b9c54 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -93,6 +93,10 @@  struct vring_virtqueue
 	 */
 	unsigned int indirect_thresh;
 
+	/* Indirect tables of at most this many entries use indirect_cache */
+	unsigned int indirect_alloc_thresh;
+	struct kmem_cache *indirect_cache;
+
 	/* Host publishes avail event idx */
 	bool event;
 
@@ -135,7 +139,10 @@  static int vring_add_indirect(struct vring_virtqueue *vq,
 	unsigned head;
 	int i;
 
-	desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
+	if ((out + in) <= vq->indirect_alloc_thresh)
+		desc = kmem_cache_alloc(vq->indirect_cache, gfp);
+	else
+		desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
 	if (!desc)
 		return -ENOMEM;
 
@@ -384,8 +391,13 @@  static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 	i = head;
 
 	/* Free the indirect table */
-	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
-		kfree(phys_to_virt(vq->vring.desc[i].addr));
+	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT) {
+		if (vq->vring.desc[i].len > vq->indirect_alloc_thresh)
+			kfree(phys_to_virt(vq->vring.desc[i].addr));
+		else
+			kmem_cache_free(vq->indirect_cache,
+					phys_to_virt(vq->vring.desc[i].addr));
+	}
 
 	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
 		i = vq->vring.desc[i].next;
@@ -654,14 +666,20 @@  struct virtqueue *vring_new_virtqueue(unsigned int num,
 	vq->last_used_idx = 0;
 	vq->num_added = 0;
 	vq->indirect_thresh = 0;
+	vq->indirect_alloc_thresh = 0;
+	vq->indirect_cache = NULL;
 	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
 	vq->in_use = false;
 	vq->last_add_time_valid = false;
 #endif
 
-	if (virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
+	if (virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
 		vq->indirect_thresh = vdev->indirect_thresh;
+		vq->indirect_alloc_thresh = vdev->indirect_alloc_thresh;
+		if (vq->indirect_alloc_thresh)
+			vq->indirect_cache = KMEM_CACHE(vring_desc[vq->indirect_alloc_thresh], 0);
+	}
 
 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
 
@@ -685,6 +703,8 @@  EXPORT_SYMBOL_GPL(vring_new_virtqueue);
 void vring_del_virtqueue(struct virtqueue *vq)
 {
 	list_del(&vq->list);
+	if (to_vvq(vq)->indirect_cache)
+		kmem_cache_destroy(to_vvq(vq)->indirect_cache);
 	kfree(to_vvq(vq));
 }
 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 48bc457..3261c02 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -70,6 +70,7 @@  struct virtio_device {
 	unsigned long features[1];
 	void *priv;
 	unsigned int indirect_thresh;
+	unsigned int indirect_alloc_thresh;
 };
 
 #define dev_to_virtio(dev) container_of(dev, struct virtio_device, dev)
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index fc93962..3044c86 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -55,6 +55,9 @@ 
 static unsigned int indirect_thresh;
 module_param(indirect_thresh, uint, S_IRUGO);
 
+static unsigned int indirect_alloc_thresh;
+module_param(indirect_alloc_thresh, uint, S_IRUGO);
+
 /* a single mutex to manage channel initialization and attachment */
 static DEFINE_MUTEX(virtio_9p_lock);
 static DECLARE_WAIT_QUEUE_HEAD(vp_wq);
@@ -505,6 +508,8 @@  static int p9_virtio_probe(struct virtio_device *vdev)
 
 	/* We expect one virtqueue, for requests. */
 	vdev->indirect_thresh = indirect_thresh;
+	vdev->indirect_alloc_thresh = indirect_alloc_thresh;
+
 	chan->vq = virtio_find_single_vq(vdev, req_done, "requests");
 	if (IS_ERR(chan->vq)) {
 		err = PTR_ERR(chan->vq);