
[2/6] virtio-balloon: speed up inflate/deflate process

Message ID 1465811233-21136-3-git-send-email-liang.z.li@intel.com (mailing list archive)
State New, archived

Commit Message

Liang Li June 13, 2016, 9:47 a.m. UTC
The implementation of the current virtio-balloon is not very efficient.
Below is the test result of the time spent inflating the balloon to 3GB
of a 4GB idle guest:

a. allocating pages (6.5%, 103ms)
b. sending PFNs to host (68.3%, 787ms)
c. address translation (6.1%, 96ms)
d. madvise (19%, 300ms)

It takes about 1577ms for the whole inflating process to complete. The
test shows that the bottleneck is stage b and stage d.

By using a bitmap to send the page info instead of the PFNs, we can
reduce the overhead in stage b quite a lot. Furthermore, it's possible
to do the address translation and the madvise on a bulk of pages
instead of the current page-by-page way, so the overhead of stage c
and stage d can also be reduced a lot.
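
A minimal sketch of why the bitmap shrinks stage b, assuming 4KB pages
and one 32-bit PFN per page as in the existing interface (illustration
only, not part of the patch): 3GB of pages is ~3MB of PFNs but only
~96KB of bitmap.

        #include <stdio.h>

        int main(void)
        {
                unsigned long pages = (3UL << 30) >> 12;  /* 3GB / 4KB = 786432 pages */
                unsigned long pfn_bytes = pages * 4;      /* one __virtio32 per page  */
                unsigned long bmap_bytes = pages / 8;     /* one bit per page         */

                printf("pfn array: %lu bytes, bitmap: %lu bytes\n",
                       pfn_bytes, bmap_bytes);
                return 0;
        }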

This patch is the kernel-side implementation, which speeds up the
inflating & deflating process by adding a new feature to the
virtio-balloon device. With it, inflating the balloon to 3GB of a 4GB
idle guest takes only 200ms, about 8 times as fast as before.

TODO: optimize stage a by allocating/freeing a chunk of pages instead
of a single page at a time.

Signed-off-by: Liang Li <liang.z.li@intel.com>
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: Amit Shah <amit.shah@redhat.com>
---
 drivers/virtio/virtio_balloon.c     | 164 +++++++++++++++++++++++++++++++-----
 include/uapi/linux/virtio_balloon.h |   1 +
 2 files changed, 144 insertions(+), 21 deletions(-)

Comments

kernel test robot June 13, 2016, 10:17 a.m. UTC | #1
Hi,

[auto build test WARNING on v4.7-rc3]
[cannot apply to next-20160609]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Liang-Li/Fast-balloon-fast-live-migration/20160613-175812
config: m68k-allyesconfig (attached as .config)
compiler: m68k-linux-gcc (GCC) 4.9.0
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=m68k 

All warnings (new ones prefixed by >>):

   drivers/virtio/virtio_balloon.c: In function 'init_pfn_range':
>> drivers/virtio/virtio_balloon.c:123:2: warning: left shift count >= width of type
     vb->min_pfn = (1UL << 48);
     ^

vim +123 drivers/virtio/virtio_balloon.c

   107		unsigned long pfn = page_to_pfn(page);
   108	
   109		BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT);
   110		/* Convert pfn from Linux page size to balloon page size. */
   111		return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE;
   112	}
   113	
   114	static void balloon_ack(struct virtqueue *vq)
   115	{
   116		struct virtio_balloon *vb = vq->vdev->priv;
   117	
   118		wake_up(&vb->acked);
   119	}
   120	
   121	static inline void init_pfn_range(struct virtio_balloon *vb)
   122	{
 > 123		vb->min_pfn = (1UL << 48);
   124		vb->max_pfn = 0;
   125	}
   126	
   127	static inline void update_pfn_range(struct virtio_balloon *vb,
   128					 struct page *page)
   129	{
   130		unsigned long balloon_pfn = page_to_balloon_pfn(page);
   131	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Michael S. Tsirkin June 24, 2016, 5:39 a.m. UTC | #2
On Mon, Jun 13, 2016 at 05:47:09PM +0800, Liang Li wrote:
> The implementation of the current virtio-balloon is not very efficient.
> Below is the test result of the time spent inflating the balloon to 3GB
> of a 4GB idle guest:
> 
> a. allocating pages (6.5%, 103ms)
> b. sending PFNs to host (68.3%, 787ms)
> c. address translation (6.1%, 96ms)
> d. madvise (19%, 300ms)
> 
> It takes about 1577ms for the whole inflating process to complete. The
> test shows that the bottleneck is stage b and stage d.
> 
> By using a bitmap to send the page info instead of the PFNs, we can
> reduce the overhead in stage b quite a lot. Furthermore, it's possible
> to do the address translation and the madvise on a bulk of pages
> instead of the current page-by-page way, so the overhead of stage c
> and stage d can also be reduced a lot.
> 
> This patch is the kernel-side implementation, which speeds up the
> inflating & deflating process by adding a new feature to the
> virtio-balloon device. With it, inflating the balloon to 3GB of a 4GB
> idle guest takes only 200ms, about 8 times as fast as before.
> 
> TODO: optimize stage a by allocating/freeing a chunk of pages instead
> of a single page at a time.
> 
> Signed-off-by: Liang Li <liang.z.li@intel.com>
> Suggested-by: Michael S. Tsirkin <mst@redhat.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
> Cc: Amit Shah <amit.shah@redhat.com>

Causes kbuild warnings

> ---
>  drivers/virtio/virtio_balloon.c     | 164 +++++++++++++++++++++++++++++++-----
>  include/uapi/linux/virtio_balloon.h |   1 +
>  2 files changed, 144 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 8d649a2..1fa601b 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -40,11 +40,19 @@
>  #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
>  #define OOM_VBALLOON_DEFAULT_PAGES 256
>  #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
> +#define VIRTIO_BALLOON_PFNS_LIMIT ((2 * (1ULL << 30)) >> PAGE_SHIFT) /* 2GB */

2 << 30 is 2GB, but that is not a useful comment.
Please explain the reason for this selection.

>  
>  static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
>  module_param(oom_pages, int, S_IRUSR | S_IWUSR);
>  MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
>  
> +struct balloon_bmap_hdr {
> +	__virtio32 id;
> +	__virtio32 page_shift;
> +	__virtio64 start_pfn;
> +	__virtio64 bmap_len;
> +};
> +

Put this in an uapi header please.

>  struct virtio_balloon {
>  	struct virtio_device *vdev;
>  	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
> @@ -62,6 +70,11 @@ struct virtio_balloon {
>  
>  	/* Number of balloon pages we've told the Host we're not using. */
>  	unsigned int num_pages;
> +	/* Bitmap and length used to tell the host the pages */
> +	unsigned long *page_bitmap;
> +	unsigned long bmap_len;
> +	/* Used to record the processed pfn range */
> +	unsigned long min_pfn, max_pfn, start_pfn, end_pfn;
>  	/*
>  	 * The pages we've told the Host we're not using are enqueued
>  	 * at vb_dev_info->pages list.
> @@ -105,15 +118,51 @@ static void balloon_ack(struct virtqueue *vq)
>  	wake_up(&vb->acked);
>  }
>  
> +static inline void init_pfn_range(struct virtio_balloon *vb)
> +{
> +	vb->min_pfn = (1UL << 48);

Where does this value come from? Do you want ULONG_MAX?
This does not fit in long on 32 bit systems.
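
A minimal sketch of the change Michael is pointing at, assuming
ULONG_MAX (from linux/kernel.h) as the sentinel; Liang agrees to this
below, so the actual follow-up may look similar but is not shown here:

        static inline void init_pfn_range(struct virtio_balloon *vb)
        {
                /*
                 * Start from an empty range: ULONG_MAX fits unsigned long on
                 * both 32-bit and 64-bit, unlike 1UL << 48, and the first PFN
                 * seen by update_pfn_range() pulls min_pfn down to a real value.
                 */
                vb->min_pfn = ULONG_MAX;
                vb->max_pfn = 0;
        }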


> +	vb->max_pfn = 0;
> +}
> +
> +static inline void update_pfn_range(struct virtio_balloon *vb,
> +				 struct page *page)
> +{
> +	unsigned long balloon_pfn = page_to_balloon_pfn(page);
> +
> +	if (balloon_pfn < vb->min_pfn)
> +		vb->min_pfn = balloon_pfn;
> +	if (balloon_pfn > vb->max_pfn)
> +		vb->max_pfn = balloon_pfn;
> +}
> +
>  static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
>  {
> -	struct scatterlist sg;
>  	unsigned int len;
>  
> -	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP)) {
> +		struct balloon_bmap_hdr hdr;

why not init fields here?

> +		unsigned long bmap_len;

and here

> +		struct scatterlist sg[2];
> +
> +		hdr.id = cpu_to_virtio32(vb->vdev, 0);
> +		hdr.page_shift = cpu_to_virtio32(vb->vdev, PAGE_SHIFT);
> +		hdr.start_pfn = cpu_to_virtio64(vb->vdev, vb->start_pfn);
> +		bmap_len = min(vb->bmap_len,
> +				(vb->end_pfn - vb->start_pfn) / BITS_PER_BYTE);
> +		hdr.bmap_len = cpu_to_virtio64(vb->vdev, bmap_len);
> +		sg_init_table(sg, 2);
> +		sg_set_buf(&sg[0], &hdr, sizeof(hdr));
> +		sg_set_buf(&sg[1], vb->page_bitmap, bmap_len);
> +		virtqueue_add_outbuf(vq, sg, 2, vb, GFP_KERNEL);

might fail if queue size < 2. validate queue size and clear
VIRTIO_BALLOON_F_PAGE_BITMAP?

Alternatively, and I think preferably,
use first struct balloon_bmap_hdr bytes in the buffer
to pass the header to host.


> +	} else {
> +		struct scatterlist sg;
>  
> -	/* We should always be able to add one buffer to an empty queue. */
> -	virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
> +		sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> +		/* We should always be able to add one buffer to an
> +		* empty queue.
> +		*/
> +		virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
> +	}
>  	virtqueue_kick(vq);
>  
>  	/* When host has read buffer, this completes via balloon_ack */
> @@ -133,13 +182,50 @@ static void set_page_pfns(struct virtio_balloon *vb,
>  					  page_to_balloon_pfn(page) + i);
>  }
>  
> -static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
> +static void set_page_bitmap(struct virtio_balloon *vb,
> +			 struct list_head *pages, struct virtqueue *vq)
> +{
> +	unsigned long pfn;
> +	struct page *page, *next;
> +	bool find;

find -> found

> +
> +	vb->min_pfn = rounddown(vb->min_pfn, BITS_PER_LONG);
> +	vb->max_pfn = roundup(vb->max_pfn, BITS_PER_LONG);
> +	for (pfn = vb->min_pfn; pfn < vb->max_pfn;
> +			pfn += VIRTIO_BALLOON_PFNS_LIMIT) {
> +		vb->start_pfn = pfn;
> +		vb->end_pfn = pfn;
> +		memset(vb->page_bitmap, 0, vb->bmap_len);
> +		find = false;
> +		list_for_each_entry_safe(page, next, pages, lru) {

Why _safe?

> +			unsigned long balloon_pfn = page_to_balloon_pfn(page);
> +
> +			if (balloon_pfn < pfn ||
> +				 balloon_pfn >= pfn + VIRTIO_BALLOON_PFNS_LIMIT)
> +				continue;
> +			set_bit(balloon_pfn - pfn, vb->page_bitmap);
> +			if (balloon_pfn > vb->end_pfn)
> +				vb->end_pfn = balloon_pfn;
> +			find = true;

maybe remove page from list? this way we won't go over same entry
multiple times ...

> +		}
> +		if (find) {
> +			vb->end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
> +			tell_host(vb, vq);
> +		}
> +	}
> +}
> +
> +static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num,
> +				 bool use_bmap)
>  {
>  	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
>  	unsigned num_allocated_pages;
>  
> -	/* We can only do one array worth at a time. */
> -	num = min(num, ARRAY_SIZE(vb->pfns));
> +	if (use_bmap)
> +		init_pfn_range(vb);
> +	else
> +		/* We can only do one array worth at a time. */
> +		num = min(num, ARRAY_SIZE(vb->pfns));
>  
>  	mutex_lock(&vb->balloon_lock);
>  	for (vb->num_pfns = 0; vb->num_pfns < num;
> @@ -154,7 +240,10 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
>  			msleep(200);
>  			break;
>  		}
> -		set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
> +		if (use_bmap)
> +			update_pfn_range(vb, page);
> +		else
> +			set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
>  		vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
>  		if (!virtio_has_feature(vb->vdev,
>  					VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> @@ -163,8 +252,13 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
>  
>  	num_allocated_pages = vb->num_pfns;
>  	/* Did we get any? */
> -	if (vb->num_pfns != 0)
> -		tell_host(vb, vb->inflate_vq);
> +	if (vb->num_pfns != 0) {
> +		if (use_bmap)
> +			set_page_bitmap(vb, &vb_dev_info->pages,
> +					 vb->inflate_vq);

don't we need pages_lock if we access vb_dev_info->pages?

> +		else
> +			tell_host(vb, vb->inflate_vq);
> +	}
>  	mutex_unlock(&vb->balloon_lock);
>  
>  	return num_allocated_pages;
> @@ -184,15 +278,19 @@ static void release_pages_balloon(struct virtio_balloon *vb,
>  	}
>  }
>  
> -static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
> +static unsigned int leak_balloon(struct virtio_balloon *vb, size_t num,
> +				bool use_bmap)
>  {
>  	unsigned num_freed_pages;
>  	struct page *page;
>  	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
>  	LIST_HEAD(pages);
>  
> -	/* We can only do one array worth at a time. */
> -	num = min(num, ARRAY_SIZE(vb->pfns));
> +	if (use_bmap)
> +		init_pfn_range(vb);
> +	else
> +		/* We can only do one array worth at a time. */
> +		num = min(num, ARRAY_SIZE(vb->pfns));
>  
>  	mutex_lock(&vb->balloon_lock);
>  	for (vb->num_pfns = 0; vb->num_pfns < num;
> @@ -200,7 +298,10 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
>  		page = balloon_page_dequeue(vb_dev_info);
>  		if (!page)
>  			break;
> -		set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
> +		if (use_bmap)
> +			update_pfn_range(vb, page);
> +		else
> +			set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
>  		list_add(&page->lru, &pages);
>  		vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
>  	}
> @@ -211,9 +312,14 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
>  	 * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
>  	 * is true, we *have* to do it in this order
>  	 */
> -	if (vb->num_pfns != 0)
> -		tell_host(vb, vb->deflate_vq);
> -	release_pages_balloon(vb, &pages);
> +	if (vb->num_pfns != 0) {
> +		if (use_bmap)
> +			set_page_bitmap(vb, &pages, vb->deflate_vq);
> +		else
> +			tell_host(vb, vb->deflate_vq);
> +
> +		release_pages_balloon(vb, &pages);
> +	}
>  	mutex_unlock(&vb->balloon_lock);
>  	return num_freed_pages;
>  }
> @@ -347,13 +453,15 @@ static int virtballoon_oom_notify(struct notifier_block *self,
>  	struct virtio_balloon *vb;
>  	unsigned long *freed;
>  	unsigned num_freed_pages;
> +	bool use_bmap;
>  
>  	vb = container_of(self, struct virtio_balloon, nb);
>  	if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
>  		return NOTIFY_OK;
>  
>  	freed = parm;
> -	num_freed_pages = leak_balloon(vb, oom_pages);
> +	use_bmap = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP);
> +	num_freed_pages = leak_balloon(vb, oom_pages, use_bmap);
>  	update_balloon_size(vb);
>  	*freed += num_freed_pages;
>  
> @@ -373,15 +481,17 @@ static void update_balloon_size_func(struct work_struct *work)
>  {
>  	struct virtio_balloon *vb;
>  	s64 diff;
> +	bool use_bmap;
>  
>  	vb = container_of(work, struct virtio_balloon,
>  			  update_balloon_size_work);
> +	use_bmap = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP);
>  	diff = towards_target(vb);
>  
>  	if (diff > 0)
> -		diff -= fill_balloon(vb, diff);
> +		diff -= fill_balloon(vb, diff, use_bmap);
>  	else if (diff < 0)
> -		diff += leak_balloon(vb, -diff);
> +		diff += leak_balloon(vb, -diff, use_bmap);
>  	update_balloon_size(vb);
>  
>  	if (diff)
> @@ -508,6 +618,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
>  	spin_lock_init(&vb->stop_update_lock);
>  	vb->stop_update = false;
>  	vb->num_pages = 0;
> +	vb->bmap_len = ALIGN(VIRTIO_BALLOON_PFNS_LIMIT, BITS_PER_LONG) /
> +		 BITS_PER_BYTE + 2 * sizeof(unsigned long);
> +	vb->page_bitmap = kzalloc(vb->bmap_len, GFP_KERNEL);
> +	if (!vb->page_bitmap) {
> +		err = -ENOMEM;
> +		goto out;
> +	}

How about we clear the bitmap feature on this failure?
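
A rough sketch of that fallback, assuming __virtio_clear_bit() from
linux/virtio_config.h is acceptable at this point in probe; a real fix
would also need to keep probe going and mind feature-negotiation
ordering:

        vb->page_bitmap = kzalloc(vb->bmap_len, GFP_KERNEL);
        if (!vb->page_bitmap) {
                /* Instead of failing probe, drop the feature and fall back
                 * to the PFN-array interface.
                 */
                dev_warn(&vdev->dev,
                         "page bitmap allocation failed, disabling VIRTIO_BALLOON_F_PAGE_BITMAP\n");
                __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_BITMAP);
        }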

>  	mutex_init(&vb->balloon_lock);
>  	init_waitqueue_head(&vb->acked);
>  	vb->vdev = vdev;
> @@ -541,9 +658,12 @@ out:
>  
>  static void remove_common(struct virtio_balloon *vb)
>  {
> +	bool use_bmap;
> +
> +	use_bmap = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP);
>  	/* There might be pages left in the balloon: free them. */
>  	while (vb->num_pages)
> -		leak_balloon(vb, vb->num_pages);
> +		leak_balloon(vb, vb->num_pages, use_bmap);
>  	update_balloon_size(vb);
>  
>  	/* Now we reset the device so we can clean up the queues. */
> @@ -565,6 +685,7 @@ static void virtballoon_remove(struct virtio_device *vdev)
>  	cancel_work_sync(&vb->update_balloon_stats_work);
>  
>  	remove_common(vb);
> +	kfree(vb->page_bitmap);
>  	kfree(vb);
>  }
>  
> @@ -603,6 +724,7 @@ static unsigned int features[] = {
>  	VIRTIO_BALLOON_F_MUST_TELL_HOST,
>  	VIRTIO_BALLOON_F_STATS_VQ,
>  	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
> +	VIRTIO_BALLOON_F_PAGE_BITMAP,
>  };
>  
>  static struct virtio_driver virtio_balloon_driver = {
> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> index 343d7dd..f78fa47 100644
> --- a/include/uapi/linux/virtio_balloon.h
> +++ b/include/uapi/linux/virtio_balloon.h
> @@ -34,6 +34,7 @@
>  #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
>  #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
>  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
> +#define VIRTIO_BALLOON_F_PAGE_BITMAP	3 /* Send page info with bitmap */
>  
>  /* Size of a PFN in the balloon interface. */
>  #define VIRTIO_BALLOON_PFN_SHIFT 12
> -- 
> 1.9.1
Liang Li June 24, 2016, 6:28 a.m. UTC | #3
Hi Michael,

Thanks for your comments!

> 
> 2 << 30 is 2GB, but that is not a useful comment.
> Please explain the reason for this selection.
> 

Will change in the next version.

> > +struct balloon_bmap_hdr {
> > +	__virtio32 id;
> > +	__virtio32 page_shift;
> > +	__virtio64 start_pfn;
> > +	__virtio64 bmap_len;
> > +};
> > +
> 
> Put this in an uapi header please.

Will change in the next version.

> > +static inline void init_pfn_range(struct virtio_balloon *vb) {
> > +	vb->min_pfn = (1UL << 48);
> 
> Where does this value come from? Do you want ULONG_MAX?
> This does not fit in long on 32 bit systems.

I just wanted to make it big enough; ULONG_MAX is better. Will change it.

> >  static void tell_host(struct virtio_balloon *vb, struct virtqueue
> > *vq)  {
> > -	struct scatterlist sg;
> >  	unsigned int len;
> >
> > -	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> > +	if (virtio_has_feature(vb->vdev,
> VIRTIO_BALLOON_F_PAGE_BITMAP)) {
> > +		struct balloon_bmap_hdr hdr;
> 
> why not init fields here?
> 
> > +		unsigned long bmap_len;
> 
> and here

All the fields and the bmap_len will be updated later, so init is unnecessary?
 
> > +		struct scatterlist sg[2];
> > +
> > +		hdr.id = cpu_to_virtio32(vb->vdev, 0);
> > +		hdr.page_shift = cpu_to_virtio32(vb->vdev, PAGE_SHIFT);
> > +		hdr.start_pfn = cpu_to_virtio64(vb->vdev, vb->start_pfn);
> > +		bmap_len = min(vb->bmap_len,
> > +				(vb->end_pfn - vb->start_pfn) /
> BITS_PER_BYTE);
> > +		hdr.bmap_len = cpu_to_virtio64(vb->vdev, bmap_len);
> > +		sg_init_table(sg, 2);
> > +		sg_set_buf(&sg[0], &hdr, sizeof(hdr));
> > +		sg_set_buf(&sg[1], vb->page_bitmap, bmap_len);
> > +		virtqueue_add_outbuf(vq, sg, 2, vb, GFP_KERNEL);
> 
> might fail if queue size < 2. validate queue size and clear
> VIRTIO_BALLOON_F_PAGE_BITMAP?
> 
Not considered yet.

> Alternatively, and I think preferably,
> use first struct balloon_bmap_hdr bytes in the buffer to pass the header to
> host.

What about the bitmap, should it be sent in a separate buffer?

> > +	struct page *page, *next;
> > +	bool find;
> 
> find -> found

Will change.

> > +	vb->max_pfn = roundup(vb->max_pfn, BITS_PER_LONG);
> > +	for (pfn = vb->min_pfn; pfn < vb->max_pfn;
> > +			pfn += VIRTIO_BALLOON_PFNS_LIMIT) {
> > +		vb->start_pfn = pfn;
> > +		vb->end_pfn = pfn;
> > +		memset(vb->page_bitmap, 0, vb->bmap_len);
> > +		find = false;
> > +		list_for_each_entry_safe(page, next, pages, lru) {
> 
> Why _safe?

The non-_safe variant is OK. Will change.

> > +			unsigned long balloon_pfn =
> page_to_balloon_pfn(page);
> > +
> > +			if (balloon_pfn < pfn ||
> > +				 balloon_pfn >= pfn +
> VIRTIO_BALLOON_PFNS_LIMIT)
> > +				continue;
> > +			set_bit(balloon_pfn - pfn, vb->page_bitmap);
> > +			if (balloon_pfn > vb->end_pfn)
> > +				vb->end_pfn = balloon_pfn;
> > +			find = true;
> 
> maybe remove page from list? this way we won't go over same entry
> multiple times ...

No, we can't remove the page from the list. The list saves all the pages filled into the balloon.
When deflating, we fetch the pages from the list to return them to the guest.
If they were removed, we couldn't find them.

> > unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
> >
> >  	num_allocated_pages = vb->num_pfns;
> >  	/* Did we get any? */
> > -	if (vb->num_pfns != 0)
> > -		tell_host(vb, vb->inflate_vq);
> > +	if (vb->num_pfns != 0) {
> > +		if (use_bmap)
> > +			set_page_bitmap(vb, &vb_dev_info->pages,
> > +					 vb->inflate_vq);
> 
> don't we need pages_lock if we access vb_dev_info->pages?

It is protected by vb->balloon_lock. Is that not enough?

> > +	vb->page_bitmap = kzalloc(vb->bmap_len, GFP_KERNEL);
> > +	if (!vb->page_bitmap) {
> > +		err = -ENOMEM;
> > +		goto out;
> > +	}
> 
> How about we clear the bitmap feature on this failure?

That's better. Will change.

Thanks again!
Liang

Patch

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 8d649a2..1fa601b 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -40,11 +40,19 @@ 
 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
 #define OOM_VBALLOON_DEFAULT_PAGES 256
 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
+#define VIRTIO_BALLOON_PFNS_LIMIT ((2 * (1ULL << 30)) >> PAGE_SHIFT) /* 2GB */
 
 static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
 module_param(oom_pages, int, S_IRUSR | S_IWUSR);
 MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
 
+struct balloon_bmap_hdr {
+	__virtio32 id;
+	__virtio32 page_shift;
+	__virtio64 start_pfn;
+	__virtio64 bmap_len;
+};
+
 struct virtio_balloon {
 	struct virtio_device *vdev;
 	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
@@ -62,6 +70,11 @@  struct virtio_balloon {
 
 	/* Number of balloon pages we've told the Host we're not using. */
 	unsigned int num_pages;
+	/* Bitmap and length used to tell the host the pages */
+	unsigned long *page_bitmap;
+	unsigned long bmap_len;
+	/* Used to record the processed pfn range */
+	unsigned long min_pfn, max_pfn, start_pfn, end_pfn;
 	/*
 	 * The pages we've told the Host we're not using are enqueued
 	 * at vb_dev_info->pages list.
@@ -105,15 +118,51 @@  static void balloon_ack(struct virtqueue *vq)
 	wake_up(&vb->acked);
 }
 
+static inline void init_pfn_range(struct virtio_balloon *vb)
+{
+	vb->min_pfn = (1UL << 48);
+	vb->max_pfn = 0;
+}
+
+static inline void update_pfn_range(struct virtio_balloon *vb,
+				 struct page *page)
+{
+	unsigned long balloon_pfn = page_to_balloon_pfn(page);
+
+	if (balloon_pfn < vb->min_pfn)
+		vb->min_pfn = balloon_pfn;
+	if (balloon_pfn > vb->max_pfn)
+		vb->max_pfn = balloon_pfn;
+}
+
 static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
 {
-	struct scatterlist sg;
 	unsigned int len;
 
-	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP)) {
+		struct balloon_bmap_hdr hdr;
+		unsigned long bmap_len;
+		struct scatterlist sg[2];
+
+		hdr.id = cpu_to_virtio32(vb->vdev, 0);
+		hdr.page_shift = cpu_to_virtio32(vb->vdev, PAGE_SHIFT);
+		hdr.start_pfn = cpu_to_virtio64(vb->vdev, vb->start_pfn);
+		bmap_len = min(vb->bmap_len,
+				(vb->end_pfn - vb->start_pfn) / BITS_PER_BYTE);
+		hdr.bmap_len = cpu_to_virtio64(vb->vdev, bmap_len);
+		sg_init_table(sg, 2);
+		sg_set_buf(&sg[0], &hdr, sizeof(hdr));
+		sg_set_buf(&sg[1], vb->page_bitmap, bmap_len);
+		virtqueue_add_outbuf(vq, sg, 2, vb, GFP_KERNEL);
+	} else {
+		struct scatterlist sg;
 
-	/* We should always be able to add one buffer to an empty queue. */
-	virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
+		sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
+		/* We should always be able to add one buffer to an
+		* empty queue.
+		*/
+		virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
+	}
 	virtqueue_kick(vq);
 
 	/* When host has read buffer, this completes via balloon_ack */
@@ -133,13 +182,50 @@  static void set_page_pfns(struct virtio_balloon *vb,
 					  page_to_balloon_pfn(page) + i);
 }
 
-static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
+static void set_page_bitmap(struct virtio_balloon *vb,
+			 struct list_head *pages, struct virtqueue *vq)
+{
+	unsigned long pfn;
+	struct page *page, *next;
+	bool find;
+
+	vb->min_pfn = rounddown(vb->min_pfn, BITS_PER_LONG);
+	vb->max_pfn = roundup(vb->max_pfn, BITS_PER_LONG);
+	for (pfn = vb->min_pfn; pfn < vb->max_pfn;
+			pfn += VIRTIO_BALLOON_PFNS_LIMIT) {
+		vb->start_pfn = pfn;
+		vb->end_pfn = pfn;
+		memset(vb->page_bitmap, 0, vb->bmap_len);
+		find = false;
+		list_for_each_entry_safe(page, next, pages, lru) {
+			unsigned long balloon_pfn = page_to_balloon_pfn(page);
+
+			if (balloon_pfn < pfn ||
+				 balloon_pfn >= pfn + VIRTIO_BALLOON_PFNS_LIMIT)
+				continue;
+			set_bit(balloon_pfn - pfn, vb->page_bitmap);
+			if (balloon_pfn > vb->end_pfn)
+				vb->end_pfn = balloon_pfn;
+			find = true;
+		}
+		if (find) {
+			vb->end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
+			tell_host(vb, vq);
+		}
+	}
+}
+
+static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num,
+				 bool use_bmap)
 {
 	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
 	unsigned num_allocated_pages;
 
-	/* We can only do one array worth at a time. */
-	num = min(num, ARRAY_SIZE(vb->pfns));
+	if (use_bmap)
+		init_pfn_range(vb);
+	else
+		/* We can only do one array worth at a time. */
+		num = min(num, ARRAY_SIZE(vb->pfns));
 
 	mutex_lock(&vb->balloon_lock);
 	for (vb->num_pfns = 0; vb->num_pfns < num;
@@ -154,7 +240,10 @@  static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
 			msleep(200);
 			break;
 		}
-		set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
+		if (use_bmap)
+			update_pfn_range(vb, page);
+		else
+			set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
 		vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
 		if (!virtio_has_feature(vb->vdev,
 					VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
@@ -163,8 +252,13 @@  static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
 
 	num_allocated_pages = vb->num_pfns;
 	/* Did we get any? */
-	if (vb->num_pfns != 0)
-		tell_host(vb, vb->inflate_vq);
+	if (vb->num_pfns != 0) {
+		if (use_bmap)
+			set_page_bitmap(vb, &vb_dev_info->pages,
+					 vb->inflate_vq);
+		else
+			tell_host(vb, vb->inflate_vq);
+	}
 	mutex_unlock(&vb->balloon_lock);
 
 	return num_allocated_pages;
@@ -184,15 +278,19 @@  static void release_pages_balloon(struct virtio_balloon *vb,
 	}
 }
 
-static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
+static unsigned int leak_balloon(struct virtio_balloon *vb, size_t num,
+				bool use_bmap)
 {
 	unsigned num_freed_pages;
 	struct page *page;
 	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
 	LIST_HEAD(pages);
 
-	/* We can only do one array worth at a time. */
-	num = min(num, ARRAY_SIZE(vb->pfns));
+	if (use_bmap)
+		init_pfn_range(vb);
+	else
+		/* We can only do one array worth at a time. */
+		num = min(num, ARRAY_SIZE(vb->pfns));
 
 	mutex_lock(&vb->balloon_lock);
 	for (vb->num_pfns = 0; vb->num_pfns < num;
@@ -200,7 +298,10 @@  static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
 		page = balloon_page_dequeue(vb_dev_info);
 		if (!page)
 			break;
-		set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
+		if (use_bmap)
+			update_pfn_range(vb, page);
+		else
+			set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
 		list_add(&page->lru, &pages);
 		vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
 	}
@@ -211,9 +312,14 @@  static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
 	 * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
 	 * is true, we *have* to do it in this order
 	 */
-	if (vb->num_pfns != 0)
-		tell_host(vb, vb->deflate_vq);
-	release_pages_balloon(vb, &pages);
+	if (vb->num_pfns != 0) {
+		if (use_bmap)
+			set_page_bitmap(vb, &pages, vb->deflate_vq);
+		else
+			tell_host(vb, vb->deflate_vq);
+
+		release_pages_balloon(vb, &pages);
+	}
 	mutex_unlock(&vb->balloon_lock);
 	return num_freed_pages;
 }
@@ -347,13 +453,15 @@  static int virtballoon_oom_notify(struct notifier_block *self,
 	struct virtio_balloon *vb;
 	unsigned long *freed;
 	unsigned num_freed_pages;
+	bool use_bmap;
 
 	vb = container_of(self, struct virtio_balloon, nb);
 	if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
 		return NOTIFY_OK;
 
 	freed = parm;
-	num_freed_pages = leak_balloon(vb, oom_pages);
+	use_bmap = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP);
+	num_freed_pages = leak_balloon(vb, oom_pages, use_bmap);
 	update_balloon_size(vb);
 	*freed += num_freed_pages;
 
@@ -373,15 +481,17 @@  static void update_balloon_size_func(struct work_struct *work)
 {
 	struct virtio_balloon *vb;
 	s64 diff;
+	bool use_bmap;
 
 	vb = container_of(work, struct virtio_balloon,
 			  update_balloon_size_work);
+	use_bmap = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP);
 	diff = towards_target(vb);
 
 	if (diff > 0)
-		diff -= fill_balloon(vb, diff);
+		diff -= fill_balloon(vb, diff, use_bmap);
 	else if (diff < 0)
-		diff += leak_balloon(vb, -diff);
+		diff += leak_balloon(vb, -diff, use_bmap);
 	update_balloon_size(vb);
 
 	if (diff)
@@ -508,6 +618,13 @@  static int virtballoon_probe(struct virtio_device *vdev)
 	spin_lock_init(&vb->stop_update_lock);
 	vb->stop_update = false;
 	vb->num_pages = 0;
+	vb->bmap_len = ALIGN(VIRTIO_BALLOON_PFNS_LIMIT, BITS_PER_LONG) /
+		 BITS_PER_BYTE + 2 * sizeof(unsigned long);
+	vb->page_bitmap = kzalloc(vb->bmap_len, GFP_KERNEL);
+	if (!vb->page_bitmap) {
+		err = -ENOMEM;
+		goto out;
+	}
 	mutex_init(&vb->balloon_lock);
 	init_waitqueue_head(&vb->acked);
 	vb->vdev = vdev;
@@ -541,9 +658,12 @@  out:
 
 static void remove_common(struct virtio_balloon *vb)
 {
+	bool use_bmap;
+
+	use_bmap = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP);
 	/* There might be pages left in the balloon: free them. */
 	while (vb->num_pages)
-		leak_balloon(vb, vb->num_pages);
+		leak_balloon(vb, vb->num_pages, use_bmap);
 	update_balloon_size(vb);
 
 	/* Now we reset the device so we can clean up the queues. */
@@ -565,6 +685,7 @@  static void virtballoon_remove(struct virtio_device *vdev)
 	cancel_work_sync(&vb->update_balloon_stats_work);
 
 	remove_common(vb);
+	kfree(vb->page_bitmap);
 	kfree(vb);
 }
 
@@ -603,6 +724,7 @@  static unsigned int features[] = {
 	VIRTIO_BALLOON_F_MUST_TELL_HOST,
 	VIRTIO_BALLOON_F_STATS_VQ,
 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
+	VIRTIO_BALLOON_F_PAGE_BITMAP,
 };
 
 static struct virtio_driver virtio_balloon_driver = {
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 343d7dd..f78fa47 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -34,6 +34,7 @@ 
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
 #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
+#define VIRTIO_BALLOON_F_PAGE_BITMAP	3 /* Send page info with bitmap */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12