diff mbox

[v9,5/5] virtio-balloon: VIRTIO_BALLOON_F_MISC_VQ

Message ID 1492076108-117229-6-git-send-email-wei.w.wang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wang, Wei W April 13, 2017, 9:35 a.m. UTC
Add a new vq, miscq, to handle miscellaneous requests between the device
and the driver.

This patch implemnts the VIRTIO_BALLOON_MISCQ_INQUIRE_UNUSED_PAGES
request sent from the device. Upon receiving this request from the
miscq, the driver offers to the device the guest unused pages.

Tests have shown that skipping the transfer of unused pages of a 32G
guest can get the live migration time reduced to 1/8.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Liang Li <liang.z.li@intel.com>
---
 drivers/virtio/virtio_balloon.c     | 209 +++++++++++++++++++++++++++++++++---
 include/uapi/linux/virtio_balloon.h |   8 ++
 2 files changed, 204 insertions(+), 13 deletions(-)

Comments

Michael S. Tsirkin April 13, 2017, 5:08 p.m. UTC | #1
On Thu, Apr 13, 2017 at 05:35:08PM +0800, Wei Wang wrote:
> Add a new vq, miscq, to handle miscellaneous requests between the device
> and the driver.
> 
> This patch implemnts the VIRTIO_BALLOON_MISCQ_INQUIRE_UNUSED_PAGES

implements

> request sent from the device.

Commands are sent from host and handled on guest.
In fact how is this so different from stats?
How about reusing the stats vq then? You can use one buffer
for stats and one buffer for commands.

> Upon receiving this request from the
> miscq, the driver offers to the device the guest unused pages.
> 
> Tests have shown that skipping the transfer of unused pages of a 32G
> guest can get the live migration time reduced to 1/8.
> 
> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> Signed-off-by: Liang Li <liang.z.li@intel.com>
> ---
>  drivers/virtio/virtio_balloon.c     | 209 +++++++++++++++++++++++++++++++++---
>  include/uapi/linux/virtio_balloon.h |   8 ++
>  2 files changed, 204 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 5e2e7cc..95c703e 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -56,11 +56,12 @@ static struct vfsmount *balloon_mnt;
>  
>  /* Types of pages to chunk */
>  #define PAGE_CHUNK_TYPE_BALLOON 0
> +#define PAGE_CHUNK_TYPE_UNUSED 1
>  
>  #define MAX_PAGE_CHUNKS 4096
>  struct virtio_balloon {
>  	struct virtio_device *vdev;
> -	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
> +	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *miscq;
>  
>  	/* The balloon servicing is delegated to a freezable workqueue. */
>  	struct work_struct update_balloon_stats_work;
> @@ -94,6 +95,19 @@ struct virtio_balloon {
>  	struct virtio_balloon_page_chunk_hdr *balloon_page_chunk_hdr;
>  	struct virtio_balloon_page_chunk *balloon_page_chunk;
>  
> +	/*
> +	 * Buffer for PAGE_CHUNK_TYPE_UNUSED:
> +	 * virtio_balloon_miscq_hdr +
> +	 * virtio_balloon_page_chunk_hdr +
> +	 * virtio_balloon_page_chunk * MAX_PAGE_CHUNKS
> +	 */
> +	struct virtio_balloon_miscq_hdr *miscq_out_hdr;
> +	struct virtio_balloon_page_chunk_hdr *unused_page_chunk_hdr;
> +	struct virtio_balloon_page_chunk *unused_page_chunk;
> +
> +	/* Buffer for host to send cmd to miscq */
> +	struct virtio_balloon_miscq_hdr *miscq_in_hdr;
> +
>  	/* Bitmap used to record pages */
>  	unsigned long *page_bmap[PAGE_BMAP_COUNT_MAX];
>  	/* Number of the allocated page_bmap */
> @@ -220,6 +234,10 @@ static void send_page_chunks(struct virtio_balloon *vb, struct virtqueue *vq,
>  		hdr = vb->balloon_page_chunk_hdr;
>  		len = 0;
>  		break;
> +	case PAGE_CHUNK_TYPE_UNUSED:
> +		hdr = vb->unused_page_chunk_hdr;
> +		len = sizeof(struct virtio_balloon_miscq_hdr);
> +		break;
>  	default:
>  		dev_warn(&vb->vdev->dev, "%s: chunk %d of unknown pages\n",
>  			 __func__, type);
> @@ -254,6 +272,10 @@ static void add_one_chunk(struct virtio_balloon *vb, struct virtqueue *vq,
>  		hdr = vb->balloon_page_chunk_hdr;
>  		chunk = vb->balloon_page_chunk;
>  		break;
> +	case PAGE_CHUNK_TYPE_UNUSED:
> +		hdr = vb->unused_page_chunk_hdr;
> +		chunk = vb->unused_page_chunk;
> +		break;
>  	default:
>  		dev_warn(&vb->vdev->dev, "%s: chunk %d of unknown pages\n",
>  			 __func__, type);
> @@ -686,28 +708,139 @@ static void update_balloon_size_func(struct work_struct *work)
>  		queue_work(system_freezable_wq, work);
>  }
>  
> +static void miscq_in_hdr_add(struct virtio_balloon *vb)
> +{
> +	struct scatterlist sg_in;
> +
> +	sg_init_one(&sg_in, vb->miscq_in_hdr,
> +		    sizeof(struct virtio_balloon_miscq_hdr));
> +	if (virtqueue_add_inbuf(vb->miscq, &sg_in, 1, vb->miscq_in_hdr,
> +	    GFP_KERNEL) < 0) {
> +		__virtio_clear_bit(vb->vdev,
> +				   VIRTIO_BALLOON_F_MISC_VQ);
> +		dev_warn(&vb->vdev->dev, "%s: add miscq_in_hdr err\n",
> +			 __func__);
> +		return;
> +	}
> +	virtqueue_kick(vb->miscq);
> +}
> +
> +static void miscq_send_unused_pages(struct virtio_balloon *vb)
> +{
> +	struct virtio_balloon_miscq_hdr *miscq_out_hdr = vb->miscq_out_hdr;
> +	struct virtqueue *vq = vb->miscq;
> +	int ret = 0;
> +	unsigned int order = 0, migratetype = 0;
> +	struct zone *zone = NULL;
> +	struct page *page = NULL;
> +	u64 pfn;
> +
> +	miscq_out_hdr->cmd =  VIRTIO_BALLOON_MISCQ_INQUIRE_UNUSED_PAGES;

Gets endian-ness and whitespace wrong. Pls use static checkers to catch
this type of error.

> +	miscq_out_hdr->flags = 0;
> +
> +	for_each_populated_zone(zone) {
> +		for (order = MAX_ORDER - 1; order > 0; order--) {
> +			for (migratetype = 0; migratetype < MIGRATE_TYPES;
> +			     migratetype++) {
> +				do {
> +					ret = inquire_unused_page_block(zone,
> +						order, migratetype, &page);
> +					if (!ret) {
> +						pfn = (u64)page_to_pfn(page);
> +						add_one_chunk(vb, vq,
> +							PAGE_CHUNK_TYPE_UNUSED,
> +							pfn,
> +							(u64)(1 << order));
> +					}
> +				} while (!ret);
> +			}
> +		}
> +	}
> +	miscq_out_hdr->flags |= VIRTIO_BALLOON_MISCQ_F_COMPLETE;

And where is miscq_out_hdr used? I see no add_outbuf anywhere.

Things like this should be passed through function parameters
and not stuffed into device structure, fields should be
initialized before use and not where we happen to
have the data handy.



Also, _F_ is normally a bit number, you use it as a value here.


> +	send_page_chunks(vb, vq, PAGE_CHUNK_TYPE_UNUSED, true);
> +}
> +
> +static void miscq_handle(struct virtqueue *vq)
> +{
> +	struct virtio_balloon *vb = vq->vdev->priv;
> +	struct virtio_balloon_miscq_hdr *hdr;
> +	unsigned int len;
> +
> +	hdr = virtqueue_get_buf(vb->miscq, &len);
> +	if (!hdr || len != sizeof(struct virtio_balloon_miscq_hdr)) {
> +		dev_warn(&vb->vdev->dev, "%s: invalid miscq hdr len\n",
> +			 __func__);
> +		miscq_in_hdr_add(vb);
> +		return;
> +	}
> +	switch (hdr->cmd) {
> +	case VIRTIO_BALLOON_MISCQ_INQUIRE_UNUSED_PAGES:
> +		miscq_send_unused_pages(vb);
> +		break;
> +	default:
> +		dev_warn(&vb->vdev->dev, "%s: miscq cmd %d not supported\n",
> +			 __func__, hdr->cmd);
> +	}
> +	miscq_in_hdr_add(vb);
> +}
> +
>  static int init_vqs(struct virtio_balloon *vb)
>  {
> -	struct virtqueue *vqs[3];
> -	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
> -	static const char * const names[] = { "inflate", "deflate", "stats" };
> -	int err, nvqs;
> +	struct virtqueue **vqs;
> +	vq_callback_t **callbacks;
> +	const char **names;
> +	int err = -ENOMEM;
> +	int i, nvqs;
> +
> +	 /* Inflateq and deflateq are used unconditionally */
> +	nvqs = 2;
> +
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ))
> +		nvqs++;
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_MISC_VQ))
> +		nvqs++;
> +
> +	/* Allocate space for find_vqs parameters */
> +	vqs = kcalloc(nvqs, sizeof(*vqs), GFP_KERNEL);
> +	if (!vqs)
> +		goto err_vq;
> +	callbacks = kmalloc_array(nvqs, sizeof(*callbacks), GFP_KERNEL);
> +	if (!callbacks)
> +		goto err_callback;
> +	names = kmalloc_array(nvqs, sizeof(*names), GFP_KERNEL);
> +	if (!names)
> +		goto err_names;
> +

All of 4 VQs, why are dynamic allocations called for?

> +	callbacks[0] = balloon_ack;
> +	names[0] = "inflate";
> +	callbacks[1] = balloon_ack;
> +	names[1] = "deflate";
> +
> +	i = 2;
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> +		callbacks[i] = stats_request;
> +		names[i] = "stats";
> +		i++;
> +	}
>  
> -	/*
> -	 * We expect two virtqueues: inflate and deflate, and
> -	 * optionally stat.
> -	 */
> -	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
> -	err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names);
> +	if (virtio_has_feature(vb->vdev,
> +				      VIRTIO_BALLOON_F_MISC_VQ)) {
> +		callbacks[i] = miscq_handle;
> +		names[i] = "miscq";
> +	}
> +
> +	err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks,
> +					 names);
>  	if (err)
> -		return err;
> +		goto err_find;
>  
>  	vb->inflate_vq = vqs[0];
>  	vb->deflate_vq = vqs[1];
> +	i = 2;
>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
>  		struct scatterlist sg;
> -		vb->stats_vq = vqs[2];
>  
> +		vb->stats_vq = vqs[i++];
>  		/*
>  		 * Prime this virtqueue with one buffer so the hypervisor can
>  		 * use it to signal us later (it can't be broken yet!).
> @@ -718,7 +851,25 @@ static int init_vqs(struct virtio_balloon *vb)
>  			BUG();
>  		virtqueue_kick(vb->stats_vq);
>  	}
> +
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_MISC_VQ)) {
> +		vb->miscq = vqs[i];
> +		miscq_in_hdr_add(vb);
> +	}
> +
> +	kfree(names);
> +	kfree(callbacks);
> +	kfree(vqs);
>  	return 0;
> +
> +err_find:
> +	kfree(names);
> +err_names:
> +	kfree(callbacks);
> +err_callback:
> +	kfree(vqs);
> +err_vq:
> +	return err;
>  }
>  
>  #ifdef CONFIG_BALLOON_COMPACTION
> @@ -843,6 +994,32 @@ static void balloon_page_chunk_init(struct virtio_balloon *vb)
>  	}
>  }
>  
> +static void miscq_init(struct virtio_balloon *vb)
> +{
> +	void *buf;
> +
> +	vb->miscq_in_hdr = kmalloc(sizeof(struct virtio_balloon_miscq_hdr),
> +				   GFP_KERNEL);
> +	buf = kmalloc(sizeof(struct virtio_balloon_miscq_hdr) +
> +		      sizeof(struct virtio_balloon_page_chunk_hdr) +
> +		      sizeof(struct virtio_balloon_page_chunk) *
> +		      MAX_PAGE_CHUNKS, GFP_KERNEL);

Mabe reduce MAX_PAGE_CHUNKS even further to fit in order-3 allocation.


> +	if (!vb->miscq_in_hdr || !buf) {
> +		kfree(buf);
> +		kfree(vb->miscq_in_hdr);
> +		__virtio_clear_bit(vb->vdev, VIRTIO_BALLOON_F_MISC_VQ);

Again this does not really work here. In this case it might be best to
just fail probe.

> +		dev_warn(&vb->vdev->dev, "%s: failed\n", __func__);
> +	} else {
> +		vb->miscq_out_hdr = buf;
> +		vb->unused_page_chunk_hdr = buf +
> +				sizeof(struct virtio_balloon_miscq_hdr);
> +		vb->unused_page_chunk_hdr->chunks = 0;
> +		vb->unused_page_chunk = buf +
> +				sizeof(struct virtio_balloon_miscq_hdr) +
> +				sizeof(struct virtio_balloon_page_chunk_hdr);
> +	}
> +}
> +
>  static int virtballoon_probe(struct virtio_device *vdev)
>  {
>  	struct virtio_balloon *vb;
> @@ -869,6 +1046,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
>  	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_BALLOON_CHUNKS))
>  		balloon_page_chunk_init(vb);
>  
> +	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_MISC_VQ))
> +		miscq_init(vb);
> +
>  	mutex_init(&vb->balloon_lock);
>  	init_waitqueue_head(&vb->acked);
>  	vb->vdev = vdev;
> @@ -946,6 +1126,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
>  
>  	remove_common(vb);
>  	free_page_bmap(vb);
> +	kfree(vb->miscq_out_hdr);
> +	kfree(vb->miscq_in_hdr);
>  	if (vb->vb_dev_info.inode)
>  		iput(vb->vb_dev_info.inode);
>  	kfree(vb);
> @@ -987,6 +1169,7 @@ static unsigned int features[] = {
>  	VIRTIO_BALLOON_F_STATS_VQ,
>  	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
>  	VIRTIO_BALLOON_F_BALLOON_CHUNKS,
> +	VIRTIO_BALLOON_F_MISC_VQ,
>  };
>  
>  static struct virtio_driver virtio_balloon_driver = {
> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> index be317b7..96bdc86 100644
> --- a/include/uapi/linux/virtio_balloon.h
> +++ b/include/uapi/linux/virtio_balloon.h
> @@ -35,6 +35,7 @@
>  #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
>  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
>  #define VIRTIO_BALLOON_F_BALLOON_CHUNKS 3 /* Inflate/Deflate pages in chunks */
> +#define VIRTIO_BALLOON_F_MISC_VQ	4 /* Virtqueue for misc. requests */

Is "misc" the best we can do? I think these are
actually host commands - aren't they?

>  
>  /* Size of a PFN in the balloon interface. */
>  #define VIRTIO_BALLOON_PFN_SHIFT 12
> @@ -95,4 +96,11 @@ struct virtio_balloon_page_chunk {
>  	__le64 size;
>  };
>  
> +#define VIRTIO_BALLOON_MISCQ_INQUIRE_UNUSED_PAGES 0

meaning what? Is this a command value? Is this a command
to report unused memory then? Let's call it this then.


> +#define VIRTIO_BALLOON_MISCQ_F_COMPLETE 0x1

meaning what?

> +struct virtio_balloon_miscq_hdr {
> +	__le16 cmd;
> +	__le16 flags;

Add padding to make it full 64 bit.

> +};
> +
>  #endif /* _LINUX_VIRTIO_BALLOON_H */
> -- 
> 2.7.4
Wang, Wei W April 27, 2017, 6:33 a.m. UTC | #2
On 04/14/2017 01:08 AM, Michael S. Tsirkin wrote:
> On Thu, Apr 13, 2017 at 05:35:08PM +0800, Wei Wang wrote:
>> Add a new vq, miscq, to handle miscellaneous requests between the device
>> and the driver.
>>
>> This patch implemnts the VIRTIO_BALLOON_MISCQ_INQUIRE_UNUSED_PAGES
> implements
>
>> request sent from the device.
> Commands are sent from host and handled on guest.
> In fact how is this so different from stats?
> How about reusing the stats vq then? You can use one buffer
> for stats and one buffer for commands.
>

The meaning of the two vqs is a little different. statq is used for
reporting statistics, while miscq is intended to be used to handle
miscellaneous requests from the guest or host (I think it can
also be used the other way around in the future when other
new features are added which need the guest to send requests
and the host to provide responses).

I would prefer to have them separate, because:
If we plan to combine them, we need to put the previous statq
related implementation under miscq with a new command (I think
we can't combine them without using commands to distinguish
the two features).
In this way, an old driver won't work with a new QEMU or a new
driver won't work with an old QEMU. Would this be considered
as an issue here?



>
>> +	miscq_out_hdr->flags = 0;
>> +
>> +	for_each_populated_zone(zone) {
>> +		for (order = MAX_ORDER - 1; order > 0; order--) {
>> +			for (migratetype = 0; migratetype < MIGRATE_TYPES;
>> +			     migratetype++) {
>> +				do {
>> +					ret = inquire_unused_page_block(zone,
>> +						order, migratetype, &page);
>> +					if (!ret) {
>> +						pfn = (u64)page_to_pfn(page);
>> +						add_one_chunk(vb, vq,
>> +							PAGE_CHUNK_TYPE_UNUSED,
>> +							pfn,
>> +							(u64)(1 << order));
>> +					}
>> +				} while (!ret);
>> +			}
>> +		}
>> +	}
>> +	miscq_out_hdr->flags |= VIRTIO_BALLOON_MISCQ_F_COMPLETE;
> And where is miscq_out_hdr used? I see no add_outbuf anywhere.
>
> Things like this should be passed through function parameters
> and not stuffed into device structure, fields should be
> initialized before use and not where we happen to
> have the data handy.
>

miscq_out_hdr is linear with the payload (i.e. kmalloc(hdr+payload) ).
It is the same as the use of statq - one request in-flight each time.


>
> Also, _F_ is normally a bit number, you use it as a value here.
>
It intends to be a bit number. Bit 0 of flags to indicate the completion
of handling the request.
Michael S. Tsirkin May 5, 2017, 10:21 p.m. UTC | #3
On Thu, Apr 27, 2017 at 02:33:44PM +0800, Wei Wang wrote:
> On 04/14/2017 01:08 AM, Michael S. Tsirkin wrote:
> > On Thu, Apr 13, 2017 at 05:35:08PM +0800, Wei Wang wrote:
> > > Add a new vq, miscq, to handle miscellaneous requests between the device
> > > and the driver.
> > > 
> > > This patch implemnts the VIRTIO_BALLOON_MISCQ_INQUIRE_UNUSED_PAGES
> > implements
> > 
> > > request sent from the device.
> > Commands are sent from host and handled on guest.
> > In fact how is this so different from stats?
> > How about reusing the stats vq then? You can use one buffer
> > for stats and one buffer for commands.
> > 
> 
> The meaning of the two vqs is a little different. statq is used for
> reporting statistics, while miscq is intended to be used to handle
> miscellaneous requests from the guest or host

misc just means "anything goes". If you want it to mean
"commands" name it so.

> (I think it can
> also be used the other way around in the future when other
> new features are added which need the guest to send requests
> and the host to provide responses).
> 
> I would prefer to have them separate, because:
> If we plan to combine them, we need to put the previous statq
> related implementation under miscq with a new command (I think
> we can't combine them without using commands to distinguish
> the two features).

Right.

> In this way, an old driver won't work with a new QEMU or a new
> driver won't work with an old QEMU. Would this be considered
> as an issue here?

Compatibility is and should always be handled using
feature flags.  There's a feature flag for this, isn't it?

> 
> 
> > 
> > > +	miscq_out_hdr->flags = 0;
> > > +
> > > +	for_each_populated_zone(zone) {
> > > +		for (order = MAX_ORDER - 1; order > 0; order--) {
> > > +			for (migratetype = 0; migratetype < MIGRATE_TYPES;
> > > +			     migratetype++) {
> > > +				do {
> > > +					ret = inquire_unused_page_block(zone,
> > > +						order, migratetype, &page);
> > > +					if (!ret) {
> > > +						pfn = (u64)page_to_pfn(page);
> > > +						add_one_chunk(vb, vq,
> > > +							PAGE_CHUNK_TYPE_UNUSED,
> > > +							pfn,
> > > +							(u64)(1 << order));
> > > +					}
> > > +				} while (!ret);
> > > +			}
> > > +		}
> > > +	}
> > > +	miscq_out_hdr->flags |= VIRTIO_BALLOON_MISCQ_F_COMPLETE;
> > And where is miscq_out_hdr used? I see no add_outbuf anywhere.
> > 
> > Things like this should be passed through function parameters
> > and not stuffed into device structure, fields should be
> > initialized before use and not where we happen to
> > have the data handy.
> > 
> 
> miscq_out_hdr is linear with the payload (i.e. kmalloc(hdr+payload) ).
> It is the same as the use of statq - one request in-flight each time.
> 
> 
> > 
> > Also, _F_ is normally a bit number, you use it as a value here.
> > 
> It intends to be a bit number. Bit 0 of flags to indicate the completion
> of handling the request.
Wang, Wei W May 7, 2017, 4:20 a.m. UTC | #4
On 05/06/2017 06:21 AM, Michael S. Tsirkin wrote:
> On Thu, Apr 27, 2017 at 02:33:44PM +0800, Wei Wang wrote:
> > On 04/14/2017 01:08 AM, Michael S. Tsirkin wrote:
> > > On Thu, Apr 13, 2017 at 05:35:08PM +0800, Wei Wang wrote:
> > > > Add a new vq, miscq, to handle miscellaneous requests between the
> > > > device and the driver.
> > > >
> > > > This patch implemnts the
> VIRTIO_BALLOON_MISCQ_INQUIRE_UNUSED_PAGES
> > > implements
> > >
> > > > request sent from the device.
> > > Commands are sent from host and handled on guest.
> > > In fact how is this so different from stats?
> > > How about reusing the stats vq then? You can use one buffer for
> > > stats and one buffer for commands.
> > >
> >
> > The meaning of the two vqs is a little different. statq is used for
> > reporting statistics, while miscq is intended to be used to handle
> > miscellaneous requests from the guest or host
> 
> misc just means "anything goes". If you want it to mean "commands" name it so.

Ok, will change it.

> > (I think it can
> > also be used the other way around in the future when other new
> > features are added which need the guest to send requests and the host
> > to provide responses).
> >
> > I would prefer to have them separate, because:
> > If we plan to combine them, we need to put the previous statq related
> > implementation under miscq with a new command (I think we can't
> > combine them without using commands to distinguish the two features).
> 
> Right.

> > In this way, an old driver won't work with a new QEMU or a new driver
> > won't work with an old QEMU. Would this be considered as an issue
> > here?
> 
> Compatibility is and should always be handled using feature flags.  There's a
> feature flag for this, isn't it?

The negotiation of the existing feature flag, VIRTIO_BALLOON_F_STATS_VQ
only indicates the support of the old statq implementation. To move the statq
implementation under cmdq, I think we would need a new feature flag for the
new statq implementation:
#define VIRTIO_BALLOON_F_CMDQ_STATS      5

What do you think?

Best,
Wei
diff mbox

Patch

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 5e2e7cc..95c703e 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -56,11 +56,12 @@  static struct vfsmount *balloon_mnt;
 
 /* Types of pages to chunk */
 #define PAGE_CHUNK_TYPE_BALLOON 0
+#define PAGE_CHUNK_TYPE_UNUSED 1
 
 #define MAX_PAGE_CHUNKS 4096
 struct virtio_balloon {
 	struct virtio_device *vdev;
-	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
+	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *miscq;
 
 	/* The balloon servicing is delegated to a freezable workqueue. */
 	struct work_struct update_balloon_stats_work;
@@ -94,6 +95,19 @@  struct virtio_balloon {
 	struct virtio_balloon_page_chunk_hdr *balloon_page_chunk_hdr;
 	struct virtio_balloon_page_chunk *balloon_page_chunk;
 
+	/*
+	 * Buffer for PAGE_CHUNK_TYPE_UNUSED:
+	 * virtio_balloon_miscq_hdr +
+	 * virtio_balloon_page_chunk_hdr +
+	 * virtio_balloon_page_chunk * MAX_PAGE_CHUNKS
+	 */
+	struct virtio_balloon_miscq_hdr *miscq_out_hdr;
+	struct virtio_balloon_page_chunk_hdr *unused_page_chunk_hdr;
+	struct virtio_balloon_page_chunk *unused_page_chunk;
+
+	/* Buffer for host to send cmd to miscq */
+	struct virtio_balloon_miscq_hdr *miscq_in_hdr;
+
 	/* Bitmap used to record pages */
 	unsigned long *page_bmap[PAGE_BMAP_COUNT_MAX];
 	/* Number of the allocated page_bmap */
@@ -220,6 +234,10 @@  static void send_page_chunks(struct virtio_balloon *vb, struct virtqueue *vq,
 		hdr = vb->balloon_page_chunk_hdr;
 		len = 0;
 		break;
+	case PAGE_CHUNK_TYPE_UNUSED:
+		hdr = vb->unused_page_chunk_hdr;
+		len = sizeof(struct virtio_balloon_miscq_hdr);
+		break;
 	default:
 		dev_warn(&vb->vdev->dev, "%s: chunk %d of unknown pages\n",
 			 __func__, type);
@@ -254,6 +272,10 @@  static void add_one_chunk(struct virtio_balloon *vb, struct virtqueue *vq,
 		hdr = vb->balloon_page_chunk_hdr;
 		chunk = vb->balloon_page_chunk;
 		break;
+	case PAGE_CHUNK_TYPE_UNUSED:
+		hdr = vb->unused_page_chunk_hdr;
+		chunk = vb->unused_page_chunk;
+		break;
 	default:
 		dev_warn(&vb->vdev->dev, "%s: chunk %d of unknown pages\n",
 			 __func__, type);
@@ -686,28 +708,139 @@  static void update_balloon_size_func(struct work_struct *work)
 		queue_work(system_freezable_wq, work);
 }
 
+static void miscq_in_hdr_add(struct virtio_balloon *vb)
+{
+	struct scatterlist sg_in;
+
+	sg_init_one(&sg_in, vb->miscq_in_hdr,
+		    sizeof(struct virtio_balloon_miscq_hdr));
+	if (virtqueue_add_inbuf(vb->miscq, &sg_in, 1, vb->miscq_in_hdr,
+	    GFP_KERNEL) < 0) {
+		__virtio_clear_bit(vb->vdev,
+				   VIRTIO_BALLOON_F_MISC_VQ);
+		dev_warn(&vb->vdev->dev, "%s: add miscq_in_hdr err\n",
+			 __func__);
+		return;
+	}
+	virtqueue_kick(vb->miscq);
+}
+
+static void miscq_send_unused_pages(struct virtio_balloon *vb)
+{
+	struct virtio_balloon_miscq_hdr *miscq_out_hdr = vb->miscq_out_hdr;
+	struct virtqueue *vq = vb->miscq;
+	int ret = 0;
+	unsigned int order = 0, migratetype = 0;
+	struct zone *zone = NULL;
+	struct page *page = NULL;
+	u64 pfn;
+
+	miscq_out_hdr->cmd =  VIRTIO_BALLOON_MISCQ_INQUIRE_UNUSED_PAGES;
+	miscq_out_hdr->flags = 0;
+
+	for_each_populated_zone(zone) {
+		for (order = MAX_ORDER - 1; order > 0; order--) {
+			for (migratetype = 0; migratetype < MIGRATE_TYPES;
+			     migratetype++) {
+				do {
+					ret = inquire_unused_page_block(zone,
+						order, migratetype, &page);
+					if (!ret) {
+						pfn = (u64)page_to_pfn(page);
+						add_one_chunk(vb, vq,
+							PAGE_CHUNK_TYPE_UNUSED,
+							pfn,
+							(u64)(1 << order));
+					}
+				} while (!ret);
+			}
+		}
+	}
+	miscq_out_hdr->flags |= VIRTIO_BALLOON_MISCQ_F_COMPLETE;
+	send_page_chunks(vb, vq, PAGE_CHUNK_TYPE_UNUSED, true);
+}
+
+static void miscq_handle(struct virtqueue *vq)
+{
+	struct virtio_balloon *vb = vq->vdev->priv;
+	struct virtio_balloon_miscq_hdr *hdr;
+	unsigned int len;
+
+	hdr = virtqueue_get_buf(vb->miscq, &len);
+	if (!hdr || len != sizeof(struct virtio_balloon_miscq_hdr)) {
+		dev_warn(&vb->vdev->dev, "%s: invalid miscq hdr len\n",
+			 __func__);
+		miscq_in_hdr_add(vb);
+		return;
+	}
+	switch (hdr->cmd) {
+	case VIRTIO_BALLOON_MISCQ_INQUIRE_UNUSED_PAGES:
+		miscq_send_unused_pages(vb);
+		break;
+	default:
+		dev_warn(&vb->vdev->dev, "%s: miscq cmd %d not supported\n",
+			 __func__, hdr->cmd);
+	}
+	miscq_in_hdr_add(vb);
+}
+
 static int init_vqs(struct virtio_balloon *vb)
 {
-	struct virtqueue *vqs[3];
-	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
-	static const char * const names[] = { "inflate", "deflate", "stats" };
-	int err, nvqs;
+	struct virtqueue **vqs;
+	vq_callback_t **callbacks;
+	const char **names;
+	int err = -ENOMEM;
+	int i, nvqs;
+
+	 /* Inflateq and deflateq are used unconditionally */
+	nvqs = 2;
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ))
+		nvqs++;
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_MISC_VQ))
+		nvqs++;
+
+	/* Allocate space for find_vqs parameters */
+	vqs = kcalloc(nvqs, sizeof(*vqs), GFP_KERNEL);
+	if (!vqs)
+		goto err_vq;
+	callbacks = kmalloc_array(nvqs, sizeof(*callbacks), GFP_KERNEL);
+	if (!callbacks)
+		goto err_callback;
+	names = kmalloc_array(nvqs, sizeof(*names), GFP_KERNEL);
+	if (!names)
+		goto err_names;
+
+	callbacks[0] = balloon_ack;
+	names[0] = "inflate";
+	callbacks[1] = balloon_ack;
+	names[1] = "deflate";
+
+	i = 2;
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+		callbacks[i] = stats_request;
+		names[i] = "stats";
+		i++;
+	}
 
-	/*
-	 * We expect two virtqueues: inflate and deflate, and
-	 * optionally stat.
-	 */
-	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
-	err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names);
+	if (virtio_has_feature(vb->vdev,
+				      VIRTIO_BALLOON_F_MISC_VQ)) {
+		callbacks[i] = miscq_handle;
+		names[i] = "miscq";
+	}
+
+	err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks,
+					 names);
 	if (err)
-		return err;
+		goto err_find;
 
 	vb->inflate_vq = vqs[0];
 	vb->deflate_vq = vqs[1];
+	i = 2;
 	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
 		struct scatterlist sg;
-		vb->stats_vq = vqs[2];
 
+		vb->stats_vq = vqs[i++];
 		/*
 		 * Prime this virtqueue with one buffer so the hypervisor can
 		 * use it to signal us later (it can't be broken yet!).
@@ -718,7 +851,25 @@  static int init_vqs(struct virtio_balloon *vb)
 			BUG();
 		virtqueue_kick(vb->stats_vq);
 	}
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_MISC_VQ)) {
+		vb->miscq = vqs[i];
+		miscq_in_hdr_add(vb);
+	}
+
+	kfree(names);
+	kfree(callbacks);
+	kfree(vqs);
 	return 0;
+
+err_find:
+	kfree(names);
+err_names:
+	kfree(callbacks);
+err_callback:
+	kfree(vqs);
+err_vq:
+	return err;
 }
 
 #ifdef CONFIG_BALLOON_COMPACTION
@@ -843,6 +994,32 @@  static void balloon_page_chunk_init(struct virtio_balloon *vb)
 	}
 }
 
+static void miscq_init(struct virtio_balloon *vb)
+{
+	void *buf;
+
+	vb->miscq_in_hdr = kmalloc(sizeof(struct virtio_balloon_miscq_hdr),
+				   GFP_KERNEL);
+	buf = kmalloc(sizeof(struct virtio_balloon_miscq_hdr) +
+		      sizeof(struct virtio_balloon_page_chunk_hdr) +
+		      sizeof(struct virtio_balloon_page_chunk) *
+		      MAX_PAGE_CHUNKS, GFP_KERNEL);
+	if (!vb->miscq_in_hdr || !buf) {
+		kfree(buf);
+		kfree(vb->miscq_in_hdr);
+		__virtio_clear_bit(vb->vdev, VIRTIO_BALLOON_F_MISC_VQ);
+		dev_warn(&vb->vdev->dev, "%s: failed\n", __func__);
+	} else {
+		vb->miscq_out_hdr = buf;
+		vb->unused_page_chunk_hdr = buf +
+				sizeof(struct virtio_balloon_miscq_hdr);
+		vb->unused_page_chunk_hdr->chunks = 0;
+		vb->unused_page_chunk = buf +
+				sizeof(struct virtio_balloon_miscq_hdr) +
+				sizeof(struct virtio_balloon_page_chunk_hdr);
+	}
+}
+
 static int virtballoon_probe(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb;
@@ -869,6 +1046,9 @@  static int virtballoon_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_BALLOON_CHUNKS))
 		balloon_page_chunk_init(vb);
 
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_MISC_VQ))
+		miscq_init(vb);
+
 	mutex_init(&vb->balloon_lock);
 	init_waitqueue_head(&vb->acked);
 	vb->vdev = vdev;
@@ -946,6 +1126,8 @@  static void virtballoon_remove(struct virtio_device *vdev)
 
 	remove_common(vb);
 	free_page_bmap(vb);
+	kfree(vb->miscq_out_hdr);
+	kfree(vb->miscq_in_hdr);
 	if (vb->vb_dev_info.inode)
 		iput(vb->vb_dev_info.inode);
 	kfree(vb);
@@ -987,6 +1169,7 @@  static unsigned int features[] = {
 	VIRTIO_BALLOON_F_STATS_VQ,
 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
 	VIRTIO_BALLOON_F_BALLOON_CHUNKS,
+	VIRTIO_BALLOON_F_MISC_VQ,
 };
 
 static struct virtio_driver virtio_balloon_driver = {
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index be317b7..96bdc86 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -35,6 +35,7 @@ 
 #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
 #define VIRTIO_BALLOON_F_BALLOON_CHUNKS 3 /* Inflate/Deflate pages in chunks */
+#define VIRTIO_BALLOON_F_MISC_VQ	4 /* Virtqueue for misc. requests */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
@@ -95,4 +96,11 @@  struct virtio_balloon_page_chunk {
 	__le64 size;
 };
 
+#define VIRTIO_BALLOON_MISCQ_INQUIRE_UNUSED_PAGES 0
+#define VIRTIO_BALLOON_MISCQ_F_COMPLETE 0x1
+struct virtio_balloon_miscq_hdr {
+	__le16 cmd;
+	__le16 flags;
+};
+
 #endif /* _LINUX_VIRTIO_BALLOON_H */