diff mbox series

[v2,5/5] virtio-balloon: Add support for providing page hints to host

Message ID 20190724170514.6685.17161.stgit@localhost.localdomain (mailing list archive)
State New, archived
Headers show
Series mm / virtio: Provide support for page hinting | expand

Commit Message

Alexander Duyck July 24, 2019, 5:05 p.m. UTC
From: Alexander Duyck <alexander.h.duyck@linux.intel.com>

Add support for the page hinting feature provided by virtio-balloon.
Hinting differs from the regular balloon functionality in that it is
much less durable than a standard memory balloon. Instead of creating a
list of pages that cannot be accessed the pages are only inaccessible
while they are being indicated to the virtio interface. Once the
interface has acknowledged them they are placed back into their respective
free lists and are once again accessible by the guest system.

Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
---
 drivers/virtio/Kconfig              |    1 +
 drivers/virtio/virtio_balloon.c     |   47 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/virtio_balloon.h |    1 +
 3 files changed, 49 insertions(+)

Comments

Michael S. Tsirkin July 24, 2019, 7:02 p.m. UTC | #1
On Wed, Jul 24, 2019 at 10:05:14AM -0700, Alexander Duyck wrote:
> From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> 
> Add support for the page hinting feature provided by virtio-balloon.
> Hinting differs from the regular balloon functionality in that is is
> much less durable than a standard memory balloon. Instead of creating a
> list of pages that cannot be accessed the pages are only inaccessible
> while they are being indicated to the virtio interface. Once the
> interface has acknowledged them they are placed back into their respective
> free lists and are once again accessible by the guest system.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>

Looking at the design, it seems that hinted pages can immediately be
reused. I wonder how we can efficiently support this
with kvm when poisoning is in effect. Of course we can just
ignore the poison. However it seems cleaner to
1. verify page is poisoned with the correct value
2. fill the page with the correct value on fault

Requirement 2 requires some kind of madvise that
will save the poison e.g. in the VMA.

Not a blocker for sure ... 


> ---
>  drivers/virtio/Kconfig              |    1 +
>  drivers/virtio/virtio_balloon.c     |   47 +++++++++++++++++++++++++++++++++++
>  include/uapi/linux/virtio_balloon.h |    1 +
>  3 files changed, 49 insertions(+)
> 
> diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
> index 078615cf2afc..d45556ae1f81 100644
> --- a/drivers/virtio/Kconfig
> +++ b/drivers/virtio/Kconfig
> @@ -58,6 +58,7 @@ config VIRTIO_BALLOON
>  	tristate "Virtio balloon driver"
>  	depends on VIRTIO
>  	select MEMORY_BALLOON
> +	select PAGE_HINTING
>  	---help---
>  	 This driver supports increasing and decreasing the amount
>  	 of memory within a KVM guest.
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 226fbb995fb0..dee9f8f3ad09 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -19,6 +19,7 @@
>  #include <linux/mount.h>
>  #include <linux/magic.h>
>  #include <linux/pseudo_fs.h>
> +#include <linux/page_hinting.h>
>  
>  /*
>   * Balloon device works in 4K page units.  So each page is pointed to by
> @@ -27,6 +28,7 @@
>   */
>  #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
>  #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
> +#define VIRTIO_BALLOON_ARRAY_HINTS_MAX	32
>  #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
>  
>  #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
> @@ -46,6 +48,7 @@ enum virtio_balloon_vq {
>  	VIRTIO_BALLOON_VQ_DEFLATE,
>  	VIRTIO_BALLOON_VQ_STATS,
>  	VIRTIO_BALLOON_VQ_FREE_PAGE,
> +	VIRTIO_BALLOON_VQ_HINTING,
>  	VIRTIO_BALLOON_VQ_MAX
>  };
>  
> @@ -113,6 +116,10 @@ struct virtio_balloon {
>  
>  	/* To register a shrinker to shrink memory upon memory pressure */
>  	struct shrinker shrinker;
> +
> +	/* Unused page hinting device */
> +	struct virtqueue *hinting_vq;
> +	struct page_hinting_dev_info ph_dev_info;
>  };
>  
>  static struct virtio_device_id id_table[] = {
> @@ -152,6 +159,22 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
>  
>  }
>  
> +void virtballoon_page_hinting_react(struct page_hinting_dev_info *ph_dev_info,
> +				    unsigned int num_hints)
> +{
> +	struct virtio_balloon *vb =
> +		container_of(ph_dev_info, struct virtio_balloon, ph_dev_info);
> +	struct virtqueue *vq = vb->hinting_vq;
> +	unsigned int unused;
> +
> +	/* We should always be able to add these buffers to an empty queue. */


There can be an out-of-memory condition, and then ...

> +	virtqueue_add_inbuf(vq, ph_dev_info->sg, num_hints, vb, GFP_KERNEL);
> +	virtqueue_kick(vq);

... this will block forever.

> +	/* When host has read buffer, this completes via balloon_ack */
> +	wait_event(vb->acked, virtqueue_get_buf(vq, &unused));

However below I suggest limiting capacity which will solve
this problem for you.



> +}
> +
>  static void set_page_pfns(struct virtio_balloon *vb,
>  			  __virtio32 pfns[], struct page *page)
>  {
> @@ -476,6 +499,7 @@ static int init_vqs(struct virtio_balloon *vb)
>  	names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
>  	names[VIRTIO_BALLOON_VQ_STATS] = NULL;
>  	names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
> +	names[VIRTIO_BALLOON_VQ_HINTING] = NULL;
>  
>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
>  		names[VIRTIO_BALLOON_VQ_STATS] = "stats";
> @@ -487,11 +511,19 @@ static int init_vqs(struct virtio_balloon *vb)
>  		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
>  	}
>  
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
> +		names[VIRTIO_BALLOON_VQ_HINTING] = "hinting_vq";
> +		callbacks[VIRTIO_BALLOON_VQ_HINTING] = balloon_ack;
> +	}
> +
>  	err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
>  					 vqs, callbacks, names, NULL, NULL);
>  	if (err)
>  		return err;
>  
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING))
> +		vb->hinting_vq = vqs[VIRTIO_BALLOON_VQ_HINTING];
> +
>  	vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
>  	vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> @@ -924,12 +956,24 @@ static int virtballoon_probe(struct virtio_device *vdev)
>  		if (err)
>  			goto out_del_balloon_wq;
>  	}
> +
> +	vb->ph_dev_info.react = virtballoon_page_hinting_react;
> +	vb->ph_dev_info.capacity = VIRTIO_BALLOON_ARRAY_HINTS_MAX;

As explained above I think you should limit this by vq size.
Otherwise virtqueue add buf might fail.
In fact, by a strict reading of the spec you need to limit it
anyway, otherwise it will fail unconditionally.
In practice on most hypervisors it will typically work ...

> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
> +		err = page_hinting_startup(&vb->ph_dev_info);
> +		if (err)
> +			goto out_unregister_shrinker;
> +	}
> +
>  	virtio_device_ready(vdev);
>  
>  	if (towards_target(vb))
>  		virtballoon_changed(vdev);
>  	return 0;
>  
> +out_unregister_shrinker:
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> +		virtio_balloon_unregister_shrinker(vb);
>  out_del_balloon_wq:
>  	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
>  		destroy_workqueue(vb->balloon_wq);
> @@ -958,6 +1002,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
>  {
>  	struct virtio_balloon *vb = vdev->priv;
>  
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING))
> +		page_hinting_shutdown(&vb->ph_dev_info);
>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
>  		virtio_balloon_unregister_shrinker(vb);
>  	spin_lock_irq(&vb->stop_update_lock);
> @@ -1027,6 +1073,7 @@ static int virtballoon_validate(struct virtio_device *vdev)
>  	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
>  	VIRTIO_BALLOON_F_FREE_PAGE_HINT,
>  	VIRTIO_BALLOON_F_PAGE_POISON,
> +	VIRTIO_BALLOON_F_HINTING,
>  };
>  
>  static struct virtio_driver virtio_balloon_driver = {
> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> index a1966cd7b677..2b0f62814e22 100644
> --- a/include/uapi/linux/virtio_balloon.h
> +++ b/include/uapi/linux/virtio_balloon.h
> @@ -36,6 +36,7 @@
>  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
>  #define VIRTIO_BALLOON_F_FREE_PAGE_HINT	3 /* VQ to report free pages */
>  #define VIRTIO_BALLOON_F_PAGE_POISON	4 /* Guest is using page poisoning */
> +#define VIRTIO_BALLOON_F_HINTING	5 /* Page hinting virtqueue */
>  
>  /* Size of a PFN in the balloon interface. */
>  #define VIRTIO_BALLOON_PFN_SHIFT 12
Nitesh Narayan Lal July 24, 2019, 7:07 p.m. UTC | #2
On 7/24/19 3:02 PM, Michael S. Tsirkin wrote:
> On Wed, Jul 24, 2019 at 10:05:14AM -0700, Alexander Duyck wrote:
>> From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
>>
>> Add support for the page hinting feature provided by virtio-balloon.
>> Hinting differs from the regular balloon functionality in that is is
>> much less durable than a standard memory balloon. Instead of creating a
>> list of pages that cannot be accessed the pages are only inaccessible
>> while they are being indicated to the virtio interface. Once the
>> interface has acknowledged them they are placed back into their respective
>> free lists and are once again accessible by the guest system.
>>
>> Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> Looking at the design, it seems that hinted pages can immediately be
> reused. I wonder how we can efficiently support this
> with kvm when poisoning is in effect. Of course we can just
> ignore the poison. However it seems cleaner to
> 1. verify page is poisoned with the correct value
> 2. fill the page with the correct value on fault
Once VIRTIO_BALLOON_F_PAGE_POISON user side support is available,
can't we just use that at the time of initialization?
> Requirement 2 requires some kind of madvise that
> will save the poison e.g. in the VMA.
>
> Not a blocker for sure ... 
>
>
>> ---
>>  drivers/virtio/Kconfig              |    1 +
>>  drivers/virtio/virtio_balloon.c     |   47 +++++++++++++++++++++++++++++++++++
>>  include/uapi/linux/virtio_balloon.h |    1 +
>>  3 files changed, 49 insertions(+)
>>
>> diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
>> index 078615cf2afc..d45556ae1f81 100644
>> --- a/drivers/virtio/Kconfig
>> +++ b/drivers/virtio/Kconfig
>> @@ -58,6 +58,7 @@ config VIRTIO_BALLOON
>>  	tristate "Virtio balloon driver"
>>  	depends on VIRTIO
>>  	select MEMORY_BALLOON
>> +	select PAGE_HINTING
>>  	---help---
>>  	 This driver supports increasing and decreasing the amount
>>  	 of memory within a KVM guest.
>> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
>> index 226fbb995fb0..dee9f8f3ad09 100644
>> --- a/drivers/virtio/virtio_balloon.c
>> +++ b/drivers/virtio/virtio_balloon.c
>> @@ -19,6 +19,7 @@
>>  #include <linux/mount.h>
>>  #include <linux/magic.h>
>>  #include <linux/pseudo_fs.h>
>> +#include <linux/page_hinting.h>
>>  
>>  /*
>>   * Balloon device works in 4K page units.  So each page is pointed to by
>> @@ -27,6 +28,7 @@
>>   */
>>  #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
>>  #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
>> +#define VIRTIO_BALLOON_ARRAY_HINTS_MAX	32
>>  #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
>>  
>>  #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
>> @@ -46,6 +48,7 @@ enum virtio_balloon_vq {
>>  	VIRTIO_BALLOON_VQ_DEFLATE,
>>  	VIRTIO_BALLOON_VQ_STATS,
>>  	VIRTIO_BALLOON_VQ_FREE_PAGE,
>> +	VIRTIO_BALLOON_VQ_HINTING,
>>  	VIRTIO_BALLOON_VQ_MAX
>>  };
>>  
>> @@ -113,6 +116,10 @@ struct virtio_balloon {
>>  
>>  	/* To register a shrinker to shrink memory upon memory pressure */
>>  	struct shrinker shrinker;
>> +
>> +	/* Unused page hinting device */
>> +	struct virtqueue *hinting_vq;
>> +	struct page_hinting_dev_info ph_dev_info;
>>  };
>>  
>>  static struct virtio_device_id id_table[] = {
>> @@ -152,6 +159,22 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
>>  
>>  }
>>  
>> +void virtballoon_page_hinting_react(struct page_hinting_dev_info *ph_dev_info,
>> +				    unsigned int num_hints)
>> +{
>> +	struct virtio_balloon *vb =
>> +		container_of(ph_dev_info, struct virtio_balloon, ph_dev_info);
>> +	struct virtqueue *vq = vb->hinting_vq;
>> +	unsigned int unused;
>> +
>> +	/* We should always be able to add these buffers to an empty queue. */
>
> can be an out of memory condition, and then ...
>
>> +	virtqueue_add_inbuf(vq, ph_dev_info->sg, num_hints, vb, GFP_KERNEL);
>> +	virtqueue_kick(vq);
> ... this will block forever.
>
>> +	/* When host has read buffer, this completes via balloon_ack */
>> +	wait_event(vb->acked, virtqueue_get_buf(vq, &unused));
> However below I suggest limiting capacity which will solve
> this problem for you.
>
>
>
>> +}
>> +
>>  static void set_page_pfns(struct virtio_balloon *vb,
>>  			  __virtio32 pfns[], struct page *page)
>>  {
>> @@ -476,6 +499,7 @@ static int init_vqs(struct virtio_balloon *vb)
>>  	names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
>>  	names[VIRTIO_BALLOON_VQ_STATS] = NULL;
>>  	names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
>> +	names[VIRTIO_BALLOON_VQ_HINTING] = NULL;
>>  
>>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
>>  		names[VIRTIO_BALLOON_VQ_STATS] = "stats";
>> @@ -487,11 +511,19 @@ static int init_vqs(struct virtio_balloon *vb)
>>  		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
>>  	}
>>  
>> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
>> +		names[VIRTIO_BALLOON_VQ_HINTING] = "hinting_vq";
>> +		callbacks[VIRTIO_BALLOON_VQ_HINTING] = balloon_ack;
>> +	}
>> +
>>  	err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
>>  					 vqs, callbacks, names, NULL, NULL);
>>  	if (err)
>>  		return err;
>>  
>> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING))
>> +		vb->hinting_vq = vqs[VIRTIO_BALLOON_VQ_HINTING];
>> +
>>  	vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
>>  	vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
>>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
>> @@ -924,12 +956,24 @@ static int virtballoon_probe(struct virtio_device *vdev)
>>  		if (err)
>>  			goto out_del_balloon_wq;
>>  	}
>> +
>> +	vb->ph_dev_info.react = virtballoon_page_hinting_react;
>> +	vb->ph_dev_info.capacity = VIRTIO_BALLOON_ARRAY_HINTS_MAX;
> As explained above I think you should limit this by vq size.
> Otherwise virtqueue add buf might fail.
> In fact by struct spec reading you need to limit it
> anyway otherwise it will fail unconditionally.
> In practice on most hypervisors it will typically work ...
>
>> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
>> +		err = page_hinting_startup(&vb->ph_dev_info);
>> +		if (err)
>> +			goto out_unregister_shrinker;
>> +	}
>> +
>>  	virtio_device_ready(vdev);
>>  
>>  	if (towards_target(vb))
>>  		virtballoon_changed(vdev);
>>  	return 0;
>>  
>> +out_unregister_shrinker:
>> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
>> +		virtio_balloon_unregister_shrinker(vb);
>>  out_del_balloon_wq:
>>  	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
>>  		destroy_workqueue(vb->balloon_wq);
>> @@ -958,6 +1002,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
>>  {
>>  	struct virtio_balloon *vb = vdev->priv;
>>  
>> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING))
>> +		page_hinting_shutdown(&vb->ph_dev_info);
>>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
>>  		virtio_balloon_unregister_shrinker(vb);
>>  	spin_lock_irq(&vb->stop_update_lock);
>> @@ -1027,6 +1073,7 @@ static int virtballoon_validate(struct virtio_device *vdev)
>>  	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
>>  	VIRTIO_BALLOON_F_FREE_PAGE_HINT,
>>  	VIRTIO_BALLOON_F_PAGE_POISON,
>> +	VIRTIO_BALLOON_F_HINTING,
>>  };
>>  
>>  static struct virtio_driver virtio_balloon_driver = {
>> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
>> index a1966cd7b677..2b0f62814e22 100644
>> --- a/include/uapi/linux/virtio_balloon.h
>> +++ b/include/uapi/linux/virtio_balloon.h
>> @@ -36,6 +36,7 @@
>>  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
>>  #define VIRTIO_BALLOON_F_FREE_PAGE_HINT	3 /* VQ to report free pages */
>>  #define VIRTIO_BALLOON_F_PAGE_POISON	4 /* Guest is using page poisoning */
>> +#define VIRTIO_BALLOON_F_HINTING	5 /* Page hinting virtqueue */
>>  
>>  /* Size of a PFN in the balloon interface. */
>>  #define VIRTIO_BALLOON_PFN_SHIFT 12
Michael S. Tsirkin July 24, 2019, 7:26 p.m. UTC | #3
On Wed, Jul 24, 2019 at 03:07:42PM -0400, Nitesh Narayan Lal wrote:
> 
> On 7/24/19 3:02 PM, Michael S. Tsirkin wrote:
> > On Wed, Jul 24, 2019 at 10:05:14AM -0700, Alexander Duyck wrote:
> >> From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> >>
> >> Add support for the page hinting feature provided by virtio-balloon.
> >> Hinting differs from the regular balloon functionality in that is is
> >> much less durable than a standard memory balloon. Instead of creating a
> >> list of pages that cannot be accessed the pages are only inaccessible
> >> while they are being indicated to the virtio interface. Once the
> >> interface has acknowledged them they are placed back into their respective
> >> free lists and are once again accessible by the guest system.
> >>
> >> Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> > Looking at the design, it seems that hinted pages can immediately be
> > reused. I wonder how we can efficiently support this
> > with kvm when poisoning is in effect. Of course we can just
> > ignore the poison. However it seems cleaner to
> > 1. verify page is poisoned with the correct value
> > 2. fill the page with the correct value on fault
> Once VIRTIO_BALLOON_F_PAGE_POISON user side support is available.
> Can't we just use that at the time of initialization?

At the moment VIRTIO_BALLOON_F_PAGE_POISON simply avoids freeing the
pages.

1+2 above are exactly a way to implement VIRTIO_BALLOON_F_PAGE_POISON
such that it will still bring performance gains.

> > Requirement 2 requires some kind of madvise that
> > will save the poison e.g. in the VMA.
> >
> > Not a blocker for sure ... 
> >
> >
> >> ---
> >>  drivers/virtio/Kconfig              |    1 +
> >>  drivers/virtio/virtio_balloon.c     |   47 +++++++++++++++++++++++++++++++++++
> >>  include/uapi/linux/virtio_balloon.h |    1 +
> >>  3 files changed, 49 insertions(+)
> >>
> >> diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
> >> index 078615cf2afc..d45556ae1f81 100644
> >> --- a/drivers/virtio/Kconfig
> >> +++ b/drivers/virtio/Kconfig
> >> @@ -58,6 +58,7 @@ config VIRTIO_BALLOON
> >>  	tristate "Virtio balloon driver"
> >>  	depends on VIRTIO
> >>  	select MEMORY_BALLOON
> >> +	select PAGE_HINTING
> >>  	---help---
> >>  	 This driver supports increasing and decreasing the amount
> >>  	 of memory within a KVM guest.
> >> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> >> index 226fbb995fb0..dee9f8f3ad09 100644
> >> --- a/drivers/virtio/virtio_balloon.c
> >> +++ b/drivers/virtio/virtio_balloon.c
> >> @@ -19,6 +19,7 @@
> >>  #include <linux/mount.h>
> >>  #include <linux/magic.h>
> >>  #include <linux/pseudo_fs.h>
> >> +#include <linux/page_hinting.h>
> >>  
> >>  /*
> >>   * Balloon device works in 4K page units.  So each page is pointed to by
> >> @@ -27,6 +28,7 @@
> >>   */
> >>  #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
> >>  #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
> >> +#define VIRTIO_BALLOON_ARRAY_HINTS_MAX	32
> >>  #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
> >>  
> >>  #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
> >> @@ -46,6 +48,7 @@ enum virtio_balloon_vq {
> >>  	VIRTIO_BALLOON_VQ_DEFLATE,
> >>  	VIRTIO_BALLOON_VQ_STATS,
> >>  	VIRTIO_BALLOON_VQ_FREE_PAGE,
> >> +	VIRTIO_BALLOON_VQ_HINTING,
> >>  	VIRTIO_BALLOON_VQ_MAX
> >>  };
> >>  
> >> @@ -113,6 +116,10 @@ struct virtio_balloon {
> >>  
> >>  	/* To register a shrinker to shrink memory upon memory pressure */
> >>  	struct shrinker shrinker;
> >> +
> >> +	/* Unused page hinting device */
> >> +	struct virtqueue *hinting_vq;
> >> +	struct page_hinting_dev_info ph_dev_info;
> >>  };
> >>  
> >>  static struct virtio_device_id id_table[] = {
> >> @@ -152,6 +159,22 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
> >>  
> >>  }
> >>  
> >> +void virtballoon_page_hinting_react(struct page_hinting_dev_info *ph_dev_info,
> >> +				    unsigned int num_hints)
> >> +{
> >> +	struct virtio_balloon *vb =
> >> +		container_of(ph_dev_info, struct virtio_balloon, ph_dev_info);
> >> +	struct virtqueue *vq = vb->hinting_vq;
> >> +	unsigned int unused;
> >> +
> >> +	/* We should always be able to add these buffers to an empty queue. */
> >
> > can be an out of memory condition, and then ...
> >
> >> +	virtqueue_add_inbuf(vq, ph_dev_info->sg, num_hints, vb, GFP_KERNEL);
> >> +	virtqueue_kick(vq);
> > ... this will block forever.
> >
> >> +	/* When host has read buffer, this completes via balloon_ack */
> >> +	wait_event(vb->acked, virtqueue_get_buf(vq, &unused));
> > However below I suggest limiting capacity which will solve
> > this problem for you.
> >
> >
> >
> >> +}
> >> +
> >>  static void set_page_pfns(struct virtio_balloon *vb,
> >>  			  __virtio32 pfns[], struct page *page)
> >>  {
> >> @@ -476,6 +499,7 @@ static int init_vqs(struct virtio_balloon *vb)
> >>  	names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
> >>  	names[VIRTIO_BALLOON_VQ_STATS] = NULL;
> >>  	names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
> >> +	names[VIRTIO_BALLOON_VQ_HINTING] = NULL;
> >>  
> >>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> >>  		names[VIRTIO_BALLOON_VQ_STATS] = "stats";
> >> @@ -487,11 +511,19 @@ static int init_vqs(struct virtio_balloon *vb)
> >>  		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
> >>  	}
> >>  
> >> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
> >> +		names[VIRTIO_BALLOON_VQ_HINTING] = "hinting_vq";
> >> +		callbacks[VIRTIO_BALLOON_VQ_HINTING] = balloon_ack;
> >> +	}
> >> +
> >>  	err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
> >>  					 vqs, callbacks, names, NULL, NULL);
> >>  	if (err)
> >>  		return err;
> >>  
> >> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING))
> >> +		vb->hinting_vq = vqs[VIRTIO_BALLOON_VQ_HINTING];
> >> +
> >>  	vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
> >>  	vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
> >>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> >> @@ -924,12 +956,24 @@ static int virtballoon_probe(struct virtio_device *vdev)
> >>  		if (err)
> >>  			goto out_del_balloon_wq;
> >>  	}
> >> +
> >> +	vb->ph_dev_info.react = virtballoon_page_hinting_react;
> >> +	vb->ph_dev_info.capacity = VIRTIO_BALLOON_ARRAY_HINTS_MAX;
> > As explained above I think you should limit this by vq size.
> > Otherwise virtqueue add buf might fail.
> > In fact by struct spec reading you need to limit it
> > anyway otherwise it will fail unconditionally.
> > In practice on most hypervisors it will typically work ...
> >
> >> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
> >> +		err = page_hinting_startup(&vb->ph_dev_info);
> >> +		if (err)
> >> +			goto out_unregister_shrinker;
> >> +	}
> >> +
> >>  	virtio_device_ready(vdev);
> >>  
> >>  	if (towards_target(vb))
> >>  		virtballoon_changed(vdev);
> >>  	return 0;
> >>  
> >> +out_unregister_shrinker:
> >> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> >> +		virtio_balloon_unregister_shrinker(vb);
> >>  out_del_balloon_wq:
> >>  	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
> >>  		destroy_workqueue(vb->balloon_wq);
> >> @@ -958,6 +1002,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
> >>  {
> >>  	struct virtio_balloon *vb = vdev->priv;
> >>  
> >> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING))
> >> +		page_hinting_shutdown(&vb->ph_dev_info);
> >>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> >>  		virtio_balloon_unregister_shrinker(vb);
> >>  	spin_lock_irq(&vb->stop_update_lock);
> >> @@ -1027,6 +1073,7 @@ static int virtballoon_validate(struct virtio_device *vdev)
> >>  	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
> >>  	VIRTIO_BALLOON_F_FREE_PAGE_HINT,
> >>  	VIRTIO_BALLOON_F_PAGE_POISON,
> >> +	VIRTIO_BALLOON_F_HINTING,
> >>  };
> >>  
> >>  static struct virtio_driver virtio_balloon_driver = {
> >> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> >> index a1966cd7b677..2b0f62814e22 100644
> >> --- a/include/uapi/linux/virtio_balloon.h
> >> +++ b/include/uapi/linux/virtio_balloon.h
> >> @@ -36,6 +36,7 @@
> >>  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
> >>  #define VIRTIO_BALLOON_F_FREE_PAGE_HINT	3 /* VQ to report free pages */
> >>  #define VIRTIO_BALLOON_F_PAGE_POISON	4 /* Guest is using page poisoning */
> >> +#define VIRTIO_BALLOON_F_HINTING	5 /* Page hinting virtqueue */
> >>  
> >>  /* Size of a PFN in the balloon interface. */
> >>  #define VIRTIO_BALLOON_PFN_SHIFT 12
> -- 
> Thanks
> Nitesh
Alexander Duyck July 24, 2019, 8:37 p.m. UTC | #4
On Wed, 2019-07-24 at 15:02 -0400, Michael S. Tsirkin wrote:
> On Wed, Jul 24, 2019 at 10:05:14AM -0700, Alexander Duyck wrote:
> > From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> > 
> > Add support for the page hinting feature provided by virtio-balloon.
> > Hinting differs from the regular balloon functionality in that is is
> > much less durable than a standard memory balloon. Instead of creating a
> > list of pages that cannot be accessed the pages are only inaccessible
> > while they are being indicated to the virtio interface. Once the
> > interface has acknowledged them they are placed back into their respective
> > free lists and are once again accessible by the guest system.
> > 
> > Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> 
> Looking at the design, it seems that hinted pages can immediately be
> reused. I wonder how we can efficiently support this
> with kvm when poisoning is in effect. Of course we can just
> ignore the poison. However it seems cleaner to
> 1. verify page is poisoned with the correct value
> 2. fill the page with the correct value on fault
> 
> Requirement 2 requires some kind of madvise that
> will save the poison e.g. in the VMA.
> 
> Not a blocker for sure ... 

As per our discussion in the other patch I agree that we should either
ignore the hint/report if page poisoning is enabled, or page poisoning
should result in us poisoning the page when it is faulted back in. I had
assumed we were doing the latter; I didn't realize that it was just
disabling the free page hinting.

> > ---
> >  drivers/virtio/Kconfig              |    1 +
> >  drivers/virtio/virtio_balloon.c     |   47 +++++++++++++++++++++++++++++++++++
> >  include/uapi/linux/virtio_balloon.h |    1 +
> >  3 files changed, 49 insertions(+)
> > 
> > diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
> > index 078615cf2afc..d45556ae1f81 100644
> > --- a/drivers/virtio/Kconfig
> > +++ b/drivers/virtio/Kconfig
> > @@ -58,6 +58,7 @@ config VIRTIO_BALLOON
> >  	tristate "Virtio balloon driver"
> >  	depends on VIRTIO
> >  	select MEMORY_BALLOON
> > +	select PAGE_HINTING
> >  	---help---
> >  	 This driver supports increasing and decreasing the amount
> >  	 of memory within a KVM guest.
> > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> > index 226fbb995fb0..dee9f8f3ad09 100644
> > --- a/drivers/virtio/virtio_balloon.c
> > +++ b/drivers/virtio/virtio_balloon.c
> > @@ -19,6 +19,7 @@
> >  #include <linux/mount.h>
> >  #include <linux/magic.h>
> >  #include <linux/pseudo_fs.h>
> > +#include <linux/page_hinting.h>
> >  
> >  /*
> >   * Balloon device works in 4K page units.  So each page is pointed to by
> > @@ -27,6 +28,7 @@
> >   */
> >  #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
> >  #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
> > +#define VIRTIO_BALLOON_ARRAY_HINTS_MAX	32
> >  #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
> >  
> >  #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
> > @@ -46,6 +48,7 @@ enum virtio_balloon_vq {
> >  	VIRTIO_BALLOON_VQ_DEFLATE,
> >  	VIRTIO_BALLOON_VQ_STATS,
> >  	VIRTIO_BALLOON_VQ_FREE_PAGE,
> > +	VIRTIO_BALLOON_VQ_HINTING,
> >  	VIRTIO_BALLOON_VQ_MAX
> >  };
> >  
> > @@ -113,6 +116,10 @@ struct virtio_balloon {
> >  
> >  	/* To register a shrinker to shrink memory upon memory pressure */
> >  	struct shrinker shrinker;
> > +
> > +	/* Unused page hinting device */
> > +	struct virtqueue *hinting_vq;
> > +	struct page_hinting_dev_info ph_dev_info;
> >  };
> >  
> >  static struct virtio_device_id id_table[] = {
> > @@ -152,6 +159,22 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
> >  
> >  }
> >  
> > +void virtballoon_page_hinting_react(struct page_hinting_dev_info *ph_dev_info,
> > +				    unsigned int num_hints)
> > +{
> > +	struct virtio_balloon *vb =
> > +		container_of(ph_dev_info, struct virtio_balloon, ph_dev_info);
> > +	struct virtqueue *vq = vb->hinting_vq;
> > +	unsigned int unused;
> > +
> > +	/* We should always be able to add these buffers to an empty queue. */
> 
> can be an out of memory condition, and then ...
> 
> > +	virtqueue_add_inbuf(vq, ph_dev_info->sg, num_hints, vb, GFP_KERNEL);
> > +	virtqueue_kick(vq);
> 
> ... this will block forever.
> 
> > +	/* When host has read buffer, this completes via balloon_ack */
> > +	wait_event(vb->acked, virtqueue_get_buf(vq, &unused));
> 
> However below I suggest limiting capacity which will solve
> this problem for you.

I wasn't aware that virtqueue_add_inbuf actually performed an allocation.

> > +}
> > +
> >  static void set_page_pfns(struct virtio_balloon *vb,
> >  			  __virtio32 pfns[], struct page *page)
> >  {
> > @@ -476,6 +499,7 @@ static int init_vqs(struct virtio_balloon *vb)
> >  	names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
> >  	names[VIRTIO_BALLOON_VQ_STATS] = NULL;
> >  	names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
> > +	names[VIRTIO_BALLOON_VQ_HINTING] = NULL;
> >  
> >  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> >  		names[VIRTIO_BALLOON_VQ_STATS] = "stats";
> > @@ -487,11 +511,19 @@ static int init_vqs(struct virtio_balloon *vb)
> >  		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
> >  	}
> >  
> > +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
> > +		names[VIRTIO_BALLOON_VQ_HINTING] = "hinting_vq";
> > +		callbacks[VIRTIO_BALLOON_VQ_HINTING] = balloon_ack;
> > +	}
> > +
> >  	err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
> >  					 vqs, callbacks, names, NULL, NULL);
> >  	if (err)
> >  		return err;
> >  
> > +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING))
> > +		vb->hinting_vq = vqs[VIRTIO_BALLOON_VQ_HINTING];
> > +
> >  	vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
> >  	vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
> >  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> > @@ -924,12 +956,24 @@ static int virtballoon_probe(struct virtio_device *vdev)
> >  		if (err)
> >  			goto out_del_balloon_wq;
> >  	}
> > +
> > +	vb->ph_dev_info.react = virtballoon_page_hinting_react;
> > +	vb->ph_dev_info.capacity = VIRTIO_BALLOON_ARRAY_HINTS_MAX;
> 
> As explained above I think you should limit this by vq size.
> Otherwise virtqueue add buf might fail.
> In fact by struct spec reading you need to limit it
> anyway otherwise it will fail unconditionally.
> In practice on most hypervisors it will typically work ...

So I would just need to query that via the virtqueue_get_vring_size
function, correct? I could probably just set capacity to the minimum of
HINTS_MAX and that value, right?
Michael S. Tsirkin July 24, 2019, 8:43 p.m. UTC | #5
On Wed, Jul 24, 2019 at 01:37:47PM -0700, Alexander Duyck wrote:
> On Wed, 2019-07-24 at 15:02 -0400, Michael S. Tsirkin wrote:
> > On Wed, Jul 24, 2019 at 10:05:14AM -0700, Alexander Duyck wrote:
> > > From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> > > 
> > > Add support for the page hinting feature provided by virtio-balloon.
> > > Hinting differs from the regular balloon functionality in that is is
> > > much less durable than a standard memory balloon. Instead of creating a
> > > list of pages that cannot be accessed the pages are only inaccessible
> > > while they are being indicated to the virtio interface. Once the
> > > interface has acknowledged them they are placed back into their respective
> > > free lists and are once again accessible by the guest system.
> > > 
> > > Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> > 
> > Looking at the design, it seems that hinted pages can immediately be
> > reused. I wonder how we can efficiently support this
> > with kvm when poisoning is in effect. Of course we can just
> > ignore the poison. However it seems cleaner to
> > 1. verify page is poisoned with the correct value
> > 2. fill the page with the correct value on fault
> > 
> > Requirement 2 requires some kind of madvise that
> > will save the poison e.g. in the VMA.
> > 
> > Not a blocker for sure ... 
> 
> As per our discussion in the other patch I agree that we should either
> ignore the hint/report if page poisoning is enabled, or page poisoning
> should result in us poisoning the page when it is faulted back in. I had
> assumed we were doing the latter, I didn't realize that is was just
> disabling the free page hinting.

In fact I see that the latest versions of qemu don't seem to do
the latter either. Need to fix that ASAP...


> > > ---
> > >  drivers/virtio/Kconfig              |    1 +
> > >  drivers/virtio/virtio_balloon.c     |   47 +++++++++++++++++++++++++++++++++++
> > >  include/uapi/linux/virtio_balloon.h |    1 +
> > >  3 files changed, 49 insertions(+)
> > > 
> > > diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
> > > index 078615cf2afc..d45556ae1f81 100644
> > > --- a/drivers/virtio/Kconfig
> > > +++ b/drivers/virtio/Kconfig
> > > @@ -58,6 +58,7 @@ config VIRTIO_BALLOON
> > >  	tristate "Virtio balloon driver"
> > >  	depends on VIRTIO
> > >  	select MEMORY_BALLOON
> > > +	select PAGE_HINTING
> > >  	---help---
> > >  	 This driver supports increasing and decreasing the amount
> > >  	 of memory within a KVM guest.
> > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> > > index 226fbb995fb0..dee9f8f3ad09 100644
> > > --- a/drivers/virtio/virtio_balloon.c
> > > +++ b/drivers/virtio/virtio_balloon.c
> > > @@ -19,6 +19,7 @@
> > >  #include <linux/mount.h>
> > >  #include <linux/magic.h>
> > >  #include <linux/pseudo_fs.h>
> > > +#include <linux/page_hinting.h>
> > >  
> > >  /*
> > >   * Balloon device works in 4K page units.  So each page is pointed to by
> > > @@ -27,6 +28,7 @@
> > >   */
> > >  #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
> > >  #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
> > > +#define VIRTIO_BALLOON_ARRAY_HINTS_MAX	32
> > >  #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
> > >  
> > >  #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
> > > @@ -46,6 +48,7 @@ enum virtio_balloon_vq {
> > >  	VIRTIO_BALLOON_VQ_DEFLATE,
> > >  	VIRTIO_BALLOON_VQ_STATS,
> > >  	VIRTIO_BALLOON_VQ_FREE_PAGE,
> > > +	VIRTIO_BALLOON_VQ_HINTING,
> > >  	VIRTIO_BALLOON_VQ_MAX
> > >  };
> > >  
> > > @@ -113,6 +116,10 @@ struct virtio_balloon {
> > >  
> > >  	/* To register a shrinker to shrink memory upon memory pressure */
> > >  	struct shrinker shrinker;
> > > +
> > > +	/* Unused page hinting device */
> > > +	struct virtqueue *hinting_vq;
> > > +	struct page_hinting_dev_info ph_dev_info;
> > >  };
> > >  
> > >  static struct virtio_device_id id_table[] = {
> > > @@ -152,6 +159,22 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
> > >  
> > >  }
> > >  
> > > +void virtballoon_page_hinting_react(struct page_hinting_dev_info *ph_dev_info,
> > > +				    unsigned int num_hints)
> > > +{
> > > +	struct virtio_balloon *vb =
> > > +		container_of(ph_dev_info, struct virtio_balloon, ph_dev_info);
> > > +	struct virtqueue *vq = vb->hinting_vq;
> > > +	unsigned int unused;
> > > +
> > > +	/* We should always be able to add these buffers to an empty queue. */
> > 
> > can be an out of memory condition, and then ...
> > 
> > > +	virtqueue_add_inbuf(vq, ph_dev_info->sg, num_hints, vb, GFP_KERNEL);
> > > +	virtqueue_kick(vq);
> > 
> > ... this will block forever.
> > 
> > > +	/* When host has read buffer, this completes via balloon_ack */
> > > +	wait_event(vb->acked, virtqueue_get_buf(vq, &unused));
> > 
> > However below I suggest limiting capacity which will solve
> > this problem for you.
> 
> I wasn't aware that virtqueue_add_inbuf actually performed an allocation.
> 
> > > +}
> > > +
> > >  static void set_page_pfns(struct virtio_balloon *vb,
> > >  			  __virtio32 pfns[], struct page *page)
> > >  {
> > > @@ -476,6 +499,7 @@ static int init_vqs(struct virtio_balloon *vb)
> > >  	names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
> > >  	names[VIRTIO_BALLOON_VQ_STATS] = NULL;
> > >  	names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
> > > +	names[VIRTIO_BALLOON_VQ_HINTING] = NULL;
> > >  
> > >  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> > >  		names[VIRTIO_BALLOON_VQ_STATS] = "stats";
> > > @@ -487,11 +511,19 @@ static int init_vqs(struct virtio_balloon *vb)
> > >  		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
> > >  	}
> > >  
> > > +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
> > > +		names[VIRTIO_BALLOON_VQ_HINTING] = "hinting_vq";
> > > +		callbacks[VIRTIO_BALLOON_VQ_HINTING] = balloon_ack;
> > > +	}
> > > +
> > >  	err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
> > >  					 vqs, callbacks, names, NULL, NULL);
> > >  	if (err)
> > >  		return err;
> > >  
> > > +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING))
> > > +		vb->hinting_vq = vqs[VIRTIO_BALLOON_VQ_HINTING];
> > > +
> > >  	vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
> > >  	vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
> > >  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> > > @@ -924,12 +956,24 @@ static int virtballoon_probe(struct virtio_device *vdev)
> > >  		if (err)
> > >  			goto out_del_balloon_wq;
> > >  	}
> > > +
> > > +	vb->ph_dev_info.react = virtballoon_page_hinting_react;
> > > +	vb->ph_dev_info.capacity = VIRTIO_BALLOON_ARRAY_HINTS_MAX;
> > 
> > As explained above I think you should limit this by vq size.
> > Otherwise virtqueue add buf might fail.
> > In fact by struct spec reading you need to limit it
> > anyway otherwise it will fail unconditionally.
> > In practice on most hypervisors it will typically work ...
> 
> So I would just need to query that via the virtqueue_get_vring_size
> function correct? I could probably just set capacity to the minimum of the
> HINTS_MAX and that value right?
> 
>
Nitesh Narayan Lal July 25, 2019, 2:44 p.m. UTC | #6
On 7/24/19 3:02 PM, Michael S. Tsirkin wrote:
> On Wed, Jul 24, 2019 at 10:05:14AM -0700, Alexander Duyck wrote:
>> From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
>>
>> Add support for the page hinting feature provided by virtio-balloon.
>> Hinting differs from the regular balloon functionality in that is is
>> much less durable than a standard memory balloon. Instead of creating a
>> list of pages that cannot be accessed the pages are only inaccessible
>> while they are being indicated to the virtio interface. Once the
>> interface has acknowledged them they are placed back into their respective
>> free lists and are once again accessible by the guest system.
>>
>> Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> Looking at the design, it seems that hinted pages can immediately be
> reused. I wonder how we can efficiently support this
> with kvm when poisoning is in effect. Of course we can just
> ignore the poison. However it seems cleaner to
> 1. verify page is poisoned with the correct value
> 2. fill the page with the correct value on fault
>
> Requirement 2 requires some kind of madvise that
> will save the poison e.g. in the VMA.
>
> Not a blocker for sure ... 
>
>
>> ---
>>  drivers/virtio/Kconfig              |    1 +
>>  drivers/virtio/virtio_balloon.c     |   47 +++++++++++++++++++++++++++++++++++
>>  include/uapi/linux/virtio_balloon.h |    1 +
>>  3 files changed, 49 insertions(+)
>>
>> diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
>> index 078615cf2afc..d45556ae1f81 100644
>> --- a/drivers/virtio/Kconfig
>> +++ b/drivers/virtio/Kconfig
>> @@ -58,6 +58,7 @@ config VIRTIO_BALLOON
>>  	tristate "Virtio balloon driver"
>>  	depends on VIRTIO
>>  	select MEMORY_BALLOON
>> +	select PAGE_HINTING
>>  	---help---
>>  	 This driver supports increasing and decreasing the amount
>>  	 of memory within a KVM guest.
>> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
>> index 226fbb995fb0..dee9f8f3ad09 100644
>> --- a/drivers/virtio/virtio_balloon.c
>> +++ b/drivers/virtio/virtio_balloon.c
>> @@ -19,6 +19,7 @@
>>  #include <linux/mount.h>
>>  #include <linux/magic.h>
>>  #include <linux/pseudo_fs.h>
>> +#include <linux/page_hinting.h>
>>  
>>  /*
>>   * Balloon device works in 4K page units.  So each page is pointed to by
>> @@ -27,6 +28,7 @@
>>   */
>>  #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
>>  #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
>> +#define VIRTIO_BALLOON_ARRAY_HINTS_MAX	32
>>  #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
>>  
>>  #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
>> @@ -46,6 +48,7 @@ enum virtio_balloon_vq {
>>  	VIRTIO_BALLOON_VQ_DEFLATE,
>>  	VIRTIO_BALLOON_VQ_STATS,
>>  	VIRTIO_BALLOON_VQ_FREE_PAGE,
>> +	VIRTIO_BALLOON_VQ_HINTING,
>>  	VIRTIO_BALLOON_VQ_MAX
>>  };
>>  
>> @@ -113,6 +116,10 @@ struct virtio_balloon {
>>  
>>  	/* To register a shrinker to shrink memory upon memory pressure */
>>  	struct shrinker shrinker;
>> +
>> +	/* Unused page hinting device */
>> +	struct virtqueue *hinting_vq;
>> +	struct page_hinting_dev_info ph_dev_info;
>>  };
>>  
>>  static struct virtio_device_id id_table[] = {
>> @@ -152,6 +159,22 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
>>  
>>  }
>>  
>> +void virtballoon_page_hinting_react(struct page_hinting_dev_info *ph_dev_info,
>> +				    unsigned int num_hints)
>> +{
>> +	struct virtio_balloon *vb =
>> +		container_of(ph_dev_info, struct virtio_balloon, ph_dev_info);
>> +	struct virtqueue *vq = vb->hinting_vq;
>> +	unsigned int unused;
>> +
>> +	/* We should always be able to add these buffers to an empty queue. */
>
> can be an out of memory condition, and then ...

Do we need an error check here?

For situations where this fails we should disable hinting completely, maybe?


>
>> +	virtqueue_add_inbuf(vq, ph_dev_info->sg, num_hints, vb, GFP_KERNEL);
>> +	virtqueue_kick(vq);
> ... this will block forever.
>
>> +	/* When host has read buffer, this completes via balloon_ack */
>> +	wait_event(vb->acked, virtqueue_get_buf(vq, &unused));
> However below I suggest limiting capacity which will solve
> this problem for you.
>
>
>
>> +}
>> +
>>  static void set_page_pfns(struct virtio_balloon *vb,
>>  			  __virtio32 pfns[], struct page *page)
>>  {
>> @@ -476,6 +499,7 @@ static int init_vqs(struct virtio_balloon *vb)
>>  	names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
>>  	names[VIRTIO_BALLOON_VQ_STATS] = NULL;
>>  	names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
>> +	names[VIRTIO_BALLOON_VQ_HINTING] = NULL;
>>  
>>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
>>  		names[VIRTIO_BALLOON_VQ_STATS] = "stats";
>> @@ -487,11 +511,19 @@ static int init_vqs(struct virtio_balloon *vb)
>>  		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
>>  	}
>>  
>> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
>> +		names[VIRTIO_BALLOON_VQ_HINTING] = "hinting_vq";
>> +		callbacks[VIRTIO_BALLOON_VQ_HINTING] = balloon_ack;
>> +	}
>> +
>>  	err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
>>  					 vqs, callbacks, names, NULL, NULL);
>>  	if (err)
>>  		return err;
>>  
>> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING))
>> +		vb->hinting_vq = vqs[VIRTIO_BALLOON_VQ_HINTING];
>> +
>>  	vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
>>  	vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
>>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
>> @@ -924,12 +956,24 @@ static int virtballoon_probe(struct virtio_device *vdev)
>>  		if (err)
>>  			goto out_del_balloon_wq;
>>  	}
>> +
>> +	vb->ph_dev_info.react = virtballoon_page_hinting_react;
>> +	vb->ph_dev_info.capacity = VIRTIO_BALLOON_ARRAY_HINTS_MAX;
> As explained above I think you should limit this by vq size.
> Otherwise virtqueue add buf might fail.
> In fact by struct spec reading you need to limit it
> anyway otherwise it will fail unconditionally.
> In practice on most hypervisors it will typically work ...
>
>> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
>> +		err = page_hinting_startup(&vb->ph_dev_info);
>> +		if (err)
>> +			goto out_unregister_shrinker;
>> +	}
>> +
>>  	virtio_device_ready(vdev);
>>  
>>  	if (towards_target(vb))
>>  		virtballoon_changed(vdev);
>>  	return 0;
>>  
>> +out_unregister_shrinker:
>> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
>> +		virtio_balloon_unregister_shrinker(vb);
>>  out_del_balloon_wq:
>>  	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
>>  		destroy_workqueue(vb->balloon_wq);
>> @@ -958,6 +1002,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
>>  {
>>  	struct virtio_balloon *vb = vdev->priv;
>>  
>> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING))
>> +		page_hinting_shutdown(&vb->ph_dev_info);
>>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
>>  		virtio_balloon_unregister_shrinker(vb);
>>  	spin_lock_irq(&vb->stop_update_lock);
>> @@ -1027,6 +1073,7 @@ static int virtballoon_validate(struct virtio_device *vdev)
>>  	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
>>  	VIRTIO_BALLOON_F_FREE_PAGE_HINT,
>>  	VIRTIO_BALLOON_F_PAGE_POISON,
>> +	VIRTIO_BALLOON_F_HINTING,
>>  };
>>  
>>  static struct virtio_driver virtio_balloon_driver = {
>> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
>> index a1966cd7b677..2b0f62814e22 100644
>> --- a/include/uapi/linux/virtio_balloon.h
>> +++ b/include/uapi/linux/virtio_balloon.h
>> @@ -36,6 +36,7 @@
>>  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
>>  #define VIRTIO_BALLOON_F_FREE_PAGE_HINT	3 /* VQ to report free pages */
>>  #define VIRTIO_BALLOON_F_PAGE_POISON	4 /* Guest is using page poisoning */
>> +#define VIRTIO_BALLOON_F_HINTING	5 /* Page hinting virtqueue */
>>  
>>  /* Size of a PFN in the balloon interface. */
>>  #define VIRTIO_BALLOON_PFN_SHIFT 12
Michael S. Tsirkin July 25, 2019, 2:54 p.m. UTC | #7
On Thu, Jul 25, 2019 at 10:44:01AM -0400, Nitesh Narayan Lal wrote:
> 
> On 7/24/19 3:02 PM, Michael S. Tsirkin wrote:
> > On Wed, Jul 24, 2019 at 10:05:14AM -0700, Alexander Duyck wrote:
> >> From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> >>
> >> Add support for the page hinting feature provided by virtio-balloon.
> >> Hinting differs from the regular balloon functionality in that is is
> >> much less durable than a standard memory balloon. Instead of creating a
> >> list of pages that cannot be accessed the pages are only inaccessible
> >> while they are being indicated to the virtio interface. Once the
> >> interface has acknowledged them they are placed back into their respective
> >> free lists and are once again accessible by the guest system.
> >>
> >> Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> > Looking at the design, it seems that hinted pages can immediately be
> > reused. I wonder how we can efficiently support this
> > with kvm when poisoning is in effect. Of course we can just
> > ignore the poison. However it seems cleaner to
> > 1. verify page is poisoned with the correct value
> > 2. fill the page with the correct value on fault
> >
> > Requirement 2 requires some kind of madvise that
> > will save the poison e.g. in the VMA.
> >
> > Not a blocker for sure ... 
> >
> >
> >> ---
> >>  drivers/virtio/Kconfig              |    1 +
> >>  drivers/virtio/virtio_balloon.c     |   47 +++++++++++++++++++++++++++++++++++
> >>  include/uapi/linux/virtio_balloon.h |    1 +
> >>  3 files changed, 49 insertions(+)
> >>
> >> diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
> >> index 078615cf2afc..d45556ae1f81 100644
> >> --- a/drivers/virtio/Kconfig
> >> +++ b/drivers/virtio/Kconfig
> >> @@ -58,6 +58,7 @@ config VIRTIO_BALLOON
> >>  	tristate "Virtio balloon driver"
> >>  	depends on VIRTIO
> >>  	select MEMORY_BALLOON
> >> +	select PAGE_HINTING
> >>  	---help---
> >>  	 This driver supports increasing and decreasing the amount
> >>  	 of memory within a KVM guest.
> >> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> >> index 226fbb995fb0..dee9f8f3ad09 100644
> >> --- a/drivers/virtio/virtio_balloon.c
> >> +++ b/drivers/virtio/virtio_balloon.c
> >> @@ -19,6 +19,7 @@
> >>  #include <linux/mount.h>
> >>  #include <linux/magic.h>
> >>  #include <linux/pseudo_fs.h>
> >> +#include <linux/page_hinting.h>
> >>  
> >>  /*
> >>   * Balloon device works in 4K page units.  So each page is pointed to by
> >> @@ -27,6 +28,7 @@
> >>   */
> >>  #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
> >>  #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
> >> +#define VIRTIO_BALLOON_ARRAY_HINTS_MAX	32
> >>  #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
> >>  
> >>  #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
> >> @@ -46,6 +48,7 @@ enum virtio_balloon_vq {
> >>  	VIRTIO_BALLOON_VQ_DEFLATE,
> >>  	VIRTIO_BALLOON_VQ_STATS,
> >>  	VIRTIO_BALLOON_VQ_FREE_PAGE,
> >> +	VIRTIO_BALLOON_VQ_HINTING,
> >>  	VIRTIO_BALLOON_VQ_MAX
> >>  };
> >>  
> >> @@ -113,6 +116,10 @@ struct virtio_balloon {
> >>  
> >>  	/* To register a shrinker to shrink memory upon memory pressure */
> >>  	struct shrinker shrinker;
> >> +
> >> +	/* Unused page hinting device */
> >> +	struct virtqueue *hinting_vq;
> >> +	struct page_hinting_dev_info ph_dev_info;
> >>  };
> >>  
> >>  static struct virtio_device_id id_table[] = {
> >> @@ -152,6 +159,22 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
> >>  
> >>  }
> >>  
> >> +void virtballoon_page_hinting_react(struct page_hinting_dev_info *ph_dev_info,
> >> +				    unsigned int num_hints)
> >> +{
> >> +	struct virtio_balloon *vb =
> >> +		container_of(ph_dev_info, struct virtio_balloon, ph_dev_info);
> >> +	struct virtqueue *vq = vb->hinting_vq;
> >> +	unsigned int unused;
> >> +
> >> +	/* We should always be able to add these buffers to an empty queue. */
> >
> > can be an out of memory condition, and then ...
> 
> Do we need an error check here?
> 
> For situations where this fails we should disable hinting completely, maybe?

I would just limit this to the vq size; then you know it won't fail.

> 
> >
> >> +	virtqueue_add_inbuf(vq, ph_dev_info->sg, num_hints, vb, GFP_KERNEL);
> >> +	virtqueue_kick(vq);
> > ... this will block forever.
> >
> >> +	/* When host has read buffer, this completes via balloon_ack */
> >> +	wait_event(vb->acked, virtqueue_get_buf(vq, &unused));
> > However below I suggest limiting capacity which will solve
> > this problem for you.
> >
> >
> >
> >> +}
> >> +
> >>  static void set_page_pfns(struct virtio_balloon *vb,
> >>  			  __virtio32 pfns[], struct page *page)
> >>  {
> >> @@ -476,6 +499,7 @@ static int init_vqs(struct virtio_balloon *vb)
> >>  	names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
> >>  	names[VIRTIO_BALLOON_VQ_STATS] = NULL;
> >>  	names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
> >> +	names[VIRTIO_BALLOON_VQ_HINTING] = NULL;
> >>  
> >>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> >>  		names[VIRTIO_BALLOON_VQ_STATS] = "stats";
> >> @@ -487,11 +511,19 @@ static int init_vqs(struct virtio_balloon *vb)
> >>  		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
> >>  	}
> >>  
> >> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
> >> +		names[VIRTIO_BALLOON_VQ_HINTING] = "hinting_vq";
> >> +		callbacks[VIRTIO_BALLOON_VQ_HINTING] = balloon_ack;
> >> +	}
> >> +
> >>  	err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
> >>  					 vqs, callbacks, names, NULL, NULL);
> >>  	if (err)
> >>  		return err;
> >>  
> >> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING))
> >> +		vb->hinting_vq = vqs[VIRTIO_BALLOON_VQ_HINTING];
> >> +
> >>  	vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
> >>  	vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
> >>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> >> @@ -924,12 +956,24 @@ static int virtballoon_probe(struct virtio_device *vdev)
> >>  		if (err)
> >>  			goto out_del_balloon_wq;
> >>  	}
> >> +
> >> +	vb->ph_dev_info.react = virtballoon_page_hinting_react;
> >> +	vb->ph_dev_info.capacity = VIRTIO_BALLOON_ARRAY_HINTS_MAX;
> > As explained above I think you should limit this by vq size.
> > Otherwise virtqueue add buf might fail.
> > In fact by struct spec reading you need to limit it
> > anyway otherwise it will fail unconditionally.
> > In practice on most hypervisors it will typically work ...
> >
> >> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
> >> +		err = page_hinting_startup(&vb->ph_dev_info);
> >> +		if (err)
> >> +			goto out_unregister_shrinker;
> >> +	}
> >> +
> >>  	virtio_device_ready(vdev);
> >>  
> >>  	if (towards_target(vb))
> >>  		virtballoon_changed(vdev);
> >>  	return 0;
> >>  
> >> +out_unregister_shrinker:
> >> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> >> +		virtio_balloon_unregister_shrinker(vb);
> >>  out_del_balloon_wq:
> >>  	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
> >>  		destroy_workqueue(vb->balloon_wq);
> >> @@ -958,6 +1002,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
> >>  {
> >>  	struct virtio_balloon *vb = vdev->priv;
> >>  
> >> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING))
> >> +		page_hinting_shutdown(&vb->ph_dev_info);
> >>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> >>  		virtio_balloon_unregister_shrinker(vb);
> >>  	spin_lock_irq(&vb->stop_update_lock);
> >> @@ -1027,6 +1073,7 @@ static int virtballoon_validate(struct virtio_device *vdev)
> >>  	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
> >>  	VIRTIO_BALLOON_F_FREE_PAGE_HINT,
> >>  	VIRTIO_BALLOON_F_PAGE_POISON,
> >> +	VIRTIO_BALLOON_F_HINTING,
> >>  };
> >>  
> >>  static struct virtio_driver virtio_balloon_driver = {
> >> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> >> index a1966cd7b677..2b0f62814e22 100644
> >> --- a/include/uapi/linux/virtio_balloon.h
> >> +++ b/include/uapi/linux/virtio_balloon.h
> >> @@ -36,6 +36,7 @@
> >>  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
> >>  #define VIRTIO_BALLOON_F_FREE_PAGE_HINT	3 /* VQ to report free pages */
> >>  #define VIRTIO_BALLOON_F_PAGE_POISON	4 /* Guest is using page poisoning */
> >> +#define VIRTIO_BALLOON_F_HINTING	5 /* Page hinting virtqueue */
> >>  
> >>  /* Size of a PFN in the balloon interface. */
> >>  #define VIRTIO_BALLOON_PFN_SHIFT 12
> -- 
> Thanks
> Nitesh
Alexander Duyck July 25, 2019, 2:56 p.m. UTC | #8
On Thu, 2019-07-25 at 10:44 -0400, Nitesh Narayan Lal wrote:
> On 7/24/19 3:02 PM, Michael S. Tsirkin wrote:
> > On Wed, Jul 24, 2019 at 10:05:14AM -0700, Alexander Duyck wrote:
> > > From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> > > 
> > > Add support for the page hinting feature provided by virtio-balloon.
> > > Hinting differs from the regular balloon functionality in that is is
> > > much less durable than a standard memory balloon. Instead of creating a
> > > list of pages that cannot be accessed the pages are only inaccessible
> > > while they are being indicated to the virtio interface. Once the
> > > interface has acknowledged them they are placed back into their respective
> > > free lists and are once again accessible by the guest system.
> > > 
> > > Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> > Looking at the design, it seems that hinted pages can immediately be
> > reused. I wonder how we can efficiently support this
> > with kvm when poisoning is in effect. Of course we can just
> > ignore the poison. However it seems cleaner to
> > 1. verify page is poisoned with the correct value
> > 2. fill the page with the correct value on fault
> > 
> > Requirement 2 requires some kind of madvise that
> > will save the poison e.g. in the VMA.
> > 
> > Not a blocker for sure ... 
> > 
> > 
> > > ---
> > >  drivers/virtio/Kconfig              |    1 +
> > >  drivers/virtio/virtio_balloon.c     |   47 +++++++++++++++++++++++++++++++++++
> > >  include/uapi/linux/virtio_balloon.h |    1 +
> > >  3 files changed, 49 insertions(+)
> > > 
> > > diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
> > > index 078615cf2afc..d45556ae1f81 100644
> > > --- a/drivers/virtio/Kconfig
> > > +++ b/drivers/virtio/Kconfig
> > > @@ -58,6 +58,7 @@ config VIRTIO_BALLOON
> > >  	tristate "Virtio balloon driver"
> > >  	depends on VIRTIO
> > >  	select MEMORY_BALLOON
> > > +	select PAGE_HINTING
> > >  	---help---
> > >  	 This driver supports increasing and decreasing the amount
> > >  	 of memory within a KVM guest.
> > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> > > index 226fbb995fb0..dee9f8f3ad09 100644
> > > --- a/drivers/virtio/virtio_balloon.c
> > > +++ b/drivers/virtio/virtio_balloon.c
> > > @@ -19,6 +19,7 @@
> > >  #include <linux/mount.h>
> > >  #include <linux/magic.h>
> > >  #include <linux/pseudo_fs.h>
> > > +#include <linux/page_hinting.h>
> > >  
> > >  /*
> > >   * Balloon device works in 4K page units.  So each page is pointed to by
> > > @@ -27,6 +28,7 @@
> > >   */
> > >  #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
> > >  #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
> > > +#define VIRTIO_BALLOON_ARRAY_HINTS_MAX	32
> > >  #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
> > >  
> > >  #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
> > > @@ -46,6 +48,7 @@ enum virtio_balloon_vq {
> > >  	VIRTIO_BALLOON_VQ_DEFLATE,
> > >  	VIRTIO_BALLOON_VQ_STATS,
> > >  	VIRTIO_BALLOON_VQ_FREE_PAGE,
> > > +	VIRTIO_BALLOON_VQ_HINTING,
> > >  	VIRTIO_BALLOON_VQ_MAX
> > >  };
> > >  
> > > @@ -113,6 +116,10 @@ struct virtio_balloon {
> > >  
> > >  	/* To register a shrinker to shrink memory upon memory pressure */
> > >  	struct shrinker shrinker;
> > > +
> > > +	/* Unused page hinting device */
> > > +	struct virtqueue *hinting_vq;
> > > +	struct page_hinting_dev_info ph_dev_info;
> > >  };
> > >  
> > >  static struct virtio_device_id id_table[] = {
> > > @@ -152,6 +159,22 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
> > >  
> > >  }
> > >  
> > > +void virtballoon_page_hinting_react(struct page_hinting_dev_info *ph_dev_info,
> > > +				    unsigned int num_hints)
> > > +{
> > > +	struct virtio_balloon *vb =
> > > +		container_of(ph_dev_info, struct virtio_balloon, ph_dev_info);
> > > +	struct virtqueue *vq = vb->hinting_vq;
> > > +	unsigned int unused;
> > > +
> > > +	/* We should always be able to add these buffers to an empty queue. */
> > 
> > can be an out of memory condition, and then ...
> 
> Do we need an error check here?
> 
> For situations where this fails we should disable hinting completely, maybe?

No. Instead I will just limit the capacity to no more than the vq size.
Doing that should allow us to avoid the out of memory issue here if I am
understanding things correctly.

I'm assuming the allocation being referred to is alloc_indirect_split().
If so, it looks like it can fail, and then we just fall back to using
the vring.desc directly, which will work for my purposes as long as I limit
the capacity of the scatterlist to no more than the size of the vring.
Michael S. Tsirkin July 25, 2019, 2:59 p.m. UTC | #9
On Thu, Jul 25, 2019 at 07:56:15AM -0700, Alexander Duyck wrote:
> On Thu, 2019-07-25 at 10:44 -0400, Nitesh Narayan Lal wrote:
> > On 7/24/19 3:02 PM, Michael S. Tsirkin wrote:
> > > On Wed, Jul 24, 2019 at 10:05:14AM -0700, Alexander Duyck wrote:
> > > > From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> > > > 
> > > > Add support for the page hinting feature provided by virtio-balloon.
> > > > Hinting differs from the regular balloon functionality in that is is
> > > > much less durable than a standard memory balloon. Instead of creating a
> > > > list of pages that cannot be accessed the pages are only inaccessible
> > > > while they are being indicated to the virtio interface. Once the
> > > > interface has acknowledged them they are placed back into their respective
> > > > free lists and are once again accessible by the guest system.
> > > > 
> > > > Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> > > Looking at the design, it seems that hinted pages can immediately be
> > > reused. I wonder how we can efficiently support this
> > > with kvm when poisoning is in effect. Of course we can just
> > > ignore the poison. However it seems cleaner to
> > > 1. verify page is poisoned with the correct value
> > > 2. fill the page with the correct value on fault
> > > 
> > > Requirement 2 requires some kind of madvise that
> > > will save the poison e.g. in the VMA.
> > > 
> > > Not a blocker for sure ... 
> > > 
> > > 
> > > > ---
> > > >  drivers/virtio/Kconfig              |    1 +
> > > >  drivers/virtio/virtio_balloon.c     |   47 +++++++++++++++++++++++++++++++++++
> > > >  include/uapi/linux/virtio_balloon.h |    1 +
> > > >  3 files changed, 49 insertions(+)
> > > > 
> > > > diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
> > > > index 078615cf2afc..d45556ae1f81 100644
> > > > --- a/drivers/virtio/Kconfig
> > > > +++ b/drivers/virtio/Kconfig
> > > > @@ -58,6 +58,7 @@ config VIRTIO_BALLOON
> > > >  	tristate "Virtio balloon driver"
> > > >  	depends on VIRTIO
> > > >  	select MEMORY_BALLOON
> > > > +	select PAGE_HINTING
> > > >  	---help---
> > > >  	 This driver supports increasing and decreasing the amount
> > > >  	 of memory within a KVM guest.
> > > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> > > > index 226fbb995fb0..dee9f8f3ad09 100644
> > > > --- a/drivers/virtio/virtio_balloon.c
> > > > +++ b/drivers/virtio/virtio_balloon.c
> > > > @@ -19,6 +19,7 @@
> > > >  #include <linux/mount.h>
> > > >  #include <linux/magic.h>
> > > >  #include <linux/pseudo_fs.h>
> > > > +#include <linux/page_hinting.h>
> > > >  
> > > >  /*
> > > >   * Balloon device works in 4K page units.  So each page is pointed to by
> > > > @@ -27,6 +28,7 @@
> > > >   */
> > > >  #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
> > > >  #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
> > > > +#define VIRTIO_BALLOON_ARRAY_HINTS_MAX	32
> > > >  #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
> > > >  
> > > >  #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
> > > > @@ -46,6 +48,7 @@ enum virtio_balloon_vq {
> > > >  	VIRTIO_BALLOON_VQ_DEFLATE,
> > > >  	VIRTIO_BALLOON_VQ_STATS,
> > > >  	VIRTIO_BALLOON_VQ_FREE_PAGE,
> > > > +	VIRTIO_BALLOON_VQ_HINTING,
> > > >  	VIRTIO_BALLOON_VQ_MAX
> > > >  };
> > > >  
> > > > @@ -113,6 +116,10 @@ struct virtio_balloon {
> > > >  
> > > >  	/* To register a shrinker to shrink memory upon memory pressure */
> > > >  	struct shrinker shrinker;
> > > > +
> > > > +	/* Unused page hinting device */
> > > > +	struct virtqueue *hinting_vq;
> > > > +	struct page_hinting_dev_info ph_dev_info;
> > > >  };
> > > >  
> > > >  static struct virtio_device_id id_table[] = {
> > > > @@ -152,6 +159,22 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
> > > >  
> > > >  }
> > > >  
> > > > +void virtballoon_page_hinting_react(struct page_hinting_dev_info *ph_dev_info,
> > > > +				    unsigned int num_hints)
> > > > +{
> > > > +	struct virtio_balloon *vb =
> > > > +		container_of(ph_dev_info, struct virtio_balloon, ph_dev_info);
> > > > +	struct virtqueue *vq = vb->hinting_vq;
> > > > +	unsigned int unused;
> > > > +
> > > > +	/* We should always be able to add these buffers to an empty queue. */
> > > 
> > > can be an out of memory condition, and then ...
> > 
> > Do we need an error check here?
> > 
> > For situations where this fails we should disable hinting completely, maybe?
> 
> No. Instead I will just limit the capacity to no more than the vq size.
> Doing that should allow us to avoid the out of memory issue here if I am
> understanding things correctly.
> 
> I'm assuming the allocation being referred to is alloc_indirect_split(),
> if so then it looks like it can fail and then we just fall back to using
> the vring.desc directly which will work for my purposes as long as I limit
> the capacity of the scatterlist to no more than the size of the vring.
> 


Right. And maybe tweak the GFP mask - no reason to try to
allocate memory aggressively with just 1 element in flight.

>
Nitesh Narayan Lal July 25, 2019, 5:42 p.m. UTC | #10
On 7/24/19 1:05 PM, Alexander Duyck wrote:
> From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
>
> Add support for the page hinting feature provided by virtio-balloon.
> Hinting differs from the regular balloon functionality in that it is
> much less durable than a standard memory balloon. Instead of creating a
> list of pages that cannot be accessed the pages are only inaccessible
> while they are being indicated to the virtio interface. Once the
> interface has acknowledged them they are placed back into their respective
> free lists and are once again accessible by the guest system.
>
> Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> ---
>  drivers/virtio/Kconfig              |    1 +
>  drivers/virtio/virtio_balloon.c     |   47 +++++++++++++++++++++++++++++++++++
>  include/uapi/linux/virtio_balloon.h |    1 +
>  3 files changed, 49 insertions(+)
>
> diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
> index 078615cf2afc..d45556ae1f81 100644
> --- a/drivers/virtio/Kconfig
> +++ b/drivers/virtio/Kconfig
> @@ -58,6 +58,7 @@ config VIRTIO_BALLOON
>  	tristate "Virtio balloon driver"
>  	depends on VIRTIO
>  	select MEMORY_BALLOON
> +	select PAGE_HINTING
>  	---help---
>  	 This driver supports increasing and decreasing the amount
>  	 of memory within a KVM guest.
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 226fbb995fb0..dee9f8f3ad09 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -19,6 +19,7 @@
>  #include <linux/mount.h>
>  #include <linux/magic.h>
>  #include <linux/pseudo_fs.h>
> +#include <linux/page_hinting.h>
>  
>  /*
>   * Balloon device works in 4K page units.  So each page is pointed to by
> @@ -27,6 +28,7 @@
>   */
>  #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
>  #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
> +#define VIRTIO_BALLOON_ARRAY_HINTS_MAX	32
>  #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
>  
>  #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
> @@ -46,6 +48,7 @@ enum virtio_balloon_vq {
>  	VIRTIO_BALLOON_VQ_DEFLATE,
>  	VIRTIO_BALLOON_VQ_STATS,
>  	VIRTIO_BALLOON_VQ_FREE_PAGE,
> +	VIRTIO_BALLOON_VQ_HINTING,
>  	VIRTIO_BALLOON_VQ_MAX
>  };
>  
> @@ -113,6 +116,10 @@ struct virtio_balloon {
>  
>  	/* To register a shrinker to shrink memory upon memory pressure */
>  	struct shrinker shrinker;
> +
> +	/* Unused page hinting device */
> +	struct virtqueue *hinting_vq;
> +	struct page_hinting_dev_info ph_dev_info;
>  };
>  
>  static struct virtio_device_id id_table[] = {
> @@ -152,6 +159,22 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
>  
>  }
>  
> +void virtballoon_page_hinting_react(struct page_hinting_dev_info *ph_dev_info,
> +				    unsigned int num_hints)
> +{
> +	struct virtio_balloon *vb =
> +		container_of(ph_dev_info, struct virtio_balloon, ph_dev_info);
> +	struct virtqueue *vq = vb->hinting_vq;
> +	unsigned int unused;
> +
> +	/* We should always be able to add these buffers to an empty queue. */
> +	virtqueue_add_inbuf(vq, ph_dev_info->sg, num_hints, vb, GFP_KERNEL);
> +	virtqueue_kick(vq);
> +
> +	/* When host has read buffer, this completes via balloon_ack */
> +	wait_event(vb->acked, virtqueue_get_buf(vq, &unused));
> +}
> +
>  static void set_page_pfns(struct virtio_balloon *vb,
>  			  __virtio32 pfns[], struct page *page)
>  {
> @@ -476,6 +499,7 @@ static int init_vqs(struct virtio_balloon *vb)
>  	names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
>  	names[VIRTIO_BALLOON_VQ_STATS] = NULL;
>  	names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
> +	names[VIRTIO_BALLOON_VQ_HINTING] = NULL;
>  
>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
>  		names[VIRTIO_BALLOON_VQ_STATS] = "stats";
> @@ -487,11 +511,19 @@ static int init_vqs(struct virtio_balloon *vb)
>  		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
>  	}
>  
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
> +		names[VIRTIO_BALLOON_VQ_HINTING] = "hinting_vq";
> +		callbacks[VIRTIO_BALLOON_VQ_HINTING] = balloon_ack;
> +	}
> +
>  	err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
>  					 vqs, callbacks, names, NULL, NULL);
>  	if (err)
>  		return err;
>  
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING))
> +		vb->hinting_vq = vqs[VIRTIO_BALLOON_VQ_HINTING];
> +
>  	vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
>  	vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> @@ -924,12 +956,24 @@ static int virtballoon_probe(struct virtio_device *vdev)
>  		if (err)
>  			goto out_del_balloon_wq;
>  	}
> +
> +	vb->ph_dev_info.react = virtballoon_page_hinting_react;
> +	vb->ph_dev_info.capacity = VIRTIO_BALLOON_ARRAY_HINTS_MAX;
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
> +		err = page_hinting_startup(&vb->ph_dev_info);
> +		if (err)
> +			goto out_unregister_shrinker;
> +	}
Any reason why you have kept vb->ph_dev_info.react & vb->ph_dev_info.capacity
initialization outside the feature check?
> +
>  	virtio_device_ready(vdev);
>  
>  	if (towards_target(vb))
>  		virtballoon_changed(vdev);
>  	return 0;
>  
> +out_unregister_shrinker:
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> +		virtio_balloon_unregister_shrinker(vb);
>  out_del_balloon_wq:
>  	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
>  		destroy_workqueue(vb->balloon_wq);
> @@ -958,6 +1002,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
>  {
>  	struct virtio_balloon *vb = vdev->priv;
>  
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING))
> +		page_hinting_shutdown(&vb->ph_dev_info);
>  	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
>  		virtio_balloon_unregister_shrinker(vb);
>  	spin_lock_irq(&vb->stop_update_lock);
> @@ -1027,6 +1073,7 @@ static int virtballoon_validate(struct virtio_device *vdev)
>  	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
>  	VIRTIO_BALLOON_F_FREE_PAGE_HINT,
>  	VIRTIO_BALLOON_F_PAGE_POISON,
> +	VIRTIO_BALLOON_F_HINTING,
>  };
>  
>  static struct virtio_driver virtio_balloon_driver = {
> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> index a1966cd7b677..2b0f62814e22 100644
> --- a/include/uapi/linux/virtio_balloon.h
> +++ b/include/uapi/linux/virtio_balloon.h
> @@ -36,6 +36,7 @@
>  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
>  #define VIRTIO_BALLOON_F_FREE_PAGE_HINT	3 /* VQ to report free pages */
>  #define VIRTIO_BALLOON_F_PAGE_POISON	4 /* Guest is using page poisoning */
> +#define VIRTIO_BALLOON_F_HINTING	5 /* Page hinting virtqueue */
>  
>  /* Size of a PFN in the balloon interface. */
>  #define VIRTIO_BALLOON_PFN_SHIFT 12
>
Alexander Duyck July 25, 2019, 7:54 p.m. UTC | #11
On Thu, 2019-07-25 at 13:42 -0400, Nitesh Narayan Lal wrote:
> On 7/24/19 1:05 PM, Alexander Duyck wrote:
> > From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> > 
> > 

<snip>

> > @@ -924,12 +956,24 @@ static int virtballoon_probe(struct virtio_device *vdev)
> >  		if (err)
> >  			goto out_del_balloon_wq;
> >  	}
> > +
> > +	vb->ph_dev_info.react = virtballoon_page_hinting_react;
> > +	vb->ph_dev_info.capacity = VIRTIO_BALLOON_ARRAY_HINTS_MAX;
> > +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
> > +		err = page_hinting_startup(&vb->ph_dev_info);
> > +		if (err)
> > +			goto out_unregister_shrinker;
> > +	}
> Any reason why you have kept vb->ph_dev_info.react & vb->ph_dev_info.capacity
> initialization outside the feature check?

I just had them on the outside because it didn't really matter if I
initialized them or not if the feature was not present. So I just
defaulted to initializing them in all cases.

Since I will be updating capacity to be based on the size of the hinting
queue in the next patch set I will move capacity initialization inside of
the check.
diff mbox series

Patch

diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 078615cf2afc..d45556ae1f81 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -58,6 +58,7 @@  config VIRTIO_BALLOON
 	tristate "Virtio balloon driver"
 	depends on VIRTIO
 	select MEMORY_BALLOON
+	select PAGE_HINTING
 	---help---
 	 This driver supports increasing and decreasing the amount
 	 of memory within a KVM guest.
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 226fbb995fb0..dee9f8f3ad09 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -19,6 +19,7 @@ 
 #include <linux/mount.h>
 #include <linux/magic.h>
 #include <linux/pseudo_fs.h>
+#include <linux/page_hinting.h>
 
 /*
  * Balloon device works in 4K page units.  So each page is pointed to by
@@ -27,6 +28,7 @@ 
  */
 #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
+#define VIRTIO_BALLOON_ARRAY_HINTS_MAX	32
 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
 
 #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
@@ -46,6 +48,7 @@  enum virtio_balloon_vq {
 	VIRTIO_BALLOON_VQ_DEFLATE,
 	VIRTIO_BALLOON_VQ_STATS,
 	VIRTIO_BALLOON_VQ_FREE_PAGE,
+	VIRTIO_BALLOON_VQ_HINTING,
 	VIRTIO_BALLOON_VQ_MAX
 };
 
@@ -113,6 +116,10 @@  struct virtio_balloon {
 
 	/* To register a shrinker to shrink memory upon memory pressure */
 	struct shrinker shrinker;
+
+	/* Unused page hinting device */
+	struct virtqueue *hinting_vq;
+	struct page_hinting_dev_info ph_dev_info;
 };
 
 static struct virtio_device_id id_table[] = {
@@ -152,6 +159,22 @@  static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
 
 }
 
+void virtballoon_page_hinting_react(struct page_hinting_dev_info *ph_dev_info,
+				    unsigned int num_hints)
+{
+	struct virtio_balloon *vb =
+		container_of(ph_dev_info, struct virtio_balloon, ph_dev_info);
+	struct virtqueue *vq = vb->hinting_vq;
+	unsigned int unused;
+
+	/* We should always be able to add these buffers to an empty queue. */
+	virtqueue_add_inbuf(vq, ph_dev_info->sg, num_hints, vb, GFP_KERNEL);
+	virtqueue_kick(vq);
+
+	/* When host has read buffer, this completes via balloon_ack */
+	wait_event(vb->acked, virtqueue_get_buf(vq, &unused));
+}
+
 static void set_page_pfns(struct virtio_balloon *vb,
 			  __virtio32 pfns[], struct page *page)
 {
@@ -476,6 +499,7 @@  static int init_vqs(struct virtio_balloon *vb)
 	names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
 	names[VIRTIO_BALLOON_VQ_STATS] = NULL;
 	names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
+	names[VIRTIO_BALLOON_VQ_HINTING] = NULL;
 
 	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
 		names[VIRTIO_BALLOON_VQ_STATS] = "stats";
@@ -487,11 +511,19 @@  static int init_vqs(struct virtio_balloon *vb)
 		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
 	}
 
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
+		names[VIRTIO_BALLOON_VQ_HINTING] = "hinting_vq";
+		callbacks[VIRTIO_BALLOON_VQ_HINTING] = balloon_ack;
+	}
+
 	err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
 					 vqs, callbacks, names, NULL, NULL);
 	if (err)
 		return err;
 
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING))
+		vb->hinting_vq = vqs[VIRTIO_BALLOON_VQ_HINTING];
+
 	vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
 	vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
 	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
@@ -924,12 +956,24 @@  static int virtballoon_probe(struct virtio_device *vdev)
 		if (err)
 			goto out_del_balloon_wq;
 	}
+
+	vb->ph_dev_info.react = virtballoon_page_hinting_react;
+	vb->ph_dev_info.capacity = VIRTIO_BALLOON_ARRAY_HINTS_MAX;
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING)) {
+		err = page_hinting_startup(&vb->ph_dev_info);
+		if (err)
+			goto out_unregister_shrinker;
+	}
+
 	virtio_device_ready(vdev);
 
 	if (towards_target(vb))
 		virtballoon_changed(vdev);
 	return 0;
 
+out_unregister_shrinker:
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
+		virtio_balloon_unregister_shrinker(vb);
 out_del_balloon_wq:
 	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
 		destroy_workqueue(vb->balloon_wq);
@@ -958,6 +1002,8 @@  static void virtballoon_remove(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb = vdev->priv;
 
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_HINTING))
+		page_hinting_shutdown(&vb->ph_dev_info);
 	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
 		virtio_balloon_unregister_shrinker(vb);
 	spin_lock_irq(&vb->stop_update_lock);
@@ -1027,6 +1073,7 @@  static int virtballoon_validate(struct virtio_device *vdev)
 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
 	VIRTIO_BALLOON_F_FREE_PAGE_HINT,
 	VIRTIO_BALLOON_F_PAGE_POISON,
+	VIRTIO_BALLOON_F_HINTING,
 };
 
 static struct virtio_driver virtio_balloon_driver = {
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index a1966cd7b677..2b0f62814e22 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -36,6 +36,7 @@ 
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
 #define VIRTIO_BALLOON_F_FREE_PAGE_HINT	3 /* VQ to report free pages */
 #define VIRTIO_BALLOON_F_PAGE_POISON	4 /* Guest is using page poisoning */
+#define VIRTIO_BALLOON_F_HINTING	5 /* Page hinting virtqueue */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12