diff mbox series

[RFC,v8,1/7] KVM: Support for guest free page hinting

Message ID 20190204201854.2328-2-nitesh@redhat.com (mailing list archive)
State New, archived
Headers show
Series KVM: Guest Free Page Hinting | expand

Commit Message

Nitesh Narayan Lal Feb. 4, 2019, 8:18 p.m. UTC
This patch includes the following:
1. Basic skeleton for guest free page hinting support
2. Enablement of the x86 platform to use it

Signed-off-by: Nitesh Narayan Lal <nitesh@redhat.com>
---
 arch/x86/Kbuild              |  2 +-
 arch/x86/kvm/Kconfig         |  8 ++++++++
 arch/x86/kvm/Makefile        |  2 ++
 include/linux/gfp.h          |  9 +++++++++
 include/linux/page_hinting.h | 17 +++++++++++++++++
 virt/kvm/page_hinting.c      | 36 ++++++++++++++++++++++++++++++++++++
 6 files changed, 73 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/page_hinting.h
 create mode 100644 virt/kvm/page_hinting.c

Comments

Michael S. Tsirkin Feb. 5, 2019, 4:14 a.m. UTC | #1
On Mon, Feb 04, 2019 at 03:18:48PM -0500, Nitesh Narayan Lal wrote:
> This patch includes the following:
> 1. Basic skeleton for the support
> 2. Enablement of x86 platform to use the same
> 
> Signed-off-by: Nitesh Narayan Lal <nitesh@redhat.com>
> ---
>  arch/x86/Kbuild              |  2 +-
>  arch/x86/kvm/Kconfig         |  8 ++++++++
>  arch/x86/kvm/Makefile        |  2 ++
>  include/linux/gfp.h          |  9 +++++++++
>  include/linux/page_hinting.h | 17 +++++++++++++++++
>  virt/kvm/page_hinting.c      | 36 ++++++++++++++++++++++++++++++++++++
>  6 files changed, 73 insertions(+), 1 deletion(-)
>  create mode 100644 include/linux/page_hinting.h
>  create mode 100644 virt/kvm/page_hinting.c
> 
> diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
> index c625f57472f7..3244df4ee311 100644
> --- a/arch/x86/Kbuild
> +++ b/arch/x86/Kbuild
> @@ -2,7 +2,7 @@ obj-y += entry/
>  
>  obj-$(CONFIG_PERF_EVENTS) += events/
>  
> -obj-$(CONFIG_KVM) += kvm/
> +obj-$(subst m,y,$(CONFIG_KVM)) += kvm/
>  
>  # Xen paravirtualization support
>  obj-$(CONFIG_XEN) += xen/
> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
> index 72fa955f4a15..2fae31459706 100644
> --- a/arch/x86/kvm/Kconfig
> +++ b/arch/x86/kvm/Kconfig
> @@ -96,6 +96,14 @@ config KVM_MMU_AUDIT
>  	 This option adds a R/W kVM module parameter 'mmu_audit', which allows
>  	 auditing of KVM MMU events at runtime.
>  
> +# KVM_FREE_PAGE_HINTING will allow the guest to report the free pages to the
> +# host in regular interval of time.
> +config KVM_FREE_PAGE_HINTING
> +       def_bool y
> +       depends on KVM
> +       select VIRTIO
> +       select VIRTIO_BALLOON
> +
>  # OK, it's a little counter-intuitive to do this, but it puts it neatly under
>  # the virtualization menu.
>  source "drivers/vhost/Kconfig"
> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index 69b3a7c30013..78640a80501e 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -16,6 +16,8 @@ kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
>  			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
>  			   hyperv.o page_track.o debugfs.o
>  
> +obj-$(CONFIG_KVM_FREE_PAGE_HINTING)    += $(KVM)/page_hinting.o
> +
>  kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
>  kvm-amd-y		+= svm.o pmu_amd.o
>  
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 5f5e25fd6149..e596527284ba 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -7,6 +7,7 @@
>  #include <linux/stddef.h>
>  #include <linux/linkage.h>
>  #include <linux/topology.h>
> +#include <linux/page_hinting.h>
>  
>  struct vm_area_struct;
>  
> @@ -456,6 +457,14 @@ static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
>  	return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
>  }
>  
> +#ifdef	CONFIG_KVM_FREE_PAGE_HINTING
> +#define HAVE_ARCH_FREE_PAGE
> +static inline void arch_free_page(struct page *page, int order)
> +{
> +	guest_free_page(page, order);
> +}
> +#endif
> +
>  #ifndef HAVE_ARCH_FREE_PAGE
>  static inline void arch_free_page(struct page *page, int order) { }
>  #endif

OK, so the arch_free_page hook is used to tie into the mm code;
with follow-up patches the pages get queued in a list
and then sent to the hypervisor so it can free them.
Fair enough, but how do we know a page is
not reused by the time it's received by the hypervisor?
If it's reused, then isn't it a problem that
the hypervisor calls MADV_DONTNEED on it?


> diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h
> new file mode 100644
> index 000000000000..b54f7428f348
> --- /dev/null
> +++ b/include/linux/page_hinting.h
> @@ -0,0 +1,17 @@
> +/*
> + * Size of the array which is used to store the freed pages is defined by
> + * MAX_FGPT_ENTRIES. If possible, we have to find a better way using which
> + * we can get rid of the hardcoded array size.
> + */
> +#define MAX_FGPT_ENTRIES	1000
> +/*
> + * hypervisor_pages - It is a dummy structure passed with the hypercall.
> + * @pfn: page frame number for the page which needs to be sent to the host.
> + * @order: order of the page needs to be reported to the host.
> + */
> +struct hypervisor_pages {
> +	unsigned long pfn;
> +	unsigned int order;
> +};
> +
> +void guest_free_page(struct page *page, int order);
> diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
> new file mode 100644
> index 000000000000..818bd6b84e0c
> --- /dev/null
> +++ b/virt/kvm/page_hinting.c
> @@ -0,0 +1,36 @@
> +#include <linux/gfp.h>
> +#include <linux/mm.h>
> +#include <linux/kernel.h>
> +
> +/*
> + * struct kvm_free_pages - Tracks the pages which are freed by the guest.
> + * @pfn: page frame number for the page which is freed.
> + * @order: order corresponding to the page freed.
> + * @zonenum: zone number to which the freed page belongs.
> + */
> +struct kvm_free_pages {
> +	unsigned long pfn;
> +	unsigned int order;
> +	int zonenum;
> +};
> +
> +/*
> + * struct page_hinting - holds array objects for the structures used to track
> + * guest free pages, along with an index variable for each of them.
> + * @kvm_pt: array object for the structure kvm_free_pages.
> + * @kvm_pt_idx: index for kvm_free_pages object.
> + * @hypervisor_pagelist: array object for the structure hypervisor_pages.
> + * @hyp_idx: index for hypervisor_pages object.
> + */
> +struct page_hinting {
> +	struct kvm_free_pages kvm_pt[MAX_FGPT_ENTRIES];
> +	int kvm_pt_idx;
> +	struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
> +	int hyp_idx;
> +};
> +
> +DEFINE_PER_CPU(struct page_hinting, hinting_obj);
> +
> +void guest_free_page(struct page *page, int order)
> +{
> +}
> -- 
> 2.17.2
Nitesh Narayan Lal Feb. 5, 2019, 1:06 p.m. UTC | #2
On 2/4/19 11:14 PM, Michael S. Tsirkin wrote:
> On Mon, Feb 04, 2019 at 03:18:48PM -0500, Nitesh Narayan Lal wrote:
>> This patch includes the following:
>> 1. Basic skeleton for the support
>> 2. Enablement of x86 platform to use the same
>>
>> Signed-off-by: Nitesh Narayan Lal <nitesh@redhat.com>
>> ---
>>  arch/x86/Kbuild              |  2 +-
>>  arch/x86/kvm/Kconfig         |  8 ++++++++
>>  arch/x86/kvm/Makefile        |  2 ++
>>  include/linux/gfp.h          |  9 +++++++++
>>  include/linux/page_hinting.h | 17 +++++++++++++++++
>>  virt/kvm/page_hinting.c      | 36 ++++++++++++++++++++++++++++++++++++
>>  6 files changed, 73 insertions(+), 1 deletion(-)
>>  create mode 100644 include/linux/page_hinting.h
>>  create mode 100644 virt/kvm/page_hinting.c
>>
>> diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
>> index c625f57472f7..3244df4ee311 100644
>> --- a/arch/x86/Kbuild
>> +++ b/arch/x86/Kbuild
>> @@ -2,7 +2,7 @@ obj-y += entry/
>>  
>>  obj-$(CONFIG_PERF_EVENTS) += events/
>>  
>> -obj-$(CONFIG_KVM) += kvm/
>> +obj-$(subst m,y,$(CONFIG_KVM)) += kvm/
>>  
>>  # Xen paravirtualization support
>>  obj-$(CONFIG_XEN) += xen/
>> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
>> index 72fa955f4a15..2fae31459706 100644
>> --- a/arch/x86/kvm/Kconfig
>> +++ b/arch/x86/kvm/Kconfig
>> @@ -96,6 +96,14 @@ config KVM_MMU_AUDIT
>>  	 This option adds a R/W kVM module parameter 'mmu_audit', which allows
>>  	 auditing of KVM MMU events at runtime.
>>  
>> +# KVM_FREE_PAGE_HINTING will allow the guest to report the free pages to the
>> +# host in regular interval of time.
>> +config KVM_FREE_PAGE_HINTING
>> +       def_bool y
>> +       depends on KVM
>> +       select VIRTIO
>> +       select VIRTIO_BALLOON
>> +
>>  # OK, it's a little counter-intuitive to do this, but it puts it neatly under
>>  # the virtualization menu.
>>  source "drivers/vhost/Kconfig"
>> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
>> index 69b3a7c30013..78640a80501e 100644
>> --- a/arch/x86/kvm/Makefile
>> +++ b/arch/x86/kvm/Makefile
>> @@ -16,6 +16,8 @@ kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
>>  			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
>>  			   hyperv.o page_track.o debugfs.o
>>  
>> +obj-$(CONFIG_KVM_FREE_PAGE_HINTING)    += $(KVM)/page_hinting.o
>> +
>>  kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
>>  kvm-amd-y		+= svm.o pmu_amd.o
>>  
>> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
>> index 5f5e25fd6149..e596527284ba 100644
>> --- a/include/linux/gfp.h
>> +++ b/include/linux/gfp.h
>> @@ -7,6 +7,7 @@
>>  #include <linux/stddef.h>
>>  #include <linux/linkage.h>
>>  #include <linux/topology.h>
>> +#include <linux/page_hinting.h>
>>  
>>  struct vm_area_struct;
>>  
>> @@ -456,6 +457,14 @@ static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
>>  	return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
>>  }
>>  
>> +#ifdef	CONFIG_KVM_FREE_PAGE_HINTING
>> +#define HAVE_ARCH_FREE_PAGE
>> +static inline void arch_free_page(struct page *page, int order)
>> +{
>> +	guest_free_page(page, order);
>> +}
>> +#endif
>> +
>>  #ifndef HAVE_ARCH_FREE_PAGE
>>  static inline void arch_free_page(struct page *page, int order) { }
>>  #endif
> OK so arch_free_page hook is used to tie into mm code,
> with follow-up patches the pages get queued in a list
> and then sent to hypervisor so it can free them.
> Fair enough but how do we know the page is
> not reused by the time it's received by the hypervisor?
> If it's reused then isn't it a problem that
> hypervisor calls MADV_DONTNEED on them?
Hi Michael,

To ensure that the page is not reused, we acquire the zone lock and
remove the page from the buddy free list. After the page is freed by
the hypervisor, it is returned to the buddy free list.
>
>
>> diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h
>> new file mode 100644
>> index 000000000000..b54f7428f348
>> --- /dev/null
>> +++ b/include/linux/page_hinting.h
>> @@ -0,0 +1,17 @@
>> +/*
>> + * Size of the array which is used to store the freed pages is defined by
>> + * MAX_FGPT_ENTRIES. If possible, we have to find a better way using which
>> + * we can get rid of the hardcoded array size.
>> + */
>> +#define MAX_FGPT_ENTRIES	1000
>> +/*
>> + * hypervisor_pages - It is a dummy structure passed with the hypercall.
>> + * @pfn: page frame number for the page which needs to be sent to the host.
>> + * @order: order of the page needs to be reported to the host.
>> + */
>> +struct hypervisor_pages {
>> +	unsigned long pfn;
>> +	unsigned int order;
>> +};
>> +
>> +void guest_free_page(struct page *page, int order);
>> diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
>> new file mode 100644
>> index 000000000000..818bd6b84e0c
>> --- /dev/null
>> +++ b/virt/kvm/page_hinting.c
>> @@ -0,0 +1,36 @@
>> +#include <linux/gfp.h>
>> +#include <linux/mm.h>
>> +#include <linux/kernel.h>
>> +
>> +/*
>> + * struct kvm_free_pages - Tracks the pages which are freed by the guest.
>> + * @pfn: page frame number for the page which is freed.
>> + * @order: order corresponding to the page freed.
>> + * @zonenum: zone number to which the freed page belongs.
>> + */
>> +struct kvm_free_pages {
>> +	unsigned long pfn;
>> +	unsigned int order;
>> +	int zonenum;
>> +};
>> +
>> +/*
>> + * struct page_hinting - holds array objects for the structures used to track
>> + * guest free pages, along with an index variable for each of them.
>> + * @kvm_pt: array object for the structure kvm_free_pages.
>> + * @kvm_pt_idx: index for kvm_free_pages object.
>> + * @hypervisor_pagelist: array object for the structure hypervisor_pages.
>> + * @hyp_idx: index for hypervisor_pages object.
>> + */
>> +struct page_hinting {
>> +	struct kvm_free_pages kvm_pt[MAX_FGPT_ENTRIES];
>> +	int kvm_pt_idx;
>> +	struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
>> +	int hyp_idx;
>> +};
>> +
>> +DEFINE_PER_CPU(struct page_hinting, hinting_obj);
>> +
>> +void guest_free_page(struct page *page, int order)
>> +{
>> +}
>> -- 
>> 2.17.2
Michael S. Tsirkin Feb. 5, 2019, 4:27 p.m. UTC | #3
On Tue, Feb 05, 2019 at 08:06:33AM -0500, Nitesh Narayan Lal wrote:
> On 2/4/19 11:14 PM, Michael S. Tsirkin wrote:
> > On Mon, Feb 04, 2019 at 03:18:48PM -0500, Nitesh Narayan Lal wrote:
> >> This patch includes the following:
> >> 1. Basic skeleton for the support
> >> 2. Enablement of x86 platform to use the same
> >>
> >> Signed-off-by: Nitesh Narayan Lal <nitesh@redhat.com>
> >> ---
> >>  arch/x86/Kbuild              |  2 +-
> >>  arch/x86/kvm/Kconfig         |  8 ++++++++
> >>  arch/x86/kvm/Makefile        |  2 ++
> >>  include/linux/gfp.h          |  9 +++++++++
> >>  include/linux/page_hinting.h | 17 +++++++++++++++++
> >>  virt/kvm/page_hinting.c      | 36 ++++++++++++++++++++++++++++++++++++
> >>  6 files changed, 73 insertions(+), 1 deletion(-)
> >>  create mode 100644 include/linux/page_hinting.h
> >>  create mode 100644 virt/kvm/page_hinting.c
> >>
> >> diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
> >> index c625f57472f7..3244df4ee311 100644
> >> --- a/arch/x86/Kbuild
> >> +++ b/arch/x86/Kbuild
> >> @@ -2,7 +2,7 @@ obj-y += entry/
> >>  
> >>  obj-$(CONFIG_PERF_EVENTS) += events/
> >>  
> >> -obj-$(CONFIG_KVM) += kvm/
> >> +obj-$(subst m,y,$(CONFIG_KVM)) += kvm/
> >>  
> >>  # Xen paravirtualization support
> >>  obj-$(CONFIG_XEN) += xen/
> >> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
> >> index 72fa955f4a15..2fae31459706 100644
> >> --- a/arch/x86/kvm/Kconfig
> >> +++ b/arch/x86/kvm/Kconfig
> >> @@ -96,6 +96,14 @@ config KVM_MMU_AUDIT
> >>  	 This option adds a R/W kVM module parameter 'mmu_audit', which allows
> >>  	 auditing of KVM MMU events at runtime.
> >>  
> >> +# KVM_FREE_PAGE_HINTING will allow the guest to report the free pages to the
> >> +# host in regular interval of time.
> >> +config KVM_FREE_PAGE_HINTING
> >> +       def_bool y
> >> +       depends on KVM
> >> +       select VIRTIO
> >> +       select VIRTIO_BALLOON
> >> +
> >>  # OK, it's a little counter-intuitive to do this, but it puts it neatly under
> >>  # the virtualization menu.
> >>  source "drivers/vhost/Kconfig"
> >> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> >> index 69b3a7c30013..78640a80501e 100644
> >> --- a/arch/x86/kvm/Makefile
> >> +++ b/arch/x86/kvm/Makefile
> >> @@ -16,6 +16,8 @@ kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
> >>  			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
> >>  			   hyperv.o page_track.o debugfs.o
> >>  
> >> +obj-$(CONFIG_KVM_FREE_PAGE_HINTING)    += $(KVM)/page_hinting.o
> >> +
> >>  kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
> >>  kvm-amd-y		+= svm.o pmu_amd.o
> >>  
> >> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> >> index 5f5e25fd6149..e596527284ba 100644
> >> --- a/include/linux/gfp.h
> >> +++ b/include/linux/gfp.h
> >> @@ -7,6 +7,7 @@
> >>  #include <linux/stddef.h>
> >>  #include <linux/linkage.h>
> >>  #include <linux/topology.h>
> >> +#include <linux/page_hinting.h>
> >>  
> >>  struct vm_area_struct;
> >>  
> >> @@ -456,6 +457,14 @@ static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
> >>  	return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
> >>  }
> >>  
> >> +#ifdef	CONFIG_KVM_FREE_PAGE_HINTING
> >> +#define HAVE_ARCH_FREE_PAGE
> >> +static inline void arch_free_page(struct page *page, int order)
> >> +{
> >> +	guest_free_page(page, order);
> >> +}
> >> +#endif
> >> +
> >>  #ifndef HAVE_ARCH_FREE_PAGE
> >>  static inline void arch_free_page(struct page *page, int order) { }
> >>  #endif
> > OK so arch_free_page hook is used to tie into mm code,
> > with follow-up patches the pages get queued in a list
> > and then sent to hypervisor so it can free them.
> > Fair enough but how do we know the page is
> > not reused by the time it's received by the hypervisor?
> > If it's reused then isn't it a problem that
> > hypervisor calls MADV_DONTNEED on them?
> Hi Michael,
> 
> In order to ensure that the page is not reused, we remove it from the
> buddy free list by acquiring the zone lock. After the page is freed by
> the hypervisor it is returned to the buddy free list again.

Thanks, that's good to know. Could you point me to the code that does this?

> >
> >
> >> diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h
> >> new file mode 100644
> >> index 000000000000..b54f7428f348
> >> --- /dev/null
> >> +++ b/include/linux/page_hinting.h
> >> @@ -0,0 +1,17 @@
> >> +/*
> >> + * Size of the array which is used to store the freed pages is defined by
> >> + * MAX_FGPT_ENTRIES. If possible, we have to find a better way using which
> >> + * we can get rid of the hardcoded array size.
> >> + */
> >> +#define MAX_FGPT_ENTRIES	1000
> >> +/*
> >> + * hypervisor_pages - It is a dummy structure passed with the hypercall.
> >> + * @pfn: page frame number for the page which needs to be sent to the host.
> >> + * @order: order of the page needs to be reported to the host.
> >> + */
> >> +struct hypervisor_pages {
> >> +	unsigned long pfn;
> >> +	unsigned int order;
> >> +};
> >> +
> >> +void guest_free_page(struct page *page, int order);
> >> diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
> >> new file mode 100644
> >> index 000000000000..818bd6b84e0c
> >> --- /dev/null
> >> +++ b/virt/kvm/page_hinting.c
> >> @@ -0,0 +1,36 @@
> >> +#include <linux/gfp.h>
> >> +#include <linux/mm.h>
> >> +#include <linux/kernel.h>
> >> +
> >> +/*
> >> + * struct kvm_free_pages - Tracks the pages which are freed by the guest.
> >> + * @pfn: page frame number for the page which is freed.
> >> + * @order: order corresponding to the page freed.
> >> + * @zonenum: zone number to which the freed page belongs.
> >> + */
> >> +struct kvm_free_pages {
> >> +	unsigned long pfn;
> >> +	unsigned int order;
> >> +	int zonenum;
> >> +};
> >> +
> >> +/*
> >> + * struct page_hinting - holds array objects for the structures used to track
> >> + * guest free pages, along with an index variable for each of them.
> >> + * @kvm_pt: array object for the structure kvm_free_pages.
> >> + * @kvm_pt_idx: index for kvm_free_pages object.
> >> + * @hypervisor_pagelist: array object for the structure hypervisor_pages.
> >> + * @hyp_idx: index for hypervisor_pages object.
> >> + */
> >> +struct page_hinting {
> >> +	struct kvm_free_pages kvm_pt[MAX_FGPT_ENTRIES];
> >> +	int kvm_pt_idx;
> >> +	struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
> >> +	int hyp_idx;
> >> +};
> >> +
> >> +DEFINE_PER_CPU(struct page_hinting, hinting_obj);
> >> +
> >> +void guest_free_page(struct page *page, int order)
> >> +{
> >> +}
> >> -- 
> >> 2.17.2
> -- 
> Regards
> Nitesh
>
Nitesh Narayan Lal Feb. 5, 2019, 4:34 p.m. UTC | #4
On 2/5/19 11:27 AM, Michael S. Tsirkin wrote:
> On Tue, Feb 05, 2019 at 08:06:33AM -0500, Nitesh Narayan Lal wrote:
>> On 2/4/19 11:14 PM, Michael S. Tsirkin wrote:
>>> On Mon, Feb 04, 2019 at 03:18:48PM -0500, Nitesh Narayan Lal wrote:
>>>> This patch includes the following:
>>>> 1. Basic skeleton for the support
>>>> 2. Enablement of x86 platform to use the same
>>>>
>>>> Signed-off-by: Nitesh Narayan Lal <nitesh@redhat.com>
>>>> ---
>>>>  arch/x86/Kbuild              |  2 +-
>>>>  arch/x86/kvm/Kconfig         |  8 ++++++++
>>>>  arch/x86/kvm/Makefile        |  2 ++
>>>>  include/linux/gfp.h          |  9 +++++++++
>>>>  include/linux/page_hinting.h | 17 +++++++++++++++++
>>>>  virt/kvm/page_hinting.c      | 36 ++++++++++++++++++++++++++++++++++++
>>>>  6 files changed, 73 insertions(+), 1 deletion(-)
>>>>  create mode 100644 include/linux/page_hinting.h
>>>>  create mode 100644 virt/kvm/page_hinting.c
>>>>
>>>> diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
>>>> index c625f57472f7..3244df4ee311 100644
>>>> --- a/arch/x86/Kbuild
>>>> +++ b/arch/x86/Kbuild
>>>> @@ -2,7 +2,7 @@ obj-y += entry/
>>>>  
>>>>  obj-$(CONFIG_PERF_EVENTS) += events/
>>>>  
>>>> -obj-$(CONFIG_KVM) += kvm/
>>>> +obj-$(subst m,y,$(CONFIG_KVM)) += kvm/
>>>>  
>>>>  # Xen paravirtualization support
>>>>  obj-$(CONFIG_XEN) += xen/
>>>> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
>>>> index 72fa955f4a15..2fae31459706 100644
>>>> --- a/arch/x86/kvm/Kconfig
>>>> +++ b/arch/x86/kvm/Kconfig
>>>> @@ -96,6 +96,14 @@ config KVM_MMU_AUDIT
>>>>  	 This option adds a R/W kVM module parameter 'mmu_audit', which allows
>>>>  	 auditing of KVM MMU events at runtime.
>>>>  
>>>> +# KVM_FREE_PAGE_HINTING will allow the guest to report the free pages to the
>>>> +# host in regular interval of time.
>>>> +config KVM_FREE_PAGE_HINTING
>>>> +       def_bool y
>>>> +       depends on KVM
>>>> +       select VIRTIO
>>>> +       select VIRTIO_BALLOON
>>>> +
>>>>  # OK, it's a little counter-intuitive to do this, but it puts it neatly under
>>>>  # the virtualization menu.
>>>>  source "drivers/vhost/Kconfig"
>>>> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
>>>> index 69b3a7c30013..78640a80501e 100644
>>>> --- a/arch/x86/kvm/Makefile
>>>> +++ b/arch/x86/kvm/Makefile
>>>> @@ -16,6 +16,8 @@ kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
>>>>  			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
>>>>  			   hyperv.o page_track.o debugfs.o
>>>>  
>>>> +obj-$(CONFIG_KVM_FREE_PAGE_HINTING)    += $(KVM)/page_hinting.o
>>>> +
>>>>  kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
>>>>  kvm-amd-y		+= svm.o pmu_amd.o
>>>>  
>>>> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
>>>> index 5f5e25fd6149..e596527284ba 100644
>>>> --- a/include/linux/gfp.h
>>>> +++ b/include/linux/gfp.h
>>>> @@ -7,6 +7,7 @@
>>>>  #include <linux/stddef.h>
>>>>  #include <linux/linkage.h>
>>>>  #include <linux/topology.h>
>>>> +#include <linux/page_hinting.h>
>>>>  
>>>>  struct vm_area_struct;
>>>>  
>>>> @@ -456,6 +457,14 @@ static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
>>>>  	return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
>>>>  }
>>>>  
>>>> +#ifdef	CONFIG_KVM_FREE_PAGE_HINTING
>>>> +#define HAVE_ARCH_FREE_PAGE
>>>> +static inline void arch_free_page(struct page *page, int order)
>>>> +{
>>>> +	guest_free_page(page, order);
>>>> +}
>>>> +#endif
>>>> +
>>>>  #ifndef HAVE_ARCH_FREE_PAGE
>>>>  static inline void arch_free_page(struct page *page, int order) { }
>>>>  #endif
>>> OK so arch_free_page hook is used to tie into mm code,
>>> with follow-up patches the pages get queued in a list
>>> and then sent to hypervisor so it can free them.
>>> Fair enough but how do we know the page is
>>> not reused by the time it's received by the hypervisor?
>>> If it's reused then isn't it a problem that
>>> hypervisor calls MADV_DONTNEED on them?
>> Hi Michael,
>>
>> In order to ensure that the page is not reused, we remove it from the
>> buddy free list by acquiring the zone lock. After the page is freed by
>> the hypervisor it is returned to the buddy free list again.
> Thanks that's good to know. Could you point me to code that does this?
This is in patch 0006-KVM-Enables-the-kernel-to-isolate-and-report-free-page.
hinting_fn() is responsible for scanning the per-cpu array, acquiring
the lock, isolating the page and invoking hyperlist_ready().
In hyperlist_ready(), the hypercall to report the free pages is made,
and once it is done, those pages are returned to the buddy free list
within that same function.
>
>>>
>>>> diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h
>>>> new file mode 100644
>>>> index 000000000000..b54f7428f348
>>>> --- /dev/null
>>>> +++ b/include/linux/page_hinting.h
>>>> @@ -0,0 +1,17 @@
>>>> +/*
>>>> + * Size of the array which is used to store the freed pages is defined by
>>>> + * MAX_FGPT_ENTRIES. If possible, we have to find a better way using which
>>>> + * we can get rid of the hardcoded array size.
>>>> + */
>>>> +#define MAX_FGPT_ENTRIES	1000
>>>> +/*
>>>> + * hypervisor_pages - It is a dummy structure passed with the hypercall.
>>>> + * @pfn: page frame number for the page which needs to be sent to the host.
>>>> + * @order: order of the page needs to be reported to the host.
>>>> + */
>>>> +struct hypervisor_pages {
>>>> +	unsigned long pfn;
>>>> +	unsigned int order;
>>>> +};
>>>> +
>>>> +void guest_free_page(struct page *page, int order);
>>>> diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
>>>> new file mode 100644
>>>> index 000000000000..818bd6b84e0c
>>>> --- /dev/null
>>>> +++ b/virt/kvm/page_hinting.c
>>>> @@ -0,0 +1,36 @@
>>>> +#include <linux/gfp.h>
>>>> +#include <linux/mm.h>
>>>> +#include <linux/kernel.h>
>>>> +
>>>> +/*
>>>> + * struct kvm_free_pages - Tracks the pages which are freed by the guest.
>>>> + * @pfn: page frame number for the page which is freed.
>>>> + * @order: order corresponding to the page freed.
>>>> + * @zonenum: zone number to which the freed page belongs.
>>>> + */
>>>> +struct kvm_free_pages {
>>>> +	unsigned long pfn;
>>>> +	unsigned int order;
>>>> +	int zonenum;
>>>> +};
>>>> +
>>>> +/*
>>>> + * struct page_hinting - holds array objects for the structures used to track
>>>> + * guest free pages, along with an index variable for each of them.
>>>> + * @kvm_pt: array object for the structure kvm_free_pages.
>>>> + * @kvm_pt_idx: index for kvm_free_pages object.
>>>> + * @hypervisor_pagelist: array object for the structure hypervisor_pages.
>>>> + * @hyp_idx: index for hypervisor_pages object.
>>>> + */
>>>> +struct page_hinting {
>>>> +	struct kvm_free_pages kvm_pt[MAX_FGPT_ENTRIES];
>>>> +	int kvm_pt_idx;
>>>> +	struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
>>>> +	int hyp_idx;
>>>> +};
>>>> +
>>>> +DEFINE_PER_CPU(struct page_hinting, hinting_obj);
>>>> +
>>>> +void guest_free_page(struct page *page, int order)
>>>> +{
>>>> +}
>>>> -- 
>>>> 2.17.2
>> -- 
>> Regards
>> Nitesh
>>
>
>
diff mbox series

Patch

diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index c625f57472f7..3244df4ee311 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -2,7 +2,7 @@  obj-y += entry/
 
 obj-$(CONFIG_PERF_EVENTS) += events/
 
-obj-$(CONFIG_KVM) += kvm/
+obj-$(subst m,y,$(CONFIG_KVM)) += kvm/
 
 # Xen paravirtualization support
 obj-$(CONFIG_XEN) += xen/
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 72fa955f4a15..2fae31459706 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -96,6 +96,14 @@  config KVM_MMU_AUDIT
 	 This option adds a R/W kVM module parameter 'mmu_audit', which allows
 	 auditing of KVM MMU events at runtime.
 
+# KVM_FREE_PAGE_HINTING will allow the guest to report the free pages to the
+# host in regular interval of time.
+config KVM_FREE_PAGE_HINTING
+       def_bool y
+       depends on KVM
+       select VIRTIO
+       select VIRTIO_BALLOON
+
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source "drivers/vhost/Kconfig"
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 69b3a7c30013..78640a80501e 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -16,6 +16,8 @@  kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
 			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
 			   hyperv.o page_track.o debugfs.o
 
+obj-$(CONFIG_KVM_FREE_PAGE_HINTING)    += $(KVM)/page_hinting.o
+
 kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
 kvm-amd-y		+= svm.o pmu_amd.o
 
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 5f5e25fd6149..e596527284ba 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -7,6 +7,7 @@ 
 #include <linux/stddef.h>
 #include <linux/linkage.h>
 #include <linux/topology.h>
+#include <linux/page_hinting.h>
 
 struct vm_area_struct;
 
@@ -456,6 +457,14 @@  static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
 	return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
 }
 
+#ifdef	CONFIG_KVM_FREE_PAGE_HINTING
+#define HAVE_ARCH_FREE_PAGE
+static inline void arch_free_page(struct page *page, int order)
+{
+	guest_free_page(page, order);
+}
+#endif
+
 #ifndef HAVE_ARCH_FREE_PAGE
 static inline void arch_free_page(struct page *page, int order) { }
 #endif
diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h
new file mode 100644
index 000000000000..b54f7428f348
--- /dev/null
+++ b/include/linux/page_hinting.h
@@ -0,0 +1,17 @@ 
+/*
+ * Size of the array which is used to store the freed pages is defined by
+ * MAX_FGPT_ENTRIES. If possible, we have to find a better way using which
+ * we can get rid of the hardcoded array size.
+ */
+#define MAX_FGPT_ENTRIES	1000
+/*
+ * hypervisor_pages - It is a dummy structure passed with the hypercall.
+ * @pfn: page frame number for the page which needs to be sent to the host.
+ * @order: order of the page needs to be reported to the host.
+ */
+struct hypervisor_pages {
+	unsigned long pfn;
+	unsigned int order;
+};
+
+void guest_free_page(struct page *page, int order);
diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
new file mode 100644
index 000000000000..818bd6b84e0c
--- /dev/null
+++ b/virt/kvm/page_hinting.c
@@ -0,0 +1,36 @@ 
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+
+/*
+ * struct kvm_free_pages - Tracks the pages which are freed by the guest.
+ * @pfn: page frame number for the page which is freed.
+ * @order: order corresponding to the page freed.
+ * @zonenum: zone number to which the freed page belongs.
+ */
+struct kvm_free_pages {
+	unsigned long pfn;
+	unsigned int order;
+	int zonenum;
+};
+
+/*
+ * struct page_hinting - holds array objects for the structures used to track
+ * guest free pages, along with an index variable for each of them.
+ * @kvm_pt: array object for the structure kvm_free_pages.
+ * @kvm_pt_idx: index for kvm_free_pages object.
+ * @hypervisor_pagelist: array object for the structure hypervisor_pages.
+ * @hyp_idx: index for hypervisor_pages object.
+ */
+struct page_hinting {
+	struct kvm_free_pages kvm_pt[MAX_FGPT_ENTRIES];
+	int kvm_pt_idx;
+	struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
+	int hyp_idx;
+};
+
+DEFINE_PER_CPU(struct page_hinting, hinting_obj);
+
+void guest_free_page(struct page *page, int order)
+{
+}