diff mbox

[2/8] xen/balloon: Move common memory reservation routines to a module

Message ID 20180525153331.31188-3-andr2000@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Oleksandr Andrushchenko May 25, 2018, 3:33 p.m. UTC
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>

Memory {increase|decrease}_reservation and VA mappings update/reset
code used in balloon driver can be made common, so other drivers can
also re-use the same functionality without open-coding.
Create a dedicated module for the shared code and export corresponding
symbols for other kernel modules.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
---
 drivers/xen/Makefile          |   1 +
 drivers/xen/balloon.c         |  71 ++----------------
 drivers/xen/mem-reservation.c | 134 ++++++++++++++++++++++++++++++++++
 include/xen/mem_reservation.h |  29 ++++++++
 4 files changed, 170 insertions(+), 65 deletions(-)
 create mode 100644 drivers/xen/mem-reservation.c
 create mode 100644 include/xen/mem_reservation.h

Comments

Boris Ostrovsky May 29, 2018, 6:04 p.m. UTC | #1
On 05/25/2018 11:33 AM, Oleksandr Andrushchenko wrote:
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>
> Memory {increase|decrease}_reservation and VA mappings update/reset
> code used in balloon driver can be made common, so other drivers can
> also re-use the same functionality without open-coding.
> Create a dedicated module 

IIUIC this is not really a module, it's a common file.


> for the shared code and export corresponding
> symbols for other kernel modules.
>
> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
> ---
>  drivers/xen/Makefile          |   1 +
>  drivers/xen/balloon.c         |  71 ++----------------
>  drivers/xen/mem-reservation.c | 134 ++++++++++++++++++++++++++++++++++
>  include/xen/mem_reservation.h |  29 ++++++++
>  4 files changed, 170 insertions(+), 65 deletions(-)
>  create mode 100644 drivers/xen/mem-reservation.c
>  create mode 100644 include/xen/mem_reservation.h
>
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index 451e833f5931..3c87b0c3aca6 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -2,6 +2,7 @@
>  obj-$(CONFIG_HOTPLUG_CPU)		+= cpu_hotplug.o
>  obj-$(CONFIG_X86)			+= fallback.o
>  obj-y	+= grant-table.o features.o balloon.o manage.o preempt.o time.o
> +obj-y	+= mem-reservation.o
>  obj-y	+= events/
>  obj-y	+= xenbus/
>  
> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
> index 065f0b607373..57b482d67a3a 100644
> --- a/drivers/xen/balloon.c
> +++ b/drivers/xen/balloon.c
> @@ -71,6 +71,7 @@
>  #include <xen/balloon.h>
>  #include <xen/features.h>
>  #include <xen/page.h>
> +#include <xen/mem_reservation.h>
>  
>  static int xen_hotplug_unpopulated;
>  
> @@ -157,13 +158,6 @@ static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
>  #define GFP_BALLOON \
>  	(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
>  
> -static void scrub_page(struct page *page)
> -{
> -#ifdef CONFIG_XEN_SCRUB_PAGES
> -	clear_highpage(page);
> -#endif
> -}
> -
>  /* balloon_append: add the given page to the balloon. */
>  static void __balloon_append(struct page *page)
>  {
> @@ -463,11 +457,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
>  	int rc;
>  	unsigned long i;
>  	struct page   *page;
> -	struct xen_memory_reservation reservation = {
> -		.address_bits = 0,
> -		.extent_order = EXTENT_ORDER,
> -		.domid        = DOMID_SELF
> -	};
>  
>  	if (nr_pages > ARRAY_SIZE(frame_list))
>  		nr_pages = ARRAY_SIZE(frame_list);
> @@ -486,9 +475,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
>  		page = balloon_next_page(page);
>  	}
>  
> -	set_xen_guest_handle(reservation.extent_start, frame_list);
> -	reservation.nr_extents = nr_pages;
> -	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
> +	rc = xenmem_reservation_increase(nr_pages, frame_list);
>  	if (rc <= 0)
>  		return BP_EAGAIN;
>  
> @@ -496,29 +483,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
>  		page = balloon_retrieve(false);
>  		BUG_ON(page == NULL);
>  
> -#ifdef CONFIG_XEN_HAVE_PVMMU
> -		/*
> -		 * We don't support PV MMU when Linux and Xen is using
> -		 * different page granularity.
> -		 */
> -		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
> -
> -		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> -			unsigned long pfn = page_to_pfn(page);
> -
> -			set_phys_to_machine(pfn, frame_list[i]);
> -
> -			/* Link back into the page tables if not highmem. */
> -			if (!PageHighMem(page)) {
> -				int ret;
> -				ret = HYPERVISOR_update_va_mapping(
> -						(unsigned long)__va(pfn << PAGE_SHIFT),
> -						mfn_pte(frame_list[i], PAGE_KERNEL),
> -						0);
> -				BUG_ON(ret);
> -			}
> -		}
> -#endif
> +		xenmem_reservation_va_mapping_update(1, &page, &frame_list[i]);


Can you make a single call to xenmem_reservation_va_mapping_update(rc,
...)? You need to keep track of pages but presumably they can be put
into an array (or a list). In fact, perhaps we can have
balloon_retrieve() return a set of pages.




>  
>  		/* Relinquish the page back to the allocator. */
>  		free_reserved_page(page);
> @@ -535,11 +500,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>  	unsigned long i;
>  	struct page *page, *tmp;
>  	int ret;
> -	struct xen_memory_reservation reservation = {
> -		.address_bits = 0,
> -		.extent_order = EXTENT_ORDER,
> -		.domid        = DOMID_SELF
> -	};
>  	LIST_HEAD(pages);
>  
>  	if (nr_pages > ARRAY_SIZE(frame_list))
> @@ -553,7 +513,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>  			break;
>  		}
>  		adjust_managed_page_count(page, -1);
> -		scrub_page(page);
> +		xenmem_reservation_scrub_page(page);
>  		list_add(&page->lru, &pages);
>  	}
>  
> @@ -575,25 +535,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>  		/* XENMEM_decrease_reservation requires a GFN */
>  		frame_list[i++] = xen_page_to_gfn(page);
>  
> -#ifdef CONFIG_XEN_HAVE_PVMMU
> -		/*
> -		 * We don't support PV MMU when Linux and Xen is using
> -		 * different page granularity.
> -		 */
> -		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
> -
> -		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> -			unsigned long pfn = page_to_pfn(page);
> +		xenmem_reservation_va_mapping_reset(1, &page);


and here too.


>  
> -			if (!PageHighMem(page)) {
> -				ret = HYPERVISOR_update_va_mapping(
> -						(unsigned long)__va(pfn << PAGE_SHIFT),
> -						__pte_ma(0), 0);
> -				BUG_ON(ret);
> -			}
> -			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
> -		}
> -#endif
>  		list_del(&page->lru);
>  
>  		balloon_append(page);
> @@ -601,9 +544,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>  
>  	flush_tlb_all();
>  
> -	set_xen_guest_handle(reservation.extent_start, frame_list);
> -	reservation.nr_extents   = nr_pages;
> -	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
> +	ret = xenmem_reservation_decrease(nr_pages, frame_list);
>  	BUG_ON(ret != nr_pages);
>  
>  	balloon_stats.current_pages -= nr_pages;
> diff --git a/drivers/xen/mem-reservation.c b/drivers/xen/mem-reservation.c
> new file mode 100644
> index 000000000000..29882e4324f5
> --- /dev/null
> +++ b/drivers/xen/mem-reservation.c
> @@ -0,0 +1,134 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT


Why is this "OR MIT"? The original file was licensed GPLv2 only.


> +
> +/******************************************************************************
> + * Xen memory reservation utilities.
> + *
> + * Copyright (c) 2003, B Dragovic
> + * Copyright (c) 2003-2004, M Williamson, K Fraser
> + * Copyright (c) 2005 Dan M. Smith, IBM Corporation
> + * Copyright (c) 2010 Daniel Kiper
> + * Copyright (c) 2018, Oleksandr Andrushchenko, EPAM Systems Inc.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/slab.h>
> +
> +#include <asm/tlb.h>
> +#include <asm/xen/hypercall.h>
> +
> +#include <xen/interface/memory.h>
> +#include <xen/page.h>
> +
> +/*
> + * Use one extent per PAGE_SIZE to avoid to break down the page into
> + * multiple frame.
> + */
> +#define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1)
> +
> +void xenmem_reservation_scrub_page(struct page *page)
> +{
> +#ifdef CONFIG_XEN_SCRUB_PAGES
> +	clear_highpage(page);
> +#endif
> +}
> +EXPORT_SYMBOL(xenmem_reservation_scrub_page);
> +
> +void xenmem_reservation_va_mapping_update(unsigned long count,
> +					  struct page **pages,
> +					  xen_pfn_t *frames)
> +{
> +#ifdef CONFIG_XEN_HAVE_PVMMU
> +	int i;
> +
> +	for (i = 0; i < count; i++) {
> +		struct page *page;
> +
> +		page = pages[i];
> +		BUG_ON(page == NULL);
> +
> +		/*
> +		 * We don't support PV MMU when Linux and Xen is using
> +		 * different page granularity.
> +		 */
> +		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
> +
> +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> +			unsigned long pfn = page_to_pfn(page);
> +
> +			set_phys_to_machine(pfn, frames[i]);
> +
> +			/* Link back into the page tables if not highmem. */
> +			if (!PageHighMem(page)) {
> +				int ret;
> +
> +				ret = HYPERVISOR_update_va_mapping(
> +						(unsigned long)__va(pfn << PAGE_SHIFT),
> +						mfn_pte(frames[i], PAGE_KERNEL),
> +						0);
> +				BUG_ON(ret);
> +			}
> +		}
> +	}
> +#endif
> +}
> +EXPORT_SYMBOL(xenmem_reservation_va_mapping_update);
> +
> +void xenmem_reservation_va_mapping_reset(unsigned long count,
> +					 struct page **pages)
> +{
> +#ifdef CONFIG_XEN_HAVE_PVMMU
> +	int i;
> +
> +	for (i = 0; i < count; i++) {
> +		/*
> +		 * We don't support PV MMU when Linux and Xen is using
> +		 * different page granularity.
> +		 */
> +		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
> +
> +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> +			struct page *page = pages[i];
> +			unsigned long pfn = page_to_pfn(page);
> +
> +			if (!PageHighMem(page)) {
> +				int ret;
> +
> +				ret = HYPERVISOR_update_va_mapping(
> +						(unsigned long)__va(pfn << PAGE_SHIFT),
> +						__pte_ma(0), 0);
> +				BUG_ON(ret);
> +			}
> +			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
> +		}
> +	}
> +#endif
> +}
> +EXPORT_SYMBOL(xenmem_reservation_va_mapping_reset);
> +
> +int xenmem_reservation_increase(int count, xen_pfn_t *frames)
> +{
> +	struct xen_memory_reservation reservation = {
> +		.address_bits = 0,
> +		.extent_order = EXTENT_ORDER,
> +		.domid        = DOMID_SELF
> +	};
> +
> +	set_xen_guest_handle(reservation.extent_start, frames);
> +	reservation.nr_extents = count;
> +	return HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
> +}
> +EXPORT_SYMBOL(xenmem_reservation_increase);
> +
> +int xenmem_reservation_decrease(int count, xen_pfn_t *frames)
> +{
> +	struct xen_memory_reservation reservation = {
> +		.address_bits = 0,
> +		.extent_order = EXTENT_ORDER,
> +		.domid        = DOMID_SELF
> +	};
> +
> +	set_xen_guest_handle(reservation.extent_start, frames);
> +	reservation.nr_extents = count;
> +	return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
> +}
> +EXPORT_SYMBOL(xenmem_reservation_decrease);
> diff --git a/include/xen/mem_reservation.h b/include/xen/mem_reservation.h
> new file mode 100644
> index 000000000000..9306d9b8743c
> --- /dev/null
> +++ b/include/xen/mem_reservation.h
> @@ -0,0 +1,29 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */

and here too.


-boris


> +
> +/*
> + * Xen memory reservation utilities.
> + *
> + * Copyright (c) 2003, B Dragovic
> + * Copyright (c) 2003-2004, M Williamson, K Fraser
> + * Copyright (c) 2005 Dan M. Smith, IBM Corporation
> + * Copyright (c) 2010 Daniel Kiper
> + * Copyright (c) 2018, Oleksandr Andrushchenko, EPAM Systems Inc.
> + */
> +
> +#ifndef _XENMEM_RESERVATION_H
> +#define _XENMEM_RESERVATION_H
> +
> +void xenmem_reservation_scrub_page(struct page *page);
> +
> +void xenmem_reservation_va_mapping_update(unsigned long count,
> +					  struct page **pages,
> +					  xen_pfn_t *frames);
> +
> +void xenmem_reservation_va_mapping_reset(unsigned long count,
> +					 struct page **pages);
> +
> +int xenmem_reservation_increase(int count, xen_pfn_t *frames);
> +
> +int xenmem_reservation_decrease(int count, xen_pfn_t *frames);
> +
> +#endif
Oleksandr Andrushchenko May 29, 2018, 6:22 p.m. UTC | #2
On 05/29/2018 09:04 PM, Boris Ostrovsky wrote:
> On 05/25/2018 11:33 AM, Oleksandr Andrushchenko wrote:
>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>
>> Memory {increase|decrease}_reservation and VA mappings update/reset
>> code used in balloon driver can be made common, so other drivers can
>> also re-use the same functionality without open-coding.
>> Create a dedicated module
> IIUIC this is not really a module, it's a common file.
Sure, will put "file" here
>
>> for the shared code and export corresponding
>> symbols for other kernel modules.
>>
>> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>> ---
>>   drivers/xen/Makefile          |   1 +
>>   drivers/xen/balloon.c         |  71 ++----------------
>>   drivers/xen/mem-reservation.c | 134 ++++++++++++++++++++++++++++++++++
>>   include/xen/mem_reservation.h |  29 ++++++++
>>   4 files changed, 170 insertions(+), 65 deletions(-)
>>   create mode 100644 drivers/xen/mem-reservation.c
>>   create mode 100644 include/xen/mem_reservation.h
>>
>> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
>> index 451e833f5931..3c87b0c3aca6 100644
>> --- a/drivers/xen/Makefile
>> +++ b/drivers/xen/Makefile
>> @@ -2,6 +2,7 @@
>>   obj-$(CONFIG_HOTPLUG_CPU)		+= cpu_hotplug.o
>>   obj-$(CONFIG_X86)			+= fallback.o
>>   obj-y	+= grant-table.o features.o balloon.o manage.o preempt.o time.o
>> +obj-y	+= mem-reservation.o
>>   obj-y	+= events/
>>   obj-y	+= xenbus/
>>   
>> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
>> index 065f0b607373..57b482d67a3a 100644
>> --- a/drivers/xen/balloon.c
>> +++ b/drivers/xen/balloon.c
>> @@ -71,6 +71,7 @@
>>   #include <xen/balloon.h>
>>   #include <xen/features.h>
>>   #include <xen/page.h>
>> +#include <xen/mem_reservation.h>
>>   
>>   static int xen_hotplug_unpopulated;
>>   
>> @@ -157,13 +158,6 @@ static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
>>   #define GFP_BALLOON \
>>   	(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
>>   
>> -static void scrub_page(struct page *page)
>> -{
>> -#ifdef CONFIG_XEN_SCRUB_PAGES
>> -	clear_highpage(page);
>> -#endif
>> -}
>> -
>>   /* balloon_append: add the given page to the balloon. */
>>   static void __balloon_append(struct page *page)
>>   {
>> @@ -463,11 +457,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
>>   	int rc;
>>   	unsigned long i;
>>   	struct page   *page;
>> -	struct xen_memory_reservation reservation = {
>> -		.address_bits = 0,
>> -		.extent_order = EXTENT_ORDER,
>> -		.domid        = DOMID_SELF
>> -	};
>>   
>>   	if (nr_pages > ARRAY_SIZE(frame_list))
>>   		nr_pages = ARRAY_SIZE(frame_list);
>> @@ -486,9 +475,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
>>   		page = balloon_next_page(page);
>>   	}
>>   
>> -	set_xen_guest_handle(reservation.extent_start, frame_list);
>> -	reservation.nr_extents = nr_pages;
>> -	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
>> +	rc = xenmem_reservation_increase(nr_pages, frame_list);
>>   	if (rc <= 0)
>>   		return BP_EAGAIN;
>>   
>> @@ -496,29 +483,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
>>   		page = balloon_retrieve(false);
>>   		BUG_ON(page == NULL);
>>   
>> -#ifdef CONFIG_XEN_HAVE_PVMMU
>> -		/*
>> -		 * We don't support PV MMU when Linux and Xen is using
>> -		 * different page granularity.
>> -		 */
>> -		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
>> -
>> -		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>> -			unsigned long pfn = page_to_pfn(page);
>> -
>> -			set_phys_to_machine(pfn, frame_list[i]);
>> -
>> -			/* Link back into the page tables if not highmem. */
>> -			if (!PageHighMem(page)) {
>> -				int ret;
>> -				ret = HYPERVISOR_update_va_mapping(
>> -						(unsigned long)__va(pfn << PAGE_SHIFT),
>> -						mfn_pte(frame_list[i], PAGE_KERNEL),
>> -						0);
>> -				BUG_ON(ret);
>> -			}
>> -		}
>> -#endif
>> +		xenmem_reservation_va_mapping_update(1, &page, &frame_list[i]);
>
> Can you make a single call to xenmem_reservation_va_mapping_update(rc,
> ...)? You need to keep track of pages but presumably they can be put
> into an array (or a list). In fact, perhaps we can have
> balloon_retrieve() return a set of pages.
This is actually how it is used later on for dma-buf, but I just didn't want
to alter the original balloon code too much. Still, this can be done; in
order of simplicity:

1. Similar to frame_list, e.g. static array of struct page* of size 
ARRAY_SIZE(frame_list):
more static memory is used, but no allocations

2. Allocated at run-time with kcalloc: allocation can fail

3. Make balloon_retrieve() return a set of pages: will require
list/array allocation and handling; the allocation may fail, and the
balloon_retrieve() prototype would change

Could you please tell me which of the above would fit best?

>
>
>
>>   
>>   		/* Relinquish the page back to the allocator. */
>>   		free_reserved_page(page);
>> @@ -535,11 +500,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>   	unsigned long i;
>>   	struct page *page, *tmp;
>>   	int ret;
>> -	struct xen_memory_reservation reservation = {
>> -		.address_bits = 0,
>> -		.extent_order = EXTENT_ORDER,
>> -		.domid        = DOMID_SELF
>> -	};
>>   	LIST_HEAD(pages);
>>   
>>   	if (nr_pages > ARRAY_SIZE(frame_list))
>> @@ -553,7 +513,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>   			break;
>>   		}
>>   		adjust_managed_page_count(page, -1);
>> -		scrub_page(page);
>> +		xenmem_reservation_scrub_page(page);
>>   		list_add(&page->lru, &pages);
>>   	}
>>   
>> @@ -575,25 +535,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>   		/* XENMEM_decrease_reservation requires a GFN */
>>   		frame_list[i++] = xen_page_to_gfn(page);
>>   
>> -#ifdef CONFIG_XEN_HAVE_PVMMU
>> -		/*
>> -		 * We don't support PV MMU when Linux and Xen is using
>> -		 * different page granularity.
>> -		 */
>> -		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
>> -
>> -		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>> -			unsigned long pfn = page_to_pfn(page);
>> +		xenmem_reservation_va_mapping_reset(1, &page);
>
> and here too.
see above
>
>>   
>> -			if (!PageHighMem(page)) {
>> -				ret = HYPERVISOR_update_va_mapping(
>> -						(unsigned long)__va(pfn << PAGE_SHIFT),
>> -						__pte_ma(0), 0);
>> -				BUG_ON(ret);
>> -			}
>> -			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
>> -		}
>> -#endif
>>   		list_del(&page->lru);
>>   
>>   		balloon_append(page);
>> @@ -601,9 +544,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>   
>>   	flush_tlb_all();
>>   
>> -	set_xen_guest_handle(reservation.extent_start, frame_list);
>> -	reservation.nr_extents   = nr_pages;
>> -	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
>> +	ret = xenmem_reservation_decrease(nr_pages, frame_list);
>>   	BUG_ON(ret != nr_pages);
>>   
>>   	balloon_stats.current_pages -= nr_pages;
>> diff --git a/drivers/xen/mem-reservation.c b/drivers/xen/mem-reservation.c
>> new file mode 100644
>> index 000000000000..29882e4324f5
>> --- /dev/null
>> +++ b/drivers/xen/mem-reservation.c
>> @@ -0,0 +1,134 @@
>> +// SPDX-License-Identifier: GPL-2.0 OR MIT
>
> Why is this "OR MIT"? The original file was licensed GPLv2 only.
Will fix - I was not sure about the license to be used here, thanks
>
>> +
>> +/******************************************************************************
>> + * Xen memory reservation utilities.
>> + *
>> + * Copyright (c) 2003, B Dragovic
>> + * Copyright (c) 2003-2004, M Williamson, K Fraser
>> + * Copyright (c) 2005 Dan M. Smith, IBM Corporation
>> + * Copyright (c) 2010 Daniel Kiper
>> + * Copyright (c) 2018, Oleksandr Andrushchenko, EPAM Systems Inc.
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/slab.h>
>> +
>> +#include <asm/tlb.h>
>> +#include <asm/xen/hypercall.h>
>> +
>> +#include <xen/interface/memory.h>
>> +#include <xen/page.h>
>> +
>> +/*
>> + * Use one extent per PAGE_SIZE to avoid to break down the page into
>> + * multiple frame.
>> + */
>> +#define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1)
>> +
>> +void xenmem_reservation_scrub_page(struct page *page)
>> +{
>> +#ifdef CONFIG_XEN_SCRUB_PAGES
>> +	clear_highpage(page);
>> +#endif
>> +}
>> +EXPORT_SYMBOL(xenmem_reservation_scrub_page);
>> +
>> +void xenmem_reservation_va_mapping_update(unsigned long count,
>> +					  struct page **pages,
>> +					  xen_pfn_t *frames)
>> +{
>> +#ifdef CONFIG_XEN_HAVE_PVMMU
>> +	int i;
>> +
>> +	for (i = 0; i < count; i++) {
>> +		struct page *page;
>> +
>> +		page = pages[i];
>> +		BUG_ON(page == NULL);
>> +
>> +		/*
>> +		 * We don't support PV MMU when Linux and Xen is using
>> +		 * different page granularity.
>> +		 */
>> +		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
>> +
>> +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>> +			unsigned long pfn = page_to_pfn(page);
>> +
>> +			set_phys_to_machine(pfn, frames[i]);
>> +
>> +			/* Link back into the page tables if not highmem. */
>> +			if (!PageHighMem(page)) {
>> +				int ret;
>> +
>> +				ret = HYPERVISOR_update_va_mapping(
>> +						(unsigned long)__va(pfn << PAGE_SHIFT),
>> +						mfn_pte(frames[i], PAGE_KERNEL),
>> +						0);
>> +				BUG_ON(ret);
>> +			}
>> +		}
>> +	}
>> +#endif
>> +}
>> +EXPORT_SYMBOL(xenmem_reservation_va_mapping_update);
>> +
>> +void xenmem_reservation_va_mapping_reset(unsigned long count,
>> +					 struct page **pages)
>> +{
>> +#ifdef CONFIG_XEN_HAVE_PVMMU
>> +	int i;
>> +
>> +	for (i = 0; i < count; i++) {
>> +		/*
>> +		 * We don't support PV MMU when Linux and Xen is using
>> +		 * different page granularity.
>> +		 */
>> +		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
>> +
>> +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>> +			struct page *page = pages[i];
>> +			unsigned long pfn = page_to_pfn(page);
>> +
>> +			if (!PageHighMem(page)) {
>> +				int ret;
>> +
>> +				ret = HYPERVISOR_update_va_mapping(
>> +						(unsigned long)__va(pfn << PAGE_SHIFT),
>> +						__pte_ma(0), 0);
>> +				BUG_ON(ret);
>> +			}
>> +			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
>> +		}
>> +	}
>> +#endif
>> +}
>> +EXPORT_SYMBOL(xenmem_reservation_va_mapping_reset);
>> +
>> +int xenmem_reservation_increase(int count, xen_pfn_t *frames)
>> +{
>> +	struct xen_memory_reservation reservation = {
>> +		.address_bits = 0,
>> +		.extent_order = EXTENT_ORDER,
>> +		.domid        = DOMID_SELF
>> +	};
>> +
>> +	set_xen_guest_handle(reservation.extent_start, frames);
>> +	reservation.nr_extents = count;
>> +	return HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
>> +}
>> +EXPORT_SYMBOL(xenmem_reservation_increase);
>> +
>> +int xenmem_reservation_decrease(int count, xen_pfn_t *frames)
>> +{
>> +	struct xen_memory_reservation reservation = {
>> +		.address_bits = 0,
>> +		.extent_order = EXTENT_ORDER,
>> +		.domid        = DOMID_SELF
>> +	};
>> +
>> +	set_xen_guest_handle(reservation.extent_start, frames);
>> +	reservation.nr_extents = count;
>> +	return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
>> +}
>> +EXPORT_SYMBOL(xenmem_reservation_decrease);
>> diff --git a/include/xen/mem_reservation.h b/include/xen/mem_reservation.h
>> new file mode 100644
>> index 000000000000..9306d9b8743c
>> --- /dev/null
>> +++ b/include/xen/mem_reservation.h
>> @@ -0,0 +1,29 @@
>> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> and here too.
will fix
>
> -boris
>
Thank you,
Oleksandr
>> +
>> +/*
>> + * Xen memory reservation utilities.
>> + *
>> + * Copyright (c) 2003, B Dragovic
>> + * Copyright (c) 2003-2004, M Williamson, K Fraser
>> + * Copyright (c) 2005 Dan M. Smith, IBM Corporation
>> + * Copyright (c) 2010 Daniel Kiper
>> + * Copyright (c) 2018, Oleksandr Andrushchenko, EPAM Systems Inc.
>> + */
>> +
>> +#ifndef _XENMEM_RESERVATION_H
>> +#define _XENMEM_RESERVATION_H
>> +
>> +void xenmem_reservation_scrub_page(struct page *page);
>> +
>> +void xenmem_reservation_va_mapping_update(unsigned long count,
>> +					  struct page **pages,
>> +					  xen_pfn_t *frames);
>> +
>> +void xenmem_reservation_va_mapping_reset(unsigned long count,
>> +					 struct page **pages);
>> +
>> +int xenmem_reservation_increase(int count, xen_pfn_t *frames);
>> +
>> +int xenmem_reservation_decrease(int count, xen_pfn_t *frames);
>> +
>> +#endif
Oleksandr Andrushchenko May 29, 2018, 6:23 p.m. UTC | #3
On 05/29/2018 09:24 PM, Boris Ostrovsky wrote:
> On 05/25/2018 11:33 AM, Oleksandr Andrushchenko wrote:
>> +
>> +void xenmem_reservation_va_mapping_update(unsigned long count,
>> +					  struct page **pages,
>> +					  xen_pfn_t *frames)
>> +{
>> +#ifdef CONFIG_XEN_HAVE_PVMMU
>> +	int i;
>> +
>> +	for (i = 0; i < count; i++) {
>> +		struct page *page;
>> +
>> +		page = pages[i];
>> +		BUG_ON(page == NULL);
>> +
>> +		/*
>> +		 * We don't support PV MMU when Linux and Xen is using
>> +		 * different page granularity.
>> +		 */
>> +		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
>> +
>> +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>> +			unsigned long pfn = page_to_pfn(page);
>> +
>> +			set_phys_to_machine(pfn, frames[i]);
>> +
>> +			/* Link back into the page tables if not highmem. */
>> +			if (!PageHighMem(page)) {
>> +				int ret;
>> +
>> +				ret = HYPERVISOR_update_va_mapping(
>> +						(unsigned long)__va(pfn << PAGE_SHIFT),
>> +						mfn_pte(frames[i], PAGE_KERNEL),
>> +						0);
>> +				BUG_ON(ret);
>> +			}
>> +		}
>> +	}
>> +#endif
>> +}
>> +EXPORT_SYMBOL(xenmem_reservation_va_mapping_update);
>> +
>> +void xenmem_reservation_va_mapping_reset(unsigned long count,
>> +					 struct page **pages)
>> +{
>> +#ifdef CONFIG_XEN_HAVE_PVMMU
>> +	int i;
>> +
>> +	for (i = 0; i < count; i++) {
>> +		/*
>> +		 * We don't support PV MMU when Linux and Xen is using
>> +		 * different page granularity.
>> +		 */
>> +		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
>> +
>> +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>> +			struct page *page = pages[i];
>> +			unsigned long pfn = page_to_pfn(page);
>> +
>> +			if (!PageHighMem(page)) {
>> +				int ret;
>> +
>> +				ret = HYPERVISOR_update_va_mapping(
>> +						(unsigned long)__va(pfn << PAGE_SHIFT),
>> +						__pte_ma(0), 0);
>> +				BUG_ON(ret);
>> +			}
>> +			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
>> +		}
>> +	}
>> +#endif
>> +}
>> +EXPORT_SYMBOL(xenmem_reservation_va_mapping_reset);
> One other thing I noticed --- both of these can be declared as NOPs in
> the header file if !CONFIG_XEN_HAVE_PVMMU.
Will rework it to be a no-op for !CONFIG_XEN_HAVE_PVMMU
> -boris
Boris Ostrovsky May 29, 2018, 6:24 p.m. UTC | #4
On 05/25/2018 11:33 AM, Oleksandr Andrushchenko wrote:
> +
> +void xenmem_reservation_va_mapping_update(unsigned long count,
> +					  struct page **pages,
> +					  xen_pfn_t *frames)
> +{
> +#ifdef CONFIG_XEN_HAVE_PVMMU
> +	int i;
> +
> +	for (i = 0; i < count; i++) {
> +		struct page *page;
> +
> +		page = pages[i];
> +		BUG_ON(page == NULL);
> +
> +		/*
> +		 * We don't support PV MMU when Linux and Xen is using
> +		 * different page granularity.
> +		 */
> +		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
> +
> +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> +			unsigned long pfn = page_to_pfn(page);
> +
> +			set_phys_to_machine(pfn, frames[i]);
> +
> +			/* Link back into the page tables if not highmem. */
> +			if (!PageHighMem(page)) {
> +				int ret;
> +
> +				ret = HYPERVISOR_update_va_mapping(
> +						(unsigned long)__va(pfn << PAGE_SHIFT),
> +						mfn_pte(frames[i], PAGE_KERNEL),
> +						0);
> +				BUG_ON(ret);
> +			}
> +		}
> +	}
> +#endif
> +}
> +EXPORT_SYMBOL(xenmem_reservation_va_mapping_update);
> +
> +void xenmem_reservation_va_mapping_reset(unsigned long count,
> +					 struct page **pages)
> +{
> +#ifdef CONFIG_XEN_HAVE_PVMMU
> +	int i;
> +
> +	for (i = 0; i < count; i++) {
> +		/*
> +		 * We don't support PV MMU when Linux and Xen is using
> +		 * different page granularity.
> +		 */
> +		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
> +
> +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> +			struct page *page = pages[i];
> +			unsigned long pfn = page_to_pfn(page);
> +
> +			if (!PageHighMem(page)) {
> +				int ret;
> +
> +				ret = HYPERVISOR_update_va_mapping(
> +						(unsigned long)__va(pfn << PAGE_SHIFT),
> +						__pte_ma(0), 0);
> +				BUG_ON(ret);
> +			}
> +			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
> +		}
> +	}
> +#endif
> +}
> +EXPORT_SYMBOL(xenmem_reservation_va_mapping_reset);

One other thing I noticed --- both of these can be declared as NOPs in
the header file if !CONFIG_XEN_HAVE_PVMMU.

-boris
Boris Ostrovsky May 29, 2018, 8:03 p.m. UTC | #5
On 05/29/2018 02:22 PM, Oleksandr Andrushchenko wrote:
> On 05/29/2018 09:04 PM, Boris Ostrovsky wrote:
>> On 05/25/2018 11:33 AM, Oleksandr Andrushchenko wrote:

>> @@ -463,11 +457,6 @@ static enum bp_state
>> increase_reservation(unsigned long nr_pages)
>>       int rc;
>>       unsigned long i;
>>       struct page   *page;
>> -    struct xen_memory_reservation reservation = {
>> -        .address_bits = 0,
>> -        .extent_order = EXTENT_ORDER,
>> -        .domid        = DOMID_SELF
>> -    };
>>         if (nr_pages > ARRAY_SIZE(frame_list))
>>           nr_pages = ARRAY_SIZE(frame_list);
>> @@ -486,9 +475,7 @@ static enum bp_state
>> increase_reservation(unsigned long nr_pages)
>>           page = balloon_next_page(page);
>>       }
>>   -    set_xen_guest_handle(reservation.extent_start, frame_list);
>> -    reservation.nr_extents = nr_pages;
>> -    rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
>> +    rc = xenmem_reservation_increase(nr_pages, frame_list);
>>       if (rc <= 0)
>>           return BP_EAGAIN;
>>   @@ -496,29 +483,7 @@ static enum bp_state
>> increase_reservation(unsigned long nr_pages)
>>           page = balloon_retrieve(false);
>>           BUG_ON(page == NULL);
>>   -#ifdef CONFIG_XEN_HAVE_PVMMU
>> -        /*
>> -         * We don't support PV MMU when Linux and Xen is using
>> -         * different page granularity.
>> -         */
>> -        BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
>> -
>> -        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>> -            unsigned long pfn = page_to_pfn(page);
>> -
>> -            set_phys_to_machine(pfn, frame_list[i]);
>> -
>> -            /* Link back into the page tables if not highmem. */
>> -            if (!PageHighMem(page)) {
>> -                int ret;
>> -                ret = HYPERVISOR_update_va_mapping(
>> -                        (unsigned long)__va(pfn << PAGE_SHIFT),
>> -                        mfn_pte(frame_list[i], PAGE_KERNEL),
>> -                        0);
>> -                BUG_ON(ret);
>> -            }
>> -        }
>> -#endif
>> +        xenmem_reservation_va_mapping_update(1, &page, &frame_list[i]);
>>
>> Can you make a single call to xenmem_reservation_va_mapping_update(rc,
>> ...)? You need to keep track of pages but presumably they can be put
>> into an array (or a list). In fact, perhaps we can have
>> balloon_retrieve() return a set of pages.
> This is actually how it is used later on for dma-buf, but I just
> didn't want
> to alter original balloon code too much, but this can be done, in
> order of simplicity:
>
> 1. Similar to frame_list, e.g. static array of struct page* of size
> ARRAY_SIZE(frame_list):
> more static memory is used, but no allocations
>
> 2. Allocated at run-time with kcalloc: allocation can fail


If this is called in the DMA buffer freeing code path or in an error path
then we shouldn't do it.


>
> 3. Make balloon_retrieve() return a set of pages: will require
> list/array allocation
> and handling, allocation may fail, balloon_retrieve prototype change


balloon pages are strung on the lru list. Can we have
balloon_retrieve return a list of pages from that list?

-boris


>
> Could you please tell which of the above will fit better?
>
>>
>>
Jürgen Groß May 30, 2018, 4:32 a.m. UTC | #6
On 25/05/18 17:33, Oleksandr Andrushchenko wrote:
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
> 
> Memory {increase|decrease}_reservation and VA mappings update/reset
> code used in balloon driver can be made common, so other drivers can
> also re-use the same functionality without open-coding.
> Create a dedicated module for the shared code and export corresponding
> symbols for other kernel modules.
> 
> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
> ---
>  drivers/xen/Makefile          |   1 +
>  drivers/xen/balloon.c         |  71 ++----------------
>  drivers/xen/mem-reservation.c | 134 ++++++++++++++++++++++++++++++++++
>  include/xen/mem_reservation.h |  29 ++++++++
>  4 files changed, 170 insertions(+), 65 deletions(-)
>  create mode 100644 drivers/xen/mem-reservation.c
>  create mode 100644 include/xen/mem_reservation.h

Can you please name this include/xen/mem-reservation.h ?

> 
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index 451e833f5931..3c87b0c3aca6 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -2,6 +2,7 @@
>  obj-$(CONFIG_HOTPLUG_CPU)		+= cpu_hotplug.o
>  obj-$(CONFIG_X86)			+= fallback.o
>  obj-y	+= grant-table.o features.o balloon.o manage.o preempt.o time.o
> +obj-y	+= mem-reservation.o
>  obj-y	+= events/
>  obj-y	+= xenbus/
>  
> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
> index 065f0b607373..57b482d67a3a 100644
> --- a/drivers/xen/balloon.c
> +++ b/drivers/xen/balloon.c
> @@ -71,6 +71,7 @@
>  #include <xen/balloon.h>
>  #include <xen/features.h>
>  #include <xen/page.h>
> +#include <xen/mem_reservation.h>
>  
>  static int xen_hotplug_unpopulated;
>  
> @@ -157,13 +158,6 @@ static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
>  #define GFP_BALLOON \
>  	(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
>  
> -static void scrub_page(struct page *page)
> -{
> -#ifdef CONFIG_XEN_SCRUB_PAGES
> -	clear_highpage(page);
> -#endif
> -}
> -
>  /* balloon_append: add the given page to the balloon. */
>  static void __balloon_append(struct page *page)
>  {
> @@ -463,11 +457,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
>  	int rc;
>  	unsigned long i;
>  	struct page   *page;
> -	struct xen_memory_reservation reservation = {
> -		.address_bits = 0,
> -		.extent_order = EXTENT_ORDER,
> -		.domid        = DOMID_SELF
> -	};
>  
>  	if (nr_pages > ARRAY_SIZE(frame_list))
>  		nr_pages = ARRAY_SIZE(frame_list);
> @@ -486,9 +475,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
>  		page = balloon_next_page(page);
>  	}
>  
> -	set_xen_guest_handle(reservation.extent_start, frame_list);
> -	reservation.nr_extents = nr_pages;
> -	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
> +	rc = xenmem_reservation_increase(nr_pages, frame_list);
>  	if (rc <= 0)
>  		return BP_EAGAIN;
>  
> @@ -496,29 +483,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
>  		page = balloon_retrieve(false);
>  		BUG_ON(page == NULL);
>  
> -#ifdef CONFIG_XEN_HAVE_PVMMU
> -		/*
> -		 * We don't support PV MMU when Linux and Xen is using
> -		 * different page granularity.
> -		 */
> -		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
> -
> -		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> -			unsigned long pfn = page_to_pfn(page);
> -
> -			set_phys_to_machine(pfn, frame_list[i]);
> -
> -			/* Link back into the page tables if not highmem. */
> -			if (!PageHighMem(page)) {
> -				int ret;
> -				ret = HYPERVISOR_update_va_mapping(
> -						(unsigned long)__va(pfn << PAGE_SHIFT),
> -						mfn_pte(frame_list[i], PAGE_KERNEL),
> -						0);
> -				BUG_ON(ret);
> -			}
> -		}
> -#endif
> +		xenmem_reservation_va_mapping_update(1, &page, &frame_list[i]);
>  
>  		/* Relinquish the page back to the allocator. */
>  		free_reserved_page(page);
> @@ -535,11 +500,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>  	unsigned long i;
>  	struct page *page, *tmp;
>  	int ret;
> -	struct xen_memory_reservation reservation = {
> -		.address_bits = 0,
> -		.extent_order = EXTENT_ORDER,
> -		.domid        = DOMID_SELF
> -	};
>  	LIST_HEAD(pages);
>  
>  	if (nr_pages > ARRAY_SIZE(frame_list))
> @@ -553,7 +513,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>  			break;
>  		}
>  		adjust_managed_page_count(page, -1);
> -		scrub_page(page);
> +		xenmem_reservation_scrub_page(page);
>  		list_add(&page->lru, &pages);
>  	}
>  
> @@ -575,25 +535,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>  		/* XENMEM_decrease_reservation requires a GFN */
>  		frame_list[i++] = xen_page_to_gfn(page);
>  
> -#ifdef CONFIG_XEN_HAVE_PVMMU
> -		/*
> -		 * We don't support PV MMU when Linux and Xen is using
> -		 * different page granularity.
> -		 */
> -		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
> -
> -		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> -			unsigned long pfn = page_to_pfn(page);
> +		xenmem_reservation_va_mapping_reset(1, &page);
>  
> -			if (!PageHighMem(page)) {
> -				ret = HYPERVISOR_update_va_mapping(
> -						(unsigned long)__va(pfn << PAGE_SHIFT),
> -						__pte_ma(0), 0);
> -				BUG_ON(ret);
> -			}
> -			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
> -		}
> -#endif
>  		list_del(&page->lru);
>  
>  		balloon_append(page);
> @@ -601,9 +544,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>  
>  	flush_tlb_all();
>  
> -	set_xen_guest_handle(reservation.extent_start, frame_list);
> -	reservation.nr_extents   = nr_pages;
> -	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
> +	ret = xenmem_reservation_decrease(nr_pages, frame_list);
>  	BUG_ON(ret != nr_pages);
>  
>  	balloon_stats.current_pages -= nr_pages;
> diff --git a/drivers/xen/mem-reservation.c b/drivers/xen/mem-reservation.c
> new file mode 100644
> index 000000000000..29882e4324f5
> --- /dev/null
> +++ b/drivers/xen/mem-reservation.c
> @@ -0,0 +1,134 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +
> +/******************************************************************************
> + * Xen memory reservation utilities.
> + *
> + * Copyright (c) 2003, B Dragovic
> + * Copyright (c) 2003-2004, M Williamson, K Fraser
> + * Copyright (c) 2005 Dan M. Smith, IBM Corporation
> + * Copyright (c) 2010 Daniel Kiper
> + * Copyright (c) 2018, Oleksandr Andrushchenko, EPAM Systems Inc.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/slab.h>
> +
> +#include <asm/tlb.h>
> +#include <asm/xen/hypercall.h>
> +
> +#include <xen/interface/memory.h>
> +#include <xen/page.h>
> +
> +/*
> + * Use one extent per PAGE_SIZE to avoid to break down the page into
> + * multiple frame.
> + */
> +#define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1)
> +
> +void xenmem_reservation_scrub_page(struct page *page)
> +{
> +#ifdef CONFIG_XEN_SCRUB_PAGES
> +	clear_highpage(page);
> +#endif
> +}
> +EXPORT_SYMBOL(xenmem_reservation_scrub_page);

EXPORT_SYMBOL_GPL()

Multiple times below, too.

As a general rule of thumb: new exports should all be
EXPORT_SYMBOL_GPL() if you can't give a reason why they shouldn't be.


Juergen
Oleksandr Andrushchenko May 30, 2018, 5:30 a.m. UTC | #7
On 05/30/2018 07:32 AM, Juergen Gross wrote:
> On 25/05/18 17:33, Oleksandr Andrushchenko wrote:
>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>
>> Memory {increase|decrease}_reservation and VA mappings update/reset
>> code used in balloon driver can be made common, so other drivers can
>> also re-use the same functionality without open-coding.
>> Create a dedicated module for the shared code and export corresponding
>> symbols for other kernel modules.
>>
>> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>> ---
>>   drivers/xen/Makefile          |   1 +
>>   drivers/xen/balloon.c         |  71 ++----------------
>>   drivers/xen/mem-reservation.c | 134 ++++++++++++++++++++++++++++++++++
>>   include/xen/mem_reservation.h |  29 ++++++++
>>   4 files changed, 170 insertions(+), 65 deletions(-)
>>   create mode 100644 drivers/xen/mem-reservation.c
>>   create mode 100644 include/xen/mem_reservation.h
> Can you please name this include/xen/mem-reservation.h ?
>
Will rename
>> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
>> index 451e833f5931..3c87b0c3aca6 100644
>> --- a/drivers/xen/Makefile
>> +++ b/drivers/xen/Makefile
>> @@ -2,6 +2,7 @@
>>   obj-$(CONFIG_HOTPLUG_CPU)		+= cpu_hotplug.o
>>   obj-$(CONFIG_X86)			+= fallback.o
>>   obj-y	+= grant-table.o features.o balloon.o manage.o preempt.o time.o
>> +obj-y	+= mem-reservation.o
>>   obj-y	+= events/
>>   obj-y	+= xenbus/
>>   
>> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
>> index 065f0b607373..57b482d67a3a 100644
>> --- a/drivers/xen/balloon.c
>> +++ b/drivers/xen/balloon.c
>> @@ -71,6 +71,7 @@
>>   #include <xen/balloon.h>
>>   #include <xen/features.h>
>>   #include <xen/page.h>
>> +#include <xen/mem_reservation.h>
>>   
>>   static int xen_hotplug_unpopulated;
>>   
>> @@ -157,13 +158,6 @@ static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
>>   #define GFP_BALLOON \
>>   	(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
>>   
>> -static void scrub_page(struct page *page)
>> -{
>> -#ifdef CONFIG_XEN_SCRUB_PAGES
>> -	clear_highpage(page);
>> -#endif
>> -}
>> -
>>   /* balloon_append: add the given page to the balloon. */
>>   static void __balloon_append(struct page *page)
>>   {
>> @@ -463,11 +457,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
>>   	int rc;
>>   	unsigned long i;
>>   	struct page   *page;
>> -	struct xen_memory_reservation reservation = {
>> -		.address_bits = 0,
>> -		.extent_order = EXTENT_ORDER,
>> -		.domid        = DOMID_SELF
>> -	};
>>   
>>   	if (nr_pages > ARRAY_SIZE(frame_list))
>>   		nr_pages = ARRAY_SIZE(frame_list);
>> @@ -486,9 +475,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
>>   		page = balloon_next_page(page);
>>   	}
>>   
>> -	set_xen_guest_handle(reservation.extent_start, frame_list);
>> -	reservation.nr_extents = nr_pages;
>> -	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
>> +	rc = xenmem_reservation_increase(nr_pages, frame_list);
>>   	if (rc <= 0)
>>   		return BP_EAGAIN;
>>   
>> @@ -496,29 +483,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
>>   		page = balloon_retrieve(false);
>>   		BUG_ON(page == NULL);
>>   
>> -#ifdef CONFIG_XEN_HAVE_PVMMU
>> -		/*
>> -		 * We don't support PV MMU when Linux and Xen is using
>> -		 * different page granularity.
>> -		 */
>> -		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
>> -
>> -		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>> -			unsigned long pfn = page_to_pfn(page);
>> -
>> -			set_phys_to_machine(pfn, frame_list[i]);
>> -
>> -			/* Link back into the page tables if not highmem. */
>> -			if (!PageHighMem(page)) {
>> -				int ret;
>> -				ret = HYPERVISOR_update_va_mapping(
>> -						(unsigned long)__va(pfn << PAGE_SHIFT),
>> -						mfn_pte(frame_list[i], PAGE_KERNEL),
>> -						0);
>> -				BUG_ON(ret);
>> -			}
>> -		}
>> -#endif
>> +		xenmem_reservation_va_mapping_update(1, &page, &frame_list[i]);
>>   
>>   		/* Relinquish the page back to the allocator. */
>>   		free_reserved_page(page);
>> @@ -535,11 +500,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>   	unsigned long i;
>>   	struct page *page, *tmp;
>>   	int ret;
>> -	struct xen_memory_reservation reservation = {
>> -		.address_bits = 0,
>> -		.extent_order = EXTENT_ORDER,
>> -		.domid        = DOMID_SELF
>> -	};
>>   	LIST_HEAD(pages);
>>   
>>   	if (nr_pages > ARRAY_SIZE(frame_list))
>> @@ -553,7 +513,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>   			break;
>>   		}
>>   		adjust_managed_page_count(page, -1);
>> -		scrub_page(page);
>> +		xenmem_reservation_scrub_page(page);
>>   		list_add(&page->lru, &pages);
>>   	}
>>   
>> @@ -575,25 +535,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>   		/* XENMEM_decrease_reservation requires a GFN */
>>   		frame_list[i++] = xen_page_to_gfn(page);
>>   
>> -#ifdef CONFIG_XEN_HAVE_PVMMU
>> -		/*
>> -		 * We don't support PV MMU when Linux and Xen is using
>> -		 * different page granularity.
>> -		 */
>> -		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
>> -
>> -		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>> -			unsigned long pfn = page_to_pfn(page);
>> +		xenmem_reservation_va_mapping_reset(1, &page);
>>   
>> -			if (!PageHighMem(page)) {
>> -				ret = HYPERVISOR_update_va_mapping(
>> -						(unsigned long)__va(pfn << PAGE_SHIFT),
>> -						__pte_ma(0), 0);
>> -				BUG_ON(ret);
>> -			}
>> -			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
>> -		}
>> -#endif
>>   		list_del(&page->lru);
>>   
>>   		balloon_append(page);
>> @@ -601,9 +544,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>   
>>   	flush_tlb_all();
>>   
>> -	set_xen_guest_handle(reservation.extent_start, frame_list);
>> -	reservation.nr_extents   = nr_pages;
>> -	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
>> +	ret = xenmem_reservation_decrease(nr_pages, frame_list);
>>   	BUG_ON(ret != nr_pages);
>>   
>>   	balloon_stats.current_pages -= nr_pages;
>> diff --git a/drivers/xen/mem-reservation.c b/drivers/xen/mem-reservation.c
>> new file mode 100644
>> index 000000000000..29882e4324f5
>> --- /dev/null
>> +++ b/drivers/xen/mem-reservation.c
>> @@ -0,0 +1,134 @@
>> +// SPDX-License-Identifier: GPL-2.0 OR MIT
>> +
>> +/******************************************************************************
>> + * Xen memory reservation utilities.
>> + *
>> + * Copyright (c) 2003, B Dragovic
>> + * Copyright (c) 2003-2004, M Williamson, K Fraser
>> + * Copyright (c) 2005 Dan M. Smith, IBM Corporation
>> + * Copyright (c) 2010 Daniel Kiper
>> + * Copyright (c) 2018, Oleksandr Andrushchenko, EPAM Systems Inc.
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/slab.h>
>> +
>> +#include <asm/tlb.h>
>> +#include <asm/xen/hypercall.h>
>> +
>> +#include <xen/interface/memory.h>
>> +#include <xen/page.h>
>> +
>> +/*
>> + * Use one extent per PAGE_SIZE to avoid to break down the page into
>> + * multiple frame.
>> + */
>> +#define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1)
>> +
>> +void xenmem_reservation_scrub_page(struct page *page)
>> +{
>> +#ifdef CONFIG_XEN_SCRUB_PAGES
>> +	clear_highpage(page);
>> +#endif
>> +}
>> +EXPORT_SYMBOL(xenmem_reservation_scrub_page);
> EXPORT_SYMBOL_GPL()
>
> Multiple times below, too.
Ok, will change to _GPL
> As a general rule of thumb: new exports should all be
> EXPORT_SYMBOL_GPL() if you can't give a reason why they shouldn't be.
>
>
> Juergen
Thank you,
Oleksandr
Oleksandr Andrushchenko May 30, 2018, 8:29 a.m. UTC | #8
On 05/29/2018 11:03 PM, Boris Ostrovsky wrote:
> On 05/29/2018 02:22 PM, Oleksandr Andrushchenko wrote:
>> On 05/29/2018 09:04 PM, Boris Ostrovsky wrote:
>>> On 05/25/2018 11:33 AM, Oleksandr Andrushchenko wrote:
>>> @@ -463,11 +457,6 @@ static enum bp_state
>>> increase_reservation(unsigned long nr_pages)
>>>        int rc;
>>>        unsigned long i;
>>>        struct page   *page;
>>> -    struct xen_memory_reservation reservation = {
>>> -        .address_bits = 0,
>>> -        .extent_order = EXTENT_ORDER,
>>> -        .domid        = DOMID_SELF
>>> -    };
>>>          if (nr_pages > ARRAY_SIZE(frame_list))
>>>            nr_pages = ARRAY_SIZE(frame_list);
>>> @@ -486,9 +475,7 @@ static enum bp_state
>>> increase_reservation(unsigned long nr_pages)
>>>            page = balloon_next_page(page);
>>>        }
>>>    -    set_xen_guest_handle(reservation.extent_start, frame_list);
>>> -    reservation.nr_extents = nr_pages;
>>> -    rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
>>> +    rc = xenmem_reservation_increase(nr_pages, frame_list);
>>>        if (rc <= 0)
>>>            return BP_EAGAIN;
>>>    @@ -496,29 +483,7 @@ static enum bp_state
>>> increase_reservation(unsigned long nr_pages)
>>>            page = balloon_retrieve(false);
>>>            BUG_ON(page == NULL);
>>>    -#ifdef CONFIG_XEN_HAVE_PVMMU
>>> -        /*
>>> -         * We don't support PV MMU when Linux and Xen is using
>>> -         * different page granularity.
>>> -         */
>>> -        BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
>>> -
>>> -        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>>> -            unsigned long pfn = page_to_pfn(page);
>>> -
>>> -            set_phys_to_machine(pfn, frame_list[i]);
>>> -
>>> -            /* Link back into the page tables if not highmem. */
>>> -            if (!PageHighMem(page)) {
>>> -                int ret;
>>> -                ret = HYPERVISOR_update_va_mapping(
>>> -                        (unsigned long)__va(pfn << PAGE_SHIFT),
>>> -                        mfn_pte(frame_list[i], PAGE_KERNEL),
>>> -                        0);
>>> -                BUG_ON(ret);
>>> -            }
>>> -        }
>>> -#endif
>>> +        xenmem_reservation_va_mapping_update(1, &page, &frame_list[i]);
>>>
>>> Can you make a single call to xenmem_reservation_va_mapping_update(rc,
>>> ...)? You need to keep track of pages but presumable they can be put
>>> into an array (or a list). In fact, perhaps we can have
>>> balloon_retrieve() return a set of pages.
>> This is actually how it is used later on for dma-buf, but I just
>> didn't want
>> to alter original balloon code too much, but this can be done, in
>> order of simplicity:
>>
>> 1. Similar to frame_list, e.g. static array of struct page* of size
>> ARRAY_SIZE(frame_list):
>> more static memory is used, but no allocations
>>
>> 2. Allocated at run-time with kcalloc: allocation can fail
>
> If this is called in freeing DMA buffer code path or in error path then
> we shouldn't do it.
>
>
>> 3. Make balloon_retrieve() return a set of pages: will require
>> list/array allocation
>> and handling, allocation may fail, balloon_retrieve prototype change
>
> balloon pages are strung on the lru list. Can we keep have
> balloon_retrieve return a list of pages on that list?
First of all, before we go deep in details, I will highlight
the goal of the requested change: for balloon driver we call
xenmem_reservation_va_mapping_update(*1*, &page, &frame_list[i]);
from increase_reservation
and
xenmem_reservation_va_mapping_reset(*1*, &page);
from decrease_reservation, and this seems inelegant because only
one page/frame is passed at a time, while we might have multiple
pages/frames passed at once.

In the balloon driver the producer of pages for increase_reservation
is balloon_retrieve(false) and for decrease_reservation it is
alloc_page(gfp).
In case of decrease_reservation the page is added on the list:
LIST_HEAD(pages);
[...]
list_add(&page->lru, &pages);

and in case of increase_reservation it is retrieved page by page
and can be put on a list as well with the same code from
decrease_reservation, e.g.
LIST_HEAD(pages);
[...]
list_add(&page->lru, &pages);

Thus, both decrease_reservation and increase_reservation may hold
their pages on a list before calling 
xenmem_reservation_va_mapping_{update|reset}.

For that we need a prototype change:
xenmem_reservation_va_mapping_reset(<nr_pages>, <list of pages>);
But for xenmem_reservation_va_mapping_update it will look like:
xenmem_reservation_va_mapping_update(<nr_pages>, <list of pages>, <array 
of frames>)
which seems to be inconsistent. Converting entries of the static
frame_list array
into a corresponding list doesn't seem elegant either.

For the dma-buf use-case, arrays are preferable, as dma-buf constructs
scatter-gather
tables from arrays of pages etc., and if a page list is passed then it
needs to be
converted into a page array anyway.

So, we can:
1. Keep the prototypes as is, e.g. accept array of pages and use 
nr_pages == 1 in
case of balloon driver (existing code)
2. Statically allocate struct page* array in the balloon driver and fill 
it with pages
when those pages are retrieved:
static struct page *page_list[ARRAY_SIZE(frame_list)];
which will take additional 8KiB of space on 64-bit platform, but 
simplify things a lot.
3. Allocate struct page *page_list[ARRAY_SIZE(frame_list)] dynamically

As to Boris' suggestion "balloon pages are strung on the lru list. Can 
we keep have
balloon_retrieve return a list of pages on that list?"
Because of alloc_xenballooned_pages' retry logic for page retrieval, e.g.
     while (pgno < nr_pages) {
         page = balloon_retrieve(true);
         if (page) {
[...]
         } else {
             ret = add_ballooned_pages(nr_pages - pgno);
[...]
     }
I wouldn't change things that much.

IMO, we can keep 1 page based API with the only overhead for balloon 
driver of
function calls to xenmem_reservation_va_mapping_{update|reset} for each 
page.
> -boris
Thank you,
Oleksandr
>
>> Could you please tell which of the above will fit better?
>>
>>>
Boris Ostrovsky May 30, 2018, 3:54 p.m. UTC | #9
On 05/30/2018 04:29 AM, Oleksandr Andrushchenko wrote:
> On 05/29/2018 11:03 PM, Boris Ostrovsky wrote:
>> On 05/29/2018 02:22 PM, Oleksandr Andrushchenko wrote:
>>> On 05/29/2018 09:04 PM, Boris Ostrovsky wrote:
>>>> On 05/25/2018 11:33 AM, Oleksandr Andrushchenko wrote:
>>>> @@ -463,11 +457,6 @@ static enum bp_state
>>>> increase_reservation(unsigned long nr_pages)
>>>>        int rc;
>>>>        unsigned long i;
>>>>        struct page   *page;
>>>> -    struct xen_memory_reservation reservation = {
>>>> -        .address_bits = 0,
>>>> -        .extent_order = EXTENT_ORDER,
>>>> -        .domid        = DOMID_SELF
>>>> -    };
>>>>          if (nr_pages > ARRAY_SIZE(frame_list))
>>>>            nr_pages = ARRAY_SIZE(frame_list);
>>>> @@ -486,9 +475,7 @@ static enum bp_state
>>>> increase_reservation(unsigned long nr_pages)
>>>>            page = balloon_next_page(page);
>>>>        }
>>>>    -    set_xen_guest_handle(reservation.extent_start, frame_list);
>>>> -    reservation.nr_extents = nr_pages;
>>>> -    rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
>>>> +    rc = xenmem_reservation_increase(nr_pages, frame_list);
>>>>        if (rc <= 0)
>>>>            return BP_EAGAIN;
>>>>    @@ -496,29 +483,7 @@ static enum bp_state
>>>> increase_reservation(unsigned long nr_pages)
>>>>            page = balloon_retrieve(false);
>>>>            BUG_ON(page == NULL);
>>>>    -#ifdef CONFIG_XEN_HAVE_PVMMU
>>>> -        /*
>>>> -         * We don't support PV MMU when Linux and Xen is using
>>>> -         * different page granularity.
>>>> -         */
>>>> -        BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
>>>> -
>>>> -        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>>>> -            unsigned long pfn = page_to_pfn(page);
>>>> -
>>>> -            set_phys_to_machine(pfn, frame_list[i]);
>>>> -
>>>> -            /* Link back into the page tables if not highmem. */
>>>> -            if (!PageHighMem(page)) {
>>>> -                int ret;
>>>> -                ret = HYPERVISOR_update_va_mapping(
>>>> -                        (unsigned long)__va(pfn << PAGE_SHIFT),
>>>> -                        mfn_pte(frame_list[i], PAGE_KERNEL),
>>>> -                        0);
>>>> -                BUG_ON(ret);
>>>> -            }
>>>> -        }
>>>> -#endif
>>>> +        xenmem_reservation_va_mapping_update(1, &page,
>>>> &frame_list[i]);
>>>>
>>>> Can you make a single call to xenmem_reservation_va_mapping_update(rc,
>>>> ...)? You need to keep track of pages but presumable they can be put
>>>> into an array (or a list). In fact, perhaps we can have
>>>> balloon_retrieve() return a set of pages.
>>> This is actually how it is used later on for dma-buf, but I just
>>> didn't want
>>> to alter original balloon code too much, but this can be done, in
>>> order of simplicity:
>>>
>>> 1. Similar to frame_list, e.g. static array of struct page* of size
>>> ARRAY_SIZE(frame_list):
>>> more static memory is used, but no allocations
>>>
>>> 2. Allocated at run-time with kcalloc: allocation can fail
>>
>> If this is called in freeing DMA buffer code path or in error path then
>> we shouldn't do it.
>>
>>
>>> 3. Make balloon_retrieve() return a set of pages: will require
>>> list/array allocation
>>> and handling, allocation may fail, balloon_retrieve prototype change
>>
>> balloon pages are strung on the lru list. Can we keep have
>> balloon_retrieve return a list of pages on that list?
> First of all, before we go deep in details, I will highlight
> the goal of the requested change: for balloon driver we call
> xenmem_reservation_va_mapping_update(*1*, &page, &frame_list[i]);
> from increase_reservation
> and
> xenmem_reservation_va_mapping_reset(*1*, &page);
> from decrease_reservation and it seems to be not elegant because of
> that one page/frame passed while we might have multiple pages/frames
> passed at once.
>
> In the balloon driver the producer of pages for increase_reservation
> is balloon_retrieve(false) and for decrease_reservation it is
> alloc_page(gfp).
> In case of decrease_reservation the page is added on the list:
> LIST_HEAD(pages);
> [...]
> list_add(&page->lru, &pages);
>
> and in case of increase_reservation it is retrieved page by page
> and can be put on a list as well with the same code from
> decrease_reservation, e.g.
> LIST_HEAD(pages);
> [...]
> list_add(&page->lru, &pages);
>
> Thus, both decrease_reservation and increase_reservation may hold
> their pages on a list before calling
> xenmem_reservation_va_mapping_{update|reset}.
>
> For that we need a prototype change:
> xenmem_reservation_va_mapping_reset(<nr_pages>, <list of pages>);
> But for xenmem_reservation_va_mapping_update it will look like:
> xenmem_reservation_va_mapping_update(<nr_pages>, <list of pages>,
> <array of frames>)
> which seems to be inconsistent. Converting entries of the static
> frame_list array
> into corresponding list doesn't seem to be cute as well.
>
> For dma-buf use-case arrays are more preferable as dma-buf constructs
> scatter-gather
> tables from array of pages etc. and if page list is passed then it
> needs to be
> converted into page array anyways.
>
> So, we can:
> 1. Keep the prototypes as is, e.g. accept array of pages and use
> nr_pages == 1 in
> case of balloon driver (existing code)
> 2. Statically allocate struct page* array in the balloon driver and
> fill it with pages
> when those pages are retrieved:
> static struct page *page_list[ARRAY_SIZE(frame_list)];
> which will take additional 8KiB of space on 64-bit platform, but
> simplify things a lot.
> 3. Allocate struct page *page_list[ARRAY_SIZE(frame_list)] dynamically
>
> As to Boris' suggestion "balloon pages are strung on the lru list. Can
> we keep have
> balloon_retrieve return a list of pages on that list?"
> Because of alloc_xenballooned_pages' retry logic for page retireval, e.g.
>     while (pgno < nr_pages) {
>         page = balloon_retrieve(true);
>         if (page) {
> [...]
>         } else {
>             ret = add_ballooned_pages(nr_pages - pgno);
> [...]
>     }
> I wouldn't change things that much.
>
> IMO, we can keep 1 page based API with the only overhead for balloon
> driver of
> function calls to xenmem_reservation_va_mapping_{update|reset} for
> each page.



I still think what I suggested is doable but we can come back to it
later and keep your per-page implementation for now.

BTW, I also think you can further simplify
xenmem_reservation_va_mapping_* routines by bailing out right away if
xen_feature(XENFEAT_auto_translated_physmap). In fact, you might even
make them inlines, along the lines of

inline void xenmem_reservation_va_mapping_reset(unsigned long count,
					 struct page **pages)
{
#ifdef CONFIG_XEN_HAVE_PVMMU
	if (!xen_feature(XENFEAT_auto_translated_physmap))
		__xenmem_reservation_va_mapping_reset(...)
#endif
} 

Or some such.

-boris

-boris
Oleksandr Andrushchenko May 30, 2018, 5:46 p.m. UTC | #10
On 05/30/2018 06:54 PM, Boris Ostrovsky wrote:
> On 05/30/2018 04:29 AM, Oleksandr Andrushchenko wrote:
>> On 05/29/2018 11:03 PM, Boris Ostrovsky wrote:
>>> On 05/29/2018 02:22 PM, Oleksandr Andrushchenko wrote:
>>>> On 05/29/2018 09:04 PM, Boris Ostrovsky wrote:
>>>>> On 05/25/2018 11:33 AM, Oleksandr Andrushchenko wrote:
>>>>> @@ -463,11 +457,6 @@ static enum bp_state
>>>>> increase_reservation(unsigned long nr_pages)
>>>>>         int rc;
>>>>>         unsigned long i;
>>>>>         struct page   *page;
>>>>> -    struct xen_memory_reservation reservation = {
>>>>> -        .address_bits = 0,
>>>>> -        .extent_order = EXTENT_ORDER,
>>>>> -        .domid        = DOMID_SELF
>>>>> -    };
>>>>>           if (nr_pages > ARRAY_SIZE(frame_list))
>>>>>             nr_pages = ARRAY_SIZE(frame_list);
>>>>> @@ -486,9 +475,7 @@ static enum bp_state
>>>>> increase_reservation(unsigned long nr_pages)
>>>>>             page = balloon_next_page(page);
>>>>>         }
>>>>>     -    set_xen_guest_handle(reservation.extent_start, frame_list);
>>>>> -    reservation.nr_extents = nr_pages;
>>>>> -    rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
>>>>> +    rc = xenmem_reservation_increase(nr_pages, frame_list);
>>>>>         if (rc <= 0)
>>>>>             return BP_EAGAIN;
>>>>>     @@ -496,29 +483,7 @@ static enum bp_state
>>>>> increase_reservation(unsigned long nr_pages)
>>>>>             page = balloon_retrieve(false);
>>>>>             BUG_ON(page == NULL);
>>>>>     -#ifdef CONFIG_XEN_HAVE_PVMMU
>>>>> -        /*
>>>>> -         * We don't support PV MMU when Linux and Xen is using
>>>>> -         * different page granularity.
>>>>> -         */
>>>>> -        BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
>>>>> -
>>>>> -        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>>>>> -            unsigned long pfn = page_to_pfn(page);
>>>>> -
>>>>> -            set_phys_to_machine(pfn, frame_list[i]);
>>>>> -
>>>>> -            /* Link back into the page tables if not highmem. */
>>>>> -            if (!PageHighMem(page)) {
>>>>> -                int ret;
>>>>> -                ret = HYPERVISOR_update_va_mapping(
>>>>> -                        (unsigned long)__va(pfn << PAGE_SHIFT),
>>>>> -                        mfn_pte(frame_list[i], PAGE_KERNEL),
>>>>> -                        0);
>>>>> -                BUG_ON(ret);
>>>>> -            }
>>>>> -        }
>>>>> -#endif
>>>>> +        xenmem_reservation_va_mapping_update(1, &page,
>>>>> &frame_list[i]);
>>>>>
>>>>> Can you make a single call to xenmem_reservation_va_mapping_update(rc,
>>>>> ...)? You need to keep track of pages but presumable they can be put
>>>>> into an array (or a list). In fact, perhaps we can have
>>>>> balloon_retrieve() return a set of pages.
>>>> This is actually how it is used later on for dma-buf, but I just
>>>> didn't want
>>>> to alter original balloon code too much, but this can be done, in
>>>> order of simplicity:
>>>>
>>>> 1. Similar to frame_list, e.g. static array of struct page* of size
>>>> ARRAY_SIZE(frame_list):
>>>> more static memory is used, but no allocations
>>>>
>>>> 2. Allocated at run-time with kcalloc: allocation can fail
>>> If this is called in freeing DMA buffer code path or in error path then
>>> we shouldn't do it.
>>>
>>>
>>>> 3. Make balloon_retrieve() return a set of pages: will require
>>>> list/array allocation
>>>> and handling, allocation may fail, balloon_retrieve prototype change
>>> balloon pages are strung on the lru list. Can we have
>>> balloon_retrieve return a list of pages on that list?
>> First of all, before we go deep in details, I will highlight
>> the goal of the requested change: for balloon driver we call
>> xenmem_reservation_va_mapping_update(*1*, &page, &frame_list[i]);
>> from increase_reservation
>> and
>> xenmem_reservation_va_mapping_reset(*1*, &page);
>> from decrease_reservation and it seems to be not elegant because of
>> that one page/frame passed while we might have multiple pages/frames
>> passed at once.
>>
>> In the balloon driver the producer of pages for increase_reservation
>> is balloon_retrieve(false) and for decrease_reservation it is
>> alloc_page(gfp).
>> In case of decrease_reservation the page is added on the list:
>> LIST_HEAD(pages);
>> [...]
>> list_add(&page->lru, &pages);
>>
>> and in case of increase_reservation it is retrieved page by page
>> and can be put on a list as well with the same code from
>> decrease_reservation, e.g.
>> LIST_HEAD(pages);
>> [...]
>> list_add(&page->lru, &pages);
>>
>> Thus, both decrease_reservation and increase_reservation may hold
>> their pages on a list before calling
>> xenmem_reservation_va_mapping_{update|reset}.
>>
>> For that we need a prototype change:
>> xenmem_reservation_va_mapping_reset(<nr_pages>, <list of pages>);
>> But for xenmem_reservation_va_mapping_update it will look like:
>> xenmem_reservation_va_mapping_update(<nr_pages>, <list of pages>,
>> <array of frames>)
>> which seems to be inconsistent. Converting entries of the static
>> frame_list array
>> into corresponding list doesn't seem to be cute as well.
>>
>> For dma-buf use-case arrays are more preferable as dma-buf constructs
>> scatter-gather
>> tables from array of pages etc. and if page list is passed then it
>> needs to be
>> converted into page array anyways.
>>
>> So, we can:
>> 1. Keep the prototypes as is, e.g. accept array of pages and use
>> nr_pages == 1 in
>> case of balloon driver (existing code)
>> 2. Statically allocate struct page* array in the balloon driver and
>> fill it with pages
>> when those pages are retrieved:
>> static struct page *page_list[ARRAY_SIZE(frame_list)];
>> which will take additional 8KiB of space on 64-bit platform, but
>> simplify things a lot.
>> 3. Allocate struct page *page_list[ARRAY_SIZE(frame_list)] dynamically
>>
>> As to Boris' suggestion "balloon pages are strung on the lru list. Can
>> we have
>> balloon_retrieve return a list of pages on that list?"
>> Because of alloc_xenballooned_pages' retry logic for page retrieval, e.g.
>>      while (pgno < nr_pages) {
>>          page = balloon_retrieve(true);
>>          if (page) {
>> [...]
>>          } else {
>>              ret = add_ballooned_pages(nr_pages - pgno);
>> [...]
>>      }
>> I wouldn't change things that much.
>>
>> IMO, we can keep 1 page based API with the only overhead for balloon
>> driver of
>> function calls to xenmem_reservation_va_mapping_{update|reset} for
>> each page.
>
>
> I still think what I suggested is doable but we can come back to it
> later and keep your per-page implementation for now.
>
> BTW, I also think you can further simplify
> xenmem_reservation_va_mapping_* routines by bailing out right away if
> xen_feature(XENFEAT_auto_translated_physmap). In fact, you might even
> make them inlines, along the lines of
>
> inline void xenmem_reservation_va_mapping_reset(unsigned long count,
> 					 struct page **pages)
> {
> #ifdef CONFIG_XEN_HAVE_PVMMU
> 	if (!xen_feature(XENFEAT_auto_translated_physmap))
> 		__xenmem_reservation_va_mapping_reset(...)
> #endif
> }
How about:

#ifdef CONFIG_XEN_HAVE_PVMMU
static inline void __xenmem_reservation_va_mapping_reset(struct page *page)
{
[...]
}
#endif

and

void xenmem_reservation_va_mapping_reset(unsigned long count,
                      struct page **pages)
{
#ifdef CONFIG_XEN_HAVE_PVMMU
     if (!xen_feature(XENFEAT_auto_translated_physmap)) {
         int i;

         for (i = 0; i < count; i++)
             __xenmem_reservation_va_mapping_reset(pages[i]);
     }
#endif
}

This way I can use __xenmem_reservation_va_mapping_reset(page);
instead of xenmem_reservation_va_mapping_reset(1, &page);
> Or some such.
>
> -boris
>
> -boris
>
>
Boris Ostrovsky May 30, 2018, 7:24 p.m. UTC | #11
On 05/30/2018 01:46 PM, Oleksandr Andrushchenko wrote:
> On 05/30/2018 06:54 PM, Boris Ostrovsky wrote:
>>
>>
>> BTW, I also think you can further simplify
>> xenmem_reservation_va_mapping_* routines by bailing out right away if
>> xen_feature(XENFEAT_auto_translated_physmap). In fact, you might even
>> make them inlines, along the lines of
>>
>> inline void xenmem_reservation_va_mapping_reset(unsigned long count,
>>                      struct page **pages)
>> {
>> #ifdef CONFIG_XEN_HAVE_PVMMU
>>     if (!xen_feature(XENFEAT_auto_translated_physmap))
>>         __xenmem_reservation_va_mapping_reset(...)
>> #endif
>> }
> How about:
>
> #ifdef CONFIG_XEN_HAVE_PVMMU
> static inline void __xenmem_reservation_va_mapping_reset(struct page *page)
> {
> [...]
> }
> #endif
>
> and
>
> void xenmem_reservation_va_mapping_reset(unsigned long count,
>                      struct page **pages)
> {
> #ifdef CONFIG_XEN_HAVE_PVMMU
>     if (!xen_feature(XENFEAT_auto_translated_physmap)) {
>         int i;
>
>         for (i = 0; i < count; i++)
>             __xenmem_reservation_va_mapping_reset(pages[i]);
>     }
> #endif
> }
>
> This way I can use __xenmem_reservation_va_mapping_reset(page);
> instead of xenmem_reservation_va_mapping_reset(1, &page);


Sure, this also works.

-boris
diff mbox

Patch

diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 451e833f5931..3c87b0c3aca6 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -2,6 +2,7 @@ 
 obj-$(CONFIG_HOTPLUG_CPU)		+= cpu_hotplug.o
 obj-$(CONFIG_X86)			+= fallback.o
 obj-y	+= grant-table.o features.o balloon.o manage.o preempt.o time.o
+obj-y	+= mem-reservation.o
 obj-y	+= events/
 obj-y	+= xenbus/
 
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 065f0b607373..57b482d67a3a 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -71,6 +71,7 @@ 
 #include <xen/balloon.h>
 #include <xen/features.h>
 #include <xen/page.h>
+#include <xen/mem_reservation.h>
 
 static int xen_hotplug_unpopulated;
 
@@ -157,13 +158,6 @@  static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
 #define GFP_BALLOON \
 	(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
 
-static void scrub_page(struct page *page)
-{
-#ifdef CONFIG_XEN_SCRUB_PAGES
-	clear_highpage(page);
-#endif
-}
-
 /* balloon_append: add the given page to the balloon. */
 static void __balloon_append(struct page *page)
 {
@@ -463,11 +457,6 @@  static enum bp_state increase_reservation(unsigned long nr_pages)
 	int rc;
 	unsigned long i;
 	struct page   *page;
-	struct xen_memory_reservation reservation = {
-		.address_bits = 0,
-		.extent_order = EXTENT_ORDER,
-		.domid        = DOMID_SELF
-	};
 
 	if (nr_pages > ARRAY_SIZE(frame_list))
 		nr_pages = ARRAY_SIZE(frame_list);
@@ -486,9 +475,7 @@  static enum bp_state increase_reservation(unsigned long nr_pages)
 		page = balloon_next_page(page);
 	}
 
-	set_xen_guest_handle(reservation.extent_start, frame_list);
-	reservation.nr_extents = nr_pages;
-	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
+	rc = xenmem_reservation_increase(nr_pages, frame_list);
 	if (rc <= 0)
 		return BP_EAGAIN;
 
@@ -496,29 +483,7 @@  static enum bp_state increase_reservation(unsigned long nr_pages)
 		page = balloon_retrieve(false);
 		BUG_ON(page == NULL);
 
-#ifdef CONFIG_XEN_HAVE_PVMMU
-		/*
-		 * We don't support PV MMU when Linux and Xen is using
-		 * different page granularity.
-		 */
-		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
-
-		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-			unsigned long pfn = page_to_pfn(page);
-
-			set_phys_to_machine(pfn, frame_list[i]);
-
-			/* Link back into the page tables if not highmem. */
-			if (!PageHighMem(page)) {
-				int ret;
-				ret = HYPERVISOR_update_va_mapping(
-						(unsigned long)__va(pfn << PAGE_SHIFT),
-						mfn_pte(frame_list[i], PAGE_KERNEL),
-						0);
-				BUG_ON(ret);
-			}
-		}
-#endif
+		xenmem_reservation_va_mapping_update(1, &page, &frame_list[i]);
 
 		/* Relinquish the page back to the allocator. */
 		free_reserved_page(page);
@@ -535,11 +500,6 @@  static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 	unsigned long i;
 	struct page *page, *tmp;
 	int ret;
-	struct xen_memory_reservation reservation = {
-		.address_bits = 0,
-		.extent_order = EXTENT_ORDER,
-		.domid        = DOMID_SELF
-	};
 	LIST_HEAD(pages);
 
 	if (nr_pages > ARRAY_SIZE(frame_list))
@@ -553,7 +513,7 @@  static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 			break;
 		}
 		adjust_managed_page_count(page, -1);
-		scrub_page(page);
+		xenmem_reservation_scrub_page(page);
 		list_add(&page->lru, &pages);
 	}
 
@@ -575,25 +535,8 @@  static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 		/* XENMEM_decrease_reservation requires a GFN */
 		frame_list[i++] = xen_page_to_gfn(page);
 
-#ifdef CONFIG_XEN_HAVE_PVMMU
-		/*
-		 * We don't support PV MMU when Linux and Xen is using
-		 * different page granularity.
-		 */
-		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
-
-		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-			unsigned long pfn = page_to_pfn(page);
+		xenmem_reservation_va_mapping_reset(1, &page);
 
-			if (!PageHighMem(page)) {
-				ret = HYPERVISOR_update_va_mapping(
-						(unsigned long)__va(pfn << PAGE_SHIFT),
-						__pte_ma(0), 0);
-				BUG_ON(ret);
-			}
-			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
-		}
-#endif
 		list_del(&page->lru);
 
 		balloon_append(page);
@@ -601,9 +544,7 @@  static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 
 	flush_tlb_all();
 
-	set_xen_guest_handle(reservation.extent_start, frame_list);
-	reservation.nr_extents   = nr_pages;
-	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+	ret = xenmem_reservation_decrease(nr_pages, frame_list);
 	BUG_ON(ret != nr_pages);
 
 	balloon_stats.current_pages -= nr_pages;
diff --git a/drivers/xen/mem-reservation.c b/drivers/xen/mem-reservation.c
new file mode 100644
index 000000000000..29882e4324f5
--- /dev/null
+++ b/drivers/xen/mem-reservation.c
@@ -0,0 +1,134 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+/******************************************************************************
+ * Xen memory reservation utilities.
+ *
+ * Copyright (c) 2003, B Dragovic
+ * Copyright (c) 2003-2004, M Williamson, K Fraser
+ * Copyright (c) 2005 Dan M. Smith, IBM Corporation
+ * Copyright (c) 2010 Daniel Kiper
+ * Copyright (c) 2018, Oleksandr Andrushchenko, EPAM Systems Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include <asm/tlb.h>
+#include <asm/xen/hypercall.h>
+
+#include <xen/interface/memory.h>
+#include <xen/page.h>
+
+/*
+ * Use one extent per PAGE_SIZE to avoid to break down the page into
+ * multiple frame.
+ */
+#define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1)
+
+void xenmem_reservation_scrub_page(struct page *page)
+{
+#ifdef CONFIG_XEN_SCRUB_PAGES
+	clear_highpage(page);
+#endif
+}
+EXPORT_SYMBOL(xenmem_reservation_scrub_page);
+
+void xenmem_reservation_va_mapping_update(unsigned long count,
+					  struct page **pages,
+					  xen_pfn_t *frames)
+{
+#ifdef CONFIG_XEN_HAVE_PVMMU
+	int i;
+
+	for (i = 0; i < count; i++) {
+		struct page *page;
+
+		page = pages[i];
+		BUG_ON(page == NULL);
+
+		/*
+		 * We don't support PV MMU when Linux and Xen is using
+		 * different page granularity.
+		 */
+		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
+
+		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+			unsigned long pfn = page_to_pfn(page);
+
+			set_phys_to_machine(pfn, frames[i]);
+
+			/* Link back into the page tables if not highmem. */
+			if (!PageHighMem(page)) {
+				int ret;
+
+				ret = HYPERVISOR_update_va_mapping(
+						(unsigned long)__va(pfn << PAGE_SHIFT),
+						mfn_pte(frames[i], PAGE_KERNEL),
+						0);
+				BUG_ON(ret);
+			}
+		}
+	}
+#endif
+}
+EXPORT_SYMBOL(xenmem_reservation_va_mapping_update);
+
+void xenmem_reservation_va_mapping_reset(unsigned long count,
+					 struct page **pages)
+{
+#ifdef CONFIG_XEN_HAVE_PVMMU
+	int i;
+
+	for (i = 0; i < count; i++) {
+		/*
+		 * We don't support PV MMU when Linux and Xen is using
+		 * different page granularity.
+		 */
+		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
+
+		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+			struct page *page = pages[i];
+			unsigned long pfn = page_to_pfn(page);
+
+			if (!PageHighMem(page)) {
+				int ret;
+
+				ret = HYPERVISOR_update_va_mapping(
+						(unsigned long)__va(pfn << PAGE_SHIFT),
+						__pte_ma(0), 0);
+				BUG_ON(ret);
+			}
+			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+		}
+	}
+#endif
+}
+EXPORT_SYMBOL(xenmem_reservation_va_mapping_reset);
+
+int xenmem_reservation_increase(int count, xen_pfn_t *frames)
+{
+	struct xen_memory_reservation reservation = {
+		.address_bits = 0,
+		.extent_order = EXTENT_ORDER,
+		.domid        = DOMID_SELF
+	};
+
+	set_xen_guest_handle(reservation.extent_start, frames);
+	reservation.nr_extents = count;
+	return HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
+}
+EXPORT_SYMBOL(xenmem_reservation_increase);
+
+int xenmem_reservation_decrease(int count, xen_pfn_t *frames)
+{
+	struct xen_memory_reservation reservation = {
+		.address_bits = 0,
+		.extent_order = EXTENT_ORDER,
+		.domid        = DOMID_SELF
+	};
+
+	set_xen_guest_handle(reservation.extent_start, frames);
+	reservation.nr_extents = count;
+	return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+}
+EXPORT_SYMBOL(xenmem_reservation_decrease);
diff --git a/include/xen/mem_reservation.h b/include/xen/mem_reservation.h
new file mode 100644
index 000000000000..9306d9b8743c
--- /dev/null
+++ b/include/xen/mem_reservation.h
@@ -0,0 +1,29 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+/*
+ * Xen memory reservation utilities.
+ *
+ * Copyright (c) 2003, B Dragovic
+ * Copyright (c) 2003-2004, M Williamson, K Fraser
+ * Copyright (c) 2005 Dan M. Smith, IBM Corporation
+ * Copyright (c) 2010 Daniel Kiper
+ * Copyright (c) 2018, Oleksandr Andrushchenko, EPAM Systems Inc.
+ */
+
+#ifndef _XENMEM_RESERVATION_H
+#define _XENMEM_RESERVATION_H
+
+void xenmem_reservation_scrub_page(struct page *page);
+
+void xenmem_reservation_va_mapping_update(unsigned long count,
+					  struct page **pages,
+					  xen_pfn_t *frames);
+
+void xenmem_reservation_va_mapping_reset(unsigned long count,
+					 struct page **pages);
+
+int xenmem_reservation_increase(int count, xen_pfn_t *frames);
+
+int xenmem_reservation_decrease(int count, xen_pfn_t *frames);
+
+#endif