diff mbox series

[RFC,12/15] mm: Make MAX_ORDER of buddy allocator configurable via Kconfig SET_MAX_ORDER.

Message ID 20210805190253.2795604-13-zi.yan@sent.com (mailing list archive)
State New
Headers show
Series Make MAX_ORDER adjustable as a kernel boot time parameter. | expand

Commit Message

Zi Yan Aug. 5, 2021, 7:02 p.m. UTC
From: Zi Yan <ziy@nvidia.com>

With SPARSEMEM_VMEMMAP, all struct pages are virtually contiguous,
thus the kernel can manipulate arbitrarily large pages. By checking
PFN validity during the buddy page merging process, all free pages in the
buddy allocator's free area have contiguous PFNs even if the system has
several physically non-contiguous memory sections. With these two
conditions, it is OK to remove the restriction of
MAX_ORDER - 1 + PAGE_SHIFT <= SECTION_SIZE_BITS and change MAX_ORDER
freely.

Add SET_MAX_ORDER to allow MAX_ORDER adjustment when the arch does not set
its own MAX_ORDER via ARCH_FORCE_MAX_ORDER. Make it depend
on SPARSEMEM_VMEMMAP, under which MAX_ORDER is not limited by
SECTION_SIZE_BITS.

Signed-off-by: Zi Yan <ziy@nvidia.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
---
 arch/Kconfig           |  4 ++++
 include/linux/mmzone.h | 14 +++++++++++++-
 mm/Kconfig             | 16 ++++++++++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)

Comments

Vlastimil Babka Aug. 6, 2021, 3:16 p.m. UTC | #1
On 8/5/21 9:02 PM, Zi Yan wrote:
> From: Zi Yan <ziy@nvidia.com>
> +config SET_MAX_ORDER
> +	int "Set maximum order of buddy allocator"
> +    depends on SPARSEMEM_VMEMMAP && (ARCH_FORCE_MAX_ORDER = 0)
> +	range 11 255
> +	default "11"
> +	help
> +	  The kernel memory allocator divides physically contiguous memory
> +	  blocks into "zones", where each zone is a power of two number of
> +	  pages.  This option selects the largest power of two that the kernel
> +	  keeps in the memory allocator.  If you need to allocate very large
> +	  blocks of physically contiguous memory, then you may need to
> +	  increase this value.
> +
> +	  This config option is actually maximum order plus one. For example,
> +	  a value of 11 means that the largest free memory block is 2^10 pages.

It's enough that it's confusing for the devs, we could spare the users and add
+1 to the value they specify :)

>  config HAVE_MEMBLOCK_PHYS_MAP
>  	bool
>  
>
Zi Yan Aug. 6, 2021, 3:23 p.m. UTC | #2
On 6 Aug 2021, at 11:16, Vlastimil Babka wrote:

> On 8/5/21 9:02 PM, Zi Yan wrote:
>> From: Zi Yan <ziy@nvidia.com>
>> +config SET_MAX_ORDER
>> +	int "Set maximum order of buddy allocator"
>> +    depends on SPARSEMEM_VMEMMAP && (ARCH_FORCE_MAX_ORDER = 0)
>> +	range 11 255
>> +	default "11"
>> +	help
>> +	  The kernel memory allocator divides physically contiguous memory
>> +	  blocks into "zones", where each zone is a power of two number of
>> +	  pages.  This option selects the largest power of two that the kernel
>> +	  keeps in the memory allocator.  If you need to allocate very large
>> +	  blocks of physically contiguous memory, then you may need to
>> +	  increase this value.
>> +
>> +	  This config option is actually maximum order plus one. For example,
>> +	  a value of 11 means that the largest free memory block is 2^10 pages.
>
> It's enough that it's confusing for the devs, we could spare the users and add
> +1 to the value they specify :)

Sure. I will change the existing ARCH_FORCE_MAX_ORDER too; otherwise people might
get confused by two different MAX_ORDER conventions, since this Kconfig only
appears when ARCH_FORCE_MAX_ORDER is not specified.


—
Best Regards,
Yan, Zi
diff mbox series

Patch

diff --git a/arch/Kconfig b/arch/Kconfig
index 01a3f8048cb7..40bd222adeb8 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -11,6 +11,10 @@  source "arch/$(SRCARCH)/Kconfig"
 
 menu "General architecture-dependent options"
 
+config ARCH_FORCE_MAX_ORDER
+    int
+    default "0"
+
 config CRASH_CORE
 	bool
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 04f790ed81b7..322b995942e5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -25,10 +25,14 @@ 
 
 /* Free memory management - zoned buddy allocator.  */
 #ifndef CONFIG_ARCH_FORCE_MAX_ORDER
+#ifdef CONFIG_SET_MAX_ORDER
+#define MAX_ORDER CONFIG_SET_MAX_ORDER
+#else
 #define MAX_ORDER 11
+#endif /* CONFIG_SET_MAX_ORDER */
 #else
 #define MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER
-#endif
+#endif /* CONFIG_ARCH_FORCE_MAX_ORDER */
 #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
 
 /*
@@ -1245,12 +1249,20 @@  static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
 #define SECTION_BLOCKFLAGS_BITS \
 	((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS)
 
+/*
+ * The MAX_ORDER check is not necessary when CONFIG_SET_MAX_ORDER is set, since
+ * it depends on CONFIG_SPARSEMEM_VMEMMAP, where all struct pages are virtually
+ * contiguous, so pages larger than a section can be allocated and manipulated
+ * without worrying about non-contiguous struct pages.
+ */
+#ifndef CONFIG_SET_MAX_ORDER
 /* NO_MAX_ORDER_CHECK when compiling x64 32bit VDSO for 64bit system */
 #ifndef NO_MAX_ORDER_CHECK
 #if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS
 #error Allocator MAX_ORDER exceeds SECTION_SIZE
 #endif
 #endif /* NO_MAX_ORDER_CHECK */
+#endif /* CONFIG_SET_MAX_ORDER*/
 
 static inline unsigned long pfn_to_section_nr(unsigned long pfn)
 {
diff --git a/mm/Kconfig b/mm/Kconfig
index 1f9bd3371765..3a030b439501 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -89,6 +89,22 @@  config SPARSEMEM_VMEMMAP
 	  pfn_to_page and page_to_pfn operations.  This is the most
 	  efficient option when sufficient kernel resources are available.
 
+config SET_MAX_ORDER
+	int "Set maximum order of buddy allocator"
+    depends on SPARSEMEM_VMEMMAP && (ARCH_FORCE_MAX_ORDER = 0)
+	range 11 255
+	default "11"
+	help
+	  The kernel memory allocator divides physically contiguous memory
+	  blocks into "zones", where each zone is a power of two number of
+	  pages.  This option selects the largest power of two that the kernel
+	  keeps in the memory allocator.  If you need to allocate very large
+	  blocks of physically contiguous memory, then you may need to
+	  increase this value.
+
+	  This config option is actually maximum order plus one. For example,
+	  a value of 11 means that the largest free memory block is 2^10 pages.
+
 config HAVE_MEMBLOCK_PHYS_MAP
 	bool