diff mbox series

[v13,14/14] powerpc/64s/radix: Enable huge vmalloc mappings

Message ID 20210317062402.533919-15-npiggin@gmail.com (mailing list archive)
State New, archived
Headers show
Series huge vmalloc mappings | expand

Commit Message

Nicholas Piggin March 17, 2021, 6:24 a.m. UTC
This reduces TLB misses by nearly 30x on a `git diff` workload on a
2-node POWER9 (59,800 -> 2,100) and reduces CPU cycles by 0.54%, due
to vfs hashes being allocated with 2MB pages.

Cc: linuxppc-dev@lists.ozlabs.org
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 .../admin-guide/kernel-parameters.txt         |  2 ++
 arch/powerpc/Kconfig                          |  1 +
 arch/powerpc/kernel/module.c                  | 22 +++++++++++++++----
 3 files changed, 21 insertions(+), 4 deletions(-)

Comments

Christophe Leroy April 15, 2021, 10:23 a.m. UTC | #1
Hi Nick,

Le 17/03/2021 à 07:24, Nicholas Piggin a écrit :
> This reduces TLB misses by nearly 30x on a `git diff` workload on a
> 2-node POWER9 (59,800 -> 2,100) and reduces CPU cycles by 0.54%, due
> to vfs hashes being allocated with 2MB pages.
> 
> Cc: linuxppc-dev@lists.ozlabs.org
> Acked-by: Michael Ellerman <mpe@ellerman.id.au>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>   .../admin-guide/kernel-parameters.txt         |  2 ++
>   arch/powerpc/Kconfig                          |  1 +
>   arch/powerpc/kernel/module.c                  | 22 +++++++++++++++----
>   3 files changed, 21 insertions(+), 4 deletions(-)
> 
> --- a/arch/powerpc/kernel/module.c
> +++ b/arch/powerpc/kernel/module.c
> @@ -8,6 +8,7 @@
>   #include <linux/moduleloader.h>
>   #include <linux/err.h>
>   #include <linux/vmalloc.h>
> +#include <linux/mm.h>
>   #include <linux/bug.h>
>   #include <asm/module.h>
>   #include <linux/uaccess.h>
> @@ -87,13 +88,26 @@ int module_finalize(const Elf_Ehdr *hdr,
>   	return 0;
>   }
>   
> -#ifdef MODULES_VADDR
>   void *module_alloc(unsigned long size)
>   {
> +	unsigned long start = VMALLOC_START;
> +	unsigned long end = VMALLOC_END;
> +
> +#ifdef MODULES_VADDR
>   	BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
> +	start = MODULES_VADDR;
> +	end = MODULES_END;
> +#endif
> +
> +	/*
> +	 * Don't do huge page allocations for modules yet until more testing
> +	 * is done. STRICT_MODULE_RWX may require extra work to support this
> +	 * too.
> +	 */
>   
> -	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL,
> -				    PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,


I think you should add the following in <asm/pgtable.h>

#ifndef MODULES_VADDR
#define MODULES_VADDR VMALLOC_START
#define MODULES_END VMALLOC_END
#endif

And leave module_alloc() as is (just removing the enclosing #ifdef MODULES_VADDR and adding the 
VM_NO_HUGE_VMAP  flag)

This would minimise the conflits with the changes I did in powerpc/next reported by Stephen R.

> +	return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL,
> +				    PAGE_KERNEL_EXEC,
> +				    VM_NO_HUGE_VMAP | VM_FLUSH_RESET_PERMS,
> +				    NUMA_NO_NODE,
>   				    __builtin_return_address(0));
>   }
> -#endif
>
Andrew Morton April 15, 2021, 6:55 p.m. UTC | #2
On Thu, 15 Apr 2021 12:23:55 +0200 Christophe Leroy <christophe.leroy@csgroup.eu> wrote:
> > +	 * is done. STRICT_MODULE_RWX may require extra work to support this
> > +	 * too.
> > +	 */
> >   
> > -	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL,
> > -				    PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
> 
> 
> I think you should add the following in <asm/pgtable.h>
> 
> #ifndef MODULES_VADDR
> #define MODULES_VADDR VMALLOC_START
> #define MODULES_END VMALLOC_END
> #endif
> 
> And leave module_alloc() as is (just removing the enclosing #ifdef MODULES_VADDR and adding the 
> VM_NO_HUGE_VMAP  flag)
> 
> This would minimise the conflits with the changes I did in powerpc/next reported by Stephen R.
> 

I'll drop powerpc-64s-radix-enable-huge-vmalloc-mappings.patch for now,
make life simpler.

Nick, a redo on top of Christophe's changes in linux-next would be best
please.
Stephen Rothwell April 15, 2021, 11:04 p.m. UTC | #3
Hi all,

On Thu, 15 Apr 2021 11:55:29 -0700 Andrew Morton <akpm@linux-foundation.org> wrote:
>
> On Thu, 15 Apr 2021 12:23:55 +0200 Christophe Leroy <christophe.leroy@csgroup.eu> wrote:
> > > +	 * is done. STRICT_MODULE_RWX may require extra work to support this
> > > +	 * too.
> > > +	 */
> > >   
> > > -	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL,
> > > -				    PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,  
> > 
> > 
> > I think you should add the following in <asm/pgtable.h>
> > 
> > #ifndef MODULES_VADDR
> > #define MODULES_VADDR VMALLOC_START
> > #define MODULES_END VMALLOC_END
> > #endif
> > 
> > And leave module_alloc() as is (just removing the enclosing #ifdef MODULES_VADDR and adding the 
> > VM_NO_HUGE_VMAP  flag)
> > 
> > This would minimise the conflits with the changes I did in powerpc/next reported by Stephen R.
> >   
> 
> I'll drop powerpc-64s-radix-enable-huge-vmalloc-mappings.patch for now,
> make life simpler.

I have dropped that patch from linux-next.
Nicholas Piggin April 17, 2021, 2:39 a.m. UTC | #4
Excerpts from Andrew Morton's message of April 16, 2021 4:55 am:
> On Thu, 15 Apr 2021 12:23:55 +0200 Christophe Leroy <christophe.leroy@csgroup.eu> wrote:
>> > +	 * is done. STRICT_MODULE_RWX may require extra work to support this
>> > +	 * too.
>> > +	 */
>> >   
>> > -	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL,
>> > -				    PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
>> 
>> 
>> I think you should add the following in <asm/pgtable.h>
>> 
>> #ifndef MODULES_VADDR
>> #define MODULES_VADDR VMALLOC_START
>> #define MODULES_END VMALLOC_END
>> #endif
>> 
>> And leave module_alloc() as is (just removing the enclosing #ifdef MODULES_VADDR and adding the 
>> VM_NO_HUGE_VMAP  flag)
>> 
>> This would minimise the conflits with the changes I did in powerpc/next reported by Stephen R.
>> 
> 
> I'll drop powerpc-64s-radix-enable-huge-vmalloc-mappings.patch for now,
> make life simpler.

Yeah that's fine.

> Nick, a redo on top of Christophe's changes in linux-next would be best
> please.

Will do.

Thanks,
Nick
diff mbox series

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 04545725f187..1f481f904895 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3243,6 +3243,8 @@ 
 
 	nohugeiomap	[KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings.
 
+	nohugevmalloc	[PPC] Disable kernel huge vmalloc mappings.
+
 	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
 			Equivalent to smt=1.
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 386ae12d8523..b7cade9566da 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -181,6 +181,7 @@  config PPC
 	select GENERIC_GETTIMEOFDAY
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_HUGE_VMAP		if PPC_BOOK3S_64 && PPC_RADIX_MMU
+	select HAVE_ARCH_HUGE_VMALLOC		if HAVE_ARCH_HUGE_VMAP
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_KASAN			if PPC32 && PPC_PAGE_SHIFT <= 14
 	select HAVE_ARCH_KASAN_VMALLOC		if PPC32 && PPC_PAGE_SHIFT <= 14
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index a211b0253cdb..cdb2d88c54e7 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -8,6 +8,7 @@ 
 #include <linux/moduleloader.h>
 #include <linux/err.h>
 #include <linux/vmalloc.h>
+#include <linux/mm.h>
 #include <linux/bug.h>
 #include <asm/module.h>
 #include <linux/uaccess.h>
@@ -87,13 +88,26 @@  int module_finalize(const Elf_Ehdr *hdr,
 	return 0;
 }
 
-#ifdef MODULES_VADDR
 void *module_alloc(unsigned long size)
 {
+	unsigned long start = VMALLOC_START;
+	unsigned long end = VMALLOC_END;
+
+#ifdef MODULES_VADDR
 	BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
+	start = MODULES_VADDR;
+	end = MODULES_END;
+#endif
+
+	/*
+	 * Don't do huge page allocations for modules yet until more testing
+	 * is done. STRICT_MODULE_RWX may require extra work to support this
+	 * too.
+	 */
 
-	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL,
-				    PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+	return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL,
+				    PAGE_KERNEL_EXEC,
+				    VM_NO_HUGE_VMAP | VM_FLUSH_RESET_PERMS,
+				    NUMA_NO_NODE,
 				    __builtin_return_address(0));
 }
-#endif