Message ID | 20210317062402.533919-15-npiggin@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | huge vmalloc mappings | expand |
Hi Nick, Le 17/03/2021 à 07:24, Nicholas Piggin a écrit : > This reduces TLB misses by nearly 30x on a `git diff` workload on a > 2-node POWER9 (59,800 -> 2,100) and reduces CPU cycles by 0.54%, due > to vfs hashes being allocated with 2MB pages. > > Cc: linuxppc-dev@lists.ozlabs.org > Acked-by: Michael Ellerman <mpe@ellerman.id.au> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com> > --- > .../admin-guide/kernel-parameters.txt | 2 ++ > arch/powerpc/Kconfig | 1 + > arch/powerpc/kernel/module.c | 22 +++++++++++++++---- > 3 files changed, 21 insertions(+), 4 deletions(-) > > --- a/arch/powerpc/kernel/module.c > +++ b/arch/powerpc/kernel/module.c > @@ -8,6 +8,7 @@ > #include <linux/moduleloader.h> > #include <linux/err.h> > #include <linux/vmalloc.h> > +#include <linux/mm.h> > #include <linux/bug.h> > #include <asm/module.h> > #include <linux/uaccess.h> > @@ -87,13 +88,26 @@ int module_finalize(const Elf_Ehdr *hdr, > return 0; > } > > -#ifdef MODULES_VADDR > void *module_alloc(unsigned long size) > { > + unsigned long start = VMALLOC_START; > + unsigned long end = VMALLOC_END; > + > +#ifdef MODULES_VADDR > BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); > + start = MODULES_VADDR; > + end = MODULES_END; > +#endif > + > + /* > + * Don't do huge page allocations for modules yet until more testing > + * is done. STRICT_MODULE_RWX may require extra work to support this > + * too. > + */ > > - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, > - PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, I think you should add the following in <asm/pgtable.h> #ifndef MODULES_VADDR #define MODULES_VADDR VMALLOC_START #define MODULES_END VMALLOC_END #endif And leave module_alloc() as is (just removing the enclosing #ifdef MODULES_VADDR and adding the VM_NO_HUGE_VMAP flag) This would minimise the conflicts with the changes I did in powerpc/next reported by Stephen R. 
> + return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, > + PAGE_KERNEL_EXEC, > + VM_NO_HUGE_VMAP | VM_FLUSH_RESET_PERMS, > + NUMA_NO_NODE, > __builtin_return_address(0)); > } > -#endif >
On Thu, 15 Apr 2021 12:23:55 +0200 Christophe Leroy <christophe.leroy@csgroup.eu> wrote: > > + * is done. STRICT_MODULE_RWX may require extra work to support this > > + * too. > > + */ > > > > - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, > > - PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, > > > I think you should add the following in <asm/pgtable.h> > > #ifndef MODULES_VADDR > #define MODULES_VADDR VMALLOC_START > #define MODULES_END VMALLOC_END > #endif > > And leave module_alloc() as is (just removing the enclosing #ifdef MODULES_VADDR and adding the > VM_NO_HUGE_VMAP flag) > > This would minimise the conflicts with the changes I did in powerpc/next reported by Stephen R. > I'll drop powerpc-64s-radix-enable-huge-vmalloc-mappings.patch for now, make life simpler. Nick, a redo on top of Christophe's changes in linux-next would be best please.
Hi all, On Thu, 15 Apr 2021 11:55:29 -0700 Andrew Morton <akpm@linux-foundation.org> wrote: > > On Thu, 15 Apr 2021 12:23:55 +0200 Christophe Leroy <christophe.leroy@csgroup.eu> wrote: > > > + * is done. STRICT_MODULE_RWX may require extra work to support this > > > + * too. > > > + */ > > > > > > - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, > > > - PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, > > > > > > I think you should add the following in <asm/pgtable.h> > > > > #ifndef MODULES_VADDR > > #define MODULES_VADDR VMALLOC_START > > #define MODULES_END VMALLOC_END > > #endif > > > > And leave module_alloc() as is (just removing the enclosing #ifdef MODULES_VADDR and adding the > > VM_NO_HUGE_VMAP flag) > > > > This would minimise the conflicts with the changes I did in powerpc/next reported by Stephen R. > > > > I'll drop powerpc-64s-radix-enable-huge-vmalloc-mappings.patch for now, > make life simpler. I have dropped that patch from linux-next.
Excerpts from Andrew Morton's message of April 16, 2021 4:55 am: > On Thu, 15 Apr 2021 12:23:55 +0200 Christophe Leroy <christophe.leroy@csgroup.eu> wrote: >> > + * is done. STRICT_MODULE_RWX may require extra work to support this >> > + * too. >> > + */ >> > >> > - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, >> > - PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, >> >> >> I think you should add the following in <asm/pgtable.h> >> >> #ifndef MODULES_VADDR >> #define MODULES_VADDR VMALLOC_START >> #define MODULES_END VMALLOC_END >> #endif >> >> And leave module_alloc() as is (just removing the enclosing #ifdef MODULES_VADDR and adding the >> VM_NO_HUGE_VMAP flag) >> >> This would minimise the conflicts with the changes I did in powerpc/next reported by Stephen R. >> > > I'll drop powerpc-64s-radix-enable-huge-vmalloc-mappings.patch for now, > make life simpler. Yeah that's fine. > Nick, a redo on top of Christophe's changes in linux-next would be best > please. Will do. Thanks, Nick
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 04545725f187..1f481f904895 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3243,6 +3243,8 @@ nohugeiomap [KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings. + nohugevmalloc [PPC] Disable kernel huge vmalloc mappings. + nosmt [KNL,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 386ae12d8523..b7cade9566da 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -181,6 +181,7 @@ config PPC select GENERIC_GETTIMEOFDAY select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU + select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14 diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index a211b0253cdb..cdb2d88c54e7 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -8,6 +8,7 @@ #include <linux/moduleloader.h> #include <linux/err.h> #include <linux/vmalloc.h> +#include <linux/mm.h> #include <linux/bug.h> #include <asm/module.h> #include <linux/uaccess.h> @@ -87,13 +88,26 @@ int module_finalize(const Elf_Ehdr *hdr, return 0; } -#ifdef MODULES_VADDR void *module_alloc(unsigned long size) { + unsigned long start = VMALLOC_START; + unsigned long end = VMALLOC_END; + +#ifdef MODULES_VADDR BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); + start = MODULES_VADDR; + end = MODULES_END; +#endif + + /* + * Don't do huge page allocations for modules yet until more testing + * is done. STRICT_MODULE_RWX may require extra work to support this + * too. 
+ */ - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, - PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, + return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, + PAGE_KERNEL_EXEC, + VM_NO_HUGE_VMAP | VM_FLUSH_RESET_PERMS, + NUMA_NO_NODE, __builtin_return_address(0)); } -#endif