Message ID | 20240411160051.2093261-7-rppt@kernel.org (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | mm: jit/text allocator | expand |
Hi Mike. On Thu, Apr 11, 2024 at 07:00:42PM +0300, Mike Rapoport wrote: > From: "Mike Rapoport (IBM)" <rppt@kernel.org> > > Several architectures override module_alloc() only to define address > range for code allocations different than VMALLOC address space. > > Provide a generic implementation in execmem that uses the parameters for > address space ranges, required alignment and page protections provided > by architectures. > > The architectures must fill execmem_info structure and implement > execmem_arch_setup() that returns a pointer to that structure. This way the > execmem initialization won't be called from every architecture, but rather > from a central place, namely a core_initcall() in execmem. > > The execmem provides execmem_alloc() API that wraps __vmalloc_node_range() > with the parameters defined by the architectures. If an architecture does > not implement execmem_arch_setup(), execmem_alloc() will fall back to > module_alloc(). > > Signed-off-by: Mike Rapoport (IBM) <rppt@kernel.org> > --- This code snippet could be more readable ... > diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c > index 66c45a2764bc..b70047f944cc 100644 > --- a/arch/sparc/kernel/module.c > +++ b/arch/sparc/kernel/module.c > @@ -14,6 +14,7 @@ > #include <linux/string.h> > #include <linux/ctype.h> > #include <linux/mm.h> > +#include <linux/execmem.h> > > #include <asm/processor.h> > #include <asm/spitfire.h> > @@ -21,34 +22,26 @@ > > #include "entry.h" > > +static struct execmem_info execmem_info __ro_after_init = { > + .ranges = { > + [EXECMEM_DEFAULT] = { > #ifdef CONFIG_SPARC64 > - > -#include <linux/jump_label.h> > - > -static void *module_map(unsigned long size) > -{ > - if (PAGE_ALIGN(size) > MODULES_LEN) > - return NULL; > - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, > - GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, > - __builtin_return_address(0)); > -} > + .start = MODULES_VADDR, > + .end = MODULES_END, > #else > -static void *module_map(unsigned long size) > + .start = VMALLOC_START, > + .end = VMALLOC_END, > +#endif > + .alignment = 1, > + }, > + }, > +}; > + > +struct execmem_info __init *execmem_arch_setup(void) > { > - return vmalloc(size); > -} > -#endif /* CONFIG_SPARC64 */ > - > -void *module_alloc(unsigned long size) > -{ > - void *ret; > - > - ret = module_map(size); > - if (ret) > - memset(ret, 0, size); > + execmem_info.ranges[EXECMEM_DEFAULT].pgprot = PAGE_KERNEL; > > - return ret; > + return &execmem_info; > } > > /* Make generic code ignore STT_REGISTER dummy undefined symbols. */ ... if the following was added: diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 9e85d57ac3f2..62bcafe38b1f 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -432,6 +432,8 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma, #define VMALLOC_START _AC(0xfe600000,UL) #define VMALLOC_END _AC(0xffc00000,UL) +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END Then the #ifdef CONFIG_SPARC64 could be dropped and the code would be the same for 32 and 64 bits. Just a drive-by comment. Sam
On Thu, Apr 11, 2024 at 10:53:46PM +0200, Sam Ravnborg wrote: > Hi Mike. > > On Thu, Apr 11, 2024 at 07:00:42PM +0300, Mike Rapoport wrote: > > From: "Mike Rapoport (IBM)" <rppt@kernel.org> > > > > Several architectures override module_alloc() only to define address > > range for code allocations different than VMALLOC address space. > > > > Provide a generic implementation in execmem that uses the parameters for > > address space ranges, required alignment and page protections provided > > by architectures. > > > > The architectures must fill execmem_info structure and implement > > execmem_arch_setup() that returns a pointer to that structure. This way the > > execmem initialization won't be called from every architecture, but rather > > from a central place, namely a core_initcall() in execmem. > > > > The execmem provides execmem_alloc() API that wraps __vmalloc_node_range() > > with the parameters defined by the architectures. If an architecture does > > not implement execmem_arch_setup(), execmem_alloc() will fall back to > > module_alloc(). > > > > Signed-off-by: Mike Rapoport (IBM) <rppt@kernel.org> > > --- > > This code snippet could be more readable ... > > diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c > > index 66c45a2764bc..b70047f944cc 100644 > > --- a/arch/sparc/kernel/module.c > > +++ b/arch/sparc/kernel/module.c > > @@ -14,6 +14,7 @@ > > #include <linux/string.h> > > #include <linux/ctype.h> > > #include <linux/mm.h> > > +#include <linux/execmem.h> > > > > #include <asm/processor.h> > > #include <asm/spitfire.h> > > @@ -21,34 +22,26 @@ > > > > #include "entry.h" > > > > +static struct execmem_info execmem_info __ro_after_init = { > > + .ranges = { > > + [EXECMEM_DEFAULT] = { > > #ifdef CONFIG_SPARC64 > > - > > -#include <linux/jump_label.h> > > - > > -static void *module_map(unsigned long size) > > -{ > > - if (PAGE_ALIGN(size) > MODULES_LEN) > > - return NULL; > > - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, > > - GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, > > - __builtin_return_address(0)); > > -} > > + .start = MODULES_VADDR, > > + .end = MODULES_END, > > #else > > -static void *module_map(unsigned long size) > > + .start = VMALLOC_START, > > + .end = VMALLOC_END, > > +#endif > > + .alignment = 1, > > + }, > > + }, > > +}; > > + > > +struct execmem_info __init *execmem_arch_setup(void) > > { > > - return vmalloc(size); > > -} > > -#endif /* CONFIG_SPARC64 */ > > - > > -void *module_alloc(unsigned long size) > > -{ > > - void *ret; > > - > > - ret = module_map(size); > > - if (ret) > > - memset(ret, 0, size); > > + execmem_info.ranges[EXECMEM_DEFAULT].pgprot = PAGE_KERNEL; > > > > - return ret; > > + return &execmem_info; > > } > > > > /* Make generic code ignore STT_REGISTER dummy undefined symbols. */ > > ... if the following was added: > > diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h > index 9e85d57ac3f2..62bcafe38b1f 100644 > --- a/arch/sparc/include/asm/pgtable_32.h > +++ b/arch/sparc/include/asm/pgtable_32.h > @@ -432,6 +432,8 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma, > > #define VMALLOC_START _AC(0xfe600000,UL) > #define VMALLOC_END _AC(0xffc00000,UL) > +#define MODULES_VADDR VMALLOC_START > +#define MODULES_END VMALLOC_END > > > Then the #ifdef CONFIG_SPARC64 could be dropped and the code would be > the same for 32 and 64 bits. Yeah, the #ifdef there can be dropped even regardless of execmem. I'll add a patch for that. > Just a drive-by comment. > > Sam >
On Thu, Apr 11, 2024 at 07:00:42PM +0300, Mike Rapoport wrote: > +static struct execmem_info execmem_info __ro_after_init = { > + .ranges = { > + [EXECMEM_DEFAULT] = { > + .start = MODULES_VADDR, > + .end = MODULES_END, > + .alignment = 1, > + }, > + }, > +}; > + > +struct execmem_info __init *execmem_arch_setup(void) > { > + execmem_info.ranges[EXECMEM_DEFAULT].pgprot = PAGE_KERNEL; > + > + return &execmem_info; > } > +static struct execmem_info execmem_info __ro_after_init = { > + .ranges = { > + [EXECMEM_DEFAULT] = { > + .start = MODULES_VADDR, > + .end = MODULES_END, > + .pgprot = PAGE_KERNEL_EXEC, > + .alignment = 1, > + }, > + }, > +}; > + > +struct execmem_info __init *execmem_arch_setup(void) > { > + return &execmem_info; > } > +static struct execmem_info execmem_info __ro_after_init = { > + .ranges = { > + [EXECMEM_DEFAULT] = { > + .pgprot = PAGE_KERNEL_RWX, > + .alignment = 1, > + }, > + }, > +}; > + > +struct execmem_info __init *execmem_arch_setup(void) > { > + execmem_info.ranges[EXECMEM_DEFAULT].start = VMALLOC_START; > + execmem_info.ranges[EXECMEM_DEFAULT].end = VMALLOC_END; > + > + return &execmem_info; > } > +static struct execmem_info execmem_info __ro_after_init = { > + .ranges = { > + [EXECMEM_DEFAULT] = { > + .pgprot = PAGE_KERNEL, > + .alignment = 1, > + }, > + }, > +}; > + > +struct execmem_info __init *execmem_arch_setup(void) > { > + execmem_info.ranges[EXECMEM_DEFAULT].start = MODULES_VADDR; > + execmem_info.ranges[EXECMEM_DEFAULT].end = MODULES_END; > + > + return &execmem_info; > } > +static struct execmem_info execmem_info __ro_after_init = { > + .ranges = { > + [EXECMEM_DEFAULT] = { > #ifdef CONFIG_SPARC64 > + .start = MODULES_VADDR, > + .end = MODULES_END, > #else > + .start = VMALLOC_START, > + .end = VMALLOC_END, > +#endif > + .alignment = 1, > + }, > + }, > +}; > + > +struct execmem_info __init *execmem_arch_setup(void) > { > + execmem_info.ranges[EXECMEM_DEFAULT].pgprot = PAGE_KERNEL; > > + return &execmem_info; > } I'm amazed by the weird and inconsistent breakup of initializations. What exactly is wrong with something like: static struct execmem_info execmem_info __ro_after_init; struct execmem_info __init *execmem_arch_setup(void) { execmem_info = (struct execmem_info){ .ranges = { [EXECMEM_DEFAULT] = { .start = MODULES_VADDR, .end = MODULES_END, .pgprot = PAGE_KERNEL, .alignment = 1, }, }, }; return &execmem_info; }
diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index c7d0338d12c1..78c6a68f6c3c 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -18,6 +18,7 @@ #include <linux/ftrace.h> #include <linux/string.h> #include <linux/kernel.h> +#include <linux/execmem.h> #include <asm/alternative.h> #include <asm/inst.h> #include <asm/unwind.h> @@ -490,10 +491,21 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, return 0; } -void *module_alloc(unsigned long size) +static struct execmem_info execmem_info __ro_after_init = { + .ranges = { + [EXECMEM_DEFAULT] = { + .pgprot = PAGE_KERNEL, + .alignment = 1, + }, + }, +}; + +struct execmem_info __init *execmem_arch_setup(void) { - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); + execmem_info.ranges[EXECMEM_DEFAULT].start = MODULES_VADDR; + execmem_info.ranges[EXECMEM_DEFAULT].end = MODULES_END; + + return &execmem_info; } static void module_init_ftrace_plt(const Elf_Ehdr *hdr, diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c index 9a6c96014904..50505e910763 100644 --- a/arch/mips/kernel/module.c +++ b/arch/mips/kernel/module.c @@ -20,6 +20,7 @@ #include <linux/kernel.h> #include <linux/spinlock.h> #include <linux/jump_label.h> +#include <linux/execmem.h> #include <asm/jump_label.h> struct mips_hi16 { @@ -32,11 +33,21 @@ static LIST_HEAD(dbe_list); static DEFINE_SPINLOCK(dbe_lock); #ifdef MODULES_VADDR -void *module_alloc(unsigned long size) +static struct execmem_info execmem_info __ro_after_init = { + .ranges = { + [EXECMEM_DEFAULT] = { + .start = MODULES_VADDR, + .end = MODULES_END, + .alignment = 1, + }, + }, +}; + +struct execmem_info __init *execmem_arch_setup(void) { - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + execmem_info.ranges[EXECMEM_DEFAULT].pgprot = PAGE_KERNEL; + + return &execmem_info; } #endif diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c index 9c97b7513853..2b68ef8aad42 100644 --- a/arch/nios2/kernel/module.c +++ b/arch/nios2/kernel/module.c @@ -18,15 +18,24 @@ #include <linux/fs.h> #include <linux/string.h> #include <linux/kernel.h> +#include <linux/execmem.h> #include <asm/cacheflush.h> -void *module_alloc(unsigned long size) +static struct execmem_info execmem_info __ro_after_init = { + .ranges = { + [EXECMEM_DEFAULT] = { + .start = MODULES_VADDR, + .end = MODULES_END, + .pgprot = PAGE_KERNEL_EXEC, + .alignment = 1, + }, + }, +}; + +struct execmem_info __init *execmem_arch_setup(void) { - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, - VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, - __builtin_return_address(0)); + return &execmem_info; } int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index d214bbe3c2af..721324c42b7d 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -49,6 +49,7 @@ #include <linux/bug.h> #include <linux/mm.h> #include <linux/slab.h> +#include <linux/execmem.h> #include <asm/unwind.h> #include <asm/sections.h> @@ -173,15 +174,21 @@ static inline int reassemble_22(int as22) ((as22 & 0x0003ff) << 3)); } -void *module_alloc(unsigned long size) +static struct execmem_info execmem_info __ro_after_init = { + .ranges = { + [EXECMEM_DEFAULT] = { + .pgprot = PAGE_KERNEL_RWX, + .alignment = 1, + }, + }, +}; + +struct execmem_info __init *execmem_arch_setup(void) { - /* using RWX means less protection for modules, but it's - * easier than trying to map the text, data, init_text and - * init_data correctly */ - return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, - PAGE_KERNEL_RWX, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + execmem_info.ranges[EXECMEM_DEFAULT].start = VMALLOC_START; + execmem_info.ranges[EXECMEM_DEFAULT].end = VMALLOC_END; + + return &execmem_info; } #ifndef CONFIG_64BIT diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 5e5a82644451..ad32e2a8621a 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -14,6 +14,7 @@ #include <linux/vmalloc.h> #include <linux/sizes.h> #include <linux/pgtable.h> +#include <linux/execmem.h> #include <asm/alternative.h> #include <asm/sections.h> @@ -906,13 +907,21 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, } #if defined(CONFIG_MMU) && defined(CONFIG_64BIT) -void *module_alloc(unsigned long size) +static struct execmem_info execmem_info __ro_after_init = { + .ranges = { + [EXECMEM_DEFAULT] = { + .pgprot = PAGE_KERNEL, + .alignment = 1, + }, + }, +}; + +struct execmem_info __init *execmem_arch_setup(void) { - return __vmalloc_node_range(size, 1, MODULES_VADDR, - MODULES_END, GFP_KERNEL, - PAGE_KERNEL, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, - __builtin_return_address(0)); + execmem_info.ranges[EXECMEM_DEFAULT].start = MODULES_VADDR; + execmem_info.ranges[EXECMEM_DEFAULT].end = MODULES_END; + + return &execmem_info; } #endif diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index 66c45a2764bc..b70047f944cc 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -14,6 +14,7 @@ #include <linux/string.h> #include <linux/ctype.h> #include <linux/mm.h> +#include <linux/execmem.h> #include <asm/processor.h> #include <asm/spitfire.h> @@ -21,34 +22,26 @@ #include "entry.h" +static struct execmem_info execmem_info __ro_after_init = { + .ranges = { + [EXECMEM_DEFAULT] = { #ifdef CONFIG_SPARC64 - -#include <linux/jump_label.h> - -static void *module_map(unsigned long size) -{ - if (PAGE_ALIGN(size) > MODULES_LEN) - return NULL; - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); -} + .start = MODULES_VADDR, + .end = MODULES_END, #else -static void *module_map(unsigned long size) + .start = VMALLOC_START, + .end = VMALLOC_END, +#endif + .alignment = 1, + }, + }, +}; + +struct execmem_info __init *execmem_arch_setup(void) { - return vmalloc(size); -} -#endif /* CONFIG_SPARC64 */ - -void *module_alloc(unsigned long size) -{ - void *ret; - - ret = module_map(size); - if (ret) - memset(ret, 0, size); + execmem_info.ranges[EXECMEM_DEFAULT].pgprot = PAGE_KERNEL; - return ret; + return &execmem_info; } /* Make generic code ignore STT_REGISTER dummy undefined symbols. */ diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 43e7995593a1..89173be320cf 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -33,6 +33,47 @@ enum execmem_type { EXECMEM_TYPE_MAX, }; +/** + * struct execmem_range - definition of an address space suitable for code and + * related data allocations + * @start: address space start + * @end: address space end (inclusive) + * @pgprot: permissions for memory in this address space + * @alignment: alignment required for text allocations + */ +struct execmem_range { + unsigned long start; + unsigned long end; + pgprot_t pgprot; + unsigned int alignment; +}; + +/** + * struct execmem_info - architecture parameters for code allocations + * @ranges: array of parameter sets defining architecture specific + * parameters for executable memory allocations. The ranges that are not + * explicitly initialized by an architecture use parameters defined for + * @EXECMEM_DEFAULT. + */ +struct execmem_info { + struct execmem_range ranges[EXECMEM_TYPE_MAX]; +}; + +/** + * execmem_arch_setup - define parameters for allocations of executable memory + * + * A hook for architectures to define parameters for allocations of + * executable memory. These parameters should be filled into the + * @execmem_info structure. + * + * For architectures that do not implement this method a default set of + * parameters will be used + * + * Return: a structure defining architecture parameters and restrictions + * for allocations of executable memory + */ +struct execmem_info *execmem_arch_setup(void); + /** * execmem_alloc - allocate executable memory * @type: type of the allocation diff --git a/mm/execmem.c b/mm/execmem.c index ed2ea41a2543..d9fb20bc7354 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -5,14 +5,30 @@ #include <linux/execmem.h> #include <linux/moduleloader.h> -static void *__execmem_alloc(size_t size) +static struct execmem_info *execmem_info __ro_after_init; + +static void *__execmem_alloc(struct execmem_range *range, size_t size) { - return module_alloc(size); + unsigned long start = range->start; + unsigned long end = range->end; + unsigned int align = range->alignment; + pgprot_t pgprot = range->pgprot; + + return __vmalloc_node_range(size, align, start, end, + GFP_KERNEL, pgprot, VM_FLUSH_RESET_PERMS, + NUMA_NO_NODE, __builtin_return_address(0)); } void *execmem_alloc(enum execmem_type type, size_t size) { - return __execmem_alloc(size); + struct execmem_range *range; + + if (!execmem_info) + return module_alloc(size); + + range = &execmem_info->ranges[type]; + + return __execmem_alloc(range, size); } void execmem_free(void *ptr) @@ -24,3 +40,54 @@ void execmem_free(void *ptr) WARN_ON(in_interrupt()); vfree(ptr); } + +static bool execmem_validate(struct execmem_info *info) +{ + struct execmem_range *r = &info->ranges[EXECMEM_DEFAULT]; + + if (!r->alignment || !r->start || !r->end || !pgprot_val(r->pgprot)) { + pr_crit("Invalid parameters for execmem allocator, module loading will fail"); + return false; + } + + return true; +} + +static void execmem_init_missing(struct execmem_info *info) +{ + struct execmem_range *default_range = &info->ranges[EXECMEM_DEFAULT]; + + for (int i = EXECMEM_DEFAULT + 1; i < EXECMEM_TYPE_MAX; i++) { + struct execmem_range *r = &info->ranges[i]; + + if (!r->start) { + r->pgprot = default_range->pgprot; + r->alignment = default_range->alignment; + r->start = default_range->start; + r->end = default_range->end; + } + } +} + +struct execmem_info * __weak execmem_arch_setup(void) +{ + return NULL; +} + +static int __init execmem_init(void) +{ + struct execmem_info *info = execmem_arch_setup(); + + if (!info) + return 0; + + if (!execmem_validate(info)) + return -EINVAL; + + execmem_init_missing(info); + + execmem_info = info; + + return 0; +} +core_initcall(execmem_init);