Message ID | 20221007234315.2877365-4-song@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | vmalloc_exec for modules and BPF programs | expand |
On Fri, Oct 07, 2022 at 04:43:14PM -0700, Song Liu wrote: > This is a prototype that allows modules to share 2MB text pages with other > modules and BPF programs. > > Current version only covers core_layout. > --- > arch/x86/Kconfig | 1 + > arch/x86/kernel/alternative.c | 30 ++++++++++++++++++++++++------ > arch/x86/kernel/module.c | 1 + > kernel/module/main.c | 23 +++++++++++++---------- > kernel/module/strict_rwx.c | 3 --- > kernel/trace/ftrace.c | 3 ++- > 6 files changed, 41 insertions(+), 20 deletions(-) > > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig > index f9920f1341c8..0b1ea05a1da6 100644 > --- a/arch/x86/Kconfig > +++ b/arch/x86/Kconfig > @@ -91,6 +91,7 @@ config X86 > select ARCH_HAS_SET_DIRECT_MAP > select ARCH_HAS_STRICT_KERNEL_RWX > select ARCH_HAS_STRICT_MODULE_RWX > + select ARCH_WANTS_MODULES_DATA_IN_VMALLOC if X86_64 > select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE > select ARCH_HAS_SYSCALL_WRAPPER > select ARCH_HAS_UBSAN_SANITIZE_ALL > diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c > index 4f3204364caa..0e47a558c5bc 100644 > --- a/arch/x86/kernel/alternative.c > +++ b/arch/x86/kernel/alternative.c > @@ -332,7 +332,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, > > DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr); > > - text_poke_early(instr, insn_buff, insn_buff_sz); > + if (system_state < SYSTEM_RUNNING) { > + text_poke_early(instr, insn_buff, insn_buff_sz); > + } else { > + mutex_lock(&text_mutex); > + text_poke(instr, insn_buff, insn_buff_sz); > + mutex_unlock(&text_mutex); > + } > > next: > optimize_nops(instr, a->instrlen); > @@ -503,7 +509,13 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) > optimize_nops(bytes, len); > DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); > DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); > - text_poke_early(addr, bytes, len); > + if (system_state == SYSTEM_BOOTING) { > + text_poke_early(addr, bytes, len); 
> + } else { > + mutex_lock(&text_mutex); > + text_poke(addr, bytes, len); > + mutex_unlock(&text_mutex); > + } > } > } > } > @@ -568,7 +580,13 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) > if (len == insn.length) { > DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); > DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); > - text_poke_early(addr, bytes, len); > + if (unlikely(system_state == SYSTEM_BOOTING)) { > + text_poke_early(addr, bytes, len); > + } else { > + mutex_lock(&text_mutex); > + text_poke(addr, bytes, len); > + mutex_unlock(&text_mutex); > + } > } > } > } > @@ -609,7 +627,7 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) > */ > DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); > DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); > - text_poke_early(addr, &poison, 4); > + text_poke(addr, &poison, 4); > } > } > > @@ -791,7 +809,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, > > /* Pad the rest with nops */ > add_nops(insn_buff + used, p->len - used); > - text_poke_early(p->instr, insn_buff, p->len); > + text_poke(p->instr, insn_buff, p->len); Got below warning when booting a VM: [ 0.190098] ------------[ cut here ]------------ [ 0.190377] WARNING: CPU: 0 PID: 0 at /home/aaron/linux/src/arch/x86/kernel/alternative.c:1224 text_poke+0x53/0x60 [ 0.191083] Modules linked in: [ 0.191269] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.0.0-00004-gc49d19177d78 #5 [ 0.191721] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 [ 0.192083] RIP: 0010:text_poke+0x53/0x60 [ 0.192326] Code: c7 c7 20 e7 02 81 5b 5d e9 2a f8 ff ff be ff ff ff ff 48 c7 c7 b0 6d 06 83 48 89 14 24 e8 75 fd bf 00 85 c0 48 8b 14 24 75 c8 <0f> 0b eb c4 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 57 41 56 [ 0.193083] RSP: 0000:ffffffff83003d60 EFLAGS: 00010246 [ 0.194083] RAX: 0000000000000000 RBX: ffffffff810295b7 RCX: 0000000000000001 [ 0.194506] RDX: 0000000000000006 RSI: 
ffffffff828b01c5 RDI: ffffffff8293898e [ 0.195083] RBP: ffffffff83003d82 R08: ffffffff82206520 R09: 0000000000000001 [ 0.195506] R10: 0000000000000000 R11: 0000000000000001 R12: ffffffff8a9949c0 [ 0.195929] R13: ffffffff8a95f400 R14: 00000000ffffffff R15: 00000000ffffffff [ 0.196083] FS: 0000000000000000(0000) GS:ffff88842de00000(0000) knlGS:0000000000000000 [ 0.196562] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 0.197083] CR2: ffff88843ffff000 CR3: 0000000003012001 CR4: 0000000000770ef0 [ 0.197508] PKRU: 55555554 [ 0.197673] Call Trace: [ 0.197822] <TASK> [ 0.198084] apply_paravirt+0xaf/0x150 [ 0.198313] ? __might_resched+0x3f/0x280 [ 0.198557] ? synchronize_rcu+0xe0/0x1c0 [ 0.198799] ? lock_release+0x230/0x450 [ 0.199030] ? _raw_spin_unlock_irqrestore+0x30/0x60 [ 0.199083] ? lockdep_hardirqs_on+0x79/0x100 [ 0.199345] ? _raw_spin_unlock_irqrestore+0x3b/0x60 [ 0.199643] ? atomic_notifier_chain_unregister+0x51/0x80 [ 0.200084] alternative_instructions+0x27/0xfa [ 0.200357] check_bugs+0xe08/0xe82 [ 0.200570] start_kernel+0x692/0x6cc [ 0.200797] secondary_startup_64_no_verify+0xe0/0xeb [ 0.201088] </TASK> [ 0.201223] irq event stamp: 13575 [ 0.201428] hardirqs last enabled at (13583): [<ffffffff811193c2>] __up_console_sem+0x52/0x60 [ 0.202083] hardirqs last disabled at (13592): [<ffffffff811193a7>] __up_console_sem+0x37/0x60 [ 0.202594] softirqs last enabled at (12762): [<ffffffff8117e169>] cgroup_idr_alloc.constprop.60+0x59/0x100 [ 0.203083] softirqs last disabled at (12750): [<ffffffff8117e13d>] cgroup_idr_alloc.constprop.60+0x2d/0x100 [ 0.203665] ---[ end trace 0000000000000000 ]--- Looks like it is also necessary to differentiate system_state in apply_paravirt() like you did in the other apply_XXX() functions.
On Thu, Oct 13, 2022 at 8:49 PM Aaron Lu <aaron.lu@intel.com> wrote: > > On Fri, Oct 07, 2022 at 04:43:14PM -0700, Song Liu wrote: > > This is a prototype that allows modules to share 2MB text pages with other > > modules and BPF programs. > > > > Current version only covers core_layout. > > --- > > arch/x86/Kconfig | 1 + > > arch/x86/kernel/alternative.c | 30 ++++++++++++++++++++++++------ > > arch/x86/kernel/module.c | 1 + > > kernel/module/main.c | 23 +++++++++++++---------- > > kernel/module/strict_rwx.c | 3 --- > > kernel/trace/ftrace.c | 3 ++- > > 6 files changed, 41 insertions(+), 20 deletions(-) > > > > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig > > index f9920f1341c8..0b1ea05a1da6 100644 > > --- a/arch/x86/Kconfig > > +++ b/arch/x86/Kconfig > > @@ -91,6 +91,7 @@ config X86 > > select ARCH_HAS_SET_DIRECT_MAP > > select ARCH_HAS_STRICT_KERNEL_RWX > > select ARCH_HAS_STRICT_MODULE_RWX > > + select ARCH_WANTS_MODULES_DATA_IN_VMALLOC if X86_64 > > select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE > > select ARCH_HAS_SYSCALL_WRAPPER > > select ARCH_HAS_UBSAN_SANITIZE_ALL > > diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c > > index 4f3204364caa..0e47a558c5bc 100644 > > --- a/arch/x86/kernel/alternative.c > > +++ b/arch/x86/kernel/alternative.c > > @@ -332,7 +332,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, > > > > DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr); > > > > - text_poke_early(instr, insn_buff, insn_buff_sz); > > + if (system_state < SYSTEM_RUNNING) { > > + text_poke_early(instr, insn_buff, insn_buff_sz); > > + } else { > > + mutex_lock(&text_mutex); > > + text_poke(instr, insn_buff, insn_buff_sz); > > + mutex_unlock(&text_mutex); > > + } > > > > next: > > optimize_nops(instr, a->instrlen); > > @@ -503,7 +509,13 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) > > optimize_nops(bytes, len); > > DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); > > 
DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); > > - text_poke_early(addr, bytes, len); > > + if (system_state == SYSTEM_BOOTING) { > > + text_poke_early(addr, bytes, len); > > + } else { > > + mutex_lock(&text_mutex); > > + text_poke(addr, bytes, len); > > + mutex_unlock(&text_mutex); > > + } > > } > > } > > } > > @@ -568,7 +580,13 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) > > if (len == insn.length) { > > DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); > > DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); > > - text_poke_early(addr, bytes, len); > > + if (unlikely(system_state == SYSTEM_BOOTING)) { > > + text_poke_early(addr, bytes, len); > > + } else { > > + mutex_lock(&text_mutex); > > + text_poke(addr, bytes, len); > > + mutex_unlock(&text_mutex); > > + } > > } > > } > > } > > @@ -609,7 +627,7 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) > > */ > > DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); > > DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); > > - text_poke_early(addr, &poison, 4); > > + text_poke(addr, &poison, 4); > > } > > } > > > > @@ -791,7 +809,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, > > > > /* Pad the rest with nops */ > > add_nops(insn_buff + used, p->len - used); > > - text_poke_early(p->instr, insn_buff, p->len); > > + text_poke(p->instr, insn_buff, p->len); > > Got below warning when booting a VM: > > [ 0.190098] ------------[ cut here ]------------ > [ 0.190377] WARNING: CPU: 0 PID: 0 at /home/aaron/linux/src/arch/x86/kernel/alternative.c:1224 text_poke+0x53/0x60 > [ 0.191083] Modules linked in: > [ 0.191269] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.0.0-00004-gc49d19177d78 #5 > [ 0.191721] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 > [ 0.192083] RIP: 0010:text_poke+0x53/0x60 > [ 0.192326] Code: c7 c7 20 e7 02 81 5b 5d e9 2a f8 ff ff be ff ff ff ff 48 c7 c7 b0 6d 06 83 48 89 14 24 e8 75 fd bf 00 85 
c0 48 8b 14 24 75 c8 <0f> 0b eb c4 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 57 41 56 > [ 0.193083] RSP: 0000:ffffffff83003d60 EFLAGS: 00010246 > [ 0.194083] RAX: 0000000000000000 RBX: ffffffff810295b7 RCX: 0000000000000001 > [ 0.194506] RDX: 0000000000000006 RSI: ffffffff828b01c5 RDI: ffffffff8293898e > [ 0.195083] RBP: ffffffff83003d82 R08: ffffffff82206520 R09: 0000000000000001 > [ 0.195506] R10: 0000000000000000 R11: 0000000000000001 R12: ffffffff8a9949c0 > [ 0.195929] R13: ffffffff8a95f400 R14: 00000000ffffffff R15: 00000000ffffffff > [ 0.196083] FS: 0000000000000000(0000) GS:ffff88842de00000(0000) knlGS:0000000000000000 > [ 0.196562] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 0.197083] CR2: ffff88843ffff000 CR3: 0000000003012001 CR4: 0000000000770ef0 > [ 0.197508] PKRU: 55555554 > [ 0.197673] Call Trace: > [ 0.197822] <TASK> > [ 0.198084] apply_paravirt+0xaf/0x150 > [ 0.198313] ? __might_resched+0x3f/0x280 > [ 0.198557] ? synchronize_rcu+0xe0/0x1c0 > [ 0.198799] ? lock_release+0x230/0x450 > [ 0.199030] ? _raw_spin_unlock_irqrestore+0x30/0x60 > [ 0.199083] ? lockdep_hardirqs_on+0x79/0x100 > [ 0.199345] ? _raw_spin_unlock_irqrestore+0x3b/0x60 > [ 0.199643] ? 
atomic_notifier_chain_unregister+0x51/0x80 > [ 0.200084] alternative_instructions+0x27/0xfa > [ 0.200357] check_bugs+0xe08/0xe82 > [ 0.200570] start_kernel+0x692/0x6cc > [ 0.200797] secondary_startup_64_no_verify+0xe0/0xeb > [ 0.201088] </TASK> > [ 0.201223] irq event stamp: 13575 > [ 0.201428] hardirqs last enabled at (13583): [<ffffffff811193c2>] __up_console_sem+0x52/0x60 > [ 0.202083] hardirqs last disabled at (13592): [<ffffffff811193a7>] __up_console_sem+0x37/0x60 > [ 0.202594] softirqs last enabled at (12762): [<ffffffff8117e169>] cgroup_idr_alloc.constprop.60+0x59/0x100 > [ 0.203083] softirqs last disabled at (12750): [<ffffffff8117e13d>] cgroup_idr_alloc.constprop.60+0x2d/0x100 > [ 0.203665] ---[ end trace 0000000000000000 ]--- > > Looks like it is also necessary to differentiate system_state in > apply_paravirt() like you did in the other apply_XXX() functions. Thanks for the report! Somehow I didn't see this in my qemu vm. Song
> On Oct 14, 2022, at 8:42 AM, Edgecombe, Rick P <rick.p.edgecombe@intel.com> wrote: > > On Fri, 2022-10-07 at 16:43 -0700, Song Liu wrote: >> diff --git a/kernel/module/main.c b/kernel/module/main.c >> index a4e4d84b6f4e..b44806e31a56 100644 >> --- a/kernel/module/main.c >> +++ b/kernel/module/main.c >> @@ -53,6 +53,7 @@ >> #include <linux/bsearch.h> >> #include <linux/dynamic_debug.h> >> #include <linux/audit.h> >> +#include <linux/bpf.h> >> #include <uapi/linux/module.h> >> #include "internal.h" >> >> @@ -1203,7 +1204,7 @@ static void free_module(struct module *mod) >> lockdep_free_key_range(mod->data_layout.base, mod- >>> data_layout.size); >> >> /* Finally, free the core (containing the module structure) >> */ >> - module_memfree(mod->core_layout.base); >> + vfree_exec(mod->core_layout.base); >> #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC >> vfree(mod->data_layout.base); >> #endif >> @@ -1321,7 +1322,8 @@ static int simplify_symbols(struct module *mod, >> const struct load_info *info) >> ksym = resolve_symbol_wait(mod, info, name); >> /* Ok if resolved. */ >> if (ksym && !IS_ERR(ksym)) { >> - sym[i].st_value = >> kernel_symbol_value(ksym); >> + unsigned long val = >> kernel_symbol_value(ksym); >> + bpf_arch_text_copy(&sym[i].st_value, >> &val, sizeof(val)); > > Why bpf_arch_text_copy()? This of course won't work for other > architectures. So there needs to be fallback method. That RFC broke the > operation into two stages: Loading and finalized. When loading, on non- > x86 the writes would simply be to the allocation mapped as writable. > When it was finalized it changed it to it's final permission (RO, etc). > Then for x86 it does text_pokes() for the writes and has it RO from the > beginning. Yeah, this one (3/4) is really a prototype to show vmalloc_exec could work for modules (with a lot more work of course). And something to replace bpf_arch_text_copy() is one of the issues we need to address in the future. 
> > I ended up needing a staging buffer for modules too, so that the code > could operate on it directly. I can't remember why that was, it might > be unneeded now since you moved data out of the core allocation. Both bpf_jit and bpf_dispatcher use a staging buffer with bpf_prog_pack. The benefit of this approach is that it minimizes the number of text_poke/copy() calls. OTOH, it is quite a pain to make all the relative calls correct, as the staging buffer has a different address from the final allocation. I think we may not need the staging buffer for modules, as module load/unload happens less often than BPF program JITs (so it is ok for it to be slightly slower). btw: I cannot take credit for splitting module data out of the core allocation; Christophe Leroy did the work. :) Thanks, Song
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f9920f1341c8..0b1ea05a1da6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -91,6 +91,7 @@ config X86 select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX + select ARCH_WANTS_MODULES_DATA_IN_VMALLOC if X86_64 select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_UBSAN_SANITIZE_ALL diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 4f3204364caa..0e47a558c5bc 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -332,7 +332,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr); - text_poke_early(instr, insn_buff, insn_buff_sz); + if (system_state < SYSTEM_RUNNING) { + text_poke_early(instr, insn_buff, insn_buff_sz); + } else { + mutex_lock(&text_mutex); + text_poke(instr, insn_buff, insn_buff_sz); + mutex_unlock(&text_mutex); + } next: optimize_nops(instr, a->instrlen); @@ -503,7 +509,13 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) optimize_nops(bytes, len); DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); - text_poke_early(addr, bytes, len); + if (system_state == SYSTEM_BOOTING) { + text_poke_early(addr, bytes, len); + } else { + mutex_lock(&text_mutex); + text_poke(addr, bytes, len); + mutex_unlock(&text_mutex); + } } } } @@ -568,7 +580,13 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) if (len == insn.length) { DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); - text_poke_early(addr, bytes, len); + if (unlikely(system_state == SYSTEM_BOOTING)) { + text_poke_early(addr, bytes, len); + } else { + mutex_lock(&text_mutex); + text_poke(addr, bytes, len); + mutex_unlock(&text_mutex); + } } } } @@ -609,7 +627,7 @@ void 
__init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) */ DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); - text_poke_early(addr, &poison, 4); + text_poke(addr, &poison, 4); } } @@ -791,7 +809,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, /* Pad the rest with nops */ add_nops(insn_buff + used, p->len - used); - text_poke_early(p->instr, insn_buff, p->len); + text_poke(p->instr, insn_buff, p->len); } } extern struct paravirt_patch_site __start_parainstructions[], @@ -1699,7 +1717,7 @@ void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void * struct text_poke_loc tp; if (unlikely(system_state == SYSTEM_BOOTING)) { - text_poke_early(addr, opcode, len); + text_poke(addr, opcode, len); return; } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index b1abf663417c..577e31647dc4 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -229,6 +229,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, bool early = me->state == MODULE_STATE_UNFORMED; void *(*write)(void *, const void *, size_t) = memcpy; + early = false; if (!early) { write = text_poke; mutex_lock(&text_mutex); diff --git a/kernel/module/main.c b/kernel/module/main.c index a4e4d84b6f4e..b44806e31a56 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -53,6 +53,7 @@ #include <linux/bsearch.h> #include <linux/dynamic_debug.h> #include <linux/audit.h> +#include <linux/bpf.h> #include <uapi/linux/module.h> #include "internal.h" @@ -1203,7 +1204,7 @@ static void free_module(struct module *mod) lockdep_free_key_range(mod->data_layout.base, mod->data_layout.size); /* Finally, free the core (containing the module structure) */ - module_memfree(mod->core_layout.base); + vfree_exec(mod->core_layout.base); #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC vfree(mod->data_layout.base); #endif @@ -1321,7 +1322,8 @@ static int simplify_symbols(struct module *mod, const 
struct load_info *info) ksym = resolve_symbol_wait(mod, info, name); /* Ok if resolved. */ if (ksym && !IS_ERR(ksym)) { - sym[i].st_value = kernel_symbol_value(ksym); + unsigned long val = kernel_symbol_value(ksym); + bpf_arch_text_copy(&sym[i].st_value, &val, sizeof(val)); break; } @@ -1342,7 +1344,8 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) secbase = (unsigned long)mod_percpu(mod); else secbase = info->sechdrs[sym[i].st_shndx].sh_addr; - sym[i].st_value += secbase; + secbase += sym[i].st_value; + bpf_arch_text_copy(&sym[i].st_value, &secbase, sizeof(secbase)); break; } } @@ -2123,7 +2126,7 @@ static int move_module(struct module *mod, struct load_info *info) void *ptr; /* Do the allocs. */ - ptr = module_alloc(mod->core_layout.size); + ptr = vmalloc_exec(mod->core_layout.size, PAGE_SIZE); /* * The pointer to this block is stored in the module structure * which is inside the block. Just mark it as not being a @@ -2133,7 +2136,7 @@ static int move_module(struct module *mod, struct load_info *info) if (!ptr) return -ENOMEM; - memset(ptr, 0, mod->core_layout.size); +/* memset(ptr, 0, mod->core_layout.size); */ mod->core_layout.base = ptr; if (mod->init_layout.size) { @@ -2146,7 +2149,7 @@ static int move_module(struct module *mod, struct load_info *info) */ kmemleak_ignore(ptr); if (!ptr) { - module_memfree(mod->core_layout.base); + vfree_exec(mod->core_layout.base); return -ENOMEM; } memset(ptr, 0, mod->init_layout.size); @@ -2156,7 +2159,7 @@ static int move_module(struct module *mod, struct load_info *info) #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC /* Do the allocs. */ - ptr = vzalloc(mod->data_layout.size); + ptr = module_alloc(mod->data_layout.size); /* * The pointer to this block is stored in the module structure * which is inside the block. 
Just mark it as not being a @@ -2164,7 +2167,7 @@ static int move_module(struct module *mod, struct load_info *info) */ kmemleak_not_leak(ptr); if (!ptr) { - module_memfree(mod->core_layout.base); + vfree_exec(mod->core_layout.base); module_memfree(mod->init_layout.base); return -ENOMEM; } @@ -2189,7 +2192,7 @@ static int move_module(struct module *mod, struct load_info *info) dest = mod->core_layout.base + shdr->sh_entsize; if (shdr->sh_type != SHT_NOBITS) - memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size); + bpf_arch_text_copy(dest, (void *)shdr->sh_addr, shdr->sh_size); /* Update sh_addr to point to copy in image. */ shdr->sh_addr = (unsigned long)dest; pr_debug("\t0x%lx %s\n", @@ -2345,7 +2348,7 @@ static void module_deallocate(struct module *mod, struct load_info *info) percpu_modfree(mod); module_arch_freeing_init(mod); module_memfree(mod->init_layout.base); - module_memfree(mod->core_layout.base); + vfree_exec(mod->core_layout.base); #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC vfree(mod->data_layout.base); #endif diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c index 14fbea66f12f..d392eb7bf574 100644 --- a/kernel/module/strict_rwx.c +++ b/kernel/module/strict_rwx.c @@ -85,7 +85,6 @@ void module_enable_x(const struct module *mod) !PAGE_ALIGNED(mod->init_layout.base)) return; - frob_text(&mod->core_layout, set_memory_x); frob_text(&mod->init_layout, set_memory_x); } @@ -98,9 +97,7 @@ void module_enable_ro(const struct module *mod, bool after_init) return; #endif - set_vm_flush_reset_perms(mod->core_layout.base); set_vm_flush_reset_perms(mod->init_layout.base); - frob_text(&mod->core_layout, set_memory_ro); frob_rodata(&mod->data_layout, set_memory_ro); frob_text(&mod->init_layout, set_memory_ro); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 439e2ab6905e..818418d5b853 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3142,6 +3142,7 @@ static int ftrace_update_code(struct module *mod, struct 
ftrace_page *new_pgs) if (mod) rec_flags |= FTRACE_FL_DISABLED; + ftrace_arch_code_modify_prepare(); for (pg = new_pgs; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { @@ -3163,7 +3164,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) update_cnt++; } } - + ftrace_arch_code_modify_post_process(); stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; ftrace_update_tot_cnt += update_cnt;