@@ -65,4 +65,20 @@ static inline const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
return NULL;
}
+static u64 module_direct_base __ro_after_init;
+static u64 module_plt_base __ro_after_init;
+
+static inline u64 get_modules_base(void)
+{
+ return (module_plt_base) ? module_plt_base : module_direct_base;
+}
+
+static inline u64 get_modules_end(void)
+{
+ return (module_plt_base) ? module_plt_base + SZ_2G :
+ module_direct_base + SZ_128M;
+}
+
+void *module_alloc(unsigned long size);
+
#endif /* __ASM_MODULE_H */
@@ -41,4 +41,6 @@ static inline bool arch_parse_debug_rodata(char *arg)
}
#define arch_parse_debug_rodata arch_parse_debug_rodata
+int __init module_init_limits(void);
+
#endif
@@ -31,4 +31,7 @@ static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
return pgprot_tagged(prot);
}
+#define arch_init_checked_vmap_ranges arch_init_checked_vmap_ranges
+inline void __init arch_init_checked_vmap_ranges(void);
+
#endif /* _ASM_ARM64_VMALLOC_H */
@@ -25,90 +25,7 @@
#include <asm/insn.h>
#include <asm/scs.h>
#include <asm/sections.h>
-
-static u64 module_direct_base __ro_after_init = 0;
-static u64 module_plt_base __ro_after_init = 0;
-
-/*
- * Choose a random page-aligned base address for a window of 'size' bytes which
- * entirely contains the interval [start, end - 1].
- */
-static u64 __init random_bounding_box(u64 size, u64 start, u64 end)
-{
- u64 max_pgoff, pgoff;
-
- if ((end - start) >= size)
- return 0;
-
- max_pgoff = (size - (end - start)) / PAGE_SIZE;
- pgoff = get_random_u32_inclusive(0, max_pgoff);
-
- return start - pgoff * PAGE_SIZE;
-}
-
-/*
- * Modules may directly reference data and text anywhere within the kernel
- * image and other modules. References using PREL32 relocations have a +/-2G
- * range, and so we need to ensure that the entire kernel image and all modules
- * fall within a 2G window such that these are always within range.
- *
- * Modules may directly branch to functions and code within the kernel text,
- * and to functions and code within other modules. These branches will use
- * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure
- * that the entire kernel text and all module text falls within a 128M window
- * such that these are always within range. With PLTs, we can expand this to a
- * 2G window.
- *
- * We chose the 128M region to surround the entire kernel image (rather than
- * just the text) as using the same bounds for the 128M and 2G regions ensures
- * by construction that we never select a 128M region that is not a subset of
- * the 2G region. For very large and unusual kernel configurations this means
- * we may fall back to PLTs where they could have been avoided, but this keeps
- * the logic significantly simpler.
- */
-static int __init module_init_limits(void)
-{
- u64 kernel_end = (u64)_end;
- u64 kernel_start = (u64)_text;
- u64 kernel_size = kernel_end - kernel_start;
-
- /*
- * The default modules region is placed immediately below the kernel
- * image, and is large enough to use the full 2G relocation range.
- */
- BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END);
- BUILD_BUG_ON(MODULES_VSIZE < SZ_2G);
-
- if (!kaslr_enabled()) {
- if (kernel_size < SZ_128M)
- module_direct_base = kernel_end - SZ_128M;
- if (kernel_size < SZ_2G)
- module_plt_base = kernel_end - SZ_2G;
- } else {
- u64 min = kernel_start;
- u64 max = kernel_end;
-
- if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
- pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n");
- } else {
- module_direct_base = random_bounding_box(SZ_128M, min, max);
- if (module_direct_base) {
- min = module_direct_base;
- max = module_direct_base + SZ_128M;
- }
- }
-
- module_plt_base = random_bounding_box(SZ_2G, min, max);
- }
-
- pr_info("%llu pages in range for non-PLT usage",
- module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0);
- pr_info("%llu pages in range for PLT usage",
- module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0);
-
- return 0;
-}
-subsys_initcall(module_init_limits);
+#include <asm/module.h>
void *module_alloc(unsigned long size)
{
@@ -131,9 +131,10 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
void *alloc_insn_page(void)
{
- return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
- NUMA_NO_NODE, __builtin_return_address(0));
+ return __vmalloc_node_range(PAGE_SIZE, 1, get_modules_base(),
+ get_modules_end(), GFP_KERNEL, PAGE_KERNEL_ROX,
+ VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+ __builtin_return_address(0));
}
/* arm kprobe: install breakpoint in text */
@@ -268,6 +268,86 @@ static int __init reserve_memblock_reserved_regions(void)
}
arch_initcall(reserve_memblock_reserved_regions);
+/*
+ * Choose a random page-aligned base address for a window of 'size' bytes which
+ * entirely contains the interval [start, end - 1].
+ */
+static u64 __init random_bounding_box(u64 size, u64 start, u64 end)
+{
+ u64 max_pgoff, pgoff;
+
+ if ((end - start) >= size)
+ return 0;
+
+ max_pgoff = (size - (end - start)) / PAGE_SIZE;
+ pgoff = get_random_u32_inclusive(0, max_pgoff);
+
+ return start - pgoff * PAGE_SIZE;
+}
+
+/*
+ * Modules may directly reference data and text anywhere within the kernel
+ * image and other modules. References using PREL32 relocations have a +/-2G
+ * range, and so we need to ensure that the entire kernel image and all modules
+ * fall within a 2G window such that these are always within range.
+ *
+ * Modules may directly branch to functions and code within the kernel text,
+ * and to functions and code within other modules. These branches will use
+ * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure
+ * that the entire kernel text and all module text falls within a 128M window
+ * such that these are always within range. With PLTs, we can expand this to a
+ * 2G window.
+ *
+ * We chose the 128M region to surround the entire kernel image (rather than
+ * just the text) as using the same bounds for the 128M and 2G regions ensures
+ * by construction that we never select a 128M region that is not a subset of
+ * the 2G region. For very large and unusual kernel configurations this means
+ * we may fall back to PLTs where they could have been avoided, but this keeps
+ * the logic significantly simpler.
+ */
+int __init module_init_limits(void)
+{
+ u64 kernel_end = (u64)_end;
+ u64 kernel_start = (u64)_text;
+ u64 kernel_size = kernel_end - kernel_start;
+
+ /*
+ * The default modules region is placed immediately below the kernel
+ * image, and is large enough to use the full 2G relocation range.
+ */
+ BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END);
+ BUILD_BUG_ON(MODULES_VSIZE < SZ_2G);
+
+ if (!kaslr_enabled()) {
+ if (kernel_size < SZ_128M)
+ module_direct_base = kernel_end - SZ_128M;
+ if (kernel_size < SZ_2G)
+ module_plt_base = kernel_end - SZ_2G;
+ } else {
+ u64 min = kernel_start;
+ u64 max = kernel_end;
+
+ if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
+ pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n");
+ } else {
+ module_direct_base = random_bounding_box(SZ_128M, min, max);
+ if (module_direct_base) {
+ min = module_direct_base;
+ max = module_direct_base + SZ_128M;
+ }
+ }
+
+ module_plt_base = random_bounding_box(SZ_2G, min, max);
+ }
+
+ pr_info("%llu pages in range for non-PLT usage",
+ module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0);
+ pr_info("%llu pages in range for PLT usage",
+ module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0);
+
+ return 0;
+}
+
u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
u64 cpu_logical_map(unsigned int cpu)
@@ -366,6 +446,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
"This indicates a broken bootloader or old kernel\n",
boot_args[1], boot_args[2], boot_args[3]);
}
+
+ module_init_limits();
}
static inline bool cpu_can_disable(unsigned int cpu)
@@ -2,7 +2,8 @@
obj-y := dma-mapping.o extable.o fault.o init.o cache.o copypage.o flush.o ioremap.o mmap.o pgd.o mmu.o - context.o proc.o pageattr.o fixmap.o
+ context.o proc.o pageattr.o fixmap.o + vmalloc.o
obj- += contpte.o
obj- += hugetlbpage.o
obj- += ptdump.o
new file mode 100644
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/vmalloc.h>
+#include <linux/elf.h>
+
+#include <asm/module.h>
+
+inline void __init arch_init_checked_vmap_ranges(void)
+{
+ create_vmalloc_range_check(get_modules_base(), get_modules_end());
+}
@@ -13,6 +13,8 @@
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/moduleloader.h>
#include <asm/asm-extable.h>
#include <asm/byteorder.h>
@@ -1790,18 +1792,18 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len)
u64 bpf_jit_alloc_exec_limit(void)
{
- return VMALLOC_END - VMALLOC_START;
+ return get_modules_end() - get_modules_base();
}
void *bpf_jit_alloc_exec(unsigned long size)
{
/* Memory is intended to be executable, reset the pointer tag. */
- return kasan_reset_tag(vmalloc(size));
+ return kasan_reset_tag(module_alloc(size));
}
void bpf_jit_free_exec(void *addr)
{
- return vfree(addr);
+ return module_memfree(addr);
}
/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
Use the vmalloc infrastructure to prevent interleaving code and data pages, working to both maintain compatible management assumptions made by non-arch-specific code and make management of these regions more precise. This will allow, for example, the maintenance of PXNTable bits on dynamically allocated memory or the immutability of certain page middle directory and higher level descriptors. For this purpose, move module_init_limits to setup.c, a more appropriate place since it is an initialization routine, and as a result, move module_plt_base and module_direct_base to module.h and provide appropriate "getter" methods. Make the two existing code allocation calls for BPF and kprobes use the modules memory region, ensuring they no longer pollute data memory. This will make code ensuring the self-patching interface cannot be used to modify data and data interfaces cannot be used to modify code more performant. Add in a mm/vmalloc.c file to perform the appropriate vmalloc_init callbacks required to ensure segmentation of the virtual memory space. Signed-off-by: Maxwell Bland <mbland@motorola.com> --- Hello, Thanks for your review this patch. The ultimate goal, enforcement of dynamic PXNTable, is present in v3: v3: 20240416122254.868007168-1-mbland@motorola.com But is not included here as we first need to make an infrastructural change in order to address the comments given on v2/v1: v2: 20240220203256.31153-1-mbland@motorola.com v1: CAP5Mv+ydhk=Ob4b40ZahGMgT-5+-VEHxtmA=-LkJiEOOU+K6hw@mail.gmail.com Regards, Maxwell Bland arch/arm64/include/asm/module.h | 16 ++++++ arch/arm64/include/asm/setup.h | 2 + arch/arm64/include/asm/vmalloc.h | 3 ++ arch/arm64/kernel/module.c | 85 +----------------------------- arch/arm64/kernel/probes/kprobes.c | 7 +-- arch/arm64/kernel/setup.c | 82 ++++++++++++++++++++++++++++ arch/arm64/mm/Makefile | 3 +- arch/arm64/mm/vmalloc.c | 10 ++++ arch/arm64/net/bpf_jit_comp.c | 8 +-- 9 files changed, 125 insertions(+), 91 deletions(-) create mode 100644 arch/arm64/mm/vmalloc.c