diff mbox series

[bpf-next,v3,6/6] x86: use register_text_tail_vm

Message ID 20221117010621.1891711-7-song@kernel.org (mailing list archive)
State New
Headers show
Series execmem_alloc for BPF programs | expand

Commit Message

Song Liu Nov. 17, 2022, 1:06 a.m. UTC
Allocate 2MB pages up to round_up(_etext, 2MB), and register memory
[round_up(_etext, 4kb), round_up(_etext, 2MB)] with register_text_tail_vm
so that we can use this part of memory for dynamic kernel text (BPF
programs, etc.).

Here is an example:

[root@eth50-1 ~]# grep _etext /proc/kallsyms
ffffffff82202a08 T _etext

[root@eth50-1 ~]# grep bpf_prog_ /proc/kallsyms  | tail -n 3
ffffffff8220f920 t bpf_prog_cc61a5364ac11d93_handle__sched_wakeup       [bpf]
ffffffff8220fa28 t bpf_prog_cc61a5364ac11d93_handle__sched_wakeup_new   [bpf]
ffffffff8220fad4 t bpf_prog_3bf73fa16f5e3d92_handle__sched_switch       [bpf]

[root@eth50-1 ~]#  grep 0xffffffff82200000 /sys/kernel/debug/page_tables/kernel
0xffffffff82200000-0xffffffff82400000     2M     ro   PSE         x  pmd

ffffffff82200000-ffffffff82400000 is a 2MB page, serving kernel text, and
bpf programs.

Also update Documentation/x86/x86_64/mm.rst to show execmem can be mapped
to kernel text addresses.

Signed-off-by: Song Liu <song@kernel.org>
---
 Documentation/x86/x86_64/mm.rst         | 4 ++--
 arch/x86/include/asm/pgtable_64_types.h | 1 +
 arch/x86/mm/init_64.c                   | 4 +++-
 3 files changed, 6 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/Documentation/x86/x86_64/mm.rst b/Documentation/x86/x86_64/mm.rst
index 9798676bb0bf..6ee95e5fa7e9 100644
--- a/Documentation/x86/x86_64/mm.rst
+++ b/Documentation/x86/x86_64/mm.rst
@@ -62,7 +62,7 @@  Complete virtual memory map with 4-level page tables
    ffffff8000000000 | -512    GB | ffffffeeffffffff |  444 GB | ... unused hole
    ffffffef00000000 |  -68    GB | fffffffeffffffff |   64 GB | EFI region mapping space
    ffffffff00000000 |   -4    GB | ffffffff7fffffff |    2 GB | ... unused hole
-   ffffffff80000000 |   -2    GB | ffffffff9fffffff |  512 MB | kernel text mapping, mapped to physical address 0
+   ffffffff80000000 |   -2    GB | ffffffff9fffffff |  512 MB | kernel text mapping and execmem, mapped to physical address 0
    ffffffff80000000 |-2048    MB |                  |         |
    ffffffffa0000000 |-1536    MB | fffffffffeffffff | 1520 MB | module mapping space
    ffffffffff000000 |  -16    MB |                  |         |
@@ -121,7 +121,7 @@  Complete virtual memory map with 5-level page tables
    ffffff8000000000 | -512    GB | ffffffeeffffffff |  444 GB | ... unused hole
    ffffffef00000000 |  -68    GB | fffffffeffffffff |   64 GB | EFI region mapping space
    ffffffff00000000 |   -4    GB | ffffffff7fffffff |    2 GB | ... unused hole
-   ffffffff80000000 |   -2    GB | ffffffff9fffffff |  512 MB | kernel text mapping, mapped to physical address 0
+   ffffffff80000000 |   -2    GB | ffffffff9fffffff |  512 MB | kernel text mapping and execmem, mapped to physical address 0
    ffffffff80000000 |-2048    MB |                  |         |
    ffffffffa0000000 |-1536    MB | fffffffffeffffff | 1520 MB | module mapping space
    ffffffffff000000 |  -16    MB |                  |         |
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 04f36063ad54..c0f9cceb109a 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -101,6 +101,7 @@  extern unsigned int ptrs_per_p4d;
 #define PUD_MASK	(~(PUD_SIZE - 1))
 #define PGDIR_SIZE	(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE - 1))
+#define PMD_ALIGN(x)	(((unsigned long)(x) + (PMD_SIZE - 1)) & PMD_MASK)
 
 /*
  * See Documentation/x86/x86_64/mm.rst for a description of the memory map.
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3f040c6e5d13..5b42fc0c6099 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1373,7 +1373,7 @@  void mark_rodata_ro(void)
 	unsigned long start = PFN_ALIGN(_text);
 	unsigned long rodata_start = PFN_ALIGN(__start_rodata);
 	unsigned long end = (unsigned long)__end_rodata_hpage_align;
-	unsigned long text_end = PFN_ALIGN(_etext);
+	unsigned long text_end = PMD_ALIGN(_etext);
 	unsigned long rodata_end = PFN_ALIGN(__end_rodata);
 	unsigned long all_end;
 
@@ -1414,6 +1414,8 @@  void mark_rodata_ro(void)
 				(void *)rodata_end, (void *)_sdata);
 
 	debug_checkwx();
+	register_text_tail_vm(PFN_ALIGN((unsigned long)_etext),
+			      PMD_ALIGN((unsigned long)_etext));
 }
 
 int kern_addr_valid(unsigned long addr)