
[1/2] Adding BPF NX

Message ID SEZPR03MB6786385FE7630DC906EB0BFAB4602@SEZPR03MB6786.apcprd03.prod.outlook.com (mailing list archive)
State Superseded
Series: [1/2] Adding BPF NX

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 1122 this patch: 6
netdev/cc_maintainers warning 21 maintainers not CCed: corbet@lwn.net sdf@google.com andrii@kernel.org kpsingh@kernel.org jolsa@kernel.org hpa@zytor.com x86@kernel.org tenut@Niobium yonghong.song@linux.dev peterz@infradead.org ast@kernel.org luto@kernel.org john.fastabend@gmail.com haoluo@google.com mingo@redhat.com dave.hansen@linux.intel.com martin.lau@linux.dev daniel@iogearbox.net tglx@linutronix.de bp@alien8.de song@kernel.org
netdev/build_clang fail Errors and warnings before: 3553 this patch: 6
netdev/verify_signedoff fail author Signed-off-by missing
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 17130 this patch: 6
netdev/checkpatch warning
	CHECK: Logical continuations should be on the previous line
	WARNING: 'seperate' may be misspelled - perhaps 'separate'?
	WARNING: Do not crash the kernel unless it is absolutely unavoidable--use WARN_ON_ONCE() plus recovery code (if feasible) instead of BUG() or variants
	WARNING: line length of 81 exceeds 80 columns
	WARNING: line length of 85 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Maxwell Bland Jan. 3, 2024, 6:56 p.m. UTC
From: Tenut <tenut@Niobium>
Subject: [PATCH 1/2] Adding BPF NX

Reserve a memory region for BPF programs, and check for it in the interpreter.
This simulates the effect of non-executable memory for BPF execution.

Signed-off-by: Maxwell Bland <mbland@motorola.com>
---
 arch/x86/include/asm/pgtable_64_types.h |  9 +++++++++
 arch/x86/mm/fault.c                     |  6 +++++-
 kernel/bpf/Kconfig                      | 16 +++++++++++++++
 kernel/bpf/core.c                       | 35 ++++++++++++++++++++++++++++++---
 4 files changed, 62 insertions(+), 4 deletions(-)
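
The mechanism is a bounds check on the interpreter's instruction pointer: programs are allocated from a dedicated virtual address window, and the dispatch loop refuses to fetch instructions from outside it. The user-space sketch below illustrates the invariant with hypothetical stand-in names (sketch_insn, check_range); the real check is bpf_insn_check_range() in the core.c hunk.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Stand-ins for the BPF_VSTART/BPF_VEND window reserved by the patch. */
    #define SKETCH_VSTART 0xffffeb0000000000UL
    #define SKETCH_VEND   (SKETCH_VSTART + (512UL << 30))

    struct sketch_insn { uint8_t code; uint8_t regs; int16_t off; int32_t imm; };

    /* Mirror of bpf_insn_check_range(): reject any instruction pointer
     * outside the window, or too close to its end to be read safely. */
    static void check_range(const struct sketch_insn *insn)
    {
        uintptr_t a = (uintptr_t)insn;

        if (a < SKETCH_VSTART || a >= SKETCH_VEND - sizeof(*insn))
            abort();
    }

    int main(void)
    {
        /* Address arithmetic only; the pointer is never dereferenced. */
        check_range((const struct sketch_insn *)(SKETCH_VSTART + 0x1000));
        puts("in-window instruction accepted");
        return 0;
    }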

Patch

diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 38b54b992f32..ad11651eb073 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -123,6 +123,9 @@  extern unsigned int ptrs_per_p4d;
 
 #define __VMALLOC_BASE_L4	0xffffc90000000000UL
 #define __VMALLOC_BASE_L5 	0xffa0000000000000UL
+#ifdef CONFIG_BPF_NX
+#define __BPF_VBASE		0xffffeb0000000000UL
+#endif
 
 #define VMALLOC_SIZE_TB_L4	32UL
 #define VMALLOC_SIZE_TB_L5	12800UL
@@ -169,6 +172,12 @@  extern unsigned int ptrs_per_p4d;
 #define VMALLOC_QUARTER_SIZE	((VMALLOC_SIZE_TB << 40) >> 2)
 #define VMALLOC_END		(VMALLOC_START + VMALLOC_QUARTER_SIZE - 1)
 
+#ifdef CONFIG_BPF_NX
+#define BPF_SIZE_GB		512UL
+#define BPF_VSTART		__BPF_VBASE
+#define BPF_VEND		(BPF_VSTART + _AC(BPF_SIZE_GB << 30, UL))
+#endif /* CONFIG_BPF_NX */
+
 /*
  * vmalloc metadata addresses are calculated by adding shadow/origin offsets
  * to vmalloc address.
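
A quick sanity check of the window these macros define: 512 GiB is 512 << 30 = 0x8000000000 bytes, so BPF_VEND works out to 0xffffeb8000000000. Assuming the documented 4-level x86-64 layout (Documentation/x86/x86_64/mm.rst), the whole window then sits inside the unused hole between the virtual memory map at 0xffffea0000000000-0xffffeaffffffffff and the KASAN shadow starting at 0xffffec0000000000, well clear of VMALLOC_END. The arithmetic as compile-time assertions:

    #include <assert.h>

    #define BPF_VSTART 0xffffeb0000000000UL
    #define BPF_VEND   (BPF_VSTART + (512UL << 30))

    /* 512 GiB window ending at 0xffffeb8000000000, below the KASAN shadow. */
    static_assert(BPF_VEND == 0xffffeb8000000000UL, "window end");
    static_assert(BPF_VEND <= 0xffffec0000000000UL, "no overlap with KASAN shadow");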
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ab778eac1952..cfb63ef72168 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -235,7 +235,11 @@  static noinline int vmalloc_fault(unsigned long address)
 	pte_t *pte_k;
 
 	/* Make sure we are in vmalloc area: */
-	if (!(address >= VMALLOC_START && address < VMALLOC_END))
+	if (!(address >= VMALLOC_START && address < VMALLOC_END)
+#ifdef CONFIG_BPF_NX
+		&& !(address >= BPF_VSTART && address < BPF_VEND)
+#endif
+	)
 		return -1;
 
 	/*
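
checkpatch's "logical continuations" warning comes from the && that has to open the #ifdef'd line; moving the && up would break the !CONFIG_BPF_NX build, since the condition would then end in a dangling operator. One way to satisfy both checkpatch and the preprocessor would be a small helper, sketched here (fault_in_bpf_region is a hypothetical name, not part of the patch):

    /* Sketch: keep the #ifdef out of the condition. */
    static bool fault_in_bpf_region(unsigned long address)
    {
    #ifdef CONFIG_BPF_NX
        return address >= BPF_VSTART && address < BPF_VEND;
    #else
        return false;
    #endif
    }

    /* The vmalloc_fault() condition then becomes: */
        if (!(address >= VMALLOC_START && address < VMALLOC_END) &&
            !fault_in_bpf_region(address))
            return -1;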
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index 6a906ff93006..7160dcaaa58a 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -86,6 +86,22 @@  config BPF_UNPRIV_DEFAULT_OFF
 
 	  If you are unsure how to answer this question, answer Y.
 
+config BPF_HARDENING
+	bool "Enable BPF interpreter hardening"
+	select BPF
+	depends on X86_64 && !RANDOMIZE_MEMORY && !BPF_JIT_ALWAYS_ON
+	default n
+	help
+	  Enhance the BPF interpreter's security.
+
+config BPF_NX
+	bool "Enable BPF NX"
+	depends on BPF_HARDENING && !DYNAMIC_MEMORY_LAYOUT
+	default n
+	help
+	  Allocate eBPF programs in a separate area and make sure the
+	  interpreted programs stay within that region.
+
 source "kernel/bpf/preload/Kconfig"
 
 config BPF_LSM
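
BPF_NX is therefore only selectable on 64-bit x86 with a static memory layout: BPF_HARDENING requires X86_64, !RANDOMIZE_MEMORY, and !BPF_JIT_ALWAYS_ON, and BPF_NX additionally requires !DYNAMIC_MEMORY_LAYOUT, which mainline selects from X86_5LEVEL and RANDOMIZE_MEMORY. A .config fragment satisfying the dependencies might look like the following (option names other than the two added by this patch are from mainline):

    # CONFIG_RANDOMIZE_MEMORY is not set
    # CONFIG_X86_5LEVEL is not set
    # CONFIG_BPF_JIT_ALWAYS_ON is not set
    CONFIG_BPF=y
    CONFIG_BPF_HARDENING=y
    CONFIG_BPF_NX=y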
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index fe254ae035fe..56d9e8d4a6de 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -88,6 +88,34 @@  void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
 	return NULL;
 }
 
+#ifdef CONFIG_BPF_NX
+#define BPF_MEMORY_ALIGN roundup_pow_of_two(sizeof(struct bpf_prog) + \
+		BPF_MAXINSNS * sizeof(struct bpf_insn))
+static void *__bpf_vmalloc(unsigned long size, gfp_t gfp_mask)
+{
+	return __vmalloc_node_range(size, BPF_MEMORY_ALIGN, BPF_VSTART, BPF_VEND,
+			gfp_mask, PAGE_KERNEL, 0, NUMA_NO_NODE,
+			__builtin_return_address(0));
+}
+
+static void bpf_insn_check_range(const struct bpf_insn *insn)
+{
+	if ((unsigned long)insn < BPF_VSTART
+			|| (unsigned long)insn >= BPF_VEND - sizeof(struct bpf_insn))
+		BUG();
+}
+
+#else
+static void *__bpf_vmalloc(unsigned long size, gfp_t gfp_mask)
+{
+	return __vmalloc(size, gfp_mask);
+}
+
+static void bpf_insn_check_range(const struct bpf_insn *insn)
+{
+}
+#endif /* CONFIG_BPF_NX */
+
 struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
 {
 	gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
@@ -95,7 +123,7 @@  struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 	struct bpf_prog *fp;
 
 	size = round_up(size, PAGE_SIZE);
-	fp = __vmalloc(size, gfp_flags);
+	fp = __bpf_vmalloc(size, gfp_flags);
 	if (fp == NULL)
 		return NULL;
 
@@ -246,7 +274,7 @@  struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 	if (pages <= fp_old->pages)
 		return fp_old;
 
-	fp = __vmalloc(size, gfp_flags);
+	fp = __bpf_vmalloc(size, gfp_flags);
 	if (fp) {
 		memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
 		fp->pages = pages;
@@ -1380,7 +1408,7 @@  static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other,
 	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
 	struct bpf_prog *fp;
 
-	fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags);
+	fp = __bpf_vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags);
 	if (fp != NULL) {
 		/* aux->prog still points to the fp_other one, so
 		 * when promoting the clone to the real program,
@@ -1695,6 +1723,7 @@  static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
 #define CONT_JMP ({ insn++; goto select_insn; })
 
 select_insn:
+	bpf_insn_check_range(insn);
 	goto *jumptable[insn->code];
 
 	/* Explicitly mask the register-based shift amounts with 63 or 31
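
Two costs of this hunk are worth spelling out. Every interpreted instruction now pays the range check, and a failed check calls BUG(), i.e. a kernel crash, which is what the checkpatch BUG() warning above refers to. The BPF_MEMORY_ALIGN value passed as the allocation alignment is also fairly large: assuming BPF_MAXINSNS = 4096 and 8-byte instructions (mainline values; sizeof(struct bpf_prog) is config-dependent but small), it rounds up to 64 KiB, as this stand-alone sketch of the arithmetic shows:

    #include <stdio.h>

    /* Equivalent of the kernel's roundup_pow_of_two() for the sketch. */
    static unsigned long pow2_roundup(unsigned long v)
    {
        unsigned long p = 1;

        while (p < v)
            p <<= 1;
        return p;
    }

    int main(void)
    {
        unsigned long hdr   = 512;        /* assumed sizeof(struct bpf_prog) */
        unsigned long insns = 4096UL * 8; /* BPF_MAXINSNS * sizeof(struct bpf_insn) */

        printf("align = %lu\n", pow2_roundup(hdr + insns)); /* prints 65536 */
        return 0;
    }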