[RFC,10/17] bpf: Implement direct packet access in dequeue progs

Message ID 20220713111430.134810-11-toke@redhat.com (mailing list archive)
State RFC
Delegated to: BPF
Series: xdp: Add packet queueing and scheduling capabilities

Checks

Context Check Description
bpf/vmtest-bpf-next-PR pending PR summary
bpf/vmtest-bpf-next-VM_Test-2 pending Logs for Kernel LATEST on ubuntu-latest with llvm-15
bpf/vmtest-bpf-next-VM_Test-3 pending Logs for Kernel LATEST on z15 with gcc
bpf/vmtest-bpf-next-VM_Test-1 fail Logs for Kernel LATEST on ubuntu-latest with gcc
netdev/tree_selection success Guessed tree name to be net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count fail Series longer than 15 patches (and no cover letter)
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1428 this patch: 1407
netdev/cc_maintainers success CCed 18 of 18 maintainers
netdev/build_clang success Errors and warnings before: 170 this patch: 170
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1437 this patch: 1416
netdev/checkpatch warning WARNING: line length of 100 exceeds 80 columns WARNING: line length of 118 exceeds 80 columns WARNING: line length of 81 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Toke Høiland-Jørgensen July 13, 2022, 11:14 a.m. UTC
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>

Allow the user to obtain packet pointers from a dequeued xdp_md BTF
pointer, by allowing a convert_ctx_access implementation for
PTR_TO_BTF_ID, and then tagging the resulting loads as packet pointers
in the verifier state.

Previously, convert_ctx_access was limited to just PTR_TO_CTX, but now
it is also used to translate accesses through the PTR_TO_BTF_ID to
xdp_md obtained from bpf_packet_dequeue, so that it works like the
xdp_md ctx in XDP programs. Keep in mind that while an xdp_buff backs
the ctx in XDP programs, an xdp_frame backs the xdp_md in dequeue
programs.

Next, we use the pkt_uid support and transfer the ref_obj_id on loads
of the data, data_end and data_meta fields, to make the verifier aware
of the provenance of these packet pointers, so that comparisons between
pointers into different packets can be rejected as unsafe.
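
For example, with this in place the verifier will reject a comparison
that mixes pointers from two different dequeued packets (a sketch; the
bpf_packet_dequeue()/bpf_packet_drop() signatures and the pifo_map name
follow earlier patches in this series and are assumptions here):

  SEC("dequeue")
  void *deq_two(struct dequeue_ctx *ctx)
  {
  	__u64 prio = 0;
  	struct xdp_md *pkt1, *pkt2;

  	pkt1 = (void *)bpf_packet_dequeue(ctx, &pifo_map, 0, &prio);
  	if (!pkt1)
  		return NULL;
  	pkt2 = (void *)bpf_packet_dequeue(ctx, &pifo_map, 0, &prio);
  	if (!pkt2)
  		return pkt1;

  	/* Rejected: pkt1's data compared against pkt2's data_end;
  	 * the pkt_uid (ref_obj_id) of the two pointers differs.
  	 */
  	if ((void *)(long)pkt1->data + 4 > (void *)(long)pkt2->data_end) {
  		bpf_packet_drop(ctx, pkt2);
  		return pkt1;
  	}

  	bpf_packet_drop(ctx, pkt2);
  	return pkt1;
  }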

In the end, users can reuse code written for the XDP ctx in dequeue
programs as well, without having to do things differently.

Once packet pointers are obtained, the regular verifier logic kicks in,
where pointers derived from the same xdp_frame can be compared against
each other to extend their range and allow direct accesses into the
packet.
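
Putting it together, a dequeue program can now parse the packet just
like an XDP program would (again a sketch, under the same assumptions
about helper and context names as above):

  SEC("dequeue")
  void *deq_parse(struct dequeue_ctx *ctx)
  {
  	void *data, *data_end;
  	struct ethhdr *eth;
  	struct xdp_md *pkt;
  	__u64 prio = 0;

  	pkt = (void *)bpf_packet_dequeue(ctx, &pifo_map, 0, &prio);
  	if (!pkt)
  		return NULL;

  	data = (void *)(long)pkt->data;
  	data_end = (void *)(long)pkt->data_end;

  	/* Same-packet comparison: extends the verified range of data */
  	if (data + sizeof(*eth) > data_end) {
  		bpf_packet_drop(ctx, pkt);
  		return NULL;
  	}

  	/* Direct packet access through the dequeued pointer */
  	eth = data;
  	if (eth->h_proto != bpf_htons(ETH_P_IP)) {
  		bpf_packet_drop(ctx, pkt);
  		return NULL;
  	}

  	return pkt;
  }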

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
 include/linux/bpf.h          |  26 +++++--
 include/linux/bpf_verifier.h |   6 ++
 kernel/bpf/verifier.c        |  48 +++++++++---
 net/core/filter.c            | 143 +++++++++++++++++++++++++++++++++++
 4 files changed, 206 insertions(+), 17 deletions(-)

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6ea5d6d188cf..a568ddc1f1ea 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -653,6 +653,12 @@  struct bpf_prog_ops {
 			union bpf_attr __user *uattr);
 };
 
+typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type,
+					const struct bpf_insn *src,
+					struct bpf_insn *dst,
+					struct bpf_prog *prog,
+					u32 *target_size);
+
 struct bpf_verifier_ops {
 	/* return eBPF function prototype for verification */
 	const struct bpf_func_proto *
@@ -678,6 +684,9 @@  struct bpf_verifier_ops {
 				 const struct btf_type *t, int off, int size,
 				 enum bpf_access_type atype,
 				 u32 *next_btf_id, enum bpf_type_flag *flag);
+	bpf_convert_ctx_access_t (*get_convert_ctx_access)(struct bpf_verifier_log *log,
+							   const struct btf *btf,
+							   u32 btf_id);
 };
 
 struct bpf_prog_offload_ops {
@@ -1360,11 +1369,6 @@  const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void);
 
 typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
 					unsigned long off, unsigned long len);
-typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type,
-					const struct bpf_insn *src,
-					struct bpf_insn *dst,
-					struct bpf_prog *prog,
-					u32 *target_size);
 
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
@@ -2180,6 +2184,18 @@  static inline bool unprivileged_ebpf_enabled(void)
 	return false;
 }
 
+static inline struct btf *bpf_get_btf_vmlinux(void)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
+				    const struct btf_type *t, int off, int size,
+				    enum bpf_access_type atype __maybe_unused,
+				    u32 *next_btf_id, enum bpf_type_flag *flag)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 93b69dbf3d19..640f92fece12 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -532,8 +532,14 @@  __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
 				      const char *fmt, va_list args);
 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 					   const char *fmt, ...);
+#ifdef CONFIG_BPF_SYSCALL
 __printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
 			    const char *fmt, ...);
+#else
+static inline void bpf_log(struct bpf_verifier_log *log, const char *fmt, ...)
+{
+}
+#endif
 
 static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
 {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f319e9392587..7edc2b834d9b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1707,7 +1707,7 @@  static void mark_reg_not_init(struct bpf_verifier_env *env,
 static void mark_btf_ld_reg(struct bpf_verifier_env *env,
 			    struct bpf_reg_state *regs, u32 regno,
 			    enum bpf_reg_type reg_type,
-			    struct btf *btf, u32 btf_id,
+			    struct btf *btf, u32 reg_id,
 			    enum bpf_type_flag flag)
 {
 	if (reg_type == SCALAR_VALUE) {
@@ -1715,9 +1715,14 @@  static void mark_btf_ld_reg(struct bpf_verifier_env *env,
 		return;
 	}
 	mark_reg_known_zero(env, regs, regno);
-	regs[regno].type = PTR_TO_BTF_ID | flag;
+	regs[regno].type = (int)reg_type | flag;
+	if (type_is_pkt_pointer_any(reg_type)) {
+		regs[regno].pkt_uid = reg_id;
+		return;
+	}
+	WARN_ON_ONCE(base_type(reg_type) != PTR_TO_BTF_ID);
 	regs[regno].btf = btf;
-	regs[regno].btf_id = btf_id;
+	regs[regno].btf_id = reg_id;
 }
 
 #define DEF_NOT_SUBREG	(0)
@@ -4479,13 +4484,14 @@  static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
 				   struct bpf_reg_state *regs,
 				   int regno, int off, int size,
 				   enum bpf_access_type atype,
-				   int value_regno)
+				   int value_regno, int insn_idx)
 {
 	struct bpf_reg_state *reg = regs + regno;
 	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
 	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
+	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
 	enum bpf_type_flag flag = 0;
-	u32 btf_id;
+	u32 reg_id;
 	int ret;
 
 	if (off < 0) {
@@ -4520,7 +4526,7 @@  static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
 
 	if (env->ops->btf_struct_access) {
 		ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
-						  off, size, atype, &btf_id, &flag);
+						  off, size, atype, &reg_id, &flag);
 	} else {
 		if (atype != BPF_READ) {
 			verbose(env, "only read is supported\n");
@@ -4528,7 +4534,7 @@  static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
 		}
 
 		ret = btf_struct_access(&env->log, reg->btf, t, off, size,
-					atype, &btf_id, &flag);
+					atype, &reg_id, &flag);
 	}
 
 	if (ret < 0)
@@ -4540,8 +4546,19 @@  static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
 	if (type_flag(reg->type) & PTR_UNTRUSTED)
 		flag |= PTR_UNTRUSTED;
 
-	if (atype == BPF_READ && value_regno >= 0)
-		mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
+	/* Remember the BTF ID for later use in convert_ctx_accesses */
+	aux->btf_var.btf_id = reg->btf_id;
+	aux->btf_var.btf = reg->btf;
+
+	if (atype == BPF_READ && value_regno >= 0) {
+		/* For pkt pointers, reg_id is set to pkt_uid, which must be the
+		 * ref_obj_id of the referenced register from which they are
+		 * obtained, denoting different packets e.g. in dequeue progs.
+		 */
+		if (type_is_pkt_pointer_any(ret))
+			reg_id = reg->ref_obj_id;
+		mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, reg_id, flag);
+	}
 
 	return 0;
 }
@@ -4896,7 +4913,7 @@  static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 	} else if (base_type(reg->type) == PTR_TO_BTF_ID &&
 		   !type_may_be_null(reg->type)) {
 		err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
-					      value_regno);
+					      value_regno, insn_idx);
 	} else if (reg->type == CONST_PTR_TO_MAP) {
 		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
 					      value_regno);
@@ -13515,8 +13532,15 @@  static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		case PTR_TO_BTF_ID:
 		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
 			if (type == BPF_READ) {
-				insn->code = BPF_LDX | BPF_PROBE_MEM |
-					BPF_SIZE((insn)->code);
+				if (env->ops->get_convert_ctx_access) {
+					struct btf *btf = env->insn_aux_data[i + delta].btf_var.btf;
+					u32 btf_id = env->insn_aux_data[i + delta].btf_var.btf_id;
+
+					convert_ctx_access = env->ops->get_convert_ctx_access(&env->log, btf, btf_id);
+					if (convert_ctx_access)
+						break;
+				}
+				insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code);
 				env->prog->aux->num_exentries++;
 			} else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
 				verbose(env, "Writes through BTF pointers are not allowed\n");
diff --git a/net/core/filter.c b/net/core/filter.c
index 893b75515859..6a4881739e9b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -79,6 +79,7 @@ 
 #include <net/tls.h>
 #include <net/xdp.h>
 #include <net/mptcp.h>
+#include <linux/bpf_verifier.h>
 
 static const struct bpf_func_proto *
 bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -9918,6 +9919,146 @@  static u32 dequeue_convert_ctx_access(enum bpf_access_type type,
 	return insn - insn_buf;
 }
 
+static int dequeue_btf_struct_access(struct bpf_verifier_log *log,
+				     const struct btf *btf,
+				     const struct btf_type *t, int off, int size,
+				     enum bpf_access_type atype,
+				     u32 *next_btf_id, enum bpf_type_flag *flag)
+{
+	const struct btf_type *pkt_type;
+	enum bpf_reg_type reg_type;
+	struct btf *btf_vmlinux;
+
+	btf_vmlinux = bpf_get_btf_vmlinux();
+	if (IS_ERR_OR_NULL(btf_vmlinux) || btf != btf_vmlinux)
+		return -EINVAL;
+
+	if (atype != BPF_READ)
+		return -EACCES;
+
+	pkt_type = btf_type_by_id(btf_vmlinux, xdp_md_btf_ids[0]);
+	if (!pkt_type)
+		return -EINVAL;
+	if (t != pkt_type)
+		return btf_struct_access(log, btf, t, off, size, atype,
+					 next_btf_id, flag);
+
+	switch (off) {
+	case offsetof(struct xdp_md, data):
+		reg_type = PTR_TO_PACKET;
+		break;
+	case offsetof(struct xdp_md, data_meta):
+		reg_type = PTR_TO_PACKET_META;
+		break;
+	case offsetof(struct xdp_md, data_end):
+		reg_type = PTR_TO_PACKET_END;
+		break;
+	default:
+		bpf_log(log, "no read support for xdp_md at off %d\n", off);
+		return -EACCES;
+	}
+
+	if (!__is_valid_xdp_access(off, size))
+		return -EINVAL;
+	return reg_type;
+}
+
+static u32
+dequeue_convert_xdp_md_access(enum bpf_access_type type,
+			      const struct bpf_insn *si, struct bpf_insn *insn_buf,
+			      struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+	int src_reg;
+
+	switch (si->off) {
+	case offsetof(struct xdp_md, data):
+		/* dst_reg = *(src_reg + off(xdp_frame, data)) */
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_frame, data),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct xdp_frame, data));
+		break;
+	case offsetof(struct xdp_md, data_meta):
+		if (si->dst_reg == si->src_reg) {
+			src_reg = BPF_REG_9;
+			if (si->dst_reg == src_reg)
+				src_reg--;
+			*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, src_reg,
+					      offsetof(struct xdp_frame, next));
+			*insn++ = BPF_MOV64_REG(src_reg, si->src_reg);
+		} else {
+			src_reg = si->src_reg;
+		}
+		/* AX = src_reg
+		 * dst_reg = *(src_reg + off(xdp_frame, data))
+		 * src_reg = *(src_reg + off(xdp_frame, metasize))
+		 * dst_reg -= src_reg
+		 * src_reg = AX
+		 */
+		*insn++ = BPF_MOV64_REG(BPF_REG_AX, src_reg);
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_frame, data),
+				      si->dst_reg, src_reg,
+				      offsetof(struct xdp_frame, data));
+		*insn++ = BPF_LDX_MEM(BPF_B, /* metasize == 8 bits */
+				      src_reg, src_reg,
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+				      offsetofend(struct xdp_frame, headroom) + 3);
+#elif defined(__BIG_ENDIAN_BITFIELD)
+				      offsetofend(struct xdp_frame, headroom));
+#endif
+		*insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, src_reg);
+		*insn++ = BPF_MOV64_REG(src_reg, BPF_REG_AX);
+		if (si->dst_reg == si->src_reg)
+			*insn++ = BPF_LDX_MEM(BPF_DW, src_reg, si->src_reg,
+					      offsetof(struct xdp_frame, next));
+		break;
+	case offsetof(struct xdp_md, data_end):
+		if (si->dst_reg == si->src_reg) {
+			src_reg = BPF_REG_9;
+			if (si->dst_reg == src_reg)
+				src_reg--;
+			*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, src_reg,
+					      offsetof(struct xdp_frame, next));
+			*insn++ = BPF_MOV64_REG(src_reg, si->src_reg);
+		} else {
+			src_reg = si->src_reg;
+		}
+		/* AX = src_reg
+		 * dst_reg = *(src_reg + off(xdp_frame, data))
+		 * src_reg = *(src_reg + off(xdp_frame, len))
+		 * dst_reg += src_reg
+		 * src_reg = AX
+		 */
+		*insn++ = BPF_MOV64_REG(BPF_REG_AX, src_reg);
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_frame, data),
+				      si->dst_reg, src_reg,
+				      offsetof(struct xdp_frame, data));
+		*insn++ = BPF_LDX_MEM(BPF_H, src_reg, src_reg,
+				      offsetof(struct xdp_frame, len));
+		*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, src_reg);
+		*insn++ = BPF_MOV64_REG(src_reg, BPF_REG_AX);
+		if (si->dst_reg == si->src_reg)
+			*insn++ = BPF_LDX_MEM(BPF_DW, src_reg, si->src_reg,
+					      offsetof(struct xdp_frame, next));
+		break;
+	}
+	return insn - insn_buf;
+}
+
+static bpf_convert_ctx_access_t
+dequeue_get_convert_ctx_access(struct bpf_verifier_log *log,
+			       const struct btf *btf, u32 btf_id)
+{
+	struct btf *btf_vmlinux;
+
+	btf_vmlinux = bpf_get_btf_vmlinux();
+	if (IS_ERR_OR_NULL(btf_vmlinux) || btf != btf_vmlinux)
+		return NULL;
+	if (btf_id != xdp_md_btf_ids[0])
+		return NULL;
+	return dequeue_convert_xdp_md_access;
+}
+
 /* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of
  * context Structure, F is Field in context structure that contains a pointer
  * to Nested Structure of type NS that has the field NF.
@@ -10775,6 +10916,8 @@  const struct bpf_verifier_ops dequeue_verifier_ops = {
 	.is_valid_access	= dequeue_is_valid_access,
 	.convert_ctx_access	= dequeue_convert_ctx_access,
 	.gen_prologue		= bpf_noop_prologue,
+	.btf_struct_access	= dequeue_btf_struct_access,
+	.get_convert_ctx_access = dequeue_get_convert_ctx_access,
 };
 
 const struct bpf_prog_ops dequeue_prog_ops = {