diff mbox series

[bpf-next,v1,3/5] bpf: Introduce bpf_packet_pointer helper to do DPA

Message ID 20220306234311.452206-4-memxor@gmail.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series Introduce bpf_packet_pointer helper | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1789 this patch: 1789
netdev/cc_maintainers warning 4 maintainers not CCed: kpsingh@kernel.org davem@davemloft.net yhs@fb.com songliubraving@fb.com
netdev/build_clang success Errors and warnings before: 194 this patch: 194
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1808 this patch: 1808
netdev/checkpatch warning WARNING: line length of 103 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next success VM_Test

Commit Message

Kumar Kartikeya Dwivedi March 6, 2022, 11:43 p.m. UTC
Introduce a new helper 'bpf_packet_pointer', that returns a packet
pointer to a linear area in a possibly multi-buffer XDP buff. Earlier,
the user had to use bpf_xdp_load_bytes and bpf_xdp_store_bytes to read from
and write to a multi-buffer XDP buff, but this led to a memcpy even in the
ideal case (where we detect a linear area in the initial frame or frags).
Instead, we can expose the bpf_packet_pointer function, and return a
packet pointer with a fixed range, so that user can do direct packet
access in the contiguous region.

The name bpf_packet_pointer is chosen so this helper can also be
implemented for TC programs in the future, using skb as ctx.

The helper either returns the pointer to linear contiguous area, or NULL
if it fails to find one. In that case, user can resort to the existing
helpers to do access across frame or frag boundaries. The case of offset
+ len > xdp_get_buff_len is still rejected, but the user can already
check for that beforehand so the error code is dropped for it, and NULL
is returned.

We use the support for ARG_SCALAR, ARG_CONSTANT, and pkt_uid for
PTR_TO_PACKET in this commit. First, it is enforced that offset is only
in range [0, 0xffff], and that len is a constant, with value in range
[1, 0xffff]. Then, we introduce ret_pkt_len member in bpf_call_arg_meta
to remember the length to set for the returned packet pointer. A fresh
ID is assigned to pkt_uid on each call, so that comparisons of these
PTR_TO_PACKET pointers with existing packet pointers obtained from ctx or
other calls to bpf_packet_pointer are rejected, to prevent range manipulation.
The existing bpf_xdp_load_bytes/bpf_xdp_store_bytes now do a call to
bpf_xdp_copy_buf directly. The intended usage is that user first calls
bpf_packet_pointer, and on receiving NULL from the call, invokes these
'slow path' helpers that handle the access across head/frag boundary.

Note that the reason we choose PTR_TO_PACKET as the return value, and
not PTR_TO_MEM with a fixed mem_size, is that these pointers need to be
invalidated (by clear_all_pkt_pointers) when a helper that changes the
packet is invoked. Instead of special casing PTR_TO_MEM for that
purpose, it is better to adjust PTR_TO_PACKET to work for this mode with
minimal additions on the verifier side (from previous commit). Also, the
verifier errors related to bad access mention pkt pointer and not
pointer to memory, which is more meaningful to the BPF programmer.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 include/linux/bpf.h            |  2 ++
 include/uapi/linux/bpf.h       | 12 +++++++++
 kernel/bpf/verifier.c          | 37 ++++++++++++++++++++++++++
 net/core/filter.c              | 48 +++++++++++++++++-----------------
 tools/include/uapi/linux/bpf.h | 12 +++++++++
 5 files changed, 87 insertions(+), 24 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7841d90b83df..981e87c64e47 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -421,6 +421,7 @@  enum bpf_return_type {
 	RET_PTR_TO_ALLOC_MEM,		/* returns a pointer to dynamically allocated memory */
 	RET_PTR_TO_MEM_OR_BTF_ID,	/* returns a pointer to a valid memory or a btf_id */
 	RET_PTR_TO_BTF_ID,		/* returns a pointer to a btf_id */
+	RET_PTR_TO_PACKET,		/* returns a pointer to a packet */
 	__BPF_RET_TYPE_MAX,
 
 	/* Extended ret_types. */
@@ -430,6 +431,7 @@  enum bpf_return_type {
 	RET_PTR_TO_SOCK_COMMON_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
 	RET_PTR_TO_ALLOC_MEM_OR_NULL	= PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM,
 	RET_PTR_TO_BTF_ID_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
+	RET_PTR_TO_PACKET_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_PACKET,
 
 	/* This must be the last entry. Its purpose is to ensure the enum is
 	 * wide enough to hold the higher bits reserved for bpf_type_flag.
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4eebea830613..3736cfbb325e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5117,6 +5117,17 @@  union bpf_attr {
  *		0 on success.
  *		**-EINVAL** for invalid input
  *		**-EOPNOTSUPP** for unsupported delivery_time_type and protocol
+ *
+ * void *bpf_packet_pointer(void *ctx, u32 offset, u32 len)
+ *	Description
+ *		Return a pointer to linear area in packet at *offset* of length
+ *		*len*. The returned packet pointer cannot be compared to any
+ *		other packet pointers.
+ *
+ *		This helper is only available to XDP programs.
+ *	Return
+ *		Pointer to packet on success that can be accessed for *len*
+ *		bytes, or NULL when it fails.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5312,6 +5323,7 @@  union bpf_attr {
 	FN(xdp_store_bytes),		\
 	FN(copy_from_user_task),	\
 	FN(skb_set_delivery_time),      \
+	FN(packet_pointer),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 88ac2c833bed..e6e494e07f4c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -257,6 +257,7 @@  struct bpf_call_arg_meta {
 	struct btf *ret_btf;
 	u32 ret_btf_id;
 	u32 subprogno;
+	int ret_pkt_len;
 };
 
 struct btf *btf_vmlinux;
@@ -5654,6 +5655,32 @@  static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 			verbose(env, "R%d is not a known constant\n", regno);
 			return -EACCES;
 		}
+
+		if (meta->func_id == BPF_FUNC_packet_pointer) {
+			struct tnum range;
+
+			switch (arg + 1) {
+			case 2:
+				/* arg2 = offset, enforce that the range is [0, 0xffff] */
+				range = tnum_range(0, 0xffff);
+				if (!tnum_in(range, reg->var_off)) {
+					verbose(env, "R%d must be in range [0, 0xffff]\n", regno);
+					return -EINVAL;
+				}
+				break;
+			case 3:
+				/* arg3 = len, already checked to be constant */
+				if (!reg->var_off.value || reg->var_off.value > 0xffff) {
+					verbose(env, "R%d must be in range [1, 0xffff]\n", regno);
+					return -EINVAL;
+				}
+				meta->ret_pkt_len = reg->var_off.value;
+				break;
+			default:
+				verbose(env, "verifier internal error: bpf_xdp_pointer unknown arg\n");
+				return -EFAULT;
+			}
+		}
 	}
 
 	return err;
@@ -6873,6 +6900,16 @@  static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		 */
 		regs[BPF_REG_0].btf = btf_vmlinux;
 		regs[BPF_REG_0].btf_id = ret_btf_id;
+	} else if (base_type(ret_type) == RET_PTR_TO_PACKET) {
+		mark_reg_known_zero(env, regs, BPF_REG_0);
+		regs[BPF_REG_0].type = PTR_TO_PACKET | ret_flag;
+		regs[BPF_REG_0].pkt_uid = ++env->id_gen;
+		if (!meta.ret_pkt_len) {
+			verbose(env, "verifier internal error: ret_pkt_len unset\n");
+			return -EFAULT;
+		}
+		/* Already checked to be in range [1, 0xffff] */
+		regs[BPF_REG_0].range = meta.ret_pkt_len;
 	} else {
 		verbose(env, "unknown return type %u of func %s#%d\n",
 			base_type(ret_type), func_id_name(func_id), func_id);
diff --git a/net/core/filter.c b/net/core/filter.c
index 88767f7da150..4fc19b9e64c7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3889,18 +3889,15 @@  static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
 	}
 }
 
-static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+BPF_CALL_3(bpf_xdp_pointer, struct xdp_buff *, xdp, u32, offset, u32, len)
 {
 	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
 	u32 size = xdp->data_end - xdp->data;
 	void *addr = xdp->data;
 	int i;
 
-	if (unlikely(offset > 0xffff || len > 0xffff))
-		return ERR_PTR(-EFAULT);
-
 	if (offset + len > xdp_get_buff_len(xdp))
-		return ERR_PTR(-EINVAL);
+		return (unsigned long)NULL;
 
 	if (offset < size) /* linear area */
 		goto out;
@@ -3917,23 +3914,28 @@  static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
 		offset -= frag_size;
 	}
 out:
-	return offset + len < size ? addr + offset : NULL;
+	return offset + len < size ? (unsigned long)addr + offset : (unsigned long)NULL;
 }
 
+static const struct bpf_func_proto bpf_xdp_pointer_proto = {
+	.func		= bpf_xdp_pointer,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_PACKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_SCALAR,
+	.arg3_type	= ARG_CONSTANT,
+};
+
 BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset,
 	   void *, buf, u32, len)
 {
-	void *ptr;
-
-	ptr = bpf_xdp_pointer(xdp, offset, len);
-	if (IS_ERR(ptr))
-		return PTR_ERR(ptr);
+	if (unlikely(offset > 0xffff || len > 0xffff))
+		return -EFAULT;
 
-	if (!ptr)
-		bpf_xdp_copy_buf(xdp, offset, buf, len, false);
-	else
-		memcpy(buf, ptr, len);
+	if (offset + len > xdp_get_buff_len(xdp))
+		return -EINVAL;
 
+	bpf_xdp_copy_buf(xdp, offset, buf, len, false);
 	return 0;
 }
 
@@ -3950,17 +3952,13 @@  static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
 BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
 	   void *, buf, u32, len)
 {
-	void *ptr;
-
-	ptr = bpf_xdp_pointer(xdp, offset, len);
-	if (IS_ERR(ptr))
-		return PTR_ERR(ptr);
+	if (unlikely(offset > 0xffff || len > 0xffff))
+		return -EFAULT;
 
-	if (!ptr)
-		bpf_xdp_copy_buf(xdp, offset, buf, len, true);
-	else
-		memcpy(ptr, buf, len);
+	if (offset + len > xdp_get_buff_len(xdp))
+		return -EINVAL;
 
+	bpf_xdp_copy_buf(xdp, offset, buf, len, true);
 	return 0;
 }
 
@@ -7820,6 +7818,8 @@  xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_load_bytes_proto;
 	case BPF_FUNC_xdp_store_bytes:
 		return &bpf_xdp_store_bytes_proto;
+	case BPF_FUNC_packet_pointer:
+		return &bpf_xdp_pointer_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_xdp_fib_lookup_proto;
 	case BPF_FUNC_check_mtu:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4eebea830613..3736cfbb325e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5117,6 +5117,17 @@  union bpf_attr {
  *		0 on success.
  *		**-EINVAL** for invalid input
  *		**-EOPNOTSUPP** for unsupported delivery_time_type and protocol
+ *
+ * void *bpf_packet_pointer(void *ctx, u32 offset, u32 len)
+ *	Description
+ *		Return a pointer to linear area in packet at *offset* of length
+ *		*len*. The returned packet pointer cannot be compared to any
+ *		other packet pointers.
+ *
+ *		This helper is only available to XDP programs.
+ *	Return
+ *		Pointer to packet on success that can be accessed for *len*
+ *		bytes, or NULL when it fails.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5312,6 +5323,7 @@  union bpf_attr {
 	FN(xdp_store_bytes),		\
 	FN(copy_from_user_task),	\
 	FN(skb_set_delivery_time),      \
+	FN(packet_pointer),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper