@@ -421,6 +421,7 @@ enum bpf_return_type {
RET_PTR_TO_ALLOC_MEM, /* returns a pointer to dynamically allocated memory */
RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */
RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */
+ RET_PTR_TO_PACKET, /* returns a pointer to a packet */
__BPF_RET_TYPE_MAX,
/* Extended ret_types. */
@@ -430,6 +431,7 @@ enum bpf_return_type {
RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM,
RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
+ RET_PTR_TO_PACKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_PACKET,
/* This must be the last entry. Its purpose is to ensure the enum is
* wide enough to hold the higher bits reserved for bpf_type_flag.
@@ -5117,6 +5117,17 @@ union bpf_attr {
* 0 on success.
* **-EINVAL** for invalid input
* **-EOPNOTSUPP** for unsupported delivery_time_type and protocol
+ *
+ * void *bpf_packet_pointer(void *ctx, u32 offset, u32 len)
+ * Description
+ * Return a pointer to a linear area in the packet at *offset*, of
+ * length *len*. The returned packet pointer cannot be compared
+ * against any other packet pointer.
+ *
+ * This helper is only available to XDP programs.
+ * Return
+ * Pointer into the packet on success, which can be accessed for
+ * *len* bytes, or **NULL** on failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5312,6 +5323,7 @@ union bpf_attr {
FN(xdp_store_bytes), \
FN(copy_from_user_task), \
FN(skb_set_delivery_time), \
+ FN(packet_pointer), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -257,6 +257,7 @@ struct bpf_call_arg_meta {
struct btf *ret_btf;
u32 ret_btf_id;
u32 subprogno;
+ int ret_pkt_len;
};
struct btf *btf_vmlinux;
@@ -5654,6 +5655,32 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
verbose(env, "R%d is not a known constant\n", regno);
return -EACCES;
}
+
+ if (meta->func_id == BPF_FUNC_packet_pointer) {
+ struct tnum range;
+
+ switch (arg + 1) {
+ case 2:
+ /* arg2 = offset, enforce that the range is [0, 0xffff] */
+ range = tnum_range(0, 0xffff);
+ if (!tnum_in(range, reg->var_off)) {
+ verbose(env, "R%d must be in range [0, 0xffff]\n", regno);
+ return -EINVAL;
+ }
+ break;
+ case 3:
+ /* arg3 = len, already checked to be constant */
+ if (!reg->var_off.value || reg->var_off.value > 0xffff) {
+ verbose(env, "R%d must be in range [1, 0xffff]\n", regno);
+ return -EINVAL;
+ }
+ meta->ret_pkt_len = reg->var_off.value;
+ break;
+ default:
+ verbose(env, "verifier internal error: bpf_xdp_pointer unknown arg\n");
+ return -EFAULT;
+ }
+ }
}
return err;
@@ -6873,6 +6900,16 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
*/
regs[BPF_REG_0].btf = btf_vmlinux;
regs[BPF_REG_0].btf_id = ret_btf_id;
+ } else if (base_type(ret_type) == RET_PTR_TO_PACKET) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_PACKET | ret_flag;
+ regs[BPF_REG_0].pkt_uid = ++env->id_gen;
+ if (!meta.ret_pkt_len) {
+ verbose(env, "verifier internal error: ret_pkt_len unset\n");
+ return -EFAULT;
+ }
+ /* Already checked to be in range [1, 0xffff] */
+ regs[BPF_REG_0].range = meta.ret_pkt_len;
} else {
verbose(env, "unknown return type %u of func %s#%d\n",
base_type(ret_type), func_id_name(func_id), func_id);
@@ -3889,18 +3889,15 @@ static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
}
}
-static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+BPF_CALL_3(bpf_xdp_pointer, struct xdp_buff *, xdp, u32, offset, u32, len)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
u32 size = xdp->data_end - xdp->data;
void *addr = xdp->data;
int i;
- if (unlikely(offset > 0xffff || len > 0xffff))
- return ERR_PTR(-EFAULT);
-
if (offset + len > xdp_get_buff_len(xdp))
- return ERR_PTR(-EINVAL);
+ return (unsigned long)NULL;
if (offset < size) /* linear area */
goto out;
@@ -3917,23 +3914,28 @@ static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
offset -= frag_size;
}
out:
- return offset + len <= size ? addr + offset : NULL;
+ return offset + len <= size ? (unsigned long)addr + offset : (unsigned long)NULL;
}
+static const struct bpf_func_proto bpf_xdp_pointer_proto = {
+ .func = bpf_xdp_pointer,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_PACKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_SCALAR,
+ .arg3_type = ARG_CONSTANT,
+};
+
BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset,
void *, buf, u32, len)
{
- void *ptr;
-
- ptr = bpf_xdp_pointer(xdp, offset, len);
- if (IS_ERR(ptr))
- return PTR_ERR(ptr);
+ if (unlikely(offset > 0xffff || len > 0xffff))
+ return -EFAULT;
- if (!ptr)
- bpf_xdp_copy_buf(xdp, offset, buf, len, false);
- else
- memcpy(buf, ptr, len);
+ if (offset + len > xdp_get_buff_len(xdp))
+ return -EINVAL;
+ bpf_xdp_copy_buf(xdp, offset, buf, len, false);
return 0;
}
@@ -3950,17 +3952,13 @@ static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
void *, buf, u32, len)
{
- void *ptr;
-
- ptr = bpf_xdp_pointer(xdp, offset, len);
- if (IS_ERR(ptr))
- return PTR_ERR(ptr);
+ if (unlikely(offset > 0xffff || len > 0xffff))
+ return -EFAULT;
- if (!ptr)
- bpf_xdp_copy_buf(xdp, offset, buf, len, true);
- else
- memcpy(ptr, buf, len);
+ if (offset + len > xdp_get_buff_len(xdp))
+ return -EINVAL;
+ bpf_xdp_copy_buf(xdp, offset, buf, len, true);
return 0;
}
@@ -7820,6 +7818,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_load_bytes_proto;
case BPF_FUNC_xdp_store_bytes:
return &bpf_xdp_store_bytes_proto;
+ case BPF_FUNC_packet_pointer:
+ return &bpf_xdp_pointer_proto;
case BPF_FUNC_fib_lookup:
return &bpf_xdp_fib_lookup_proto;
case BPF_FUNC_check_mtu:
@@ -5117,6 +5117,17 @@ union bpf_attr {
* 0 on success.
* **-EINVAL** for invalid input
* **-EOPNOTSUPP** for unsupported delivery_time_type and protocol
+ *
+ * void *bpf_packet_pointer(void *ctx, u32 offset, u32 len)
+ * Description
+ * Return a pointer to a linear area in the packet at *offset*, of
+ * length *len*. The returned packet pointer cannot be compared
+ * against any other packet pointer.
+ *
+ * This helper is only available to XDP programs.
+ * Return
+ * Pointer into the packet on success, which can be accessed for
+ * *len* bytes, or **NULL** on failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5312,6 +5323,7 @@ union bpf_attr {
FN(xdp_store_bytes), \
FN(copy_from_user_task), \
FN(skb_set_delivery_time), \
+ FN(packet_pointer), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper

Introduce a new helper 'bpf_packet_pointer', that returns a packet pointer
to a linear area in a possibly multi-buffer XDP buff. Earlier, the user had
to use bpf_xdp_load_bytes and bpf_xdp_store_bytes to read from and write to
a multi-buffer XDP buff, but this cost a memcpy even in the ideal case
(where the requested area is linear, in the initial frame or in one of the
frags). Instead, we can expose the bpf_packet_pointer function and return a
packet pointer with a fixed range, so that the user can do direct packet
access in the contiguous region. The name bpf_packet_pointer is chosen so
that this helper can also be implemented for TC programs in the future,
using skb as ctx.

The helper either returns a pointer to the linear contiguous area, or NULL
if it fails to find one. In that case, the user can resort to the existing
helpers to do accesses across frame or frag boundaries. The case of
offset + len > xdp_get_buff_len is still rejected, but since the user can
already check for that beforehand, the error code is dropped for it and
NULL is returned.

We use the support for ARG_SCALAR, ARG_CONSTANT, and pkt_uid for
PTR_TO_PACKET in this commit. First, it is enforced that offset is only in
the range [0, 0xffff], and that len is a constant with a value in the range
[1, 0xffff]. Then, we introduce a ret_pkt_len member in bpf_call_arg_meta
to remember the length to set for the returned packet pointer. A fresh ID
is assigned to pkt_uid on each call, so that comparisons of these
PTR_TO_PACKET pointers with existing packet pointers (obtained from ctx or
from other calls to bpf_packet_pointer) are rejected, to prevent range
manipulation.

The existing bpf_xdp_load_bytes/bpf_xdp_store_bytes now call
bpf_xdp_copy_buf directly. The intended usage is that the user first calls
bpf_packet_pointer, and on receiving NULL from the call, invokes these
'slow path' helpers that handle the access across head/frag boundaries.

Note that the reason we choose PTR_TO_PACKET as the return value, and not
PTR_TO_MEM with a fixed mem_size, is that these pointers need to be
invalidated (by clear_all_pkt_pointers) whenever a helper that changes the
packet is invoked. Instead of special casing PTR_TO_MEM for that purpose,
it is better to adjust PTR_TO_PACKET to work for this mode with minimal
additions on the verifier side (from the previous commit). Also, the
verifier errors related to bad accesses then mention a pkt pointer and not
a pointer to memory, which is more meaningful to the BPF programmer.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 include/linux/bpf.h            |  2 ++
 include/uapi/linux/bpf.h       | 12 +++++++++
 kernel/bpf/verifier.c          | 37 ++++++++++++++++++++++++++
 net/core/filter.c              | 48 +++++++++++++++++-----------------
 tools/include/uapi/linux/bpf.h | 12 +++++++++
 5 files changed, 87 insertions(+), 24 deletions(-)
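
As a reference for reviewers, below is a hypothetical usage sketch of the
fast path / slow path pattern from an XDP program. The program, section,
and variable names are illustrative and not part of this patch, and it
assumes a libbpf build where the helper declaration above has been
regenerated into bpf_helper_defs.h:

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("xdp")
int pkt_ptr_example(struct xdp_md *ctx)
{
	struct ethhdr eth_buf, *eth;

	/* Fast path: len (arg3) must be a constant in [1, 0xffff];
	 * offset (arg2) may also be a variable scalar, as long as the
	 * verifier can prove it lies in [0, 0xffff] (e.g. by masking
	 * it with "off &= 0xffff" first). Here both are constants.
	 */
	eth = bpf_packet_pointer(ctx, 0, sizeof(*eth));
	if (!eth) {
		/* Slow path: NULL means the area is not linear (or is
		 * out of bounds), so copy it out across the head/frag
		 * boundary into a stack buffer instead.
		 */
		if (bpf_xdp_load_bytes(ctx, 0, &eth_buf, sizeof(eth_buf)))
			return XDP_DROP;
		eth = &eth_buf;
	}

	/* Direct packet access, valid for sizeof(*eth) bytes. */
	return eth->h_proto == bpf_htons(ETH_P_IP) ? XDP_PASS : XDP_DROP;
}

char _license[] SEC("license") = "GPL";

Returning NULL for both the cross-boundary case and the
offset + len > xdp_get_buff_len(xdp) case is what keeps the fallback to
the slow path a single branch in the program above.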
Introduce a new helper 'bpf_packet_pointer', that returns a packet pointer to a linear area in a possibly multi-buffer XDP buff. Earlier, user had to use bpf_xdp_load_bytes and bpf_xdp_store_bytes to read from and write to multi-bufer XDP buff, but this led to a memcpy for an ideal case (where we detect a linear area in the initial frame or frags). Instead, we can expose the bpf_packet_pointer function, and return a packet pointer with a fixed range, so that user can do direct packet access in the contiguous region. The name bpf_packet_pointer is chosen so this helper can also be implemented for TC programs in the future, using skb as ctx. The helper either returns the pointer to linear contiguous area, or NULL if it fails to find one. In that case, user can resort to the existing helpers to do access across frame or frag boundaries. The case of offset + len > xdp_get_buff_len is still rejected, but the user can already check for that beforehand so the error code is dropped for it, and NULL is returned. We use the support for ARG_SCALAR, ARG_CONSTANT, and pkt_uid for PTR_TO_PACKET in this commit. First, it is enforced that offset is only in range [0, 0xffff], and that len is a constant, with value in range [1, 0xffff]. Then, we introduce ret_pkt_len member in bpf_call_arg_meta to remember the length to set for the returned packet pointer. A fresh ID is assigned to pkt_uid on each call, so that comparisons of these PTR_TO_PACKET is rejected with existing packet pointers obtained from ctx or other calls to bpf_packet_pointer, to prevent range manipulation. The existing bpf_xdp_load_bytes/bpf_xdp_store_bytes now do a call to bpf_xdp_copy_buf directly. The intended usage is that user first calls bpf_packet_pointer, and on receiving NULL from the call, invokes these 'slow path' helpers that handle the access across head/frag boundary. Note that the reason we choose PTR_TO_PACKET as the return value, and not PTR_TO_MEM with a fixed mem_size, is because these pointers need to be invalided (by clear_all_pkt_pointers) when a helper that changes packet is invoked. Instead of special casing PTR_TO_MEM for that purpose, it is better to adjust PTR_TO_PACKET to work for this mode with minimal additions on the verifier side (from previous commit). Also, the verifier errors related to bad access mention pkt pointer and not pointer to memory, which is more meaningful to the BPF programmer. Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> --- include/linux/bpf.h | 2 ++ include/uapi/linux/bpf.h | 12 +++++++++ kernel/bpf/verifier.c | 37 ++++++++++++++++++++++++++ net/core/filter.c | 48 +++++++++++++++++----------------- tools/include/uapi/linux/bpf.h | 12 +++++++++ 5 files changed, 87 insertions(+), 24 deletions(-)