diff mbox series

[RFC,06/17] xdp: Add dequeue program type for getting packets from a PIFO

Message ID 20220713111430.134810-7-toke@redhat.com (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series xdp: Add packet queueing and scheduling capabilities | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR pending PR summary
bpf/vmtest-bpf-next-VM_Test-2 pending Logs for Kernel LATEST on ubuntu-latest with llvm-15
bpf/vmtest-bpf-next-VM_Test-3 pending Logs for Kernel LATEST on z15 with gcc
bpf/vmtest-bpf-next-VM_Test-1 fail Logs for Kernel LATEST on ubuntu-latest with gcc
netdev/tree_selection success Guessed tree name to be net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count fail Series longer than 15 patches (and no cover letter)
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 4655 this patch: 4656
netdev/cc_maintainers success CCed 18 of 18 maintainers
netdev/build_clang fail Errors and warnings before: 1126 this patch: 1128
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 4813 this patch: 4814
netdev/checkpatch warning WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP WARNING: line length of 81 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Toke Høiland-Jørgensen July 13, 2022, 11:14 a.m. UTC
Add a new BPF_PROG_TYPE_DEQUEUE, which will be executed by a new device
hook to retrieve queued packets for transmission. The API of the dequeue
program is simple: it takes a context object containing, as its sole member,
the ifindex of the device it is being executed on. The program can return a
pointer to a packet, or NULL to indicate it has nothing to transmit at this
time. Packet pointers are obtained by dequeueing them from a PIFO
map (using a helper added in a subsequent commit).

This commit adds the dequeue program type and the ability to run it using the
bpf_prog_run() syscall (returning the dequeued packet to userspace); a
subsequent commit introduces the network stack hook to attach and execute
dequeue programs.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
 include/linux/bpf.h            |  9 ++++++
 include/linux/bpf_types.h      |  2 ++
 include/net/xdp.h              |  4 +++
 include/uapi/linux/bpf.h       |  5 ++++
 kernel/bpf/syscall.c           |  1 +
 net/bpf/test_run.c             | 33 +++++++++++++++++++++
 net/core/filter.c              | 53 ++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  5 ++++
 8 files changed, 112 insertions(+)
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ea994acebb81..6ea5d6d188cf 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1864,6 +1864,8 @@  int array_map_alloc_check(union bpf_attr *attr);
 
 int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 			  union bpf_attr __user *uattr);
+int bpf_prog_test_run_dequeue(struct bpf_prog *prog, const union bpf_attr *kattr,
+			      union bpf_attr __user *uattr);
 int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 			  union bpf_attr __user *uattr);
 int bpf_prog_test_run_tracing(struct bpf_prog *prog,
@@ -2107,6 +2109,13 @@  static inline int bpf_prog_test_run_xdp(struct bpf_prog *prog,
 	return -ENOTSUPP;
 }
 
+static inline int bpf_prog_test_run_dequeue(struct bpf_prog *prog,
+					    const union bpf_attr *kattr,
+					    union bpf_attr __user *uattr)
+{
+	return -ENOTSUPP;
+}
+
 static inline int bpf_prog_test_run_skb(struct bpf_prog *prog,
 					const union bpf_attr *kattr,
 					union bpf_attr __user *uattr)
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 26ef981a8aa5..e6bc962befb7 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -10,6 +10,8 @@  BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act,
 	      struct __sk_buff, struct sk_buff)
 BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp,
 	      struct xdp_md, struct xdp_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_DEQUEUE, dequeue,
+	      struct dequeue_ctx, struct dequeue_data)
 #ifdef CONFIG_CGROUP_BPF
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb,
 	      struct __sk_buff, struct sk_buff)
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 7c694fb26f34..728ce943d352 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -85,6 +85,10 @@  struct xdp_buff {
 	u32 flags; /* supported values defined in xdp_buff_flags */
 };
 
+struct dequeue_data {
+	struct xdp_txq_info *txq;
+};
+
 static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
 {
 	return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f0947ddee784..974fb5882305 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -954,6 +954,7 @@  enum bpf_prog_type {
 	BPF_PROG_TYPE_LSM,
 	BPF_PROG_TYPE_SK_LOOKUP,
 	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
+	BPF_PROG_TYPE_DEQUEUE,
 };
 
 enum bpf_attach_type {
@@ -5961,6 +5962,10 @@  struct xdp_md {
 	__u32 egress_ifindex;  /* txq->dev->ifindex */
 };
 
+struct dequeue_ctx {
+	__u32 egress_ifindex;
+};
+
 /* DEVMAP map-value layout
  *
  * The struct data-layout of map-value is a configuration interface.
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 31899882e513..c4af9119b68a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2370,6 +2370,7 @@  bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 		default:
 			return -EINVAL;
 		}
+	case BPF_PROG_TYPE_DEQUEUE:
 	case BPF_PROG_TYPE_SYSCALL:
 	case BPF_PROG_TYPE_EXT:
 		if (expected_attach_type)
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index f05d13717430..a7f479a19fe0 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -1390,6 +1390,39 @@  int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 	return ret;
 }
 
+int bpf_prog_test_run_dequeue(struct bpf_prog *prog, const union bpf_attr *kattr,
+			      union bpf_attr __user *uattr)
+{
+	struct xdp_txq_info txq = { .dev = current->nsproxy->net_ns->loopback_dev };
+	u32 repeat = kattr->test.repeat, duration, size;
+	struct dequeue_data ctx = { .txq = &txq };
+	struct xdp_buff xdp = {};
+	struct xdp_frame *pkt;
+	int ret = -EINVAL;
+	u64 retval;
+
+	if (prog->expected_attach_type)
+		return -EINVAL;
+
+	if (kattr->test.data_in || kattr->test.data_size_in ||
+	    kattr->test.ctx_in || kattr->test.ctx_out || repeat > 1)
+		return -EINVAL;
+
+	ret = bpf_test_run(prog, &ctx, repeat, &retval, &duration, false);
+	if (ret)
+		return ret;
+	if (!retval)
+		return bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
+
+	pkt = (void *)(unsigned long)retval;
+	xdp_convert_frame_to_buff(pkt, &xdp);
+	size = xdp.data_end - xdp.data_meta;
+	/* We set retval == 1 if pkt != NULL, otherwise 0 */
+	ret = bpf_test_finish(kattr, uattr, xdp.data_meta, NULL, size, !!retval, duration);
+	xdp_return_frame(pkt);
+	return ret;
+}
+
 static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx)
 {
 	/* make sure the fields we don't use are zeroed */
diff --git a/net/core/filter.c b/net/core/filter.c
index 8e6ea17a29db..30bd3a6aedab 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -8062,6 +8062,12 @@  xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+static const struct bpf_func_proto *
+dequeue_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	return bpf_base_func_proto(func_id);
+}
+
 const struct bpf_func_proto bpf_sock_map_update_proto __weak;
 const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
 
@@ -8776,6 +8782,20 @@  void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog,
 }
 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
+static bool dequeue_is_valid_access(int off, int size,
+				    enum bpf_access_type type,
+				    const struct bpf_prog *prog,
+				    struct bpf_insn_access_aux *info)
+{
+	if (type == BPF_WRITE)
+		return false;
+	switch (off) {
+	case offsetof(struct dequeue_ctx, egress_ifindex):
+		return true;
+	}
+	return false;
+}
+
 static bool sock_addr_is_valid_access(int off, int size,
 				      enum bpf_access_type type,
 				      const struct bpf_prog *prog,
@@ -9835,6 +9855,28 @@  static u32 xdp_convert_ctx_access(enum bpf_access_type type,
 	return insn - insn_buf;
 }
 
+static u32 dequeue_convert_ctx_access(enum bpf_access_type type,
+				      const struct bpf_insn *si,
+				      struct bpf_insn *insn_buf,
+				      struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (si->off) {
+	case offsetof(struct dequeue_ctx, egress_ifindex):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct dequeue_data, txq),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct dequeue_data, txq));
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev),
+				      si->dst_reg, si->dst_reg,
+				      offsetof(struct xdp_txq_info, dev));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct net_device, ifindex));
+		break;
+	}
+	return insn - insn_buf;
+}
+
 /* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of
  * context Structure, F is Field in context structure that contains a pointer
  * to Nested Structure of type NS that has the field NF.
@@ -10687,6 +10729,17 @@  const struct bpf_prog_ops xdp_prog_ops = {
 	.test_run		= bpf_prog_test_run_xdp,
 };
 
+const struct bpf_verifier_ops dequeue_verifier_ops = {
+	.get_func_proto		= dequeue_func_proto,
+	.is_valid_access	= dequeue_is_valid_access,
+	.convert_ctx_access	= dequeue_convert_ctx_access,
+	.gen_prologue		= bpf_noop_prologue,
+};
+
+const struct bpf_prog_ops dequeue_prog_ops = {
+	.test_run		= bpf_prog_test_run_dequeue,
+};
+
 const struct bpf_verifier_ops cg_skb_verifier_ops = {
 	.get_func_proto		= cg_skb_func_proto,
 	.is_valid_access	= cg_skb_is_valid_access,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 623421377f6e..4dd8a563f85d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -954,6 +954,7 @@  enum bpf_prog_type {
 	BPF_PROG_TYPE_LSM,
 	BPF_PROG_TYPE_SK_LOOKUP,
 	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
+	BPF_PROG_TYPE_DEQUEUE,
 };
 
 enum bpf_attach_type {
@@ -5961,6 +5962,10 @@  struct xdp_md {
 	__u32 egress_ifindex;  /* txq->dev->ifindex */
 };
 
+struct dequeue_ctx {
+	__u32 egress_ifindex;
+};
+
 /* DEVMAP map-value layout
  *
  * The struct data-layout of map-value is a configuration interface.