[RFC,12/17] bpf: Add helper to schedule an interface for TX dequeue

Message ID 20220713111430.134810-13-toke@redhat.com (mailing list archive)
State RFC
Delegated to: BPF
Series xdp: Add packet queueing and scheduling capabilities

Checks

Context Check Description
bpf/vmtest-bpf-next-PR pending PR summary
bpf/vmtest-bpf-next-VM_Test-2 pending Logs for Kernel LATEST on ubuntu-latest with llvm-15
bpf/vmtest-bpf-next-VM_Test-3 pending Logs for Kernel LATEST on z15 with gcc
bpf/vmtest-bpf-next-VM_Test-1 fail Logs for Kernel LATEST on ubuntu-latest with gcc
netdev/tree_selection success Guessed tree name to be net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count fail Series longer than 15 patches (and no cover letter)
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1758 this patch: 1758
netdev/cc_maintainers success CCed 18 of 18 maintainers
netdev/build_clang success Errors and warnings before: 186 this patch: 186
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1769 this patch: 1769
netdev/checkpatch warning WARNING: line length of 100 exceeds 80 columns WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Toke Høiland-Jørgensen July 13, 2022, 11:14 a.m. UTC
This adds a helper that a BPF program can call to schedule an interface for
transmission. The helper can be used both from a regular XDP program (to
schedule transmission after queueing a packet) and from a dequeue program
(to (re-)schedule transmission after a dequeue operation). In particular,
the latter use can be combined with BPF timers to schedule delayed
transmission, for instance to implement traffic shaping.
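As an illustration of the first use, an XDP program can queue a packet and
then kick the egress interface. The sketch below is illustrative only: the
BPF_MAP_TYPE_PIFO_XDP map type and queueing into it via bpf_redirect_map()
are assumed from earlier patches in this series, the helper declaration is
assumed to be regenerated from the updated UAPI header, and egress_ifindex
is a placeholder filled in by the loader:

/* Sketch only: BPF_MAP_TYPE_PIFO_XDP and redirect-based queueing are
 * assumed from earlier patches in this series.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PIFO_XDP); /* assumed from this series */
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
	__uint(max_entries, 4096);
} pifo_map SEC(".maps");

const volatile int egress_ifindex; /* placeholder; set by the loader */

SEC("xdp")
int queue_and_kick(struct xdp_md *ctx)
{
	/* Queue the packet; the index argument is assumed to carry the
	 * packet's priority for PIFO maps.
	 */
	int ret = bpf_redirect_map(&pifo_map, 0, 0);

	if (ret == XDP_REDIRECT)
		/* Schedule the egress interface's dequeue program */
		bpf_schedule_iface_dequeue(ctx, egress_ifindex, 0);

	return ret;
}

char _license[] SEC("license") = "GPL";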

The helper always schedules transmission on the given interface from the
current CPU. For cross-CPU operation, it is up to the BPF program to
arrange for the helper to be called on the appropriate CPU, either by
configuring hardware RSS appropriately, or by using a cpumap. Likewise, it
is up to the BPF program to decide whether to use separate queues per CPU
(by queueing packets into multiple maps), or to accept the lock contention
of using a single map across CPUs.
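For instance, continuing the sketch above, a program opting for separate
per-CPU queues could select the queue map based on the CPU ID. Two CPUs are
shown for brevity; separate call sites are used because bpf_redirect_map()
requires a constant map pointer for the verifier:

struct {
	__uint(type, BPF_MAP_TYPE_PIFO_XDP); /* assumed, as above */
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
	__uint(max_entries, 4096);
} queue_cpu0 SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PIFO_XDP);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
	__uint(max_entries, 4096);
} queue_cpu1 SEC(".maps");

SEC("xdp")
int queue_per_cpu(struct xdp_md *ctx)
{
	/* One queue per CPU avoids cross-CPU contention on a shared map */
	if (bpf_get_smp_processor_id() == 0)
		return bpf_redirect_map(&queue_cpu0, 0, 0);

	return bpf_redirect_map(&queue_cpu1, 0, 0);
}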

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
 include/uapi/linux/bpf.h       | 11 +++++++
 net/core/filter.c              | 52 ++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h | 11 +++++++
 3 files changed, 74 insertions(+)

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d44382644391..b352ecc280f4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5358,6 +5358,16 @@ union bpf_attr {
  *		*bpf_packet_dequeue()* (and checked to not be NULL).
  *	Return
  *		This always succeeds and returns zero.
+ *
+ * long bpf_schedule_iface_dequeue(void *ctx, int ifindex, int flags)
+ *	Description
+ *		Schedule the interface with index *ifindex* for transmission from
+ *		its dequeue program as soon as possible. The *flags* argument
+ *		must be zero.
+ *
+ *	Return
+ *		Returns zero on success, -EINVAL if *flags* is non-zero, -ENODEV
+ *		if *ifindex* is invalid, or -ENOENT if no dequeue program is attached.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5570,6 +5580,7 @@ union bpf_attr {
 	FN(tcp_raw_check_syncookie_ipv6),	\
 	FN(packet_dequeue),		\
 	FN(packet_drop),		\
+	FN(schedule_iface_dequeue),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/net/core/filter.c b/net/core/filter.c
index 7c89eaa01c29..bb556d873b52 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4431,6 +4431,54 @@ static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
 	.arg3_type      = ARG_ANYTHING,
 };
 
+static int bpf_schedule_iface_dequeue(struct net *net, int ifindex, int flags)
+{
+	struct net_device *dev;
+	struct bpf_prog *prog;
+
+	if (flags)
+		return -EINVAL;
+
+	dev = dev_get_by_index_rcu(net, ifindex);
+	if (!dev)
+		return -ENODEV;
+
+	prog = rcu_dereference(dev->xdp_dequeue_prog);
+	if (!prog)
+		return -ENOENT;
+
+	dev_schedule_xdp_dequeue(dev);
+	return 0;
+}
+
+BPF_CALL_3(bpf_xdp_schedule_iface_dequeue, struct xdp_buff *, ctx, int, ifindex, int, flags)
+{
+	return bpf_schedule_iface_dequeue(dev_net(ctx->rxq->dev), ifindex, flags);
+}
+
+static const struct bpf_func_proto bpf_xdp_schedule_iface_dequeue_proto = {
+	.func           = bpf_xdp_schedule_iface_dequeue,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_ANYTHING,
+	.arg3_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_dequeue_schedule_iface_dequeue, struct dequeue_data *, ctx, int, ifindex, int, flags)
+{
+	return bpf_schedule_iface_dequeue(dev_net(ctx->txq->dev), ifindex, flags);
+}
+
+static const struct bpf_func_proto bpf_dequeue_schedule_iface_dequeue_proto = {
+	.func           = bpf_dequeue_schedule_iface_dequeue,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_ANYTHING,
+	.arg3_type      = ARG_ANYTHING,
+};
+
 BTF_ID_LIST_SINGLE(xdp_md_btf_ids, struct, xdp_md)
 
 BPF_CALL_4(bpf_packet_dequeue, struct dequeue_data *, ctx, struct bpf_map *, map,
@@ -8068,6 +8116,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_fib_lookup_proto;
 	case BPF_FUNC_check_mtu:
 		return &bpf_xdp_check_mtu_proto;
+	case BPF_FUNC_schedule_iface_dequeue:
+		return &bpf_xdp_schedule_iface_dequeue_proto;
 #ifdef CONFIG_INET
 	case BPF_FUNC_sk_lookup_udp:
 		return &bpf_xdp_sk_lookup_udp_proto;
@@ -8105,6 +8155,8 @@ dequeue_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_packet_dequeue_proto;
 	case BPF_FUNC_packet_drop:
 		return &bpf_packet_drop_proto;
+	case BPF_FUNC_schedule_iface_dequeue:
+		return &bpf_dequeue_schedule_iface_dequeue_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1dab68a89e18..9eb9a5b52c76 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5358,6 +5358,16 @@ union bpf_attr {
  *		*bpf_packet_dequeue()* (and checked to not be NULL).
  *	Return
  *		This always succeeds and returns zero.
+ *
+ * long bpf_schedule_iface_dequeue(void *ctx, int ifindex, int flags)
+ *	Description
+ *		Schedule the interface with index *ifindex* for transmission from
+ *		its dequeue program as soon as possible. The *flags* argument
+ *		must be zero.
+ *
+ *	Return
+ *		Returns zero on success, -EINVAL if *flags* is non-zero, -ENODEV
+ *		if *ifindex* is invalid, or -ENOENT if no dequeue program is attached.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5570,6 +5580,7 @@ union bpf_attr {
 	FN(tcp_raw_check_syncookie_ipv6),	\
 	FN(packet_dequeue),		\
 	FN(packet_drop),		\
+	FN(schedule_iface_dequeue),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper