diff mbox series

[RFC,bpf-next,v2,10/11] selftests/bpf: Extend xdp_hw_metadata with devtx kfuncs

Message ID 20230621170244.1283336-11-sdf@google.com (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series bpf: Netdev TX metadata | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain_full }}
bpf/vmtest-bpf-next-VM_Test-2 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 fail Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 fail Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-5 fail Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 fail Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-8 success Logs for veristat
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 13 this patch: 13
netdev/cc_maintainers warning 6 maintainers not CCed: kuba@kernel.org hawk@kernel.org shuah@kernel.org mykolal@fb.com davem@davemloft.net linux-kselftest@vger.kernel.org
netdev/build_clang fail Errors and warnings before: 13 this patch: 13
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 13 this patch: 13
netdev/checkpatch warning CHECK: Macro argument 'len' may be better as '(len)' to avoid precedence issues WARNING: Prefer using '"%s...", __func__' to using 'usage', this function's name, in a string WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Stanislav Fomichev June 21, 2023, 5:02 p.m. UTC
When we get a packet on port 9091, we swap src/dst and send it back out.
At this point, we also request the TX timestamp and plumb it back
to userspace, which simply prints the timestamp.

Haven't really tested, still working on mlx5 patches...

Cc: netdev@vger.kernel.org
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 .../selftests/bpf/progs/xdp_hw_metadata.c     | 107 ++++++++++
 tools/testing/selftests/bpf/xdp_hw_metadata.c | 198 ++++++++++++++++--
 2 files changed, 285 insertions(+), 20 deletions(-)
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
index b2dfd7066c6e..84f10d6b11f1 100644
--- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
@@ -4,6 +4,7 @@ 
 #include "xdp_metadata.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
 
 struct {
 	__uint(type, BPF_MAP_TYPE_XSKMAP);
@@ -12,14 +13,30 @@  struct {
 	__type(value, __u32);
 } xsk SEC(".maps");
 
+/* Ring buffer through which tx_complete hands devtx_sample records to
+ * userspace.
+ * NOTE(review): ring buffer sizes are normally a power-of-two multiple of the
+ * page size; a value of 10 presumably relies on libbpf rounding it up -
+ * confirm.
+ */
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	__uint(max_entries, 10);
+} tx_compl_buf SEC(".maps");
+
 __u64 pkts_skip = 0;
+/* Frames seen by the devtx programs that tx_filter() rejected. */
+__u64 pkts_tx_skip = 0;
 __u64 pkts_fail = 0;
 __u64 pkts_redir = 0;
+/* bpf_devtx_sb_request_timestamp() returned an error in tx_submit. */
+__u64 pkts_fail_tx = 0;
+/* tx_complete could not reserve a record in tx_compl_buf. */
+__u64 pkts_ringbuf_full = 0;
+
+/* Device index and network-namespace cookie the devtx programs restrict
+ * themselves to; userspace overwrites both through the skeleton before the
+ * object is loaded.
+ */
+int ifindex = -1;
+__u64 net_cookie = -1;
 
 extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
 					 __u64 *timestamp) __ksym;
 extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
 				    enum xdp_rss_hash_type *rss_type) __ksym;
+/* devtx kfuncs used below: tx_submit requests a TX timestamp, tx_complete
+ * reads it back.
+ */
+extern int bpf_devtx_sb_request_timestamp(const struct devtx_frame *ctx) __ksym;
+extern int bpf_devtx_cp_timestamp(const struct devtx_frame *ctx, __u64 *timestamp) __ksym;
+
+/* NOTE(review): the attach kfuncs are declared but not called anywhere in the
+ * visible part of this patch - confirm they are still needed.
+ */
+extern int bpf_devtx_sb_attach(int ifindex, int prog_fd) __ksym;
+extern int bpf_devtx_cp_attach(int ifindex, int prog_fd) __ksym;
 
 SEC("xdp")
 int rx(struct xdp_md *ctx)
@@ -90,4 +107,94 @@  int rx(struct xdp_md *ctx)
 	return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
 }
 
+/* This is not strictly required; only to showcase how to access the payload.
+ *
+ * Returns true when the frame's UDP source port is 9091, i.e. it looks like
+ * one of the replies generated by the userspace part of this test. Every
+ * rejected frame bumps pkts_tx_skip.
+ *
+ * NOTE(review): the port offset assumes a fixed-size IPv4 header (no options)
+ * or an IPv6 header without extension headers, and the L4 protocol is not
+ * checked - fine for a demo, confirm if reused. The return values of
+ * bpf_probe_read_kernel() are not checked; eth and udp start zeroed.
+ */
+static __always_inline bool tx_filter(const struct devtx_frame *frame)
+{
+	int port_offset = sizeof(struct ethhdr) + offsetof(struct udphdr, source);
+	struct ethhdr eth = {};
+	struct udphdr udp = {};
+
+	/* Only the EtherType is needed to locate the UDP header. */
+	bpf_probe_read_kernel(&eth.h_proto, sizeof(eth.h_proto),
+			      frame->data + offsetof(struct ethhdr, h_proto));
+
+	if (eth.h_proto == bpf_htons(ETH_P_IP)) {
+		port_offset += sizeof(struct iphdr);
+	} else if (eth.h_proto == bpf_htons(ETH_P_IPV6)) {
+		port_offset += sizeof(struct ipv6hdr);
+	} else {
+		__sync_add_and_fetch(&pkts_tx_skip, 1);
+		return false;
+	}
+
+	bpf_probe_read_kernel(&udp.source, sizeof(udp.source), frame->data + port_offset);
+
+	/* Replies to UDP:9091 */
+	if (udp.source != bpf_htons(9091)) {
+		__sync_add_and_fetch(&pkts_tx_skip, 1);
+		return false;
+	}
+
+	return true;
+}
+
+/* fentry program for the devtx submit path; userspace selects the attach
+ * target with -s and leaves the program unattached otherwise.
+ *
+ * For frames on the configured device and network namespace that carry
+ * exactly TX_META_LEN bytes of metadata with request_timestamp set, and that
+ * pass tx_filter(), ask for a TX timestamp. A failing request is only counted
+ * in pkts_fail_tx. Always returns 0.
+ */
+SEC("fentry")
+int BPF_PROG(tx_submit, const struct devtx_frame *frame)
+{
+	struct xdp_tx_meta meta = {};
+	int ret;
+
+	if (frame->netdev->ifindex != ifindex)
+		return 0;
+	if (frame->netdev->nd_net.net->net_cookie != net_cookie)
+		return 0;
+	if (frame->meta_len != TX_META_LEN)
+		return 0;
+
+	/* The metadata written by userspace sits right in front of the frame. */
+	bpf_probe_read_kernel(&meta, sizeof(meta), frame->data - TX_META_LEN);
+	if (!meta.request_timestamp)
+		return 0;
+
+	if (!tx_filter(frame))
+		return 0;
+
+	ret = bpf_devtx_sb_request_timestamp(frame);
+	if (ret < 0)
+		__sync_add_and_fetch(&pkts_fail_tx, 1);
+
+	return 0;
+}
+
+/* fentry program for the devtx completion path; userspace selects the attach
+ * target with -c and leaves the program unattached otherwise.
+ *
+ * Applies the same device/namespace/metadata/port checks as tx_submit, then
+ * publishes one devtx_sample through tx_compl_buf holding the return value of
+ * bpf_devtx_cp_timestamp() and the timestamp it stored. The sample is
+ * submitted even when the kfunc reports an error. Always returns 0.
+ */
+SEC("fentry")
+int BPF_PROG(tx_complete, const struct devtx_frame *frame)
+{
+	struct xdp_tx_meta meta = {};
+	struct devtx_sample *sample;
+
+	if (frame->netdev->ifindex != ifindex)
+		return 0;
+	if (frame->netdev->nd_net.net->net_cookie != net_cookie)
+		return 0;
+	if (frame->meta_len != TX_META_LEN)
+		return 0;
+
+	/* The metadata written by userspace sits right in front of the frame. */
+	bpf_probe_read_kernel(&meta, sizeof(meta), frame->data - TX_META_LEN);
+	if (!meta.request_timestamp)
+		return 0;
+
+	if (!tx_filter(frame))
+		return 0;
+
+	sample = bpf_ringbuf_reserve(&tx_compl_buf, sizeof(*sample), 0);
+	if (!sample) {
+		/* No room for another record; drop this completion. */
+		__sync_add_and_fetch(&pkts_ringbuf_full, 1);
+		return 0;
+	}
+
+	sample->timestamp_retval = bpf_devtx_cp_timestamp(frame, &sample->timestamp);
+
+	bpf_ringbuf_submit(sample, 0);
+
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 613321eb84c1..0bbe8377a34b 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -10,7 +10,8 @@ 
  *   - rx_hash
  *
  * TX:
- * - TBD
+ * - UDP 9091 packets trigger TX reply
+ * - TX HW timestamp is requested and reported back upon completion
  */
 
 #include <test_progs.h>
@@ -28,6 +29,8 @@ 
 #include <net/if.h>
 #include <poll.h>
 #include <time.h>
+#include <unistd.h>
+#include <libgen.h>
 
 #include "xdp_metadata.h"
 
@@ -54,13 +57,14 @@  int rxq;
 
 void test__fail(void) { /* for network_helpers.c */ }
 
-static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
+static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id, int flags)
 {
 	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
 	const struct xsk_socket_config socket_config = {
 		.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
 		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
-		.bind_flags = XDP_COPY,
+		.bind_flags = flags,
+		.tx_metadata_len = TX_META_LEN,
 	};
 	const struct xsk_umem_config umem_config = {
 		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
@@ -228,7 +232,87 @@  static void verify_skb_metadata(int fd)
 	printf("skb hwtstamp is not found!\n");
 }
 
-static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
+/* Poll the TX-timestamp ring buffer (timeout argument 1000), then reap at most
+ * one entry from the socket's completion ring and log it.
+ */
+static void complete_tx(struct xsk *xsk, struct ring_buffer *ringbuf)
+{
+	__u64 comp_addr;
+	__u32 pos;
+
+	ring_buffer__poll(ringbuf, 1000);
+
+	/* Nothing completed yet: leave the completion ring untouched. */
+	if (!xsk_ring_cons__peek(&xsk->comp, 1, &pos))
+		return;
+
+	comp_addr = *xsk_ring_cons__comp_addr(&xsk->comp, pos);
+	printf("%p: complete tx idx=%u addr=%llx\n", xsk, pos, comp_addr);
+	xsk_ring_cons__release(&xsk->comp, 1);
+}
+
+/* Exchange the first 'len' bytes of the buffers at 'a' and 'b'.
+ * Every argument is parenthesised so that expressions such as 'p + 1' or
+ * 'n - 1' expand as intended. Arguments are evaluated more than once, so they
+ * must be free of side effects.
+ */
+#define swap(a, b, len) do { \
+	for (int i = 0; i < (len); i++) { \
+		__u8 tmp = ((__u8 *)(a))[i]; \
+		((__u8 *)(a))[i] = ((__u8 *)(b))[i]; \
+		((__u8 *)(b))[i] = tmp; \
+	} \
+} while (0)
+
+/* Echo a received UDP packet back to its sender over the AF_XDP TX ring.
+ *
+ * @xsk:       socket whose TX ring and umem are used for the reply
+ * @rx_packet: start of the received Ethernet frame
+ *
+ * The frame is copied into a TX umem chunk, the Ethernet, IP and UDP
+ * source/destination fields are swapped in the copy, and the TX metadata in
+ * front of the frame asks for a timestamp. Frames that are neither IPv4 nor
+ * IPv6 are silently ignored.
+ *
+ * NOTE(review): no explicit TX wakeup is issued here - confirm that the
+ * driver picks the descriptor up without one.
+ */
+static void ping_pong(struct xsk *xsk, void *rx_packet)
+{
+	const struct ethhdr *rx_eth = rx_packet;
+	struct ipv6hdr *ip6h = NULL;
+	struct iphdr *iph = NULL;
+	struct xdp_tx_meta *meta;
+	struct xdp_desc *tx_desc;
+	struct udphdr *udph;
+	struct ethhdr *eth;
+	void *data;
+	__u32 idx;
+	int ret;
+	int len;
+
+	/* Size the reply from the *received* headers: the TX chunk still holds
+	 * stale bytes until the memcpy() below.
+	 */
+	len = ETH_HLEN;
+	if (rx_eth->h_proto == htons(ETH_P_IP)) {
+		const struct iphdr *rx_iph = (const void *)(rx_eth + 1);
+
+		len += ntohs(rx_iph->tot_len);
+	} else if (rx_eth->h_proto == htons(ETH_P_IPV6)) {
+		const struct ipv6hdr *rx_ip6h = (const void *)(rx_eth + 1);
+
+		len += sizeof(*rx_ip6h) + ntohs(rx_ip6h->payload_len);
+	} else {
+		return;
+	}
+
+	/* The length comes from the wire; never copy past the TX chunk. */
+	if (len > UMEM_FRAME_SIZE - TX_META_LEN) {
+		printf("%p: reply of %d bytes does not fit a tx frame\n", xsk, len);
+		return;
+	}
+
+	ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx);
+	if (ret != 1) {
+		printf("%p: failed to reserve tx slot\n", xsk);
+		return;
+	}
+
+	tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
+	tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + TX_META_LEN;
+	tx_desc->len = len;
+	data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
+
+	/* The metadata lives in the TX_META_LEN bytes preceding the frame. */
+	meta = data - TX_META_LEN;
+	meta->request_timestamp = 1;
+
+	memcpy(data, rx_packet, len);
+
+	/* Turn the copy into a reply by swapping all src/dst pairs. */
+	eth = data;
+	swap(eth->h_dest, eth->h_source, ETH_ALEN);
+	if (eth->h_proto == htons(ETH_P_IP)) {
+		iph = (void *)(eth + 1);
+		udph = (void *)(iph + 1);
+		swap(&iph->saddr, &iph->daddr, 4);
+	} else {
+		ip6h = (void *)(eth + 1);
+		udph = (void *)(ip6h + 1);
+		swap(&ip6h->saddr, &ip6h->daddr, 16);
+	}
+	swap(&udph->source, &udph->dest, 2);
+
+	xsk_ring_prod__submit(&xsk->tx, 1);
+}
+
+static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id,
+			   struct ring_buffer *ringbuf)
 {
 	const struct xdp_desc *rx_desc;
 	struct pollfd fds[rxq + 1];
@@ -251,8 +335,9 @@  static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
 	while (true) {
 		errno = 0;
 		ret = poll(fds, rxq + 1, 1000);
-		printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n",
+		printf("poll: %d (%d) skip=%llu/%llu fail=%llu redir=%llu\n",
 		       ret, errno, bpf_obj->bss->pkts_skip,
+		       bpf_obj->bss->pkts_tx_skip,
 		       bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir);
 		if (ret < 0)
 			break;
@@ -280,6 +365,11 @@  static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
 			       xsk, idx, rx_desc->addr, addr, comp_addr);
 			verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
 					    clock_id);
+
+			/* mirror packet back */
+			ping_pong(xsk, xsk_umem__get_data(xsk->umem_area, addr));
+			complete_tx(xsk, ringbuf);
+
 			xsk_ring_cons__release(&xsk->rx, 1);
 			refill_rx(xsk, comp_addr);
 		}
@@ -373,16 +463,6 @@  static void cleanup(void)
 	int ret;
 	int i;
 
-	if (bpf_obj) {
-		opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx);
-		if (opts.old_prog_fd >= 0) {
-			printf("detaching bpf program....\n");
-			ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts);
-			if (ret)
-				printf("failed to detach XDP program: %d\n", ret);
-		}
-	}
-
 	for (i = 0; i < rxq; i++)
 		close_xsk(&rx_xsk[i]);
 
@@ -404,21 +484,69 @@  static void timestamping_enable(int fd, int val)
 		error(1, errno, "setsockopt(SO_TIMESTAMPING)");
 }
 
+/* Ring buffer callback: print one TX timestamp record produced by the
+ * tx_complete BPF program. ctx and len are unused. Returns 0.
+ */
+static int process_sample(void *ctx, void *data, size_t len)
+{
+	const struct devtx_sample *rec = data;
+
+	printf("got tx timestamp sample %u %llu\n", rec->timestamp_retval,
+	       rec->timestamp);
+	return 0;
+}
+
+/* Print the command-line synopsis and option list to stderr.
+ * @prog: program name shown in the synopsis line
+ */
+static void usage(const char *prog)
+{
+	fprintf(stderr, "usage: %s [OPTS] <ifname>\n", prog);
+	fputs("OPTS:\n"
+	      "    -s    symbol name for tx_submit\n"
+	      "    -c    symbol name for tx_complete\n"
+	      "    -C    run in copy mode\n",
+	      stderr);
+}
+
 int main(int argc, char *argv[])
 {
+	struct ring_buffer *tx_compl_ringbuf = NULL;
 	clockid_t clock_id = CLOCK_TAI;
+	char *tx_complete = NULL;
+	char *tx_submit = NULL;
+	int bind_flags = 0;
 	int server_fd = -1;
+	int opt;
 	int ret;
 	int i;
 
 	struct bpf_program *prog;
 
-	if (argc != 2) {
+	while ((opt = getopt(argc, argv, "s:c:C")) != -1) {
+		switch (opt) {
+		case 's':
+			tx_submit = optarg;
+			break;
+		case 'c':
+			tx_complete = optarg;
+			break;
+		case 'C':
+			bind_flags |= XDP_COPY;
+			break;
+		default:
+			usage(basename(argv[0]));
+			return 1;
+		}
+	}
+
+	if (argc < 2) {
 		fprintf(stderr, "pass device name\n");
 		return -1;
 	}
 
-	ifname = argv[1];
+	if (optind >= argc) {
+		usage(basename(argv[0]));
+		return 1;
+	}
+
+	ifname = argv[optind];
 	ifindex = if_nametoindex(ifname);
 	rxq = rxq_num(ifname);
 
@@ -432,7 +560,7 @@  int main(int argc, char *argv[])
 
 	for (i = 0; i < rxq; i++) {
 		printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i);
-		ret = open_xsk(ifindex, &rx_xsk[i], i);
+		ret = open_xsk(ifindex, &rx_xsk[i], i, bind_flags);
 		if (ret)
 			error(1, -ret, "open_xsk");
 
@@ -444,15 +572,45 @@  int main(int argc, char *argv[])
 	if (libbpf_get_error(bpf_obj))
 		error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open");
 
+	bpf_obj->data->ifindex = ifindex;
+	bpf_obj->data->net_cookie = get_net_cookie();
+
 	prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
 	bpf_program__set_ifindex(prog, ifindex);
 	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
 
+	prog = bpf_object__find_program_by_name(bpf_obj->obj, "tx_submit");
+	bpf_program__set_ifindex(prog, ifindex);
+	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+	if (tx_submit) {
+		printf("attaching devtx submit program to %s\n", tx_submit);
+		bpf_program__set_attach_target(prog, 0, tx_submit);
+	} else {
+		printf("skipping devtx submit program\n");
+		bpf_program__set_autoattach(prog, false);
+	}
+
+	prog = bpf_object__find_program_by_name(bpf_obj->obj, "tx_complete");
+	bpf_program__set_ifindex(prog, ifindex);
+	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+	if (tx_complete) {
+		printf("attaching devtx complete program to %s\n", tx_complete);
+		bpf_program__set_attach_target(prog, 0, tx_complete);
+	} else {
+		printf("skipping devtx complete program\n");
+		bpf_program__set_autoattach(prog, false);
+	}
+
 	printf("load bpf program...\n");
 	ret = xdp_hw_metadata__load(bpf_obj);
 	if (ret)
 		error(1, -ret, "xdp_hw_metadata__load");
 
+	tx_compl_ringbuf = ring_buffer__new(bpf_map__fd(bpf_obj->maps.tx_compl_buf),
+					    process_sample, NULL, NULL);
+	if (libbpf_get_error(tx_compl_ringbuf))
+		error(1, -libbpf_get_error(tx_compl_ringbuf), "ring_buffer__new");
+
 	printf("prepare skb endpoint...\n");
 	server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000);
 	if (server_fd < 0)
@@ -472,7 +630,7 @@  int main(int argc, char *argv[])
 			error(1, -ret, "bpf_map_update_elem");
 	}
 
-	printf("attach bpf program...\n");
+	printf("attach rx bpf program...\n");
 	ret = bpf_xdp_attach(ifindex,
 			     bpf_program__fd(bpf_obj->progs.rx),
 			     XDP_FLAGS, NULL);
@@ -480,7 +638,7 @@  int main(int argc, char *argv[])
 		error(1, -ret, "bpf_xdp_attach");
 
 	signal(SIGINT, handle_signal);
-	ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id);
+	ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id, tx_compl_ringbuf);
 	close(server_fd);
 	cleanup();
 	if (ret)