[RFC,bpf-next,7/7] selftests/bpf: extend xdp_hw_metadata with devtx kfuncs

Message ID 20230612172307.3923165-8-sdf@google.com (mailing list archive)
State RFC
Delegated to: BPF
Series: bpf: netdev TX metadata

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain_full }}
bpf/vmtest-bpf-next-VM_Test-2 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 fail Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 fail Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-5 fail Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-8 success Logs for veristat
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/cc_maintainers warning 6 maintainers not CCed: kuba@kernel.org hawk@kernel.org shuah@kernel.org mykolal@fb.com davem@davemloft.net linux-kselftest@vger.kernel.org
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: Macro argument 'len' may be better as '(len)' to avoid precedence issues WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Stanislav Fomichev June 12, 2023, 5:23 p.m. UTC
When we get a packet on port 9091, we swap src/dst and send it back out.
At that point, we also request the TX timestamp and plumb it back
to userspace. The userspace simply prints the timestamp.

Haven't really tested, still working on mlx5 patches...

Cc: netdev@vger.kernel.org
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 .../selftests/bpf/progs/xdp_hw_metadata.c     |  59 +++++++
 tools/testing/selftests/bpf/xdp_hw_metadata.c | 160 +++++++++++++++++-
 2 files changed, 214 insertions(+), 5 deletions(-)

Comments

Willem de Bruijn June 13, 2023, 3:03 p.m. UTC | #1
On Mon, Jun 12, 2023 at 7:26 PM Stanislav Fomichev <sdf@google.com> wrote:
>
> When we get packets on port 9091, we swap src/dst and send it out.
> At this point, we also request the timestamp and plumb it back
> to the userspace. The userspace simply prints the timestamp.
>
> Haven't really tested, still working on mlx5 patches...
>
> Cc: netdev@vger.kernel.org
> Signed-off-by: Stanislav Fomichev <sdf@google.com>


> +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
> @@ -10,7 +10,8 @@
>   *   - rx_hash
>   *
>   * TX:
> - * - TBD
> + * - UDP 9091 packets trigger TX reply

Where is this branch on the port? It seems to be missing.

> +static void ping_pong(struct xsk *xsk, void *rx_packet)
> +{
> +       struct ipv6hdr *ip6h = NULL;
> +       struct iphdr *iph = NULL;
> +       struct xdp_desc *tx_desc;
> +       struct udphdr *udph;
> +       struct ethhdr *eth;
> +       void *data;
> +       __u32 idx;
> +       int ret;
> +       int len;
> +
> +       ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx);
> +       if (ret != 1) {
> +               printf("%p: failed to reserve tx slot\n", xsk);
> +               return;
> +       }
> +
> +       tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
> +       tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE;
> +       data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
> +
> +       eth = data;
> +
> +       if (eth->h_proto == htons(ETH_P_IP)) {
> +               iph = (void *)(eth + 1);
> +               udph = (void *)(iph + 1);
> +       } else if (eth->h_proto == htons(ETH_P_IPV6)) {
> +               ip6h = (void *)(eth + 1);
> +               udph = (void *)(ip6h + 1);
> +       } else {
> +               xsk_ring_prod__cancel(&xsk->tx, 1);
> +               return;
> +       }
> +
> +       len = ETH_HLEN;
> +       if (ip6h)
> +               len += ntohs(ip6h->payload_len);

sizeof(*ip6h) + ntohs(ip6h->payload_len) ?

> +       if (iph)
> +               len += ntohs(iph->tot_len);
> +
> +       memcpy(data, rx_packet, len);
> +       swap(eth->h_dest, eth->h_source, ETH_ALEN);
> +       if (iph)
> +               swap(&iph->saddr, &iph->daddr, 4);

Need to recompute the IPv4 checksum?
Stanislav Fomichev June 13, 2023, 7 p.m. UTC | #2
On Tue, Jun 13, 2023 at 8:03 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Mon, Jun 12, 2023 at 7:26 PM Stanislav Fomichev <sdf@google.com> wrote:
> >
> > When we get packets on port 9091, we swap src/dst and send it out.
> > At this point, we also request the timestamp and plumb it back
> > to the userspace. The userspace simply prints the timestamp.
> >
> > Haven't really tested, still working on mlx5 patches...
> >
> > Cc: netdev@vger.kernel.org
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
>
>
> > +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
> > @@ -10,7 +10,8 @@
> >   *   - rx_hash
> >   *
> >   * TX:
> > - * - TBD
> > + * - UDP 9091 packets trigger TX reply
>
> This branch on port is missing?

That's the ping_pong part. Every packet arriving on port 9091 gets
received by AF_XDP and is sent back.
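
For readers following along: a minimal sketch of the kind of port filter
the existing rx() program applies before redirecting to the AF_XDP socket.
This is hypothetical and simplified (rx_sketch is a made-up name); the real
program in progs/xdp_hw_metadata.c does bounds-checked parsing for both
IPv4 and IPv6.

SEC("xdp")
int rx_sketch(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;
	struct iphdr *iph = (void *)(eth + 1);
	struct udphdr *udph = (void *)(iph + 1);

	/* A single bounds check covers the eth, iph and udph reads below. */
	if ((void *)(udph + 1) > data_end)
		return XDP_PASS;

	if (eth->h_proto != bpf_htons(ETH_P_IP) ||
	    iph->protocol != IPPROTO_UDP ||
	    udph->dest != bpf_htons(9091))
		return XDP_PASS;

	/* Matching packets land in the AF_XDP socket bound to this queue;
	 * userspace then mirrors them back via ping_pong().
	 */
	return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
}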

> > +static void ping_pong(struct xsk *xsk, void *rx_packet)
> > +{
> > +       struct ipv6hdr *ip6h = NULL;
> > +       struct iphdr *iph = NULL;
> > +       struct xdp_desc *tx_desc;
> > +       struct udphdr *udph;
> > +       struct ethhdr *eth;
> > +       void *data;
> > +       __u32 idx;
> > +       int ret;
> > +       int len;
> > +
> > +       ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx);
> > +       if (ret != 1) {
> > +               printf("%p: failed to reserve tx slot\n", xsk);
> > +               return;
> > +       }
> > +
> > +       tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
> > +       tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE;
> > +       data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
> > +
> > +       eth = data;
> > +
> > +       if (eth->h_proto == htons(ETH_P_IP)) {
> > +               iph = (void *)(eth + 1);
> > +               udph = (void *)(iph + 1);
> > +       } else if (eth->h_proto == htons(ETH_P_IPV6)) {
> > +               ip6h = (void *)(eth + 1);
> > +               udph = (void *)(ip6h + 1);
> > +       } else {
> > +               xsk_ring_prod__cancel(&xsk->tx, 1);
> > +               return;
> > +       }
> > +
> > +       len = ETH_HLEN;
> > +       if (ip6h)
> > +               len += ntohs(ip6h->payload_len);
>
> sizeof(*ip6h) + ntohs(ip6h->payload_len) ?

Oops, thanks, that's clearly not tested :-)
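
For clarity, the distinction behind this comment: IPv4's tot_len covers
the IPv4 header plus payload, while IPv6's payload_len excludes the fixed
40-byte IPv6 header, so the v6 branch needs the extra sizeof(*ip6h).
A sketch of the corrected computation (assuming no IPv4 options or IPv6
extension headers, which holds for the test traffic):

	len = ETH_HLEN;
	if (ip6h)
		/* payload_len does not include the fixed IPv6 header */
		len += sizeof(*ip6h) + ntohs(ip6h->payload_len);
	if (iph)
		/* tot_len already includes the IPv4 header */
		len += ntohs(iph->tot_len);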

> > +       if (iph)
> > +               len += ntohs(iph->tot_len);
> > +
> > +       memcpy(data, rx_packet, len);
> > +       swap(eth->h_dest, eth->h_source, ETH_ALEN);
> > +       if (iph)
> > +               swap(&iph->saddr, &iph->daddr, 4);
>
> need to recompute ipv4 checksum?

Discussed offline: swapping the aligned-u16 chunks preserves the checksum.
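
To spell out why no recomputation is needed: the Internet checksum
(RFC 1071) is the one's-complement sum of 16-bit words, and addition is
commutative, so exchanging saddr and daddr (each a pair of 16-bit-aligned
words inside the checksummed region) leaves the sum, and therefore the
checksum, unchanged. The same argument covers the swapped UDP ports.
A small standalone demonstration in plain userspace C (illustration only,
not part of the selftest):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* RFC 1071 checksum over an arbitrary buffer. */
static uint16_t csum16(const uint8_t *buf, size_t len)
{
	uint32_t sum = 0;
	uint16_t word;

	while (len > 1) {
		memcpy(&word, buf, 2);
		sum += word;
		buf += 2;
		len -= 2;
	}
	if (len)
		sum += *buf;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* 20-byte IPv4 header, checksum field zeroed, arbitrary addresses. */
	uint8_t hdr[20] = {
		0x45, 0x00, 0x00, 0x54, 0x12, 0x34, 0x40, 0x00,
		0x40, 0x11, 0x00, 0x00,
		10, 0, 0, 1,	/* saddr */
		10, 0, 0, 2,	/* daddr */
	};
	uint16_t before, after;
	uint8_t tmp[4];

	before = csum16(hdr, sizeof(hdr));

	/* Swap the 16-bit-aligned saddr and daddr fields. */
	memcpy(tmp, &hdr[12], 4);
	memcpy(&hdr[12], &hdr[16], 4);
	memcpy(&hdr[16], tmp, 4);

	after = csum16(hdr, sizeof(hdr));
	assert(before == after);
	printf("checksum unchanged by the swap: 0x%04x\n", (unsigned)before);
	return 0;
}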

Patch

diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
index b2dfd7066c6e..e27823b755ef 100644
--- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
@@ -4,6 +4,7 @@ 
 #include "xdp_metadata.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
 
 struct {
 	__uint(type, BPF_MAP_TYPE_XSKMAP);
@@ -12,14 +13,26 @@  struct {
 	__type(value, __u32);
 } xsk SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	__uint(max_entries, 10);
+} tx_compl_buf SEC(".maps");
+
 __u64 pkts_skip = 0;
 __u64 pkts_fail = 0;
 __u64 pkts_redir = 0;
+__u64 pkts_fail_tx = 0;
+__u64 pkts_ringbuf_full = 0;
 
 extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
 					 __u64 *timestamp) __ksym;
 extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
 				    enum xdp_rss_hash_type *rss_type) __ksym;
+extern int bpf_devtx_sb_request_timestamp(const struct devtx_frame *ctx) __ksym;
+extern int bpf_devtx_cp_timestamp(const struct devtx_frame *ctx, __u64 *timestamp) __ksym;
+
+extern int bpf_devtx_sb_attach(int ifindex, int prog_fd) __ksym;
+extern int bpf_devtx_cp_attach(int ifindex, int prog_fd) __ksym;
 
 SEC("xdp")
 int rx(struct xdp_md *ctx)
@@ -90,4 +103,50 @@  int rx(struct xdp_md *ctx)
 	return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
 }
 
+SEC("fentry/devtx_sb")
+int BPF_PROG(devtx_sb, const struct devtx_frame *frame)
+{
+	int ret;
+
+	ret = bpf_devtx_sb_request_timestamp(frame);
+	if (ret < 0)
+		__sync_add_and_fetch(&pkts_fail_tx, 1);
+
+	return 0;
+}
+
+SEC("fentry/devtx_cp")
+int BPF_PROG(devtx_cp, const struct devtx_frame *frame)
+{
+	struct devtx_sample *sample;
+
+	sample = bpf_ringbuf_reserve(&tx_compl_buf, sizeof(*sample), 0);
+	if (!sample) {
+		__sync_add_and_fetch(&pkts_ringbuf_full, 1);
+		return 0;
+	}
+
+	sample->timestamp_retval = bpf_devtx_cp_timestamp(frame, &sample->timestamp);
+
+	bpf_ringbuf_submit(sample, 0);
+
+	return 0;
+}
+
+SEC("syscall")
+int attach_prog(struct devtx_attach_args *ctx)
+{
+	ctx->devtx_sb_retval = bpf_devtx_sb_attach(ctx->ifindex, ctx->devtx_sb_prog_fd);
+	ctx->devtx_cp_retval = bpf_devtx_cp_attach(ctx->ifindex, ctx->devtx_cp_prog_fd);
+	return 0;
+}
+
+SEC("syscall")
+int detach_prog(struct devtx_attach_args *ctx)
+{
+	ctx->devtx_sb_retval = bpf_devtx_sb_attach(ctx->ifindex, -1);
+	ctx->devtx_cp_retval = bpf_devtx_cp_attach(ctx->ifindex, -1);
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 613321eb84c1..6cc364c2af8a 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -10,7 +10,8 @@ 
  *   - rx_hash
  *
  * TX:
- * - TBD
+ * - UDP 9091 packets trigger TX reply
+ * - TX HW timestamp is requested and reported back upon completion
  */
 
 #include <test_progs.h>
@@ -228,7 +229,83 @@  static void verify_skb_metadata(int fd)
 	printf("skb hwtstamp is not found!\n");
 }
 
-static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
+static void complete_tx(struct xsk *xsk, struct ring_buffer *ringbuf)
+{
+	__u32 idx;
+	__u64 addr;
+
+	ring_buffer__poll(ringbuf, 1000);
+
+	if (xsk_ring_cons__peek(&xsk->comp, 1, &idx)) {
+		addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
+
+		printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
+		xsk_ring_cons__release(&xsk->comp, 1);
+	}
+}
+
+#define swap(a, b, len) do { \
+	for (int i = 0; i < len; i++) { \
+		__u8 tmp = ((__u8 *)a)[i]; \
+		((__u8 *)a)[i] = ((__u8 *)b)[i]; \
+		((__u8 *)b)[i] = tmp; \
+	} \
+} while (0)
+
+static void ping_pong(struct xsk *xsk, void *rx_packet)
+{
+	struct ipv6hdr *ip6h = NULL;
+	struct iphdr *iph = NULL;
+	struct xdp_desc *tx_desc;
+	struct udphdr *udph;
+	struct ethhdr *eth;
+	void *data;
+	__u32 idx;
+	int ret;
+	int len;
+
+	ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx);
+	if (ret != 1) {
+		printf("%p: failed to reserve tx slot\n", xsk);
+		return;
+	}
+
+	tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
+	tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE;
+	data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
+
+	eth = data;
+
+	if (eth->h_proto == htons(ETH_P_IP)) {
+		iph = (void *)(eth + 1);
+		udph = (void *)(iph + 1);
+	} else if (eth->h_proto == htons(ETH_P_IPV6)) {
+		ip6h = (void *)(eth + 1);
+		udph = (void *)(ip6h + 1);
+	} else {
+		xsk_ring_prod__cancel(&xsk->tx, 1);
+		return;
+	}
+
+	len = ETH_HLEN;
+	if (ip6h)
+		len += ntohs(ip6h->payload_len);
+	if (iph)
+		len += ntohs(iph->tot_len);
+
+	memcpy(data, rx_packet, len);
+	swap(eth->h_dest, eth->h_source, ETH_ALEN);
+	if (iph)
+		swap(&iph->saddr, &iph->daddr, 4);
+	else
+		swap(&ip6h->saddr, &ip6h->daddr, 16);
+	swap(&udph->source, &udph->dest, 2);
+
+	xsk_ring_prod__submit(&xsk->tx, 1);
+}
+
+static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id,
+			   struct ring_buffer *ringbuf)
 {
 	const struct xdp_desc *rx_desc;
 	struct pollfd fds[rxq + 1];
@@ -280,6 +357,11 @@  static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
 			       xsk, idx, rx_desc->addr, addr, comp_addr);
 			verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
 					    clock_id);
+
+			/* mirror packet back */
+			ping_pong(xsk, xsk_umem__get_data(xsk->umem_area, addr));
+			complete_tx(xsk, ringbuf);
+
 			xsk_ring_cons__release(&xsk->rx, 1);
 			refill_rx(xsk, comp_addr);
 		}
@@ -370,6 +452,7 @@  static void hwtstamp_enable(const char *ifname)
 static void cleanup(void)
 {
 	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
+	int syscall_fd;
 	int ret;
 	int i;
 
@@ -379,8 +462,26 @@  static void cleanup(void)
 			printf("detaching bpf program....\n");
 			ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts);
 			if (ret)
-				printf("failed to detach XDP program: %d\n", ret);
+				printf("failed to detach RX XDP program: %d\n", ret);
 		}
+
+		struct devtx_attach_args args = {
+			.ifindex = ifindex,
+			.devtx_sb_prog_fd = bpf_program__fd(bpf_obj->progs.devtx_sb),
+			.devtx_cp_prog_fd = bpf_program__fd(bpf_obj->progs.devtx_cp),
+			.devtx_sb_retval = -1,
+			.devtx_cp_retval = -1,
+		};
+		DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattr,
+			.ctx_in = &args,
+			.ctx_size_in = sizeof(args),
+		);
+
+		syscall_fd = bpf_program__fd(bpf_obj->progs.detach_prog);
+		ret = bpf_prog_test_run_opts(syscall_fd, &tattr);
+		if (ret < 0 || args.devtx_sb_retval < 0 || args.devtx_cp_retval < 0)
+			printf("failed to detach TX XDP programs: %d %d %d\n",
+			       ret, args.devtx_sb_retval, args.devtx_cp_retval);
 	}
 
 	for (i = 0; i < rxq; i++)
@@ -404,10 +505,22 @@  static void timestamping_enable(int fd, int val)
 		error(1, errno, "setsockopt(SO_TIMESTAMPING)");
 }
 
+static int process_sample(void *ctx, void *data, size_t len)
+{
+	struct devtx_sample *sample = data;
+
+	printf("got tx timestamp sample %u %llu\n",
+	       sample->timestamp_retval, sample->timestamp);
+
+	return 0;
+}
+
 int main(int argc, char *argv[])
 {
+	struct ring_buffer *tx_compl_ringbuf = NULL;
 	clockid_t clock_id = CLOCK_TAI;
 	int server_fd = -1;
+	int syscall_fd;
 	int ret;
 	int i;
 
@@ -448,11 +561,26 @@  int main(int argc, char *argv[])
 	bpf_program__set_ifindex(prog, ifindex);
 	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
 
+	prog = bpf_object__find_program_by_name(bpf_obj->obj, "devtx_sb");
+	bpf_program__set_ifindex(prog, ifindex);
+	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+	bpf_program__set_autoattach(prog, false);
+
+	prog = bpf_object__find_program_by_name(bpf_obj->obj, "devtx_cp");
+	bpf_program__set_ifindex(prog, ifindex);
+	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+	bpf_program__set_autoattach(prog, false);
+
 	printf("load bpf program...\n");
 	ret = xdp_hw_metadata__load(bpf_obj);
 	if (ret)
 		error(1, -ret, "xdp_hw_metadata__load");
 
+	tx_compl_ringbuf = ring_buffer__new(bpf_map__fd(bpf_obj->maps.tx_compl_buf),
+					    process_sample, NULL, NULL);
+	if (libbpf_get_error(tx_compl_ringbuf))
+		error(1, -libbpf_get_error(tx_compl_ringbuf), "ring_buffer__new");
+
 	printf("prepare skb endpoint...\n");
 	server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000);
 	if (server_fd < 0)
@@ -472,15 +600,37 @@  int main(int argc, char *argv[])
 			error(1, -ret, "bpf_map_update_elem");
 	}
 
-	printf("attach bpf program...\n");
+	printf("attach rx bpf program...\n");
 	ret = bpf_xdp_attach(ifindex,
 			     bpf_program__fd(bpf_obj->progs.rx),
 			     XDP_FLAGS, NULL);
 	if (ret)
 		error(1, -ret, "bpf_xdp_attach");
 
+	printf("attach tx bpf programs...\n");
+	struct devtx_attach_args args = {
+		.ifindex = ifindex,
+		.devtx_sb_prog_fd = bpf_program__fd(bpf_obj->progs.devtx_sb),
+		.devtx_cp_prog_fd = bpf_program__fd(bpf_obj->progs.devtx_cp),
+		.devtx_sb_retval = -1,
+		.devtx_cp_retval = -1,
+	};
+	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattr,
+		.ctx_in = &args,
+		.ctx_size_in = sizeof(args),
+	);
+
+	syscall_fd = bpf_program__fd(bpf_obj->progs.attach_prog);
+	ret = bpf_prog_test_run_opts(syscall_fd, &tattr);
+	if (ret)
+		error(1, -ret, "bpf_prog_test_run_opts");
+	if (args.devtx_sb_retval < 0)
+		error(1, args.devtx_sb_retval, "devtx_sb_retval");
+	if (args.devtx_cp_retval < 0)
+		error(1, args.devtx_cp_retval, "devtx_cp_retval");
+
 	signal(SIGINT, handle_signal);
-	ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id);
+	ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id, tx_compl_ringbuf);
 	close(server_fd);
 	cleanup();
 	if (ret)