diff mbox series

[bpf-next,v9,5/5] selftests/bpf: Add selftest for XDP_REDIRECT in BPF_PROG_RUN

Message ID 20220306223404.60170-6-toke@redhat.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series Add support for transmitting packets using XDP in bpf_prog_run() | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 1 maintainers not CCed: linux-kselftest@vger.kernel.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: Macros with flow control statements should be avoided WARNING: Use of volatile is usually wrong: see Documentation/process/volatile-considered-harmful.rst WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 83 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next success VM_Test

Commit Message

Toke Høiland-Jørgensen March 6, 2022, 10:34 p.m. UTC
This adds a selftest for the XDP_REDIRECT facility in BPF_PROG_RUN, that
redirects packets into a veth and counts them using an XDP program on the
other side of the veth pair and a TC program on the local side of the veth.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
 .../bpf/prog_tests/xdp_do_redirect.c          | 176 ++++++++++++++++++
 .../bpf/progs/test_xdp_do_redirect.c          |  92 +++++++++
 2 files changed, 268 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c

Comments

Martin KaFai Lau March 8, 2022, 5:59 a.m. UTC | #1
On Sun, Mar 06, 2022 at 11:34:04PM +0100, Toke Høiland-Jørgensen wrote:

> +#define NUM_PKTS 1000000
It took my qemu 30s to run.
Would it have the same test coverage by lowering it to something
like 10000  ?

> +void test_xdp_do_redirect(void)
> +{
> +	int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst;
> +	char data[sizeof(pkt_udp) + sizeof(__u32)];
> +	struct test_xdp_do_redirect *skel = NULL;
> +	struct nstoken *nstoken = NULL;
> +	struct bpf_link *link;
> +
> +	struct xdp_md ctx_in = { .data = sizeof(__u32),
> +				 .data_end = sizeof(data) };
> +	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
> +			    .data_in = &data,
> +			    .data_size_in = sizeof(data),
> +			    .ctx_in = &ctx_in,
> +			    .ctx_size_in = sizeof(ctx_in),
> +			    .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
> +			    .repeat = NUM_PKTS,
> +			    .batch_size = 64,
> +		);
> +	DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
> +			    .attach_point = BPF_TC_INGRESS);
> +
> +	memcpy(&data[sizeof(__u32)], &pkt_udp, sizeof(pkt_udp));
> +	*((__u32 *)data) = 0x42; /* metadata test value */
> +
> +	skel = test_xdp_do_redirect__open();
> +	if (!ASSERT_OK_PTR(skel, "skel"))
> +		return;
> +
> +	/* The XDP program we run with bpf_prog_run() will cycle through all
> +	 * three xmit (PASS/TX/REDIRECT) return codes starting from above, and
> +	 * ending up with PASS, so we should end up with two packets on the dst
> +	 * iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP
> +	 * payload.
> +	 */
> +	SYS("ip netns add testns");
> +	nstoken = open_netns("testns");
> +	if (!ASSERT_OK_PTR(nstoken, "setns"))
> +		goto out;
> +
> +	SYS("ip link add veth_src type veth peer name veth_dst");
> +	SYS("ip link set dev veth_src address 00:11:22:33:44:55");
> +	SYS("ip link set dev veth_dst address 66:77:88:99:aa:bb");
> +	SYS("ip link set dev veth_src up");
> +	SYS("ip link set dev veth_dst up");
> +	SYS("ip addr add dev veth_src fc00::1/64");
> +	SYS("ip addr add dev veth_dst fc00::2/64");
> +	SYS("ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb");
> +
> +	/* We enable forwarding in the test namespace because that will cause
> +	 * the packets that go through the kernel stack (with XDP_PASS) to be
> +	 * forwarded back out the same interface (because of the packet dst
> +	 * combined with the interface addresses). When this happens, the
> +	 * regular forwarding path will end up going through the same
> +	 * veth_xdp_xmit() call as the XDP_REDIRECT code, which can cause a
> +	 * deadlock if it happens on the same CPU. There's a local_bh_disable()
> +	 * in the test_run code to prevent this, but an earlier version of the
> +	 * code didn't have this, so we keep the test behaviour to make sure the
> +	 * bug doesn't resurface.
> +	 */
> +	SYS("sysctl -qw net.ipv6.conf.all.forwarding=1");
> +
> +	ifindex_src = if_nametoindex("veth_src");
> +	ifindex_dst = if_nametoindex("veth_dst");
> +	if (!ASSERT_NEQ(ifindex_src, 0, "ifindex_src") ||
> +	    !ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst"))
> +		goto out;
> +
> +	memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN);
> +	skel->rodata->ifindex_out = ifindex_src; /* redirect back to the same iface */
> +	skel->rodata->ifindex_in = ifindex_src;
> +	ctx_in.ingress_ifindex = ifindex_src;
> +	tc_hook.ifindex = ifindex_src;
> +
> +	if (!ASSERT_OK(test_xdp_do_redirect__load(skel), "load"))
> +		goto out;
> +
> +	link = bpf_program__attach_xdp(skel->progs.xdp_count_pkts, ifindex_dst);
> +	if (!ASSERT_OK_PTR(link, "prog_attach"))
> +		goto out;
> +	skel->links.xdp_count_pkts = link;
> +
> +	tc_prog_fd = bpf_program__fd(skel->progs.tc_count_pkts);
> +	if (attach_tc_prog(&tc_hook, tc_prog_fd))
> +		goto out;
> +
> +	xdp_prog_fd = bpf_program__fd(skel->progs.xdp_redirect);
> +	err = bpf_prog_test_run_opts(xdp_prog_fd, &opts);
> +	if (!ASSERT_OK(err, "prog_run"))
> +		goto out_tc;
> +
> +	/* wait for the packets to be flushed */
> +	kern_sync_rcu();
> +
> +	/* There will be one packet sent through XDP_REDIRECT and one through
> +	 * XDP_TX; these will show up on the XDP counting program, while the
> +	 * rest will be counted at the TC ingress hook (and the counting program
> +	 * resets the packet payload so they don't get counted twice even though
> +	 * they are re-xmitted out the veth device)
> +	 */
> +	ASSERT_EQ(skel->bss->pkts_seen_xdp, 2, "pkt_count_xdp");
> +	ASSERT_EQ(skel->bss->pkts_seen_tc, NUM_PKTS - 2, "pkt_count_tc");
> +
> +out_tc:
> +	bpf_tc_hook_destroy(&tc_hook);
> +out:
> +	if (nstoken)
> +		close_netns(nstoken);
> +	system("ip netns del testns");
> +	test_xdp_do_redirect__destroy(skel);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
> new file mode 100644
> index 000000000000..d785f48304ea
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
> @@ -0,0 +1,92 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <vmlinux.h>
> +#include <bpf/bpf_helpers.h>
> +
> +#define ETH_ALEN 6
> +#define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
> +const volatile int ifindex_out;
> +const volatile int ifindex_in;
> +const volatile __u8 expect_dst[ETH_ALEN];
> +volatile int pkts_seen_xdp = 0;
> +volatile int pkts_seen_tc = 0;
> +volatile int retcode = XDP_REDIRECT;
> +
> +SEC("xdp")
> +int xdp_redirect(struct xdp_md *xdp)
> +{
> +	__u32 *metadata = (void *)(long)xdp->data_meta;
> +	void *data_end = (void *)(long)xdp->data_end;
> +	void *data = (void *)(long)xdp->data;
> +
> +	__u8 *payload = data + HDR_SZ;
> +	int ret = retcode;
> +
> +	if (payload + 1 > data_end)
> +		return XDP_ABORTED;
> +
> +	if (xdp->ingress_ifindex != ifindex_in)
> +		return XDP_ABORTED;
> +
> +	if (metadata + 1 > data)
> +		return XDP_ABORTED;
> +
> +	if (*metadata != 0x42)
> +		return XDP_ABORTED;
> +
> +	*payload = 0x42;
nit. How about also adding a pkts_seen_zero counter here, like
	if (*payload == 0) {
		*payload = 0x42;
		pkts_seen_zero++;
	}

and add ASSERT_EQ(skel->bss->pkts_seen_zero, 2, "pkt_count_zero")
to the prog_tests.  It can better show the recycled page's data
is not re-initialized.

> +
> +	if (bpf_xdp_adjust_meta(xdp, 4))
> +		return XDP_ABORTED;
> +
> +	if (retcode > XDP_PASS)
> +		retcode--;
> +
> +	if (ret == XDP_REDIRECT)
> +		return bpf_redirect(ifindex_out, 0);
> +
> +	return ret;
> +}
> +
> +static bool check_pkt(void *data, void *data_end)
> +{
> +	struct ipv6hdr *iph = data + sizeof(struct ethhdr);
> +	__u8 *payload = data + HDR_SZ;
> +
> +	if (payload + 1 > data_end)
> +		return false;
> +
> +	if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42)
> +		return false;
> +
> +	/* reset the payload so the same packet doesn't get counted twice when
> +	 * it cycles back through the kernel path and out the dst veth
> +	 */
> +	*payload = 0;
> +	return true;
> +}
> +
> +SEC("xdp")
> +int xdp_count_pkts(struct xdp_md *xdp)
> +{
> +	void *data = (void *)(long)xdp->data;
> +	void *data_end = (void *)(long)xdp->data_end;
> +
> +	if (check_pkt(data, data_end))
> +		pkts_seen_xdp++;
> +
> +	return XDP_DROP;
nit.  A comment here will be useful to explain XDP_DROP from
the xdp@veth@ingress will put the page back to the recycle
pool, which will be similar to xmit-ing out of a real NIC.

> +}
> +
> +SEC("tc")
> +int tc_count_pkts(struct __sk_buff *skb)
> +{
> +	void *data = (void *)(long)skb->data;
> +	void *data_end = (void *)(long)skb->data_end;
> +
> +	if (check_pkt(data, data_end))
> +		pkts_seen_tc++;
> +
> +	return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> -- 
> 2.35.1
>
Toke Høiland-Jørgensen March 8, 2022, 2:41 p.m. UTC | #2
Martin KaFai Lau <kafai@fb.com> writes:

> On Sun, Mar 06, 2022 at 11:34:04PM +0100, Toke Høiland-Jørgensen wrote:
>
>> +#define NUM_PKTS 1000000
> It took my qemu 30s to run.
> Would it have the same test coverage by lowering it to something
> like 10000  ?

Yikes! Sure, that should be fine I think!

>> +void test_xdp_do_redirect(void)
>> +{
>> +	int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst;
>> +	char data[sizeof(pkt_udp) + sizeof(__u32)];
>> +	struct test_xdp_do_redirect *skel = NULL;
>> +	struct nstoken *nstoken = NULL;
>> +	struct bpf_link *link;
>> +
>> +	struct xdp_md ctx_in = { .data = sizeof(__u32),
>> +				 .data_end = sizeof(data) };
>> +	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
>> +			    .data_in = &data,
>> +			    .data_size_in = sizeof(data),
>> +			    .ctx_in = &ctx_in,
>> +			    .ctx_size_in = sizeof(ctx_in),
>> +			    .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
>> +			    .repeat = NUM_PKTS,
>> +			    .batch_size = 64,
>> +		);
>> +	DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
>> +			    .attach_point = BPF_TC_INGRESS);
>> +
>> +	memcpy(&data[sizeof(__u32)], &pkt_udp, sizeof(pkt_udp));
>> +	*((__u32 *)data) = 0x42; /* metadata test value */
>> +
>> +	skel = test_xdp_do_redirect__open();
>> +	if (!ASSERT_OK_PTR(skel, "skel"))
>> +		return;
>> +
>> +	/* The XDP program we run with bpf_prog_run() will cycle through all
>> +	 * three xmit (PASS/TX/REDIRECT) return codes starting from above, and
>> +	 * ending up with PASS, so we should end up with two packets on the dst
>> +	 * iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP
>> +	 * payload.
>> +	 */
>> +	SYS("ip netns add testns");
>> +	nstoken = open_netns("testns");
>> +	if (!ASSERT_OK_PTR(nstoken, "setns"))
>> +		goto out;
>> +
>> +	SYS("ip link add veth_src type veth peer name veth_dst");
>> +	SYS("ip link set dev veth_src address 00:11:22:33:44:55");
>> +	SYS("ip link set dev veth_dst address 66:77:88:99:aa:bb");
>> +	SYS("ip link set dev veth_src up");
>> +	SYS("ip link set dev veth_dst up");
>> +	SYS("ip addr add dev veth_src fc00::1/64");
>> +	SYS("ip addr add dev veth_dst fc00::2/64");
>> +	SYS("ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb");
>> +
>> +	/* We enable forwarding in the test namespace because that will cause
>> +	 * the packets that go through the kernel stack (with XDP_PASS) to be
>> +	 * forwarded back out the same interface (because of the packet dst
>> +	 * combined with the interface addresses). When this happens, the
>> +	 * regular forwarding path will end up going through the same
>> +	 * veth_xdp_xmit() call as the XDP_REDIRECT code, which can cause a
>> +	 * deadlock if it happens on the same CPU. There's a local_bh_disable()
>> +	 * in the test_run code to prevent this, but an earlier version of the
>> +	 * code didn't have this, so we keep the test behaviour to make sure the
>> +	 * bug doesn't resurface.
>> +	 */
>> +	SYS("sysctl -qw net.ipv6.conf.all.forwarding=1");
>> +
>> +	ifindex_src = if_nametoindex("veth_src");
>> +	ifindex_dst = if_nametoindex("veth_dst");
>> +	if (!ASSERT_NEQ(ifindex_src, 0, "ifindex_src") ||
>> +	    !ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst"))
>> +		goto out;
>> +
>> +	memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN);
>> +	skel->rodata->ifindex_out = ifindex_src; /* redirect back to the same iface */
>> +	skel->rodata->ifindex_in = ifindex_src;
>> +	ctx_in.ingress_ifindex = ifindex_src;
>> +	tc_hook.ifindex = ifindex_src;
>> +
>> +	if (!ASSERT_OK(test_xdp_do_redirect__load(skel), "load"))
>> +		goto out;
>> +
>> +	link = bpf_program__attach_xdp(skel->progs.xdp_count_pkts, ifindex_dst);
>> +	if (!ASSERT_OK_PTR(link, "prog_attach"))
>> +		goto out;
>> +	skel->links.xdp_count_pkts = link;
>> +
>> +	tc_prog_fd = bpf_program__fd(skel->progs.tc_count_pkts);
>> +	if (attach_tc_prog(&tc_hook, tc_prog_fd))
>> +		goto out;
>> +
>> +	xdp_prog_fd = bpf_program__fd(skel->progs.xdp_redirect);
>> +	err = bpf_prog_test_run_opts(xdp_prog_fd, &opts);
>> +	if (!ASSERT_OK(err, "prog_run"))
>> +		goto out_tc;
>> +
>> +	/* wait for the packets to be flushed */
>> +	kern_sync_rcu();
>> +
>> +	/* There will be one packet sent through XDP_REDIRECT and one through
>> +	 * XDP_TX; these will show up on the XDP counting program, while the
>> +	 * rest will be counted at the TC ingress hook (and the counting program
>> +	 * resets the packet payload so they don't get counted twice even though
>> +	 * they are re-xmitted out the veth device)
>> +	 */
>> +	ASSERT_EQ(skel->bss->pkts_seen_xdp, 2, "pkt_count_xdp");
>> +	ASSERT_EQ(skel->bss->pkts_seen_tc, NUM_PKTS - 2, "pkt_count_tc");
>> +
>> +out_tc:
>> +	bpf_tc_hook_destroy(&tc_hook);
>> +out:
>> +	if (nstoken)
>> +		close_netns(nstoken);
>> +	system("ip netns del testns");
>> +	test_xdp_do_redirect__destroy(skel);
>> +}
>> diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
>> new file mode 100644
>> index 000000000000..d785f48304ea
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
>> @@ -0,0 +1,92 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +#include <vmlinux.h>
>> +#include <bpf/bpf_helpers.h>
>> +
>> +#define ETH_ALEN 6
>> +#define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
>> +const volatile int ifindex_out;
>> +const volatile int ifindex_in;
>> +const volatile __u8 expect_dst[ETH_ALEN];
>> +volatile int pkts_seen_xdp = 0;
>> +volatile int pkts_seen_tc = 0;
>> +volatile int retcode = XDP_REDIRECT;
>> +
>> +SEC("xdp")
>> +int xdp_redirect(struct xdp_md *xdp)
>> +{
>> +	__u32 *metadata = (void *)(long)xdp->data_meta;
>> +	void *data_end = (void *)(long)xdp->data_end;
>> +	void *data = (void *)(long)xdp->data;
>> +
>> +	__u8 *payload = data + HDR_SZ;
>> +	int ret = retcode;
>> +
>> +	if (payload + 1 > data_end)
>> +		return XDP_ABORTED;
>> +
>> +	if (xdp->ingress_ifindex != ifindex_in)
>> +		return XDP_ABORTED;
>> +
>> +	if (metadata + 1 > data)
>> +		return XDP_ABORTED;
>> +
>> +	if (*metadata != 0x42)
>> +		return XDP_ABORTED;
>> +
>> +	*payload = 0x42;
> nit. How about also adding a pkts_seen_zero counter here, like
> 	if (*payload == 0) {
> 		*payload = 0x42;
> 		pkts_seen_zero++;
> 	}
>
> and add ASSERT_EQ(skel->bss->pkts_seen_zero, 2, "pkt_count_zero")
> to the prog_tests.  It can better show the recycled page's data
> is not re-initialized.

Good idea, will add!

>> +
>> +	if (bpf_xdp_adjust_meta(xdp, 4))
>> +		return XDP_ABORTED;
>> +
>> +	if (retcode > XDP_PASS)
>> +		retcode--;
>> +
>> +	if (ret == XDP_REDIRECT)
>> +		return bpf_redirect(ifindex_out, 0);
>> +
>> +	return ret;
>> +}
>> +
>> +static bool check_pkt(void *data, void *data_end)
>> +{
>> +	struct ipv6hdr *iph = data + sizeof(struct ethhdr);
>> +	__u8 *payload = data + HDR_SZ;
>> +
>> +	if (payload + 1 > data_end)
>> +		return false;
>> +
>> +	if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42)
>> +		return false;
>> +
>> +	/* reset the payload so the same packet doesn't get counted twice when
>> +	 * it cycles back through the kernel path and out the dst veth
>> +	 */
>> +	*payload = 0;
>> +	return true;
>> +}
>> +
>> +SEC("xdp")
>> +int xdp_count_pkts(struct xdp_md *xdp)
>> +{
>> +	void *data = (void *)(long)xdp->data;
>> +	void *data_end = (void *)(long)xdp->data_end;
>> +
>> +	if (check_pkt(data, data_end))
>> +		pkts_seen_xdp++;
>> +
>> +	return XDP_DROP;
> nit.  A comment here will be useful to explain XDP_DROP from
> the xdp@veth@ingress will put the page back to the recycle
> pool, which will be similar to xmit-ing out of a real NIC.

Sure, can do.

-Toke
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
new file mode 100644
index 000000000000..9024bb24c204
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -0,0 +1,176 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <net/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <linux/udp.h>
+#include <bpf/bpf_endian.h>
+#include "test_xdp_do_redirect.skel.h"
+
+#define SYS(fmt, ...)						\
+	({							\
+		char cmd[1024];					\
+		snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);	\
+		if (!ASSERT_OK(system(cmd), cmd))		\
+			goto out;				\
+	})
+
+struct udp_packet {
+	struct ethhdr eth;
+	struct ipv6hdr iph;
+	struct udphdr udp;
+	__u8 payload[64 - sizeof(struct udphdr)
+		     - sizeof(struct ethhdr) - sizeof(struct ipv6hdr)];
+} __packed;
+
+static struct udp_packet pkt_udp = {
+	.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+	.eth.h_dest = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
+	.eth.h_source = {0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb},
+	.iph.version = 6,
+	.iph.nexthdr = IPPROTO_UDP,
+	.iph.payload_len = bpf_htons(sizeof(struct udp_packet)
+				     - offsetof(struct udp_packet, udp)),
+	.iph.hop_limit = 2,
+	.iph.saddr.s6_addr16 = {bpf_htons(0xfc00), 0, 0, 0, 0, 0, 0, bpf_htons(1)},
+	.iph.daddr.s6_addr16 = {bpf_htons(0xfc00), 0, 0, 0, 0, 0, 0, bpf_htons(2)},
+	.udp.source = bpf_htons(1),
+	.udp.dest = bpf_htons(1),
+	.udp.len = bpf_htons(sizeof(struct udp_packet)
+			     - offsetof(struct udp_packet, udp)),
+	.payload = {0x42}, /* receiver XDP program matches on this */
+};
+
+static int attach_tc_prog(struct bpf_tc_hook *hook, int fd)
+{
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, .prog_fd = fd);
+	int ret;
+
+	ret = bpf_tc_hook_create(hook);
+	if (!ASSERT_OK(ret, "create tc hook"))
+		return ret;
+
+	ret = bpf_tc_attach(hook, &opts);
+	if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+		bpf_tc_hook_destroy(hook);
+		return ret;
+	}
+
+	return 0;
+}
+
+#define NUM_PKTS 1000000
+void test_xdp_do_redirect(void)
+{
+	int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst;
+	char data[sizeof(pkt_udp) + sizeof(__u32)];
+	struct test_xdp_do_redirect *skel = NULL;
+	struct nstoken *nstoken = NULL;
+	struct bpf_link *link;
+
+	struct xdp_md ctx_in = { .data = sizeof(__u32),
+				 .data_end = sizeof(data) };
+	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+			    .data_in = &data,
+			    .data_size_in = sizeof(data),
+			    .ctx_in = &ctx_in,
+			    .ctx_size_in = sizeof(ctx_in),
+			    .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
+			    .repeat = NUM_PKTS,
+			    .batch_size = 64,
+		);
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+			    .attach_point = BPF_TC_INGRESS);
+
+	memcpy(&data[sizeof(__u32)], &pkt_udp, sizeof(pkt_udp));
+	*((__u32 *)data) = 0x42; /* metadata test value */
+
+	skel = test_xdp_do_redirect__open();
+	if (!ASSERT_OK_PTR(skel, "skel"))
+		return;
+
+	/* The XDP program we run with bpf_prog_run() will cycle through all
+	 * three xmit (PASS/TX/REDIRECT) return codes starting from above, and
+	 * ending up with PASS, so we should end up with two packets on the dst
+	 * iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP
+	 * payload.
+	 */
+	SYS("ip netns add testns");
+	nstoken = open_netns("testns");
+	if (!ASSERT_OK_PTR(nstoken, "setns"))
+		goto out;
+
+	SYS("ip link add veth_src type veth peer name veth_dst");
+	SYS("ip link set dev veth_src address 00:11:22:33:44:55");
+	SYS("ip link set dev veth_dst address 66:77:88:99:aa:bb");
+	SYS("ip link set dev veth_src up");
+	SYS("ip link set dev veth_dst up");
+	SYS("ip addr add dev veth_src fc00::1/64");
+	SYS("ip addr add dev veth_dst fc00::2/64");
+	SYS("ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb");
+
+	/* We enable forwarding in the test namespace because that will cause
+	 * the packets that go through the kernel stack (with XDP_PASS) to be
+	 * forwarded back out the same interface (because of the packet dst
+	 * combined with the interface addresses). When this happens, the
+	 * regular forwarding path will end up going through the same
+	 * veth_xdp_xmit() call as the XDP_REDIRECT code, which can cause a
+	 * deadlock if it happens on the same CPU. There's a local_bh_disable()
+	 * in the test_run code to prevent this, but an earlier version of the
+	 * code didn't have this, so we keep the test behaviour to make sure the
+	 * bug doesn't resurface.
+	 */
+	SYS("sysctl -qw net.ipv6.conf.all.forwarding=1");
+
+	ifindex_src = if_nametoindex("veth_src");
+	ifindex_dst = if_nametoindex("veth_dst");
+	if (!ASSERT_NEQ(ifindex_src, 0, "ifindex_src") ||
+	    !ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst"))
+		goto out;
+
+	memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN);
+	skel->rodata->ifindex_out = ifindex_src; /* redirect back to the same iface */
+	skel->rodata->ifindex_in = ifindex_src;
+	ctx_in.ingress_ifindex = ifindex_src;
+	tc_hook.ifindex = ifindex_src;
+
+	if (!ASSERT_OK(test_xdp_do_redirect__load(skel), "load"))
+		goto out;
+
+	link = bpf_program__attach_xdp(skel->progs.xdp_count_pkts, ifindex_dst);
+	if (!ASSERT_OK_PTR(link, "prog_attach"))
+		goto out;
+	skel->links.xdp_count_pkts = link;
+
+	tc_prog_fd = bpf_program__fd(skel->progs.tc_count_pkts);
+	if (attach_tc_prog(&tc_hook, tc_prog_fd))
+		goto out;
+
+	xdp_prog_fd = bpf_program__fd(skel->progs.xdp_redirect);
+	err = bpf_prog_test_run_opts(xdp_prog_fd, &opts);
+	if (!ASSERT_OK(err, "prog_run"))
+		goto out_tc;
+
+	/* wait for the packets to be flushed */
+	kern_sync_rcu();
+
+	/* There will be one packet sent through XDP_REDIRECT and one through
+	 * XDP_TX; these will show up on the XDP counting program, while the
+	 * rest will be counted at the TC ingress hook (and the counting program
+	 * resets the packet payload so they don't get counted twice even though
+	 * they are re-xmitted out the veth device)
+	 */
+	ASSERT_EQ(skel->bss->pkts_seen_xdp, 2, "pkt_count_xdp");
+	ASSERT_EQ(skel->bss->pkts_seen_tc, NUM_PKTS - 2, "pkt_count_tc");
+
+out_tc:
+	bpf_tc_hook_destroy(&tc_hook);
+out:
+	if (nstoken)
+		close_netns(nstoken);
+	system("ip netns del testns");
+	test_xdp_do_redirect__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
new file mode 100644
index 000000000000..d785f48304ea
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
@@ -0,0 +1,92 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define ETH_ALEN 6
+#define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
+const volatile int ifindex_out;
+const volatile int ifindex_in;
+const volatile __u8 expect_dst[ETH_ALEN];
+volatile int pkts_seen_xdp = 0;
+volatile int pkts_seen_tc = 0;
+volatile int retcode = XDP_REDIRECT;
+
+SEC("xdp")
+int xdp_redirect(struct xdp_md *xdp)
+{
+	__u32 *metadata = (void *)(long)xdp->data_meta;
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+
+	__u8 *payload = data + HDR_SZ;
+	int ret = retcode;
+
+	if (payload + 1 > data_end)
+		return XDP_ABORTED;
+
+	if (xdp->ingress_ifindex != ifindex_in)
+		return XDP_ABORTED;
+
+	if (metadata + 1 > data)
+		return XDP_ABORTED;
+
+	if (*metadata != 0x42)
+		return XDP_ABORTED;
+
+	*payload = 0x42;
+
+	if (bpf_xdp_adjust_meta(xdp, 4))
+		return XDP_ABORTED;
+
+	if (retcode > XDP_PASS)
+		retcode--;
+
+	if (ret == XDP_REDIRECT)
+		return bpf_redirect(ifindex_out, 0);
+
+	return ret;
+}
+
+static bool check_pkt(void *data, void *data_end)
+{
+	struct ipv6hdr *iph = data + sizeof(struct ethhdr);
+	__u8 *payload = data + HDR_SZ;
+
+	if (payload + 1 > data_end)
+		return false;
+
+	if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42)
+		return false;
+
+	/* reset the payload so the same packet doesn't get counted twice when
+	 * it cycles back through the kernel path and out the dst veth
+	 */
+	*payload = 0;
+	return true;
+}
+
+SEC("xdp")
+int xdp_count_pkts(struct xdp_md *xdp)
+{
+	void *data = (void *)(long)xdp->data;
+	void *data_end = (void *)(long)xdp->data_end;
+
+	if (check_pkt(data, data_end))
+		pkts_seen_xdp++;
+
+	return XDP_DROP;
+}
+
+SEC("tc")
+int tc_count_pkts(struct __sk_buff *skb)
+{
+	void *data = (void *)(long)skb->data;
+	void *data_end = (void *)(long)skb->data_end;
+
+	if (check_pkt(data, data_end))
+		pkts_seen_tc++;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";