diff mbox series

[RFC,bpf-next,RESEND,16/16] selftests/crib: Add test for dumping/restoring UDP socket packets

Message ID AM6PR03MB5848FD102CAF71CC35E175FD99A52@AM6PR03MB5848.eurprd03.prod.outlook.com (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series bpf: Checkpoint/Restore In eBPF (CRIB) | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-7 pending Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 pending Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 fail Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 fail Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 fail Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 fail Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 fail Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 fail Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
netdev/series_format fail Series longer than 15 patches
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 816 this patch: 816
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 2 maintainers not CCed: linux-kselftest@vger.kernel.org shuah@kernel.org
netdev/build_clang success Errors and warnings before: 821 this patch: 821
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 826 this patch: 826
netdev/checkpatch warning CHECK: Alignment should match open parenthesis WARNING: Improper SPDX comment style for 'tools/testing/selftests/crib/test_restore_udp_socket.h', please use '/*' instead WARNING: Missing a blank line after declarations WARNING: Missing or malformed SPDX-License-Identifier tag in line 1 WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: externs should be avoided in .c files WARNING: line length of 100 exceeds 80 columns WARNING: line length of 103 exceeds 80 columns WARNING: line length of 104 exceeds 80 columns WARNING: line length of 105 exceeds 80 columns WARNING: line length of 108 exceeds 80 columns WARNING: line length of 110 exceeds 80 columns WARNING: line length of 112 exceeds 80 columns WARNING: line length of 113 exceeds 80 columns WARNING: line length of 114 exceeds 80 columns WARNING: line length of 115 exceeds 80 columns WARNING: line length of 116 exceeds 80 columns WARNING: line length of 118 exceeds 80 columns WARNING: line length of 119 exceeds 80 columns WARNING: line length of 147 exceeds 80 columns WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns WARNING: line length of 97 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns WARNING: line length of 99 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Juntong Deng July 11, 2024, 11:19 a.m. UTC
In this test, the packets queued on a UDP socket are dumped and restored
through CRIB, covering the write queue, the receive queue and the reader queue.

A "checkpoint socket" and a "restore socket" are created. The packets in
the write/receive queues of the "checkpoint socket" are dumped and
restored to the "restore socket". The "restore socket" is then checked
to verify that it can receive and send the restored packets normally.

Write queue packets are not restored through the CRIB eBPF program
in this test, because reimplementing the entire UDP send path in eBPF
would be unwise. Replaying them with a regular send() is the better choice.

Signed-off-by: Juntong Deng <juntong.deng@outlook.com>
---
 .../crib/test_restore_udp_socket.bpf.c        | 311 ++++++++++++++++
 .../selftests/crib/test_restore_udp_socket.c  | 333 ++++++++++++++++++
 .../selftests/crib/test_restore_udp_socket.h  |  51 +++
 3 files changed, 695 insertions(+)
 create mode 100644 tools/testing/selftests/crib/test_restore_udp_socket.bpf.c
 create mode 100644 tools/testing/selftests/crib/test_restore_udp_socket.c
 create mode 100644 tools/testing/selftests/crib/test_restore_udp_socket.h
diff mbox series

Patch

diff --git a/tools/testing/selftests/crib/test_restore_udp_socket.bpf.c b/tools/testing/selftests/crib/test_restore_udp_socket.bpf.c
new file mode 100644
index 000000000000..527ee6d72256
--- /dev/null
+++ b/tools/testing/selftests/crib/test_restore_udp_socket.bpf.c
@@ -0,0 +1,311 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author:
+ *	Juntong Deng <juntong.deng@outlook.com>
+ */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#include "test_restore_udp_socket.h"
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
+
+/*
+ * Kernel-to-user ring buffer: the dump program reserves and submits its
+ * event records here.
+ */
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	__uint(max_entries, 100000);
+} rb SEC(".maps");
+
+/*
+ * User-to-kernel ring buffer: restore_socket_queue() drains it and hands
+ * each sample to handle_restore_event().
+ */
+struct {
+	__uint(type, BPF_MAP_TYPE_USER_RINGBUF);
+	__uint(max_entries, 100000);
+} urb SEC(".maps");
+
+/*
+ * Kernel functions used by this test. They are declared extern and tagged
+ * __ksym, so they are resolved against kernel symbols rather than defined
+ * in this file.
+ */
+
+/* Task lookup by pid and the matching release. */
+extern struct task_struct *bpf_task_from_vpid(pid_t vpid) __ksym;
+extern void bpf_task_release(struct task_struct *p) __ksym;
+
+/* Socket lookup by (task, fd) and the matching release. */
+extern struct sock *bpf_sock_from_task_fd(struct task_struct *task, int fd) __ksym;
+extern void bpf_sock_release(struct sock *sk) __ksym;
+
+/* Accessors for the UDP socket and its packet queues. */
+extern struct udp_sock *bpf_udp_sock_from_sock(struct sock *sk) __ksym;
+extern struct sk_buff_head *bpf_receive_queue_from_sock(struct sock *sk)  __ksym;
+extern struct sk_buff_head *bpf_write_queue_from_sock(struct sock *sk) __ksym;
+extern struct sk_buff_head *bpf_reader_queue_from_udp_sock(struct udp_sock *up) __ksym;
+
+/* Iterator over the skbs of a queue. */
+extern int bpf_iter_skb_new(struct bpf_iter_skb *it, struct sk_buff_head *head) __ksym;
+extern struct sk_buff *bpf_iter_skb_next(struct bpf_iter_skb *it) __ksym;
+extern void bpf_iter_skb_destroy(struct bpf_iter_skb *it) __ksym;
+
+/* Iterator over the data of one skb, copied into a caller-supplied buffer. */
+extern int bpf_iter_skb_data_new(struct bpf_iter_skb_data *it, struct sk_buff *skb, char *buf, int buflen) __ksym;
+extern char *bpf_iter_skb_data_next(struct bpf_iter_skb_data *it) __ksym;
+extern void bpf_iter_skb_data_set_buf(struct bpf_iter_skb_data *it, char *buf, int buflen) __ksym;
+extern int bpf_iter_skb_data_get_chunk_len(struct bpf_iter_skb_data *it) __ksym;
+extern int bpf_iter_skb_data_get_offset(struct bpf_iter_skb_data *it) __ksym;
+extern void bpf_iter_skb_data_destroy(struct bpf_iter_skb_data *it) __ksym;
+
+/* Miscellaneous skb helpers. */
+extern int bpf_cal_skb_size(struct sk_buff *skb) __ksym;
+extern struct sk_buff *bpf_skb_peek_tail(struct sk_buff_head *head) __ksym;
+extern void bpf_skb_release(struct sk_buff *skb) __ksym;
+
+/* Restore side: create an skb on a receive-side queue and fill in its data. */
+extern struct sk_buff *bpf_restore_skb_rcv_queue(struct sk_buff_head *head, struct sock *sk,
+						 struct bpf_crib_skb_info *skb_info) __ksym;
+extern int bpf_restore_skb_data(struct sk_buff *skb, int offset, char *data, int len) __ksym;
+
+/*
+ * Dump the data of @skb to the "rb" ring buffer as a sequence of
+ * EVENT_TYPE_SKB_DATA records.
+ *
+ * Each record is tagged with @subtype and @skb_num and carries the chunk
+ * length and offset reported by the skb data iterator.
+ *
+ * Returns 0 on success, -2 if a ring buffer reservation fails.
+ */
+static int dump_skb_data(struct sk_buff *skb, int subtype, int skb_num)
+{
+	struct bpf_iter_skb_data skb_data_it;
+	int err = 0;
+
+	/*
+	 * Since bpf_iter_skb_data_next will dump the skb data into the buffer,
+	 * the buffer needs to be allocated in advance
+	 */
+	struct event_skb_data *e_skb_data;
+	e_skb_data = bpf_ringbuf_reserve(&rb, sizeof(struct event_skb_data), 0);
+	if (!e_skb_data) {
+		err = -2;
+		goto error_buf;
+	}
+
+	bpf_iter_skb_data_new(&skb_data_it, skb, e_skb_data->buf, sizeof(e_skb_data->buf));
+	while (bpf_iter_skb_data_next(&skb_data_it)) {
+		/* The iterator has filled e_skb_data->buf; complete the record */
+		e_skb_data->hdr.type = EVENT_TYPE_SKB_DATA;
+		e_skb_data->hdr.subtype = subtype;
+		e_skb_data->skb_num = skb_num;
+		e_skb_data->chunk_length = bpf_iter_skb_data_get_chunk_len(&skb_data_it);
+		e_skb_data->offset = bpf_iter_skb_data_get_offset(&skb_data_it);
+		bpf_ringbuf_submit(e_skb_data, 0);
+
+		/*
+		 * For the same reason as above, the buffer used in
+		 * the next iteration needs to be allocated now
+		 */
+		e_skb_data = bpf_ringbuf_reserve(&rb, sizeof(struct event_skb_data), 0);
+		if (!e_skb_data) {
+			/* Nothing is reserved here, so only the iterator needs cleanup */
+			err = -2;
+			goto error_in_buf;
+		}
+
+		bpf_iter_skb_data_set_buf(&skb_data_it, e_skb_data->buf, sizeof(e_skb_data->buf));
+	}
+	/* Discard the pre-allocated buffer in the last iteration (it will not be used) */
+	bpf_ringbuf_discard(e_skb_data, 0);
+
+error_in_buf:
+	bpf_iter_skb_data_destroy(&skb_data_it);
+error_buf:
+	return err;
+}
+
+/*
+ * Walk every skb queued on @head and publish it to the "rb" ring buffer:
+ * one EVENT_TYPE_SKB record with the skb metadata, followed by the
+ * EVENT_TYPE_SKB_DATA records emitted by dump_skb_data().
+ *
+ * @subtype tags every record with the queue it came from. skbs are numbered
+ * from 0 in iteration order.
+ *
+ * Returns 0 on success, -2 if a ring buffer reservation fails and -1 if
+ * dumping the data of an skb fails.
+ */
+static int dump_all_queue_skb(struct sk_buff_head *head, int subtype)
+{
+	struct bpf_iter_skb skb_it;
+	struct sk_buff *cur_skb;
+	int skb_num = 0;
+	int err = 0;
+
+	bpf_iter_skb_new(&skb_it, head);
+	while ((cur_skb = bpf_iter_skb_next(&skb_it))) {
+		/* Named skb_* so they do not shadow the @head parameter */
+		unsigned char *skb_head, *skb_data;
+		struct event_skb *e_skb;
+
+		e_skb = bpf_ringbuf_reserve(&rb, sizeof(struct event_skb), 0);
+		if (!e_skb) {
+			err = -2;
+			goto error;
+		}
+
+		e_skb->hdr.type = EVENT_TYPE_SKB;
+		e_skb->hdr.subtype = subtype;
+		e_skb->skb_num = skb_num;
+		e_skb->len = BPF_CORE_READ(cur_skb, len);
+		e_skb->tstamp = BPF_CORE_READ(cur_skb, tstamp);
+		e_skb->dev_scratch = BPF_CORE_READ(cur_skb, dev_scratch);
+		e_skb->protocol = BPF_CORE_READ(cur_skb, protocol);
+		e_skb->transport_header = BPF_CORE_READ(cur_skb, transport_header);
+		e_skb->network_header = BPF_CORE_READ(cur_skb, network_header);
+		e_skb->mac_header = BPF_CORE_READ(cur_skb, mac_header);
+		e_skb->csum = BPF_CORE_READ(cur_skb, csum);
+		e_skb->size = bpf_cal_skb_size(cur_skb);
+
+		/* Headroom: distance from the start of the buffer to the data */
+		skb_head = BPF_CORE_READ(cur_skb, head);
+		skb_data = BPF_CORE_READ(cur_skb, data);
+		e_skb->headerlen = skb_data - skb_head;
+
+		bpf_ringbuf_submit(e_skb, 0);
+
+		if (dump_skb_data(cur_skb, subtype, skb_num) != 0) {
+			err = -1;
+			goto error;
+		}
+
+		skb_num++;
+	}
+error:
+	bpf_iter_skb_destroy(&skb_it);
+	return err;
+}
+
+/*
+ * Dump every skb on the write queue of @sk.
+ *
+ * Kept static like the other helpers in this file so that it is not emitted
+ * as a global BPF subprogram.
+ *
+ * Returns the result of dump_all_queue_skb().
+ */
+static int dump_write_queue_skb(struct sock *sk)
+{
+	struct sk_buff_head *write_queue_head = bpf_write_queue_from_sock(sk);
+
+	return dump_all_queue_skb(write_queue_head, EVENT_SUBTYPE_WRITE_QUEUE);
+}
+
+/*
+ * Dump every skb on the receive queue of @sk.
+ *
+ * Kept static like the other helpers in this file so that it is not emitted
+ * as a global BPF subprogram.
+ *
+ * Returns the result of dump_all_queue_skb().
+ */
+static int dump_receive_queue_skb(struct sock *sk)
+{
+	struct sk_buff_head *receive_queue_head = bpf_receive_queue_from_sock(sk);
+
+	return dump_all_queue_skb(receive_queue_head, EVENT_SUBTYPE_RECEIVE_QUEUE);
+}
+
+/*
+ * Dump every skb on the UDP reader queue of @sk.
+ *
+ * Kept static like the other helpers in this file so that it is not emitted
+ * as a global BPF subprogram.
+ *
+ * NOTE(review): the result of bpf_udp_sock_from_sock() is passed on without
+ * a NULL check - confirm the kfunc cannot return NULL for this socket.
+ *
+ * Returns the result of dump_all_queue_skb().
+ */
+static int dump_reader_queue_skb(struct sock *sk)
+{
+	struct udp_sock *up = bpf_udp_sock_from_sock(sk);
+	struct sk_buff_head *reader_queue_head = bpf_reader_queue_from_udp_sock(up);
+
+	return dump_all_queue_skb(reader_queue_head, EVENT_SUBTYPE_READER_QUEUE);
+}
+
+/*
+ * CRIB program entry point: dump every queued packet of one socket.
+ *
+ * @arg->pid and @arg->fd identify the socket. The write, receive and reader
+ * queues are dumped in that order to the "rb" ring buffer, followed by an
+ * EVENT_TYPE_END record that tells user space the dump is complete.
+ *
+ * The END record is only published when all three queues were dumped
+ * successfully, so user space never starts a restore from a truncated dump.
+ *
+ * Returns 0 on success and a negative value on failure: -1 if the task or
+ * the socket cannot be looked up, the error of the first failing queue dump,
+ * or -2 if the END record cannot be reserved.
+ */
+SEC("crib")
+int dump_socket_queue(struct prog_args *arg)
+{
+	struct event_hdr *e_dump_end;
+	struct task_struct *task;
+	struct sock *sk;
+	int err;
+
+	task = bpf_task_from_vpid(arg->pid);
+	if (!task)
+		return -1;
+
+	sk = bpf_sock_from_task_fd(task, arg->fd);
+	if (!sk) {
+		err = -1;
+		goto out_release_task;
+	}
+
+	err = dump_write_queue_skb(sk);
+	if (err)
+		goto out_release_sock;
+
+	err = dump_receive_queue_skb(sk);
+	if (err)
+		goto out_release_sock;
+
+	err = dump_reader_queue_skb(sk);
+	if (err)
+		goto out_release_sock;
+
+	e_dump_end = bpf_ringbuf_reserve(&rb, sizeof(struct event_hdr), 0);
+	if (!e_dump_end) {
+		err = -2;
+		goto out_release_sock;
+	}
+
+	e_dump_end->type = EVENT_TYPE_END;
+	/* The END record has no subtype; do not publish a stale value */
+	e_dump_end->subtype = 0;
+	bpf_ringbuf_submit(e_dump_end, 0);
+
+out_release_sock:
+	bpf_sock_release(sk);
+out_release_task:
+	bpf_task_release(task);
+	return err;
+}
+
+/*
+ * Restore one chunk of packet data, described by @e_skb_data, into the skb
+ * at the tail of @head.
+ *
+ * Presumably the tail skb is the one created by the most recent
+ * EVENT_TYPE_SKB record for this queue - this relies on user space
+ * submitting the records in dump order.
+ *
+ * NOTE(review): the return value of bpf_restore_skb_data() is ignored, so a
+ * failed copy still reports success - confirm whether that is intended.
+ *
+ * Returns 0 if a tail skb was found, -1 otherwise.
+ */
+static int handle_restore_skb_data(struct event_skb_data *e_skb_data, struct sk_buff_head *head)
+{
+	struct sk_buff *skb = bpf_skb_peek_tail(head);
+	if (!skb)
+		return -1;
+
+	bpf_restore_skb_data(skb, e_skb_data->offset, e_skb_data->buf, e_skb_data->chunk_length);
+
+	/* Drop the skb obtained from bpf_skb_peek_tail() */
+	bpf_skb_release(skb);
+	return 0;
+}
+
+/*
+ * Create a new skb on @head for socket @sk from the metadata in @e_skb.
+ *
+ * The packet data itself arrives in later EVENT_TYPE_SKB_DATA records and is
+ * filled in by handle_restore_skb_data().
+ *
+ * Returns 0 on success, -1 if the skb could not be created.
+ */
+static int handle_restore_skb(struct event_skb *e_skb, struct sk_buff_head *head, struct sock *sk)
+{
+	/*
+	 * A designated initializer zeroes every member that is not named
+	 * here, so no uninitialized stack bytes are handed to the kfunc.
+	 */
+	struct bpf_crib_skb_info skb_info = {
+		.headerlen = e_skb->headerlen,
+		.len = e_skb->len,
+		.size = e_skb->size,
+		.tstamp = e_skb->tstamp,
+		.dev_scratch = e_skb->dev_scratch,
+		.protocol = e_skb->protocol,
+		.csum = e_skb->csum,
+		.transport_header = e_skb->transport_header,
+		.network_header = e_skb->network_header,
+		.mac_header = e_skb->mac_header,
+	};
+	struct sk_buff *skb;
+
+	skb = bpf_restore_skb_rcv_queue(head, sk, &skb_info);
+	if (!skb)
+		return -1;
+
+	/* Only creation matters here; drop the returned skb again */
+	bpf_skb_release(skb);
+	return 0;
+}
+
+/*
+ * bpf_user_ringbuf_drain() callback: apply one restore record.
+ *
+ * @dynptr wraps one sample submitted by user space; @context is the
+ * struct prog_args naming the socket to restore into. The task and socket
+ * are looked up, and released again, for every sample.
+ *
+ * EVENT_TYPE_SKB records create a new skb and EVENT_TYPE_SKB_DATA records
+ * fill in packet data, on the receive or reader queue selected by the
+ * subtype. Any other type or subtype is ignored.
+ *
+ * Returns 0 after handling a sample, or 1 if the task, the socket or the
+ * sample data cannot be obtained.
+ *
+ * NOTE(review): the results of handle_restore_skb() and
+ * handle_restore_skb_data() are ignored - confirm that is intended.
+ */
+static long handle_restore_event(struct bpf_dynptr *dynptr, void *context)
+{
+	struct prog_args *arg_context = (struct prog_args *)context;
+	int err = 0;
+
+	struct task_struct *task = bpf_task_from_vpid(arg_context->pid);
+	if (!task) {
+		err = 1;
+		goto error;
+	}
+
+	struct sock *sk = bpf_sock_from_task_fd(task, arg_context->fd);
+	if (!sk) {
+		err = 1;
+		goto error_sock;
+	}
+
+	struct udp_sock *up = bpf_udp_sock_from_sock(sk);
+
+	struct sk_buff_head *reader_queue = bpf_reader_queue_from_udp_sock(up);
+	struct sk_buff_head *receive_queue = bpf_receive_queue_from_sock(sk);
+
+	/* Read only the common header first to learn the record type */
+	struct event_hdr *e_hdr = bpf_dynptr_data(dynptr, 0, sizeof(struct event_hdr));
+	if (!e_hdr) {
+		err = 1;
+		goto error_dynptr;
+	}
+
+	if (e_hdr->type == EVENT_TYPE_SKB) {
+		/* Request the sample again at the full record size */
+		struct event_skb *e_skb = bpf_dynptr_data(dynptr, 0, sizeof(struct event_skb));
+		if (!e_skb) {
+			err = 1;
+			goto error_dynptr;
+		}
+
+		if (e_hdr->subtype == EVENT_SUBTYPE_RECEIVE_QUEUE)
+			handle_restore_skb(e_skb, receive_queue, sk);
+		else if (e_hdr->subtype == EVENT_SUBTYPE_READER_QUEUE)
+			handle_restore_skb(e_skb, reader_queue, sk);
+	} else if (e_hdr->type == EVENT_TYPE_SKB_DATA) {
+		struct event_skb_data *e_skb_data = bpf_dynptr_data(dynptr, 0, sizeof(struct event_skb_data));
+		if (!e_skb_data) {
+			err = 1;
+			goto error_dynptr;
+		}
+
+		if (e_hdr->subtype == EVENT_SUBTYPE_RECEIVE_QUEUE)
+			handle_restore_skb_data(e_skb_data, receive_queue);
+		else if (e_hdr->subtype == EVENT_SUBTYPE_READER_QUEUE)
+			handle_restore_skb_data(e_skb_data, reader_queue);
+	}
+
+	/* Labels are named after the failure that jumps to them, not the cleanup */
+error_dynptr:
+	bpf_sock_release(sk);
+error_sock:
+	bpf_task_release(task);
+error:
+	return err;
+}
+
+/*
+ * CRIB program entry point: restore queued packets into one socket.
+ *
+ * @arg->pid and @arg->fd identify the target socket. Every record that user
+ * space submitted to the "urb" user ring buffer is drained and handed to
+ * handle_restore_event().
+ *
+ * Returns 0 on success, -1 if draining the user ring buffer fails.
+ */
+SEC("crib")
+int restore_socket_queue(struct prog_args *arg)
+{
+	/* Copied to the stack so the callback gets a plain pointer to it */
+	struct prog_args arg_context = {
+		.fd = arg->fd,
+		.pid = arg->pid
+	};
+	long drained;
+
+	/* A non-negative result is the number of drained samples */
+	drained = bpf_user_ringbuf_drain(&urb, handle_restore_event, &arg_context, 0);
+	if (drained < 0)
+		return -1;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/crib/test_restore_udp_socket.c b/tools/testing/selftests/crib/test_restore_udp_socket.c
new file mode 100644
index 000000000000..f986ff4dfc49
--- /dev/null
+++ b/tools/testing/selftests/crib/test_restore_udp_socket.c
@@ -0,0 +1,333 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author:
+ *	Juntong Deng <juntong.deng@outlook.com>
+ */
+
+#include <argp.h>
+#include <stdio.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <netinet/in.h>
+#include <netinet/udp.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <errno.h>
+#include <netdb.h>
+#include <linux/if_packet.h>
+#include <net/ethernet.h>
+#include <linux/netlink.h>
+#include <asm/types.h>
+
+#include "../kselftest_harness.h"
+
+#include "test_restore_udp_socket.h"
+#include "test_restore_udp_socket.bpf.skel.h"
+
+/* Sockets used by the test; created in TEST(restore_udp_socket). */
+static int sockfd_checkpoint;
+static int sockfd_restore;
+static int sockfd_client;
+static int sockfd_server;
+
+/* File descriptors of the two loaded BPF programs. */
+static int dump_socket_queue_fd;
+static int restore_socket_queue_fd;
+
+/* rb carries dump records from the BPF program; urb carries restore records to it. */
+static struct ring_buffer *rb;
+static struct user_ring_buffer *urb;
+
+/* Payloads: *_send* go through the write queue, *_recv* through the receive queue. */
+char buffer_send1[1000], buffer_send2[1000];
+char buffer_recv1[1000], buffer_recv2[1000];
+
+/*
+ * State carried between write queue records: the number of the skb whose
+ * first data chunk was already replayed, and the transport header offset
+ * taken from the most recent write queue EVENT_TYPE_SKB record.
+ */
+static int last_skb_num = -1;
+static int last_skb_transport_header;
+
+/*
+ * The dump is complete: run the restore BPF program against the restore
+ * socket of this process.
+ *
+ * Returns the result of bpf_prog_test_run_opts().
+ */
+static int handle_dump_end_event(void)
+{
+	struct prog_args restore_args = {
+		.pid = getpid(),
+		.fd = sockfd_restore
+	};
+	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, run_opts,
+		.ctx_in = &restore_args,
+		.ctx_size_in = sizeof(restore_args),
+	);
+
+	return bpf_prog_test_run_opts(restore_socket_queue_fd, &run_opts);
+}
+
+/*
+ * Handle one dumped chunk of packet data.
+ *
+ * Write queue data is replayed with send() on the restore socket. The first
+ * chunk of each skb starts with the packet headers, so everything up to and
+ * including the UDP header is skipped and only the payload is sent. Later
+ * chunks of the same skb are sent unchanged.
+ *
+ * Data of the other queues is forwarded to the BPF restore program through
+ * the user ring buffer.
+ *
+ * Returns 0 on success, -1 if the record is malformed or send() fails, and
+ * -2 if the user ring buffer reservation fails.
+ */
+static int handle_dump_skb_data_event(struct event_skb_data *e_skb_data)
+{
+	struct event_skb_data *e_restore_skb_data;
+	const char *payload = e_skb_data->buf;
+	int payload_len = e_skb_data->chunk_length;
+
+	/* A chunk can never be larger than the buffer that carries it */
+	if (payload_len < 0 || payload_len > (int)sizeof(e_skb_data->buf))
+		return -1;
+
+	if (e_skb_data->hdr.subtype == EVENT_SUBTYPE_WRITE_QUEUE) {
+		if (last_skb_num != e_skb_data->skb_num) {
+			/* First chunk of a new skb: skip up to the end of the UDP header */
+			int hdr_len = last_skb_transport_header + (int)sizeof(struct udphdr);
+
+			if (hdr_len < 0 || hdr_len > payload_len)
+				return -1;
+
+			payload += hdr_len;
+			payload_len -= hdr_len;
+		}
+
+		if (send(sockfd_restore, payload, payload_len, 0) != payload_len)
+			return -1;
+
+		last_skb_num = e_skb_data->skb_num;
+		return 0;
+	}
+
+	e_restore_skb_data = user_ring_buffer__reserve(urb, sizeof(*e_restore_skb_data));
+	if (!e_restore_skb_data) {
+		printf("user_ring_buffer__reserve error\n");
+		return -2;
+	}
+
+	e_restore_skb_data->hdr.type = EVENT_TYPE_SKB_DATA;
+	e_restore_skb_data->hdr.subtype = e_skb_data->hdr.subtype;
+	e_restore_skb_data->skb_num = e_skb_data->skb_num;
+	e_restore_skb_data->chunk_length = e_skb_data->chunk_length;
+	e_restore_skb_data->offset = e_skb_data->offset;
+	memcpy(e_restore_skb_data->buf, e_skb_data->buf, e_skb_data->chunk_length);
+
+	user_ring_buffer__submit(urb, e_restore_skb_data);
+	return 0;
+}
+
+/*
+ * Handle the metadata record of one dumped skb.
+ *
+ * For a write queue skb only the transport header offset is remembered; it
+ * is used later to strip the headers from the first data chunk. Records of
+ * the other queues are copied to the user ring buffer for the BPF restore
+ * program.
+ *
+ * Returns 0 on success, -2 if the user ring buffer reservation fails.
+ */
+static int handle_dump_skb_event(struct event_skb *e_skb)
+{
+	struct event_skb *restore_rec;
+
+	if (e_skb->hdr.subtype == EVENT_SUBTYPE_WRITE_QUEUE) {
+		last_skb_transport_header = e_skb->transport_header;
+		return 0;
+	}
+
+	restore_rec = (struct event_skb *)user_ring_buffer__reserve(urb, sizeof(struct event_skb));
+	if (!restore_rec) {
+		printf("user_ring_buffer__reserve error\n");
+		return -2;
+	}
+
+	/* Every member is forwarded unchanged; only the type is set explicitly */
+	*restore_rec = *e_skb;
+	restore_rec->hdr.type = EVENT_TYPE_SKB;
+
+	user_ring_buffer__submit(urb, restore_rec);
+	return 0;
+}
+
+/*
+ * Ring buffer callback: dispatch one record published by the dump program.
+ *
+ * @ctx is unused. @data points to the record and @data_sz is its size.
+ *
+ * The handler's result is returned unchanged, so a failure while forwarding
+ * a record or running the restore program is no longer silently dropped.
+ *
+ * Returns 0 on success, -1 for a truncated record or an unknown record
+ * type, otherwise the negative error of the failing handler.
+ */
+static int handle_event(void *ctx, void *data, size_t data_sz)
+{
+	const struct event_hdr *e_hdr = data;
+
+	if (data_sz < sizeof(*e_hdr))
+		return -1;
+
+	switch (e_hdr->type) {
+	case EVENT_TYPE_SKB:
+		return handle_dump_skb_event((struct event_skb *)data);
+	case EVENT_TYPE_SKB_DATA:
+		return handle_dump_skb_data_event((struct event_skb_data *)data);
+	case EVENT_TYPE_END:
+		return handle_dump_end_event();
+	default:
+		printf("Unknown event type!\n");
+		return -1;
+	}
+}
+
+/*
+ * Verify that the restored packets are delivered intact.
+ *
+ * Uncorks the restore socket so that the replayed write queue data is sent,
+ * then checks that:
+ *  - the restore socket receives buffer_recv1 and buffer_recv2, in that
+ *    order, each from the loopback address and the client socket's port;
+ *  - the server socket receives one datagram made of buffer_send1 followed
+ *    by buffer_send2.
+ *
+ * Returns 0 if every check passes, -1 otherwise.
+ */
+static int check_restore_data_correctness(void)
+{
+	struct sockaddr_in peer_addr, client_addr;
+	socklen_t addr_len = sizeof(struct sockaddr_in);
+	char rcv_buf[1000], snd_buf[2000];
+	const int cork_off = 0;
+
+	if (setsockopt(sockfd_restore, IPPROTO_UDP, UDP_CORK, &cork_off, sizeof(cork_off)) != 0)
+		return -1;
+
+	memset(rcv_buf, 0, sizeof(rcv_buf));
+	memset(snd_buf, 0, sizeof(snd_buf));
+	memset(&peer_addr, 0, sizeof(peer_addr));
+	memset(&client_addr, 0, sizeof(client_addr));
+
+	if (getsockname(sockfd_client, (struct sockaddr *)&client_addr, &addr_len) != 0)
+		return -1;
+
+	/* First restored receive queue packet */
+	if (recvfrom(sockfd_restore, rcv_buf, sizeof(rcv_buf), 0, (struct sockaddr *)&peer_addr, &addr_len) <= 0)
+		return -1;
+	if (memcmp(rcv_buf, buffer_recv1, sizeof(buffer_recv1)) != 0)
+		return -1;
+	if (peer_addr.sin_addr.s_addr != htonl(INADDR_LOOPBACK))
+		return -1;
+	if (peer_addr.sin_port != client_addr.sin_port)
+		return -1;
+
+	/* Second restored receive queue packet */
+	if (recvfrom(sockfd_restore, rcv_buf, sizeof(rcv_buf), 0, (struct sockaddr *)&peer_addr, &addr_len) <= 0)
+		return -1;
+	if (memcmp(rcv_buf, buffer_recv2, sizeof(buffer_recv2)) != 0)
+		return -1;
+	if (peer_addr.sin_addr.s_addr != htonl(INADDR_LOOPBACK))
+		return -1;
+	if (peer_addr.sin_port != client_addr.sin_port)
+		return -1;
+
+	/* Replayed write queue data, as seen by the server socket */
+	if (recvfrom(sockfd_server, snd_buf, sizeof(snd_buf), 0, (struct sockaddr *)&peer_addr, &addr_len) <= 0)
+		return -1;
+	if (memcmp(snd_buf, buffer_send1, sizeof(buffer_send1)) != 0)
+		return -1;
+	if (memcmp(snd_buf + sizeof(buffer_send1), buffer_send2, sizeof(buffer_send2)) != 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Check that the restore socket can continue to work properly
+ * (the restore process did not damage the socket).
+ *
+ * The server socket is connected to the restore socket's local address, one
+ * datagram is sent in each direction, and both payloads are verified. The
+ * datagram received by the restore socket must come from the loopback
+ * address, port 6003.
+ *
+ * Returns 0 if every check passes, -1 otherwise.
+ */
+static int check_restore_socket(void)
+{
+	struct sockaddr_in peer_addr, restore_addr;
+	socklen_t addr_len = sizeof(struct sockaddr_in);
+	char rcv_buf[1000];
+
+	memset(rcv_buf, 0, sizeof(rcv_buf));
+	memset(&peer_addr, 0, sizeof(peer_addr));
+	memset(&restore_addr, 0, sizeof(restore_addr));
+
+	if (getsockname(sockfd_restore, (struct sockaddr *)&restore_addr, &addr_len) != 0)
+		return -1;
+
+	if (connect(sockfd_server, (struct sockaddr *)&restore_addr, sizeof(restore_addr)) < 0)
+		return -1;
+
+	/* One datagram in each direction */
+	if (send(sockfd_restore, buffer_send1, sizeof(buffer_send1), 0) <= 0)
+		return -1;
+	if (send(sockfd_server, buffer_send2, sizeof(buffer_send2), 0) <= 0)
+		return -1;
+
+	/* restore socket -> server socket */
+	if (recvfrom(sockfd_server, rcv_buf, sizeof(rcv_buf), 0, (struct sockaddr *)&peer_addr, &addr_len) <= 0)
+		return -1;
+	if (memcmp(rcv_buf, buffer_send1, sizeof(buffer_send1)) != 0)
+		return -1;
+
+	/* server socket -> restore socket */
+	if (recvfrom(sockfd_restore, rcv_buf, sizeof(rcv_buf), 0, (struct sockaddr *)&peer_addr, &addr_len) <= 0)
+		return -1;
+	if (memcmp(rcv_buf, buffer_send2, sizeof(buffer_send2)) != 0)
+		return -1;
+
+	if (peer_addr.sin_addr.s_addr != htonl(INADDR_LOOPBACK))
+		return -1;
+	if (peer_addr.sin_port != htons(6003))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * End-to-end test: dump the queued packets of a corked UDP "checkpoint"
+ * socket and restore them into a second "restore" socket.
+ *
+ * Setup: the client socket sends two datagrams to the checkpoint socket
+ * (receive queue), and the corked checkpoint socket sends two buffers that
+ * stay pending (write queue). The dump program is then run and the ring
+ * buffer is polled; its records drive the restore. Finally the restored
+ * data and the health of the restore socket are verified.
+ */
+TEST(restore_udp_socket)
+{
+	/* 0 is a valid file descriptor; only negative values signal failure */
+	sockfd_checkpoint = socket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, IPPROTO_UDP);
+	ASSERT_GE(sockfd_checkpoint, 0);
+
+	sockfd_restore = socket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, IPPROTO_UDP);
+	ASSERT_GE(sockfd_restore, 0);
+
+	sockfd_client = socket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, IPPROTO_UDP);
+	ASSERT_GE(sockfd_client, 0);
+
+	sockfd_server = socket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, IPPROTO_UDP);
+	ASSERT_GE(sockfd_server, 0);
+
+	struct sockaddr_in checkpoint_src_addr = {
+		.sin_family = AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_ANY),
+		.sin_port = htons(6001)
+	};
+
+	struct sockaddr_in checkpoint_dst_addr = {
+		.sin_family = AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+		.sin_port = htons(6002)
+	};
+
+	struct sockaddr_in restore_dst_addr = {
+		.sin_family = AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+		.sin_port = htons(6003)
+	};
+
+	const int enable = 1;
+	ASSERT_EQ(setsockopt(sockfd_checkpoint, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable)), 0);
+	ASSERT_EQ(setsockopt(sockfd_server, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable)), 0);
+
+	/* Cork both sockets so that sent data stays in the write queue */
+	ASSERT_EQ(setsockopt(sockfd_checkpoint, IPPROTO_UDP, UDP_CORK, &enable, sizeof(enable)), 0);
+	ASSERT_EQ(setsockopt(sockfd_restore, IPPROTO_UDP, UDP_CORK, &enable, sizeof(enable)), 0);
+
+	ASSERT_EQ(bind(sockfd_checkpoint, (struct sockaddr *)&checkpoint_src_addr, sizeof(struct sockaddr_in)), 0);
+	ASSERT_EQ(bind(sockfd_server, (struct sockaddr *)&restore_dst_addr, sizeof(struct sockaddr_in)), 0);
+
+	memset(buffer_send1, 'a', 1000);
+	memset(buffer_send2, 'b', 1000);
+	memset(buffer_recv1, 'c', 1000);
+	memset(buffer_recv2, 'd', 1000);
+
+	/* Fill the receive queue of the checkpoint socket */
+	ASSERT_EQ(connect(sockfd_client, (struct sockaddr *)&checkpoint_src_addr, sizeof(struct sockaddr_in)), 0);
+	ASSERT_EQ(send(sockfd_client, buffer_recv1, sizeof(buffer_recv1), 0), sizeof(buffer_recv1));
+	ASSERT_EQ(send(sockfd_client, buffer_recv2, sizeof(buffer_recv2), 0), sizeof(buffer_recv2));
+
+	ASSERT_EQ(connect(sockfd_checkpoint, (struct sockaddr *)&checkpoint_dst_addr, sizeof(struct sockaddr_in)), 0);
+	ASSERT_EQ(connect(sockfd_restore, (struct sockaddr *)&restore_dst_addr, sizeof(struct sockaddr_in)), 0);
+
+	/* Fill the write queue of the (corked) checkpoint socket */
+	ASSERT_EQ(send(sockfd_checkpoint, buffer_send1, sizeof(buffer_send1), 0), sizeof(buffer_send1));
+	ASSERT_EQ(send(sockfd_checkpoint, buffer_send2, sizeof(buffer_send2), 0), sizeof(buffer_send2));
+
+	struct prog_args arg_checkpoint = {
+		.pid = getpid(),
+		.fd = sockfd_checkpoint
+	};
+
+	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+		.ctx_in = &arg_checkpoint,
+		.ctx_size_in = sizeof(arg_checkpoint),
+	);
+
+	struct test_restore_udp_socket_bpf *skel = test_restore_udp_socket_bpf__open_and_load();
+	/* Opening or loading can fail; do not dereference a NULL skeleton */
+	ASSERT_NE(skel, NULL);
+
+	dump_socket_queue_fd = bpf_program__fd(skel->progs.dump_socket_queue);
+	restore_socket_queue_fd = bpf_program__fd(skel->progs.restore_socket_queue);
+
+	rb = ring_buffer__new(bpf_map__fd(skel->maps.rb), handle_event, NULL, NULL);
+	ASSERT_NE(rb, NULL);
+
+	urb = user_ring_buffer__new(bpf_map__fd(skel->maps.urb), NULL);
+	ASSERT_NE(urb, NULL);
+
+	ASSERT_EQ(bpf_prog_test_run_opts(dump_socket_queue_fd, &opts), 0);
+
+	/* Consuming the dump records drives the restore (see handle_event) */
+	ASSERT_GT(ring_buffer__poll(rb, 100), 0);
+
+	ASSERT_EQ(check_restore_data_correctness(), 0);
+	ASSERT_EQ(check_restore_socket(), 0);
+
+	ASSERT_EQ(close(sockfd_checkpoint), 0);
+	ASSERT_EQ(close(sockfd_restore), 0);
+	ASSERT_EQ(close(sockfd_client), 0);
+	ASSERT_EQ(close(sockfd_server), 0);
+	ring_buffer__free(rb);
+	user_ring_buffer__free(urb);
+	test_restore_udp_socket_bpf__destroy(skel);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/crib/test_restore_udp_socket.h b/tools/testing/selftests/crib/test_restore_udp_socket.h
new file mode 100644
index 000000000000..0ea5d3cb1b81
--- /dev/null
+++ b/tools/testing/selftests/crib/test_restore_udp_socket.h
@@ -0,0 +1,51 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author:
+ *	Juntong Deng <juntong.deng@outlook.com>
+ */
+
+#ifndef __TEST_RESTORE_UDP_SOCKET_H
+#define __TEST_RESTORE_UDP_SOCKET_H
+
+#define EVENT_TYPE_SKB 0
+#define EVENT_TYPE_SKB_DATA 1
+#define EVENT_TYPE_END 2
+
+#define EVENT_SUBTYPE_RECEIVE_QUEUE 0
+#define EVENT_SUBTYPE_WRITE_QUEUE 1
+#define EVENT_SUBTYPE_READER_QUEUE 2
+
+/* Context passed to both BPF programs: identifies one socket as (pid, fd). */
+struct prog_args {
+	int pid;
+	int fd;
+};
+
+/* Common header at the start of every ring buffer record. */
+struct event_hdr {
+	int type;	/* one of EVENT_TYPE_* */
+	int subtype;	/* one of EVENT_SUBTYPE_*: the queue the record belongs to */
+};
+
+/*
+ * EVENT_TYPE_SKB record: metadata of one dumped skb.
+ *
+ * NOTE(review): every member is an int, while some of the struct sk_buff
+ * members copied into it (e.g. tstamp, dev_scratch) may be wider on 64-bit
+ * kernels - confirm that truncation is intended.
+ */
+struct event_skb {
+	struct event_hdr hdr;
+	int skb_num;		/* index of the skb within its queue, from 0 */
+	int headerlen;		/* skb->data - skb->head */
+	int len;		/* skb->len */
+	int size;		/* result of bpf_cal_skb_size() */
+	int tstamp;		/* skb->tstamp */
+	int dev_scratch;	/* skb->dev_scratch */
+	int protocol;		/* skb->protocol */
+	int csum;		/* skb->csum */
+	int transport_header;	/* skb->transport_header */
+	int network_header;	/* skb->network_header */
+	int mac_header;		/* skb->mac_header */
+};
+
+/* EVENT_TYPE_SKB_DATA record: one chunk of the data of a dumped skb. */
+struct event_skb_data {
+	struct event_hdr hdr;
+	int skb_num;		/* index of the skb this chunk belongs to */
+	int chunk_length;	/* chunk length reported by the skb data iterator */
+	int offset;		/* offset reported by the skb data iterator */
+	char buf[500];		/* chunk payload */
+};
+
+#endif /* __TEST_RESTORE_UDP_SOCKET_H */