
[RFC,9/9] liburing: support sqe ext_flags & sqe group

Message ID 20240408010322.4104395-10-ming.lei@redhat.com (mailing list archive)
State New
Series io_uring: support sqe group and provide group kbuf

Commit Message

Ming Lei April 8, 2024, 1:03 a.m. UTC
From: Ming Lei <tom.leiming@gmail.com>

Provide helpers to set sqe ext_flags, both for plain SQEs and for
IORING_URING_CMD SQEs.
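
For example, flagging an sqe as part of a group becomes a single call
(usage sketch only):

    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

    io_uring_prep_read(sqe, fd, buf, len, 0);
    /* stores the ext flags and sets IOSQE_HAS_EXT_FLAGS in sqe->flags */
    io_uring_sqe_set_ext_flags(sqe, IOSQE_EXT_SQE_GROUP);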

Add one test case to cover the sqe group feature by using an sqe group
to copy one part of a source file into multiple destinations via a
single syscall.
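
The group built by the test looks roughly like this (sketch based on
build_group_sqes() in the test; error handling omitted). The leader
reads one chunk of the source into a fixed buffer, two member writes
reuse that buffer, and the last sqe carries no group flag, which
terminates the group:

    /* leader: read one chunk from the source */
    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_read_fixed(sqe, fd_in, buf, BUF_SIZE, 0, buf_idx);
    io_uring_sqe_set_ext_flags(sqe, IOSQE_EXT_SQE_GROUP);

    /* member: write the shared buffer to destination 1 */
    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_write_fixed(sqe, fd_out1, buf, BUF_SIZE, 0, buf_idx);
    io_uring_sqe_set_ext_flags(sqe, IOSQE_EXT_SQE_GROUP);

    /* last member: no group flag, so the group ends here */
    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_write_fixed(sqe, fd_out2, buf, BUF_SIZE, 0, buf_idx);

    io_uring_submit_and_wait(&ring, 3);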

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
---
 src/include/liburing.h          |  15 ++
 src/include/liburing/io_uring.h |  20 +++
 test/Makefile                   |   1 +
 test/group_cp.c                 | 260 ++++++++++++++++++++++++++++++++
 4 files changed, 296 insertions(+)
 create mode 100644 test/group_cp.c

Patch

diff --git a/src/include/liburing.h b/src/include/liburing.h
index 3e47298..5379d53 100644
--- a/src/include/liburing.h
+++ b/src/include/liburing.h
@@ -368,6 +368,21 @@  IOURINGINLINE void io_uring_sqe_set_flags(struct io_uring_sqe *sqe,
 	sqe->flags = (__u8) flags;
 }
 
+IOURINGINLINE void io_uring_sqe_set_ext_flags(struct io_uring_sqe *sqe,
+					  unsigned flags)
+{
+	sqe->ext_flags = (__u8) flags;
+	sqe->flags |= IOSQE_HAS_EXT_FLAGS;
+}
+
+IOURINGINLINE void io_uring_cmd_set_ext_flags(struct io_uring_sqe *sqe,
+					  unsigned flags)
+{
+	sqe->uring_cmd_flags &= ~IORING_URING_CMD_EXT_MASK;
+	sqe->uring_cmd_flags |= ((__u8) flags) << 16;
+	sqe->flags |= IOSQE_HAS_EXT_FLAGS;
+}
+
 IOURINGINLINE void __io_uring_set_target_fixed_file(struct io_uring_sqe *sqe,
 						    unsigned int file_index)
 {
diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
index bde1199..b0a0318 100644
--- a/src/include/liburing/io_uring.h
+++ b/src/include/liburing/io_uring.h
@@ -98,6 +98,10 @@  struct io_uring_sqe {
 			__u64	__pad2[1];
 		};
 		__u64	optval;
+		struct {
+			__u8	__pad4[15];
+			__u8	ext_flags;
+		};
 		/*
 		 * If the ring is initialized with IORING_SETUP_SQE128, then
 		 * this field is used for 80 bytes of arbitrary command data
@@ -123,6 +127,9 @@  enum {
 	IOSQE_ASYNC_BIT,
 	IOSQE_BUFFER_SELECT_BIT,
 	IOSQE_CQE_SKIP_SUCCESS_BIT,
+	IOSQE_HAS_EXT_FLAGS_BIT,
+
+	IOSQE_EXT_SQE_GROUP_BIT = 0,
 };
 
 /*
@@ -142,6 +149,13 @@  enum {
 #define IOSQE_BUFFER_SELECT	(1U << IOSQE_BUFFER_SELECT_BIT)
 /* don't post CQE if request succeeded */
 #define IOSQE_CQE_SKIP_SUCCESS	(1U << IOSQE_CQE_SKIP_SUCCESS_BIT)
+/*
+ * sqe ext flags are carried in the last byte of the sqe, or in
+ * bit23~bit16 of sqe->uring_cmd_flags for IORING_URING_CMD.
+ */
+#define IOSQE_HAS_EXT_FLAGS	(1U << IOSQE_HAS_EXT_FLAGS_BIT)
+/* defines sqe group */
+#define IOSQE_EXT_SQE_GROUP	(1U << IOSQE_EXT_SQE_GROUP_BIT)
 
 /*
  * io_uring_setup() flags
@@ -265,8 +279,14 @@  enum io_uring_op {
  * sqe->uring_cmd_flags
  * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
  *				along with setting sqe->buf_index.
+ * IORING_PROVIDE_GROUP_KBUF	this command provides a group kernel
+ *				buffer for member requests; members can
+ *				retrieve any sub-buffer with offset
+ *				(sqe->addr) and length (sqe->len)
  */
 #define IORING_URING_CMD_FIXED	(1U << 0)
+#define IORING_PROVIDE_GROUP_KBUF	(1U << 1)
+#define IORING_URING_CMD_EXT_MASK	0x00ff0000
 
 
 /*
diff --git a/test/Makefile b/test/Makefile
index 32848ec..dd3b394 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -207,6 +207,7 @@  test_srcs := \
 	wakeup-hang.c \
 	wq-aff.c \
 	xattr.c \
+	group_cp.c \
 	# EOL
 
 all_targets :=
diff --git a/test/group_cp.c b/test/group_cp.c
new file mode 100644
index 0000000..4ac5cdb
--- /dev/null
+++ b/test/group_cp.c
@@ -0,0 +1,260 @@ 
+/* SPDX-License-Identifier: MIT */
+/*
+ * Test SQE group feature
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <assert.h>
+#include <sys/stat.h>
+#include <linux/fs.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define BUF_SIZE    8192
+#define BUFFERS     6
+
+struct test_data {
+	const char *src_file;
+	struct iovec iov[BUFFERS];
+	int fd_in, fd_out1, fd_out2;
+};
+
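+/*
+ * Mark an sqe as the group leader or a member; the group ends at the
+ * first sqe in the sequence that does not carry the group flag.
+ */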
+static void set_sqe_group(struct io_uring_sqe *sqe)
+{
+	io_uring_sqe_set_ext_flags(sqe, IOSQE_EXT_SQE_GROUP);
+}
+
+static int check_cqe(struct io_uring *ring, unsigned int nr)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < nr; ++i) {
+		struct io_uring_cqe *cqe;
+
+		ret = io_uring_peek_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "peek failed: %d\n", ret);
+			return ret;
+		}
+		/* check the result before marking the cqe as seen */
+		if (cqe->res != BUF_SIZE)
+			fprintf(stderr, "bad result %d, user_data %llx\n",
+					cqe->res, cqe->user_data);
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	return 0;
+}
+
+static int prep_test(struct io_uring *ring, struct test_data *data)
+{
+	int ret, i;
+
+	data->fd_in = open(data->src_file, O_RDONLY | O_DIRECT, 0644);
+	if (data->fd_in < 0) {
+		perror("open in");
+		return 1;
+	}
+
+	data->fd_out1 = open(".test_group_cp1", O_RDWR | O_CREAT | O_DIRECT, 0644);
+	unlink(".test_group_cp1");
+
+	data->fd_out2 = open(".test_group_cp2", O_RDWR | O_CREAT | O_DIRECT, 0644);
+	unlink(".test_group_cp2");
+
+	if (data->fd_out1 < 0 || data->fd_out2 < 0) {
+		perror("open out");
+		return 1;
+	}
+
+	for (i = 0; i < BUFFERS; i++) {
+		void *buf;
+
+		assert(!posix_memalign(&buf, 4096, BUF_SIZE));
+		data->iov[i].iov_base = buf;
+		data->iov[i].iov_len = BUF_SIZE;
+		memset(data->iov[i].iov_base, 0, BUF_SIZE);
+	}
+
+	ret = io_uring_register_buffers(ring, data->iov, BUFFERS);
+	if (ret) {
+		fprintf(stderr, "Error registering buffers: %s",
+				strerror(-ret));
+		return 1;
+	}
+
+	return 0;
+}
+
+static void unprep_test(struct io_uring *ring, struct test_data *d)
+{
+	io_uring_unregister_buffers(ring);
+	close(d->fd_in);
+	close(d->fd_out1);
+	close(d->fd_out2);
+}
+
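+/*
+ * Build one 3-sqe group: the leader reads a chunk of the source into a
+ * fixed buffer, and two member writes copy that buffer to both
+ * destination files.
+ */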
+static unsigned build_group_sqes(struct io_uring *ring, struct test_data *d,
+	off_t off, int buf_idx, __u8 lead_flags, __u8 mem_flags)
+{
+	struct io_uring_sqe *sqe, *sqe2, *sqe1;
+
+	sqe = io_uring_get_sqe(ring);
+	sqe1 = io_uring_get_sqe(ring);
+	sqe2 = io_uring_get_sqe(ring);
+	assert(sqe && sqe1 && sqe2);
+
+	io_uring_prep_read_fixed(sqe, d->fd_in, d->iov[buf_idx].iov_base,
+			BUF_SIZE, 0, buf_idx);
+	set_sqe_group(sqe);
+	sqe->user_data = buf_idx + 1;
+	sqe->flags |= lead_flags;
+
+	io_uring_prep_write_fixed(sqe1, d->fd_out1, d->iov[buf_idx].iov_base,
+			BUF_SIZE, off, buf_idx);
+	set_sqe_group(sqe1);
+	sqe1->user_data = buf_idx + 2;
+	sqe1->flags |= mem_flags;
+
+	io_uring_prep_write_fixed(sqe2, d->fd_out2, d->iov[buf_idx].iov_base,
+			BUF_SIZE, off, buf_idx);
+	sqe2->user_data = buf_idx + 3;
+	sqe2->flags |= mem_flags;
+
+	return 3;
+}
+
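+/* Read both destinations back and compare them against the source buffer. */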
+static int verify_cp(struct io_uring *ring, struct test_data *d, off_t off,
+		unsigned int buf_idx)
+{
+	struct io_uring_sqe *sqe2, *sqe1;
+	int ret;
+
+	sqe1 = io_uring_get_sqe(ring);
+	sqe2 = io_uring_get_sqe(ring);
+	assert(sqe1 && sqe2);
+
+	io_uring_prep_read_fixed(sqe1, d->fd_out1, d->iov[buf_idx + 1].iov_base,
+			BUF_SIZE, off, buf_idx + 1);
+	sqe1->user_data = buf_idx + 7;
+	io_uring_prep_read_fixed(sqe2, d->fd_out2, d->iov[buf_idx + 2].iov_base,
+			BUF_SIZE, off, buf_idx + 2);
+	sqe1->user_data = buf_idx + 8;
+	ret = io_uring_submit_and_wait(ring, 2);
+	if (ret < 0) {
+                fprintf(stderr, "submit failed: %d\n", ret);
+                return 1;
+        }
+
+	ret = check_cqe(ring, 2);
+	if (ret)
+		return ret;
+
+	if (memcmp(d->iov[buf_idx].iov_base, d->iov[buf_idx + 1].iov_base, BUF_SIZE)) {
+		fprintf(stderr, "data not match for destination 1\n");
+		return 1;
+	}
+
+	if (memcmp(d->iov[buf_idx].iov_base, d->iov[buf_idx + 2].iov_base, BUF_SIZE)) {
+		fprintf(stderr, "data not match for destination 2\n");
+		return 1;
+	}
+	return 0;
+}
+
+static int test(struct io_uring *ring, struct test_data *d,
+		__u8 lead_flags, __u8 mem_flags)
+{
+	unsigned test_link = lead_flags & IOSQE_IO_LINK;
+	unsigned nr;
+	int ret;
+
+	if (!test_link) {
+		nr = build_group_sqes(ring, d, 0, 0, lead_flags, mem_flags);
+	} else {
+		/* two groups linked together */
+		nr = build_group_sqes(ring, d, 0, 0, lead_flags, mem_flags);
+		nr += build_group_sqes(ring, d, BUF_SIZE, 3, lead_flags,
+				mem_flags);
+	}
+
+	ret = io_uring_submit_and_wait(ring, nr);
+	if (ret < 0) {
+                fprintf(stderr, "submit failed: %d\n", ret);
+                return 1;
+        }
+
+	if (check_cqe(ring, nr))
+		return 1;
+
+	ret = verify_cp(ring, d, 0, 0);
+	if (ret)
+		return ret;
+	if (test_link)
+		return verify_cp(ring, d, BUF_SIZE, 3);
+	return 0;
+}
+
+static int run_test(struct io_uring *ring, struct test_data *d,
+		__u8 lead_flags, __u8 mem_flags)
+{
+	int ret = test(ring, d, lead_flags, mem_flags);
+	if (ret) {
+		fprintf(stderr, "Test failed lead flags %x mem flags %x\n",
+				lead_flags, mem_flags);
+		return T_EXIT_FAIL;
+	}
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	struct test_data data = {
+		.src_file = argv[0],
+	};
+	int ret;
+	int g_link, g_async, m_async;
+
+	if (argc > 1)
+		return T_EXIT_SKIP;
+
+	ret = t_create_ring(16, &ring, 0);
+	if (ret == T_SETUP_SKIP)
+		return T_EXIT_SKIP;
+	else if (ret < 0)
+		return T_EXIT_FAIL;
+
+	ret = prep_test(&ring, &data);
+	if (ret) {
+		fprintf(stderr, "Prepare Test failed\n");
+		return T_EXIT_FAIL;
+	}
+
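+	/* cover all combinations of async/link on leader, async on members */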
+	for (g_async = 0; g_async < 2; g_async += 1)
+		for (g_link = 0; g_link < 2; g_link += 1)
+			for (m_async = 0; m_async < 2; m_async += 1) {
+				__u8 g_flags = (g_async ? IOSQE_ASYNC : 0) |
+					(g_link ? IOSQE_IO_LINK : 0);
+				__u8 m_flags = (m_async ? IOSQE_ASYNC : 0);
+
+				if (run_test(&ring, &data, g_flags, m_flags))
+					return T_EXIT_FAIL;
+			}
+	unprep_test(&ring, &data);
+
+	io_uring_queue_exit(&ring);
+	return T_EXIT_PASS;
+}