diff mbox series

[v2,1/4] fsstress: add IO_URING read and write operations

Message ID 20200809063040.15521-2-zlang@redhat.com (mailing list archive)
State New, archived
Headers show
Series fsstress,fsx: add io_uring test and do some fix | expand

Commit Message

Zorro Lang Aug. 9, 2020, 6:30 a.m. UTC
IO_URING is a new feature of the current Linux kernel. Add basic IO_URING
read/write operations to fsstress to cover this kind of IO testing.

Signed-off-by: Zorro Lang <zlang@redhat.com>
---
 README                 |   4 +-
 configure.ac           |   1 +
 include/builddefs.in   |   1 +
 ltp/Makefile           |   5 ++
 ltp/fsstress.c         | 139 ++++++++++++++++++++++++++++++++++++++++-
 m4/Makefile            |   1 +
 m4/package_liburing.m4 |   4 ++
 7 files changed, 152 insertions(+), 3 deletions(-)
 create mode 100644 m4/package_liburing.m4

Comments

Jens Axboe Aug. 9, 2020, 5:51 p.m. UTC | #1
On 8/9/20 12:30 AM, Zorro Lang wrote:
> @@ -2170,6 +2189,108 @@ do_aio_rw(int opno, long r, int flags)
>  }
>  #endif
>  
> +#ifdef URING
> +void
> +do_uring_rw(int opno, long r, int flags)
> +{
> +	char		*buf;
> +	int		e;
> +	pathname_t	f;
> +	int		fd;
> +	size_t		len;
> +	int64_t		lr;
> +	off64_t		off;
> +	struct stat64	stb;
> +	int		v;
> +	char		st[1024];
> +	struct io_uring_sqe	*sqe;
> +	struct io_uring_cqe	*cqe;
> +	struct iovec	iovec;
> +	int		iswrite = (flags & (O_WRONLY | O_RDWR)) ? 1 : 0;
> +
> +	init_pathname(&f);
> +	if (!get_fname(FT_REGFILE, r, &f, NULL, NULL, &v)) {
> +		if (v)
> +			printf("%d/%d: do_uring_rw - no filename\n", procid, opno);
> +		goto uring_out3;
> +	}
> +	fd = open_path(&f, flags);
> +	e = fd < 0 ? errno : 0;
> +	check_cwd();
> +	if (fd < 0) {
> +		if (v)
> +			printf("%d/%d: do_uring_rw - open %s failed %d\n",
> +			       procid, opno, f.path, e);
> +		goto uring_out3;
> +	}
> +	if (fstat64(fd, &stb) < 0) {
> +		if (v)
> +			printf("%d/%d: do_uring_rw - fstat64 %s failed %d\n",
> +			       procid, opno, f.path, errno);
> +		goto uring_out2;
> +	}
> +	inode_info(st, sizeof(st), &stb, v);
> +	if (!iswrite && stb.st_size == 0) {
> +		if (v)
> +			printf("%d/%d: do_uring_rw - %s%s zero size\n", procid, opno,
> +			       f.path, st);
> +		goto uring_out2;
> +	}
> +	sqe = io_uring_get_sqe(&ring);
> +	if (!sqe) {
> +		if (v)
> +			printf("%d/%d: do_uring_rw - io_uring_get_sqe failed\n",
> +			       procid, opno);
> +		goto uring_out2;
> +	}
> +	lr = ((int64_t)random() << 32) + random();
> +	len = (random() % FILELEN_MAX) + 1;
> +	buf = malloc(len);
> +	if (!buf) {
> +		if (v)
> +			printf("%d/%d: do_uring_rw - malloc failed\n",
> +			       procid, opno);
> +		goto uring_out2;
> +	}
> +	iovec.iov_base = buf;
> +	iovec.iov_len = len;
> +	if (iswrite) {
> +		off = (off64_t)(lr % MIN(stb.st_size + (1024 * 1024), MAXFSIZE));
> +		off %= maxfsize;
> +		memset(buf, nameseq & 0xff, len);
> +		io_uring_prep_writev(sqe, fd, &iovec, 1, off);
> +	} else {
> +		off = (off64_t)(lr % stb.st_size);
> +		io_uring_prep_readv(sqe, fd, &iovec, 1, off);
> +	}
> +
> +	if ((e = io_uring_submit(&ring)) != 1) {
> +		if (v)
> +			printf("%d/%d: %s - io_uring_submit failed %d\n", procid, opno,
> +			       iswrite ? "uring_write" : "uring_read", e);
> +		goto uring_out1;
> +	}
> +	if ((e = io_uring_wait_cqe(&ring, &cqe)) < 0) {
> +		if (v)
> +			printf("%d/%d: %s - io_uring_wait_cqe failed %d\n", procid, opno,
> +			       iswrite ? "uring_write" : "uring_read", e);
> +		goto uring_out1;
> +	}

You could use io_uring_submit_and_wait() here, that'll save a system
call for sync IO. Same comment goes for 4/4.

Apart from that, looks pretty straight forward to me.
Jens Axboe Aug. 22, 2020, 6:05 p.m. UTC | #2
On 8/22/20 12:14 PM, Zorro Lang wrote:
>>> +	if ((e = io_uring_submit(&ring)) != 1) {
>>> +		if (v)
>>> +			printf("%d/%d: %s - io_uring_submit failed %d\n", procid, opno,
>>> +			       iswrite ? "uring_write" : "uring_read", e);
>>> +		goto uring_out1;
>>> +	}
>>> +	if ((e = io_uring_wait_cqe(&ring, &cqe)) < 0) {
>>> +		if (v)
>>> +			printf("%d/%d: %s - io_uring_wait_cqe failed %d\n", procid, opno,
>>> +			       iswrite ? "uring_write" : "uring_read", e);
>>> +		goto uring_out1;
>>> +	}
>>
>> You could use io_uring_submit_and_wait() here, that'll save a system
>> call for sync IO. Same comment goes for 4/4.
> 
> Hi Jens,
> 
> Sorry I think I haven't learned about io_uring enough, why the
> io_uring_submit_and_wait can save a system call? Is it same with
> io_uring_submit(), except a wait_nr ? The io_uring_wait_cqe() and
> io_uring_cqe_seen() are still needed, right?

If you just call io_uring_submit(), it'll enter the kernel and submit
that IO. Then right after that you're saying "I want to wait for
completion of a request", which is then another system call. If you do
io_uring_submit_and_wait() you're entering the kernel with the intent of
"submit my request(s), and wait for N requests" hence only doing a
single system call even though it's an async interface.

Nothing else changes, io_uring_wait_cqe() will not enter the kernel if a
cqe is available in the ring already.
Zorro Lang Aug. 22, 2020, 6:14 p.m. UTC | #3
On Sun, Aug 09, 2020 at 11:51:45AM -0600, Jens Axboe wrote:
> On 8/9/20 12:30 AM, Zorro Lang wrote:
> > @@ -2170,6 +2189,108 @@ do_aio_rw(int opno, long r, int flags)
> >  }
> >  #endif
> >  
> > +#ifdef URING
> > +void
> > +do_uring_rw(int opno, long r, int flags)
> > +{
> > +	char		*buf;
> > +	int		e;
> > +	pathname_t	f;
> > +	int		fd;
> > +	size_t		len;
> > +	int64_t		lr;
> > +	off64_t		off;
> > +	struct stat64	stb;
> > +	int		v;
> > +	char		st[1024];
> > +	struct io_uring_sqe	*sqe;
> > +	struct io_uring_cqe	*cqe;
> > +	struct iovec	iovec;
> > +	int		iswrite = (flags & (O_WRONLY | O_RDWR)) ? 1 : 0;
> > +
> > +	init_pathname(&f);
> > +	if (!get_fname(FT_REGFILE, r, &f, NULL, NULL, &v)) {
> > +		if (v)
> > +			printf("%d/%d: do_uring_rw - no filename\n", procid, opno);
> > +		goto uring_out3;
> > +	}
> > +	fd = open_path(&f, flags);
> > +	e = fd < 0 ? errno : 0;
> > +	check_cwd();
> > +	if (fd < 0) {
> > +		if (v)
> > +			printf("%d/%d: do_uring_rw - open %s failed %d\n",
> > +			       procid, opno, f.path, e);
> > +		goto uring_out3;
> > +	}
> > +	if (fstat64(fd, &stb) < 0) {
> > +		if (v)
> > +			printf("%d/%d: do_uring_rw - fstat64 %s failed %d\n",
> > +			       procid, opno, f.path, errno);
> > +		goto uring_out2;
> > +	}
> > +	inode_info(st, sizeof(st), &stb, v);
> > +	if (!iswrite && stb.st_size == 0) {
> > +		if (v)
> > +			printf("%d/%d: do_uring_rw - %s%s zero size\n", procid, opno,
> > +			       f.path, st);
> > +		goto uring_out2;
> > +	}
> > +	sqe = io_uring_get_sqe(&ring);
> > +	if (!sqe) {
> > +		if (v)
> > +			printf("%d/%d: do_uring_rw - io_uring_get_sqe failed\n",
> > +			       procid, opno);
> > +		goto uring_out2;
> > +	}
> > +	lr = ((int64_t)random() << 32) + random();
> > +	len = (random() % FILELEN_MAX) + 1;
> > +	buf = malloc(len);
> > +	if (!buf) {
> > +		if (v)
> > +			printf("%d/%d: do_uring_rw - malloc failed\n",
> > +			       procid, opno);
> > +		goto uring_out2;
> > +	}
> > +	iovec.iov_base = buf;
> > +	iovec.iov_len = len;
> > +	if (iswrite) {
> > +		off = (off64_t)(lr % MIN(stb.st_size + (1024 * 1024), MAXFSIZE));
> > +		off %= maxfsize;
> > +		memset(buf, nameseq & 0xff, len);
> > +		io_uring_prep_writev(sqe, fd, &iovec, 1, off);
> > +	} else {
> > +		off = (off64_t)(lr % stb.st_size);
> > +		io_uring_prep_readv(sqe, fd, &iovec, 1, off);
> > +	}
> > +
> > +	if ((e = io_uring_submit(&ring)) != 1) {
> > +		if (v)
> > +			printf("%d/%d: %s - io_uring_submit failed %d\n", procid, opno,
> > +			       iswrite ? "uring_write" : "uring_read", e);
> > +		goto uring_out1;
> > +	}
> > +	if ((e = io_uring_wait_cqe(&ring, &cqe)) < 0) {
> > +		if (v)
> > +			printf("%d/%d: %s - io_uring_wait_cqe failed %d\n", procid, opno,
> > +			       iswrite ? "uring_write" : "uring_read", e);
> > +		goto uring_out1;
> > +	}
> 
> You could use io_uring_submit_and_wait() here, that'll save a system
> call for sync IO. Same comment goes for 4/4.

Hi Jens,

Sorry I think I haven't learned about io_uring enough, why the
io_uring_submit_and_wait can save a system call? Is it same with
io_uring_submit(), except a wait_nr ? The io_uring_wait_cqe() and
io_uring_cqe_seen() are still needed, right?

Thanks,
Zorro

> 
> Apart from that, looks pretty straight forward to me.
> 
> -- 
> Jens Axboe
>
diff mbox series

Patch

diff --git a/README b/README
index d0e23fcd..ae0f804d 100644
--- a/README
+++ b/README
@@ -8,13 +8,13 @@  _______________________
 	sudo apt-get install xfslibs-dev uuid-dev libtool-bin \
 	e2fsprogs automake gcc libuuid1 quota attr libattr1-dev make \
 	libacl1-dev libaio-dev xfsprogs libgdbm-dev gawk fio dbench \
-	uuid-runtime python sqlite3
+	uuid-runtime python sqlite3 liburing-dev
   For Fedora, RHEL, or CentOS:
 	yum install acl attr automake bc dbench dump e2fsprogs fio \
 	gawk gcc indent libtool lvm2 make psmisc quota sed \
 	xfsdump xfsprogs \
 	libacl-devel libattr-devel libaio-devel libuuid-devel \
-	xfsprogs-devel btrfs-progs-devel python sqlite
+	xfsprogs-devel btrfs-progs-devel python sqlite liburing-devel
 	(Older distributions may require xfsprogs-qa-devel as well.)
 	(Note that for RHEL and CentOS, you may need the EPEL repo.)
 - run make
diff --git a/configure.ac b/configure.ac
index 4bb50b32..8922c47e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -61,6 +61,7 @@  AC_PACKAGE_NEED_ACLINIT_LIBACL
 
 AC_PACKAGE_WANT_GDBM
 AC_PACKAGE_WANT_AIO
+AC_PACKAGE_WANT_URING
 AC_PACKAGE_WANT_DMAPI
 AC_PACKAGE_WANT_LINUX_FIEMAP_H
 AC_PACKAGE_WANT_FALLOCATE
diff --git a/include/builddefs.in b/include/builddefs.in
index e7894b1a..fded3230 100644
--- a/include/builddefs.in
+++ b/include/builddefs.in
@@ -61,6 +61,7 @@  RPM_VERSION     = @rpm_version@
 ENABLE_SHARED = @enable_shared@
 HAVE_DB = @have_db@
 HAVE_AIO = @have_aio@
+HAVE_URING = @have_uring@
 HAVE_FALLOCATE = @have_fallocate@
 HAVE_OPEN_BY_HANDLE_AT = @have_open_by_handle_at@
 HAVE_DMAPI = @have_dmapi@
diff --git a/ltp/Makefile b/ltp/Makefile
index ebf40336..198d930f 100644
--- a/ltp/Makefile
+++ b/ltp/Makefile
@@ -24,6 +24,11 @@  LCFLAGS += -DAIO
 LLDLIBS += -laio -lpthread
 endif
 
+ifeq ($(HAVE_URING), true)
+LCFLAGS += -DURING
+LLDLIBS += -luring
+endif
+
 ifeq ($(HAVE_LIBBTRFSUTIL), true)
 LLDLIBS += -lbtrfsutil
 endif
diff --git a/ltp/fsstress.c b/ltp/fsstress.c
index 709fdeec..a4188e1c 100644
--- a/ltp/fsstress.c
+++ b/ltp/fsstress.c
@@ -30,6 +30,11 @@ 
 #include <libaio.h>
 io_context_t	io_ctx;
 #endif
+#ifdef URING
+#include <liburing.h>
+#define URING_ENTRIES	1
+struct io_uring	ring;
+#endif
 #include <sys/syscall.h>
 #include <sys/xattr.h>
 
@@ -139,6 +144,8 @@  typedef enum {
 	OP_TRUNCATE,
 	OP_UNLINK,
 	OP_UNRESVSP,
+	OP_URING_READ,
+	OP_URING_WRITE,
 	OP_WRITE,
 	OP_WRITEV,
 	OP_LAST
@@ -267,6 +274,8 @@  void	sync_f(int, long);
 void	truncate_f(int, long);
 void	unlink_f(int, long);
 void	unresvsp_f(int, long);
+void	uring_read_f(int, long);
+void	uring_write_f(int, long);
 void	write_f(int, long);
 void	writev_f(int, long);
 char	*xattr_flag_to_string(int);
@@ -335,6 +344,8 @@  opdesc_t	ops[] = {
 	{ OP_TRUNCATE, "truncate", truncate_f, 2, 1 },
 	{ OP_UNLINK, "unlink", unlink_f, 1, 1 },
 	{ OP_UNRESVSP, "unresvsp", unresvsp_f, 1, 1 },
+	{ OP_URING_READ, "uring_read", uring_read_f, 1, 0 },
+	{ OP_URING_WRITE, "uring_write", uring_write_f, 1, 1 },
 	{ OP_WRITE, "write", write_f, 4, 1 },
 	{ OP_WRITEV, "writev", writev_f, 4, 1 },
 }, *ops_end;
@@ -692,6 +703,12 @@  int main(int argc, char **argv)
 				fprintf(stderr, "io_setup failed");
 				exit(1);
 			}
+#endif
+#ifdef URING
+			if (io_uring_queue_init(URING_ENTRIES, &ring, 0)) {
+				fprintf(stderr, "io_uring_queue_init failed\n");
+				exit(1);
+			}
 #endif
 			for (i = 0; !loops || (i < loops); i++)
 				doproc();
@@ -701,7 +718,9 @@  int main(int argc, char **argv)
 				return 1;
 			}
 #endif
-
+#ifdef URING
+			io_uring_queue_exit(&ring);
+#endif
 			cleanup_flist();
 			free(freq_table);
 			return 0;
@@ -2170,6 +2189,136 @@  do_aio_rw(int opno, long r, int flags)
 }
 #endif
 
+#ifdef URING
+/*
+ * Perform one io_uring I/O on a randomly chosen regular file and wait for it
+ * to complete.
+ *
+ * @opno:  operation number, used only to tag the verbose output
+ * @r:     random value handed to get_fname() to pick the target file
+ * @flags: open(2) flags; O_WRONLY or O_RDWR selects a writev, anything else
+ *         a readv
+ *
+ * A buffer of 1..FILELEN_MAX bytes is transferred at a random offset.  Read
+ * offsets fall inside the current file size (empty files are skipped); write
+ * offsets may reach up to 1MiB past EOF, bounded by MAXFSIZE and maxfsize.
+ * Failures are only reported (when verbose) and never abort the run; the
+ * completion result (cqe->res) is printed but not otherwise checked.
+ */
+void
+do_uring_rw(int opno, long r, int flags)
+{
+	char		*buf;
+	int		e;
+	pathname_t	f;
+	int		fd;
+	size_t		len;
+	int64_t		lr;
+	off64_t		off;
+	struct stat64	stb;
+	int		v;
+	char		st[1024];
+	struct io_uring_sqe	*sqe;
+	struct io_uring_cqe	*cqe;
+	struct iovec	iovec;
+	int		iswrite = (flags & (O_WRONLY | O_RDWR)) ? 1 : 0;
+
+	init_pathname(&f);
+	if (!get_fname(FT_REGFILE, r, &f, NULL, NULL, &v)) {
+		if (v)
+			printf("%d/%d: do_uring_rw - no filename\n", procid, opno);
+		goto uring_out3;
+	}
+	fd = open_path(&f, flags);
+	/* Latch errno before any later call can overwrite it. */
+	e = fd < 0 ? errno : 0;
+	check_cwd();
+	if (fd < 0) {
+		if (v)
+			printf("%d/%d: do_uring_rw - open %s failed %d\n",
+			       procid, opno, f.path, e);
+		goto uring_out3;
+	}
+	if (fstat64(fd, &stb) < 0) {
+		if (v)
+			printf("%d/%d: do_uring_rw - fstat64 %s failed %d\n",
+			       procid, opno, f.path, errno);
+		goto uring_out2;
+	}
+	inode_info(st, sizeof(st), &stb, v);
+	/* A read offset is taken modulo st_size, so an empty file is unusable. */
+	if (!iswrite && stb.st_size == 0) {
+		if (v)
+			printf("%d/%d: do_uring_rw - %s%s zero size\n", procid, opno,
+			       f.path, st);
+		goto uring_out2;
+	}
+	/*
+	 * Allocate the buffer before taking an sqe.  An sqe handed out by
+	 * io_uring_get_sqe() stays queued until it is submitted, so bailing
+	 * out on a failed malloc() with one in hand would leave the
+	 * URING_ENTRIES-deep ring full for every later call.
+	 */
+	lr = ((int64_t)random() << 32) + random();
+	len = (random() % FILELEN_MAX) + 1;
+	buf = malloc(len);
+	if (!buf) {
+		if (v)
+			printf("%d/%d: do_uring_rw - malloc failed\n",
+			       procid, opno);
+		goto uring_out2;
+	}
+	sqe = io_uring_get_sqe(&ring);
+	if (!sqe) {
+		if (v)
+			printf("%d/%d: do_uring_rw - io_uring_get_sqe failed\n",
+			       procid, opno);
+		goto uring_out1;
+	}
+	iovec.iov_base = buf;
+	iovec.iov_len = len;
+	if (iswrite) {
+		off = (off64_t)(lr % MIN(stb.st_size + (1024 * 1024), MAXFSIZE));
+		off %= maxfsize;
+		memset(buf, nameseq & 0xff, len);
+		io_uring_prep_writev(sqe, fd, &iovec, 1, off);
+	} else {
+		off = (off64_t)(lr % stb.st_size);
+		io_uring_prep_readv(sqe, fd, &iovec, 1, off);
+	}
+
+	/*
+	 * Submit and wait for the completion in one system call.  The
+	 * io_uring_wait_cqe() below then finds the cqe already in the ring
+	 * and does not have to enter the kernel again.
+	 */
+	if ((e = io_uring_submit_and_wait(&ring, 1)) != 1) {
+		if (v)
+			printf("%d/%d: %s - io_uring_submit failed %d\n", procid, opno,
+			       iswrite ? "uring_write" : "uring_read", e);
+		goto uring_out1;
+	}
+	if ((e = io_uring_wait_cqe(&ring, &cqe)) < 0) {
+		if (v)
+			printf("%d/%d: %s - io_uring_wait_cqe failed %d\n", procid, opno,
+			       iswrite ? "uring_write" : "uring_read", e);
+		goto uring_out1;
+	}
+	if (v)
+		printf("%d/%d: %s %s%s [%lld, %d(res=%d)] %d\n",
+		       procid, opno, iswrite ? "uring_write" : "uring_read",
+		       f.path, st, (long long)off, (int)len, cqe->res, e);
+	io_uring_cqe_seen(&ring, cqe);
+
+ uring_out1:
+	free(buf);
+ uring_out2:
+	close(fd);
+ uring_out3:
+	free_pathname(&f);
+}
+#endif
+
 void
 aread_f(int opno, long r)
 {
@@ -5044,6 +5165,22 @@  unresvsp_f(int opno, long r)
 	close(fd);
 }
 
+void
+uring_read_f(int opno, long r)
+{
+#ifdef URING
+	do_uring_rw(opno, r, O_RDONLY);	/* O_RDONLY selects the readv path */
+#endif	/* URING - without it this op does nothing */
+}
+
+void
+uring_write_f(int opno, long r)
+{
+#ifdef URING
+	do_uring_rw(opno, r, O_WRONLY);	/* O_WRONLY selects the writev path */
+#endif	/* URING - without it this op does nothing */
+}
+
 void
 write_f(int opno, long r)
 {
diff --git a/m4/Makefile b/m4/Makefile
index 7fbff822..0352534d 100644
--- a/m4/Makefile
+++ b/m4/Makefile
@@ -14,6 +14,7 @@  LSRCFILES = \
 	package_dmapidev.m4 \
 	package_globals.m4 \
 	package_libcdev.m4 \
+	package_liburing.m4 \
 	package_ncurses.m4 \
 	package_pthread.m4 \
 	package_ssldev.m4 \
diff --git a/m4/package_liburing.m4 b/m4/package_liburing.m4
new file mode 100644
index 00000000..c92cc02a
--- /dev/null
+++ b/m4/package_liburing.m4
@@ -0,0 +1,4 @@ 
+AC_DEFUN([AC_PACKAGE_WANT_URING],
+  [ AC_CHECK_HEADERS(liburing.h, [ have_uring=true ], [ have_uring=false ])dnl only the header is probed
+    AC_SUBST(have_uring)dnl substituted for @have_uring@ in include/builddefs.in
+  ])