diff mbox series

tools/io_uring: Fix memory ordering

Message ID 20190820172958.92837-1-bvanassche@acm.org (mailing list archive)
State New, archived
Headers show
Series tools/io_uring: Fix memory ordering | expand

Commit Message

Bart Van Assche Aug. 20, 2019, 5:29 p.m. UTC
Order head and tail stores properly against CQE / SQE memory accesses.
Use <asm/barrier.h> instead of the io_uring "barrier.h" header file.

Cc: Roman Penyaev <rpenyaev@suse.de>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 tools/io_uring/Makefile         |  2 +-
 tools/io_uring/barrier.h        | 16 ---------------
 tools/io_uring/io_uring-bench.c | 20 +++++++++---------
 tools/io_uring/liburing.h       |  9 ++++-----
 tools/io_uring/queue.c          | 36 +++++++++++----------------------
 5 files changed, 26 insertions(+), 57 deletions(-)
 delete mode 100644 tools/io_uring/barrier.h

Comments

Stefan Hajnoczi Aug. 22, 2019, 1:14 p.m. UTC | #1
On Tue, Aug 20, 2019 at 10:29:58AM -0700, Bart Van Assche wrote:
> Order head and tail stores properly against CQE / SQE memory accesses.
> Use <asm/barrier.h> instead of the io_uring "barrier.h" header file.

Where does this header file come from?

Linux has an asm-generic/barrier.h file which is not uapi and therefore
not installed in /usr/include.

I couldn't find an asm/barrier.h in the Debian packages collection
either.

> 
> Cc: Roman Penyaev <rpenyaev@suse.de>
> Cc: Stefan Hajnoczi <stefanha@redhat.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  tools/io_uring/Makefile         |  2 +-
>  tools/io_uring/barrier.h        | 16 ---------------
>  tools/io_uring/io_uring-bench.c | 20 +++++++++---------
>  tools/io_uring/liburing.h       |  9 ++++-----
>  tools/io_uring/queue.c          | 36 +++++++++++----------------------
>  5 files changed, 26 insertions(+), 57 deletions(-)
>  delete mode 100644 tools/io_uring/barrier.h
> 
> diff --git a/tools/io_uring/Makefile b/tools/io_uring/Makefile
> index 00f146c54c53..bbec91c6274c 100644
> --- a/tools/io_uring/Makefile
> +++ b/tools/io_uring/Makefile
> @@ -1,6 +1,6 @@
>  # SPDX-License-Identifier: GPL-2.0
>  # Makefile for io_uring test tools
> -CFLAGS += -Wall -Wextra -g -D_GNU_SOURCE
> +CFLAGS += -Wall -Wextra -g -D_GNU_SOURCE -I../include
>  LDLIBS += -lpthread
>  
>  all: io_uring-cp io_uring-bench
> diff --git a/tools/io_uring/barrier.h b/tools/io_uring/barrier.h
> deleted file mode 100644
> index ef00f6722ba9..000000000000
> --- a/tools/io_uring/barrier.h
> +++ /dev/null
> @@ -1,16 +0,0 @@
> -#ifndef LIBURING_BARRIER_H
> -#define LIBURING_BARRIER_H
> -
> -#if defined(__x86_64) || defined(__i386__)
> -#define read_barrier()	__asm__ __volatile__("":::"memory")
> -#define write_barrier()	__asm__ __volatile__("":::"memory")
> -#else
> -/*
> - * Add arch appropriate definitions. Be safe and use full barriers for
> - * archs we don't have support for.
> - */
> -#define read_barrier()	__sync_synchronize()
> -#define write_barrier()	__sync_synchronize()
> -#endif
> -
> -#endif
> diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c
> index 0f257139b003..29971a2a1c74 100644
> --- a/tools/io_uring/io_uring-bench.c
> +++ b/tools/io_uring/io_uring-bench.c
> @@ -28,9 +28,9 @@
>  #include <string.h>
>  #include <pthread.h>
>  #include <sched.h>
> +#include <asm/barrier.h>
>  
>  #include "liburing.h"
> -#include "barrier.h"
>  
>  #define min(a, b)		((a < b) ? (a) : (b))
>  
> @@ -199,8 +199,7 @@ static int prep_more_ios(struct submitter *s, unsigned max_ios)
>  	next_tail = tail = *ring->tail;
>  	do {
>  		next_tail++;
> -		read_barrier();
> -		if (next_tail == *ring->head)
> +		if (next_tail == smp_load_acquire(ring->head))
>  			break;
>  
>  		index = tail & sq_ring_mask;
> @@ -211,10 +210,11 @@ static int prep_more_ios(struct submitter *s, unsigned max_ios)
>  	} while (prepped < max_ios);
>  
>  	if (*ring->tail != tail) {
> -		/* order tail store with writes to sqes above */
> -		write_barrier();
> -		*ring->tail = tail;
> -		write_barrier();
> +		/*
> +		 * Ensure that the kernel sees the SQE updates before it sees
> +		 * the tail update.
> +		 */
> +		smp_store_release(ring->tail, tail);
>  	}
>  	return prepped;
>  }
> @@ -251,8 +251,7 @@ static int reap_events(struct submitter *s)
>  	do {
>  		struct file *f;
>  
> -		read_barrier();
> -		if (head == *ring->tail)
> +		if (head == smp_load_acquire(ring->tail))
>  			break;
>  		cqe = &ring->cqes[head & cq_ring_mask];
>  		if (!do_nop) {
> @@ -270,8 +269,7 @@ static int reap_events(struct submitter *s)
>  	} while (1);
>  
>  	s->inflight -= reaped;
> -	*ring->head = head;
> -	write_barrier();
> +	smp_store_release(ring->head, head);
>  	return reaped;
>  }
>  
> diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h
> index 5f305c86b892..15b29bfac811 100644
> --- a/tools/io_uring/liburing.h
> +++ b/tools/io_uring/liburing.h
> @@ -10,7 +10,7 @@ extern "C" {
>  #include <string.h>
>  #include "../../include/uapi/linux/io_uring.h"
>  #include <inttypes.h>
> -#include "barrier.h"
> +#include <asm/barrier.h>
>  
>  /*
>   * Library interface to io_uring
> @@ -82,12 +82,11 @@ static inline void io_uring_cqe_seen(struct io_uring *ring,
>  	if (cqe) {
>  		struct io_uring_cq *cq = &ring->cq;
>  
> -		(*cq->khead)++;
>  		/*
> -		 * Ensure that the kernel sees our new head, the kernel has
> -		 * the matching read barrier.
> +		 * Ensure that the kernel only sees the new value of the head
> +		 * index after the CQEs have been read.
>  		 */
> -		write_barrier();
> +		smp_store_release(cq->khead, *cq->khead + 1);
>  	}
>  }
>  
> diff --git a/tools/io_uring/queue.c b/tools/io_uring/queue.c
> index 321819c132c7..ada05bc74361 100644
> --- a/tools/io_uring/queue.c
> +++ b/tools/io_uring/queue.c
> @@ -4,9 +4,9 @@
>  #include <unistd.h>
>  #include <errno.h>
>  #include <string.h>
> +#include <asm/barrier.h>
>  
>  #include "liburing.h"
> -#include "barrier.h"
>  
>  static int __io_uring_get_cqe(struct io_uring *ring,
>  			      struct io_uring_cqe **cqe_ptr, int wait)
> @@ -20,14 +20,12 @@ static int __io_uring_get_cqe(struct io_uring *ring,
>  	head = *cq->khead;
>  	do {
>  		/*
> -		 * It's necessary to use a read_barrier() before reading
> -		 * the CQ tail, since the kernel updates it locklessly. The
> -		 * kernel has the matching store barrier for the update. The
> -		 * kernel also ensures that previous stores to CQEs are ordered
> +		 * It's necessary to use smp_load_acquire() to read the CQ
> +		 * tail. The kernel uses smp_store_release() for updating the
> +		 * CQ tail to ensure that previous stores to CQEs are ordered
>  		 * with the tail update.
>  		 */
> -		read_barrier();
> -		if (head != *cq->ktail) {
> +		if (head != smp_load_acquire(cq->ktail)) {
>  			*cqe_ptr = &cq->cqes[head & mask];
>  			break;
>  		}
> @@ -73,12 +71,11 @@ int io_uring_submit(struct io_uring *ring)
>  	int ret;
>  
>  	/*
> -	 * If we have pending IO in the kring, submit it first. We need a
> -	 * read barrier here to match the kernels store barrier when updating
> -	 * the SQ head.
> +	 * If we have pending IO in the kring, submit it first. We need
> +	 * smp_load_acquire() here to match the kernels smp_store_release()
> +	 * when updating the SQ head.
>  	 */
> -	read_barrier();
> -	if (*sq->khead != *sq->ktail) {
> +	if (smp_load_acquire(sq->khead) != *sq->ktail) {
>  		submitted = *sq->kring_entries;
>  		goto submit;
>  	}
> @@ -94,7 +91,6 @@ int io_uring_submit(struct io_uring *ring)
>  	to_submit = sq->sqe_tail - sq->sqe_head;
>  	while (to_submit--) {
>  		ktail_next++;
> -		read_barrier();
>  
>  		sq->array[ktail & mask] = sq->sqe_head & mask;
>  		ktail = ktail_next;
> @@ -108,18 +104,10 @@ int io_uring_submit(struct io_uring *ring)
>  
>  	if (*sq->ktail != ktail) {
>  		/*
> -		 * First write barrier ensures that the SQE stores are updated
> -		 * with the tail update. This is needed so that the kernel
> -		 * will never see a tail update without the preceeding sQE
> -		 * stores being done.
> +		 * Use smp_store_release() so that the kernel will never see a
> +		 * tail update without the preceding sQE stores being done.
>  		 */
> -		write_barrier();
> -		*sq->ktail = ktail;
> -		/*
> -		 * The kernel has the matching read barrier for reading the
> -		 * SQ tail.
> -		 */
> -		write_barrier();
> +		smp_store_release(sq->ktail, ktail);
>  	}
>  
>  submit:
> -- 
> 2.23.0.rc1.153.gdeed80330f-goog
>
Jens Axboe Aug. 22, 2019, 1:17 p.m. UTC | #2
On 8/20/19 11:29 AM, Bart Van Assche wrote:
> Order head and tail stores properly against CQE / SQE memory accesses.
> Use <asm/barrier.h> instead of the io_uring "barrier.h" header file.

Trying to think of how we can best keeps things sane in this area going
forward. As you know, the tools/io_uring/ stuff is basically just copies
of either liburing files, exception being the io_uring-bench.c which is
a copy of the t/io_uring.c from fio.

We could move t/io_uring.c to liburing instead as a sample file as well.
That is a bit odd, since t/io_uring doesn't use the liburing API at all.
I could change that to just use the setup etc helpers, but still use
the raw interface. I think that's a valid use case, for other programs.
If we do that, then we're down to just syncing with liburing, and we can
take some steps to make that easier to do.

Open to suggestions...
Bart Van Assche Aug. 22, 2019, 2:54 p.m. UTC | #3
On 8/22/19 6:14 AM, Stefan Hajnoczi wrote:
> On Tue, Aug 20, 2019 at 10:29:58AM -0700, Bart Van Assche wrote:
>> Order head and tail stores properly against CQE / SQE memory accesses.
>> Use <asm/barrier.h> instead of the io_uring "barrier.h" header file.
> 
> Where does this header file come from?
> 
> Linux has an asm-generic/barrier.h file which is not uapi and therefore
> not installed in /usr/include.
> 
> I couldn't find an asm/barrier.h in the Debian packages collection
> either.

There two flavours of the asm/barrier.h header file present in the 
kernel tree. I think that the arch/*/include/asm/barrier.h header files 
are intended for kernel code and that the tools/include/asm/barrier.h 
header file is intended for the user space code in the tools directory.

Bart.
Stefan Hajnoczi Aug. 23, 2019, 10:28 a.m. UTC | #4
On Thu, Aug 22, 2019 at 07:54:08AM -0700, Bart Van Assche wrote:
> On 8/22/19 6:14 AM, Stefan Hajnoczi wrote:
> > On Tue, Aug 20, 2019 at 10:29:58AM -0700, Bart Van Assche wrote:
> > > Order head and tail stores properly against CQE / SQE memory accesses.
> > > Use <asm/barrier.h> instead of the io_uring "barrier.h" header file.
> > 
> > Where does this header file come from?
> > 
> > Linux has an asm-generic/barrier.h file which is not uapi and therefore
> > not installed in /usr/include.
> > 
> > I couldn't find an asm/barrier.h in the Debian packages collection
> > either.
> 
> There two flavours of the asm/barrier.h header file present in the kernel
> tree. I think that the arch/*/include/asm/barrier.h header files are
> intended for kernel code and that the tools/include/asm/barrier.h header
> file is intended for the user space code in the tools directory.

Sorry, my mistake.  I thought this was a liburing.git patch :P.

Stefan
diff mbox series

Patch

diff --git a/tools/io_uring/Makefile b/tools/io_uring/Makefile
index 00f146c54c53..bbec91c6274c 100644
--- a/tools/io_uring/Makefile
+++ b/tools/io_uring/Makefile
@@ -1,6 +1,6 @@ 
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for io_uring test tools
-CFLAGS += -Wall -Wextra -g -D_GNU_SOURCE
+CFLAGS += -Wall -Wextra -g -D_GNU_SOURCE -I../include
 LDLIBS += -lpthread
 
 all: io_uring-cp io_uring-bench
diff --git a/tools/io_uring/barrier.h b/tools/io_uring/barrier.h
deleted file mode 100644
index ef00f6722ba9..000000000000
--- a/tools/io_uring/barrier.h
+++ /dev/null
@@ -1,16 +0,0 @@ 
-#ifndef LIBURING_BARRIER_H
-#define LIBURING_BARRIER_H
-
-#if defined(__x86_64) || defined(__i386__)
-#define read_barrier()	__asm__ __volatile__("":::"memory")
-#define write_barrier()	__asm__ __volatile__("":::"memory")
-#else
-/*
- * Add arch appropriate definitions. Be safe and use full barriers for
- * archs we don't have support for.
- */
-#define read_barrier()	__sync_synchronize()
-#define write_barrier()	__sync_synchronize()
-#endif
-
-#endif
diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c
index 0f257139b003..29971a2a1c74 100644
--- a/tools/io_uring/io_uring-bench.c
+++ b/tools/io_uring/io_uring-bench.c
@@ -28,9 +28,9 @@ 
 #include <string.h>
 #include <pthread.h>
 #include <sched.h>
+#include <asm/barrier.h>
 
 #include "liburing.h"
-#include "barrier.h"
 
 #define min(a, b)		((a < b) ? (a) : (b))
 
@@ -199,8 +199,7 @@  static int prep_more_ios(struct submitter *s, unsigned max_ios)
 	next_tail = tail = *ring->tail;
 	do {
 		next_tail++;
-		read_barrier();
-		if (next_tail == *ring->head)
+		if (next_tail == smp_load_acquire(ring->head))
 			break;
 
 		index = tail & sq_ring_mask;
@@ -211,10 +210,11 @@  static int prep_more_ios(struct submitter *s, unsigned max_ios)
 	} while (prepped < max_ios);
 
 	if (*ring->tail != tail) {
-		/* order tail store with writes to sqes above */
-		write_barrier();
-		*ring->tail = tail;
-		write_barrier();
+		/*
+		 * Ensure that the kernel sees the SQE updates before it sees
+		 * the tail update.
+		 */
+		smp_store_release(ring->tail, tail);
 	}
 	return prepped;
 }
@@ -251,8 +251,7 @@  static int reap_events(struct submitter *s)
 	do {
 		struct file *f;
 
-		read_barrier();
-		if (head == *ring->tail)
+		if (head == smp_load_acquire(ring->tail))
 			break;
 		cqe = &ring->cqes[head & cq_ring_mask];
 		if (!do_nop) {
@@ -270,8 +269,7 @@  static int reap_events(struct submitter *s)
 	} while (1);
 
 	s->inflight -= reaped;
-	*ring->head = head;
-	write_barrier();
+	smp_store_release(ring->head, head);
 	return reaped;
 }
 
diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h
index 5f305c86b892..15b29bfac811 100644
--- a/tools/io_uring/liburing.h
+++ b/tools/io_uring/liburing.h
@@ -10,7 +10,7 @@  extern "C" {
 #include <string.h>
 #include "../../include/uapi/linux/io_uring.h"
 #include <inttypes.h>
-#include "barrier.h"
+#include <asm/barrier.h>
 
 /*
  * Library interface to io_uring
@@ -82,12 +82,11 @@  static inline void io_uring_cqe_seen(struct io_uring *ring,
 	if (cqe) {
 		struct io_uring_cq *cq = &ring->cq;
 
-		(*cq->khead)++;
 		/*
-		 * Ensure that the kernel sees our new head, the kernel has
-		 * the matching read barrier.
+		 * Ensure that the kernel only sees the new value of the head
+		 * index after the CQEs have been read.
 		 */
-		write_barrier();
+		smp_store_release(cq->khead, *cq->khead + 1);
 	}
 }
 
diff --git a/tools/io_uring/queue.c b/tools/io_uring/queue.c
index 321819c132c7..ada05bc74361 100644
--- a/tools/io_uring/queue.c
+++ b/tools/io_uring/queue.c
@@ -4,9 +4,9 @@ 
 #include <unistd.h>
 #include <errno.h>
 #include <string.h>
+#include <asm/barrier.h>
 
 #include "liburing.h"
-#include "barrier.h"
 
 static int __io_uring_get_cqe(struct io_uring *ring,
 			      struct io_uring_cqe **cqe_ptr, int wait)
@@ -20,14 +20,12 @@  static int __io_uring_get_cqe(struct io_uring *ring,
 	head = *cq->khead;
 	do {
 		/*
-		 * It's necessary to use a read_barrier() before reading
-		 * the CQ tail, since the kernel updates it locklessly. The
-		 * kernel has the matching store barrier for the update. The
-		 * kernel also ensures that previous stores to CQEs are ordered
+		 * It's necessary to use smp_load_acquire() to read the CQ
+		 * tail. The kernel uses smp_store_release() for updating the
+		 * CQ tail to ensure that previous stores to CQEs are ordered
 		 * with the tail update.
 		 */
-		read_barrier();
-		if (head != *cq->ktail) {
+		if (head != smp_load_acquire(cq->ktail)) {
 			*cqe_ptr = &cq->cqes[head & mask];
 			break;
 		}
@@ -73,12 +71,11 @@  int io_uring_submit(struct io_uring *ring)
 	int ret;
 
 	/*
-	 * If we have pending IO in the kring, submit it first. We need a
-	 * read barrier here to match the kernels store barrier when updating
-	 * the SQ head.
+	 * If we have pending IO in the kring, submit it first. We need
+	 * smp_load_acquire() here to match the kernels smp_store_release()
+	 * when updating the SQ head.
 	 */
-	read_barrier();
-	if (*sq->khead != *sq->ktail) {
+	if (smp_load_acquire(sq->khead) != *sq->ktail) {
 		submitted = *sq->kring_entries;
 		goto submit;
 	}
@@ -94,7 +91,6 @@  int io_uring_submit(struct io_uring *ring)
 	to_submit = sq->sqe_tail - sq->sqe_head;
 	while (to_submit--) {
 		ktail_next++;
-		read_barrier();
 
 		sq->array[ktail & mask] = sq->sqe_head & mask;
 		ktail = ktail_next;
@@ -108,18 +104,10 @@  int io_uring_submit(struct io_uring *ring)
 
 	if (*sq->ktail != ktail) {
 		/*
-		 * First write barrier ensures that the SQE stores are updated
-		 * with the tail update. This is needed so that the kernel
-		 * will never see a tail update without the preceeding sQE
-		 * stores being done.
+		 * Use smp_store_release() so that the kernel will never see a
+		 * tail update without the preceding sQE stores being done.
 		 */
-		write_barrier();
-		*sq->ktail = ktail;
-		/*
-		 * The kernel has the matching read barrier for reading the
-		 * SQ tail.
-		 */
-		write_barrier();
+		smp_store_release(sq->ktail, ktail);
 	}
 
 submit: