diff mbox series

[05/11] block: avoid ordered task state change for polled IO

Message ID 20181113154233.15256-6-axboe@kernel.dk (mailing list archive)
State New, archived
Headers show
Series Various block optimizations | expand

Commit Message

Jens Axboe Nov. 13, 2018, 3:42 p.m. UTC
Ensure that writes to the dio/bio waiter field are ordered
correctly. With the smp_rmb() before the READ_ONCE() check,
we should be able to use a more relaxed ordering for the
task state setting.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/block_dev.c | 11 +++++++++--
 fs/iomap.c     |  6 +++++-
 2 files changed, 14 insertions(+), 3 deletions(-)

Comments

jianchao.wang Nov. 14, 2018, 2:29 a.m. UTC | #1
Hi Jens

On 11/13/18 11:42 PM, Jens Axboe wrote:
> @@ -181,6 +181,7 @@ static void blkdev_bio_end_io_simple(struct bio *bio)
>  	struct task_struct *waiter = bio->bi_private;
>  
>  	WRITE_ONCE(bio->bi_private, NULL);
> +	smp_wmb();
>  	wake_up_process(waiter);
>  }


The wake up path has contained a full memory barrier with the raw_spin_lock
and following smp_mb__after_spinlock

Please refer to:

wake_up_process
  -> try_to_wake_up

	raw_spin_lock_irqsave(&p->pi_lock, flags);
	smp_mb__after_spinlock();

So the smp_wmb here is not necessary.

Thanks
Jianchao
Jens Axboe Nov. 14, 2018, 2:35 a.m. UTC | #2
On 11/13/18 7:29 PM, jianchao.wang wrote:
> Hi Jens
> 
> On 11/13/18 11:42 PM, Jens Axboe wrote:
>> @@ -181,6 +181,7 @@ static void blkdev_bio_end_io_simple(struct bio *bio)
>>  	struct task_struct *waiter = bio->bi_private;
>>  
>>  	WRITE_ONCE(bio->bi_private, NULL);
>> +	smp_wmb();
>>  	wake_up_process(waiter);
>>  }
> 
> 
> The wake up path has contained a full memory barrier with the raw_spin_lock
> and following smp_mb__after_spinlock
> 
> Please refer to:
> 
> wake_up_process
>   -> try_to_wake_up
> 
> 	raw_spin_lock_irqsave(&p->pi_lock, flags);
> 	smp_mb__after_spinlock();
> 
> So the smp_wmb here is not necessary.

And I guess we don't need it for the next patch either, since the path
that doesn't take wake_up_process() is local.

I'll update them (again), thanks!
diff mbox series

Patch

diff --git a/fs/block_dev.c b/fs/block_dev.c
index c039abfb2052..2f920c03996e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -181,6 +181,7 @@  static void blkdev_bio_end_io_simple(struct bio *bio)
 	struct task_struct *waiter = bio->bi_private;
 
 	WRITE_ONCE(bio->bi_private, NULL);
+	smp_wmb();
 	wake_up_process(waiter);
 }
 
@@ -237,9 +238,12 @@  __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 
 	qc = submit_bio(&bio);
 	for (;;) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+
+		smp_rmb();
 		if (!READ_ONCE(bio.bi_private))
 			break;
+
 		if (!(iocb->ki_flags & IOCB_HIPRI) ||
 		    !blk_poll(bdev_get_queue(bdev), qc))
 			io_schedule();
@@ -305,6 +309,7 @@  static void blkdev_bio_end_io(struct bio *bio)
 			struct task_struct *waiter = dio->waiter;
 
 			WRITE_ONCE(dio->waiter, NULL);
+			smp_wmb();
 			wake_up_process(waiter);
 		}
 	}
@@ -403,7 +408,9 @@  __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 		return -EIOCBQUEUED;
 
 	for (;;) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+
+		smp_rmb();
 		if (!READ_ONCE(dio->waiter))
 			break;
 
diff --git a/fs/iomap.c b/fs/iomap.c
index f61d13dfdf09..7898927e758e 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1524,7 +1524,9 @@  static void iomap_dio_bio_end_io(struct bio *bio)
 	if (atomic_dec_and_test(&dio->ref)) {
 		if (dio->wait_for_completion) {
 			struct task_struct *waiter = dio->submit.waiter;
+
 			WRITE_ONCE(dio->submit.waiter, NULL);
+			smp_wmb();
 			wake_up_process(waiter);
 		} else if (dio->flags & IOMAP_DIO_WRITE) {
 			struct inode *inode = file_inode(dio->iocb->ki_filp);
@@ -1888,7 +1890,9 @@  iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 			return -EIOCBQUEUED;
 
 		for (;;) {
-			set_current_state(TASK_UNINTERRUPTIBLE);
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+
+			smp_rmb();
 			if (!READ_ONCE(dio->submit.waiter))
 				break;