[04/11] block: avoid ordered task state change for polled IO

Message-ID: 20181115195135.22812-5-axboe@kernel.dk
State: New, archived
Series: Various block optimizations

Commit Message

Jens Axboe Nov. 15, 2018, 7:51 p.m. UTC
Ensure that writes to the dio/bio waiter field are ordered
correctly. With the smp_rmb() before the READ_ONCE() check,
we should be able to use a more relaxed ordering for the
task state setting. We don't need a heavier barrier on
the wakeup side after writing the waiter field, since we're
either going to be in the task we care about, or go through
wake_up_process(), which implies a strong enough barrier.

For the core poll helper, the task state setting doesn't need
to imply any atomics, as it's the current task itself that
is being modified and we're not going to sleep.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 4 ++--
 fs/block_dev.c | 9 +++++++--
 fs/iomap.c     | 4 +++-
 mm/page_io.c   | 4 +++-
 4 files changed, 15 insertions(+), 6 deletions(-)
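
For context on what the patch is trading away: the two task state
helpers differ only in the barrier they imply. Simplified from
include/linux/sched.h of this era (shown for reference, not part of
the patch):

	#define __set_current_state(state_value)		\
		current->state = (state_value)

	#define set_current_state(state_value)			\
		smp_store_mb(current->state, (state_value))

set_current_state() is a store plus a full memory barrier;
__set_current_state() is a plain store. The commit's argument is that
the explicit smp_rmb() ahead of the READ_ONCE() check supplies the
ordering the full barrier used to provide, so the cheaper store is
safe in these wait loops.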

Comments

Christoph Hellwig Nov. 16, 2018, 8:41 a.m. UTC | #1
On Thu, Nov 15, 2018 at 12:51:28PM -0700, Jens Axboe wrote:
> Ensure that writes to the dio/bio waiter field are ordered
> correctly. With the smp_rmb() before the READ_ONCE() check,
> we should be able to use a more relaxed ordering for the
> task state setting. We don't need a heavier barrier on
> the wakeup side after writing the waiter field, since we're
> either going to be in the task we care about, or go through
> wake_up_process(), which implies a strong enough barrier.
> 
> For the core poll helper, the task state setting doesn't need
> to imply any atomics, as it's the current task itself that
> is being modified and we're not going to sleep.
> 
> [...]
> 
> diff --git a/mm/page_io.c b/mm/page_io.c
> index d4d1c89bcddd..008f6d00c47c 100644
> --- a/mm/page_io.c
> +++ b/mm/page_io.c
> @@ -405,7 +405,9 @@ int swap_readpage(struct page *page, bool synchronous)
>  	bio_get(bio);
>  	qc = submit_bio(bio);
>  	while (synchronous) {
> -		set_current_state(TASK_UNINTERRUPTIBLE);
> +		__set_current_state(TASK_UNINTERRUPTIBLE);
> +
> +		smp_rmb();
>  		if (!READ_ONCE(bio->bi_private))

I think any smp_rmb() should have a big fat comment explaining it.

Also, to help stupid people like me that don't understand why we even
need it here, given the READ_ONCE() below.
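
For reference, the two primitives guarantee different things; a
summary in kernel terms, not from the thread itself:

	/*
	 * READ_ONCE(x): a single, non-torn load that the compiler must
	 * not reorder, merge, or re-issue. On most architectures it
	 * emits no CPU barrier instruction.
	 *
	 * smp_rmb(): a CPU read barrier; it orders earlier loads
	 * against later loads on weakly ordered architectures, but
	 * says nothing about stores.
	 */

So READ_ONCE() alone constrains only the compiler, which is the gap
the smp_rmb() was presumably meant to cover; whether it is actually
needed is what the reply below settles.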
Jens Axboe Nov. 16, 2018, 3:32 p.m. UTC | #2
On 11/16/18 1:41 AM, Christoph Hellwig wrote:
> On Thu, Nov 15, 2018 at 12:51:28PM -0700, Jens Axboe wrote:
>> [...]
>> -		set_current_state(TASK_UNINTERRUPTIBLE);
>> +		__set_current_state(TASK_UNINTERRUPTIBLE);
>> +
>> +		smp_rmb();
>>  		if (!READ_ONCE(bio->bi_private))
> 
> I think any smp_rmb() should have a big fat comment explaining it.
> 
> Also, to help stupid people like me that don't understand why we even
> need it here, given the READ_ONCE() below.

Thinking about it, I don't think we need it at all. The barrier for
the task check is done on the wakeup side, and the READ_ONCE() should
be enough for the read below. I'll update it.
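
The pairing being relied on looks roughly like this on the completion
side; a sketch with illustrative variable names, following the shape
of the in-tree end_io handlers:

	/* Completion (waker) side, sketched: */
	struct task_struct *waiter = bio->bi_private;

	bio->bi_private = NULL;		/* publish completion */
	wake_up_process(waiter);

wake_up_process() implies a general memory barrier when it actually
wakes the task, so the NULL store is visible to the waiter by the time
it runs again; on the waiter side the READ_ONCE() check then either
observes NULL and breaks out, or misses it, sleeps, and is woken by
the call above.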

Patch

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 32b246ed44c0..7fc4abb4cc36 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3331,12 +3331,12 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
 		ret = q->mq_ops->poll(hctx, rq->tag);
 		if (ret > 0) {
 			hctx->poll_success++;
-			set_current_state(TASK_RUNNING);
+			__set_current_state(TASK_RUNNING);
 			return true;
 		}
 
 		if (signal_pending_state(state, current))
-			set_current_state(TASK_RUNNING);
+			__set_current_state(TASK_RUNNING);
 
 		if (current->state == TASK_RUNNING)
 			return true;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c039abfb2052..5b754f84c814 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -237,9 +237,12 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 
 	qc = submit_bio(&bio);
 	for (;;) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+
+		smp_rmb();
 		if (!READ_ONCE(bio.bi_private))
 			break;
+
 		if (!(iocb->ki_flags & IOCB_HIPRI) ||
 		    !blk_poll(bdev_get_queue(bdev), qc))
 			io_schedule();
@@ -403,7 +406,9 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 		return -EIOCBQUEUED;
 
 	for (;;) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+
+		smp_rmb();
 		if (!READ_ONCE(dio->waiter))
 			break;
 
diff --git a/fs/iomap.c b/fs/iomap.c
index f61d13dfdf09..3373ea4984d9 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1888,7 +1888,9 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 			return -EIOCBQUEUED;
 
 		for (;;) {
-			set_current_state(TASK_UNINTERRUPTIBLE);
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+
+			smp_rmb();
 			if (!READ_ONCE(dio->submit.waiter))
 				break;
 
diff --git a/mm/page_io.c b/mm/page_io.c
index d4d1c89bcddd..008f6d00c47c 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -405,7 +405,9 @@ int swap_readpage(struct page *page, bool synchronous)
 	bio_get(bio);
 	qc = submit_bio(bio);
 	while (synchronous) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+
+		smp_rmb();
 		if (!READ_ONCE(bio->bi_private))
 			break;