Message ID | 20181115195135.22812-5-axboe@kernel.dk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Various block optimizations | expand |
On Thu, Nov 15, 2018 at 12:51:28PM -0700, Jens Axboe wrote: > Ensure that writes to the dio/bio waiter field are ordered > correctly. With the smp_rmb() before the READ_ONCE() check, > we should be able to use a more relaxed ordering for the > task state setting. We don't need a heavier barrier on > the wakeup side after writing the waiter field, since we > either going to be in the task we care about, or go through > wake_up_process() which implies a strong enough barrier. > > For the core poll helper, the task state setting don't need > to imply any atomics, as it's the current task itself that > is being modified and we're not going to sleep. > > Signed-off-by: Jens Axboe <axboe@kernel.dk> > --- > block/blk-mq.c | 4 ++-- > fs/block_dev.c | 9 +++++++-- > fs/iomap.c | 4 +++- > mm/page_io.c | 4 +++- > 4 files changed, 15 insertions(+), 6 deletions(-) > > diff --git a/block/blk-mq.c b/block/blk-mq.c > index 32b246ed44c0..7fc4abb4cc36 100644 > --- a/block/blk-mq.c > +++ b/block/blk-mq.c > @@ -3331,12 +3331,12 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq) > ret = q->mq_ops->poll(hctx, rq->tag); > if (ret > 0) { > hctx->poll_success++; > - set_current_state(TASK_RUNNING); > + __set_current_state(TASK_RUNNING); > return true; > } > > if (signal_pending_state(state, current)) > - set_current_state(TASK_RUNNING); > + __set_current_state(TASK_RUNNING); > > if (current->state == TASK_RUNNING) > return true; > diff --git a/fs/block_dev.c b/fs/block_dev.c > index c039abfb2052..5b754f84c814 100644 > --- a/fs/block_dev.c > +++ b/fs/block_dev.c > @@ -237,9 +237,12 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, > > qc = submit_bio(&bio); > for (;;) { > - set_current_state(TASK_UNINTERRUPTIBLE); > + __set_current_state(TASK_UNINTERRUPTIBLE); > + > + smp_rmb(); > if (!READ_ONCE(bio.bi_private)) > break; > + > if (!(iocb->ki_flags & IOCB_HIPRI) || > !blk_poll(bdev_get_queue(bdev), qc)) > io_schedule(); > @@ -403,7 +406,9 @@ 
__blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) > return -EIOCBQUEUED; > > for (;;) { > - set_current_state(TASK_UNINTERRUPTIBLE); > + __set_current_state(TASK_UNINTERRUPTIBLE); > + > + smp_rmb(); > if (!READ_ONCE(dio->waiter)) > break; > > diff --git a/fs/iomap.c b/fs/iomap.c > index f61d13dfdf09..3373ea4984d9 100644 > --- a/fs/iomap.c > +++ b/fs/iomap.c > @@ -1888,7 +1888,9 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, > return -EIOCBQUEUED; > > for (;;) { > - set_current_state(TASK_UNINTERRUPTIBLE); > + __set_current_state(TASK_UNINTERRUPTIBLE); > + > + smp_rmb(); > if (!READ_ONCE(dio->submit.waiter)) > break; > > diff --git a/mm/page_io.c b/mm/page_io.c > index d4d1c89bcddd..008f6d00c47c 100644 > --- a/mm/page_io.c > +++ b/mm/page_io.c > @@ -405,7 +405,9 @@ int swap_readpage(struct page *page, bool synchronous) > bio_get(bio); > qc = submit_bio(bio); > while (synchronous) { > - set_current_state(TASK_UNINTERRUPTIBLE); > + __set_current_state(TASK_UNINTERRUPTIBLE); > + > + smp_rmb(); > if (!READ_ONCE(bio->bi_private)) I think any smp_rmb() should have a big fact comment explaining it. Also to help stupid people like me that dont understand why we even need it here given the READ_ONCE below.
On 11/16/18 1:41 AM, Christoph Hellwig wrote: > On Thu, Nov 15, 2018 at 12:51:28PM -0700, Jens Axboe wrote: >> Ensure that writes to the dio/bio waiter field are ordered >> correctly. With the smp_rmb() before the READ_ONCE() check, >> we should be able to use a more relaxed ordering for the >> task state setting. We don't need a heavier barrier on >> the wakeup side after writing the waiter field, since we >> either going to be in the task we care about, or go through >> wake_up_process() which implies a strong enough barrier. >> >> For the core poll helper, the task state setting don't need >> to imply any atomics, as it's the current task itself that >> is being modified and we're not going to sleep. >> >> Signed-off-by: Jens Axboe <axboe@kernel.dk> >> --- >> block/blk-mq.c | 4 ++-- >> fs/block_dev.c | 9 +++++++-- >> fs/iomap.c | 4 +++- >> mm/page_io.c | 4 +++- >> 4 files changed, 15 insertions(+), 6 deletions(-) >> >> diff --git a/block/blk-mq.c b/block/blk-mq.c >> index 32b246ed44c0..7fc4abb4cc36 100644 >> --- a/block/blk-mq.c >> +++ b/block/blk-mq.c >> @@ -3331,12 +3331,12 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq) >> ret = q->mq_ops->poll(hctx, rq->tag); >> if (ret > 0) { >> hctx->poll_success++; >> - set_current_state(TASK_RUNNING); >> + __set_current_state(TASK_RUNNING); >> return true; >> } >> >> if (signal_pending_state(state, current)) >> - set_current_state(TASK_RUNNING); >> + __set_current_state(TASK_RUNNING); >> >> if (current->state == TASK_RUNNING) >> return true; >> diff --git a/fs/block_dev.c b/fs/block_dev.c >> index c039abfb2052..5b754f84c814 100644 >> --- a/fs/block_dev.c >> +++ b/fs/block_dev.c >> @@ -237,9 +237,12 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, >> >> qc = submit_bio(&bio); >> for (;;) { >> - set_current_state(TASK_UNINTERRUPTIBLE); >> + __set_current_state(TASK_UNINTERRUPTIBLE); >> + >> + smp_rmb(); >> if (!READ_ONCE(bio.bi_private)) >> break; >> + >> if 
(!(iocb->ki_flags & IOCB_HIPRI) || >> !blk_poll(bdev_get_queue(bdev), qc)) >> io_schedule(); >> @@ -403,7 +406,9 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) >> return -EIOCBQUEUED; >> >> for (;;) { >> - set_current_state(TASK_UNINTERRUPTIBLE); >> + __set_current_state(TASK_UNINTERRUPTIBLE); >> + >> + smp_rmb(); >> if (!READ_ONCE(dio->waiter)) >> break; >> >> diff --git a/fs/iomap.c b/fs/iomap.c >> index f61d13dfdf09..3373ea4984d9 100644 >> --- a/fs/iomap.c >> +++ b/fs/iomap.c >> @@ -1888,7 +1888,9 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, >> return -EIOCBQUEUED; >> >> for (;;) { >> - set_current_state(TASK_UNINTERRUPTIBLE); >> + __set_current_state(TASK_UNINTERRUPTIBLE); >> + >> + smp_rmb(); >> if (!READ_ONCE(dio->submit.waiter)) >> break; >> >> diff --git a/mm/page_io.c b/mm/page_io.c >> index d4d1c89bcddd..008f6d00c47c 100644 >> --- a/mm/page_io.c >> +++ b/mm/page_io.c >> @@ -405,7 +405,9 @@ int swap_readpage(struct page *page, bool synchronous) >> bio_get(bio); >> qc = submit_bio(bio); >> while (synchronous) { >> - set_current_state(TASK_UNINTERRUPTIBLE); >> + __set_current_state(TASK_UNINTERRUPTIBLE); >> + >> + smp_rmb(); >> if (!READ_ONCE(bio->bi_private)) > > I think any smp_rmb() should have a big fact comment explaining it. > > Also to help stupid people like me that dont understand why we even > need it here given the READ_ONCE below. Thinking about it, I don't think we need it at all. The barrier for the task check is done on the wakeup side, the READ_ONCE() should be enough for the read below. I'll update it.
diff --git a/block/blk-mq.c b/block/blk-mq.c index 32b246ed44c0..7fc4abb4cc36 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3331,12 +3331,12 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq) ret = q->mq_ops->poll(hctx, rq->tag); if (ret > 0) { hctx->poll_success++; - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); return true; } if (signal_pending_state(state, current)) - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); if (current->state == TASK_RUNNING) return true; diff --git a/fs/block_dev.c b/fs/block_dev.c index c039abfb2052..5b754f84c814 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -237,9 +237,12 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, qc = submit_bio(&bio); for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); + __set_current_state(TASK_UNINTERRUPTIBLE); + + smp_rmb(); if (!READ_ONCE(bio.bi_private)) break; + if (!(iocb->ki_flags & IOCB_HIPRI) || !blk_poll(bdev_get_queue(bdev), qc)) io_schedule(); @@ -403,7 +406,9 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) return -EIOCBQUEUED; for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); + __set_current_state(TASK_UNINTERRUPTIBLE); + + smp_rmb(); if (!READ_ONCE(dio->waiter)) break; diff --git a/fs/iomap.c b/fs/iomap.c index f61d13dfdf09..3373ea4984d9 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1888,7 +1888,9 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, return -EIOCBQUEUED; for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); + __set_current_state(TASK_UNINTERRUPTIBLE); + + smp_rmb(); if (!READ_ONCE(dio->submit.waiter)) break; diff --git a/mm/page_io.c b/mm/page_io.c index d4d1c89bcddd..008f6d00c47c 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -405,7 +405,9 @@ int swap_readpage(struct page *page, bool synchronous) bio_get(bio); qc = submit_bio(bio); while (synchronous) { - set_current_state(TASK_UNINTERRUPTIBLE); + 
__set_current_state(TASK_UNINTERRUPTIBLE); + + smp_rmb(); if (!READ_ONCE(bio->bi_private)) break;
Ensure that writes to the dio/bio waiter field are ordered correctly. With the smp_rmb() before the READ_ONCE() check, we should be able to use a more relaxed ordering for the task state setting. We don't need a heavier barrier on the wakeup side after writing the waiter field, since we are either going to be in the task we care about, or go through wake_up_process(), which implies a strong enough barrier. For the core poll helper, the task state setting doesn't need to imply any atomics, as it's the current task itself that is being modified and we're not going to sleep. Signed-off-by: Jens Axboe <axboe@kernel.dk> --- block/blk-mq.c | 4 ++-- fs/block_dev.c | 9 +++++++-- fs/iomap.c | 4 +++- mm/page_io.c | 4 +++- 4 files changed, 15 insertions(+), 6 deletions(-)