Message ID | 20200523185755.8494-13-axboe@kernel.dk (mailing list archive)
---|---
State | New, archived
Series | Add support for async buffered reads
On 23/05/2020 21:57, Jens Axboe wrote:
> If the file is flagged with FMODE_BUF_RASYNC, then we don't have to punt
> the buffered read to an io-wq worker. Instead we can rely on page
> unlocking callbacks to support retry based async IO. This is a lot more
> efficient than doing async thread offload.
>
> The retry is done similarly to how we handle poll based retry. From
> the unlock callback, we simply queue the retry to a task_work based
> handler.
>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  fs/io_uring.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 99 insertions(+)
>
...
> +
> +	init_task_work(&rw->task_work, io_async_buf_retry);
> +	/* submit ref gets dropped, acquire a new one */
> +	refcount_inc(&req->refs);
> +	tsk = req->task;
> +	ret = task_work_add(tsk, &rw->task_work, true);
> +	if (unlikely(ret)) {
> +		/* queue just for cancelation */
> +		init_task_work(&rw->task_work, io_async_buf_cancel);
> +		tsk = io_wq_get_task(req->ctx->io_wq);

IIRC, task will be put somewhere around io_free_req(). Then shouldn't here be
some juggling with reassigning req->task with task_{get,put}()?

> +		task_work_add(tsk, &rw->task_work, true);
> +	}
> +	wake_up_process(tsk);
> +	return 1;
> +}
...
>  static int io_read(struct io_kiocb *req, bool force_nonblock)
>  {
>  	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
> @@ -2601,6 +2696,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
>  	if (!ret) {
>  		ssize_t ret2;
>
> +retry:
>  		if (req->file->f_op->read_iter)
>  			ret2 = call_read_iter(req->file, kiocb, &iter);
>  		else
> @@ -2619,6 +2715,9 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
>  			if (!(req->flags & REQ_F_NOWAIT) &&
>  			    !file_can_poll(req->file))
>  				req->flags |= REQ_F_MUST_PUNT;
> +			if (io_rw_should_retry(req))

It looks like a state machine with IOCB_WAITQ and gotos. Wouldn't it be cleaner
to call call_read_iter()/loop_rw_iter() here directly instead of "goto retry" ?

BTW, can this async stuff return -EAGAIN ?

> +				goto retry;
> +			kiocb->ki_flags &= ~IOCB_WAITQ;
>  			return -EAGAIN;
>  		}
>  	}
>
On 5/25/20 1:29 AM, Pavel Begunkov wrote:
> On 23/05/2020 21:57, Jens Axboe wrote:
>> If the file is flagged with FMODE_BUF_RASYNC, then we don't have to punt
>> the buffered read to an io-wq worker. Instead we can rely on page
>> unlocking callbacks to support retry based async IO. This is a lot more
>> efficient than doing async thread offload.
>>
>> The retry is done similarly to how we handle poll based retry. From
>> the unlock callback, we simply queue the retry to a task_work based
>> handler.
>>
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>> ---
>>  fs/io_uring.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 99 insertions(+)
>>
> ...
>> +
>> +	init_task_work(&rw->task_work, io_async_buf_retry);
>> +	/* submit ref gets dropped, acquire a new one */
>> +	refcount_inc(&req->refs);
>> +	tsk = req->task;
>> +	ret = task_work_add(tsk, &rw->task_work, true);
>> +	if (unlikely(ret)) {
>> +		/* queue just for cancelation */
>> +		init_task_work(&rw->task_work, io_async_buf_cancel);
>> +		tsk = io_wq_get_task(req->ctx->io_wq);
>
> IIRC, task will be put somewhere around io_free_req(). Then shouldn't here be
> some juggling with reassigning req->task with task_{get,put}()?

Not sure I follow? Yes, we'll put this task again when the request
is freed, but not sure what you mean with juggling?

>> +		task_work_add(tsk, &rw->task_work, true);
>> +	}
>> +	wake_up_process(tsk);
>> +	return 1;
>> +}
> ...
>>  static int io_read(struct io_kiocb *req, bool force_nonblock)
>>  {
>>  	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
>> @@ -2601,6 +2696,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
>>  	if (!ret) {
>>  		ssize_t ret2;
>>
>> +retry:
>>  		if (req->file->f_op->read_iter)
>>  			ret2 = call_read_iter(req->file, kiocb, &iter);
>>  		else
>> @@ -2619,6 +2715,9 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
>>  			if (!(req->flags & REQ_F_NOWAIT) &&
>>  			    !file_can_poll(req->file))
>>  				req->flags |= REQ_F_MUST_PUNT;
>> +			if (io_rw_should_retry(req))
>
> It looks like a state machine with IOCB_WAITQ and gotos. Wouldn't it be cleaner
> to call call_read_iter()/loop_rw_iter() here directly instead of "goto retry" ?

We could, probably making that part a separate helper then. How about the
below incremental?

> BTW, can this async stuff return -EAGAIN ?

Probably? Prefer not to make any definitive calls on that being possible or
not, as it's sure to disappoint. If it does and IOCB_WAITQ is already set,
then we'll punt to a thread like before.
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a5a4d9602915..669dccd81207 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2677,6 +2677,13 @@ static bool io_rw_should_retry(struct io_kiocb *req)
 	return false;
 }
 
+static int __io_read(struct io_kiocb *req, struct iov_iter *iter)
+{
+	if (req->file->f_op->read_iter)
+		return call_read_iter(req->file, &req->rw.kiocb, iter);
+	return loop_rw_iter(READ, req->file, &req->rw.kiocb, iter);
+}
+
 static int io_read(struct io_kiocb *req, bool force_nonblock)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
@@ -2710,11 +2717,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
 	if (!ret) {
 		ssize_t ret2;
 
-retry:
-		if (req->file->f_op->read_iter)
-			ret2 = call_read_iter(req->file, kiocb, &iter);
-		else
-			ret2 = loop_rw_iter(READ, req->file, kiocb, &iter);
+		ret2 = __io_read(req, &iter);
 
 		/* Catch -EAGAIN return for forced non-blocking submission */
 		if (!force_nonblock || ret2 != -EAGAIN) {
@@ -2729,8 +2732,11 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
 			if (!(req->flags & REQ_F_NOWAIT) &&
 			    !file_can_poll(req->file))
 				req->flags |= REQ_F_MUST_PUNT;
-			if (io_rw_should_retry(req))
-				goto retry;
+			if (io_rw_should_retry(req)) {
+				ret2 = __io_read(req, &iter);
+				if (ret2 != -EAGAIN)
+					goto out_free;
+			}
 			kiocb->ki_flags &= ~IOCB_WAITQ;
 			return -EAGAIN;
 		}
 	}
On 23/05/2020 21:57, Jens Axboe wrote:
> If the file is flagged with FMODE_BUF_RASYNC, then we don't have to punt
> the buffered read to an io-wq worker. Instead we can rely on page
> unlocking callbacks to support retry based async IO. This is a lot more
> efficient than doing async thread offload.
>
> The retry is done similarly to how we handle poll based retry. From
> the unlock callback, we simply queue the retry to a task_work based
> handler.
>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  fs/io_uring.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 99 insertions(+)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index e95481c552ff..dd532d2634c2 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -498,6 +498,8 @@ struct io_async_rw {
>  	struct iovec *iov;
>  	ssize_t nr_segs;
>  	ssize_t size;
> +	struct wait_page_queue wpq;
> +	struct callback_head task_work;
>  };
>
>  struct io_async_ctx {
> @@ -2568,6 +2570,99 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>  	return 0;
>  }
>
> +static void io_async_buf_cancel(struct callback_head *cb)
> +{
> +	struct io_async_rw *rw;
> +	struct io_ring_ctx *ctx;
> +	struct io_kiocb *req;
> +
> +	rw = container_of(cb, struct io_async_rw, task_work);
> +	req = rw->wpq.wait.private;
> +	ctx = req->ctx;
> +
> +	spin_lock_irq(&ctx->completion_lock);
> +	io_cqring_fill_event(req, -ECANCELED);

It seems like it should go through kiocb_done()/io_complete_rw_common().
My concern is missing io_put_kbuf().

> +	io_commit_cqring(ctx);
> +	spin_unlock_irq(&ctx->completion_lock);
> +
> +	io_cqring_ev_posted(ctx);
> +	req_set_fail_links(req);
> +	io_double_put_req(req);
> +}
On 25/05/2020 22:59, Jens Axboe wrote:
> On 5/25/20 1:29 AM, Pavel Begunkov wrote:
>> On 23/05/2020 21:57, Jens Axboe wrote:
>>> If the file is flagged with FMODE_BUF_RASYNC, then we don't have to punt
>>> the buffered read to an io-wq worker. Instead we can rely on page
>>> unlocking callbacks to support retry based async IO. This is a lot more
>>> efficient than doing async thread offload.
>>>
>>> The retry is done similarly to how we handle poll based retry. From
>>> the unlock callback, we simply queue the retry to a task_work based
>>> handler.
>>>
>>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>>> ---
>>>  fs/io_uring.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 99 insertions(+)
>>>
>> ...
>>> +
>>> +	init_task_work(&rw->task_work, io_async_buf_retry);
>>> +	/* submit ref gets dropped, acquire a new one */
>>> +	refcount_inc(&req->refs);
>>> +	tsk = req->task;
>>> +	ret = task_work_add(tsk, &rw->task_work, true);
>>> +	if (unlikely(ret)) {
>>> +		/* queue just for cancelation */
>>> +		init_task_work(&rw->task_work, io_async_buf_cancel);
>>> +		tsk = io_wq_get_task(req->ctx->io_wq);
>>
>> IIRC, task will be put somewhere around io_free_req(). Then shouldn't here be
>> some juggling with reassigning req->task with task_{get,put}()?
>
> Not sure I follow? Yes, we'll put this task again when the request
> is freed, but not sure what you mean with juggling?

I meant something like:

...
	/* queue just for cancelation */
	init_task_work(&rw->task_work, io_async_buf_cancel);
+	put_task_struct(req->task);
+	req->task = get_task_struct(io_wq_task);

but, thinking twice, if I got the whole idea right, it should be ok as
is -- io-wq won't go away before the request anyway, and leaving
req->task pinned down for a bit is not a problem.

>>> +		task_work_add(tsk, &rw->task_work, true);
>>> +	}
>>> +	wake_up_process(tsk);
>>> +	return 1;
>>> +}
>> ...
>>>  static int io_read(struct io_kiocb *req, bool force_nonblock)
>>>  {
>>>  	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
>>> @@ -2601,6 +2696,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
>>>  	if (!ret) {
>>>  		ssize_t ret2;
>>>
>>> +retry:
>>>  		if (req->file->f_op->read_iter)
>>>  			ret2 = call_read_iter(req->file, kiocb, &iter);
>>>  		else
>>> @@ -2619,6 +2715,9 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
>>>  			if (!(req->flags & REQ_F_NOWAIT) &&
>>>  			    !file_can_poll(req->file))
>>>  				req->flags |= REQ_F_MUST_PUNT;
>>> +			if (io_rw_should_retry(req))
>>
>> It looks like a state machine with IOCB_WAITQ and gotos. Wouldn't it be cleaner
>> to call call_read_iter()/loop_rw_iter() here directly instead of "goto retry" ?
>
> We could, probably making that part a separate helper then. How about the
> below incremental?

IMHO, it was easy to get lost with such implicit state switching.
Looks better now! See a small comment below.

>
>> BTW, can this async stuff return -EAGAIN ?
>
> Probably? Prefer not to make any definitive calls on that being possible or
> not, as it's sure to disappoint. If it does and IOCB_WAITQ is already set,
> then we'll punt to a thread like before.
Sounds reasonable.

>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index a5a4d9602915..669dccd81207 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -2677,6 +2677,13 @@ static bool io_rw_should_retry(struct io_kiocb *req)
>  	return false;
>  }
>
> +static int __io_read(struct io_kiocb *req, struct iov_iter *iter)
> +{
> +	if (req->file->f_op->read_iter)
> +		return call_read_iter(req->file, &req->rw.kiocb, iter);
> +	return loop_rw_iter(READ, req->file, &req->rw.kiocb, iter);
> +}
> +
>  static int io_read(struct io_kiocb *req, bool force_nonblock)
>  {
>  	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
> @@ -2710,11 +2717,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
>  	if (!ret) {
>  		ssize_t ret2;
>
> -retry:
> -		if (req->file->f_op->read_iter)
> -			ret2 = call_read_iter(req->file, kiocb, &iter);
> -		else
> -			ret2 = loop_rw_iter(READ, req->file, kiocb, &iter);
> +		ret2 = __io_read(req, &iter);
>
>  		/* Catch -EAGAIN return for forced non-blocking submission */
>  		if (!force_nonblock || ret2 != -EAGAIN) {
> @@ -2729,8 +2732,11 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
>  			if (!(req->flags & REQ_F_NOWAIT) &&
>  			    !file_can_poll(req->file))
>  				req->flags |= REQ_F_MUST_PUNT;
> -			if (io_rw_should_retry(req))
> -				goto retry;
> +			if (io_rw_should_retry(req)) {
> +				ret2 = __io_read(req, &iter);
> +				if (ret2 != -EAGAIN)
> +					goto out_free;

"goto out_free" returns ret=0, so someone should add a cqe:

if (ret2 != -EAGAIN) {
	kiocb_done(kiocb, ret2);
	goto out_free;
}

> +			}
>  			kiocb->ki_flags &= ~IOCB_WAITQ;
>  			return -EAGAIN;
>  		}
>
On 5/26/20 1:38 AM, Pavel Begunkov wrote:
> On 23/05/2020 21:57, Jens Axboe wrote:
>> If the file is flagged with FMODE_BUF_RASYNC, then we don't have to punt
>> the buffered read to an io-wq worker. Instead we can rely on page
>> unlocking callbacks to support retry based async IO. This is a lot more
>> efficient than doing async thread offload.
>>
>> The retry is done similarly to how we handle poll based retry. From
>> the unlock callback, we simply queue the retry to a task_work based
>> handler.
>>
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>> ---
>>  fs/io_uring.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 99 insertions(+)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index e95481c552ff..dd532d2634c2 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -498,6 +498,8 @@ struct io_async_rw {
>>  	struct iovec *iov;
>>  	ssize_t nr_segs;
>>  	ssize_t size;
>> +	struct wait_page_queue wpq;
>> +	struct callback_head task_work;
>>  };
>>
>>  struct io_async_ctx {
>> @@ -2568,6 +2570,99 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>>  	return 0;
>>  }
>>
>> +static void io_async_buf_cancel(struct callback_head *cb)
>> +{
>> +	struct io_async_rw *rw;
>> +	struct io_ring_ctx *ctx;
>> +	struct io_kiocb *req;
>> +
>> +	rw = container_of(cb, struct io_async_rw, task_work);
>> +	req = rw->wpq.wait.private;
>> +	ctx = req->ctx;
>> +
>> +	spin_lock_irq(&ctx->completion_lock);
>> +	io_cqring_fill_event(req, -ECANCELED);
>
> It seems like it should go through kiocb_done()/io_complete_rw_common().
> My concern is missing io_put_kbuf().

Yeah, I noticed that too after sending it out. If you look at the current
one that I updated yesterday, it does add that (and also renames the iter
read helper):

https://git.kernel.dk/cgit/linux-block/commit/?h=async-buffered.5&id=6f4e3a4066d0db3e3478e58cc250afb16d8d4d91
On 5/26/20 1:44 AM, Pavel Begunkov wrote:
> On 25/05/2020 22:59, Jens Axboe wrote:
>> On 5/25/20 1:29 AM, Pavel Begunkov wrote:
>>> On 23/05/2020 21:57, Jens Axboe wrote:
>>>> If the file is flagged with FMODE_BUF_RASYNC, then we don't have to punt
>>>> the buffered read to an io-wq worker. Instead we can rely on page
>>>> unlocking callbacks to support retry based async IO. This is a lot more
>>>> efficient than doing async thread offload.
>>>>
>>>> The retry is done similarly to how we handle poll based retry. From
>>>> the unlock callback, we simply queue the retry to a task_work based
>>>> handler.
>>>>
>>>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>>>> ---
>>>>  fs/io_uring.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>  1 file changed, 99 insertions(+)
>>>>
>>> ...
>>>> +
>>>> +	init_task_work(&rw->task_work, io_async_buf_retry);
>>>> +	/* submit ref gets dropped, acquire a new one */
>>>> +	refcount_inc(&req->refs);
>>>> +	tsk = req->task;
>>>> +	ret = task_work_add(tsk, &rw->task_work, true);
>>>> +	if (unlikely(ret)) {
>>>> +		/* queue just for cancelation */
>>>> +		init_task_work(&rw->task_work, io_async_buf_cancel);
>>>> +		tsk = io_wq_get_task(req->ctx->io_wq);
>>>
>>> IIRC, task will be put somewhere around io_free_req(). Then shouldn't here be
>>> some juggling with reassigning req->task with task_{get,put}()?
>>
>> Not sure I follow? Yes, we'll put this task again when the request
>> is freed, but not sure what you mean with juggling?
>
> I meant something like:
>
> ...
> 	/* queue just for cancelation */
> 	init_task_work(&rw->task_work, io_async_buf_cancel);
> +	put_task_struct(req->task);
> +	req->task = get_task_struct(io_wq_task);
>
>
> but, thinking twice, if I got the whole idea right, it should be ok as
> is -- io-wq won't go away before the request anyway, and leaving
> req->task pinned down for a bit is not a problem.

OK good, then I think we agree it's fine.

>>>> +		task_work_add(tsk, &rw->task_work, true);
>>>> +	}
>>>> +	wake_up_process(tsk);
>>>> +	return 1;
>>>> +}
>>> ...
>>>>  static int io_read(struct io_kiocb *req, bool force_nonblock)
>>>>  {
>>>>  	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
>>>> @@ -2601,6 +2696,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
>>>>  	if (!ret) {
>>>>  		ssize_t ret2;
>>>>
>>>> +retry:
>>>>  		if (req->file->f_op->read_iter)
>>>>  			ret2 = call_read_iter(req->file, kiocb, &iter);
>>>>  		else
>>>> @@ -2619,6 +2715,9 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
>>>>  			if (!(req->flags & REQ_F_NOWAIT) &&
>>>>  			    !file_can_poll(req->file))
>>>>  				req->flags |= REQ_F_MUST_PUNT;
>>>> +			if (io_rw_should_retry(req))
>>>
>>> It looks like a state machine with IOCB_WAITQ and gotos. Wouldn't it be cleaner
>>> to call call_read_iter()/loop_rw_iter() here directly instead of "goto retry" ?
>>
>> We could, probably making that part a separate helper then. How about the
>> below incremental?
>
> IMHO, it was easy to get lost with such implicit state switching.
> Looks better now! See a small comment below.

Agree, that is cleaner.
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index a5a4d9602915..669dccd81207 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -2677,6 +2677,13 @@ static bool io_rw_should_retry(struct io_kiocb *req)
>>  	return false;
>>  }
>>
>> +static int __io_read(struct io_kiocb *req, struct iov_iter *iter)
>> +{
>> +	if (req->file->f_op->read_iter)
>> +		return call_read_iter(req->file, &req->rw.kiocb, iter);
>> +	return loop_rw_iter(READ, req->file, &req->rw.kiocb, iter);
>> +}
>> +
>>  static int io_read(struct io_kiocb *req, bool force_nonblock)
>>  {
>>  	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
>> @@ -2710,11 +2717,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
>>  	if (!ret) {
>>  		ssize_t ret2;
>>
>> -retry:
>> -		if (req->file->f_op->read_iter)
>> -			ret2 = call_read_iter(req->file, kiocb, &iter);
>> -		else
>> -			ret2 = loop_rw_iter(READ, req->file, kiocb, &iter);
>> +		ret2 = __io_read(req, &iter);
>>
>>  		/* Catch -EAGAIN return for forced non-blocking submission */
>>  		if (!force_nonblock || ret2 != -EAGAIN) {
>> @@ -2729,8 +2732,11 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
>>  			if (!(req->flags & REQ_F_NOWAIT) &&
>>  			    !file_can_poll(req->file))
>>  				req->flags |= REQ_F_MUST_PUNT;
>> -			if (io_rw_should_retry(req))
>> -				goto retry;
>> +			if (io_rw_should_retry(req)) {
>> +				ret2 = __io_read(req, &iter);
>> +				if (ret2 != -EAGAIN)
>> +					goto out_free;
>
> "goto out_free" returns ret=0, so someone should add a cqe:
>
> if (ret2 != -EAGAIN) {
> 	kiocb_done(kiocb, ret2);
> 	goto out_free;
> }

Fixed up in the current one.
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e95481c552ff..dd532d2634c2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -498,6 +498,8 @@ struct io_async_rw {
 	struct iovec *iov;
 	ssize_t nr_segs;
 	ssize_t size;
+	struct wait_page_queue wpq;
+	struct callback_head task_work;
 };
 
 struct io_async_ctx {
@@ -2568,6 +2570,99 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	return 0;
 }
 
+static void io_async_buf_cancel(struct callback_head *cb)
+{
+	struct io_async_rw *rw;
+	struct io_ring_ctx *ctx;
+	struct io_kiocb *req;
+
+	rw = container_of(cb, struct io_async_rw, task_work);
+	req = rw->wpq.wait.private;
+	ctx = req->ctx;
+
+	spin_lock_irq(&ctx->completion_lock);
+	io_cqring_fill_event(req, -ECANCELED);
+	io_commit_cqring(ctx);
+	spin_unlock_irq(&ctx->completion_lock);
+
+	io_cqring_ev_posted(ctx);
+	req_set_fail_links(req);
+	io_double_put_req(req);
+}
+
+static void io_async_buf_retry(struct callback_head *cb)
+{
+	struct io_async_rw *rw;
+	struct io_ring_ctx *ctx;
+	struct io_kiocb *req;
+
+	rw = container_of(cb, struct io_async_rw, task_work);
+	req = rw->wpq.wait.private;
+	ctx = req->ctx;
+
+	__set_current_state(TASK_RUNNING);
+	mutex_lock(&ctx->uring_lock);
+	__io_queue_sqe(req, NULL);
+	mutex_unlock(&ctx->uring_lock);
+}
+
+static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
+			     int sync, void *arg)
+{
+	struct wait_page_queue *wpq;
+	struct io_kiocb *req = wait->private;
+	struct io_async_rw *rw = &req->io->rw;
+	struct wait_page_key *key = arg;
+	struct task_struct *tsk;
+	int ret;
+
+	wpq = container_of(wait, struct wait_page_queue, wait);
+
+	ret = wake_page_match(wpq, key);
+	if (ret != 1)
+		return ret;
+
+	list_del_init(&wait->entry);
+
+	init_task_work(&rw->task_work, io_async_buf_retry);
+	/* submit ref gets dropped, acquire a new one */
+	refcount_inc(&req->refs);
+	tsk = req->task;
+	ret = task_work_add(tsk, &rw->task_work, true);
+	if (unlikely(ret)) {
+		/* queue just for cancelation */
+		init_task_work(&rw->task_work, io_async_buf_cancel);
+		tsk = io_wq_get_task(req->ctx->io_wq);
+		task_work_add(tsk, &rw->task_work, true);
+	}
+	wake_up_process(tsk);
+	return 1;
+}
+
+static bool io_rw_should_retry(struct io_kiocb *req)
+{
+	struct kiocb *kiocb = &req->rw.kiocb;
+	int ret;
+
+	/* already tried, or we're doing O_DIRECT */
+	if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_WAITQ))
+		return false;
+	/*
+	 * just use poll if we can, and don't attempt if the fs doesn't
+	 * support callback based unlocks
+	 */
+	if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
+		return false;
+
+	ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq,
+					 io_async_buf_func, req);
+	if (ret)
+		return false;
+	get_task_struct(current);
+	req->task = current;
+	return true;
+}
+
 static int io_read(struct io_kiocb *req, bool force_nonblock)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
@@ -2601,6 +2696,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
 	if (!ret) {
 		ssize_t ret2;
 
+retry:
 		if (req->file->f_op->read_iter)
 			ret2 = call_read_iter(req->file, kiocb, &iter);
 		else
@@ -2619,6 +2715,9 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
 			if (!(req->flags & REQ_F_NOWAIT) &&
 			    !file_can_poll(req->file))
 				req->flags |= REQ_F_MUST_PUNT;
+			if (io_rw_should_retry(req))
+				goto retry;
+			kiocb->ki_flags &= ~IOCB_WAITQ;
 			return -EAGAIN;
 		}
 	}
If the file is flagged with FMODE_BUF_RASYNC, then we don't have to punt
the buffered read to an io-wq worker. Instead we can rely on page
unlocking callbacks to support retry based async IO. This is a lot more
efficient than doing async thread offload.

The retry is done similarly to how we handle poll based retry. From
the unlock callback, we simply queue the retry to a task_work based
handler.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
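
For context, here is a minimal userspace sketch (an editorial illustration, not part of the patch; the file name, buffer size, and queue depth are arbitrary) of the kind of request this series targets: a plain buffered read submitted through liburing. On a filesystem whose files carry FMODE_BUF_RASYNC, a read like this should be able to complete via the page-unlock/task_work retry path above instead of being punted to an io-wq worker.

```c
#include <fcntl.h>
#include <stdio.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	char buf[4096];
	int fd, ret;

	/* buffered fd: no O_DIRECT, so the read goes through the page cache */
	fd = open("testfile", O_RDONLY);
	if (fd < 0)
		return 1;

	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0)
		return 1;

	/* queue one buffered read at offset 0; completion arrives on the CQ ring */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);
	io_uring_submit(&ring);

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (!ret) {
		printf("read returned %d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}

	io_uring_queue_exit(&ring);
	return 0;
}
```

liburing's io_uring_prep_read() issues IORING_OP_READ, which goes through the io_read() path modified by this patch; readv-style submissions take the same route.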