Message ID | 20230412222732.1623901-1-davidhwei@meta.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [v2] io_uring: add support for multishot timeouts | expand |
On 4/12/23 23:27, David Wei wrote: > A multishot timeout submission will repeatedly generate completions with > the IORING_CQE_F_MORE cflag set. Depending on the value of the `off' field > in the submission, these timeouts can either repeat indefinitely until > cancelled (`off' = 0) or for a fixed number of times (`off' > 0). > > Only noseq timeouts (i.e. not dependent on the number of I/O > completions) are supported. > > An indefinite timer will be cancelled with EOVERFLOW if the CQ ever > overflows. Seems mostly fine, two comments below > Signed-off-by: David Wei <davidhwei@meta.com> > --- > include/uapi/linux/io_uring.h | 1 + > io_uring/timeout.c | 59 +++++++++++++++++++++++++++++++++-- > 2 files changed, 57 insertions(+), 3 deletions(-) > > diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h > index f8d14d1c58d3..0716cb17e436 100644 > --- a/include/uapi/linux/io_uring.h > +++ b/include/uapi/linux/io_uring.h > @@ -250,6 +250,7 @@ enum io_uring_op { > #define IORING_TIMEOUT_REALTIME (1U << 3) > #define IORING_LINK_TIMEOUT_UPDATE (1U << 4) > #define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) > +#define IORING_TIMEOUT_MULTISHOT (1U << 6) > #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) > #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) > /* > diff --git a/io_uring/timeout.c b/io_uring/timeout.c > index 5c6c6f720809..61b8488565ce 100644 ... > +static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts) > +{ > + struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout); > + struct io_timeout_data *data = req->async_data; > + struct io_ring_ctx *ctx = req->ctx; > + > + if (!io_timeout_finish(timeout, data)) { > + bool filled; > + filled = io_aux_cqe(ctx, false, req->cqe.user_data, -ETIME, > + IORING_CQE_F_MORE, false); > + if (filled) { > + /* re-arm timer */ > + spin_lock_irq(&ctx->timeout_lock); > + list_add(&timeout->list, ctx->timeout_list.prev); > + data->timer.function = io_timeout_fn; > + hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); > + spin_unlock_irq(&ctx->timeout_lock); > + return; > + } > + io_req_set_res(req, -EOVERFLOW, 0); Let's not change the return value. It's considered a normal completion and we don't change the code for them. And there is IORING_CQE_F_MORE for userspace to figure out that it has been terminated. > + } > + > + io_req_task_complete(req, ts); > +} > + > static bool io_kill_timeout(struct io_kiocb *req, int status) > __must_hold(&req->ctx->timeout_lock) > { > @@ -212,7 +253,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) > req_set_fail(req); > > io_req_set_res(req, -ETIME, 0); > - req->io_task_work.func = io_req_task_complete; > + req->io_task_work.func = io_timeout_complete; > io_req_task_work_add(req); > return HRTIMER_NORESTART; > } > @@ -470,16 +511,28 @@ static int __io_timeout_prep(struct io_kiocb *req, > return -EINVAL; > flags = READ_ONCE(sqe->timeout_flags); > if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK | > - IORING_TIMEOUT_ETIME_SUCCESS)) > + IORING_TIMEOUT_ETIME_SUCCESS | > + IORING_TIMEOUT_MULTISHOT)) { > return -EINVAL; > + } Please, don't add braces, they're not needed here. > /* more than one clock specified is invalid, obviously */ > if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1) > return -EINVAL; > + /* multishot requests only make sense with rel values */ > + if (!(~flags & (IORING_TIMEOUT_MULTISHOT | IORING_TIMEOUT_ABS))) > + return -EINVAL; > > INIT_LIST_HEAD(&timeout->list); > timeout->off = off; > if (unlikely(off && !req->ctx->off_timeout_used)) > req->ctx->off_timeout_used = true; > + /* > + * for multishot reqs w/ fixed nr of repeats, target_seq tracks the > + * remaining nr > + */ > + timeout->repeats = 0; > + if ((flags & IORING_TIMEOUT_MULTISHOT) && off > 0) > + timeout->repeats = off; > > if (WARN_ON_ONCE(req_has_async_data(req))) > return -EFAULT;
On 14/04/2023 07:10, Pavel Begunkov wrote: > > > On 4/12/23 23:27, David Wei wrote: >> A multishot timeout submission will repeatedly generate completions with >> the IORING_CQE_F_MORE cflag set. Depending on the value of the `off' field >> in the submission, these timeouts can either repeat indefinitely until >> cancelled (`off' = 0) or for a fixed number of times (`off' > 0). >> >> Only noseq timeouts (i.e. not dependent on the number of I/O >> completions) are supported. >> >> An indefinite timer will be cancelled with EOVERFLOW if the CQ ever >> overflows. > > Seems mostly fine, two comments below > > >> Signed-off-by: David Wei <davidhwei@meta.com> >> --- >> include/uapi/linux/io_uring.h | 1 + >> io_uring/timeout.c | 59 +++++++++++++++++++++++++++++++++-- >> 2 files changed, 57 insertions(+), 3 deletions(-) >> >> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h >> index f8d14d1c58d3..0716cb17e436 100644 >> --- a/include/uapi/linux/io_uring.h >> +++ b/include/uapi/linux/io_uring.h >> @@ -250,6 +250,7 @@ enum io_uring_op { >> #define IORING_TIMEOUT_REALTIME (1U << 3) >> #define IORING_LINK_TIMEOUT_UPDATE (1U << 4) >> #define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) >> +#define IORING_TIMEOUT_MULTISHOT (1U << 6) >> #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) >> #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) >> /* >> diff --git a/io_uring/timeout.c b/io_uring/timeout.c >> index 5c6c6f720809..61b8488565ce 100644 > ... >> +static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts) >> +{ >> + struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout); >> + struct io_timeout_data *data = req->async_data; >> + struct io_ring_ctx *ctx = req->ctx; >> + >> + if (!io_timeout_finish(timeout, data)) { >> + bool filled; >> + filled = io_aux_cqe(ctx, false, req->cqe.user_data, -ETIME, >> + IORING_CQE_F_MORE, false); >> + if (filled) { >> + /* re-arm timer */ >> + spin_lock_irq(&ctx->timeout_lock); >> + list_add(&timeout->list, ctx->timeout_list.prev); >> + data->timer.function = io_timeout_fn; >> + hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); >> + spin_unlock_irq(&ctx->timeout_lock); >> + return; >> + } >> + io_req_set_res(req, -EOVERFLOW, 0); > > Let's not change the return value. It's considered a normal completion > and we don't change the code for them. And there is IORING_CQE_F_MORE > for userspace to figure out that it has been terminated. Makes sense. I'll keep the return value as-is. > > >> + } >> + >> + io_req_task_complete(req, ts); >> +} >> + >> static bool io_kill_timeout(struct io_kiocb *req, int status) >> __must_hold(&req->ctx->timeout_lock) >> { >> @@ -212,7 +253,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) >> req_set_fail(req); >> io_req_set_res(req, -ETIME, 0); >> - req->io_task_work.func = io_req_task_complete; >> + req->io_task_work.func = io_timeout_complete; >> io_req_task_work_add(req); >> return HRTIMER_NORESTART; >> } >> @@ -470,16 +511,28 @@ static int __io_timeout_prep(struct io_kiocb *req, >> return -EINVAL; >> flags = READ_ONCE(sqe->timeout_flags); >> if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK | >> - IORING_TIMEOUT_ETIME_SUCCESS)) >> + IORING_TIMEOUT_ETIME_SUCCESS | >> + IORING_TIMEOUT_MULTISHOT)) { >> return -EINVAL; >> + } > > Please, don't add braces, they're not needed here. Thanks, will remove. > >> /* more than one clock specified is invalid, obviously */ >> if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1) >> return -EINVAL; >> + /* multishot requests only make sense with rel values */ >> + if (!(~flags & (IORING_TIMEOUT_MULTISHOT | IORING_TIMEOUT_ABS))) >> + return -EINVAL; >> INIT_LIST_HEAD(&timeout->list); >> timeout->off = off; >> if (unlikely(off && !req->ctx->off_timeout_used)) >> req->ctx->off_timeout_used = true; >> + /* >> + * for multishot reqs w/ fixed nr of repeats, target_seq tracks the >> + * remaining nr >> + */ >> + timeout->repeats = 0; >> + if ((flags & IORING_TIMEOUT_MULTISHOT) && off > 0) >> + timeout->repeats = off; >> if (WARN_ON_ONCE(req_has_async_data(req))) >> return -EFAULT; >
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index f8d14d1c58d3..0716cb17e436 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -250,6 +250,7 @@ enum io_uring_op { #define IORING_TIMEOUT_REALTIME (1U << 3) #define IORING_LINK_TIMEOUT_UPDATE (1U << 4) #define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) +#define IORING_TIMEOUT_MULTISHOT (1U << 6) #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) /* diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 5c6c6f720809..61b8488565ce 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -17,6 +17,7 @@ struct io_timeout { struct file *file; u32 off; u32 target_seq; + u32 repeats; struct list_head list; /* head of the link, used by linked timeouts only */ struct io_kiocb *head; @@ -37,8 +38,9 @@ struct io_timeout_rem { static inline bool io_is_timeout_noseq(struct io_kiocb *req) { struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout); + struct io_timeout_data *data = req->async_data; - return !timeout->off; + return !timeout->off || data->flags & IORING_TIMEOUT_MULTISHOT; } static inline void io_put_req(struct io_kiocb *req) @@ -49,6 +51,45 @@ static inline void io_put_req(struct io_kiocb *req) } } +static inline bool io_timeout_finish(struct io_timeout *timeout, + struct io_timeout_data *data) +{ + if (!(data->flags & IORING_TIMEOUT_MULTISHOT)) + return true; + + if (!timeout->off || (timeout->repeats && --timeout->repeats)) + return false; + + return true; +} + +static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer); + +static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts) +{ + struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout); + struct io_timeout_data *data = req->async_data; + struct io_ring_ctx *ctx = req->ctx; + + if (!io_timeout_finish(timeout, data)) { + bool filled; + filled = io_aux_cqe(ctx, false, req->cqe.user_data, -ETIME, + IORING_CQE_F_MORE, false); + if (filled) { + /* re-arm timer */ + spin_lock_irq(&ctx->timeout_lock); + list_add(&timeout->list, ctx->timeout_list.prev); + data->timer.function = io_timeout_fn; + hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); + spin_unlock_irq(&ctx->timeout_lock); + return; + } + io_req_set_res(req, -EOVERFLOW, 0); + } + + io_req_task_complete(req, ts); +} + static bool io_kill_timeout(struct io_kiocb *req, int status) __must_hold(&req->ctx->timeout_lock) { @@ -212,7 +253,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) req_set_fail(req); io_req_set_res(req, -ETIME, 0); - req->io_task_work.func = io_req_task_complete; + req->io_task_work.func = io_timeout_complete; io_req_task_work_add(req); return HRTIMER_NORESTART; } @@ -470,16 +511,28 @@ static int __io_timeout_prep(struct io_kiocb *req, return -EINVAL; flags = READ_ONCE(sqe->timeout_flags); if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK | - IORING_TIMEOUT_ETIME_SUCCESS)) + IORING_TIMEOUT_ETIME_SUCCESS | + IORING_TIMEOUT_MULTISHOT)) { return -EINVAL; + } /* more than one clock specified is invalid, obviously */ if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1) return -EINVAL; + /* multishot requests only make sense with rel values */ + if (!(~flags & (IORING_TIMEOUT_MULTISHOT | IORING_TIMEOUT_ABS))) + return -EINVAL; INIT_LIST_HEAD(&timeout->list); timeout->off = off; if (unlikely(off && !req->ctx->off_timeout_used)) req->ctx->off_timeout_used = true; + /* + * for multishot reqs w/ fixed nr of repeats, target_seq tracks the + * remaining nr + */ + timeout->repeats = 0; + if ((flags & IORING_TIMEOUT_MULTISHOT) && off > 0) + timeout->repeats = off; if (WARN_ON_ONCE(req_has_async_data(req))) return -EFAULT;
A multishot timeout submission will repeatedly generate completions with the IORING_CQE_F_MORE cflag set. Depending on the value of the `off' field in the submission, these timeouts can either repeat indefinitely until cancelled (`off' = 0) or for a fixed number of times (`off' > 0). Only noseq timeouts (i.e. not dependent on the number of I/O completions) are supported. An indefinite timer will be cancelled with EOVERFLOW if the CQ ever overflows. Signed-off-by: David Wei <davidhwei@meta.com> --- include/uapi/linux/io_uring.h | 1 + io_uring/timeout.c | 59 +++++++++++++++++++++++++++++++++-- 2 files changed, 57 insertions(+), 3 deletions(-)