diff mbox series

[v2] io_uring: add support for multishot timeouts

Message ID 20230412222732.1623901-1-davidhwei@meta.com (mailing list archive)
State New
Headers show
Series [v2] io_uring: add support for multishot timeouts | expand

Commit Message

David Wei April 12, 2023, 10:27 p.m. UTC
A multishot timeout submission will repeatedly generate completions with
the IORING_CQE_F_MORE cflag set. Depending on the value of the `off' field
in the submission, these timeouts can either repeat indefinitely until
cancelled (`off' = 0) or for a fixed number of times (`off' > 0).

Only noseq timeouts (i.e. not dependent on the number of I/O
completions) are supported.

An indefinite timer will be cancelled with EOVERFLOW if the CQ ever
overflows.

Signed-off-by: David Wei <davidhwei@meta.com>
---
 include/uapi/linux/io_uring.h |  1 +
 io_uring/timeout.c            | 59 +++++++++++++++++++++++++++++++++--
 2 files changed, 57 insertions(+), 3 deletions(-)

Comments

Pavel Begunkov April 14, 2023, 2:10 p.m. UTC | #1
On 4/12/23 23:27, David Wei wrote:
> A multishot timeout submission will repeatedly generate completions with
> the IORING_CQE_F_MORE cflag set. Depending on the value of the `off' field
> in the submission, these timeouts can either repeat indefinitely until
> cancelled (`off' = 0) or for a fixed number of times (`off' > 0).
> 
> Only noseq timeouts (i.e. not dependent on the number of I/O
> completions) are supported.
> 
> An indefinite timer will be cancelled with EOVERFLOW if the CQ ever
> overflows.

Seems mostly fine, two comments below


> Signed-off-by: David Wei <davidhwei@meta.com>
> ---
>   include/uapi/linux/io_uring.h |  1 +
>   io_uring/timeout.c            | 59 +++++++++++++++++++++++++++++++++--
>   2 files changed, 57 insertions(+), 3 deletions(-)
> 
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index f8d14d1c58d3..0716cb17e436 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -250,6 +250,7 @@ enum io_uring_op {
>   #define IORING_TIMEOUT_REALTIME		(1U << 3)
>   #define IORING_LINK_TIMEOUT_UPDATE	(1U << 4)
>   #define IORING_TIMEOUT_ETIME_SUCCESS	(1U << 5)
> +#define IORING_TIMEOUT_MULTISHOT	(1U << 6)
>   #define IORING_TIMEOUT_CLOCK_MASK	(IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
>   #define IORING_TIMEOUT_UPDATE_MASK	(IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
>   /*
> diff --git a/io_uring/timeout.c b/io_uring/timeout.c
> index 5c6c6f720809..61b8488565ce 100644
...
> +static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
> +{
> +	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
> +	struct io_timeout_data *data = req->async_data;
> +	struct io_ring_ctx *ctx = req->ctx;
> +
> +	if (!io_timeout_finish(timeout, data)) {
> +		bool filled;
> +		filled = io_aux_cqe(ctx, false, req->cqe.user_data, -ETIME,
> +				      IORING_CQE_F_MORE, false);
> +		if (filled) {
> +			/* re-arm timer */
> +			spin_lock_irq(&ctx->timeout_lock);
> +			list_add(&timeout->list, ctx->timeout_list.prev);
> +			data->timer.function = io_timeout_fn;
> +			hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
> +			spin_unlock_irq(&ctx->timeout_lock);
> +			return;
> +		}
> +		io_req_set_res(req, -EOVERFLOW, 0);

Let's not change the return value. It's considered a normal completion
and we don't change the code for them. And there is IORING_CQE_F_MORE
for userspace to figure out that it has been terminated.


> +	}
> +
> +	io_req_task_complete(req, ts);
> +}
> +
>   static bool io_kill_timeout(struct io_kiocb *req, int status)
>   	__must_hold(&req->ctx->timeout_lock)
>   {
> @@ -212,7 +253,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
>   		req_set_fail(req);
>   
>   	io_req_set_res(req, -ETIME, 0);
> -	req->io_task_work.func = io_req_task_complete;
> +	req->io_task_work.func = io_timeout_complete;
>   	io_req_task_work_add(req);
>   	return HRTIMER_NORESTART;
>   }
> @@ -470,16 +511,28 @@ static int __io_timeout_prep(struct io_kiocb *req,
>   		return -EINVAL;
>   	flags = READ_ONCE(sqe->timeout_flags);
>   	if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
> -		      IORING_TIMEOUT_ETIME_SUCCESS))
> +		      IORING_TIMEOUT_ETIME_SUCCESS |
> +		      IORING_TIMEOUT_MULTISHOT)) {
>   		return -EINVAL;
> +	}

Please, don't add braces, they're not needed here.

>   	/* more than one clock specified is invalid, obviously */
>   	if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
>   		return -EINVAL;
> +	/* multishot requests only make sense with rel values */
> +	if (!(~flags & (IORING_TIMEOUT_MULTISHOT | IORING_TIMEOUT_ABS)))
> +		return -EINVAL;
>   
>   	INIT_LIST_HEAD(&timeout->list);
>   	timeout->off = off;
>   	if (unlikely(off && !req->ctx->off_timeout_used))
>   		req->ctx->off_timeout_used = true;
> +	/*
> +	 * for multishot reqs w/ fixed nr of repeats, target_seq tracks the
> +	 * remaining nr
> +	 */
> +	timeout->repeats = 0;
> +	if ((flags & IORING_TIMEOUT_MULTISHOT) && off > 0)
> +		timeout->repeats = off;
>   
>   	if (WARN_ON_ONCE(req_has_async_data(req)))
>   		return -EFAULT;
David Wei April 14, 2023, 5:17 p.m. UTC | #2
On 14/04/2023 07:10, Pavel Begunkov wrote:
> > 
> On 4/12/23 23:27, David Wei wrote:
>> A multishot timeout submission will repeatedly generate completions with
>> the IORING_CQE_F_MORE cflag set. Depending on the value of the `off' field
>> in the submission, these timeouts can either repeat indefinitely until
>> cancelled (`off' = 0) or for a fixed number of times (`off' > 0).
>>
>> Only noseq timeouts (i.e. not dependent on the number of I/O
>> completions) are supported.
>>
>> An indefinite timer will be cancelled with EOVERFLOW if the CQ ever
>> overflows.
> 
> Seems mostly fine, two comments below
> 
> 
>> Signed-off-by: David Wei <davidhwei@meta.com>
>> ---
>>   include/uapi/linux/io_uring.h |  1 +
>>   io_uring/timeout.c            | 59 +++++++++++++++++++++++++++++++++--
>>   2 files changed, 57 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>> index f8d14d1c58d3..0716cb17e436 100644
>> --- a/include/uapi/linux/io_uring.h
>> +++ b/include/uapi/linux/io_uring.h
>> @@ -250,6 +250,7 @@ enum io_uring_op {
>>   #define IORING_TIMEOUT_REALTIME        (1U << 3)
>>   #define IORING_LINK_TIMEOUT_UPDATE    (1U << 4)
>>   #define IORING_TIMEOUT_ETIME_SUCCESS    (1U << 5)
>> +#define IORING_TIMEOUT_MULTISHOT    (1U << 6)
>>   #define IORING_TIMEOUT_CLOCK_MASK    (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
>>   #define IORING_TIMEOUT_UPDATE_MASK    (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
>>   /*
>> diff --git a/io_uring/timeout.c b/io_uring/timeout.c
>> index 5c6c6f720809..61b8488565ce 100644
> ...
>> +static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
>> +{
>> +    struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
>> +    struct io_timeout_data *data = req->async_data;
>> +    struct io_ring_ctx *ctx = req->ctx;
>> +
>> +    if (!io_timeout_finish(timeout, data)) {
>> +        bool filled;
>> +        filled = io_aux_cqe(ctx, false, req->cqe.user_data, -ETIME,
>> +                      IORING_CQE_F_MORE, false);
>> +        if (filled) {
>> +            /* re-arm timer */
>> +            spin_lock_irq(&ctx->timeout_lock);
>> +            list_add(&timeout->list, ctx->timeout_list.prev);
>> +            data->timer.function = io_timeout_fn;
>> +            hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
>> +            spin_unlock_irq(&ctx->timeout_lock);
>> +            return;
>> +        }
>> +        io_req_set_res(req, -EOVERFLOW, 0);
> 
> Let's not change the return value. It's considered a normal completion
> and we don't change the code for them. And there is IORING_CQE_F_MORE
> for userspace to figure out that it has been terminated.

Makes sense. I'll keep the return value as-is.

> 
> 
>> +    }
>> +
>> +    io_req_task_complete(req, ts);
>> +}
>> +
>>   static bool io_kill_timeout(struct io_kiocb *req, int status)
>>       __must_hold(&req->ctx->timeout_lock)
>>   {
>> @@ -212,7 +253,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
>>           req_set_fail(req);
>>         io_req_set_res(req, -ETIME, 0);
>> -    req->io_task_work.func = io_req_task_complete;
>> +    req->io_task_work.func = io_timeout_complete;
>>       io_req_task_work_add(req);
>>       return HRTIMER_NORESTART;
>>   }
>> @@ -470,16 +511,28 @@ static int __io_timeout_prep(struct io_kiocb *req,
>>           return -EINVAL;
>>       flags = READ_ONCE(sqe->timeout_flags);
>>       if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
>> -              IORING_TIMEOUT_ETIME_SUCCESS))
>> +              IORING_TIMEOUT_ETIME_SUCCESS |
>> +              IORING_TIMEOUT_MULTISHOT)) {
>>           return -EINVAL;
>> +    }
> 
> Please, don't add braces, they're not needed here.

Thanks, will remove.

> 
>>       /* more than one clock specified is invalid, obviously */
>>       if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
>>           return -EINVAL;
>> +    /* multishot requests only make sense with rel values */
>> +    if (!(~flags & (IORING_TIMEOUT_MULTISHOT | IORING_TIMEOUT_ABS)))
>> +        return -EINVAL;
>>         INIT_LIST_HEAD(&timeout->list);
>>       timeout->off = off;
>>       if (unlikely(off && !req->ctx->off_timeout_used))
>>           req->ctx->off_timeout_used = true;
>> +    /*
>> +     * for multishot reqs w/ fixed nr of repeats, target_seq tracks the
>> +     * remaining nr
>> +     */
>> +    timeout->repeats = 0;
>> +    if ((flags & IORING_TIMEOUT_MULTISHOT) && off > 0)
>> +        timeout->repeats = off;
>>         if (WARN_ON_ONCE(req_has_async_data(req)))
>>           return -EFAULT;
>
diff mbox series

Patch

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index f8d14d1c58d3..0716cb17e436 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -250,6 +250,7 @@  enum io_uring_op {
 #define IORING_TIMEOUT_REALTIME		(1U << 3)
 #define IORING_LINK_TIMEOUT_UPDATE	(1U << 4)
 #define IORING_TIMEOUT_ETIME_SUCCESS	(1U << 5)
+#define IORING_TIMEOUT_MULTISHOT	(1U << 6)
 #define IORING_TIMEOUT_CLOCK_MASK	(IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
 #define IORING_TIMEOUT_UPDATE_MASK	(IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
 /*
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 5c6c6f720809..61b8488565ce 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -17,6 +17,7 @@  struct io_timeout {
 	struct file			*file;
 	u32				off;
 	u32				target_seq;
+	u32				repeats;
 	struct list_head		list;
 	/* head of the link, used by linked timeouts only */
 	struct io_kiocb			*head;
@@ -37,8 +38,9 @@  struct io_timeout_rem {
 static inline bool io_is_timeout_noseq(struct io_kiocb *req)
 {
 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
+	struct io_timeout_data *data = req->async_data;
 
-	return !timeout->off;
+	return !timeout->off || data->flags & IORING_TIMEOUT_MULTISHOT;
 }
 
 static inline void io_put_req(struct io_kiocb *req)
@@ -49,6 +51,45 @@  static inline void io_put_req(struct io_kiocb *req)
 	}
 }
 
+static inline bool io_timeout_finish(struct io_timeout *timeout,
+				     struct io_timeout_data *data)
+{
+	if (!(data->flags & IORING_TIMEOUT_MULTISHOT))
+		return true;
+
+	if (!timeout->off || (timeout->repeats && --timeout->repeats))
+		return false;
+
+	return true;
+}
+
+static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer);
+
+static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
+{
+	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
+	struct io_timeout_data *data = req->async_data;
+	struct io_ring_ctx *ctx = req->ctx;
+
+	if (!io_timeout_finish(timeout, data)) {
+		bool filled;
+		filled = io_aux_cqe(ctx, false, req->cqe.user_data, -ETIME,
+				      IORING_CQE_F_MORE, false);
+		if (filled) {
+			/* re-arm timer */
+			spin_lock_irq(&ctx->timeout_lock);
+			list_add(&timeout->list, ctx->timeout_list.prev);
+			data->timer.function = io_timeout_fn;
+			hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
+			spin_unlock_irq(&ctx->timeout_lock);
+			return;
+		}
+		io_req_set_res(req, -EOVERFLOW, 0);
+	}
+
+	io_req_task_complete(req, ts);
+}
+
 static bool io_kill_timeout(struct io_kiocb *req, int status)
 	__must_hold(&req->ctx->timeout_lock)
 {
@@ -212,7 +253,7 @@  static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
 		req_set_fail(req);
 
 	io_req_set_res(req, -ETIME, 0);
-	req->io_task_work.func = io_req_task_complete;
+	req->io_task_work.func = io_timeout_complete;
 	io_req_task_work_add(req);
 	return HRTIMER_NORESTART;
 }
@@ -470,16 +511,28 @@  static int __io_timeout_prep(struct io_kiocb *req,
 		return -EINVAL;
 	flags = READ_ONCE(sqe->timeout_flags);
 	if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
-		      IORING_TIMEOUT_ETIME_SUCCESS))
+		      IORING_TIMEOUT_ETIME_SUCCESS |
+		      IORING_TIMEOUT_MULTISHOT)) {
 		return -EINVAL;
+	}
 	/* more than one clock specified is invalid, obviously */
 	if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
 		return -EINVAL;
+	/* multishot requests only make sense with rel values */
+	if (!(~flags & (IORING_TIMEOUT_MULTISHOT | IORING_TIMEOUT_ABS)))
+		return -EINVAL;
 
 	INIT_LIST_HEAD(&timeout->list);
 	timeout->off = off;
 	if (unlikely(off && !req->ctx->off_timeout_used))
 		req->ctx->off_timeout_used = true;
+	/*
+	 * for multishot reqs w/ fixed nr of repeats, target_seq tracks the
+	 * remaining nr
+	 */
+	timeout->repeats = 0;
+	if ((flags & IORING_TIMEOUT_MULTISHOT) && off > 0)
+		timeout->repeats = off;
 
 	if (WARN_ON_ONCE(req_has_async_data(req)))
 		return -EFAULT;