
[v2] io_uring: IORING_OP_TIMEOUT support

Message ID afda2462-d192-7f95-6a26-b2e604d57463@kernel.dk
State New, archived
Series [v2] io_uring: IORING_OP_TIMEOUT support

Commit Message

Jens Axboe Sept. 17, 2019, 7:38 p.m. UTC
There have been a few requests for functionality similar to io_getevents()
and epoll_wait(), where the user can specify a timeout for waiting on
events. I deliberately did not add support for this through the system
call initially to avoid overloading the args, but I can see that the use
cases for this are valid.

This adds support for IORING_OP_TIMEOUT. If an application wants to get
woken when waiting for events, it can simply submit one of these timeout
commands with its wait call (or before it). This ensures that the
application sleeping on the CQ ring waiting for events will get woken.
The timeout command is passed in as a pointer to a struct timespec.
Timeouts are relative.
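
As a rough usage sketch (not part of this patch; it assumes a raw
io_uring_enter() syscall wrapper and an application that has already
set up the ring and grabbed a free SQE), a one-second timeout could be
queued alongside the wait like this:

    #include <string.h>
    #include <time.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/io_uring.h>

    /* hypothetical raw wrapper; no libc/liburing helper exists yet */
    static int io_uring_enter(int ring_fd, unsigned to_submit,
                              unsigned min_complete, unsigned flags)
    {
            return syscall(__NR_io_uring_enter, ring_fd, to_submit,
                           min_complete, flags, NULL, 0);
    }

    /* queue a relative 1s timeout, then wait for at least one CQE */
    static int wait_with_timeout(int ring_fd, struct io_uring_sqe *sqe)
    {
            struct timespec ts = { .tv_sec = 1 };

            memset(sqe, 0, sizeof(*sqe)); /* flags/ioprio/off must be 0 */
            sqe->opcode = IORING_OP_TIMEOUT;
            sqe->addr = (unsigned long) &ts; /* relative struct timespec */
            sqe->len = 1;                    /* must be 1, see io_timeout() */
            sqe->user_data = 0x1234;         /* tag for the timeout CQE */

            /* the SQ ring tail is assumed advanced for this SQE already */
            return io_uring_enter(ring_fd, 1, 1, IORING_ENTER_GETEVENTS);
    }

If the timer fires first, a CQE with that user_data and res == 0 is
posted, and the sleeper in io_cqring_wait() is guaranteed to return to
userspace.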

Signed-off-by: Jens Axboe <axboe@kernel.dk>

---

V2

- Ensure any timeout will result in a return to userspace from
  io_cqring_wait().
- Improve commit message
- Add ->file to struct io_timeout
- Kill separate 'kt' value, use timespec_to_ktime() directly

Comments

Jackie Liu Sept. 18, 2019, 2:10 a.m. UTC | #1
On Sept. 18, 2019, at 03:38, Jens Axboe <axboe@kernel.dk> wrote:
> 
> [...]
> 
> +static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> +{
> +	struct timespec ts;
> +
> +	if (sqe->flags || sqe->ioprio || sqe->off || sqe->buf_index)
> +		return -EINVAL;
> +	if (sqe->len != 1)
> +		return -EINVAL;

Don't we also need this?

if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
	return -EINVAL;

Otherwise it will be queued by io_iopoll_req_issued().

> +	if (copy_from_user(&ts, (void __user *) sqe->addr, sizeof(ts)))
> +		return -EFAULT;
> +
> +	hrtimer_init(&req->timeout.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	req->timeout.timer.function = io_timeout_fn;
> +	hrtimer_start(&req->timeout.timer, timespec_to_ktime(ts),
> +			HRTIMER_MODE_REL);
> +	return 0;
> +}
> +

--
BR, Jackie Liu
Jens Axboe Sept. 18, 2019, 2:27 a.m. UTC | #2
On 9/17/19 8:10 PM, Jackie Liu wrote:
> 
On Sept. 18, 2019, at 03:38, Jens Axboe <axboe@kernel.dk> wrote:
>>
>> [...]
>>
>> +static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>> +{
>> +	struct timespec ts;
>> +
>> +	if (sqe->flags || sqe->ioprio || sqe->off || sqe->buf_index)
>> +		return -EINVAL;
>> +	if (sqe->len != 1)
>> +		return -EINVAL;
> 
> Don't we also need this?
> 
> if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
> 	return -EINVAL;
> 
> Otherwise it will be queued by io_iopoll_req_issued().

Good point. I did actually think of the polled case, but yeah we don't
want it to get added to the poll list. Since polled never waits, I
think we're fine disallowing it on polled rings.

I'll make that change, thanks.
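
For reference, that check would presumably sit at the top of
io_timeout(), something along these lines (a sketch of the anticipated
change, not posted code):

	struct io_ring_ctx *ctx = req->ctx;

	/* polled rings never sleep on the CQ ring, so a timeout is useless */
	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
		return -EINVAL;

Rejecting the SQE up front also keeps it from ever being queued via
io_iopoll_req_issued().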

Patch

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0dadbdbead0f..02db09b89e83 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -216,6 +216,7 @@ struct io_ring_ctx {
 		struct wait_queue_head	cq_wait;
 		struct fasync_struct	*cq_fasync;
 		struct eventfd_ctx	*cq_ev_fd;
+		atomic_t		cq_timeouts;
 	} ____cacheline_aligned_in_smp;
 
 	struct io_rings	*rings;
@@ -283,6 +284,11 @@ struct io_poll_iocb {
 	struct wait_queue_entry		wait;
 };
 
+struct io_timeout {
+	struct file			*file;
+	struct hrtimer			timer;
+};
+
 /*
  * NOTE! Each of the iocb union members has the file pointer
  * as the first entry in their struct definition. So you can
@@ -294,6 +300,7 @@ struct io_kiocb {
 		struct file		*file;
 		struct kiocb		rw;
 		struct io_poll_iocb	poll;
+		struct io_timeout	timeout;
 	};
 
 	struct sqe_submit	submit;
@@ -1765,6 +1772,35 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	return ipt.error;
 }
 
+static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
+{
+	struct io_kiocb *req;
+
+	req = container_of(timer, struct io_kiocb, timeout.timer);
+	atomic_inc(&req->ctx->cq_timeouts);
+	io_cqring_add_event(req->ctx, req->user_data, 0);
+	io_put_req(req);
+	return HRTIMER_NORESTART;
+}
+
+static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct timespec ts;
+
+	if (sqe->flags || sqe->ioprio || sqe->off || sqe->buf_index)
+		return -EINVAL;
+	if (sqe->len != 1)
+		return -EINVAL;
+	if (copy_from_user(&ts, (void __user *) sqe->addr, sizeof(ts)))
+		return -EFAULT;
+
+	hrtimer_init(&req->timeout.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	req->timeout.timer.function = io_timeout_fn;
+	hrtimer_start(&req->timeout.timer, timespec_to_ktime(ts),
+			HRTIMER_MODE_REL);
+	return 0;
+}
+
 static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			const struct io_uring_sqe *sqe)
 {
@@ -1842,6 +1878,9 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	case IORING_OP_RECVMSG:
 		ret = io_recvmsg(req, s->sqe, force_nonblock);
 		break;
+	case IORING_OP_TIMEOUT:
+		ret = io_timeout(req, s->sqe);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -2593,6 +2632,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			  const sigset_t __user *sig, size_t sigsz)
 {
 	struct io_rings *rings = ctx->rings;
+	unsigned nr_timeouts;
 	int ret;
 
 	if (io_cqring_events(rings) >= min_events)
@@ -2611,7 +2651,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			return ret;
 	}
 
-	ret = wait_event_interruptible(ctx->wait, io_cqring_events(rings) >= min_events);
+	/*
+	 * Return if we have enough events, or if a timeout occurred since
+	 * we started waiting. For timeouts, we always want to return to
+	 * userspace.
+	 */
+	nr_timeouts = atomic_read(&ctx->cq_timeouts);
+	ret = wait_event_interruptible(ctx->wait,
+				io_cqring_events(rings) >= min_events ||
+				atomic_read(&ctx->cq_timeouts) != nr_timeouts);
 	restore_saved_sigmask_unless(ret == -ERESTARTSYS);
 	if (ret == -ERESTARTSYS)
 		ret = -EINTR;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 96ee9d94b73e..cf3101dc6b1e 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -61,6 +61,7 @@ struct io_uring_sqe {
 #define IORING_OP_SYNC_FILE_RANGE	8
 #define IORING_OP_SENDMSG	9
 #define IORING_OP_RECVMSG	10
+#define IORING_OP_TIMEOUT	11
 
 /*
  * sqe->fsync_flags