
[2/6] io_uring: add io_file_can_poll() helper

Message ID: 20240206162402.643507-3-axboe@kernel.dk
State: New
Series: Misc cleanups / optimizations

Commit Message

Jens Axboe Feb. 6, 2024, 4:22 p.m. UTC
This adds a flag to avoid dereferencing file and then f_op to
figure out whether the file has a poll handler defined. We
generally do this check at least twice for networked workloads.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h |  3 +++
 io_uring/io_uring.c            |  2 +-
 io_uring/io_uring.h            | 12 ++++++++++++
 io_uring/kbuf.c                |  2 +-
 io_uring/poll.c                |  2 +-
 io_uring/rw.c                  |  6 +++---
 6 files changed, 21 insertions(+), 6 deletions(-)
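
For reference, file_can_poll() is essentially a one-liner in
<linux/poll.h>, so every call costs the file->f_op pointer chase the
commit message refers to:

	static inline bool file_can_poll(struct file *file)
	{
		return file->f_op->poll;
	}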

Comments

Pavel Begunkov Feb. 7, 2024, 12:57 a.m. UTC | #1
On 2/6/24 16:22, Jens Axboe wrote:
> This adds a flag to avoid dereferencing file and then f_op to
> figure out whether the file has a poll handler defined. We
> generally do this check at least twice for networked workloads.

Sends don't use poll every time. For recv, we touch it
in io_arm_poll_handler(), which is done only once and so
is amortised to 0 for multishots.

Looking at the patch, the second call we might care about is
in io_ring_buffer_select(), but I'd argue that it shouldn't
be there in the first place. It's fragile, and I don't see
why selected buffers would care specifically about polling
rather than asking more generally "can it go true async?". For
reads you might want to also test FMODE_BUF_RASYNC.

Also note that when called from recv we already know that
it's pollable; it might be much easier to pass that in as an
argument.
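
A minimal sketch of that suggestion; the bool parameter is a
hypothetical addition and the body is abbreviated, so this is not
part of the series:

	/* caller states pollability instead of the helper re-checking */
	static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
						  struct io_buffer_list *bl,
						  unsigned int issue_flags,
						  bool pollable)
	{
		/* ... buffer lookup as in the existing function ... */
		if (issue_flags & IO_URING_F_UNLOCKED || !pollable) {
			/* consume the buffer up front, as today */
		}
		/* ... */
	}

recv, which is pollable by definition, could then pass true and skip
the file check entirely.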


> [...]
Jens Axboe Feb. 7, 2024, 2:15 a.m. UTC | #2
On 2/6/24 5:57 PM, Pavel Begunkov wrote:
> On 2/6/24 16:22, Jens Axboe wrote:
>> This adds a flag to avoid dereferencing file and then f_op to
>> figure out whether the file has a poll handler defined. We
>> generally do this check at least twice for networked workloads.
> 
> Sends don't use poll every time. For recv, we touch it
> in io_arm_poll_handler(), which is done only once and so
> is amortised to 0 for multishots.

Correct

> Looking at the patch, the second call we might care about is
> in io_ring_buffer_select(), but I'd argue that it shouldn't
> be there in the first place. It's fragile, and I don't see
> why selected buffers would care specifically about polling
> rather than asking more generally "can it go true async?". For
> reads you might want to also test FMODE_BUF_RASYNC.

That is indeed the second case that is hit, and I don't think we
can easily get around it, which is the reason for the hint.

> Also note that when called from recv we already know that
> it's pollable; it might be much easier to pass that in as an
> argument.

I did think about that, but I don't see a clean way to do it. We could
potentially do it as an issue flag, but that seems kind of ugly to me.
Open to suggestions!
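
For comparison, the issue-flag variant mentioned above might look
like the following; IO_URING_F_POLLABLE is a hypothetical name and
bit, not an existing flag:

	/* hypothetical addition to the IO_URING_F_* issue flags */
	#define IO_URING_F_POLLABLE	(1U << 12)	/* bit value illustrative */

	/* a caller that already knows the file is pollable sets it ... */
	issue_flags |= IO_URING_F_POLLABLE;

	/* ... and io_ring_buffer_select() tests the flag, not the file */
	if (issue_flags & IO_URING_F_UNLOCKED ||
	    !(issue_flags & IO_URING_F_POLLABLE)) {
		/* consume the buffer up front */
	}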
Pavel Begunkov Feb. 7, 2024, 3:33 a.m. UTC | #3
On 2/7/24 02:15, Jens Axboe wrote:
> On 2/6/24 5:57 PM, Pavel Begunkov wrote:
>> On 2/6/24 16:22, Jens Axboe wrote:
>>> This adds a flag to avoid dereferencing file and then f_op to
>>> figure out whether the file has a poll handler defined. We
>>> generally do this check at least twice for networked workloads.
>>
>> Sends don't use poll every time. For recv, we touch it
>> in io_arm_poll_handler(), which is done only once and so
>> is amortised to 0 for multishots.
> 
> Correct
> 
>> Looking at the patch, the second call we might care about is
>> in io_ring_buffer_select(), but I'd argue that it shouldn't
>> be there in the first place. It's fragile, and I don't see
>> why selected buffers would care specifically about polling
>> rather than asking more generally "can it go true async?". For
>> reads you might want to also test FMODE_BUF_RASYNC.
> 
> That is indeed the second case that is hit, and I don't think we
> can easily get around it, which is the reason for the hint.
> 
>> Also note that when called from recv we already know that
>> it's pollable; it might be much easier to pass that in as an
>> argument.
> 
> I did think about that, but I don't see a clean way to do it. We could
> potentially do it as an issue flag, but that seems kind of ugly to me.
> Open to suggestions!

I'd argue passing it as an argument is much, much cleaner and
more robust design-wise; those leaked abstractions are always
fragile and unreliable. And now there is an argument that it's
even faster, because for recv you can just pass "true". IOW, I'd
prefer potentially slightly uglier but safer code here.

Surely it'd have been great to move this "eject buffer" thing
out of the selection func and let the caller decide, but I
haven't stared at the code for long enough to say anything
concrete.

Patch

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 5ac18b05d4ee..7f06cee02b58 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -463,6 +463,7 @@ enum io_req_flags {
 	REQ_F_SUPPORT_NOWAIT_BIT,
 	REQ_F_ISREG_BIT,
 	REQ_F_POLL_NO_LAZY_BIT,
+	REQ_F_CAN_POLL_BIT,
 
 	/* not a real bit, just to check we're not overflowing the space */
 	__REQ_F_LAST_BIT,
@@ -535,6 +536,8 @@ enum {
 	REQ_F_HASH_LOCKED	= IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT),
 	/* don't use lazy poll wake for this request */
 	REQ_F_POLL_NO_LAZY	= IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT),
+	/* file is pollable */
+	REQ_F_CAN_POLL		= IO_REQ_FLAG(REQ_F_CAN_POLL_BIT),
 };
 
 typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 360a7ee41d3a..d0e06784926f 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1969,7 +1969,7 @@ void io_wq_submit_work(struct io_wq_work *work)
 	if (req->flags & REQ_F_FORCE_ASYNC) {
 		bool opcode_poll = def->pollin || def->pollout;
 
-		if (opcode_poll && file_can_poll(req->file)) {
+		if (opcode_poll && io_file_can_poll(req)) {
 			needs_poll = true;
 			issue_flags |= IO_URING_F_NONBLOCK;
 		}
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index d5495710c178..2952551fe345 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -5,6 +5,7 @@
 #include <linux/lockdep.h>
 #include <linux/resume_user_mode.h>
 #include <linux/kasan.h>
+#include <linux/poll.h>
 #include <linux/io_uring_types.h>
 #include <uapi/linux/eventpoll.h>
 #include "io-wq.h"
@@ -398,4 +399,15 @@ static inline size_t uring_sqe_size(struct io_ring_ctx *ctx)
 		return 2 * sizeof(struct io_uring_sqe);
 	return sizeof(struct io_uring_sqe);
 }
+
+static inline bool io_file_can_poll(struct io_kiocb *req)
+{
+	if (req->flags & REQ_F_CAN_POLL)
+		return true;
+	if (file_can_poll(req->file)) {
+		req->flags |= REQ_F_CAN_POLL;
+		return true;
+	}
+	return false;
+}
 #endif
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 18df5a9d2f5e..71880615bb78 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -180,7 +180,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
 	req->buf_list = bl;
 	req->buf_index = buf->bid;
 
-	if (issue_flags & IO_URING_F_UNLOCKED || !file_can_poll(req->file)) {
+	if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
 		/*
 		 * If we came in unlocked, we have no choice but to consume the
 		 * buffer here, otherwise nothing ensures that the buffer won't
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 7513afc7b702..4afec733fef6 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -727,7 +727,7 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
 
 	if (!def->pollin && !def->pollout)
 		return IO_APOLL_ABORTED;
-	if (!file_can_poll(req->file))
+	if (!io_file_can_poll(req))
 		return IO_APOLL_ABORTED;
 	if (!(req->flags & REQ_F_APOLL_MULTISHOT))
 		mask |= EPOLLONESHOT;
diff --git a/io_uring/rw.c b/io_uring/rw.c
index d5e79d9bdc71..0fb7a045163a 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -682,7 +682,7 @@ static bool io_rw_should_retry(struct io_kiocb *req)
 	 * just use poll if we can, and don't attempt if the fs doesn't
 	 * support callback based unlocks
 	 */
-	if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
+	if (io_file_can_poll(req) || !(req->file->f_mode & FMODE_BUF_RASYNC))
 		return false;
 
 	wait->wait.func = io_async_buf_func;
@@ -831,7 +831,7 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
 		 * If we can poll, just do that. For a vectored read, we'll
 		 * need to copy state first.
 		 */
-		if (file_can_poll(req->file) && !io_issue_defs[req->opcode].vectored)
+		if (io_file_can_poll(req) && !io_issue_defs[req->opcode].vectored)
 			return -EAGAIN;
 		/* IOPOLL retry should happen for io-wq threads */
 		if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
@@ -930,7 +930,7 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
 	/*
 	 * Multishot MUST be used on a pollable file
 	 */
-	if (!file_can_poll(req->file))
+	if (!io_file_can_poll(req))
 		return -EBADFD;
 
 	ret = __io_read(req, issue_flags);