diff mbox series

[PATCHv4,1/5] io_uring: move fixed buffer import to issue path

Message ID 20250218224229.837848-2-kbusch@meta.com (mailing list archive)
State New
Headers show
Series ublk zero-copy support | expand

Commit Message

Keith Busch Feb. 18, 2025, 10:42 p.m. UTC
From: Keith Busch <kbusch@kernel.org>

Similar to the fixed file path, requests may depend on a previous one
to set up an index, so we need to allow linking them. The prep callback
happens too soon for linked commands, so the lookup needs to be deferred
to the issue path. Change the prep callbacks to just set the buf_index
and let generic io_uring code handle the fixed buffer node setup, just
like it already does for fixed files.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/io_uring_types.h |  3 +++
 io_uring/io_uring.c            | 19 ++++++++++++++
 io_uring/net.c                 | 25 ++++++-------------
 io_uring/nop.c                 | 22 +++--------------
 io_uring/rw.c                  | 45 ++++++++++++++++++++++++----------
 io_uring/uring_cmd.c           | 16 ++----------
 6 files changed, 67 insertions(+), 63 deletions(-)

Comments

Caleb Sander Mateos Feb. 19, 2025, 1:27 a.m. UTC | #1
On Tue, Feb 18, 2025 at 2:43 PM Keith Busch <kbusch@meta.com> wrote:
>
> From: Keith Busch <kbusch@kernel.org>
>
> Similar to the fixed file path, requests may depend on a previous one
> to set up an index, so we need to allow linking them. The prep callback
> happens too soon for linked commands, so the lookup needs to be deferred
> to the issue path. Change the prep callbacks to just set the buf_index
> and let generic io_uring code handle the fixed buffer node setup, just
> like it already does for fixed files.
>
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---
>  include/linux/io_uring_types.h |  3 +++
>  io_uring/io_uring.c            | 19 ++++++++++++++
>  io_uring/net.c                 | 25 ++++++-------------
>  io_uring/nop.c                 | 22 +++--------------
>  io_uring/rw.c                  | 45 ++++++++++++++++++++++++----------
>  io_uring/uring_cmd.c           | 16 ++----------
>  6 files changed, 67 insertions(+), 63 deletions(-)
>
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index c0fe8a00fe53a..0bcaefc4ffe02 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -482,6 +482,7 @@ enum {
>         REQ_F_DOUBLE_POLL_BIT,
>         REQ_F_APOLL_MULTISHOT_BIT,
>         REQ_F_CLEAR_POLLIN_BIT,
> +       REQ_F_FIXED_BUFFER_BIT,

Move this to the end of the REQ_F_*_BIT definitions (before __REQ_F_LAST_BIT)?

>         /* keep async read/write and isreg together and in order */
>         REQ_F_SUPPORT_NOWAIT_BIT,
>         REQ_F_ISREG_BIT,
> @@ -574,6 +575,8 @@ enum {
>         REQ_F_BUF_NODE          = IO_REQ_FLAG(REQ_F_BUF_NODE_BIT),
>         /* request has read/write metadata assigned */
>         REQ_F_HAS_METADATA      = IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT),
> +       /* request has a fixed buffer at buf_index */
> +       REQ_F_FIXED_BUFFER      = IO_REQ_FLAG(REQ_F_FIXED_BUFFER_BIT),
>  };
>
>  typedef void (*io_req_tw_func_t)(struct io_kiocb *req, io_tw_token_t tw);
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 58528bf61638e..7800edbc57279 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -1721,6 +1721,23 @@ static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
>         return !!req->file;
>  }
>
> +static bool io_assign_buffer(struct io_kiocb *req, unsigned int issue_flags)
> +{
> +       struct io_ring_ctx *ctx = req->ctx;
> +       struct io_rsrc_node *node;
> +
> +       if (req->buf_node || !(req->flags & REQ_F_FIXED_BUFFER))
> +               return true;
> +
> +       io_ring_submit_lock(ctx, issue_flags);
> +       node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index);
> +       if (node)
> +               io_req_assign_buf_node(req, node);
> +       io_ring_submit_unlock(ctx, issue_flags);
> +
> +       return !!node;
> +}
> +
>  static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
>  {
>         const struct io_issue_def *def = &io_issue_defs[req->opcode];
> @@ -1729,6 +1746,8 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
>
>         if (unlikely(!io_assign_file(req, def, issue_flags)))
>                 return -EBADF;
> +       if (unlikely(!io_assign_buffer(req, issue_flags)))
> +               return -EFAULT;
>
>         if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred()))
>                 creds = override_creds(req->creds);
> diff --git a/io_uring/net.c b/io_uring/net.c
> index 000dc70d08d0d..39838e8575b53 100644
> --- a/io_uring/net.c
> +++ b/io_uring/net.c
> @@ -1373,6 +1373,10 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  #endif
>         if (unlikely(!io_msg_alloc_async(req)))
>                 return -ENOMEM;
> +       if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
> +               req->buf_index = zc->buf_index;

Can the buf_index field of io_sr_msg be removed now that it's only
used within io_send_zc_prep()?

> +               req->flags |= REQ_F_FIXED_BUFFER;
> +       }
>         if (req->opcode != IORING_OP_SENDMSG_ZC)
>                 return io_send_setup(req, sqe);
>         return io_sendmsg_setup(req, sqe);
> @@ -1434,25 +1438,10 @@ static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
>         struct io_async_msghdr *kmsg = req->async_data;
>         int ret;
>
> -       if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
> -               struct io_ring_ctx *ctx = req->ctx;
> -               struct io_rsrc_node *node;
> -
> -               ret = -EFAULT;
> -               io_ring_submit_lock(ctx, issue_flags);
> -               node = io_rsrc_node_lookup(&ctx->buf_table, sr->buf_index);
> -               if (node) {
> -                       io_req_assign_buf_node(sr->notif, node);
> -                       ret = 0;
> -               }
> -               io_ring_submit_unlock(ctx, issue_flags);
> -
> -               if (unlikely(ret))
> -                       return ret;
> -
> +       if (req->flags & REQ_F_FIXED_BUFFER) {
>                 ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter,
> -                                       node->buf, (u64)(uintptr_t)sr->buf,
> -                                       sr->len);
> +                                       req->buf_node->buf,
> +                                       (u64)(uintptr_t)sr->buf, sr->len);
>                 if (unlikely(ret))
>                         return ret;
>                 kmsg->msg.sg_from_iter = io_sg_from_iter;
> diff --git a/io_uring/nop.c b/io_uring/nop.c
> index 5e5196df650a1..989908603112f 100644
> --- a/io_uring/nop.c
> +++ b/io_uring/nop.c
> @@ -16,7 +16,6 @@ struct io_nop {
>         struct file     *file;
>         int             result;
>         int             fd;
> -       int             buffer;
>         unsigned int    flags;
>  };
>
> @@ -39,10 +38,10 @@ int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>                 nop->fd = READ_ONCE(sqe->fd);
>         else
>                 nop->fd = -1;
> -       if (nop->flags & IORING_NOP_FIXED_BUFFER)
> -               nop->buffer = READ_ONCE(sqe->buf_index);
> -       else
> -               nop->buffer = -1;
> +       if (nop->flags & IORING_NOP_FIXED_BUFFER) {
> +               req->buf_index = READ_ONCE(sqe->buf_index);
> +               req->flags |= REQ_F_FIXED_BUFFER;
> +       }
>         return 0;
>  }
>
> @@ -63,19 +62,6 @@ int io_nop(struct io_kiocb *req, unsigned int issue_flags)
>                         goto done;
>                 }
>         }
> -       if (nop->flags & IORING_NOP_FIXED_BUFFER) {
> -               struct io_ring_ctx *ctx = req->ctx;
> -               struct io_rsrc_node *node;
> -
> -               ret = -EFAULT;
> -               io_ring_submit_lock(ctx, issue_flags);
> -               node = io_rsrc_node_lookup(&ctx->buf_table, nop->buffer);
> -               if (node) {
> -                       io_req_assign_buf_node(req, node);
> -                       ret = 0;
> -               }
> -               io_ring_submit_unlock(ctx, issue_flags);
> -       }
>  done:
>         if (ret < 0)
>                 req_set_fail(req);
> diff --git a/io_uring/rw.c b/io_uring/rw.c
> index 16f12f94943f7..2d8910d9197a0 100644
> --- a/io_uring/rw.c
> +++ b/io_uring/rw.c
> @@ -353,25 +353,14 @@ int io_prep_writev(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  static int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>                             int ddir)
>  {
> -       struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
> -       struct io_ring_ctx *ctx = req->ctx;
> -       struct io_rsrc_node *node;
> -       struct io_async_rw *io;
>         int ret;
>
>         ret = io_prep_rw(req, sqe, ddir, false);
>         if (unlikely(ret))
>                 return ret;
>
> -       node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index);
> -       if (!node)
> -               return -EFAULT;
> -       io_req_assign_buf_node(req, node);
> -
> -       io = req->async_data;
> -       ret = io_import_fixed(ddir, &io->iter, node->buf, rw->addr, rw->len);
> -       iov_iter_save_state(&io->iter, &io->iter_state);
> -       return ret;
> +       req->flags |= REQ_F_FIXED_BUFFER;
> +       return 0;
>  }
>
>  int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> @@ -954,10 +943,36 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
>         return ret;
>  }
>
> +static int io_import_fixed_buffer(struct io_kiocb *req, int ddir)
> +{
> +       struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
> +       struct io_async_rw *io;
> +       int ret;
> +
> +       if (!(req->flags & REQ_F_FIXED_BUFFER))
> +               return 0;
> +
> +       io = req->async_data;
> +       if (io->bytes_done)
> +               return 0;
> +
> +       ret = io_import_fixed(ddir, &io->iter, req->buf_node->buf, rw->addr,
> +                             rw->len);
> +       if (ret)
> +               return ret;
> +
> +       iov_iter_save_state(&io->iter, &io->iter_state);
> +       return 0;
> +}
> +
>  int io_read(struct io_kiocb *req, unsigned int issue_flags)
>  {
>         int ret;
>
> +       ret = io_import_fixed_buffer(req, READ);
> +       if (unlikely(ret))
> +               return ret;
> +
>         ret = __io_read(req, issue_flags);
>         if (ret >= 0)
>                 return kiocb_done(req, ret, issue_flags);
> @@ -1062,6 +1077,10 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
>         ssize_t ret, ret2;
>         loff_t *ppos;
>
> +       ret = io_import_fixed_buffer(req, WRITE);
> +       if (unlikely(ret))
> +               return ret;
> +
>         ret = io_rw_init_file(req, FMODE_WRITE, WRITE);
>         if (unlikely(ret))
>                 return ret;
> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
> index 8bdf2c9b3fef9..112b49fde23e5 100644
> --- a/io_uring/uring_cmd.c
> +++ b/io_uring/uring_cmd.c
> @@ -200,19 +200,8 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>                 return -EINVAL;
>
>         if (ioucmd->flags & IORING_URING_CMD_FIXED) {
> -               struct io_ring_ctx *ctx = req->ctx;
> -               struct io_rsrc_node *node;
> -               u16 index = READ_ONCE(sqe->buf_index);
> -
> -               node = io_rsrc_node_lookup(&ctx->buf_table, index);
> -               if (unlikely(!node))
> -                       return -EFAULT;
> -               /*
> -                * Pi node upfront, prior to io_uring_cmd_import_fixed()
> -                * being called. This prevents destruction of the mapped buffer
> -                * we'll need at actual import time.
> -                */
> -               io_req_assign_buf_node(req, node);
> +               req->buf_index = READ_ONCE(sqe->buf_index);
> +               req->flags |= REQ_F_FIXED_BUFFER;
>         }
>         ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
>
> @@ -262,7 +251,6 @@ int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
>         struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
>         struct io_rsrc_node *node = req->buf_node;
>
> -       /* Must have had rsrc_node assigned at prep time */
>         if (node)
>                 return io_import_fixed(rw, iter, node->buf, ubuf, len);

Is it possible for node to be NULL? If the buffer lookup failed,
io_issue_sqe() would have returned early and not called ->issue(), so
this function wouldn't have been called.

Best,
Caleb

>
> --
> 2.43.5
>
Ming Lei Feb. 19, 2025, 4:23 a.m. UTC | #2
On Tue, Feb 18, 2025 at 02:42:25PM -0800, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
> 
> Similar to the fixed file path, requests may depend on a previous one
> to set up an index, so we need to allow linking them. The prep callback
> happens too soon for linked commands, so the lookup needs to be deferred
> to the issue path. Change the prep callbacks to just set the buf_index
> and let generic io_uring code handle the fixed buffer node setup, just
> like it already does for fixed files.
> 
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---

...

> diff --git a/io_uring/net.c b/io_uring/net.c
> index 000dc70d08d0d..39838e8575b53 100644
> --- a/io_uring/net.c
> +++ b/io_uring/net.c
> @@ -1373,6 +1373,10 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  #endif
>  	if (unlikely(!io_msg_alloc_async(req)))
>  		return -ENOMEM;
> +	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
> +		req->buf_index = zc->buf_index;
> +		req->flags |= REQ_F_FIXED_BUFFER;
> +	}
>  	if (req->opcode != IORING_OP_SENDMSG_ZC)
>  		return io_send_setup(req, sqe);
>  	return io_sendmsg_setup(req, sqe);
> @@ -1434,25 +1438,10 @@ static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
>  	struct io_async_msghdr *kmsg = req->async_data;
>  	int ret;
>  
> -	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
> -		struct io_ring_ctx *ctx = req->ctx;
> -		struct io_rsrc_node *node;
> -
> -		ret = -EFAULT;
> -		io_ring_submit_lock(ctx, issue_flags);
> -		node = io_rsrc_node_lookup(&ctx->buf_table, sr->buf_index);
> -		if (node) {
> -			io_req_assign_buf_node(sr->notif, node);

Here the node buffer is assigned to ->notif req, instead of the current
request, and you may have to deal with this case here.

Otherwise, this patch looks fine:

Reviewed-by: Ming Lei <ming.lei@redhat.com>



Thanks,
Ming
Pavel Begunkov Feb. 19, 2025, 4:48 p.m. UTC | #3
On 2/18/25 22:42, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
> 
> Similar to the fixed file path, requests may depend on a previous one
> to set up an index, so we need to allow linking them. The prep callback
> happens too soon for linked commands, so the lookup needs to be deferred
> to the issue path. Change the prep callbacks to just set the buf_index
> and let generic io_uring code handle the fixed buffer node setup, just
> like it already does for fixed files.
> 
> Signed-off-by: Keith Busch <kbusch@kernel.org>

It wasn't great before, and it'd be harder to follow if we shove it
into the issue path like that. It adds additional overhead in the
common path, and it's not super flexible — consider the notification
problem and what we need out of it for other features.

We're better off removing the lookup vs import split, as below.
Here is a branch, let's do it on top.

https://github.com/isilence/linux.git regbuf-import


diff --git a/io_uring/net.c b/io_uring/net.c
index ce0a39972cce..322cf023233a 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1360,24 +1360,10 @@ static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
  	int ret;
  
  	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
-		struct io_ring_ctx *ctx = req->ctx;
-		struct io_rsrc_node *node;
-
-		ret = -EFAULT;
-		io_ring_submit_lock(ctx, issue_flags);
-		node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index);
-		if (node) {
-			io_req_assign_buf_node(sr->notif, node);
-			ret = 0;
-		}
-		io_ring_submit_unlock(ctx, issue_flags);
-
-		if (unlikely(ret))
-			return ret;
-
-		ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter,
-					node->buf, (u64)(uintptr_t)sr->buf,
-					sr->len);
+		sr->notif->buf_index = req->buf_index;
+		ret = io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter,
+					(u64)(uintptr_t)sr->buf, sr->len,
+					ITER_SOURCE, issue_flags);
  		if (unlikely(ret))
  			return ret;
  		kmsg->msg.sg_from_iter = io_sg_from_iter;
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index af39b69eb4fd..a4557ed14bea 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -860,9 +860,9 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
  	return ret;
  }
  
-int io_import_fixed(int ddir, struct iov_iter *iter,
-			   struct io_mapped_ubuf *imu,
-			   u64 buf_addr, size_t len)
+static int io_import_fixed_imu(int ddir, struct iov_iter *iter,
+				struct io_mapped_ubuf *imu,
+				u64 buf_addr, size_t len)
  {
  	u64 buf_end;
  	size_t offset;
@@ -919,6 +919,35 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
  	return 0;
  }
  
+static inline struct io_rsrc_node *io_find_buf_node(struct io_kiocb *req,
+						    unsigned issue_flags)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_rsrc_node *node;
+
+	if (req->buf_node)
+		return req->buf_node;
+
+	io_ring_submit_lock(ctx, issue_flags);
+	node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index);
+	if (node)
+		io_req_assign_buf_node(req, node);
+	io_ring_submit_unlock(ctx, issue_flags);
+	return node;
+}
+
+int io_import_reg_buf(struct io_kiocb *req, struct iov_iter *iter,
+			u64 buf_addr, size_t len, int ddir,
+			unsigned issue_flags)
+{
+	struct io_rsrc_node *node;
+
+	node = io_find_buf_node(req, issue_flags);
+	if (!node)
+		return -EFAULT;
+	return io_import_fixed_imu(ddir, iter, node->buf, buf_addr, len);
+}
+
  /* Lock two rings at once. The rings must be different! */
  static void lock_two_rings(struct io_ring_ctx *ctx1, struct io_ring_ctx *ctx2)
  {
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index a6d883c62b22..ce199eb0ac9f 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -50,9 +50,9 @@ void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node);
  void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data);
  int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr);
  
-int io_import_fixed(int ddir, struct iov_iter *iter,
-			   struct io_mapped_ubuf *imu,
-			   u64 buf_addr, size_t len);
+int io_import_reg_buf(struct io_kiocb *req, struct iov_iter *iter,
+			u64 buf_addr, size_t len, int ddir,
+			unsigned issue_flags);
  
  int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg);
  int io_sqe_buffers_unregister(struct io_ring_ctx *ctx);
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 16f12f94943f..31a1f15f5f01 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -354,8 +354,6 @@ static int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe
  			    int ddir)
  {
  	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
-	struct io_ring_ctx *ctx = req->ctx;
-	struct io_rsrc_node *node;
  	struct io_async_rw *io;
  	int ret;
  
@@ -363,13 +361,8 @@ static int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe
  	if (unlikely(ret))
  		return ret;
  
-	node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index);
-	if (!node)
-		return -EFAULT;
-	io_req_assign_buf_node(req, node);
-
  	io = req->async_data;
-	ret = io_import_fixed(ddir, &io->iter, node->buf, rw->addr, rw->len);
+	ret = io_import_reg_buf(req, &io->iter, rw->addr, rw->len, ddir, 0);
  	iov_iter_save_state(&io->iter, &io->iter_state);
  	return ret;
  }
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index cf5e9822e49a..80046e755211 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -199,21 +199,9 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  	if (ioucmd->flags & ~IORING_URING_CMD_MASK)
  		return -EINVAL;
  
-	if (ioucmd->flags & IORING_URING_CMD_FIXED) {
-		struct io_ring_ctx *ctx = req->ctx;
-		struct io_rsrc_node *node;
-		u16 index = READ_ONCE(sqe->buf_index);
-
-		node = io_rsrc_node_lookup(&ctx->buf_table, index);
-		if (unlikely(!node))
-			return -EFAULT;
-		/*
-		 * Pi node upfront, prior to io_uring_cmd_import_fixed()
-		 * being called. This prevents destruction of the mapped buffer
-		 * we'll need at actual import time.
-		 */
-		io_req_assign_buf_node(req, node);
-	}
+	if (ioucmd->flags & IORING_URING_CMD_FIXED)
+		req->buf_index = READ_ONCE(sqe->buf_index);
+
  	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
  
  	return io_uring_cmd_prep_setup(req, sqe);
@@ -261,13 +249,8 @@ int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
  			      unsigned int issue_flags)
  {
  	struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
-	struct io_rsrc_node *node = req->buf_node;
-
-	/* Must have had rsrc_node assigned at prep time */
-	if (node)
-		return io_import_fixed(rw, iter, node->buf, ubuf, len);
  
-	return -EFAULT;
+	return io_import_reg_buf(req, iter, ubuf, len, rw, issue_flags);
  }
  EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed);
Pavel Begunkov Feb. 19, 2025, 5:15 p.m. UTC | #4
On 2/19/25 16:48, Pavel Begunkov wrote:
> On 2/18/25 22:42, Keith Busch wrote:
>> From: Keith Busch <kbusch@kernel.org>
>>
>> Similar to the fixed file path, requests may depend on a previous one
>> to set up an index, so we need to allow linking them. The prep callback
>> happens too soon for linked commands, so the lookup needs to be deferred
>> to the issue path. Change the prep callbacks to just set the buf_index
>> and let generic io_uring code handle the fixed buffer node setup, just
>> like it already does for fixed files.
>>
>> Signed-off-by: Keith Busch <kbusch@kernel.org>
> 
> It wasn't great before, and it'd be harder to follow if we shove it
> into the issue path like that. Add additional overhead in the common
> path and that it's not super flexible, like the notification problem
> and what we need out of it for other features.
> 
> We're better to remove the lookup vs import split like below.
> Here is a branch, let's do it on top.
> 
> https://github.com/isilence/linux.git regbuf-import
> 
> 
> diff --git a/io_uring/net.c b/io_uring/net.c
> index ce0a39972cce..322cf023233a 100644
> --- a/io_uring/net.c
> +++ b/io_uring/net.c
> @@ -1360,24 +1360,10 @@ static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
...
> -int io_import_fixed(int ddir, struct iov_iter *iter,
> -               struct io_mapped_ubuf *imu,
> -               u64 buf_addr, size_t len)
> +static int io_import_fixed_imu(int ddir, struct iov_iter *iter,
> +                struct io_mapped_ubuf *imu,
> +                u64 buf_addr, size_t len)
>   {
>       u64 buf_end;
>       size_t offset;
> @@ -919,6 +919,35 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
>       return 0;
>   }
> 
> +static inline struct io_rsrc_node *io_find_buf_node(struct io_kiocb *req,
> +                            unsigned issue_flags)
> +{
> +    struct io_ring_ctx *ctx = req->ctx;
> +    struct io_rsrc_node *node;
> +
> +    if (req->buf_node)

Seems it should be checking for REQ_F_BUF_NODE instead

> +        return req->buf_node;
> +
> +    io_ring_submit_lock(ctx, issue_flags);
> +    node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index);
> +    if (node)
> +        io_req_assign_buf_node(req, node);
> +    io_ring_submit_unlock(ctx, issue_flags);
> +    return node;
> +}
Keith Busch Feb. 20, 2025, 1:25 a.m. UTC | #5
On Wed, Feb 19, 2025 at 04:48:30PM +0000, Pavel Begunkov wrote:
> We're better to remove the lookup vs import split like below.
> Here is a branch, let's do it on top.
> 
> https://github.com/isilence/linux.git regbuf-import

Your first patch adds a 10th parameter to an nvme function, most of
which are unused in half the branches. I think we've done something
wrong here, so I want to take a shot at cleaning that up. Otherwise I
think what you're proposing is an improvement.
Pavel Begunkov Feb. 20, 2025, 10:12 a.m. UTC | #6
On 2/20/25 01:25, Keith Busch wrote:
> On Wed, Feb 19, 2025 at 04:48:30PM +0000, Pavel Begunkov wrote:
>> We're better to remove the lookup vs import split like below.
>> Here is a branch, let's do it on top.
>>
>> https://github.com/isilence/linux.git regbuf-import
> 
> Your first patch adds a 10th parameter to an nvme function, most of
> which are unused in half the branches. I think we've done something
> wrong here, so I want to take a shot at cleaning that up. Otherwise I

That would be great, I didn't feel great about it either, even
the existing ioucmd arg doesn't seem like a perfect fit.


> think what you're proposing is an improvement.
diff mbox series

Patch

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index c0fe8a00fe53a..0bcaefc4ffe02 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -482,6 +482,7 @@  enum {
 	REQ_F_DOUBLE_POLL_BIT,
 	REQ_F_APOLL_MULTISHOT_BIT,
 	REQ_F_CLEAR_POLLIN_BIT,
+	REQ_F_FIXED_BUFFER_BIT,
 	/* keep async read/write and isreg together and in order */
 	REQ_F_SUPPORT_NOWAIT_BIT,
 	REQ_F_ISREG_BIT,
@@ -574,6 +575,8 @@  enum {
 	REQ_F_BUF_NODE		= IO_REQ_FLAG(REQ_F_BUF_NODE_BIT),
 	/* request has read/write metadata assigned */
 	REQ_F_HAS_METADATA	= IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT),
+	/* request has a fixed buffer at buf_index */
+	REQ_F_FIXED_BUFFER	= IO_REQ_FLAG(REQ_F_FIXED_BUFFER_BIT),
 };
 
 typedef void (*io_req_tw_func_t)(struct io_kiocb *req, io_tw_token_t tw);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 58528bf61638e..7800edbc57279 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1721,6 +1721,23 @@  static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
 	return !!req->file;
 }
 
+static bool io_assign_buffer(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_rsrc_node *node;
+
+	if (req->buf_node || !(req->flags & REQ_F_FIXED_BUFFER))
+		return true;
+
+	io_ring_submit_lock(ctx, issue_flags);
+	node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index);
+	if (node)
+		io_req_assign_buf_node(req, node);
+	io_ring_submit_unlock(ctx, issue_flags);
+
+	return !!node;
+}
+
 static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 {
 	const struct io_issue_def *def = &io_issue_defs[req->opcode];
@@ -1729,6 +1746,8 @@  static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (unlikely(!io_assign_file(req, def, issue_flags)))
 		return -EBADF;
+	if (unlikely(!io_assign_buffer(req, issue_flags)))
+		return -EFAULT;
 
 	if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred()))
 		creds = override_creds(req->creds);
diff --git a/io_uring/net.c b/io_uring/net.c
index 000dc70d08d0d..39838e8575b53 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1373,6 +1373,10 @@  int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 #endif
 	if (unlikely(!io_msg_alloc_async(req)))
 		return -ENOMEM;
+	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
+		req->buf_index = zc->buf_index;
+		req->flags |= REQ_F_FIXED_BUFFER;
+	}
 	if (req->opcode != IORING_OP_SENDMSG_ZC)
 		return io_send_setup(req, sqe);
 	return io_sendmsg_setup(req, sqe);
@@ -1434,25 +1438,10 @@  static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
 	struct io_async_msghdr *kmsg = req->async_data;
 	int ret;
 
-	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
-		struct io_ring_ctx *ctx = req->ctx;
-		struct io_rsrc_node *node;
-
-		ret = -EFAULT;
-		io_ring_submit_lock(ctx, issue_flags);
-		node = io_rsrc_node_lookup(&ctx->buf_table, sr->buf_index);
-		if (node) {
-			io_req_assign_buf_node(sr->notif, node);
-			ret = 0;
-		}
-		io_ring_submit_unlock(ctx, issue_flags);
-
-		if (unlikely(ret))
-			return ret;
-
+	if (req->flags & REQ_F_FIXED_BUFFER) {
 		ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter,
-					node->buf, (u64)(uintptr_t)sr->buf,
-					sr->len);
+					req->buf_node->buf,
+					(u64)(uintptr_t)sr->buf, sr->len);
 		if (unlikely(ret))
 			return ret;
 		kmsg->msg.sg_from_iter = io_sg_from_iter;
diff --git a/io_uring/nop.c b/io_uring/nop.c
index 5e5196df650a1..989908603112f 100644
--- a/io_uring/nop.c
+++ b/io_uring/nop.c
@@ -16,7 +16,6 @@  struct io_nop {
 	struct file     *file;
 	int             result;
 	int		fd;
-	int		buffer;
 	unsigned int	flags;
 };
 
@@ -39,10 +38,10 @@  int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		nop->fd = READ_ONCE(sqe->fd);
 	else
 		nop->fd = -1;
-	if (nop->flags & IORING_NOP_FIXED_BUFFER)
-		nop->buffer = READ_ONCE(sqe->buf_index);
-	else
-		nop->buffer = -1;
+	if (nop->flags & IORING_NOP_FIXED_BUFFER) {
+		req->buf_index = READ_ONCE(sqe->buf_index);
+		req->flags |= REQ_F_FIXED_BUFFER;
+	}
 	return 0;
 }
 
@@ -63,19 +62,6 @@  int io_nop(struct io_kiocb *req, unsigned int issue_flags)
 			goto done;
 		}
 	}
-	if (nop->flags & IORING_NOP_FIXED_BUFFER) {
-		struct io_ring_ctx *ctx = req->ctx;
-		struct io_rsrc_node *node;
-
-		ret = -EFAULT;
-		io_ring_submit_lock(ctx, issue_flags);
-		node = io_rsrc_node_lookup(&ctx->buf_table, nop->buffer);
-		if (node) {
-			io_req_assign_buf_node(req, node);
-			ret = 0;
-		}
-		io_ring_submit_unlock(ctx, issue_flags);
-	}
 done:
 	if (ret < 0)
 		req_set_fail(req);
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 16f12f94943f7..2d8910d9197a0 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -353,25 +353,14 @@  int io_prep_writev(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 static int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			    int ddir)
 {
-	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
-	struct io_ring_ctx *ctx = req->ctx;
-	struct io_rsrc_node *node;
-	struct io_async_rw *io;
 	int ret;
 
 	ret = io_prep_rw(req, sqe, ddir, false);
 	if (unlikely(ret))
 		return ret;
 
-	node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index);
-	if (!node)
-		return -EFAULT;
-	io_req_assign_buf_node(req, node);
-
-	io = req->async_data;
-	ret = io_import_fixed(ddir, &io->iter, node->buf, rw->addr, rw->len);
-	iov_iter_save_state(&io->iter, &io->iter_state);
-	return ret;
+	req->flags |= REQ_F_FIXED_BUFFER;
+	return 0;
 }
 
 int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -954,10 +943,36 @@  static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
 	return ret;
 }
 
+static int io_import_fixed_buffer(struct io_kiocb *req, int ddir)
+{
+	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+	struct io_async_rw *io;
+	int ret;
+
+	if (!(req->flags & REQ_F_FIXED_BUFFER))
+		return 0;
+
+	io = req->async_data;
+	if (io->bytes_done)
+		return 0;
+
+	ret = io_import_fixed(ddir, &io->iter, req->buf_node->buf, rw->addr,
+			      rw->len);
+	if (ret)
+		return ret;
+
+	iov_iter_save_state(&io->iter, &io->iter_state);
+	return 0;
+}
+
 int io_read(struct io_kiocb *req, unsigned int issue_flags)
 {
 	int ret;
 
+	ret = io_import_fixed_buffer(req, READ);
+	if (unlikely(ret))
+		return ret;
+
 	ret = __io_read(req, issue_flags);
 	if (ret >= 0)
 		return kiocb_done(req, ret, issue_flags);
@@ -1062,6 +1077,10 @@  int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	ssize_t ret, ret2;
 	loff_t *ppos;
 
+	ret = io_import_fixed_buffer(req, WRITE);
+	if (unlikely(ret))
+		return ret;
+
 	ret = io_rw_init_file(req, FMODE_WRITE, WRITE);
 	if (unlikely(ret))
 		return ret;
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 8bdf2c9b3fef9..112b49fde23e5 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -200,19 +200,8 @@  int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return -EINVAL;
 
 	if (ioucmd->flags & IORING_URING_CMD_FIXED) {
-		struct io_ring_ctx *ctx = req->ctx;
-		struct io_rsrc_node *node;
-		u16 index = READ_ONCE(sqe->buf_index);
-
-		node = io_rsrc_node_lookup(&ctx->buf_table, index);
-		if (unlikely(!node))
-			return -EFAULT;
-		/*
-		 * Pi node upfront, prior to io_uring_cmd_import_fixed()
-		 * being called. This prevents destruction of the mapped buffer
-		 * we'll need at actual import time.
-		 */
-		io_req_assign_buf_node(req, node);
+		req->buf_index = READ_ONCE(sqe->buf_index);
+		req->flags |= REQ_F_FIXED_BUFFER;
 	}
 	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
 
@@ -262,7 +251,6 @@  int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
 	struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
 	struct io_rsrc_node *node = req->buf_node;
 
-	/* Must have had rsrc_node assigned at prep time */
 	if (node)
 		return io_import_fixed(rw, iter, node->buf, ubuf, len);