diff mbox series

[1/2] io_uring: add support for truncate

Message ID 20240122193732.23217-1-tony.solomonik@gmail.com (mailing list archive)
State New
Headers show
Series [1/2] io_uring: add support for truncate | expand

Commit Message

Tony Solomonik Jan. 22, 2024, 7:37 p.m. UTC
Libraries that are built on io_uring currently need to maintain a
separate thread pool implementation when they want to truncate a file.
---
 include/uapi/linux/io_uring.h |  1 +
 io_uring/Makefile             |  2 +-
 io_uring/opdef.c              |  8 ++++++
 io_uring/truncate.c           | 53 +++++++++++++++++++++++++++++++++++
 io_uring/truncate.h           |  4 +++
 5 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 io_uring/truncate.c
 create mode 100644 io_uring/truncate.h

Comments

Breno Leitao Jan. 22, 2024, 7:56 p.m. UTC | #1
Hello Tony,

On Mon, Jan 22, 2024 at 09:37:31PM +0200, Tony Solomonik wrote:
> +int io_truncate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> +{
> +	struct io_trunc *tr = io_kiocb_to_cmd(req, struct io_trunc);
> +
> +	if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
> +		return -EINVAL;
> +	if (unlikely(req->flags & REQ_F_FIXED_FILE))
> +		return -EBADF;
> +
> +	tr->pathname = u64_to_user_ptr(READ_ONCE(sqe->addr));
> +	tr->len = READ_ONCE(sqe->len);

sqe->len is 32 bits. I _think_ loff_t is or could be 64-bits. Isn't it
possible to use a u64 here? Maybe sqe->off or sqe->addr?
Jens Axboe Jan. 22, 2024, 8:10 p.m. UTC | #2
On 1/22/24 12:37 PM, Tony Solomonik wrote:
> Libraries that are built on io_uring currently need to maintain a
> separate thread pool implementation when they want to truncate a file.

In general, I think we should just make this one opcode, and then
require fd to be -1 for the truncate case (with path in addr, like you
have), or have a valid fd and NULL addr for the ftruncate case. Then at
least we don't waste two opcodes on essentially the same functionality.
One minor code comment, which applies to both patches:

> +int io_truncate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> +{
> +	struct io_trunc *tr = io_kiocb_to_cmd(req, struct io_trunc);
> +
> +	if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
> +		return -EINVAL;
> +	if (unlikely(req->flags & REQ_F_FIXED_FILE))
> +		return -EBADF;
> +
> +	tr->pathname = u64_to_user_ptr(READ_ONCE(sqe->addr));
> +	tr->len = READ_ONCE(sqe->len);

This should use offset rather than len, as Breno pointed out.

> +	req->flags |= REQ_F_NEED_CLEANUP;

Why is this being set? There's nothing to clean up post completion, so
this just slows req completion down for no particular reason. And then
you can kill the forced clearing of the same flag in io_truncate as well.
Gabriel Krisman Bertazi Jan. 22, 2024, 8:12 p.m. UTC | #3
Tony Solomonik <tony.solomonik@gmail.com> writes:

> Libraries that are built on io_uring currently need to maintain a
> separate thread pool implementation when they want to truncate a file.

I don't think it makes sense to have both ftruncate and truncate in
io_uring.  One can just as easily link an open+ftruncate to have the
same semantics in one go.

> ---
>  include/uapi/linux/io_uring.h |  1 +
>  io_uring/Makefile             |  2 +-
>  io_uring/opdef.c              |  8 ++++++
>  io_uring/truncate.c           | 53 +++++++++++++++++++++++++++++++++++
>  io_uring/truncate.h           |  4 +++
>  5 files changed, 67 insertions(+), 1 deletion(-)
>  create mode 100644 io_uring/truncate.c
>  create mode 100644 io_uring/truncate.h
>
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index f1c16f817742..513f31ee8ce9 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -253,6 +253,7 @@ enum io_uring_op {
>  	IORING_OP_FUTEX_WAIT,
>  	IORING_OP_FUTEX_WAKE,
>  	IORING_OP_FUTEX_WAITV,
> +	IORING_OP_TRUNCATE,
>  
>  	/* this goes last, obviously */
>  	IORING_OP_LAST,
> diff --git a/io_uring/Makefile b/io_uring/Makefile
> index e5be47e4fc3b..4f8ed6530a29 100644
> --- a/io_uring/Makefile
> +++ b/io_uring/Makefile
> @@ -8,6 +8,6 @@ obj-$(CONFIG_IO_URING)		+= io_uring.o xattr.o nop.o fs.o splice.o \
>  					statx.o net.o msg_ring.o timeout.o \
>  					sqpoll.o fdinfo.o tctx.o poll.o \
>  					cancel.o kbuf.o rsrc.o rw.o opdef.o \
> -					notif.o waitid.o
> +					notif.o waitid.o truncate.o
>  obj-$(CONFIG_IO_WQ)		+= io-wq.o
>  obj-$(CONFIG_FUTEX)		+= futex.o
> diff --git a/io_uring/opdef.c b/io_uring/opdef.c
> index 799db44283c7..60827099e244 100644
> --- a/io_uring/opdef.c
> +++ b/io_uring/opdef.c
> @@ -35,6 +35,7 @@
>  #include "rw.h"
>  #include "waitid.h"
>  #include "futex.h"
> +#include "truncate.h"
>  
>  static int io_no_issue(struct io_kiocb *req, unsigned int issue_flags)
>  {
> @@ -469,6 +470,10 @@ const struct io_issue_def io_issue_defs[] = {
>  		.prep			= io_eopnotsupp_prep,
>  #endif
>  	},
> +	[IORING_OP_TRUNCATE] = {
> +		.prep			= io_truncate_prep,
> +		.issue			= io_truncate,
> +	},
>  };
>  
>  const struct io_cold_def io_cold_defs[] = {
> @@ -704,6 +709,9 @@ const struct io_cold_def io_cold_defs[] = {
>  	[IORING_OP_FUTEX_WAITV] = {
>  		.name			= "FUTEX_WAITV",
>  	},
> +	[IORING_OP_TRUNCATE] = {
> +		.name			= "TRUNCATE",
> +	},
>  };
>  
>  const char *io_uring_get_opcode(u8 opcode)
> diff --git a/io_uring/truncate.c b/io_uring/truncate.c
> new file mode 100644
> index 000000000000..82648b2fbc7e
> --- /dev/null
> +++ b/io_uring/truncate.c
> @@ -0,0 +1,53 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/kernel.h>
> +#include <linux/errno.h>
> +#include <linux/fs.h>
> +#include <linux/file.h>
> +#include <linux/mm.h>
> +#include <linux/slab.h>
> +#include <linux/syscalls.h>
> +#include <linux/io_uring.h>
> +
> +#include <uapi/linux/io_uring.h>
> +
> +#include "../fs/internal.h"
> +
> +#include "io_uring.h"
> +#include "truncate.h"
> +
> +struct io_trunc {
> +	struct files    *file;
> +	char __user     *pathname;
> +	loff_t				len;
> +};
> +
> +int io_truncate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> +{
> +	struct io_trunc *tr = io_kiocb_to_cmd(req, struct io_trunc);
> +
> +	if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
> +		return -EINVAL;
> +	if (unlikely(req->flags & REQ_F_FIXED_FILE))
> +		return -EBADF;
> +
> +	tr->pathname = u64_to_user_ptr(READ_ONCE(sqe->addr));
> +	tr->len = READ_ONCE(sqe->len);
> +
> +	req->flags |= REQ_F_NEED_CLEANUP;
> +	req->flags |= REQ_F_FORCE_ASYNC;
> +	return 0;
> +}
> +
> +int io_truncate(struct io_kiocb *req, unsigned int issue_flags)
> +{
> +	struct io_trunc *tr = io_kiocb_to_cmd(req, struct io_trunc);
> +	int ret;
> +
> +	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
> +
> +	ret = do_sys_truncate(tr->pathname, tr->len);
> +
> +	req->flags &= ~REQ_F_NEED_CLEANUP;
> +	io_req_set_res(req, ret, 0);
> +	return IOU_OK;
> +}
> diff --git a/io_uring/truncate.h b/io_uring/truncate.h
> new file mode 100644
> index 000000000000..ab17cb9acc90
> --- /dev/null
> +++ b/io_uring/truncate.h
> @@ -0,0 +1,4 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +int io_truncate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
> +int io_truncate(struct io_kiocb *req, unsigned int issue_flags);
Jens Axboe Jan. 22, 2024, 8:21 p.m. UTC | #4
One more thing on this one...

> +int io_truncate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> +{
> +	struct io_trunc *tr = io_kiocb_to_cmd(req, struct io_trunc);
> +
> +	if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
> +		return -EINVAL;
> +	if (unlikely(req->flags & REQ_F_FIXED_FILE))
> +		return -EBADF;
> +
> +	tr->pathname = u64_to_user_ptr(READ_ONCE(sqe->addr));

io_uring generally guarantees that any data passed in only needs to remain
stable until submit returns, but that's not the case here. Imagine you had
code like:

prep_truncate(ring, dir, filename)
{
	char path[PATH_MAX];

	sprintf(path, "%s/%s", dir, filename);
	sqe = io_uring_get_sqe(ring);
	io_uring_prep_truncate(sqe, path, -1, 0);
	/* your io_truncate_prep() will be run here */
	io_uring_submit(ring);
}

io_loop()
{
	...
	prep_truncate(ring, dir, filename);
	/* path was on stack and now out-of-scope, and there's nothing
	   preventing io_truncate() from running post that. */
}

which implies you'd want some refactoring done here as well, so you can
pass in a path for do_sys_truncate(). And then you would certainly need
the cleanup flag set, but also provide a ->cleanup() helper. See some of
the other fs handling code, like xattr, for how that should be done.

This problem obviously doesn't exist for the fd ftruncate variant, as
there's no path resolution to do there.
Jens Axboe Jan. 22, 2024, 8:22 p.m. UTC | #5
On 1/22/24 1:12 PM, Gabriel Krisman Bertazi wrote:
> Tony Solomonik <tony.solomonik@gmail.com> writes:
> 
>> Libraries that are built on io_uring currently need to maintain a
>> separate thread pool implementation when they want to truncate a file.
> 
> I don't think it makes sense to have both ftruncate and truncate in
> io_uring.  One can just as easily link an open+ftruncate to have the
> same semantics in one go.

Yeah, see comment on the life time issue with this one as well, which
is avoided with the fd variant. So if just having the ftruncate variant
is good enough, that'd solve that headache too. And if done like I
suggested where fd must be valid and we -EINVAL on sqe->addr being
set, you could always add truncate by path functionality later on top
without requiring a new opcode just for that.
diff mbox series

Patch

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index f1c16f817742..513f31ee8ce9 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -253,6 +253,7 @@  enum io_uring_op {
 	IORING_OP_FUTEX_WAIT,
 	IORING_OP_FUTEX_WAKE,
 	IORING_OP_FUTEX_WAITV,
+	IORING_OP_TRUNCATE,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
diff --git a/io_uring/Makefile b/io_uring/Makefile
index e5be47e4fc3b..4f8ed6530a29 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -8,6 +8,6 @@  obj-$(CONFIG_IO_URING)		+= io_uring.o xattr.o nop.o fs.o splice.o \
 					statx.o net.o msg_ring.o timeout.o \
 					sqpoll.o fdinfo.o tctx.o poll.o \
 					cancel.o kbuf.o rsrc.o rw.o opdef.o \
-					notif.o waitid.o
+					notif.o waitid.o truncate.o
 obj-$(CONFIG_IO_WQ)		+= io-wq.o
 obj-$(CONFIG_FUTEX)		+= futex.o
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 799db44283c7..60827099e244 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -35,6 +35,7 @@ 
 #include "rw.h"
 #include "waitid.h"
 #include "futex.h"
+#include "truncate.h"
 
 static int io_no_issue(struct io_kiocb *req, unsigned int issue_flags)
 {
@@ -469,6 +470,10 @@  const struct io_issue_def io_issue_defs[] = {
 		.prep			= io_eopnotsupp_prep,
 #endif
 	},
+	[IORING_OP_TRUNCATE] = {
+		.prep			= io_truncate_prep,
+		.issue			= io_truncate,
+	},
 };
 
 const struct io_cold_def io_cold_defs[] = {
@@ -704,6 +709,9 @@  const struct io_cold_def io_cold_defs[] = {
 	[IORING_OP_FUTEX_WAITV] = {
 		.name			= "FUTEX_WAITV",
 	},
+	[IORING_OP_TRUNCATE] = {
+		.name			= "TRUNCATE",
+	},
 };
 
 const char *io_uring_get_opcode(u8 opcode)
diff --git a/io_uring/truncate.c b/io_uring/truncate.c
new file mode 100644
index 000000000000..82648b2fbc7e
--- /dev/null
+++ b/io_uring/truncate.c
@@ -0,0 +1,53 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/io_uring.h>
+
+#include <uapi/linux/io_uring.h>
+
+#include "../fs/internal.h"
+
+#include "io_uring.h"
+#include "truncate.h"
+
+struct io_trunc {
+	struct files    *file;
+	char __user     *pathname;
+	loff_t				len;
+};
+
+int io_truncate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_trunc *tr = io_kiocb_to_cmd(req, struct io_trunc);
+
+	if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+		return -EINVAL;
+	if (unlikely(req->flags & REQ_F_FIXED_FILE))
+		return -EBADF;
+
+	tr->pathname = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	tr->len = READ_ONCE(sqe->len);
+
+	req->flags |= REQ_F_NEED_CLEANUP;
+	req->flags |= REQ_F_FORCE_ASYNC;
+	return 0;
+}
+
+int io_truncate(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_trunc *tr = io_kiocb_to_cmd(req, struct io_trunc);
+	int ret;
+
+	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
+
+	ret = do_sys_truncate(tr->pathname, tr->len);
+
+	req->flags &= ~REQ_F_NEED_CLEANUP;
+	io_req_set_res(req, ret, 0);
+	return IOU_OK;
+}
diff --git a/io_uring/truncate.h b/io_uring/truncate.h
new file mode 100644
index 000000000000..ab17cb9acc90
--- /dev/null
+++ b/io_uring/truncate.h
@@ -0,0 +1,4 @@ 
+// SPDX-License-Identifier: GPL-2.0
+
+int io_truncate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_truncate(struct io_kiocb *req, unsigned int issue_flags);