diff mbox series

[PATCHv11,03/10] io_uring: add write stream attribute

Message ID 20241206015308.3342386-4-kbusch@meta.com (mailing list archive)
State New
Headers show
Series block write streams with nvme fdp | expand

Commit Message

Keith Busch Dec. 6, 2024, 1:53 a.m. UTC
From: Keith Busch <kbusch@kernel.org>

Adds a new attribute type to specify a write stream per-IO.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/uapi/linux/io_uring.h |  9 ++++++++-
 io_uring/rw.c                 | 28 +++++++++++++++++++++++++++-
 2 files changed, 35 insertions(+), 2 deletions(-)

Comments

Anuj Gupta Dec. 6, 2024, 9:55 a.m. UTC | #1
On 05/12/24 05:53PM, Keith Busch wrote:
>From: Keith Busch <kbusch@kernel.org>
>
>Adds a new attribute type to specify a write stream per-IO.
>
>Signed-off-by: Keith Busch <kbusch@kernel.org>
>---
> include/uapi/linux/io_uring.h |  9 ++++++++-
> io_uring/rw.c                 | 28 +++++++++++++++++++++++++++-
> 2 files changed, 35 insertions(+), 2 deletions(-)
>
>diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>index 5fa38467d6070..263cd57aae72d 100644
>--- a/include/uapi/linux/io_uring.h
>+++ b/include/uapi/linux/io_uring.h
>@@ -123,7 +123,14 @@ struct io_uring_attr_pi {
> 	__u64	rsvd;
> };
>
>-#define IORING_RW_ATTR_FLAGS_SUPPORTED (IORING_RW_ATTR_FLAG_PI)
>+#define IORING_RW_ATTR_FLAG_WRITE_STREAM (1U << 1)
>+struct io_uring_write_stream {
Nit:
You can consider keeping a io_uring_attr_* prefix here, so that it aligns
with current attribute naming style.
s/io_uring_write_stream/io_uring_attr_write_stream

>+	__u16	write_stream;
>+	__u8	rsvd[6];
>+};
>+
>+#define IORING_RW_ATTR_FLAGS_SUPPORTED (IORING_RW_ATTR_FLAG_PI | 	\
>+					IORING_RW_ATTR_FLAG_WRITE_STREAM)
>
> /*
>  * If sqe->file_index is set to this for opcodes that instantiate a new
>diff --git a/io_uring/rw.c b/io_uring/rw.c
>index a2987aefb2cec..69b566e296f6d 100644
>--- a/io_uring/rw.c
>+++ b/io_uring/rw.c
>@@ -299,6 +299,22 @@ static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir,
> 	return ret;
> }
>
>+static int io_prep_rw_write_stream(struct io_rw *rw, u64 *attr_ptr)
>+{
>+	struct io_uring_write_stream write_stream;
>+
>+	if (copy_from_user(&write_stream, u64_to_user_ptr(*attr_ptr),
>+			   sizeof(write_stream)))
>+		return -EFAULT;
>+
>+	if (!memchr_inv(write_stream.rsvd, 0, sizeof(write_stream.rsvd)))
This should be:
	if (memchr_inv(write_stream.rsvd, 0, sizeof(write_stream.rsvd)))
Kanchan Joshi Dec. 6, 2024, 12:44 p.m. UTC | #2
On 12/6/2024 7:23 AM, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
> 
> Adds a new attribute type to specify a write stream per-IO.
> 
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---
>   include/uapi/linux/io_uring.h |  9 ++++++++-
>   io_uring/rw.c                 | 28 +++++++++++++++++++++++++++-
>   2 files changed, 35 insertions(+), 2 deletions(-)
> 
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 5fa38467d6070..263cd57aae72d 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -123,7 +123,14 @@ struct io_uring_attr_pi {
>   	__u64	rsvd;
>   };
>   
> -#define IORING_RW_ATTR_FLAGS_SUPPORTED (IORING_RW_ATTR_FLAG_PI)
> +#define IORING_RW_ATTR_FLAG_WRITE_STREAM (1U << 1)
> +struct io_uring_write_stream {
> +	__u16	write_stream;
> +	__u8	rsvd[6];
> +};

So this needs 8 bytes. Maybe passing just 'u16 write_stream' is better? 
Or do you expect future additions here (to keep rsvd).

Optimization is possible (now or in future) if it's 4 bytes or smaller, 
as that can be placed in SQE along with a new RW attribute flag that 
says it's placed inline. Like this -

--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -92,6 +92,10 @@ struct io_uring_sqe {
                         __u16   addr_len;
                         __u16   __pad3[1];
                 };
+               struct {
+                       __u16   write_hint;
+                       __u16   __rsvd[1];
+               };
         };
         union {
                 struct {
@@ -113,6 +117,7 @@ struct io_uring_sqe {

  /* sqe->attr_type_mask flags */
  #define IORING_RW_ATTR_FLAG_PI (1U << 0)
+#define IORING_RW_ATTR_FLAG_WRITE_STREAM_INLINE        (1U << 1)
Keith Busch Dec. 6, 2024, 4:53 p.m. UTC | #3
On Fri, Dec 06, 2024 at 06:14:29PM +0530, Kanchan Joshi wrote:
> On 12/6/2024 7:23 AM, Keith Busch wrote:
> > From: Keith Busch <kbusch@kernel.org>
> > 
> > Adds a new attribute type to specify a write stream per-IO.
> > 
> > Signed-off-by: Keith Busch <kbusch@kernel.org>
> > ---
> >   include/uapi/linux/io_uring.h |  9 ++++++++-
> >   io_uring/rw.c                 | 28 +++++++++++++++++++++++++++-
> >   2 files changed, 35 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> > index 5fa38467d6070..263cd57aae72d 100644
> > --- a/include/uapi/linux/io_uring.h
> > +++ b/include/uapi/linux/io_uring.h
> > @@ -123,7 +123,14 @@ struct io_uring_attr_pi {
> >   	__u64	rsvd;
> >   };
> >   
> > -#define IORING_RW_ATTR_FLAGS_SUPPORTED (IORING_RW_ATTR_FLAG_PI)
> > +#define IORING_RW_ATTR_FLAG_WRITE_STREAM (1U << 1)
> > +struct io_uring_write_stream {
> > +	__u16	write_stream;
> > +	__u8	rsvd[6];
> > +};
> 
> So this needs 8 bytes. Maybe passing just 'u16 write_stream' is better? 
> Or do you expect future additions here (to keep rsvd).

I don't have any plans to use it. It's just padded for alignment. I am
not sure what future attributes might be proposed, but I don't want to
force them be align to a 2-byte boundary.
 
> Optimization is possible (now or in future) if it's 4 bytes or smaller, 
> as that can be placed in SQE along with a new RW attribute flag that 
> says it's placed inline. Like this -

Oh, that's definitely preferred IMO, because it is that much easier to
reach the capability. Previous versions of this proposal had the field
in the next union, so I for some reason this union you're showing here
was unavailable for new fields, but it looks like it's unused for
read/write. So, yeah, let's put it in the sqe if there's no conflict
here.

> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -92,6 +92,10 @@ struct io_uring_sqe {
>                          __u16   addr_len;
>                          __u16   __pad3[1];
>                  };
> +               struct {
> +                       __u16   write_hint;
> +                       __u16   __rsvd[1];
> +               };
>          };
>          union {
>                  struct {
diff mbox series

Patch

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 5fa38467d6070..263cd57aae72d 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -123,7 +123,14 @@  struct io_uring_attr_pi {
 	__u64	rsvd;
 };
 
-#define IORING_RW_ATTR_FLAGS_SUPPORTED (IORING_RW_ATTR_FLAG_PI)
+#define IORING_RW_ATTR_FLAG_WRITE_STREAM (1U << 1)
+struct io_uring_write_stream {
+	__u16	write_stream;
+	__u8	rsvd[6];
+};
+
+#define IORING_RW_ATTR_FLAGS_SUPPORTED (IORING_RW_ATTR_FLAG_PI | 	\
+					IORING_RW_ATTR_FLAG_WRITE_STREAM)
 
 /*
  * If sqe->file_index is set to this for opcodes that instantiate a new
diff --git a/io_uring/rw.c b/io_uring/rw.c
index a2987aefb2cec..69b566e296f6d 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -299,6 +299,22 @@  static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir,
 	return ret;
 }
 
+static int io_prep_rw_write_stream(struct io_rw *rw, u64 *attr_ptr)
+{
+	struct io_uring_write_stream write_stream;
+
+	if (copy_from_user(&write_stream, u64_to_user_ptr(*attr_ptr),
+			   sizeof(write_stream)))
+		return -EFAULT;
+
+	if (!memchr_inv(write_stream.rsvd, 0, sizeof(write_stream.rsvd)))
+		return -EINVAL;
+
+	rw->kiocb.ki_write_stream = write_stream.write_stream;
+	*attr_ptr += sizeof(write_stream);
+	return 0;
+}
+
 static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		      int ddir, bool do_import)
 {
@@ -340,7 +356,17 @@  static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			return -EINVAL;
 
 		attr_ptr = READ_ONCE(sqe->attr_ptr);
-		ret = io_prep_rw_pi(req, rw, ddir, attr_ptr, attr_type_mask);
+		if (attr_type_mask & IORING_RW_ATTR_FLAG_PI) {
+			ret = io_prep_rw_pi(req, rw, ddir, &attr_ptr);
+			if (ret)
+				return ret;
+		}
+
+		if (attr_type_mask & IORING_RW_ATTR_FLAG_WRITE_STREAM) {
+			ret = io_prep_rw_write_stream(rw, &attr_ptr);
+			if (ret)
+				return ret;
+		}
 	}
 	return ret;
 }