Message ID | 20241206015308.3342386-4-kbusch@meta.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | block write streams with nvme fdp | expand |
On 05/12/24 05:53PM, Keith Busch wrote: >From: Keith Busch <kbusch@kernel.org> > >Adds a new attribute type to specify a write stream per-IO. > >Signed-off-by: Keith Busch <kbusch@kernel.org> >--- > include/uapi/linux/io_uring.h | 9 ++++++++- > io_uring/rw.c | 28 +++++++++++++++++++++++++++- > 2 files changed, 35 insertions(+), 2 deletions(-) > >diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h >index 5fa38467d6070..263cd57aae72d 100644 >--- a/include/uapi/linux/io_uring.h >+++ b/include/uapi/linux/io_uring.h >@@ -123,7 +123,14 @@ struct io_uring_attr_pi { > __u64 rsvd; > }; > >-#define IORING_RW_ATTR_FLAGS_SUPPORTED (IORING_RW_ATTR_FLAG_PI) >+#define IORING_RW_ATTR_FLAG_WRITE_STREAM (1U << 1) >+struct io_uring_write_stream { Nit: You can consider keeping a io_uring_attr_* prefix here, so that it aligns with current attribute naming style. s/io_uring_write_stream/io_uring_attr_write_stream >+ __u16 write_stream; >+ __u8 rsvd[6]; >+}; >+ >+#define IORING_RW_ATTR_FLAGS_SUPPORTED (IORING_RW_ATTR_FLAG_PI | \ >+ IORING_RW_ATTR_FLAG_WRITE_STREAM) > > /* > * If sqe->file_index is set to this for opcodes that instantiate a new >diff --git a/io_uring/rw.c b/io_uring/rw.c >index a2987aefb2cec..69b566e296f6d 100644 >--- a/io_uring/rw.c >+++ b/io_uring/rw.c >@@ -299,6 +299,22 @@ static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir, > return ret; > } > >+static int io_prep_rw_write_stream(struct io_rw *rw, u64 *attr_ptr) >+{ >+ struct io_uring_write_stream write_stream; >+ >+ if (copy_from_user(&write_stream, u64_to_user_ptr(*attr_ptr), >+ sizeof(write_stream))) >+ return -EFAULT; >+ >+ if (!memchr_inv(write_stream.rsvd, 0, sizeof(write_stream.rsvd))) This should be: if (memchr_inv(write_stream.rsvd, 0, sizeof(write_stream.rsvd)))
On 12/6/2024 7:23 AM, Keith Busch wrote: > From: Keith Busch <kbusch@kernel.org> > > Adds a new attribute type to specify a write stream per-IO. > > Signed-off-by: Keith Busch <kbusch@kernel.org> > --- > include/uapi/linux/io_uring.h | 9 ++++++++- > io_uring/rw.c | 28 +++++++++++++++++++++++++++- > 2 files changed, 35 insertions(+), 2 deletions(-) > > diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h > index 5fa38467d6070..263cd57aae72d 100644 > --- a/include/uapi/linux/io_uring.h > +++ b/include/uapi/linux/io_uring.h > @@ -123,7 +123,14 @@ struct io_uring_attr_pi { > __u64 rsvd; > }; > > -#define IORING_RW_ATTR_FLAGS_SUPPORTED (IORING_RW_ATTR_FLAG_PI) > +#define IORING_RW_ATTR_FLAG_WRITE_STREAM (1U << 1) > +struct io_uring_write_stream { > + __u16 write_stream; > + __u8 rsvd[6]; > +}; So this needs 8 bytes. Maybe passing just 'u16 write_stream' is better? Or do you expect future additions here (to keep rsvd). Optimization is possible (now or in future) if it's 4 bytes or smaller, as that can be placed in SQE along with a new RW attribute flag that says it's placed inline. Like this - --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -92,6 +92,10 @@ struct io_uring_sqe { __u16 addr_len; __u16 __pad3[1]; }; + struct { + __u16 write_hint; + __u16 __rsvd[1]; + }; }; union { struct { @@ -113,6 +117,7 @@ struct io_uring_sqe { /* sqe->attr_type_mask flags */ #define IORING_RW_ATTR_FLAG_PI (1U << 0) +#define IORING_RW_ATTR_FLAG_WRITE_STREAM_INLINE (1U << 1)
On Fri, Dec 06, 2024 at 06:14:29PM +0530, Kanchan Joshi wrote: > On 12/6/2024 7:23 AM, Keith Busch wrote: > > From: Keith Busch <kbusch@kernel.org> > > > > Adds a new attribute type to specify a write stream per-IO. > > > > Signed-off-by: Keith Busch <kbusch@kernel.org> > > --- > > include/uapi/linux/io_uring.h | 9 ++++++++- > > io_uring/rw.c | 28 +++++++++++++++++++++++++++- > > 2 files changed, 35 insertions(+), 2 deletions(-) > > > > diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h > > index 5fa38467d6070..263cd57aae72d 100644 > > --- a/include/uapi/linux/io_uring.h > > +++ b/include/uapi/linux/io_uring.h > > @@ -123,7 +123,14 @@ struct io_uring_attr_pi { > > __u64 rsvd; > > }; > > > > -#define IORING_RW_ATTR_FLAGS_SUPPORTED (IORING_RW_ATTR_FLAG_PI) > > +#define IORING_RW_ATTR_FLAG_WRITE_STREAM (1U << 1) > > +struct io_uring_write_stream { > > + __u16 write_stream; > > + __u8 rsvd[6]; > > +}; > > So this needs 8 bytes. Maybe passing just 'u16 write_stream' is better? > Or do you expect future additions here (to keep rsvd). I don't have any plans to use it. It's just padded for alignment. I am not sure what future attributes might be proposed, but I don't want to force them be align to a 2-byte boundary. > Optimization is possible (now or in future) if it's 4 bytes or smaller, > as that can be placed in SQE along with a new RW attribute flag that > says it's placed inline. Like this - Oh, that's definitely preferred IMO, because it is that much easier to reach the capability. Previous versions of this proposal had the field in the next union, so I for some reason this union you're showing here was unavailable for new fields, but it looks like it's unused for read/write. So, yeah, let's put it in the sqe if there's no conflict here. > --- a/include/uapi/linux/io_uring.h > +++ b/include/uapi/linux/io_uring.h > @@ -92,6 +92,10 @@ struct io_uring_sqe { > __u16 addr_len; > __u16 __pad3[1]; > }; > + struct { > + __u16 write_hint; > + __u16 __rsvd[1]; > + }; > }; > union { > struct {
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 5fa38467d6070..263cd57aae72d 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -123,7 +123,14 @@ struct io_uring_attr_pi { __u64 rsvd; }; -#define IORING_RW_ATTR_FLAGS_SUPPORTED (IORING_RW_ATTR_FLAG_PI) +#define IORING_RW_ATTR_FLAG_WRITE_STREAM (1U << 1) +struct io_uring_write_stream { + __u16 write_stream; + __u8 rsvd[6]; +}; + +#define IORING_RW_ATTR_FLAGS_SUPPORTED (IORING_RW_ATTR_FLAG_PI | \ + IORING_RW_ATTR_FLAG_WRITE_STREAM) /* * If sqe->file_index is set to this for opcodes that instantiate a new diff --git a/io_uring/rw.c b/io_uring/rw.c index a2987aefb2cec..69b566e296f6d 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -299,6 +299,22 @@ static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir, return ret; } +static int io_prep_rw_write_stream(struct io_rw *rw, u64 *attr_ptr) +{ + struct io_uring_write_stream write_stream; + + if (copy_from_user(&write_stream, u64_to_user_ptr(*attr_ptr), + sizeof(write_stream))) + return -EFAULT; + + if (!memchr_inv(write_stream.rsvd, 0, sizeof(write_stream.rsvd))) + return -EINVAL; + + rw->kiocb.ki_write_stream = write_stream.write_stream; + *attr_ptr += sizeof(write_stream); + return 0; +} + static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, int ddir, bool do_import) { @@ -340,7 +356,17 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, return -EINVAL; attr_ptr = READ_ONCE(sqe->attr_ptr); - ret = io_prep_rw_pi(req, rw, ddir, attr_ptr, attr_type_mask); + if (attr_type_mask & IORING_RW_ATTR_FLAG_PI) { + ret = io_prep_rw_pi(req, rw, ddir, &attr_ptr); + if (ret) + return ret; + } + + if (attr_type_mask & IORING_RW_ATTR_FLAG_WRITE_STREAM) { + ret = io_prep_rw_write_stream(rw, &attr_ptr); + if (ret) + return ret; + } } return ret; }