Message ID | 20240901-b4-fuse-uring-rfcv3-without-mmap-v3-6-9207f7391444@ddn.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | fuse: fuse-over-io-uring | expand |
On Sun, Sep 1, 2024 at 6:37 AM Bernd Schubert <bschubert@ddn.com> wrote: > > Signed-off-by: Bernd Schubert <bschubert@ddn.com> > --- > fs/fuse/dev.c | 30 ++++++++++++++++++++++++++++++ > fs/fuse/dev_uring.c | 2 ++ > fs/fuse/dev_uring_i.h | 13 +++++++++++++ > fs/fuse/fuse_i.h | 4 ++++ > include/uapi/linux/fuse.h | 39 +++++++++++++++++++++++++++++++++++++++ > 5 files changed, 88 insertions(+) > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > index 6489179e7260..06ea4dc5ffe1 100644 > --- a/fs/fuse/dev.c > +++ b/fs/fuse/dev.c > @@ -2379,6 +2379,33 @@ static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp) > return fuse_backing_close(fud->fc, backing_id); > } > > +#ifdef CONFIG_FUSE_IO_URING > +static long fuse_uring_queue_ioc(struct file *file, __u32 __user *argp) > +{ > + int res = 0; > + struct fuse_dev *fud; > + struct fuse_conn *fc; > + struct fuse_ring_queue_config qcfg; > + > + res = copy_from_user(&qcfg, (void *)argp, sizeof(qcfg)); > + if (res != 0) > + return -EFAULT; > + > + res = _fuse_dev_ioctl_clone(file, qcfg.control_fd); I'm confused how this works for > 1 queues. If I'm understanding this correctly, if a system has multiple cores and the server would like multi-queues, then the server needs to call the ioctl FUSE_DEV_IOC_URING_QUEUE_CFG multiple times (each with a different qid). In this handler, when we get to _fuse_dev_ioctl_clone() -> fuse_device_clone(), it allocates and installs a new fud and then sets file->private_data to fud, but isn't this underlying file the same for all of the queues since they are using the same fd for the ioctl calls? It seems like every queue after the 1st would fail with -EINVAL from the "if (new->private_data)" check in fuse_device_clone()? Not sure if I'm missing something or if this intentionally doesn't support multi-queue yet. If the latter, then I'm curious how you're planning to get the fud for a specific queue given that file->private_data and fuse_get_dev() only can support the single queue case. Thanks, Joanne > + if (res != 0) > + return res; > + > + fud = fuse_get_dev(file); > + if (fud == NULL) > + return -ENODEV; > + fc = fud->fc; > + > + fud->ring_q = fuse_uring_get_queue(fc->ring, qcfg.qid); > + > + return 0; > +} > +#endif > + > static long > fuse_dev_ioctl(struct file *file, unsigned int cmd, > unsigned long arg) > @@ -2398,6 +2425,9 @@ fuse_dev_ioctl(struct file *file, unsigned int cmd, > #ifdef CONFIG_FUSE_IO_URING > case FUSE_DEV_IOC_URING_CFG: > return fuse_uring_conn_cfg(file, argp); > + > + case FUSE_DEV_IOC_URING_QUEUE_CFG: > + return fuse_uring_queue_ioc(file, argp); > #endif > default: > return -ENOTTY; > diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c > index 4e7518ef6527..4dcb4972242e 100644 > --- a/fs/fuse/dev_uring.c > +++ b/fs/fuse/dev_uring.c > @@ -42,6 +42,8 @@ static void fuse_uring_queue_cfg(struct fuse_ring_queue *queue, int qid, > > ent->queue = queue; > ent->tag = tag; > + > + ent->state = FRRS_INIT; > } > } > > diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h > index d4eff87bcd1f..301b37d16506 100644 > --- a/fs/fuse/dev_uring_i.h > +++ b/fs/fuse/dev_uring_i.h > @@ -14,6 +14,13 @@ > /* IORING_MAX_ENTRIES */ > #define FUSE_URING_MAX_QUEUE_DEPTH 32768 > > +enum fuse_ring_req_state { > + > + /* request is basially initialized */ > + FRRS_INIT = 1, > + > +}; > + > /* A fuse ring entry, part of the ring queue */ > struct fuse_ring_ent { > /* back pointer */ > @@ -21,6 +28,12 @@ struct fuse_ring_ent { > > /* array index in the ring-queue */ > unsigned int tag; > + > + /* > + * state the request is currently in > + * (enum fuse_ring_req_state) > + */ > + unsigned long state; > }; > > struct fuse_ring_queue { > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > index 33e81b895fee..5eb8552d9d7f 100644 > --- a/fs/fuse/fuse_i.h > +++ b/fs/fuse/fuse_i.h > @@ -540,6 +540,10 @@ struct fuse_dev { > > /** list entry on fc->devices */ > struct list_head entry; > + > +#ifdef CONFIG_FUSE_IO_URING > + struct fuse_ring_queue *ring_q; > +#endif > }; > > enum fuse_dax_mode { > diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h > index a1c35e0338f0..143ed3c1c7b3 100644 > --- a/include/uapi/linux/fuse.h > +++ b/include/uapi/linux/fuse.h > @@ -1118,6 +1118,18 @@ struct fuse_ring_config { > uint8_t padding[64]; > }; > > +struct fuse_ring_queue_config { > + /* qid the command is for */ > + uint32_t qid; > + > + /* /dev/fuse fd that initiated the mount. */ > + uint32_t control_fd; > + > + /* for future extensions */ > + uint8_t padding[64]; > +}; > + > + > /* Device ioctls: */ > #define FUSE_DEV_IOC_MAGIC 229 > #define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) > @@ -1126,6 +1138,8 @@ struct fuse_ring_config { > #define FUSE_DEV_IOC_BACKING_CLOSE _IOW(FUSE_DEV_IOC_MAGIC, 2, uint32_t) > #define FUSE_DEV_IOC_URING_CFG _IOR(FUSE_DEV_IOC_MAGIC, 3, \ > struct fuse_ring_config) > +#define FUSE_DEV_IOC_URING_QUEUE_CFG _IOR(FUSE_DEV_IOC_MAGIC, 3, \ > + struct fuse_ring_queue_config) > > struct fuse_lseek_in { > uint64_t fh; > @@ -1233,4 +1247,29 @@ struct fuse_supp_groups { > #define FUSE_RING_HEADER_BUF_SIZE 4096 > #define FUSE_RING_MIN_IN_OUT_ARG_SIZE 4096 > > +/** > + * This structure mapped onto the > + */ > +struct fuse_ring_req { > + union { > + /* The first 4K are command data */ > + char ring_header[FUSE_RING_HEADER_BUF_SIZE]; > + > + struct { > + uint64_t flags; > + > + uint32_t in_out_arg_len; > + uint32_t padding; > + > + /* kernel fills in, reads out */ > + union { > + struct fuse_in_header in; > + struct fuse_out_header out; > + }; > + }; > + }; > + > + char in_out_arg[]; > +}; > + > #endif /* _LINUX_FUSE_H */ > > -- > 2.43.0 >
On 9/5/24 00:23, Joanne Koong wrote: > On Sun, Sep 1, 2024 at 6:37 AM Bernd Schubert <bschubert@ddn.com> wrote: >> >> Signed-off-by: Bernd Schubert <bschubert@ddn.com> >> --- >> fs/fuse/dev.c | 30 ++++++++++++++++++++++++++++++ >> fs/fuse/dev_uring.c | 2 ++ >> fs/fuse/dev_uring_i.h | 13 +++++++++++++ >> fs/fuse/fuse_i.h | 4 ++++ >> include/uapi/linux/fuse.h | 39 +++++++++++++++++++++++++++++++++++++++ >> 5 files changed, 88 insertions(+) >> >> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c >> index 6489179e7260..06ea4dc5ffe1 100644 >> --- a/fs/fuse/dev.c >> +++ b/fs/fuse/dev.c >> @@ -2379,6 +2379,33 @@ static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp) >> return fuse_backing_close(fud->fc, backing_id); >> } >> >> +#ifdef CONFIG_FUSE_IO_URING >> +static long fuse_uring_queue_ioc(struct file *file, __u32 __user *argp) >> +{ >> + int res = 0; >> + struct fuse_dev *fud; >> + struct fuse_conn *fc; >> + struct fuse_ring_queue_config qcfg; >> + >> + res = copy_from_user(&qcfg, (void *)argp, sizeof(qcfg)); >> + if (res != 0) >> + return -EFAULT; >> + >> + res = _fuse_dev_ioctl_clone(file, qcfg.control_fd); > > I'm confused how this works for > 1 queues. If I'm understanding this > correctly, if a system has multiple cores and the server would like > multi-queues, then the server needs to call the ioctl > FUSE_DEV_IOC_URING_QUEUE_CFG multiple times (each with a different > qid). > > In this handler, when we get to _fuse_dev_ioctl_clone() -> > fuse_device_clone(), it allocates and installs a new fud and then sets > file->private_data to fud, but isn't this underlying file the same for > all of the queues since they are using the same fd for the ioctl > calls? It seems like every queue after the 1st would fail with -EINVAL > from the "if (new->private_data)" check in fuse_device_clone()? Each queue is using it's own fd - this works exactly the same as a existing FUSE_DEV_IOC_CLONE - each clone has to open /dev/fuse on its own. A bit a pity that dup() isn't sufficient. Only difference to FUSE_DEV_IOC_CLONE is the additional qid. > > Not sure if I'm missing something or if this intentionally doesn't > support multi-queue yet. If the latter, then I'm curious how you're > planning to get the fud for a specific queue given that > file->private_data and fuse_get_dev() only can support the single > queue case. Strictly in the current patch set, the clone is only needed in the next patch "07/17] fuse: {uring} Add a dev_release exception for fuse-over-io-uring" Though, since we have the fud anyway and link to the ring-queue, it makes use of it in 08/17] fuse: {uring} Handle SQEs - register commands in fuse_uring_cmd(). I hope I understood your question right. Thanks, Bernd
On Wed, Sep 4, 2024 at 3:38 PM Bernd Schubert <bernd.schubert@fastmail.fm> wrote: > > On 9/5/24 00:23, Joanne Koong wrote: > > On Sun, Sep 1, 2024 at 6:37 AM Bernd Schubert <bschubert@ddn.com> wrote: > >> > >> Signed-off-by: Bernd Schubert <bschubert@ddn.com> > >> --- > >> fs/fuse/dev.c | 30 ++++++++++++++++++++++++++++++ > >> fs/fuse/dev_uring.c | 2 ++ > >> fs/fuse/dev_uring_i.h | 13 +++++++++++++ > >> fs/fuse/fuse_i.h | 4 ++++ > >> include/uapi/linux/fuse.h | 39 +++++++++++++++++++++++++++++++++++++++ > >> 5 files changed, 88 insertions(+) > >> > >> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > >> index 6489179e7260..06ea4dc5ffe1 100644 > >> --- a/fs/fuse/dev.c > >> +++ b/fs/fuse/dev.c > >> @@ -2379,6 +2379,33 @@ static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp) > >> return fuse_backing_close(fud->fc, backing_id); > >> } > >> > >> +#ifdef CONFIG_FUSE_IO_URING > >> +static long fuse_uring_queue_ioc(struct file *file, __u32 __user *argp) > >> +{ > >> + int res = 0; > >> + struct fuse_dev *fud; > >> + struct fuse_conn *fc; > >> + struct fuse_ring_queue_config qcfg; > >> + > >> + res = copy_from_user(&qcfg, (void *)argp, sizeof(qcfg)); > >> + if (res != 0) > >> + return -EFAULT; > >> + > >> + res = _fuse_dev_ioctl_clone(file, qcfg.control_fd); > > > > I'm confused how this works for > 1 queues. If I'm understanding this > > correctly, if a system has multiple cores and the server would like > > multi-queues, then the server needs to call the ioctl > > FUSE_DEV_IOC_URING_QUEUE_CFG multiple times (each with a different > > qid). > > > > In this handler, when we get to _fuse_dev_ioctl_clone() -> > > fuse_device_clone(), it allocates and installs a new fud and then sets > > file->private_data to fud, but isn't this underlying file the same for > > all of the queues since they are using the same fd for the ioctl > > calls? It seems like every queue after the 1st would fail with -EINVAL > > from the "if (new->private_data)" check in fuse_device_clone()? > > Each queue is using it's own fd - this works exactly the same as > a existing FUSE_DEV_IOC_CLONE - each clone has to open /dev/fuse on its Ah gotcha, this is the part I was missing. I didn't realize the expectation is that the server needs to open a new /dev/fuse for each queue. This makes more sense to me now, thanks. > own. A bit a pity that dup() isn't sufficient. Only difference to > FUSE_DEV_IOC_CLONE is the additional qid. > > > > > Not sure if I'm missing something or if this intentionally doesn't > > support multi-queue yet. If the latter, then I'm curious how you're > > planning to get the fud for a specific queue given that > > file->private_data and fuse_get_dev() only can support the single > > queue case. > > > Strictly in the current patch set, the clone is only needed in the > next patch > "07/17] fuse: {uring} Add a dev_release exception for fuse-over-io-uring" > Though, since we have the fud anyway and link to the ring-queue, it makes > use of it in > 08/17] fuse: {uring} Handle SQEs - register commands > > in fuse_uring_cmd(). > > > I hope I understood your question right. > > > Thanks, > Bernd
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 6489179e7260..06ea4dc5ffe1 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2379,6 +2379,33 @@ static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp) return fuse_backing_close(fud->fc, backing_id); } +#ifdef CONFIG_FUSE_IO_URING +static long fuse_uring_queue_ioc(struct file *file, __u32 __user *argp) +{ + int res = 0; + struct fuse_dev *fud; + struct fuse_conn *fc; + struct fuse_ring_queue_config qcfg; + + res = copy_from_user(&qcfg, (void *)argp, sizeof(qcfg)); + if (res != 0) + return -EFAULT; + + res = _fuse_dev_ioctl_clone(file, qcfg.control_fd); + if (res != 0) + return res; + + fud = fuse_get_dev(file); + if (fud == NULL) + return -ENODEV; + fc = fud->fc; + + fud->ring_q = fuse_uring_get_queue(fc->ring, qcfg.qid); + + return 0; +} +#endif + static long fuse_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -2398,6 +2425,9 @@ fuse_dev_ioctl(struct file *file, unsigned int cmd, #ifdef CONFIG_FUSE_IO_URING case FUSE_DEV_IOC_URING_CFG: return fuse_uring_conn_cfg(file, argp); + + case FUSE_DEV_IOC_URING_QUEUE_CFG: + return fuse_uring_queue_ioc(file, argp); #endif default: return -ENOTTY; diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c index 4e7518ef6527..4dcb4972242e 100644 --- a/fs/fuse/dev_uring.c +++ b/fs/fuse/dev_uring.c @@ -42,6 +42,8 @@ static void fuse_uring_queue_cfg(struct fuse_ring_queue *queue, int qid, ent->queue = queue; ent->tag = tag; + + ent->state = FRRS_INIT; } } diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h index d4eff87bcd1f..301b37d16506 100644 --- a/fs/fuse/dev_uring_i.h +++ b/fs/fuse/dev_uring_i.h @@ -14,6 +14,13 @@ /* IORING_MAX_ENTRIES */ #define FUSE_URING_MAX_QUEUE_DEPTH 32768 +enum fuse_ring_req_state { + + /* request is basially initialized */ + FRRS_INIT = 1, + +}; + /* A fuse ring entry, part of the ring queue */ struct fuse_ring_ent { /* back pointer */ @@ -21,6 +28,12 @@ struct fuse_ring_ent { /* array index in the ring-queue */ unsigned int tag; + + /* + * state the request is currently in + * (enum fuse_ring_req_state) + */ + unsigned long state; }; struct fuse_ring_queue { diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 33e81b895fee..5eb8552d9d7f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -540,6 +540,10 @@ struct fuse_dev { /** list entry on fc->devices */ struct list_head entry; + +#ifdef CONFIG_FUSE_IO_URING + struct fuse_ring_queue *ring_q; +#endif }; enum fuse_dax_mode { diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index a1c35e0338f0..143ed3c1c7b3 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -1118,6 +1118,18 @@ struct fuse_ring_config { uint8_t padding[64]; }; +struct fuse_ring_queue_config { + /* qid the command is for */ + uint32_t qid; + + /* /dev/fuse fd that initiated the mount. */ + uint32_t control_fd; + + /* for future extensions */ + uint8_t padding[64]; +}; + + /* Device ioctls: */ #define FUSE_DEV_IOC_MAGIC 229 #define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) @@ -1126,6 +1138,8 @@ struct fuse_ring_config { #define FUSE_DEV_IOC_BACKING_CLOSE _IOW(FUSE_DEV_IOC_MAGIC, 2, uint32_t) #define FUSE_DEV_IOC_URING_CFG _IOR(FUSE_DEV_IOC_MAGIC, 3, \ struct fuse_ring_config) +#define FUSE_DEV_IOC_URING_QUEUE_CFG _IOR(FUSE_DEV_IOC_MAGIC, 3, \ + struct fuse_ring_queue_config) struct fuse_lseek_in { uint64_t fh; @@ -1233,4 +1247,29 @@ struct fuse_supp_groups { #define FUSE_RING_HEADER_BUF_SIZE 4096 #define FUSE_RING_MIN_IN_OUT_ARG_SIZE 4096 +/** + * This structure mapped onto the + */ +struct fuse_ring_req { + union { + /* The first 4K are command data */ + char ring_header[FUSE_RING_HEADER_BUF_SIZE]; + + struct { + uint64_t flags; + + uint32_t in_out_arg_len; + uint32_t padding; + + /* kernel fills in, reads out */ + union { + struct fuse_in_header in; + struct fuse_out_header out; + }; + }; + }; + + char in_out_arg[]; +}; + #endif /* _LINUX_FUSE_H */
Signed-off-by: Bernd Schubert <bschubert@ddn.com> --- fs/fuse/dev.c | 30 ++++++++++++++++++++++++++++++ fs/fuse/dev_uring.c | 2 ++ fs/fuse/dev_uring_i.h | 13 +++++++++++++ fs/fuse/fuse_i.h | 4 ++++ include/uapi/linux/fuse.h | 39 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 88 insertions(+)