Message ID | 20240529-fuse-uring-for-6-9-rfc2-out-v1-8-d149476b1d65@ddn.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | fuse: fuse-over-io-uring | expand |
On Wed, May 29, 2024 at 08:00:43PM +0200, Bernd Schubert wrote: > Signed-off-by: Bernd Schubert <bschubert@ddn.com> > --- > fs/fuse/dev.c | 10 +++++ > fs/fuse/dev_uring.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++ > fs/fuse/dev_uring_i.h | 18 +++++++++ > fs/fuse/fuse_i.h | 3 ++ > include/uapi/linux/fuse.h | 26 +++++++++++++ > 5 files changed, 152 insertions(+) > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > index 349c1d16b0df..78c05516da7f 100644 > --- a/fs/fuse/dev.c > +++ b/fs/fuse/dev.c > @@ -2395,6 +2395,12 @@ static long fuse_uring_ioctl(struct file *file, __u32 __user *argp) > if (res != 0) > return -EFAULT; > > + if (cfg.cmd == FUSE_URING_IOCTL_CMD_QUEUE_CFG) { > + res = _fuse_dev_ioctl_clone(file, cfg.qconf.control_fd); > + if (res != 0) > + return res; > + } > + > fud = fuse_get_dev(file); > if (fud == NULL) > return -ENODEV; > @@ -2424,6 +2430,10 @@ static long fuse_uring_ioctl(struct file *file, __u32 __user *argp) > if (res != 0) > return res; > break; > + case FUSE_URING_IOCTL_CMD_QUEUE_CFG: > + fud->uring_dev = 1; > + res = fuse_uring_queue_cfg(fc->ring, &cfg.qconf); > + break; > default: > res = -EINVAL; > } > diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c > index 9491bdaa5716..2c0ccb378908 100644 > --- a/fs/fuse/dev_uring.c > +++ b/fs/fuse/dev_uring.c > @@ -144,6 +144,39 @@ static char *fuse_uring_alloc_queue_buf(int size, int node) > return buf ? buf : ERR_PTR(-ENOMEM); > } > > +/* > + * mmaped allocated buffers, but does not know which queue that is for > + * This ioctl uses the userspace address as key to identify the kernel address > + * and assign it to the kernel side of the queue. > + */ > +static int fuse_uring_ioctl_mem_reg(struct fuse_ring *ring, > + struct fuse_ring_queue *queue, > + uint64_t uaddr) > +{ > + struct rb_node *node; > + struct fuse_uring_mbuf *entry; > + int tag; > + > + node = rb_find((const void *)uaddr, &ring->mem_buf_map, > + fuse_uring_rb_tree_buf_cmp); > + if (!node) > + return -ENOENT; > + entry = rb_entry(node, struct fuse_uring_mbuf, rb_node); > + > + rb_erase(node, &ring->mem_buf_map); > + > + queue->queue_req_buf = entry->kbuf; > + > + for (tag = 0; tag < ring->queue_depth; tag++) { > + struct fuse_ring_ent *ent = &queue->ring_ent[tag]; > + > + ent->rreq = entry->kbuf + tag * ring->req_buf_sz; > + } > + > + kfree(node); > + return 0; > +} > + > /** > * fuse uring mmap, per ring qeuue. > * Userpsace maps a kernel allocated ring/queue buffer. For numa awareness, > @@ -234,3 +267,65 @@ fuse_uring_mmap(struct file *filp, struct vm_area_struct *vma) > > return ret; > } > + > +int fuse_uring_queue_cfg(struct fuse_ring *ring, > + struct fuse_ring_queue_config *qcfg) > +{ > + int tag; > + struct fuse_ring_queue *queue; > + > + if (qcfg->qid >= ring->nr_queues) { > + pr_info("fuse ring queue config: qid=%u >= nr-queues=%zu\n", > + qcfg->qid, ring->nr_queues); > + return -EINVAL; > + } > + queue = fuse_uring_get_queue(ring, qcfg->qid); > + > + if (queue->configured) { > + pr_info("fuse ring qid=%u already configured!\n", queue->qid); > + return -EALREADY; > + } > + > + mutex_lock(&ring->start_stop_lock); > + fuse_uring_ioctl_mem_reg(ring, queue, qcfg->uaddr); > + mutex_unlock(&ring->start_stop_lock); You're not handling the error here. Thanks, Josef
On 5/30/24 17:54, Josef Bacik wrote: > On Wed, May 29, 2024 at 08:00:43PM +0200, Bernd Schubert wrote: >> Signed-off-by: Bernd Schubert <bschubert@ddn.com> >> --- >> fs/fuse/dev.c | 10 +++++ >> fs/fuse/dev_uring.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++ >> fs/fuse/dev_uring_i.h | 18 +++++++++ >> fs/fuse/fuse_i.h | 3 ++ >> include/uapi/linux/fuse.h | 26 +++++++++++++ >> 5 files changed, 152 insertions(+) >> >> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c >> index 349c1d16b0df..78c05516da7f 100644 >> --- a/fs/fuse/dev.c >> +++ b/fs/fuse/dev.c >> @@ -2395,6 +2395,12 @@ static long fuse_uring_ioctl(struct file *file, __u32 __user *argp) >> if (res != 0) >> return -EFAULT; >> >> + if (cfg.cmd == FUSE_URING_IOCTL_CMD_QUEUE_CFG) { >> + res = _fuse_dev_ioctl_clone(file, cfg.qconf.control_fd); >> + if (res != 0) >> + return res; >> + } >> + >> fud = fuse_get_dev(file); >> if (fud == NULL) >> return -ENODEV; >> @@ -2424,6 +2430,10 @@ static long fuse_uring_ioctl(struct file *file, __u32 __user *argp) >> if (res != 0) >> return res; >> break; >> + case FUSE_URING_IOCTL_CMD_QUEUE_CFG: >> + fud->uring_dev = 1; >> + res = fuse_uring_queue_cfg(fc->ring, &cfg.qconf); >> + break; >> default: >> res = -EINVAL; >> } >> diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c >> index 9491bdaa5716..2c0ccb378908 100644 >> --- a/fs/fuse/dev_uring.c >> +++ b/fs/fuse/dev_uring.c >> @@ -144,6 +144,39 @@ static char *fuse_uring_alloc_queue_buf(int size, int node) >> return buf ? buf : ERR_PTR(-ENOMEM); >> } >> >> +/* >> + * mmaped allocated buffers, but does not know which queue that is for >> + * This ioctl uses the userspace address as key to identify the kernel address >> + * and assign it to the kernel side of the queue. >> + */ >> +static int fuse_uring_ioctl_mem_reg(struct fuse_ring *ring, >> + struct fuse_ring_queue *queue, >> + uint64_t uaddr) >> +{ >> + struct rb_node *node; >> + struct fuse_uring_mbuf *entry; >> + int tag; >> + >> + node = rb_find((const void *)uaddr, &ring->mem_buf_map, >> + fuse_uring_rb_tree_buf_cmp); >> + if (!node) >> + return -ENOENT; >> + entry = rb_entry(node, struct fuse_uring_mbuf, rb_node); >> + >> + rb_erase(node, &ring->mem_buf_map); >> + >> + queue->queue_req_buf = entry->kbuf; >> + >> + for (tag = 0; tag < ring->queue_depth; tag++) { >> + struct fuse_ring_ent *ent = &queue->ring_ent[tag]; >> + >> + ent->rreq = entry->kbuf + tag * ring->req_buf_sz; >> + } >> + >> + kfree(node); >> + return 0; >> +} >> + >> /** >> * fuse uring mmap, per ring qeuue. >> * Userpsace maps a kernel allocated ring/queue buffer. For numa awareness, >> @@ -234,3 +267,65 @@ fuse_uring_mmap(struct file *filp, struct vm_area_struct *vma) >> >> return ret; >> } >> + >> +int fuse_uring_queue_cfg(struct fuse_ring *ring, >> + struct fuse_ring_queue_config *qcfg) >> +{ >> + int tag; >> + struct fuse_ring_queue *queue; >> + >> + if (qcfg->qid >= ring->nr_queues) { >> + pr_info("fuse ring queue config: qid=%u >= nr-queues=%zu\n", >> + qcfg->qid, ring->nr_queues); >> + return -EINVAL; >> + } >> + queue = fuse_uring_get_queue(ring, qcfg->qid); >> + >> + if (queue->configured) { >> + pr_info("fuse ring qid=%u already configured!\n", queue->qid); >> + return -EALREADY; >> + } >> + >> + mutex_lock(&ring->start_stop_lock); >> + fuse_uring_ioctl_mem_reg(ring, queue, qcfg->uaddr); >> + mutex_unlock(&ring->start_stop_lock); > > You're not handling the error here. Thanks, Thanks again for all your reviews! All fixed up to here, except vmalloc_node_user(), as you suggested, I will try to decouple it from this series. And d'oh! I didn't find the simple numa_node_id() function. Thanks so much for pointing that out. New branch is here: https://github.com/bsbernd/linux/tree/fuse-uring-for-6.9-rfc3 Thanks, Bernd
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 349c1d16b0df..78c05516da7f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2395,6 +2395,12 @@ static long fuse_uring_ioctl(struct file *file, __u32 __user *argp) if (res != 0) return -EFAULT; + if (cfg.cmd == FUSE_URING_IOCTL_CMD_QUEUE_CFG) { + res = _fuse_dev_ioctl_clone(file, cfg.qconf.control_fd); + if (res != 0) + return res; + } + fud = fuse_get_dev(file); if (fud == NULL) return -ENODEV; @@ -2424,6 +2430,10 @@ static long fuse_uring_ioctl(struct file *file, __u32 __user *argp) if (res != 0) return res; break; + case FUSE_URING_IOCTL_CMD_QUEUE_CFG: + fud->uring_dev = 1; + res = fuse_uring_queue_cfg(fc->ring, &cfg.qconf); + break; default: res = -EINVAL; } diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c index 9491bdaa5716..2c0ccb378908 100644 --- a/fs/fuse/dev_uring.c +++ b/fs/fuse/dev_uring.c @@ -144,6 +144,39 @@ static char *fuse_uring_alloc_queue_buf(int size, int node) return buf ? buf : ERR_PTR(-ENOMEM); } +/* + * mmaped allocated buffers, but does not know which queue that is for + * This ioctl uses the userspace address as key to identify the kernel address + * and assign it to the kernel side of the queue. + */ +static int fuse_uring_ioctl_mem_reg(struct fuse_ring *ring, + struct fuse_ring_queue *queue, + uint64_t uaddr) +{ + struct rb_node *node; + struct fuse_uring_mbuf *entry; + int tag; + + node = rb_find((const void *)uaddr, &ring->mem_buf_map, + fuse_uring_rb_tree_buf_cmp); + if (!node) + return -ENOENT; + entry = rb_entry(node, struct fuse_uring_mbuf, rb_node); + + rb_erase(node, &ring->mem_buf_map); + + queue->queue_req_buf = entry->kbuf; + + for (tag = 0; tag < ring->queue_depth; tag++) { + struct fuse_ring_ent *ent = &queue->ring_ent[tag]; + + ent->rreq = entry->kbuf + tag * ring->req_buf_sz; + } + + kfree(node); + return 0; +} + /** * fuse uring mmap, per ring qeuue. * Userpsace maps a kernel allocated ring/queue buffer. For numa awareness, @@ -234,3 +267,65 @@ fuse_uring_mmap(struct file *filp, struct vm_area_struct *vma) return ret; } + +int fuse_uring_queue_cfg(struct fuse_ring *ring, + struct fuse_ring_queue_config *qcfg) +{ + int tag; + struct fuse_ring_queue *queue; + + if (qcfg->qid >= ring->nr_queues) { + pr_info("fuse ring queue config: qid=%u >= nr-queues=%zu\n", + qcfg->qid, ring->nr_queues); + return -EINVAL; + } + queue = fuse_uring_get_queue(ring, qcfg->qid); + + if (queue->configured) { + pr_info("fuse ring qid=%u already configured!\n", queue->qid); + return -EALREADY; + } + + mutex_lock(&ring->start_stop_lock); + fuse_uring_ioctl_mem_reg(ring, queue, qcfg->uaddr); + mutex_unlock(&ring->start_stop_lock); + + queue->qid = qcfg->qid; + queue->ring = ring; + spin_lock_init(&queue->lock); + INIT_LIST_HEAD(&queue->sync_fuse_req_queue); + INIT_LIST_HEAD(&queue->async_fuse_req_queue); + + INIT_LIST_HEAD(&queue->sync_ent_avail_queue); + INIT_LIST_HEAD(&queue->async_ent_avail_queue); + + INIT_LIST_HEAD(&queue->ent_in_userspace); + + for (tag = 0; tag < ring->queue_depth; tag++) { + struct fuse_ring_ent *ent = &queue->ring_ent[tag]; + + ent->queue = queue; + ent->tag = tag; + ent->fuse_req = NULL; + + pr_devel("initialize qid=%d tag=%d queue=%p req=%p", qcfg->qid, + tag, queue, ent); + + ent->rreq->flags = 0; + + ent->state = 0; + set_bit(FRRS_INIT, &ent->state); + + INIT_LIST_HEAD(&ent->list); + } + + queue->configured = 1; + ring->nr_queues_ioctl_init++; + if (ring->nr_queues_ioctl_init == ring->nr_queues) { + pr_devel("ring=%p nr-queues=%zu depth=%zu ioctl ready\n", ring, + ring->nr_queues, ring->queue_depth); + } + + return 0; +} + diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h index c455ae0e729a..7a2f540d3ea5 100644 --- a/fs/fuse/dev_uring_i.h +++ b/fs/fuse/dev_uring_i.h @@ -16,6 +16,24 @@ /* IORING_MAX_ENTRIES */ #define FUSE_URING_MAX_QUEUE_DEPTH 32768 +enum fuse_ring_req_state { + + /* request is basially initialized */ + FRRS_INIT, + + /* The ring request waits for a new fuse request */ + FRRS_WAIT, + + /* The ring req got assigned a fuse req */ + FRRS_FUSE_REQ, + + /* request is in or on the way to user space */ + FRRS_USERSPACE, + + /* request is released */ + FRRS_FREED, +}; + struct fuse_uring_mbuf { struct rb_node rb_node; void *kbuf; /* kernel allocated ring request buffer */ diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index d2b058ccb677..fadc51a22bb9 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -540,6 +540,9 @@ struct fuse_dev { /** list entry on fc->devices */ struct list_head entry; + + /** Is the device used for fuse-over-io-uring? */ + unsigned int uring_dev : 1; }; enum fuse_dax_mode { diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 00d0154ec2da..88d4078c4171 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -1262,4 +1262,30 @@ struct fuse_supp_groups { /* The offset parameter is used to identify the request type */ #define FUSE_URING_MMAP_OFF 0xf8000000ULL +/** + * This structure mapped onto the + */ +struct fuse_ring_req { + union { + /* The first 4K are command data */ + char ring_header[FUSE_RING_HEADER_BUF_SIZE]; + + struct { + uint64_t flags; + + /* enum fuse_ring_buf_cmd */ + uint32_t in_out_arg_len; + uint32_t padding; + + /* kernel fills in, reads out */ + union { + struct fuse_in_header in; + struct fuse_out_header out; + }; + }; + }; + + char in_out_arg[]; +}; + #endif /* _LINUX_FUSE_H */
Signed-off-by: Bernd Schubert <bschubert@ddn.com> --- fs/fuse/dev.c | 10 +++++ fs/fuse/dev_uring.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++ fs/fuse/dev_uring_i.h | 18 +++++++++ fs/fuse/fuse_i.h | 3 ++ include/uapi/linux/fuse.h | 26 +++++++++++++ 5 files changed, 152 insertions(+)