diff mbox series

[RFC,v2,08/19] fuse: Add the queue configuration ioctl

Message ID 20240529-fuse-uring-for-6-9-rfc2-out-v1-8-d149476b1d65@ddn.com (mailing list archive)
State New
Headers show
Series fuse: fuse-over-io-uring | expand

Commit Message

Bernd Schubert May 29, 2024, 6 p.m. UTC
Signed-off-by: Bernd Schubert <bschubert@ddn.com>
---
 fs/fuse/dev.c             | 10 +++++
 fs/fuse/dev_uring.c       | 95 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/fuse/dev_uring_i.h     | 18 +++++++++
 fs/fuse/fuse_i.h          |  3 ++
 include/uapi/linux/fuse.h | 26 +++++++++++++
 5 files changed, 152 insertions(+)

Comments

Josef Bacik May 30, 2024, 3:54 p.m. UTC | #1
On Wed, May 29, 2024 at 08:00:43PM +0200, Bernd Schubert wrote:
> Signed-off-by: Bernd Schubert <bschubert@ddn.com>
> ---
>  fs/fuse/dev.c             | 10 +++++
>  fs/fuse/dev_uring.c       | 95 +++++++++++++++++++++++++++++++++++++++++++++++
>  fs/fuse/dev_uring_i.h     | 18 +++++++++
>  fs/fuse/fuse_i.h          |  3 ++
>  include/uapi/linux/fuse.h | 26 +++++++++++++
>  5 files changed, 152 insertions(+)
> 
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 349c1d16b0df..78c05516da7f 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -2395,6 +2395,12 @@ static long fuse_uring_ioctl(struct file *file, __u32 __user *argp)
>  	if (res != 0)
>  		return -EFAULT;
>  
> +	if (cfg.cmd == FUSE_URING_IOCTL_CMD_QUEUE_CFG) {
> +		res = _fuse_dev_ioctl_clone(file, cfg.qconf.control_fd);
> +		if (res != 0)
> +			return res;
> +	}
> +
>  	fud = fuse_get_dev(file);
>  	if (fud == NULL)
>  		return -ENODEV;
> @@ -2424,6 +2430,10 @@ static long fuse_uring_ioctl(struct file *file, __u32 __user *argp)
>  		if (res != 0)
>  			return res;
>  		break;
> +		case FUSE_URING_IOCTL_CMD_QUEUE_CFG:
> +			fud->uring_dev = 1;
> +			res = fuse_uring_queue_cfg(fc->ring, &cfg.qconf);
> +		break;
>  	default:
>  		res = -EINVAL;
>  	}
> diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
> index 9491bdaa5716..2c0ccb378908 100644
> --- a/fs/fuse/dev_uring.c
> +++ b/fs/fuse/dev_uring.c
> @@ -144,6 +144,39 @@ static char *fuse_uring_alloc_queue_buf(int size, int node)
>  	return buf ? buf : ERR_PTR(-ENOMEM);
>  }
>  
> +/*
> + * mmaped allocated buffers, but does not know which queue that is for
> + * This ioctl uses the userspace address as key to identify the kernel address
> + * and assign it to the kernel side of the queue.
> + */
> +static int fuse_uring_ioctl_mem_reg(struct fuse_ring *ring,
> +				    struct fuse_ring_queue *queue,
> +				    uint64_t uaddr)
> +{
> +	struct rb_node *node;
> +	struct fuse_uring_mbuf *entry;
> +	int tag;
> +
> +	node = rb_find((const void *)uaddr, &ring->mem_buf_map,
> +		       fuse_uring_rb_tree_buf_cmp);
> +	if (!node)
> +		return -ENOENT;
> +	entry = rb_entry(node, struct fuse_uring_mbuf, rb_node);
> +
> +	rb_erase(node, &ring->mem_buf_map);
> +
> +	queue->queue_req_buf = entry->kbuf;
> +
> +	for (tag = 0; tag < ring->queue_depth; tag++) {
> +		struct fuse_ring_ent *ent = &queue->ring_ent[tag];
> +
> +		ent->rreq = entry->kbuf + tag * ring->req_buf_sz;
> +	}
> +
> +	kfree(node);
> +	return 0;
> +}
> +
>  /**
>   * fuse uring mmap, per ring qeuue.
>   * Userpsace maps a kernel allocated ring/queue buffer. For numa awareness,
> @@ -234,3 +267,65 @@ fuse_uring_mmap(struct file *filp, struct vm_area_struct *vma)
>  
>  	return ret;
>  }
> +
> +int fuse_uring_queue_cfg(struct fuse_ring *ring,
> +			 struct fuse_ring_queue_config *qcfg)
> +{
> +	int tag;
> +	struct fuse_ring_queue *queue;
> +
> +	if (qcfg->qid >= ring->nr_queues) {
> +		pr_info("fuse ring queue config: qid=%u >= nr-queues=%zu\n",
> +			qcfg->qid, ring->nr_queues);
> +		return -EINVAL;
> +	}
> +	queue = fuse_uring_get_queue(ring, qcfg->qid);
> +
> +	if (queue->configured) {
> +		pr_info("fuse ring qid=%u already configured!\n", queue->qid);
> +		return -EALREADY;
> +	}
> +
> +	mutex_lock(&ring->start_stop_lock);
> +	fuse_uring_ioctl_mem_reg(ring, queue, qcfg->uaddr);
> +	mutex_unlock(&ring->start_stop_lock);

You're not handling the error here.  Thanks,

Josef
Bernd Schubert May 30, 2024, 5:49 p.m. UTC | #2
On 5/30/24 17:54, Josef Bacik wrote:
> On Wed, May 29, 2024 at 08:00:43PM +0200, Bernd Schubert wrote:
>> Signed-off-by: Bernd Schubert <bschubert@ddn.com>
>> ---
>>  fs/fuse/dev.c             | 10 +++++
>>  fs/fuse/dev_uring.c       | 95 +++++++++++++++++++++++++++++++++++++++++++++++
>>  fs/fuse/dev_uring_i.h     | 18 +++++++++
>>  fs/fuse/fuse_i.h          |  3 ++
>>  include/uapi/linux/fuse.h | 26 +++++++++++++
>>  5 files changed, 152 insertions(+)
>>
>> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
>> index 349c1d16b0df..78c05516da7f 100644
>> --- a/fs/fuse/dev.c
>> +++ b/fs/fuse/dev.c
>> @@ -2395,6 +2395,12 @@ static long fuse_uring_ioctl(struct file *file, __u32 __user *argp)
>>  	if (res != 0)
>>  		return -EFAULT;
>>  
>> +	if (cfg.cmd == FUSE_URING_IOCTL_CMD_QUEUE_CFG) {
>> +		res = _fuse_dev_ioctl_clone(file, cfg.qconf.control_fd);
>> +		if (res != 0)
>> +			return res;
>> +	}
>> +
>>  	fud = fuse_get_dev(file);
>>  	if (fud == NULL)
>>  		return -ENODEV;
>> @@ -2424,6 +2430,10 @@ static long fuse_uring_ioctl(struct file *file, __u32 __user *argp)
>>  		if (res != 0)
>>  			return res;
>>  		break;
>> +		case FUSE_URING_IOCTL_CMD_QUEUE_CFG:
>> +			fud->uring_dev = 1;
>> +			res = fuse_uring_queue_cfg(fc->ring, &cfg.qconf);
>> +		break;
>>  	default:
>>  		res = -EINVAL;
>>  	}
>> diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
>> index 9491bdaa5716..2c0ccb378908 100644
>> --- a/fs/fuse/dev_uring.c
>> +++ b/fs/fuse/dev_uring.c
>> @@ -144,6 +144,39 @@ static char *fuse_uring_alloc_queue_buf(int size, int node)
>>  	return buf ? buf : ERR_PTR(-ENOMEM);
>>  }
>>  
>> +/*
>> + * mmaped allocated buffers, but does not know which queue that is for
>> + * This ioctl uses the userspace address as key to identify the kernel address
>> + * and assign it to the kernel side of the queue.
>> + */
>> +static int fuse_uring_ioctl_mem_reg(struct fuse_ring *ring,
>> +				    struct fuse_ring_queue *queue,
>> +				    uint64_t uaddr)
>> +{
>> +	struct rb_node *node;
>> +	struct fuse_uring_mbuf *entry;
>> +	int tag;
>> +
>> +	node = rb_find((const void *)uaddr, &ring->mem_buf_map,
>> +		       fuse_uring_rb_tree_buf_cmp);
>> +	if (!node)
>> +		return -ENOENT;
>> +	entry = rb_entry(node, struct fuse_uring_mbuf, rb_node);
>> +
>> +	rb_erase(node, &ring->mem_buf_map);
>> +
>> +	queue->queue_req_buf = entry->kbuf;
>> +
>> +	for (tag = 0; tag < ring->queue_depth; tag++) {
>> +		struct fuse_ring_ent *ent = &queue->ring_ent[tag];
>> +
>> +		ent->rreq = entry->kbuf + tag * ring->req_buf_sz;
>> +	}
>> +
>> +	kfree(node);
>> +	return 0;
>> +}
>> +
>>  /**
>>   * fuse uring mmap, per ring qeuue.
>>   * Userpsace maps a kernel allocated ring/queue buffer. For numa awareness,
>> @@ -234,3 +267,65 @@ fuse_uring_mmap(struct file *filp, struct vm_area_struct *vma)
>>  
>>  	return ret;
>>  }
>> +
>> +int fuse_uring_queue_cfg(struct fuse_ring *ring,
>> +			 struct fuse_ring_queue_config *qcfg)
>> +{
>> +	int tag;
>> +	struct fuse_ring_queue *queue;
>> +
>> +	if (qcfg->qid >= ring->nr_queues) {
>> +		pr_info("fuse ring queue config: qid=%u >= nr-queues=%zu\n",
>> +			qcfg->qid, ring->nr_queues);
>> +		return -EINVAL;
>> +	}
>> +	queue = fuse_uring_get_queue(ring, qcfg->qid);
>> +
>> +	if (queue->configured) {
>> +		pr_info("fuse ring qid=%u already configured!\n", queue->qid);
>> +		return -EALREADY;
>> +	}
>> +
>> +	mutex_lock(&ring->start_stop_lock);
>> +	fuse_uring_ioctl_mem_reg(ring, queue, qcfg->uaddr);
>> +	mutex_unlock(&ring->start_stop_lock);
> 
> You're not handling the error here.  Thanks,


Thanks again for all your reviews! All fixed up to here, except
vmalloc_node_user(), as you suggested, I will try to decouple it from
this series.

And d'oh! I didn't find the simple numa_node_id() function. Thanks so
much for pointing that out.

New branch is here:
https://github.com/bsbernd/linux/tree/fuse-uring-for-6.9-rfc3


Thanks,
Bernd
diff mbox series

Patch

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 349c1d16b0df..78c05516da7f 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2395,6 +2395,12 @@  static long fuse_uring_ioctl(struct file *file, __u32 __user *argp)
 	if (res != 0)
 		return -EFAULT;
 
+	if (cfg.cmd == FUSE_URING_IOCTL_CMD_QUEUE_CFG) {
+		res = _fuse_dev_ioctl_clone(file, cfg.qconf.control_fd);
+		if (res != 0)
+			return res;
+	}
+
 	fud = fuse_get_dev(file);
 	if (fud == NULL)
 		return -ENODEV;
@@ -2424,6 +2430,10 @@  static long fuse_uring_ioctl(struct file *file, __u32 __user *argp)
 		if (res != 0)
 			return res;
 		break;
+		case FUSE_URING_IOCTL_CMD_QUEUE_CFG:
+			fud->uring_dev = 1;
+			res = fuse_uring_queue_cfg(fc->ring, &cfg.qconf);
+		break;
 	default:
 		res = -EINVAL;
 	}
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index 9491bdaa5716..2c0ccb378908 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -144,6 +144,39 @@  static char *fuse_uring_alloc_queue_buf(int size, int node)
 	return buf ? buf : ERR_PTR(-ENOMEM);
 }
 
+/*
+ * Userspace mmap()ed a kernel allocated buffer without naming its queue.
+ * Use the userspace address as key to find that buffer, assign it to the
+ * kernel side of @queue and drop the lookup entry. 0 or -ENOENT (no match).
+ */
+static int fuse_uring_ioctl_mem_reg(struct fuse_ring *ring,
+				    struct fuse_ring_queue *queue,
+				    uint64_t uaddr)
+{
+	struct rb_node *node;
+	struct fuse_uring_mbuf *entry;
+	int tag;
+
+	node = rb_find((const void *)(unsigned long)uaddr, &ring->mem_buf_map,
+		       fuse_uring_rb_tree_buf_cmp);
+	if (!node)
+		return -ENOENT;
+	entry = rb_entry(node, struct fuse_uring_mbuf, rb_node);
+
+	rb_erase(node, &ring->mem_buf_map);
+	queue->queue_req_buf = entry->kbuf;
+
+	for (tag = 0; tag < ring->queue_depth; tag++) {
+		struct fuse_ring_ent *ent = &queue->ring_ent[tag];
+
+		ent->rreq = entry->kbuf + tag * ring->req_buf_sz;
+	}
+
+	/* Free the whole mbuf, node is only the rb_node embedded in it. */
+	kfree(entry);
+	return 0;
+}
+
 /**
  * fuse uring mmap, per ring qeuue.
  * Userpsace maps a kernel allocated ring/queue buffer. For numa awareness,
@@ -234,3 +267,82 @@  fuse_uring_mmap(struct file *filp, struct vm_area_struct *vma)
 
 	return ret;
 }
+
+/**
+ * fuse_uring_queue_cfg - configure one ring queue on behalf of the ioctl
+ * @ring: ring the queue belongs to
+ * @qcfg: userspace supplied queue configuration (qid and buffer address)
+ *
+ * Binds the buffer identified by @qcfg->uaddr to queue @qcfg->qid and
+ * initializes the queue lists and all of its ring entries.
+ *
+ * NOTE(review): the ->configured check and the initialization below run
+ * outside of start_stop_lock - confirm that callers serialize this ioctl.
+ *
+ * Return: 0 on success, -EINVAL for an out of range qid, -EALREADY if the
+ * queue was configured before, or the error of the buffer lookup.
+ */
+int fuse_uring_queue_cfg(struct fuse_ring *ring,
+			 struct fuse_ring_queue_config *qcfg)
+{
+	int tag;
+	int ret;
+	struct fuse_ring_queue *queue;
+
+	if (qcfg->qid >= ring->nr_queues) {
+		pr_info("fuse ring queue config: qid=%u >= nr-queues=%zu\n",
+			qcfg->qid, ring->nr_queues);
+		return -EINVAL;
+	}
+	queue = fuse_uring_get_queue(ring, qcfg->qid);
+
+	if (queue->configured) {
+		pr_info("fuse ring qid=%u already configured!\n", queue->qid);
+		return -EALREADY;
+	}
+
+	mutex_lock(&ring->start_stop_lock);
+	ret = fuse_uring_ioctl_mem_reg(ring, queue, qcfg->uaddr);
+	mutex_unlock(&ring->start_stop_lock);
+	/* Without a registered buffer ent->rreq must not be touched below. */
+	if (ret)
+		return ret;
+
+	queue->qid = qcfg->qid;
+	queue->ring = ring;
+	spin_lock_init(&queue->lock);
+	INIT_LIST_HEAD(&queue->sync_fuse_req_queue);
+	INIT_LIST_HEAD(&queue->async_fuse_req_queue);
+
+	INIT_LIST_HEAD(&queue->sync_ent_avail_queue);
+	INIT_LIST_HEAD(&queue->async_ent_avail_queue);
+
+	INIT_LIST_HEAD(&queue->ent_in_userspace);
+
+	for (tag = 0; tag < ring->queue_depth; tag++) {
+		struct fuse_ring_ent *ent = &queue->ring_ent[tag];
+
+		ent->queue = queue;
+		ent->tag = tag;
+		ent->fuse_req = NULL;
+
+		pr_devel("initialize qid=%d tag=%d queue=%p req=%p", qcfg->qid,
+			 tag, queue, ent);
+
+		ent->rreq->flags = 0;
+
+		ent->state = 0;
+		set_bit(FRRS_INIT, &ent->state);
+
+		INIT_LIST_HEAD(&ent->list);
+	}
+
+	queue->configured = 1;
+	ring->nr_queues_ioctl_init++;
+	if (ring->nr_queues_ioctl_init == ring->nr_queues) {
+		pr_devel("ring=%p nr-queues=%zu depth=%zu ioctl ready\n", ring,
+			 ring->nr_queues, ring->queue_depth);
+	}
+
+	return 0;
+}
diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
index c455ae0e729a..7a2f540d3ea5 100644
--- a/fs/fuse/dev_uring_i.h
+++ b/fs/fuse/dev_uring_i.h
@@ -16,6 +16,24 @@ 
 /* IORING_MAX_ENTRIES */
 #define FUSE_URING_MAX_QUEUE_DEPTH 32768
 
+enum fuse_ring_req_state {
+
+	/* request is basically initialized, set by fuse_uring_queue_cfg() */
+	FRRS_INIT,
+
+	/* The ring request waits for a new fuse request */
+	FRRS_WAIT,
+
+	/* The ring req got assigned a fuse req */
+	FRRS_FUSE_REQ,
+
+	/* request is in or on the way to user space */
+	FRRS_USERSPACE,
+
+	/* request is released */
+	FRRS_FREED,
+};
+
 struct fuse_uring_mbuf {
 	struct rb_node rb_node;
 	void *kbuf; /* kernel allocated ring request buffer */
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index d2b058ccb677..fadc51a22bb9 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -540,6 +540,9 @@  struct fuse_dev {
 
 	/** list entry on fc->devices */
 	struct list_head entry;
+
+	/** Is the device used for fuse-over-io-uring? */
+	unsigned int uring_dev : 1;
 };
 
 enum fuse_dax_mode {
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 00d0154ec2da..88d4078c4171 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -1262,4 +1262,30 @@  struct fuse_supp_groups {
 /* The offset parameter is used to identify the request type */
 #define FUSE_URING_MMAP_OFF 0xf8000000ULL
 
+/**
+ * Ring request layout, presumably overlaid on the mmap()ed queue buffer
+ */
+struct fuse_ring_req {
+	union {
+		/* The first 4K are command data */
+		char ring_header[FUSE_RING_HEADER_BUF_SIZE];
+
+		struct {
+			uint64_t flags;
+
+			/* enum fuse_ring_buf_cmd */
+			uint32_t in_out_arg_len;
+			uint32_t padding;
+
+			/* kernel fills in, reads out */
+			union {
+				struct fuse_in_header in;
+				struct fuse_out_header out;
+			};
+		};
+	};
+
+	char in_out_arg[];
+};
+
 #endif /* _LINUX_FUSE_H */