
[v9,13/17] fuse: Allow to queue fg requests through io-uring

Message ID 20250107-fuse-uring-for-6-10-rfc4-v9-13-9c786f9a7a9d@ddn.com (mailing list archive)
State New
Series fuse: fuse-over-io-uring

Commit Message

Bernd Schubert Jan. 7, 2025, 12:25 a.m. UTC
This prepares queueing and sending foreground requests through
io-uring.

Signed-off-by: Bernd Schubert <bschubert@ddn.com>
---
 fs/fuse/dev_uring.c   | 185 ++++++++++++++++++++++++++++++++++++++++++++++++--
 fs/fuse/dev_uring_i.h |  11 ++-
 2 files changed, 187 insertions(+), 9 deletions(-)
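
For orientation, the core of the new submission path, condensed from
fuse_uring_queue_fuse_req() in the patch below (error handling, the
queue->stopped check and the FUSE_NOTIFY_REPLY case are trimmed; a
simplified sketch, not the literal code):

void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req)
{
	struct fuse_ring *ring = req->fm->fc->ring;
	/* queue selection follows the submitting task's CPU */
	struct fuse_ring_queue *queue = fuse_uring_task_to_queue(ring);
	struct fuse_ring_ent *ent;

	req->in.h.unique = fuse_get_unique(fiq);

	spin_lock(&queue->lock);
	ent = list_first_entry_or_null(&queue->ent_avail_queue,
				       struct fuse_ring_ent, list);
	if (ent)
		fuse_uring_add_req_to_ring_ent(ent, req);	/* entry free: send now */
	else
		list_add_tail(&req->list, &queue->fuse_req_queue); /* wait for one */
	spin_unlock(&queue->lock);

	if (ent) {
		/* the copy to the server's buffers runs in ring task context */
		uring_cmd_set_ring_ent(ent->cmd, ent);
		io_uring_cmd_complete_in_task(ent->cmd, fuse_uring_send_req_in_task);
	}
}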

Comments

Luis Henriques Jan. 7, 2025, 3:54 p.m. UTC | #1
On Tue, Jan 07 2025, Bernd Schubert wrote:

> This prepares queueing and sending foreground requests through
> io-uring.
>
> Signed-off-by: Bernd Schubert <bschubert@ddn.com>
> ---
>  fs/fuse/dev_uring.c   | 185 ++++++++++++++++++++++++++++++++++++++++++++++++--
>  fs/fuse/dev_uring_i.h |  11 ++-
>  2 files changed, 187 insertions(+), 9 deletions(-)
>
> diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
> index 01a908b2ef9ada14b759ca047eab40b4c4431d89..89a22a4eee23cbba49bac7a2d2126bb51193326f 100644
> --- a/fs/fuse/dev_uring.c
> +++ b/fs/fuse/dev_uring.c
> @@ -26,6 +26,29 @@ bool fuse_uring_enabled(void)
>  	return enable_uring;
>  }
>  
> +struct fuse_uring_pdu {
> +	struct fuse_ring_ent *ring_ent;
> +};
> +
> +static const struct fuse_iqueue_ops fuse_io_uring_ops;
> +
> +static void uring_cmd_set_ring_ent(struct io_uring_cmd *cmd,
> +				   struct fuse_ring_ent *ring_ent)
> +{
> +	struct fuse_uring_pdu *pdu =
> +		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);
> +
> +	pdu->ring_ent = ring_ent;
> +}
> +
> +static struct fuse_ring_ent *uring_cmd_to_ring_ent(struct io_uring_cmd *cmd)
> +{
> +	struct fuse_uring_pdu *pdu =
> +		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);
> +
> +	return pdu->ring_ent;
> +}
> +
>  static void fuse_uring_req_end(struct fuse_ring_ent *ring_ent, bool set_err,
>  			       int error)
>  {
> @@ -441,7 +464,7 @@ static int fuse_uring_copy_to_ring(struct fuse_ring *ring, struct fuse_req *req,
>  	struct iov_iter iter;
>  	struct fuse_uring_ent_in_out ent_in_out = {
>  		.flags = 0,
> -		.commit_id = ent->commit_id,
> +		.commit_id = req->in.h.unique,
>  	};
>  
>  	if (WARN_ON(ent_in_out.commit_id == 0))
> @@ -460,7 +483,7 @@ static int fuse_uring_copy_to_ring(struct fuse_ring *ring, struct fuse_req *req,
>  	if (num_args > 0) {
>  		/*
>  		 * Expectation is that the first argument is the per op header.
> -		 * Some op code have that as zero.
> +		 * Some op code have that as zero size.
>  		 */
>  		if (args->in_args[0].size > 0) {
>  			res = copy_to_user(&ent->headers->op_in, in_args->value,
> @@ -578,11 +601,8 @@ static void fuse_uring_add_to_pq(struct fuse_ring_ent *ring_ent,
>  	struct fuse_pqueue *fpq = &queue->fpq;
>  	unsigned int hash;
>  
> -	/* commit_id is the unique id of the request */
> -	ring_ent->commit_id = req->in.h.unique;
> -
>  	req->ring_entry = ring_ent;
> -	hash = fuse_req_hash(ring_ent->commit_id);
> +	hash = fuse_req_hash(req->in.h.unique);
>  	list_move_tail(&req->list, &fpq->processing[hash]);
>  }
>  
> @@ -777,6 +797,31 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
>  	return 0;
>  }
>  
> +static bool is_ring_ready(struct fuse_ring *ring, int current_qid)
> +{
> +	int qid;
> +	struct fuse_ring_queue *queue;
> +	bool ready = true;
> +
> +	for (qid = 0; qid < ring->nr_queues && ready; qid++) {
> +		if (current_qid == qid)
> +			continue;
> +
> +		queue = ring->queues[qid];
> +		if (!queue) {
> +			ready = false;
> +			break;
> +		}
> +
> +		spin_lock(&queue->lock);
> +		if (list_empty(&queue->ent_avail_queue))
> +			ready = false;
> +		spin_unlock(&queue->lock);
> +	}
> +
> +	return ready;
> +}
> +
>  /*
>   * fuse_uring_req_fetch command handling
>   */
> @@ -785,10 +830,22 @@ static void fuse_uring_do_register(struct fuse_ring_ent *ring_ent,
>  				   unsigned int issue_flags)
>  {
>  	struct fuse_ring_queue *queue = ring_ent->queue;
> +	struct fuse_ring *ring = queue->ring;
> +	struct fuse_conn *fc = ring->fc;
> +	struct fuse_iqueue *fiq = &fc->iq;
>  
>  	spin_lock(&queue->lock);
>  	fuse_uring_ent_avail(ring_ent, queue);
>  	spin_unlock(&queue->lock);
> +
> +	if (!ring->ready) {
> +		bool ready = is_ring_ready(ring, queue->qid);
> +
> +		if (ready) {
> +			WRITE_ONCE(ring->ready, true);
> +			fiq->ops = &fuse_io_uring_ops;

Shouldn't we be taking the fiq->lock to protect the above operation?

> +		}
> +	}
>  }
>  
>  /*
> @@ -979,3 +1036,119 @@ int __maybe_unused fuse_uring_cmd(struct io_uring_cmd *cmd,
>  
>  	return -EIOCBQUEUED;
>  }
> +
> +/*
> + * This prepares and sends the ring request in fuse-uring task context.
> + * User buffers are not mapped yet - the application does not have permission
> + * to write to it - this has to be executed in ring task context.
> + */
> +static void
> +fuse_uring_send_req_in_task(struct io_uring_cmd *cmd,
> +			    unsigned int issue_flags)
> +{
> +	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
> +	struct fuse_ring_queue *queue = ent->queue;
> +	int err;
> +
> +	if (unlikely(issue_flags & IO_URING_F_TASK_DEAD)) {
> +		err = -ECANCELED;
> +		goto terminating;
> +	}
> +
> +	err = fuse_uring_prepare_send(ent);
> +	if (err)
> +		goto err;

Suggestion: simplify this function flow.  Something like:

	int err = 0;

	if (unlikely(issue_flags & IO_URING_F_TASK_DEAD))
		err = -ECANCELED;
	else if (fuse_uring_prepare_send(ent)) {
		fuse_uring_next_fuse_req(ent, queue, issue_flags);
		return;
	}
	spin_lock(&queue->lock);
        [...]

> +
> +terminating:
> +	spin_lock(&queue->lock);
> +	ent->state = FRRS_USERSPACE;
> +	list_move(&ent->list, &queue->ent_in_userspace);
> +	spin_unlock(&queue->lock);
> +	io_uring_cmd_done(cmd, err, 0, issue_flags);
> +	ent->cmd = NULL;
> +	return;
> +err:
> +	fuse_uring_next_fuse_req(ent, queue, issue_flags);
> +}
> +
> +static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
> +{
> +	unsigned int qid;
> +	struct fuse_ring_queue *queue;
> +
> +	qid = task_cpu(current);
> +
> +	if (WARN_ONCE(qid >= ring->nr_queues,
> +		      "Core number (%u) exceeds nr ueues (%zu)\n", qid,

typo: 'queues'

> +		      ring->nr_queues))
> +		qid = 0;
> +
> +	queue = ring->queues[qid];
> +	if (WARN_ONCE(!queue, "Missing queue for qid %d\n", qid))
> +		return NULL;

nit: no need for this if statement.  The WARN_ONCE() is enough.

Cheers,
Bernd Schubert Jan. 7, 2025, 6:59 p.m. UTC | #2
On 1/7/25 16:54, Luis Henriques wrote:

[...]

>> @@ -785,10 +830,22 @@ static void fuse_uring_do_register(struct fuse_ring_ent *ring_ent,
>>   				   unsigned int issue_flags)
>>   {
>>   	struct fuse_ring_queue *queue = ring_ent->queue;
>> +	struct fuse_ring *ring = queue->ring;
>> +	struct fuse_conn *fc = ring->fc;
>> +	struct fuse_iqueue *fiq = &fc->iq;
>>   
>>   	spin_lock(&queue->lock);
>>   	fuse_uring_ent_avail(ring_ent, queue);
>>   	spin_unlock(&queue->lock);
>> +
>> +	if (!ring->ready) {
>> +		bool ready = is_ring_ready(ring, queue->qid);
>> +
>> +		if (ready) {
>> +			WRITE_ONCE(ring->ready, true);
>> +			fiq->ops = &fuse_io_uring_ops;
> 
> Shouldn't we be taking the fiq->lock to protect the above operation?

I switched the order and changed it to WRITE_ONCE. Using fiq->lock would
require that the code doing those operations also holds the lock.
Also see "[PATCH v9 16/17] fuse: block request allocation until",
there should be no races anymore.
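
In other words, the intent is to set fiq->ops first and only then flip the
ready flag that gates request allocation. A minimal sketch of that ordering,
assuming the reordered code of the next revision (an illustration of the
description above, not a quote from the series):

	/* registration side, once every queue has an available entry */
	fiq->ops = &fuse_io_uring_ops;	/* publish the io-uring ops first ... */
	WRITE_ONCE(ring->ready, true);	/* ... then announce readiness */

	/*
	 * Request side: fuse_uring_ready() (fc->ring && fc->ring->ready) is
	 * checked before new requests are allocated (patch 16/17), so any
	 * request that reaches fiq->ops->send_req() already sees the new ops.
	 */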

> 
>> +		}
>> +	}
>>   }
>>   
>>   /*
>> @@ -979,3 +1036,119 @@ int __maybe_unused fuse_uring_cmd(struct io_uring_cmd *cmd,
>>   
>>   	return -EIOCBQUEUED;
>>   }
>> +
>> +/*
>> + * This prepares and sends the ring request in fuse-uring task context.
>> + * User buffers are not mapped yet - the application does not have permission
>> + * to write to it - this has to be executed in ring task context.
>> + */
>> +static void
>> +fuse_uring_send_req_in_task(struct io_uring_cmd *cmd,
>> +			    unsigned int issue_flags)
>> +{
>> +	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
>> +	struct fuse_ring_queue *queue = ent->queue;
>> +	int err;
>> +
>> +	if (unlikely(issue_flags & IO_URING_F_TASK_DEAD)) {
>> +		err = -ECANCELED;
>> +		goto terminating;
>> +	}
>> +
>> +	err = fuse_uring_prepare_send(ent);
>> +	if (err)
>> +		goto err;
> 
> Suggestion: simplify this function flow.  Something like:
> 
> 	int err = 0;
> 
> 	if (unlikely(issue_flags & IO_URING_F_TASK_DEAD))
> 		err = -ECANCELED;
> 	else if (fuse_uring_prepare_send(ent)) {
> 		fuse_uring_next_fuse_req(ent, queue, issue_flags);
> 		return;
> 	}
> 	spin_lock(&queue->lock);
>          [...]

That makes it look like fuse_uring_prepare_send() failing is not an
error, but expected. How about like this?

static void
fuse_uring_send_req_in_task(struct io_uring_cmd *cmd,
			    unsigned int issue_flags)
{
	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
	struct fuse_ring_queue *queue = ent->queue;
	int err;

	if (!(issue_flags & IO_URING_F_TASK_DEAD)) {
		err = fuse_uring_prepare_send(ent);
		if (err) {
			fuse_uring_next_fuse_req(ent, queue, issue_flags);
			return;
		}
	} else {
		err = -ECANCELED;
	}

	spin_lock(&queue->lock);
	ent->state = FRRS_USERSPACE;
	list_move(&ent->list, &queue->ent_in_userspace);
	spin_unlock(&queue->lock);

	io_uring_cmd_done(cmd, err, 0, issue_flags);
	ent->cmd = NULL;
}



Thanks,
Bernd
Luis Henriques Jan. 7, 2025, 9:25 p.m. UTC | #3
On Tue, Jan 07 2025, Bernd Schubert wrote:

> On 1/7/25 16:54, Luis Henriques wrote:
>
> [...]
>
>>> @@ -785,10 +830,22 @@ static void fuse_uring_do_register(struct fuse_ring_ent *ring_ent,
>>>   				   unsigned int issue_flags)
>>>   {
>>>   	struct fuse_ring_queue *queue = ring_ent->queue;
>>> +	struct fuse_ring *ring = queue->ring;
>>> +	struct fuse_conn *fc = ring->fc;
>>> +	struct fuse_iqueue *fiq = &fc->iq;
>>>   
>>>   	spin_lock(&queue->lock);
>>>   	fuse_uring_ent_avail(ring_ent, queue);
>>>   	spin_unlock(&queue->lock);
>>> +
>>> +	if (!ring->ready) {
>>> +		bool ready = is_ring_ready(ring, queue->qid);
>>> +
>>> +		if (ready) {
>>> +			WRITE_ONCE(ring->ready, true);
>>> +			fiq->ops = &fuse_io_uring_ops;
>> Shouldn't we be taking the fiq->lock to protect the above operation?
>
> I switched the order and changed it to WRITE_ONCE. Using fiq->lock would
> require that the code doing those operations also holds the lock.
> Also see "[PATCH v9 16/17] fuse: block request allocation until",
> there should be no races anymore.

OK, great.  I still need to go read the code a few more times, I guess.
Thank you for your help understanding this code, Bernd.

Cheers,

Patch

diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index 01a908b2ef9ada14b759ca047eab40b4c4431d89..89a22a4eee23cbba49bac7a2d2126bb51193326f 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -26,6 +26,29 @@  bool fuse_uring_enabled(void)
 	return enable_uring;
 }
 
+struct fuse_uring_pdu {
+	struct fuse_ring_ent *ring_ent;
+};
+
+static const struct fuse_iqueue_ops fuse_io_uring_ops;
+
+static void uring_cmd_set_ring_ent(struct io_uring_cmd *cmd,
+				   struct fuse_ring_ent *ring_ent)
+{
+	struct fuse_uring_pdu *pdu =
+		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);
+
+	pdu->ring_ent = ring_ent;
+}
+
+static struct fuse_ring_ent *uring_cmd_to_ring_ent(struct io_uring_cmd *cmd)
+{
+	struct fuse_uring_pdu *pdu =
+		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);
+
+	return pdu->ring_ent;
+}
+
 static void fuse_uring_req_end(struct fuse_ring_ent *ring_ent, bool set_err,
 			       int error)
 {
@@ -441,7 +464,7 @@  static int fuse_uring_copy_to_ring(struct fuse_ring *ring, struct fuse_req *req,
 	struct iov_iter iter;
 	struct fuse_uring_ent_in_out ent_in_out = {
 		.flags = 0,
-		.commit_id = ent->commit_id,
+		.commit_id = req->in.h.unique,
 	};
 
 	if (WARN_ON(ent_in_out.commit_id == 0))
@@ -460,7 +483,7 @@  static int fuse_uring_copy_to_ring(struct fuse_ring *ring, struct fuse_req *req,
 	if (num_args > 0) {
 		/*
 		 * Expectation is that the first argument is the per op header.
-		 * Some op code have that as zero.
+		 * Some op code have that as zero size.
 		 */
 		if (args->in_args[0].size > 0) {
 			res = copy_to_user(&ent->headers->op_in, in_args->value,
@@ -578,11 +601,8 @@  static void fuse_uring_add_to_pq(struct fuse_ring_ent *ring_ent,
 	struct fuse_pqueue *fpq = &queue->fpq;
 	unsigned int hash;
 
-	/* commit_id is the unique id of the request */
-	ring_ent->commit_id = req->in.h.unique;
-
 	req->ring_entry = ring_ent;
-	hash = fuse_req_hash(ring_ent->commit_id);
+	hash = fuse_req_hash(req->in.h.unique);
 	list_move_tail(&req->list, &fpq->processing[hash]);
 }
 
@@ -777,6 +797,31 @@  static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
 	return 0;
 }
 
+static bool is_ring_ready(struct fuse_ring *ring, int current_qid)
+{
+	int qid;
+	struct fuse_ring_queue *queue;
+	bool ready = true;
+
+	for (qid = 0; qid < ring->nr_queues && ready; qid++) {
+		if (current_qid == qid)
+			continue;
+
+		queue = ring->queues[qid];
+		if (!queue) {
+			ready = false;
+			break;
+		}
+
+		spin_lock(&queue->lock);
+		if (list_empty(&queue->ent_avail_queue))
+			ready = false;
+		spin_unlock(&queue->lock);
+	}
+
+	return ready;
+}
+
 /*
  * fuse_uring_req_fetch command handling
  */
@@ -785,10 +830,22 @@  static void fuse_uring_do_register(struct fuse_ring_ent *ring_ent,
 				   unsigned int issue_flags)
 {
 	struct fuse_ring_queue *queue = ring_ent->queue;
+	struct fuse_ring *ring = queue->ring;
+	struct fuse_conn *fc = ring->fc;
+	struct fuse_iqueue *fiq = &fc->iq;
 
 	spin_lock(&queue->lock);
 	fuse_uring_ent_avail(ring_ent, queue);
 	spin_unlock(&queue->lock);
+
+	if (!ring->ready) {
+		bool ready = is_ring_ready(ring, queue->qid);
+
+		if (ready) {
+			WRITE_ONCE(ring->ready, true);
+			fiq->ops = &fuse_io_uring_ops;
+		}
+	}
 }
 
 /*
@@ -979,3 +1036,119 @@  int __maybe_unused fuse_uring_cmd(struct io_uring_cmd *cmd,
 
 	return -EIOCBQUEUED;
 }
+
+/*
+ * This prepares and sends the ring request in fuse-uring task context.
+ * User buffers are not mapped yet - the application does not have permission
+ * to write to it - this has to be executed in ring task context.
+ */
+static void
+fuse_uring_send_req_in_task(struct io_uring_cmd *cmd,
+			    unsigned int issue_flags)
+{
+	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
+	struct fuse_ring_queue *queue = ent->queue;
+	int err;
+
+	if (unlikely(issue_flags & IO_URING_F_TASK_DEAD)) {
+		err = -ECANCELED;
+		goto terminating;
+	}
+
+	err = fuse_uring_prepare_send(ent);
+	if (err)
+		goto err;
+
+terminating:
+	spin_lock(&queue->lock);
+	ent->state = FRRS_USERSPACE;
+	list_move(&ent->list, &queue->ent_in_userspace);
+	spin_unlock(&queue->lock);
+	io_uring_cmd_done(cmd, err, 0, issue_flags);
+	ent->cmd = NULL;
+	return;
+err:
+	fuse_uring_next_fuse_req(ent, queue, issue_flags);
+}
+
+static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
+{
+	unsigned int qid;
+	struct fuse_ring_queue *queue;
+
+	qid = task_cpu(current);
+
+	if (WARN_ONCE(qid >= ring->nr_queues,
+		      "Core number (%u) exceeds nr ueues (%zu)\n", qid,
+		      ring->nr_queues))
+		qid = 0;
+
+	queue = ring->queues[qid];
+	if (WARN_ONCE(!queue, "Missing queue for qid %d\n", qid))
+		return NULL;
+
+	return queue;
+}
+
+/* queue a fuse request and send it if a ring entry is available */
+void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req)
+{
+	struct fuse_conn *fc = req->fm->fc;
+	struct fuse_ring *ring = fc->ring;
+	struct fuse_ring_queue *queue;
+	struct fuse_ring_ent *ent = NULL;
+	int err;
+
+	err = -EINVAL;
+	queue = fuse_uring_task_to_queue(ring);
+	if (!queue)
+		goto err;
+
+	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
+		req->in.h.unique = fuse_get_unique(fiq);
+
+	spin_lock(&queue->lock);
+	err = -ENOTCONN;
+	if (unlikely(queue->stopped))
+		goto err_unlock;
+
+	ent = list_first_entry_or_null(&queue->ent_avail_queue,
+				       struct fuse_ring_ent, list);
+	if (ent)
+		fuse_uring_add_req_to_ring_ent(ent, req);
+	else
+		list_add_tail(&req->list, &queue->fuse_req_queue);
+	spin_unlock(&queue->lock);
+
+	if (ent) {
+		struct io_uring_cmd *cmd = ent->cmd;
+
+		err = -EIO;
+		if (WARN_ON_ONCE(ent->state != FRRS_FUSE_REQ))
+			goto err;
+
+		uring_cmd_set_ring_ent(cmd, ent);
+		io_uring_cmd_complete_in_task(cmd, fuse_uring_send_req_in_task);
+	}
+
+	return;
+
+err_unlock:
+	spin_unlock(&queue->lock);
+err:
+	req->out.h.error = err;
+	clear_bit(FR_PENDING, &req->flags);
+	fuse_request_end(req);
+}
+
+static const struct fuse_iqueue_ops fuse_io_uring_ops = {
+	/* should be send over io-uring as enhancement */
+	.send_forget = fuse_dev_queue_forget,
+
+	/*
+	 * could be send over io-uring, but interrupts should be rare,
+	 * no need to make the code complex
+	 */
+	.send_interrupt = fuse_dev_queue_interrupt,
+	.send_req = fuse_uring_queue_fuse_req,
+};
diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
index ee5aeccae66caaf9a4dccbbbc785820836182668..cda330978faa019ceedf161f50d86db976b072e2 100644
--- a/fs/fuse/dev_uring_i.h
+++ b/fs/fuse/dev_uring_i.h
@@ -48,9 +48,6 @@  struct fuse_ring_ent {
 	enum fuse_ring_req_state state;
 
 	struct fuse_req *fuse_req;
-
-	/* commit id to identify the server reply */
-	uint64_t commit_id;
 };
 
 struct fuse_ring_queue {
@@ -120,6 +117,8 @@  struct fuse_ring {
 	unsigned long teardown_time;
 
 	atomic_t queue_refs;
+
+	bool ready;
 };
 
 bool fuse_uring_enabled(void);
@@ -127,6 +126,7 @@  void fuse_uring_destruct(struct fuse_conn *fc);
 void fuse_uring_stop_queues(struct fuse_ring *ring);
 void fuse_uring_abort_end_requests(struct fuse_ring *ring);
 int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
+void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req);
 
 static inline void fuse_uring_abort(struct fuse_conn *fc)
 {
@@ -150,6 +150,11 @@  static inline void fuse_uring_wait_stopped_queues(struct fuse_conn *fc)
 			   atomic_read(&ring->queue_refs) == 0);
 }
 
+static inline bool fuse_uring_ready(struct fuse_conn *fc)
+{
+	return fc->ring && fc->ring->ready;
+}
+
 #else /* CONFIG_FUSE_IO_URING */
 
 struct fuse_ring;