Message ID | 20250107-fuse-uring-for-6-10-rfc4-v9-13-9c786f9a7a9d@ddn.com (mailing list archive) |
---|---|
State | New |
Series | fuse: fuse-over-io-uring |
On Tue, Jan 07 2025, Bernd Schubert wrote:

> This prepares queueing and sending foreground requests through
> io-uring.
>
> Signed-off-by: Bernd Schubert <bschubert@ddn.com>
> ---
>  fs/fuse/dev_uring.c   | 185 ++++++++++++++++++++++++++++++++++++++++++++++++--
>  fs/fuse/dev_uring_i.h |  11 ++-
>  2 files changed, 187 insertions(+), 9 deletions(-)
>
> diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
> index 01a908b2ef9ada14b759ca047eab40b4c4431d89..89a22a4eee23cbba49bac7a2d2126bb51193326f 100644
> --- a/fs/fuse/dev_uring.c
> +++ b/fs/fuse/dev_uring.c
> @@ -26,6 +26,29 @@ bool fuse_uring_enabled(void)
>  	return enable_uring;
>  }
>
> +struct fuse_uring_pdu {
> +	struct fuse_ring_ent *ring_ent;
> +};
> +
> +static const struct fuse_iqueue_ops fuse_io_uring_ops;
> +
> +static void uring_cmd_set_ring_ent(struct io_uring_cmd *cmd,
> +				   struct fuse_ring_ent *ring_ent)
> +{
> +	struct fuse_uring_pdu *pdu =
> +		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);
> +
> +	pdu->ring_ent = ring_ent;
> +}
> +
> +static struct fuse_ring_ent *uring_cmd_to_ring_ent(struct io_uring_cmd *cmd)
> +{
> +	struct fuse_uring_pdu *pdu =
> +		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);
> +
> +	return pdu->ring_ent;
> +}
> +
>  static void fuse_uring_req_end(struct fuse_ring_ent *ring_ent, bool set_err,
>  			       int error)
>  {
> @@ -441,7 +464,7 @@ static int fuse_uring_copy_to_ring(struct fuse_ring *ring, struct fuse_req *req,
>  	struct iov_iter iter;
>  	struct fuse_uring_ent_in_out ent_in_out = {
>  		.flags = 0,
> -		.commit_id = ent->commit_id,
> +		.commit_id = req->in.h.unique,
>  	};
>
>  	if (WARN_ON(ent_in_out.commit_id == 0))
> @@ -460,7 +483,7 @@ static int fuse_uring_copy_to_ring(struct fuse_ring *ring, struct fuse_req *req,
>  	if (num_args > 0) {
>  		/*
>  		 * Expectation is that the first argument is the per op header.
> -		 * Some op code have that as zero.
> +		 * Some op code have that as zero size.
>  		 */
>  		if (args->in_args[0].size > 0) {
>  			res = copy_to_user(&ent->headers->op_in, in_args->value,
> @@ -578,11 +601,8 @@ static void fuse_uring_add_to_pq(struct fuse_ring_ent *ring_ent,
>  	struct fuse_pqueue *fpq = &queue->fpq;
>  	unsigned int hash;
>
> -	/* commit_id is the unique id of the request */
> -	ring_ent->commit_id = req->in.h.unique;
> -
>  	req->ring_entry = ring_ent;
> -	hash = fuse_req_hash(ring_ent->commit_id);
> +	hash = fuse_req_hash(req->in.h.unique);
>  	list_move_tail(&req->list, &fpq->processing[hash]);
>  }
>
> @@ -777,6 +797,31 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
>  	return 0;
>  }
>
> +static bool is_ring_ready(struct fuse_ring *ring, int current_qid)
> +{
> +	int qid;
> +	struct fuse_ring_queue *queue;
> +	bool ready = true;
> +
> +	for (qid = 0; qid < ring->nr_queues && ready; qid++) {
> +		if (current_qid == qid)
> +			continue;
> +
> +		queue = ring->queues[qid];
> +		if (!queue) {
> +			ready = false;
> +			break;
> +		}
> +
> +		spin_lock(&queue->lock);
> +		if (list_empty(&queue->ent_avail_queue))
> +			ready = false;
> +		spin_unlock(&queue->lock);
> +	}
> +
> +	return ready;
> +}
> +
>  /*
>   * fuse_uring_req_fetch command handling
>   */
> @@ -785,10 +830,22 @@ static void fuse_uring_do_register(struct fuse_ring_ent *ring_ent,
>  				   unsigned int issue_flags)
>  {
>  	struct fuse_ring_queue *queue = ring_ent->queue;
> +	struct fuse_ring *ring = queue->ring;
> +	struct fuse_conn *fc = ring->fc;
> +	struct fuse_iqueue *fiq = &fc->iq;
>
>  	spin_lock(&queue->lock);
>  	fuse_uring_ent_avail(ring_ent, queue);
>  	spin_unlock(&queue->lock);
> +
> +	if (!ring->ready) {
> +		bool ready = is_ring_ready(ring, queue->qid);
> +
> +		if (ready) {
> +			WRITE_ONCE(ring->ready, true);
> +			fiq->ops = &fuse_io_uring_ops;

Shouldn't we be taking the fiq->lock to protect the above operation?

> +		}
> +	}
>  }
>
>  /*
> @@ -979,3 +1036,119 @@ int __maybe_unused fuse_uring_cmd(struct io_uring_cmd *cmd,
>
>  	return -EIOCBQUEUED;
>  }
> +
> +/*
> + * This prepares and sends the ring request in fuse-uring task context.
> + * User buffers are not mapped yet - the application does not have permission
> + * to write to it - this has to be executed in ring task context.
> + */
> +static void
> +fuse_uring_send_req_in_task(struct io_uring_cmd *cmd,
> +			    unsigned int issue_flags)
> +{
> +	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
> +	struct fuse_ring_queue *queue = ent->queue;
> +	int err;
> +
> +	if (unlikely(issue_flags & IO_URING_F_TASK_DEAD)) {
> +		err = -ECANCELED;
> +		goto terminating;
> +	}
> +
> +	err = fuse_uring_prepare_send(ent);
> +	if (err)
> +		goto err;

Suggestion: simplify this function flow.  Something like:

	int err = 0;

	if (unlikely(issue_flags & IO_URING_F_TASK_DEAD))
		err = -ECANCELED;
	else if (fuse_uring_prepare_send(ent)) {
		fuse_uring_next_fuse_req(ent, queue, issue_flags);
		return;
	}
	spin_lock(&queue->lock);
	[...]

> +
> +terminating:
> +	spin_lock(&queue->lock);
> +	ent->state = FRRS_USERSPACE;
> +	list_move(&ent->list, &queue->ent_in_userspace);
> +	spin_unlock(&queue->lock);
> +	io_uring_cmd_done(cmd, err, 0, issue_flags);
> +	ent->cmd = NULL;
> +	return;
> +err:
> +	fuse_uring_next_fuse_req(ent, queue, issue_flags);
> +}
> +
> +static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
> +{
> +	unsigned int qid;
> +	struct fuse_ring_queue *queue;
> +
> +	qid = task_cpu(current);
> +
> +	if (WARN_ONCE(qid >= ring->nr_queues,
> +		      "Core number (%u) exceeds nr ueues (%zu)\n", qid,

typo: 'queues'

> +		      ring->nr_queues))
> +		qid = 0;
> +
> +	queue = ring->queues[qid];
> +	if (WARN_ONCE(!queue, "Missing queue for qid %d\n", qid))
> +		return NULL;

nit: no need for this if statement.  The WARN_ONCE() is enough.

Cheers,
On 1/7/25 16:54, Luis Henriques wrote:

[...]

>> @@ -785,10 +830,22 @@ static void fuse_uring_do_register(struct fuse_ring_ent *ring_ent,
>>  				   unsigned int issue_flags)
>>  {
>>  	struct fuse_ring_queue *queue = ring_ent->queue;
>> +	struct fuse_ring *ring = queue->ring;
>> +	struct fuse_conn *fc = ring->fc;
>> +	struct fuse_iqueue *fiq = &fc->iq;
>>
>>  	spin_lock(&queue->lock);
>>  	fuse_uring_ent_avail(ring_ent, queue);
>>  	spin_unlock(&queue->lock);
>> +
>> +	if (!ring->ready) {
>> +		bool ready = is_ring_ready(ring, queue->qid);
>> +
>> +		if (ready) {
>> +			WRITE_ONCE(ring->ready, true);
>> +			fiq->ops = &fuse_io_uring_ops;
>
> Shouldn't we be taking the fiq->lock to protect the above operation?

I switched the order and changed it to WRITE_ONCE. fiq->lock would
require that the code doing these operations also holds the lock.
Also see "[PATCH v9 16/17] fuse: block request allocation until",
there should be no races anymore.

>
>> +		}
>> +	}
>>  }
>>
>>  /*
>> @@ -979,3 +1036,119 @@ int __maybe_unused fuse_uring_cmd(struct io_uring_cmd *cmd,
>>
>>  	return -EIOCBQUEUED;
>>  }
>> +
>> +/*
>> + * This prepares and sends the ring request in fuse-uring task context.
>> + * User buffers are not mapped yet - the application does not have permission
>> + * to write to it - this has to be executed in ring task context.
>> + */
>> +static void
>> +fuse_uring_send_req_in_task(struct io_uring_cmd *cmd,
>> +			    unsigned int issue_flags)
>> +{
>> +	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
>> +	struct fuse_ring_queue *queue = ent->queue;
>> +	int err;
>> +
>> +	if (unlikely(issue_flags & IO_URING_F_TASK_DEAD)) {
>> +		err = -ECANCELED;
>> +		goto terminating;
>> +	}
>> +
>> +	err = fuse_uring_prepare_send(ent);
>> +	if (err)
>> +		goto err;
>
> Suggestion: simplify this function flow.  Something like:
>
> 	int err = 0;
>
> 	if (unlikely(issue_flags & IO_URING_F_TASK_DEAD))
> 		err = -ECANCELED;
> 	else if (fuse_uring_prepare_send(ent)) {
> 		fuse_uring_next_fuse_req(ent, queue, issue_flags);
> 		return;
> 	}
> 	spin_lock(&queue->lock);
> 	[...]

That makes it look like a failure of fuse_uring_prepare_send() is not an
error, but expected. How about this instead?

static void
fuse_uring_send_req_in_task(struct io_uring_cmd *cmd,
			    unsigned int issue_flags)
{
	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
	struct fuse_ring_queue *queue = ent->queue;
	int err;

	if (!(issue_flags & IO_URING_F_TASK_DEAD)) {
		err = fuse_uring_prepare_send(ent);
		if (err) {
			fuse_uring_next_fuse_req(ent, queue, issue_flags);
			return;
		}
	} else {
		err = -ECANCELED;
	}

	spin_lock(&queue->lock);
	ent->state = FRRS_USERSPACE;
	list_move(&ent->list, &queue->ent_in_userspace);
	spin_unlock(&queue->lock);
	io_uring_cmd_done(cmd, err, 0, issue_flags);
	ent->cmd = NULL;
}

Thanks,
Bernd
On Tue, Jan 07 2025, Bernd Schubert wrote:

> On 1/7/25 16:54, Luis Henriques wrote:
>
> [...]
>
>>> @@ -785,10 +830,22 @@ static void fuse_uring_do_register(struct fuse_ring_ent *ring_ent,
>>>  				   unsigned int issue_flags)
>>>  {
>>>  	struct fuse_ring_queue *queue = ring_ent->queue;
>>> +	struct fuse_ring *ring = queue->ring;
>>> +	struct fuse_conn *fc = ring->fc;
>>> +	struct fuse_iqueue *fiq = &fc->iq;
>>>  	spin_lock(&queue->lock);
>>>  	fuse_uring_ent_avail(ring_ent, queue);
>>>  	spin_unlock(&queue->lock);
>>> +
>>> +	if (!ring->ready) {
>>> +		bool ready = is_ring_ready(ring, queue->qid);
>>> +
>>> +		if (ready) {
>>> +			WRITE_ONCE(ring->ready, true);
>>> +			fiq->ops = &fuse_io_uring_ops;
>>
>> Shouldn't we be taking the fiq->lock to protect the above operation?
>
> I switched the order and changed it to WRITE_ONCE. fiq->lock would
> require that the code doing these operations also holds the lock.
> Also see "[PATCH v9 16/17] fuse: block request allocation until",
> there should be no races anymore.

OK, great.  I still need to go read the code a few more times, I guess.

Thank you for your help understanding this code, Bernd.

Cheers,
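For context on the exchange above: the pattern being discussed is a one-way publish of the ring state, where the new fiq->ops pointer must be visible before the ready flag. A minimal sketch of that ordering with explicit acquire/release helpers is shown below; the function names and the use of smp_store_release()/smp_load_acquire() are illustrative assumptions of this note, not code from the patch, which instead relies on later changes in the series (blocking request allocation until the ring is ready) to avoid the race.

/*
 * Publish side (ring registration): make fiq->ops visible before the
 * ready flag, so any reader that observes ready == true also sees the
 * new ops pointer.
 */
static void publish_ring_ready(struct fuse_ring *ring, struct fuse_iqueue *fiq)
{
	fiq->ops = &fuse_io_uring_ops;
	smp_store_release(&ring->ready, true);
}

/*
 * Submission side: only dispatch through the io-uring path once the
 * ready flag has been observed.
 */
static bool ring_ready_for_requests(struct fuse_ring *ring)
{
	return ring && smp_load_acquire(&ring->ready);
}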
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index 01a908b2ef9ada14b759ca047eab40b4c4431d89..89a22a4eee23cbba49bac7a2d2126bb51193326f 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -26,6 +26,29 @@ bool fuse_uring_enabled(void)
 	return enable_uring;
 }
 
+struct fuse_uring_pdu {
+	struct fuse_ring_ent *ring_ent;
+};
+
+static const struct fuse_iqueue_ops fuse_io_uring_ops;
+
+static void uring_cmd_set_ring_ent(struct io_uring_cmd *cmd,
+				   struct fuse_ring_ent *ring_ent)
+{
+	struct fuse_uring_pdu *pdu =
+		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);
+
+	pdu->ring_ent = ring_ent;
+}
+
+static struct fuse_ring_ent *uring_cmd_to_ring_ent(struct io_uring_cmd *cmd)
+{
+	struct fuse_uring_pdu *pdu =
+		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu);
+
+	return pdu->ring_ent;
+}
+
 static void fuse_uring_req_end(struct fuse_ring_ent *ring_ent, bool set_err,
 			       int error)
 {
@@ -441,7 +464,7 @@ static int fuse_uring_copy_to_ring(struct fuse_ring *ring, struct fuse_req *req,
 	struct iov_iter iter;
 	struct fuse_uring_ent_in_out ent_in_out = {
 		.flags = 0,
-		.commit_id = ent->commit_id,
+		.commit_id = req->in.h.unique,
 	};
 
 	if (WARN_ON(ent_in_out.commit_id == 0))
@@ -460,7 +483,7 @@ static int fuse_uring_copy_to_ring(struct fuse_ring *ring, struct fuse_req *req,
 	if (num_args > 0) {
 		/*
 		 * Expectation is that the first argument is the per op header.
-		 * Some op code have that as zero.
+		 * Some op code have that as zero size.
 		 */
 		if (args->in_args[0].size > 0) {
 			res = copy_to_user(&ent->headers->op_in, in_args->value,
@@ -578,11 +601,8 @@ static void fuse_uring_add_to_pq(struct fuse_ring_ent *ring_ent,
 	struct fuse_pqueue *fpq = &queue->fpq;
 	unsigned int hash;
 
-	/* commit_id is the unique id of the request */
-	ring_ent->commit_id = req->in.h.unique;
-
 	req->ring_entry = ring_ent;
-	hash = fuse_req_hash(ring_ent->commit_id);
+	hash = fuse_req_hash(req->in.h.unique);
 	list_move_tail(&req->list, &fpq->processing[hash]);
 }
 
@@ -777,6 +797,31 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
 	return 0;
 }
 
+static bool is_ring_ready(struct fuse_ring *ring, int current_qid)
+{
+	int qid;
+	struct fuse_ring_queue *queue;
+	bool ready = true;
+
+	for (qid = 0; qid < ring->nr_queues && ready; qid++) {
+		if (current_qid == qid)
+			continue;
+
+		queue = ring->queues[qid];
+		if (!queue) {
+			ready = false;
+			break;
+		}
+
+		spin_lock(&queue->lock);
+		if (list_empty(&queue->ent_avail_queue))
+			ready = false;
+		spin_unlock(&queue->lock);
+	}
+
+	return ready;
+}
+
 /*
  * fuse_uring_req_fetch command handling
  */
@@ -785,10 +830,22 @@ static void fuse_uring_do_register(struct fuse_ring_ent *ring_ent,
 				   unsigned int issue_flags)
 {
 	struct fuse_ring_queue *queue = ring_ent->queue;
+	struct fuse_ring *ring = queue->ring;
+	struct fuse_conn *fc = ring->fc;
+	struct fuse_iqueue *fiq = &fc->iq;
 
 	spin_lock(&queue->lock);
 	fuse_uring_ent_avail(ring_ent, queue);
 	spin_unlock(&queue->lock);
+
+	if (!ring->ready) {
+		bool ready = is_ring_ready(ring, queue->qid);
+
+		if (ready) {
+			WRITE_ONCE(ring->ready, true);
+			fiq->ops = &fuse_io_uring_ops;
+		}
+	}
 }
 
 /*
@@ -979,3 +1036,119 @@ int __maybe_unused fuse_uring_cmd(struct io_uring_cmd *cmd,
 
 	return -EIOCBQUEUED;
 }
+
+/*
+ * This prepares and sends the ring request in fuse-uring task context.
+ * User buffers are not mapped yet - the application does not have permission
+ * to write to it - this has to be executed in ring task context.
+ */
+static void
+fuse_uring_send_req_in_task(struct io_uring_cmd *cmd,
+			    unsigned int issue_flags)
+{
+	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
+	struct fuse_ring_queue *queue = ent->queue;
+	int err;
+
+	if (unlikely(issue_flags & IO_URING_F_TASK_DEAD)) {
+		err = -ECANCELED;
+		goto terminating;
+	}
+
+	err = fuse_uring_prepare_send(ent);
+	if (err)
+		goto err;
+
+terminating:
+	spin_lock(&queue->lock);
+	ent->state = FRRS_USERSPACE;
+	list_move(&ent->list, &queue->ent_in_userspace);
+	spin_unlock(&queue->lock);
+	io_uring_cmd_done(cmd, err, 0, issue_flags);
+	ent->cmd = NULL;
+	return;
+err:
+	fuse_uring_next_fuse_req(ent, queue, issue_flags);
+}
+
+static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
+{
+	unsigned int qid;
+	struct fuse_ring_queue *queue;
+
+	qid = task_cpu(current);
+
+	if (WARN_ONCE(qid >= ring->nr_queues,
+		      "Core number (%u) exceeds nr ueues (%zu)\n", qid,
+		      ring->nr_queues))
+		qid = 0;
+
+	queue = ring->queues[qid];
+	if (WARN_ONCE(!queue, "Missing queue for qid %d\n", qid))
+		return NULL;
+
+	return queue;
+}
+
+/* queue a fuse request and send it if a ring entry is available */
+void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req)
+{
+	struct fuse_conn *fc = req->fm->fc;
+	struct fuse_ring *ring = fc->ring;
+	struct fuse_ring_queue *queue;
+	struct fuse_ring_ent *ent = NULL;
+	int err;
+
+	err = -EINVAL;
+	queue = fuse_uring_task_to_queue(ring);
+	if (!queue)
+		goto err;
+
+	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
+		req->in.h.unique = fuse_get_unique(fiq);
+
+	spin_lock(&queue->lock);
+	err = -ENOTCONN;
+	if (unlikely(queue->stopped))
+		goto err_unlock;
+
+	ent = list_first_entry_or_null(&queue->ent_avail_queue,
+				       struct fuse_ring_ent, list);
+	if (ent)
+		fuse_uring_add_req_to_ring_ent(ent, req);
+	else
+		list_add_tail(&req->list, &queue->fuse_req_queue);
+	spin_unlock(&queue->lock);
+
+	if (ent) {
+		struct io_uring_cmd *cmd = ent->cmd;
+
+		err = -EIO;
+		if (WARN_ON_ONCE(ent->state != FRRS_FUSE_REQ))
+			goto err;
+
+		uring_cmd_set_ring_ent(cmd, ent);
+		io_uring_cmd_complete_in_task(cmd, fuse_uring_send_req_in_task);
+	}
+
+	return;
+
+err_unlock:
+	spin_unlock(&queue->lock);
err:
+	req->out.h.error = err;
+	clear_bit(FR_PENDING, &req->flags);
+	fuse_request_end(req);
+}
+
+static const struct fuse_iqueue_ops fuse_io_uring_ops = {
+	/* should be send over io-uring as enhancement */
+	.send_forget = fuse_dev_queue_forget,
+
+	/*
+	 * could be send over io-uring, but interrupts should be rare,
+	 * no need to make the code complex
+	 */
+	.send_interrupt = fuse_dev_queue_interrupt,
+	.send_req = fuse_uring_queue_fuse_req,
+};
diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
index ee5aeccae66caaf9a4dccbbbc785820836182668..cda330978faa019ceedf161f50d86db976b072e2 100644
--- a/fs/fuse/dev_uring_i.h
+++ b/fs/fuse/dev_uring_i.h
@@ -48,9 +48,6 @@ struct fuse_ring_ent {
 	enum fuse_ring_req_state state;
 
 	struct fuse_req *fuse_req;
-
-	/* commit id to identify the server reply */
-	uint64_t commit_id;
 };
 
 struct fuse_ring_queue {
@@ -120,6 +117,8 @@ struct fuse_ring {
 	unsigned long teardown_time;
 
 	atomic_t queue_refs;
+
+	bool ready;
 };
 
 bool fuse_uring_enabled(void);
@@ -127,6 +126,7 @@ void fuse_uring_destruct(struct fuse_conn *fc);
 void fuse_uring_stop_queues(struct fuse_ring *ring);
 void fuse_uring_abort_end_requests(struct fuse_ring *ring);
 int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
+void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req);
 
 static inline void fuse_uring_abort(struct fuse_conn *fc)
 {
@@ -150,6 +150,11 @@ static inline void fuse_uring_wait_stopped_queues(struct fuse_conn *fc)
 		   atomic_read(&ring->queue_refs) == 0);
 }
 
+static inline bool fuse_uring_ready(struct fuse_conn *fc)
+{
+	return fc->ring && fc->ring->ready;
+}
+
 #else /* CONFIG_FUSE_IO_URING */
 
 struct fuse_ring;
This prepares queueing and sending foreground requests through
io-uring.

Signed-off-by: Bernd Schubert <bschubert@ddn.com>
---
 fs/fuse/dev_uring.c   | 185 ++++++++++++++++++++++++++++++++++++++++++++++++--
 fs/fuse/dev_uring_i.h |  11 ++-
 2 files changed, 187 insertions(+), 9 deletions(-)
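The diffstat only covers the io-uring side; the hook into the generic queueing path is the fiq->ops switch performed in fuse_uring_do_register() above. Once ring->ready is set, a foreground request queued by the fuse core reaches fuse_uring_queue_fuse_req() through the ops table rather than the /dev/fuse queue. The caller below is only a rough sketch of that dispatch; fuse_send_one_sketch() is a made-up name and the real call site in fs/fuse/dev.c may look different.

static void fuse_send_one_sketch(struct fuse_iqueue *fiq, struct fuse_req *req)
{
	/*
	 * Before the ring is ready, fiq->ops is the /dev/fuse implementation;
	 * afterwards it is fuse_io_uring_ops, so this call lands in
	 * fuse_uring_queue_fuse_req(), which picks the queue for the current
	 * CPU and either grabs an available ring entry or parks the request
	 * on queue->fuse_req_queue.
	 */
	fiq->ops->send_req(fiq, req);
}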