
ublk: eliminate unnecessary io_cmds queue

Message ID 20241009193700.3438201-1-ushankar@purestorage.com (mailing list archive)

Commit Message

Uday Shankar Oct. 9, 2024, 7:37 p.m. UTC
Currently, ublk_drv maintains a per-hctx queue of requests awaiting
dispatch to the ublk server, and pokes the ubq_daemon to come pick them
up via the task_work mechanism when needed. But task_work already
supports internal (lockless) queueing. Reuse this queueing mechanism
(i.e. have one task_work queue item per request awaiting dispatch)
instead of maintaining our own queue in ublk_drv.

Signed-off-by: Uday Shankar <ushankar@purestorage.com>
---
 drivers/block/ublk_drv.c | 34 ++++++----------------------------
 1 file changed, 6 insertions(+), 28 deletions(-)


base-commit: 7a84944a4bf7abda16291ff13984960d0df4e74a
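In short, the queueing path after this patch collapses to the following
(a condensed view of the ublk_queue_cmd() hunk in the diff below, reusing
the driver's existing ublk_get_uring_cmd_pdu() helper):

static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
{
	struct ublk_io *io = &ubq->ios[rq->tag];

	/* stash the request in the per-command pdu ... */
	ublk_get_uring_cmd_pdu(io->cmd)->req = rq;
	/*
	 * ... and queue one task_work item per request; task_work's
	 * internal lockless queueing replaces the removed io_cmds llist
	 */
	io_uring_cmd_complete_in_task(io->cmd, __ublk_rq_task_work);
}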

Comments

Ming Lei Oct. 10, 2024, 3:45 a.m. UTC | #1
On Wed, Oct 09, 2024 at 01:37:00PM -0600, Uday Shankar wrote:
> Currently, ublk_drv maintains a per-hctx queue of requests awaiting
> dispatch to the ublk server, and pokes the ubq_daemon to come pick them
> up via the task_work mechanism when needed. But task_work already
> supports internal (lockless) queueing. Reuse this queueing mechanism
> (i.e. have one task_work queue item per request awaiting dispatch)
> instead of maintaining our own queue in ublk_drv.
> 
> Signed-off-by: Uday Shankar <ushankar@purestorage.com>
> ---
>  drivers/block/ublk_drv.c | 34 ++++++----------------------------
>  1 file changed, 6 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index 60f6d86ea1e6..2ea108347ec4 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -80,6 +80,7 @@ struct ublk_rq_data {
>  
>  struct ublk_uring_cmd_pdu {
>  	struct ublk_queue *ubq;
> +	struct request *req;
>  	u16 tag;
>  };

I'd suggest adding the following build check in the init function, since
there are only 32 bytes in the uring_cmd pdu:

	BUILD_BUG_ON(sizeof(struct ublk_uring_cmd_pdu) > sizeof_field(struct io_uring_cmd, pdu));
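
For example, the check could sit at the top of the driver's module init
(a sketch; ublk_init() is assumed to be the existing module_init entry point):

	static int __init ublk_init(void)
	{
		BUILD_BUG_ON(sizeof(struct ublk_uring_cmd_pdu) >
			     sizeof_field(struct io_uring_cmd, pdu));

		/* ... existing init logic unchanged ... */
	}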

>  
> @@ -141,8 +142,6 @@ struct ublk_queue {
>  	struct task_struct	*ubq_daemon;
>  	char *io_cmd_buf;
>  
> -	struct llist_head	io_cmds;
> -
>  	unsigned long io_addr;	/* mapped vm address */
>  	unsigned int max_io_sz;
>  	bool force_abort;
> @@ -1132,9 +1131,10 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
>  		blk_mq_end_request(rq, BLK_STS_IOERR);
>  }
>  
> -static inline void __ublk_rq_task_work(struct request *req,
> +static inline void __ublk_rq_task_work(struct io_uring_cmd *cmd,
>  				       unsigned issue_flags)

`inline` can be removed, and __ublk_rq_task_work() can be renamed to
ublk_rq_task_work() now.

>  {
> +	struct request *req = ublk_get_uring_cmd_pdu(cmd)->req;
>  	struct ublk_queue *ubq = req->mq_hctx->driver_data;
>  	int tag = req->tag;
>  	struct ublk_io *io = &ubq->ios[tag];
> @@ -1211,34 +1211,12 @@ static inline void __ublk_rq_task_work(struct request *req,
>  	ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
>  }
>  
> -static inline void ublk_forward_io_cmds(struct ublk_queue *ubq,
> -					unsigned issue_flags)
> -{
> -	struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
> -	struct ublk_rq_data *data, *tmp;
> -
> -	io_cmds = llist_reverse_order(io_cmds);
> -	llist_for_each_entry_safe(data, tmp, io_cmds, node)
> -		__ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags);
> -}
> -
> -static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
> -{
> -	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
> -	struct ublk_queue *ubq = pdu->ubq;
> -
> -	ublk_forward_io_cmds(ubq, issue_flags);
> -}
> -
>  static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
>  {
> -	struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
> -
> -	if (llist_add(&data->node, &ubq->io_cmds)) {
> -		struct ublk_io *io = &ubq->ios[rq->tag];
> +	struct ublk_io *io = &ubq->ios[rq->tag];
>  
> -		io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
> -	}
> +	ublk_get_uring_cmd_pdu(io->cmd)->req = rq;
> +	io_uring_cmd_complete_in_task(io->cmd, __ublk_rq_task_work);
>  }

I'd suggest adding a comment noting that io_uring_cmd_complete_in_task()
needs to maintain IO command order.
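
For instance, something along these lines above ublk_queue_cmd() (the
wording is only a suggestion):

	/*
	 * ublk relies on io_uring_cmd_complete_in_task() running these
	 * callbacks in the order the commands are queued here, so that
	 * requests are forwarded to the ublk server in dispatch order.
	 */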

Otherwise this patch looks fine.


Thanks,
Ming
Uday Shankar Oct. 15, 2024, 8:51 p.m. UTC | #2
On Thu, Oct 10, 2024 at 11:45:03AM +0800, Ming Lei wrote:
> >  static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
> >  {
> > -	struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
> > -
> > -	if (llist_add(&data->node, &ubq->io_cmds)) {
> > -		struct ublk_io *io = &ubq->ios[rq->tag];
> > +	struct ublk_io *io = &ubq->ios[rq->tag];
> >  
> > -		io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
> > -	}
> > +	ublk_get_uring_cmd_pdu(io->cmd)->req = rq;
> > +	io_uring_cmd_complete_in_task(io->cmd, __ublk_rq_task_work);
> >  }
> 
> I'd suggest adding a comment noting that io_uring_cmd_complete_in_task()
> needs to maintain IO command order.

Sorry, can you explain why this is important? Generally speaking
out-of-order completion of I/Os is considered okay, so what's the issue
if the dispatch to the ublk server here is not in order?
Ming Lei Oct. 16, 2024, 2:29 a.m. UTC | #3
On Tue, Oct 15, 2024 at 02:51:14PM -0600, Uday Shankar wrote:
> On Thu, Oct 10, 2024 at 11:45:03AM +0800, Ming Lei wrote:
> > >  static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
> > >  {
> > > -	struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
> > > -
> > > -	if (llist_add(&data->node, &ubq->io_cmds)) {
> > > -		struct ublk_io *io = &ubq->ios[rq->tag];
> > > +	struct ublk_io *io = &ubq->ios[rq->tag];
> > >  
> > > -		io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
> > > -	}
> > > +	ublk_get_uring_cmd_pdu(io->cmd)->req = rq;
> > > +	io_uring_cmd_complete_in_task(io->cmd, __ublk_rq_task_work);
> > >  }
> > 
> > I'd suggest adding a comment noting that io_uring_cmd_complete_in_task()
> > needs to maintain IO command order.
> 
> Sorry, can you explain why this is important? Generally speaking
> out-of-order completion of I/Os is considered okay, so what's the issue
> if the dispatch to the ublk server here is not in order?

Out-of-order completion is just tolerated, but a proper implementation
requires keeping IO order.

Please see:

1) 7d4a93176e01 ("ublk_drv: don't forward io commands in reserve order")

2) [Report] requests are submitted to hardware in reverse order from nvme/virtio-blk queue_rqs()

https://lore.kernel.org/linux-block/ZbD7ups50ryrlJ%2FG@fedora/
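
For reference, the ordering concern is visible in the helper this patch
removes: llist_add() pushes entries at the head, so the batch returned by
llist_del_all() comes back newest-first and has to be reversed before
dispatch (mirroring the removed code above):

	struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
	struct ublk_rq_data *data, *tmp;

	/* llist_del_all() hands back a LIFO list; restore submission order */
	io_cmds = llist_reverse_order(io_cmds);
	llist_for_each_entry_safe(data, tmp, io_cmds, node)
		__ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags);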


I am also working on ublk-bpf, which needs this extra list for submitting
IO in batches, so please hold off on this patch for now.

I plan to send out the bpf patches in this cycle or the next, and we can
restart the cleanup if the bpf approach turns out not to be doable.


Thanks, 
Ming

Patch

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 60f6d86ea1e6..2ea108347ec4 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -80,6 +80,7 @@  struct ublk_rq_data {
 
 struct ublk_uring_cmd_pdu {
 	struct ublk_queue *ubq;
+	struct request *req;
 	u16 tag;
 };
 
@@ -141,8 +142,6 @@  struct ublk_queue {
 	struct task_struct	*ubq_daemon;
 	char *io_cmd_buf;
 
-	struct llist_head	io_cmds;
-
 	unsigned long io_addr;	/* mapped vm address */
 	unsigned int max_io_sz;
 	bool force_abort;
@@ -1132,9 +1131,10 @@  static inline void __ublk_abort_rq(struct ublk_queue *ubq,
 		blk_mq_end_request(rq, BLK_STS_IOERR);
 }
 
-static inline void __ublk_rq_task_work(struct request *req,
+static inline void __ublk_rq_task_work(struct io_uring_cmd *cmd,
 				       unsigned issue_flags)
 {
+	struct request *req = ublk_get_uring_cmd_pdu(cmd)->req;
 	struct ublk_queue *ubq = req->mq_hctx->driver_data;
 	int tag = req->tag;
 	struct ublk_io *io = &ubq->ios[tag];
@@ -1211,34 +1211,12 @@  static inline void __ublk_rq_task_work(struct request *req,
 	ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
 }
 
-static inline void ublk_forward_io_cmds(struct ublk_queue *ubq,
-					unsigned issue_flags)
-{
-	struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
-	struct ublk_rq_data *data, *tmp;
-
-	io_cmds = llist_reverse_order(io_cmds);
-	llist_for_each_entry_safe(data, tmp, io_cmds, node)
-		__ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags);
-}
-
-static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
-{
-	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
-	struct ublk_queue *ubq = pdu->ubq;
-
-	ublk_forward_io_cmds(ubq, issue_flags);
-}
-
 static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
 {
-	struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
-
-	if (llist_add(&data->node, &ubq->io_cmds)) {
-		struct ublk_io *io = &ubq->ios[rq->tag];
+	struct ublk_io *io = &ubq->ios[rq->tag];
 
-		io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
-	}
+	ublk_get_uring_cmd_pdu(io->cmd)->req = rq;
+	io_uring_cmd_complete_in_task(io->cmd, __ublk_rq_task_work);
 }
 
 static enum blk_eh_timer_return ublk_timeout(struct request *rq)