diff mbox series

[01/13] ublk: delay aborting zc request until io_uring returns the buffer

Message ID 20250407131526.1927073-2-ming.lei@redhat.com (mailing list archive)
State New
Headers show
Series ublk: one driver bug fix and selftest change | expand

Commit Message

Ming Lei April 7, 2025, 1:15 p.m. UTC
When a request buffer is leased to io_uring via
io_buffer_register_bvec(), io_uring guarantees that the buffer will
be returned. However, ublk aborts the request when the io_uring context
is exiting; ublk_io_release() may then observe a freed request, and a
kernel panic is triggered.

Fix the issue by delaying the abort of a zc request until io_uring
returns the buffer.

Cc: Keith Busch <kbusch@kernel.org>
Fixes: 1f6540e2aabb ("ublk: zc register/unregister bvec")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/block/ublk_drv.c | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

Comments

Caleb Sander Mateos April 7, 2025, 3:02 p.m. UTC | #1
On Mon, Apr 7, 2025 at 6:15 AM Ming Lei <ming.lei@redhat.com> wrote:
>
> When one request buffer is leased to io_uring via
> io_buffer_register_bvec(), io_uring guarantees that the buffer will
> be returned. However ublk aborts request in case that io_uring context
> is exiting, then ublk_io_release() may observe freed request, and
> kernel panic is triggered.

Not sure I follow how the request can be freed while its buffer is
still registered with io_uring. It looks like __ublk_fail_req()
decrements the ublk request's reference count (ublk_put_req_ref()) and
the reference count shouldn't hit 0 if the io_uring registered buffer
is still holding a reference. Is the problem the if
(ublk_nosrv_should_reissue_outstanding()) case, which calls
blk_mq_requeue_request() without checking the reference count?
Maybe a better place to put the requeue logic would be in
__ublk_complete_rq(). Then both the abort and io_uring buffer
unregister paths can just call ublk_put_req_ref(). And there would be
no need for UBLK_IO_FLAG_BUF_LEASED.

>
> Fix the issue by delaying to abort zc request until io_uring returns
> the buffer back.
>
> Cc: Keith Busch <kbusch@kernel.org>
> Fixes: 1f6540e2aabb ("ublk: zc register/unregister bvec")
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>  drivers/block/ublk_drv.c | 31 ++++++++++++++++++++++++++++---
>  1 file changed, 28 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index 2fd05c1bd30b..76caec28e5ac 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -140,6 +140,17 @@ struct ublk_uring_cmd_pdu {
>   */
>  #define UBLK_IO_FLAG_NEED_GET_DATA 0x08
>
> +
> +/*
> + * Set when this request buffer is leased to ublk server, and cleared when
> + * the buffer is returned back.
> + *
> + * If this flag is set, this request can't be aborted until buffer is
> + * returned back from io_uring since io_uring is guaranteed to release the
> + * buffer.
> + */
> +#define UBLK_IO_FLAG_BUF_LEASED   0x10
> +
>  /* atomic RW with ubq->cancel_lock */
>  #define UBLK_IO_FLAG_CANCELED  0x80000000
>
> @@ -1550,7 +1561,8 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
>                         rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
>                         if (rq && blk_mq_request_started(rq)) {
>                                 io->flags |= UBLK_IO_FLAG_ABORTED;
> -                               __ublk_fail_req(ubq, io, rq);
> +                               if (!(io->flags & UBLK_IO_FLAG_BUF_LEASED))
> +                                       __ublk_fail_req(ubq, io, rq);
>                         }
>                 }
>         }
> @@ -1874,8 +1886,18 @@ static void ublk_io_release(void *priv)
>  {
>         struct request *rq = priv;
>         struct ublk_queue *ubq = rq->mq_hctx->driver_data;
> +       struct ublk_io *io = &ubq->ios[rq->tag];
>
> -       ublk_put_req_ref(ubq, rq);
> +       io->flags &= ~UBLK_IO_FLAG_BUF_LEASED;
> +       /*
> +        * request has been aborted, and the queue context is exiting,
> +        * and ublk server can't be relied for completing this IO cmd,
> +        * so force to complete it
> +        */
> +       if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED))
> +               __ublk_complete_rq(rq);

This unconditionally ends the ublk request without checking the
reference count. Wouldn't that cause use-after-free/double-free issues
if the ublk request's buffer was registered in multiple io_uring
buffer slots?

Best,
Caleb

> +       else
> +               ublk_put_req_ref(ubq, rq);
>  }
>
>  static int ublk_register_io_buf(struct io_uring_cmd *cmd,
> @@ -1958,7 +1980,10 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
>         ret = -EINVAL;
>         switch (_IOC_NR(cmd_op)) {
>         case UBLK_IO_REGISTER_IO_BUF:
> -               return ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags);
> +               ret = ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags);
> +               if (!ret)
> +                       io->flags |= UBLK_IO_FLAG_BUF_LEASED;
> +               return ret;
>         case UBLK_IO_UNREGISTER_IO_BUF:
>                 return ublk_unregister_io_buf(cmd, ub_cmd->addr, issue_flags);
>         case UBLK_IO_FETCH_REQ:
> --
> 2.47.0
>
Ming Lei April 8, 2025, 2:18 a.m. UTC | #2
On Mon, Apr 07, 2025 at 08:02:24AM -0700, Caleb Sander Mateos wrote:
> On Mon, Apr 7, 2025 at 6:15 AM Ming Lei <ming.lei@redhat.com> wrote:
> >
> > When one request buffer is leased to io_uring via
> > io_buffer_register_bvec(), io_uring guarantees that the buffer will
> > be returned. However ublk aborts request in case that io_uring context
> > is exiting, then ublk_io_release() may observe freed request, and
> > kernel panic is triggered.
> 
> Not sure I follow how the request can be freed while its buffer is
> still registered with io_uring. It looks like __ublk_fail_req()
> decrements the ublk request's reference count (ublk_put_req_ref()) and
> the reference count shouldn't hit 0 if the io_uring registered buffer
> is still holding a reference. Is the problem the if
> (ublk_nosrv_should_reissue_outstanding()) case, which calls
> blk_mq_requeue_request() without checking the reference count?

Yeah, that is the problem: the request can be failed immediately after
requeue & re-dispatch, which triggers the panic. I verified that the
following patch does fix it:


diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 2fd05c1bd30b..41bed67508f2 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1140,6 +1140,25 @@ static void ublk_complete_rq(struct kref *ref)
 	__ublk_complete_rq(req);
 }
 
+static void ublk_do_fail_rq(struct request *req)
+{
+	struct ublk_queue *ubq = req->mq_hctx->driver_data;
+
+	if (ublk_nosrv_should_reissue_outstanding(ubq->dev))
+		blk_mq_requeue_request(req, false);
+	else
+		__ublk_complete_rq(req);
+}
+
+static void ublk_fail_rq_fn(struct kref *ref)
+{
+	struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data,
+			ref);
+	struct request *req = blk_mq_rq_from_pdu(data);
+
+	ublk_do_fail_rq(req);
+}
+
 /*
  * Since ublk_rq_task_work_cb always fails requests immediately during
  * exiting, __ublk_fail_req() is only called from abort context during
@@ -1153,10 +1172,13 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
 {
 	WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);
 
-	if (ublk_nosrv_should_reissue_outstanding(ubq->dev))
-		blk_mq_requeue_request(req, false);
-	else
-		ublk_put_req_ref(ubq, req);
+	if (ublk_need_req_ref(ubq)) {
+		struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+		kref_put(&data->ref, ublk_fail_rq_fn);
+	} else {
+		ublk_do_fail_rq(req);
+	}
 }
 
 static void ubq_complete_io_cmd(struct ublk_io *io, int res,


Thanks,
Ming
diff mbox series

Patch

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 2fd05c1bd30b..76caec28e5ac 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -140,6 +140,17 @@  struct ublk_uring_cmd_pdu {
  */
 #define UBLK_IO_FLAG_NEED_GET_DATA 0x08
 
+
+/*
+ * Set when this request buffer is leased to ublk server, and cleared when
+ * the buffer is returned back.
+ *
+ * If this flag is set, this request can't be aborted until buffer is
+ * returned back from io_uring since io_uring is guaranteed to release the
+ * buffer.
+ */
+#define UBLK_IO_FLAG_BUF_LEASED   0x10
+
 /* atomic RW with ubq->cancel_lock */
 #define UBLK_IO_FLAG_CANCELED	0x80000000
 
@@ -1550,7 +1561,8 @@  static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
 			rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
 			if (rq && blk_mq_request_started(rq)) {
 				io->flags |= UBLK_IO_FLAG_ABORTED;
-				__ublk_fail_req(ubq, io, rq);
+				if (!(io->flags & UBLK_IO_FLAG_BUF_LEASED))
+					__ublk_fail_req(ubq, io, rq);
 			}
 		}
 	}
@@ -1874,8 +1886,18 @@  static void ublk_io_release(void *priv)
 {
 	struct request *rq = priv;
 	struct ublk_queue *ubq = rq->mq_hctx->driver_data;
+	struct ublk_io *io = &ubq->ios[rq->tag];
 
-	ublk_put_req_ref(ubq, rq);
+	io->flags &= ~UBLK_IO_FLAG_BUF_LEASED;
+	/*
+	 * request has been aborted, and the queue context is exiting,
+	 * and ublk server can't be relied for completing this IO cmd,
+	 * so force to complete it
+	 */
+	if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED))
+		__ublk_complete_rq(rq);
+	else
+		ublk_put_req_ref(ubq, rq);
 }
 
 static int ublk_register_io_buf(struct io_uring_cmd *cmd,
@@ -1958,7 +1980,10 @@  static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
 	ret = -EINVAL;
 	switch (_IOC_NR(cmd_op)) {
 	case UBLK_IO_REGISTER_IO_BUF:
-		return ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags);
+		ret = ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags);
+		if (!ret)
+			io->flags |= UBLK_IO_FLAG_BUF_LEASED;
+		return ret;
 	case UBLK_IO_UNREGISTER_IO_BUF:
 		return ublk_unregister_io_buf(cmd, ub_cmd->addr, issue_flags);
 	case UBLK_IO_FETCH_REQ: