[v3,1/2] ublk: properly serialize all FETCH_REQs

Message ID: 20250410-ublk_task_per_io-v3-1-b811e8f4554a@purestorage.com
State: New
Series: ublk: decouple server threads from hctxs

Commit Message

Uday Shankar April 11, 2025, 12:17 a.m. UTC
Most uring_cmds issued against ublk character devices are serialized
because each command affects only one queue, and there is an early check
which only allows a single task (the queue's ubq_daemon) to issue
uring_cmds against that queue. However, this mechanism does not work for
FETCH_REQs, since they are expected before ubq_daemon is set. Since
FETCH_REQs are only used at initialization and not in the fast path,
serialize them using the per-ublk-device mutex. This fixes a number of
data races that were previously possible if a badly behaved ublk server
decided to issue multiple FETCH_REQs against the same qid/tag
concurrently.

Reported-by: Caleb Sander Mateos <csander@purestorage.com>
Signed-off-by: Uday Shankar <ushankar@purestorage.com>
---
 drivers/block/ublk_drv.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)
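
The bug is a classic check-then-act race: two tasks can issue FETCH_REQ
against the same qid/tag, both observe the io as not yet active, and both
proceed to claim it. Below is a minimal standalone userspace sketch of the
pattern and of the fix, with pthreads standing in for the kernel mutex;
the struct and flag names are simplified stand-ins, not the driver's types.

	/*
	 * Two "server" threads fetch the same tag. Without dev_mutex,
	 * both can pass the !ACTIVE check before either sets the flag,
	 * and both claim the same io slot. Build with -lpthread.
	 */
	#include <pthread.h>
	#include <stdio.h>

	#define IO_FLAG_ACTIVE 0x1

	struct io_slot {
		unsigned int flags;
		int fetch_count;	/* how many FETCH_REQs "won"; must end up 1 */
	};

	static struct io_slot io;
	static pthread_mutex_t dev_mutex = PTHREAD_MUTEX_INITIALIZER;

	static void *fetch_req(void *arg)
	{
		(void)arg;
		/* the fix: serialize the whole check-then-act sequence */
		pthread_mutex_lock(&dev_mutex);
		if (!(io.flags & IO_FLAG_ACTIVE)) {
			io.flags |= IO_FLAG_ACTIVE;	/* claim the slot */
			io.fetch_count++;
		}
		pthread_mutex_unlock(&dev_mutex);
		return NULL;
	}

	int main(void)
	{
		pthread_t a, b;

		pthread_create(&a, NULL, fetch_req, NULL);
		pthread_create(&b, NULL, fetch_req, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);
		printf("fetch_count = %d (expect 1)\n", io.fetch_count);
		return 0;
	}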

Comments

Ming Lei April 11, 2025, 8:29 a.m. UTC | #1
On Thu, Apr 10, 2025 at 06:17:50PM -0600, Uday Shankar wrote:
> [...]

Looks fine,

Reviewed-by: Ming Lei <ming.lei@redhat.com>

BTW, the FETCH_REQ handling could be put into a single function, which
would make this cleaner.

Thanks,
Ming
Caleb Sander Mateos April 11, 2025, 4 p.m. UTC | #2
On Thu, Apr 10, 2025 at 5:18 PM Uday Shankar <ushankar@purestorage.com> wrote:
> [...]
>         case UBLK_IO_FETCH_REQ:
> +               mutex_lock(&ub->mutex);
>                 /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
>                 if (ublk_queue_ready(ubq)) {
>                         ret = -EBUSY;
> -                       goto out;
> +                       goto out_unlock;
>                 }
>                 /*
>                  * The io is being handled by server, so COMMIT_RQ is expected
>                  * instead of FETCH_REQ
>                  */
>                 if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
> -                       goto out;
> +                       goto out_unlock;
> +
> +               /*
> +                * Check again (with mutex held) that the I/O is not
> +                * active - if so, someone may have already fetched it
> +                */
> +               if (io->flags & UBLK_IO_FLAG_ACTIVE)
> +                       goto out_unlock;

The two checks of io->flags could probably be combined into a single
if (io->flags & (UBLK_IO_FLAG_ACTIVE | UBLK_IO_FLAG_OWNED_BY_SRV)).

And I agree with Ming, it would be nice to split the UBLK_IO_FETCH_REQ
handling into a separate function, especially now that the mutex needs
to be acquired for the duration of its handling.

Best,
Caleb

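Both reviewers point the same way. Below is a minimal sketch of what such a
helper might look like, folding the two flag tests into one as suggested;
the name ublk_fetch, its signature, and the error-code choices are
illustrative assumptions layered on the patch above, not code from this
series.

	/*
	 * Hypothetical helper (name and signature assumed): all FETCH_REQ
	 * handling, including the ub->mutex critical section, in one place.
	 */
	static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_device *ub,
			      struct ublk_queue *ubq, struct ublk_io *io,
			      __u64 buf_addr)
	{
		int ret = -EINVAL;

		mutex_lock(&ub->mutex);
		/* UBLK_IO_FETCH_REQ is only allowed before the queue is set up */
		if (ublk_queue_ready(ubq)) {
			ret = -EBUSY;
			goto out;
		}

		/*
		 * The io is either owned by the server (COMMIT_RQ is expected
		 * instead of FETCH_REQ) or was already fetched by a concurrent
		 * task; reject either way, in a single test.
		 */
		if (io->flags & (UBLK_IO_FLAG_OWNED_BY_SRV | UBLK_IO_FLAG_ACTIVE))
			goto out;

		if (ublk_need_map_io(ubq)) {
			/*
			 * FETCH_RQ has to provide io buffer unless
			 * UBLK_F_NEED_GET_DATA is enabled
			 */
			if (!buf_addr && !ublk_need_get_data(ubq))
				goto out;
		} else if (buf_addr) {
			/* User copy requires addr to be unset */
			goto out;
		}

		ublk_fill_io_cmd(io, cmd, buf_addr);
		ublk_mark_io_ready(ub, ubq);
		ret = 0;
	out:
		mutex_unlock(&ub->mutex);
		return ret;
	}

The UBLK_IO_FETCH_REQ case in __ublk_ch_uring_cmd would then shrink to
ret = ublk_fetch(cmd, ub, ubq, io, ub_cmd->addr) followed by a goto out
on failure, and the out_unlock label would no longer be needed there.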

Patch

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 2fd05c1bd30b03343cb6f357f8c08dd92ff47af9..812789f58704cece9b661713cd0107807c789531 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1809,8 +1809,8 @@ static void ublk_nosrv_work(struct work_struct *work)
 
 /* device can only be started after all IOs are ready */
 static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
+	__must_hold(&ub->mutex)
 {
-	mutex_lock(&ub->mutex);
 	ubq->nr_io_ready++;
 	if (ublk_queue_ready(ubq)) {
 		ubq->ubq_daemon = current;
@@ -1822,7 +1822,6 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
 	}
 	if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues)
 		complete_all(&ub->completion);
-	mutex_unlock(&ub->mutex);
 }
 
 static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
@@ -1962,17 +1961,25 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
 	case UBLK_IO_UNREGISTER_IO_BUF:
 		return ublk_unregister_io_buf(cmd, ub_cmd->addr, issue_flags);
 	case UBLK_IO_FETCH_REQ:
+		mutex_lock(&ub->mutex);
 		/* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
 		if (ublk_queue_ready(ubq)) {
 			ret = -EBUSY;
-			goto out;
+			goto out_unlock;
 		}
 		/*
 		 * The io is being handled by server, so COMMIT_RQ is expected
 		 * instead of FETCH_REQ
 		 */
 		if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
-			goto out;
+			goto out_unlock;
+
+		/*
+		 * Check again (with mutex held) that the I/O is not
+		 * active - if so, someone may have already fetched it
+		 */
+		if (io->flags & UBLK_IO_FLAG_ACTIVE)
+			goto out_unlock;
 
 		if (ublk_need_map_io(ubq)) {
 			/*
@@ -1980,15 +1987,16 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
 			 * DATA is not enabled
 			 */
 			if (!ub_cmd->addr && !ublk_need_get_data(ubq))
-				goto out;
+				goto out_unlock;
 		} else if (ub_cmd->addr) {
 			/* User copy requires addr to be unset */
 			ret = -EINVAL;
-			goto out;
+			goto out_unlock;
 		}
 
 		ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
 		ublk_mark_io_ready(ub, ubq);
+		mutex_unlock(&ub->mutex);
 		break;
 	case UBLK_IO_COMMIT_AND_FETCH_REQ:
 		req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
@@ -2028,7 +2036,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
 	ublk_prep_cancel(cmd, issue_flags, ubq, tag);
 	return -EIOCBQUEUED;
 
- out:
+out_unlock:
+	mutex_unlock(&ub->mutex);
+out:
 	pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n",
 			__func__, cmd_op, tag, ret, io->flags);
 	return ret;