diff mbox series

[v2,1/2] scsi: ufs: mcq: Fix the incorrect OCS value for the device command

Message ID 20230610021553.1213-2-powen.kao@mediatek.com (mailing list archive)
State Accepted
Headers show
Series ufs: mcq: Share first hwq for dev command and IO request | expand

Commit Message

Po-Wen Kao June 10, 2023, 2:15 a.m. UTC
From: Stanley Chu <stanley.chu@mediatek.com>

In MCQ mode, when a device command uses a hardware queue shared
with other commands, a race condition may occur in the following scenario:

1. A device command is completed in CQx with CQE entry "e".
2. The interrupt handler copies the "cqe" pointer to "hba->dev_cmd.cqe"
   and completes "hba->dev_cmd.complete".
3. The "ufshcd_wait_for_dev_cmd()" function is awakened and retrieves
   the OCS value from "hba->dev_cmd.cqe".

However, there is a possibility that the CQE entry "e" will be overwritten
by newly completed commands in CQx, resulting in an incorrect OCS value
being received by "ufshcd_wait_for_dev_cmd()".

To avoid this race condition, the OCS value should be immediately copied
to the struct "lrb" of the device command. Then "ufshcd_wait_for_dev_cmd()"
can retrieve the OCS value from the struct "lrb".

Fixes: b5167638ec82 ("scsi: ufs: core: mcq: Add support to allocate multiple queues")
Suggested-by: Can Guo <quic_cang@quicinc.com>
Signed-off-by: Stanley Chu <stanley.chu@mediatek.com>
Tested-by: Po-Wen Kao <powen.kao@mediatek.com>
---
 drivers/ufs/core/ufshcd.c | 10 +++++++---
 include/ufs/ufshcd.h      |  1 -
 2 files changed, 7 insertions(+), 4 deletions(-)

Comments

Bart Van Assche June 11, 2023, 1:59 p.m. UTC | #1
On 6/9/23 19:15, Po-Wen Kao wrote:
> From: Stanley Chu <stanley.chu@mediatek.com>
> 
> In MCQ mode, when a device command uses a hardware queue shared
> with other commands, a race condition may occur in the following scenario:
> 
> 1. A device command is completed in CQx with CQE entry "e".
> 2. The interrupt handler copies the "cqe" pointer to "hba->dev_cmd.cqe"
>     and completes "hba->dev_cmd.complete".
> 3. The "ufshcd_wait_for_dev_cmd()" function is awakened and retrieves
>     the OCS value from "hba->dev_cmd.cqe".
> 
> However, there is a possibility that the CQE entry "e" will be overwritten
> by newly completed commands in CQx, resulting in an incorrect OCS value
> being received by "ufshcd_wait_for_dev_cmd()".
> 
> To avoid this race condition, the OCS value should be immediately copied
> to the struct "lrb" of the device command. Then "ufshcd_wait_for_dev_cmd()"
> can retrieve the OCS value from the struct "lrb".

Since with this patch applied ufs_dev_cmd.cqe is always NULL, please 
remove the 'cqe' member from struct ufs_dev_cmd.

Thanks,

Bart.
Stanley Jhu June 11, 2023, 2:49 p.m. UTC | #2
Hi Bart,

On Sun, Jun 11, 2023 at 10:04 PM Bart Van Assche <bvanassche@acm.org> wrote:
>
> On 6/9/23 19:15, Po-Wen Kao wrote:
> > From: Stanley Chu <stanley.chu@mediatek.com>
> >
> > In MCQ mode, when a device command uses a hardware queue shared
> > with other commands, a race condition may occur in the following scenario:
> >
> > 1. A device command is completed in CQx with CQE entry "e".
> > 2. The interrupt handler copies the "cqe" pointer to "hba->dev_cmd.cqe"
> >     and completes "hba->dev_cmd.complete".
> > 3. The "ufshcd_wait_for_dev_cmd()" function is awakened and retrieves
> >     the OCS value from "hba->dev_cmd.cqe".
> >
> > However, there is a possibility that the CQE entry "e" will be overwritten
> > by newly completed commands in CQx, resulting in an incorrect OCS value
> > being received by "ufshcd_wait_for_dev_cmd()".
> >
> > To avoid this race condition, the OCS value should be immediately copied
> > to the struct "lrb" of the device command. Then "ufshcd_wait_for_dev_cmd()"
> > can retrieve the OCS value from the struct "lrb".
>
> Since with this patch applied ufs_dev_cmd.cqe is always NULL, please
> remove the 'cqe' member from struct ufs_dev_cmd.

This patch removes the 'cqe' member from the struct ufs_dev_cmd, as
shown in the code section below. Please let us know if anything else
has been missed. Much appreciated.

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 9b2d1859f885..602615e6d1bf 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -225,7 +225,6 @@ struct ufs_dev_cmd {
        struct mutex lock;
        struct completion *complete;
        struct ufs_query query;
-       struct cq_entry *cqe;
 };

Thanks,

Stanley
Bart Van Assche June 11, 2023, 3:19 p.m. UTC | #3
On 6/11/23 07:49, Stanley Chu wrote:
> This patch removes the 'cqe' member from the struct ufs_dev_cmd, as
> shown in the code section below. Please let us know if anything else
> has been missed. Very appreciated.
> 
> diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
> index 9b2d1859f885..602615e6d1bf 100644
> --- a/include/ufs/ufshcd.h
> +++ b/include/ufs/ufshcd.h
> @@ -225,7 +225,6 @@ struct ufs_dev_cmd {
>          struct mutex lock;
>          struct completion *complete;
>          struct ufs_query query;
> -       struct cq_entry *cqe;
>   };

I misread the patch. Since the patch looks fine to me after having taken 
a second look:

Reviewed-by: Bart Van Assche <bvanassche@acm.org>
diff mbox series

Patch

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 5da62248ebc4..593790fa4837 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -3086,7 +3086,7 @@  static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
 		 * not trigger any race conditions.
 		 */
 		hba->dev_cmd.complete = NULL;
-		err = ufshcd_get_tr_ocs(lrbp, hba->dev_cmd.cqe);
+		err = ufshcd_get_tr_ocs(lrbp, NULL);
 		if (!err)
 			err = ufshcd_dev_cmd_completion(hba, lrbp);
 	} else {
@@ -3182,7 +3182,6 @@  static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 		goto out;
 
 	hba->dev_cmd.complete = &wait;
-	hba->dev_cmd.cqe = NULL;
 
 	ufshcd_add_query_upiu_trace(hba, UFS_QUERY_SEND, lrbp->ucd_req_ptr);
 
@@ -5431,6 +5430,7 @@  void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag,
 {
 	struct ufshcd_lrb *lrbp;
 	struct scsi_cmnd *cmd;
+	enum utp_ocs ocs;
 
 	lrbp = &hba->lrb[task_tag];
 	lrbp->compl_time_stamp = ktime_get();
@@ -5446,7 +5446,11 @@  void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag,
 	} else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE ||
 		   lrbp->command_type == UTP_CMD_TYPE_UFS_STORAGE) {
 		if (hba->dev_cmd.complete) {
-			hba->dev_cmd.cqe = cqe;
+			if (cqe) {
+				ocs = le32_to_cpu(cqe->status) & MASK_OCS;
+				lrbp->utr_descriptor_ptr->header.dword_2 =
+					cpu_to_le32(ocs);
+			}
 			complete(hba->dev_cmd.complete);
 			ufshcd_clk_scaling_update_busy(hba);
 		}
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 9b2d1859f885..602615e6d1bf 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -225,7 +225,6 @@  struct ufs_dev_cmd {
 	struct mutex lock;
 	struct completion *complete;
 	struct ufs_query query;
-	struct cq_entry *cqe;
 };
 
 /**