diff mbox series

scsi: ufs: Fix a race between the interrupt handler and the reset handler

Message ID 20220610232915.2916712-1-bvanassche@acm.org (mailing list archive)
State Superseded
Headers show
Series scsi: ufs: Fix a race between the interrupt handler and the reset handler | expand

Commit Message

Bart Van Assche June 10, 2022, 11:29 p.m. UTC
Prevent the interrupt handler and the reset handler from both trying to
complete the same request at the same time. This patch is the result of
analyzing the following crash:

Unable to handle kernel NULL pointer dereference at virtual address 0000000000000120
CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           OE     5.10.107-android13-4-00051-g1e48e8970cca-ab8664745 #1
pc : ufshcd_release_scsi_cmd+0x30/0x46c
lr : __ufshcd_transfer_req_compl+0x4fc/0x9c0
Call trace:
 ufshcd_release_scsi_cmd+0x30/0x46c
 __ufshcd_transfer_req_compl+0x4fc/0x9c0
 ufshcd_poll+0xf0/0x208
 ufshcd_sl_intr+0xb8/0xf0
 ufshcd_intr+0x168/0x2f4
 __handle_irq_event_percpu+0xa0/0x30c
 handle_irq_event+0x84/0x178
 handle_fasteoi_irq+0x150/0x2e8
 __handle_domain_irq+0x114/0x1e4
 gic_handle_irq.31846+0x58/0x300
 el1_irq+0xe4/0x1c0
 cpuidle_enter_state+0x3ac/0x8c4
 do_idle+0x2fc/0x55c
 cpu_startup_entry+0x84/0x90
 kernel_init+0x0/0x310
 start_kernel+0x0/0x608
 start_kernel+0x4ec/0x608

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/ufs/ufshcd.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

Comments

Adrian Hunter June 13, 2022, 6:28 a.m. UTC | #1
On 11/06/22 02:29, Bart Van Assche wrote:
> Prevent that both the interrupt handler and the reset handler try to
> complete a request at the same time. This patch is the result of the
> analysis of the following crash:
> 
> Unable to handle kernel NULL pointer dereference at virtual address 0000000000000120
> CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           OE     5.10.107-android13-4-00051-g1e48e8970cca-ab8664745 #1
> pc : ufshcd_release_scsi_cmd+0x30/0x46c
> lr : __ufshcd_transfer_req_compl+0x4fc/0x9c0
> Call trace:
>  ufshcd_release_scsi_cmd+0x30/0x46c
>  __ufshcd_transfer_req_compl+0x4fc/0x9c0
>  ufshcd_poll+0xf0/0x208
>  ufshcd_sl_intr+0xb8/0xf0
>  ufshcd_intr+0x168/0x2f4
>  __handle_irq_event_percpu+0xa0/0x30c
>  handle_irq_event+0x84/0x178
>  handle_fasteoi_irq+0x150/0x2e8
>  __handle_domain_irq+0x114/0x1e4
>  gic_handle_irq.31846+0x58/0x300
>  el1_irq+0xe4/0x1c0
>  cpuidle_enter_state+0x3ac/0x8c4
>  do_idle+0x2fc/0x55c
>  cpu_startup_entry+0x84/0x90
>  kernel_init+0x0/0x310
>  start_kernel+0x0/0x608
>  start_kernel+0x4ec/0x608
> 
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 20 +++++++++++++-------
>  1 file changed, 13 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 1fb3a8b9b03e..279691ff3562 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -6966,6 +6966,7 @@ int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba,
>   */
>  static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
>  {
> +	unsigned long flags, completed_reqs = 0;
>  	struct Scsi_Host *host;
>  	struct ufs_hba *hba;
>  	u32 pos;
> @@ -6984,13 +6985,18 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
>  	}
>  
>  	/* clear the commands that were pending for corresponding LUN */
> -	for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) {
> -		if (hba->lrb[pos].lun == lun) {
> -			err = ufshcd_clear_cmd(hba, pos);
> -			if (err)
> -				break;
> -			__ufshcd_transfer_req_compl(hba, 1U << pos);
> -		}
> +	spin_lock_irqsave(&hba->outstanding_lock, flags);
> +	for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs)
> +		if (hba->lrb[pos].lun == lun)
> +			__set_bit(pos, &completed_reqs);
> +	hba->outstanding_reqs &= ~completed_reqs;
> +	spin_unlock_irqrestore(&hba->outstanding_lock, flags);
> +
> +	for_each_set_bit(pos, &completed_reqs, hba->nutrs) {
> +		err = ufshcd_clear_cmd(hba, pos);
> +		if (err)
> +			break;

Having cleared the bit in hba->outstanding_reqs, shouldn't we
always complete the request? I.e., we should not 'break' here;
otherwise the remaining requests whose bits were already cleared
would never be completed.

> +		__ufshcd_transfer_req_compl(hba, 1U << pos);
>  	}
>  
>  out:
Bart Van Assche June 13, 2022, 2:24 p.m. UTC | #2
On 6/12/22 23:28, Adrian Hunter wrote:
> Having cleared the bit in hba->outstanding_reqs, shouldn't we
> always complete the request? i.e. we should not 'break' here

Agreed. I will fix this, retest and repost.

Thanks,

Bart.
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 1fb3a8b9b03e..279691ff3562 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -6966,6 +6966,7 @@  int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba,
  */
 static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
 {
+	unsigned long flags, completed_reqs = 0;
 	struct Scsi_Host *host;
 	struct ufs_hba *hba;
 	u32 pos;
@@ -6984,13 +6985,18 @@  static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
 	}
 
 	/* clear the commands that were pending for corresponding LUN */
-	for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) {
-		if (hba->lrb[pos].lun == lun) {
-			err = ufshcd_clear_cmd(hba, pos);
-			if (err)
-				break;
-			__ufshcd_transfer_req_compl(hba, 1U << pos);
-		}
+	spin_lock_irqsave(&hba->outstanding_lock, flags);
+	for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs)
+		if (hba->lrb[pos].lun == lun)
+			__set_bit(pos, &completed_reqs);
+	hba->outstanding_reqs &= ~completed_reqs;
+	spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+
+	for_each_set_bit(pos, &completed_reqs, hba->nutrs) {
+		err = ufshcd_clear_cmd(hba, pos);
+		if (err)
+			break;
+		__ufshcd_transfer_req_compl(hba, 1U << pos);
 	}
 
 out: