diff mbox series

[v3,14/18] scsi: ufs: Fix the SCSI abort handler

Message ID 20210722033439.26550-15-bvanassche@acm.org (mailing list archive)
State Accepted
Headers show
Series UFS patches for kernel v5.15 | expand

Commit Message

Bart Van Assche July 22, 2021, 3:34 a.m. UTC
Make the following changes in ufshcd_abort():
- Return FAILED instead of SUCCESS if the abort handler notices that a SCSI
  command has already been completed. Returning SUCCESS in this case
  triggers a use-after-free and may trigger a kernel crash.
- Fix the code for aborting SCSI commands submitted to a WLUN.

The current approach for aborting SCSI commands that have been submitted to
a WLUN and that timed out is as follows:
- Report to the SCSI core that the command has completed successfully.
  Let the block layer free any data buffers associated with the command.
- Mark the command as outstanding in 'outstanding_reqs'.
- If the block layer tries to reuse the tag associated with the aborted
  command, busy-wait until the tag is freed.

This approach can result in:
- Memory corruption if the controller accesses the data buffer after the
  block layer has freed the associated data buffers.
- A race condition if ufshcd_queuecommand() or ufshcd_exec_dev_cmd()
  checks the bit that corresponds to an aborted command in 'outstanding_reqs'
  after it has been cleared and before it has been set again.
- High energy consumption if ufshcd_queuecommand() repeatedly returns
  SCSI_MLQUEUE_HOST_BUSY.

Fix this by reporting to the SCSI error handler that aborting a SCSI
command failed if the SCSI command was submitted to a WLUN.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Can Guo <cang@codeaurora.org>
Cc: Asutosh Das <asutoshd@codeaurora.org>
Cc: Avri Altman <avri.altman@wdc.com>
Fixes: 7a7e66c65d41 ("scsi: ufs: Fix a race condition between ufshcd_abort() and eh_work()")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/ufs/ufshcd.c | 54 ++++++++++++++-------------------------
 1 file changed, 19 insertions(+), 35 deletions(-)

Comments

Bean Huo Aug. 2, 2021, 1:15 p.m. UTC | #1
On Wed, 2021-07-21 at 20:34 -0700, Bart Van Assche wrote:
> Make the following changes in ufshcd_abort():
> - Return FAILED instead of SUCCESS if the abort handler notices that a SCSI
>   command has already been completed. Returning SUCCESS in this case
>   triggers a use-after-free and may trigger a kernel crash.
> - Fix the code for aborting SCSI commands submitted to a WLUN.
> 
> The current approach for aborting SCSI commands that have been submitted to
> a WLUN and that timed out is as follows:
> - Report to the SCSI core that the command has completed successfully.
>   Let the block layer free any data buffers associated with the command.
> - Mark the command as outstanding in 'outstanding_reqs'.
> - If the block layer tries to reuse the tag associated with the aborted
>   command, busy-wait until the tag is freed.
> 
> This approach can result in:
> - Memory corruption if the controller accesses the data buffer after the
>   block layer has freed the associated data buffers.
> - A race condition if ufshcd_queuecommand() or ufshcd_exec_dev_cmd()
>   checks the bit that corresponds to an aborted command in 'outstanding_reqs'
>   after it has been cleared and before it is reset.
> - High energy consumption if ufshcd_queuecommand() repeatedly returns
>   SCSI_MLQUEUE_HOST_BUSY.
> 
> Fix this by reporting to the SCSI error handler that aborting a SCSI
> command failed if the SCSI command was submitted to a WLUN.
> 
> Cc: Adrian Hunter <adrian.hunter@intel.com>
> Cc: Stanley Chu <stanley.chu@mediatek.com>
> Cc: Can Guo <cang@codeaurora.org>
> Cc: Asutosh Das <asutoshd@codeaurora.org>
> Cc: Avri Altman <avri.altman@wdc.com>
> Fixes: 7a7e66c65d41 ("scsi: ufs: Fix a race condition between ufshcd_abort() and eh_work()")
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> 


Reviewed-by: Bean Huo <beanhuo@micron.com>
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index a3ad83a3bae0..c35e101c5834 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2724,15 +2724,6 @@  static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 	WARN_ON(ufshcd_is_clkgating_allowed(hba) &&
 		(hba->clk_gating.state != CLKS_ON));
 
-	if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
-		if (hba->pm_op_in_progress)
-			set_host_byte(cmd, DID_BAD_TARGET);
-		else
-			err = SCSI_MLQUEUE_HOST_BUSY;
-		ufshcd_release(hba);
-		goto out;
-	}
-
 	lrbp = &hba->lrb[tag];
 	WARN_ON(lrbp->cmd);
 	lrbp->cmd = cmd;
@@ -2929,11 +2920,6 @@  static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 	req->timeout = msecs_to_jiffies(2 * timeout);
 	blk_mq_start_request(req);
 
-	if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
-		err = -EBUSY;
-		goto out;
-	}
-
 	lrbp = &hba->lrb[tag];
 	WARN_ON(lrbp->cmd);
 	err = ufshcd_compose_dev_cmd(hba, lrbp, cmd_type, tag);
@@ -6922,19 +6908,19 @@  static int ufshcd_abort(struct scsi_cmnd *cmd)
 	unsigned int tag = cmd->request->tag;
 	struct ufshcd_lrb *lrbp = &hba->lrb[tag];
 	unsigned long flags;
-	int err = 0;
+	int err = FAILED;
 	u32 reg;
 
 	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
 
 	ufshcd_hold(hba, false);
 	reg = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
-	/* If command is already aborted/completed, return SUCCESS */
+	/* If command is already aborted/completed, return FAILED. */
 	if (!(test_bit(tag, &hba->outstanding_reqs))) {
 		dev_err(hba->dev,
 			"%s: cmd at tag %d already completed, outstanding=0x%lx, doorbell=0x%x\n",
 			__func__, tag, hba->outstanding_reqs, reg);
-		goto out;
+		goto release;
 	}
 
 	/* Print Transfer Request of aborted task */
@@ -6963,7 +6949,8 @@  static int ufshcd_abort(struct scsi_cmnd *cmd)
 		dev_err(hba->dev,
 		"%s: cmd was completed, but without a notifying intr, tag = %d",
 		__func__, tag);
-		goto cleanup;
+		__ufshcd_transfer_req_compl(hba, 1UL << tag);
+		goto release;
 	}
 
 	/*
@@ -6976,36 +6963,33 @@  static int ufshcd_abort(struct scsi_cmnd *cmd)
 	 */
 	if (lrbp->lun == UFS_UPIU_UFS_DEVICE_WLUN) {
 		ufshcd_update_evt_hist(hba, UFS_EVT_ABORT, lrbp->lun);
-		__ufshcd_transfer_req_compl(hba, (1UL << tag));
-		set_bit(tag, &hba->outstanding_reqs);
+
 		spin_lock_irqsave(host->host_lock, flags);
 		hba->force_reset = true;
 		ufshcd_schedule_eh_work(hba);
 		spin_unlock_irqrestore(host->host_lock, flags);
-		goto out;
+		goto release;
 	}
 
 	/* Skip task abort in case previous aborts failed and report failure */
-	if (lrbp->req_abort_skip)
-		err = -EIO;
-	else
-		err = ufshcd_try_to_abort_task(hba, tag);
+	if (lrbp->req_abort_skip) {
+		dev_err(hba->dev, "%s: skipping abort\n", __func__);
+		ufshcd_set_req_abort_skip(hba, hba->outstanding_reqs);
+		goto release;
+	}
 
-	if (!err) {
-cleanup:
-		__ufshcd_transfer_req_compl(hba, (1UL << tag));
-out:
-		err = SUCCESS;
-	} else {
+	err = ufshcd_try_to_abort_task(hba, tag);
+	if (err) {
 		dev_err(hba->dev, "%s: failed with err %d\n", __func__, err);
 		ufshcd_set_req_abort_skip(hba, hba->outstanding_reqs);
 		err = FAILED;
+		goto release;
 	}
 
-	/*
-	 * This ufshcd_release() corresponds to the original scsi cmd that got
-	 * aborted here (as we won't get any IRQ for it).
-	 */
+	err = SUCCESS;
+
+release:
+	/* Matches the ufshcd_hold() call at the start of this function. */
 	ufshcd_release(hba);
 	return err;
 }