diff mbox

[V5,2/4] scsi: ufs: Fix hardware race conditions while aborting a command

Message ID 1375163155-1473-3-git-send-email-sthumma@codeaurora.org (mailing list archive)
State Not Applicable, archived
Headers show

Commit Message

Sujit Reddy Thumma July 30, 2013, 5:45 a.m. UTC
There is a possible race condition in the hardware when the abort
command is issued to terminate the ongoing SCSI command as described
below:

- A bit in the door-bell register is set in the controller for a
  new SCSI command.
- In some rare situations, before controller get a chance to issue
  the command to the device, the software issued an abort command.
- If the device recieves abort command first then it returns success
  because the command itself is not present.
- Now if the controller commits the command to device it will be
  processed.
- Software thinks that command is aborted and proceed while still
  the device is processing it.
- The software, controller and device may go out of sync because of
  this race condition.

To avoid this, query task presence in the device before sending abort
task command so that after the abort operation, the command is guaranteed
to be non-existent in both controller and the device.

Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
---
 drivers/scsi/ufs/ufshcd.c |   70 +++++++++++++++++++++++++++++++++++---------
 1 files changed, 55 insertions(+), 15 deletions(-)

Comments

Dolev Raviv Aug. 12, 2013, 1:01 p.m. UTC | #1
Tested-by: Dolev Raviv <draviv@codeaurora.org>

> There is a possible race condition in the hardware when the abort
> command is issued to terminate the ongoing SCSI command as described
> below:
>
> - A bit in the door-bell register is set in the controller for a
>   new SCSI command.
> - In some rare situations, before controller get a chance to issue
>   the command to the device, the software issued an abort command.
> - If the device recieves abort command first then it returns success
>   because the command itself is not present.
> - Now if the controller commits the command to device it will be
>   processed.
> - Software thinks that command is aborted and proceed while still
>   the device is processing it.
> - The software, controller and device may go out of sync because of
>   this race condition.
>
> To avoid this, query task presence in the device before sending abort
> task command so that after the abort operation, the command is guaranteed
> to be non-existent in both controller and the device.
>
> Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
> ---
>  drivers/scsi/ufs/ufshcd.c |   70
> +++++++++++++++++++++++++++++++++++---------
>  1 files changed, 55 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index d7f3746..d4ee48d 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -2485,6 +2485,12 @@ static int ufshcd_host_reset(struct scsi_cmnd *cmd)
>   * ufshcd_abort - abort a specific command
>   * @cmd: SCSI command pointer
>   *
> + * Abort the pending command in device by sending UFS_ABORT_TASK task
> management
> + * command, and in host controller by clearing the door-bell register.
> There can
> + * be race between controller sending the command to the device while
> abort is
> + * issued. To avoid that, first issue UFS_QUERY_TASK to check if the
> command is
> + * really issued and then try to abort it.
> + *
>   * Returns SUCCESS/FAILED
>   */
>  static int ufshcd_abort(struct scsi_cmnd *cmd)
> @@ -2493,7 +2499,8 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
>  	struct ufs_hba *hba;
>  	unsigned long flags;
>  	unsigned int tag;
> -	int err;
> +	int err = 0;
> +	int poll_cnt;
>  	u8 resp = 0xF;
>  	struct ufshcd_lrb *lrbp;
>
> @@ -2501,33 +2508,59 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
>  	hba = shost_priv(host);
>  	tag = cmd->request->tag;
>
> -	spin_lock_irqsave(host->host_lock, flags);
> +	/* If command is already aborted/completed, return SUCCESS */
> +	if (!(test_bit(tag, &hba->outstanding_reqs)))
> +		goto out;
>
> -	/* check if command is still pending */
> -	if (!(test_bit(tag, &hba->outstanding_reqs))) {
> -		err = FAILED;
> -		spin_unlock_irqrestore(host->host_lock, flags);
> +	lrbp = &hba->lrb[tag];
> +	for (poll_cnt = 100; poll_cnt; poll_cnt--) {
> +		err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
> +				UFS_QUERY_TASK, &resp);
> +		if (!err && resp == UPIU_TASK_MANAGEMENT_FUNC_SUCCEEDED) {
> +			/* cmd pending in the device */
> +			break;
> +		} else if (!err && resp == UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
> +			u32 reg;
> +
> +			/*
> +			 * cmd not pending in the device, check if it is
> +			 * in transition.
> +			 */
> +			reg = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
> +			if (reg & (1 << tag)) {
> +				/* sleep for max. 2ms to stabilize */
> +				usleep_range(1000, 2000);
> +				continue;
> +			}
> +			/* command completed already */
> +			goto out;
> +		} else {
> +			if (!err)
> +				err = resp; /* service response error */
> +			goto out;
> +		}
> +	}
> +
> +	if (!poll_cnt) {
> +		err = -EBUSY;
>  		goto out;
>  	}
> -	spin_unlock_irqrestore(host->host_lock, flags);
>
> -	lrbp = &hba->lrb[tag];
>  	err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
>  			UFS_ABORT_TASK, &resp);
>  	if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
> -		err = FAILED;
> +		if (!err)
> +			err = resp; /* service response error */
>  		goto out;
> -	} else {
> -		err = SUCCESS;
>  	}
>
> +	err = ufshcd_clear_cmd(hba, tag);
> +	if (err)
> +		goto out;
> +
>  	scsi_dma_unmap(cmd);
>
>  	spin_lock_irqsave(host->host_lock, flags);
> -
> -	/* clear the respective UTRLCLR register bit */
> -	ufshcd_utrl_clear(hba, tag);
> -
>  	__clear_bit(tag, &hba->outstanding_reqs);
>  	hba->lrb[tag].cmd = NULL;
>  	spin_unlock_irqrestore(host->host_lock, flags);
> @@ -2535,6 +2568,13 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
>  	clear_bit_unlock(tag, &hba->lrb_in_use);
>  	wake_up(&hba->dev_cmd.tag_wq);
>  out:
> +	if (!err) {
> +		err = SUCCESS;
> +	} else {
> +		dev_err(hba->dev, "%s: failed with err %d\n", __func__, err);
> +		err = FAILED;
> +	}
> +
>  	return err;
>  }
>
> --
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
> of Code Aurora Forum, hosted by The Linux Foundation.
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Yaniv Gardi Aug. 13, 2013, 2:56 p.m. UTC | #2
Reviewed-by: Yaniv Gardi <ygardi@codeaurora.org>

QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation


= > -----Original Message-----
= > From: linux-scsi-owner@vger.kernel.org [mailto:linux-scsi-
= > owner@vger.kernel.org] On Behalf Of Dolev Raviv
= > Sent: Monday, August 12, 2013 4:02 PM
= > To: Sujit Reddy Thumma
= > Cc: Vinayak Holikatti; Santosh Y; James E.J. Bottomley; linux-
= > scsi@vger.kernel.org; Sujit Reddy Thumma; linux-arm-
= > msm@vger.kernel.org
= > Subject: Re: [PATCH V5 2/4] scsi: ufs: Fix hardware race conditions
while
= > aborting a command
= > 
= > Tested-by: Dolev Raviv <draviv@codeaurora.org>
= > 
= > > There is a possible race condition in the hardware when the abort
= > > command is issued to terminate the ongoing SCSI command as described
= > > below:
= > >
= > > - A bit in the door-bell register is set in the controller for a
= > >   new SCSI command.
= > > - In some rare situations, before controller get a chance to issue
= > >   the command to the device, the software issued an abort command.
= > > - If the device recieves abort command first then it returns success
= > >   because the command itself is not present.
= > > - Now if the controller commits the command to device it will be
= > >   processed.
= > > - Software thinks that command is aborted and proceed while still
= > >   the device is processing it.
= > > - The software, controller and device may go out of sync because of
= > >   this race condition.
= > >
= > > To avoid this, query task presence in the device before sending abort
= > > task command so that after the abort operation, the command is
= > > guaranteed to be non-existent in both controller and the device.
= > >
= > > Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
= > > ---
= > >  drivers/scsi/ufs/ufshcd.c |   70
= > > +++++++++++++++++++++++++++++++++++---------
= > >  1 files changed, 55 insertions(+), 15 deletions(-)
= > >
= > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
= > > index d7f3746..d4ee48d 100644
= > > --- a/drivers/scsi/ufs/ufshcd.c
= > > +++ b/drivers/scsi/ufs/ufshcd.c
= > > @@ -2485,6 +2485,12 @@ static int ufshcd_host_reset(struct scsi_cmnd
= > *cmd)
= > >   * ufshcd_abort - abort a specific command
= > >   * @cmd: SCSI command pointer
= > >   *
= > > + * Abort the pending command in device by sending UFS_ABORT_TASK
= > task
= > > management
= > > + * command, and in host controller by clearing the door-bell
register.
= > > There can
= > > + * be race between controller sending the command to the device while
= > > abort is
= > > + * issued. To avoid that, first issue UFS_QUERY_TASK to check if the
= > > command is
= > > + * really issued and then try to abort it.
= > > + *
= > >   * Returns SUCCESS/FAILED
= > >   */
= > >  static int ufshcd_abort(struct scsi_cmnd *cmd) @@ -2493,7 +2499,8
= > @@
= > > static int ufshcd_abort(struct scsi_cmnd *cmd)
= > >  	struct ufs_hba *hba;
= > >  	unsigned long flags;
= > >  	unsigned int tag;
= > > -	int err;
= > > +	int err = 0;
= > > +	int poll_cnt;
= > >  	u8 resp = 0xF;
= > >  	struct ufshcd_lrb *lrbp;
= > >
= > > @@ -2501,33 +2508,59 @@ static int ufshcd_abort(struct scsi_cmnd
= > *cmd)
= > >  	hba = shost_priv(host);
= > >  	tag = cmd->request->tag;
= > >
= > > -	spin_lock_irqsave(host->host_lock, flags);
= > > +	/* If command is already aborted/completed, return SUCCESS */
= > > +	if (!(test_bit(tag, &hba->outstanding_reqs)))
= > > +		goto out;
= > >
= > > -	/* check if command is still pending */
= > > -	if (!(test_bit(tag, &hba->outstanding_reqs))) {
= > > -		err = FAILED;
= > > -		spin_unlock_irqrestore(host->host_lock, flags);
= > > +	lrbp = &hba->lrb[tag];
= > > +	for (poll_cnt = 100; poll_cnt; poll_cnt--) {
= > > +		err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
= > > +				UFS_QUERY_TASK, &resp);
= > > +		if (!err && resp ==
= > UPIU_TASK_MANAGEMENT_FUNC_SUCCEEDED) {
= > > +			/* cmd pending in the device */
= > > +			break;
= > > +		} else if (!err && resp ==
= > UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
= > > +			u32 reg;
= > > +
= > > +			/*
= > > +			 * cmd not pending in the device, check if it is
= > > +			 * in transition.
= > > +			 */
= > > +			reg = ufshcd_readl(hba,
= > REG_UTP_TRANSFER_REQ_DOOR_BELL);
= > > +			if (reg & (1 << tag)) {
= > > +				/* sleep for max. 2ms to stabilize */
= > > +				usleep_range(1000, 2000);
= > > +				continue;
= > > +			}
= > > +			/* command completed already */
= > > +			goto out;
= > > +		} else {
= > > +			if (!err)
= > > +				err = resp; /* service response error */
= > > +			goto out;
= > > +		}
= > > +	}
= > > +
= > > +	if (!poll_cnt) {
= > > +		err = -EBUSY;
= > >  		goto out;
= > >  	}
= > > -	spin_unlock_irqrestore(host->host_lock, flags);
= > >
= > > -	lrbp = &hba->lrb[tag];
= > >  	err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
= > >  			UFS_ABORT_TASK, &resp);
= > >  	if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
= > > -		err = FAILED;
= > > +		if (!err)
= > > +			err = resp; /* service response error */
= > >  		goto out;
= > > -	} else {
= > > -		err = SUCCESS;
= > >  	}
= > >
= > > +	err = ufshcd_clear_cmd(hba, tag);
= > > +	if (err)
= > > +		goto out;
= > > +
= > >  	scsi_dma_unmap(cmd);
= > >
= > >  	spin_lock_irqsave(host->host_lock, flags);
= > > -
= > > -	/* clear the respective UTRLCLR register bit */
= > > -	ufshcd_utrl_clear(hba, tag);
= > > -
= > >  	__clear_bit(tag, &hba->outstanding_reqs);
= > >  	hba->lrb[tag].cmd = NULL;
= > >  	spin_unlock_irqrestore(host->host_lock, flags); @@ -2535,6
= > +2568,13
= > > @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
= > >  	clear_bit_unlock(tag, &hba->lrb_in_use);
= > >  	wake_up(&hba->dev_cmd.tag_wq);
= > >  out:
= > > +	if (!err) {
= > > +		err = SUCCESS;
= > > +	} else {
= > > +		dev_err(hba->dev, "%s: failed with err %d\n", __func__,
= > err);
= > > +		err = FAILED;
= > > +	}
= > > +
= > >  	return err;
= > >  }
= > >
= > > --
= > > QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a
= > > member of Code Aurora Forum, hosted by The Linux Foundation.
= > >
= > > --
= > > To unsubscribe from this list: send the line "unsubscribe linux-scsi"
= > > in the body of a message to majordomo@vger.kernel.org More
= > majordomo
= > > info at  http://vger.kernel.org/majordomo-info.html
= > >
= > 
= > 
= > --
= > QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center, Inc. is a
= > member of Code Aurora Forum, hosted by The Linux Foundation
= > 
= > --
= > To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the
= > body of a message to majordomo@vger.kernel.org More majordomo info
= > at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index d7f3746..d4ee48d 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2485,6 +2485,12 @@  static int ufshcd_host_reset(struct scsi_cmnd *cmd)
  * ufshcd_abort - abort a specific command
  * @cmd: SCSI command pointer
  *
+ * Abort the pending command in device by sending UFS_ABORT_TASK task management
+ * command, and in host controller by clearing the door-bell register. There can
+ * be race between controller sending the command to the device while abort is
+ * issued. To avoid that, first issue UFS_QUERY_TASK to check if the command is
+ * really issued and then try to abort it.
+ *
  * Returns SUCCESS/FAILED
  */
 static int ufshcd_abort(struct scsi_cmnd *cmd)
@@ -2493,7 +2499,8 @@  static int ufshcd_abort(struct scsi_cmnd *cmd)
 	struct ufs_hba *hba;
 	unsigned long flags;
 	unsigned int tag;
-	int err;
+	int err = 0;
+	int poll_cnt;
 	u8 resp = 0xF;
 	struct ufshcd_lrb *lrbp;
 
@@ -2501,33 +2508,59 @@  static int ufshcd_abort(struct scsi_cmnd *cmd)
 	hba = shost_priv(host);
 	tag = cmd->request->tag;
 
-	spin_lock_irqsave(host->host_lock, flags);
+	/* If command is already aborted/completed, return SUCCESS */
+	if (!(test_bit(tag, &hba->outstanding_reqs)))
+		goto out;
 
-	/* check if command is still pending */
-	if (!(test_bit(tag, &hba->outstanding_reqs))) {
-		err = FAILED;
-		spin_unlock_irqrestore(host->host_lock, flags);
+	lrbp = &hba->lrb[tag];
+	for (poll_cnt = 100; poll_cnt; poll_cnt--) {
+		err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
+				UFS_QUERY_TASK, &resp);
+		if (!err && resp == UPIU_TASK_MANAGEMENT_FUNC_SUCCEEDED) {
+			/* cmd pending in the device */
+			break;
+		} else if (!err && resp == UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
+			u32 reg;
+
+			/*
+			 * cmd not pending in the device, check if it is
+			 * in transition.
+			 */
+			reg = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
+			if (reg & (1 << tag)) {
+				/* sleep for max. 2ms to stabilize */
+				usleep_range(1000, 2000);
+				continue;
+			}
+			/* command completed already */
+			goto out;
+		} else {
+			if (!err)
+				err = resp; /* service response error */
+			goto out;
+		}
+	}
+
+	if (!poll_cnt) {
+		err = -EBUSY;
 		goto out;
 	}
-	spin_unlock_irqrestore(host->host_lock, flags);
 
-	lrbp = &hba->lrb[tag];
 	err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
 			UFS_ABORT_TASK, &resp);
 	if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
-		err = FAILED;
+		if (!err)
+			err = resp; /* service response error */
 		goto out;
-	} else {
-		err = SUCCESS;
 	}
 
+	err = ufshcd_clear_cmd(hba, tag);
+	if (err)
+		goto out;
+
 	scsi_dma_unmap(cmd);
 
 	spin_lock_irqsave(host->host_lock, flags);
-
-	/* clear the respective UTRLCLR register bit */
-	ufshcd_utrl_clear(hba, tag);
-
 	__clear_bit(tag, &hba->outstanding_reqs);
 	hba->lrb[tag].cmd = NULL;
 	spin_unlock_irqrestore(host->host_lock, flags);
@@ -2535,6 +2568,13 @@  static int ufshcd_abort(struct scsi_cmnd *cmd)
 	clear_bit_unlock(tag, &hba->lrb_in_use);
 	wake_up(&hba->dev_cmd.tag_wq);
 out:
+	if (!err) {
+		err = SUCCESS;
+	} else {
+		dev_err(hba->dev, "%s: failed with err %d\n", __func__, err);
+		err = FAILED;
+	}
+
 	return err;
 }