From patchwork Wed Aug 14 13:13:22 2013
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Sujit Reddy Thumma <sthumma@codeaurora.org>
X-Patchwork-Id: 2844495
Return-Path: <linux-arm-msm-owner@kernel.org>
X-Original-To: patchwork-linux-arm-msm@patchwork.kernel.org
Delivered-To: patchwork-parsemail@patchwork1.web.kernel.org
Received: from mail.kernel.org (mail.kernel.org [198.145.19.201])
	by patchwork1.web.kernel.org (Postfix) with ESMTP id AA11C9F271
	for <patchwork-linux-arm-msm@patchwork.kernel.org>;
	Wed, 14 Aug 2013 13:14:03 +0000 (UTC)
Received: from mail.kernel.org (localhost [127.0.0.1])
	by mail.kernel.org (Postfix) with ESMTP id 1CC442059C
	for <patchwork-linux-arm-msm@patchwork.kernel.org>;
	Wed, 14 Aug 2013 13:14:02 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by mail.kernel.org (Postfix) with ESMTP id 427D82058B
	for <patchwork-linux-arm-msm@patchwork.kernel.org>;
	Wed, 14 Aug 2013 13:14:00 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S932545Ab3HNNN7 (ORCPT
	<rfc822;patchwork-linux-arm-msm@patchwork.kernel.org>);
	Wed, 14 Aug 2013 09:13:59 -0400
Received: from smtp.codeaurora.org ([198.145.11.231]:59899 "EHLO
	smtp.codeaurora.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S932477Ab3HNNN6 (ORCPT
	<rfc822;linux-arm-msm@vger.kernel.org>);
	Wed, 14 Aug 2013 09:13:58 -0400
Received: from smtp.codeaurora.org (localhost [127.0.0.1])
	by smtp.codeaurora.org (Postfix) with ESMTP id 002CB13EF10;
	Wed, 14 Aug 2013 13:13:58 +0000 (UTC)
Received: by smtp.codeaurora.org (Postfix, from userid 486)
	id E752313EF89; Wed, 14 Aug 2013 13:13:57 +0000 (UTC)
X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org
X-Spam-Level: 
X-Spam-Status: No, score=-9.7 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_HI,
	RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=ham version=3.3.1
Received: from srthumm-ics.qualcomm.com (unknown [202.46.23.54])
	(using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits))
	(No client certificate requested)
	(Authenticated sender: sthumma@smtp.codeaurora.org)
	by smtp.codeaurora.org (Postfix) with ESMTPSA id AFE5713EF83;
	Wed, 14 Aug 2013 13:13:54 +0000 (UTC)
From: Sujit Reddy Thumma <sthumma@codeaurora.org>
To: Vinayak Holikatti <vinholikatti@gmail.com>,
	Santosh Y <santoshsy@gmail.com>
Cc: "James E.J. Bottomley" <JBottomley@parallels.com>,
	linux-scsi@vger.kernel.org, Sujit Reddy Thumma <sthumma@codeaurora.org>,
	linux-arm-msm@vger.kernel.org
Subject: [PATCH V6 3/4] scsi: ufs: Fix device and host reset methods
Date: Wed, 14 Aug 2013 18:43:22 +0530
Message-Id: <1376486003-19925-4-git-send-email-sthumma@codeaurora.org>
X-Mailer: git-send-email 1.7.8.3
In-Reply-To: <1376486003-19925-1-git-send-email-sthumma@codeaurora.org>
References: <n>
 <1376486003-19925-1-git-send-email-sthumma@codeaurora.org>
X-Virus-Scanned: ClamAV using ClamSMTP
Sender: linux-arm-msm-owner@vger.kernel.org
Precedence: bulk
List-ID: <linux-arm-msm.vger.kernel.org>
X-Mailing-List: linux-arm-msm@vger.kernel.org
X-Virus-Scanned: ClamAV using ClamSMTP

As of now SCSI initiated error handling is broken because,
the reset APIs don't try to bring back the device initialized and
ready for further transfers.

In case of timeouts, the scsi error handler takes care of handling aborts
and resets. Improve the error handling in such scenario by resetting the
device and host and re-initializing them in proper manner.

Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Reviewed-by: Yaniv Gardi <ygardi@codeaurora.org>
Tested-by: Dolev Raviv <draviv@codeaurora.org>
---
 drivers/scsi/ufs/ufshcd.c |  240 +++++++++++++++++++++++++++++++++++----------
 drivers/scsi/ufs/ufshcd.h |    2 +
 2 files changed, 189 insertions(+), 53 deletions(-)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 24c5ab3..dfa61be 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -69,9 +69,14 @@ enum {
 
 /* UFSHCD states */
 enum {
-	UFSHCD_STATE_OPERATIONAL,
 	UFSHCD_STATE_RESET,
 	UFSHCD_STATE_ERROR,
+	UFSHCD_STATE_OPERATIONAL,
+};
+
+/* UFSHCD error handling flags */
+enum {
+	UFSHCD_EH_IN_PROGRESS = (1 << 0),
 };
 
 /* Interrupt configuration options */
@@ -87,6 +92,16 @@ enum {
 	INT_AGGR_CONFIG,
 };
 
+#define ufshcd_set_eh_in_progress(h) \
+	(h->eh_flags |= UFSHCD_EH_IN_PROGRESS)
+#define ufshcd_eh_in_progress(h) \
+	(h->eh_flags & UFSHCD_EH_IN_PROGRESS)
+#define ufshcd_clear_eh_in_progress(h) \
+	(h->eh_flags &= ~UFSHCD_EH_IN_PROGRESS)
+
+static void ufshcd_tmc_handler(struct ufs_hba *hba);
+static void ufshcd_async_scan(void *data, async_cookie_t cookie);
+
 /*
  * ufshcd_wait_for_register - wait for register value to change
  * @hba - per-adapter interface
@@ -840,10 +855,25 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 
 	tag = cmd->request->tag;
 
-	if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	switch (hba->ufshcd_state) {
+	case UFSHCD_STATE_OPERATIONAL:
+		break;
+	case UFSHCD_STATE_RESET:
 		err = SCSI_MLQUEUE_HOST_BUSY;
-		goto out;
+		goto out_unlock;
+	case UFSHCD_STATE_ERROR:
+		set_host_byte(cmd, DID_ERROR);
+		cmd->scsi_done(cmd);
+		goto out_unlock;
+	default:
+		dev_WARN_ONCE(hba->dev, 1, "%s: invalid state %d\n",
+				__func__, hba->ufshcd_state);
+		set_host_byte(cmd, DID_BAD_TARGET);
+		cmd->scsi_done(cmd);
+		goto out_unlock;
 	}
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
 	/* acquire the tag to make sure device cmds don't use it */
 	if (test_and_set_bit_lock(tag, &hba->lrb_in_use)) {
@@ -880,6 +910,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 	/* issue command to the controller */
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	ufshcd_send_command(hba, tag);
+out_unlock:
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 out:
 	return err;
@@ -1495,8 +1526,6 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba)
 	if (hba->ufshcd_state == UFSHCD_STATE_RESET)
 		scsi_unblock_requests(hba->host);
 
-	hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
-
 out:
 	return err;
 }
@@ -2245,8 +2274,12 @@ static void ufshcd_err_handler(struct ufs_hba *hba)
 	}
 	return;
 fatal_eh:
-	hba->ufshcd_state = UFSHCD_STATE_ERROR;
-	schedule_work(&hba->feh_workq);
+	/* handle fatal errors only when link is functional */
+	if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
+		/* block commands at driver layer until error is handled */
+		hba->ufshcd_state = UFSHCD_STATE_ERROR;
+		schedule_work(&hba->feh_workq);
+	}
 }
 
 /**
@@ -2411,12 +2444,13 @@ static int ufshcd_issue_tm_cmd(struct ufs_hba *hba, int lun_id, int task_id,
 }
 
 /**
- * ufshcd_device_reset - reset device and abort all the pending commands
+ * ufshcd_eh_device_reset_handler - device reset handler registered to
+ *                                    scsi layer.
  * @cmd: SCSI command pointer
  *
  * Returns SUCCESS/FAILED
  */
-static int ufshcd_device_reset(struct scsi_cmnd *cmd)
+static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
 {
 	struct Scsi_Host *host;
 	struct ufs_hba *hba;
@@ -2425,6 +2459,7 @@ static int ufshcd_device_reset(struct scsi_cmnd *cmd)
 	int err;
 	u8 resp = 0xF;
 	struct ufshcd_lrb *lrbp;
+	unsigned long flags;
 
 	host = cmd->device->host;
 	hba = shost_priv(host);
@@ -2433,55 +2468,33 @@ static int ufshcd_device_reset(struct scsi_cmnd *cmd)
 	lrbp = &hba->lrb[tag];
 	err = ufshcd_issue_tm_cmd(hba, lrbp->lun, 0, UFS_LOGICAL_RESET, &resp);
 	if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
-		err = FAILED;
+		if (!err)
+			err = resp;
 		goto out;
-	} else {
-		err = SUCCESS;
 	}
 
-	for (pos = 0; pos < hba->nutrs; pos++) {
-		if (test_bit(pos, &hba->outstanding_reqs) &&
-		    (hba->lrb[tag].lun == hba->lrb[pos].lun)) {
-
-			/* clear the respective UTRLCLR register bit */
-			ufshcd_utrl_clear(hba, pos);
-
-			clear_bit(pos, &hba->outstanding_reqs);
-
-			if (hba->lrb[pos].cmd) {
-				scsi_dma_unmap(hba->lrb[pos].cmd);
-				hba->lrb[pos].cmd->result =
-					DID_ABORT << 16;
-				hba->lrb[pos].cmd->scsi_done(cmd);
-				hba->lrb[pos].cmd = NULL;
-				clear_bit_unlock(pos, &hba->lrb_in_use);
-				wake_up(&hba->dev_cmd.tag_wq);
-			}
+	/* clear the commands that were pending for corresponding LUN */
+	for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) {
+		if (hba->lrb[pos].lun == lrbp->lun) {
+			err = ufshcd_clear_cmd(hba, pos);
+			if (err)
+				break;
 		}
-	} /* end of for */
+	}
+	spin_lock_irqsave(host->host_lock, flags);
+	ufshcd_transfer_req_compl(hba);
+	spin_unlock_irqrestore(host->host_lock, flags);
 out:
+	if (!err) {
+		err = SUCCESS;
+	} else {
+		dev_err(hba->dev, "%s: failed with err %d\n", __func__, err);
+		err = FAILED;
+	}
 	return err;
 }
 
 /**
- * ufshcd_host_reset - Main reset function registered with scsi layer
- * @cmd: SCSI command pointer
- *
- * Returns SUCCESS/FAILED
- */
-static int ufshcd_host_reset(struct scsi_cmnd *cmd)
-{
-	struct ufs_hba *hba;
-
-	hba = shost_priv(cmd->device->host);
-
-	if (hba->ufshcd_state == UFSHCD_STATE_RESET)
-		return SUCCESS;
-
-	return ufshcd_do_reset(hba);
-}
-
-/**
  * ufshcd_abort - abort a specific command
  * @cmd: SCSI command pointer
  *
@@ -2579,6 +2592,122 @@ out:
 }
 
 /**
+ * ufshcd_host_reset_and_restore - reset and restore host controller
+ * @hba: per-adapter instance
+ *
+ * Note that host controller reset may issue DME_RESET to
+ * local and remote (device) Uni-Pro stack and the attributes
+ * are reset to default state.
+ *
+ * Returns zero on success, non-zero on failure
+ */
+static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
+{
+	int err;
+	async_cookie_t cookie;
+	unsigned long flags;
+
+	/* Reset the host controller */
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	ufshcd_hba_stop(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	err = ufshcd_hba_enable(hba);
+	if (err)
+		goto out;
+
+	/* Establish the link again and restore the device */
+	cookie = async_schedule(ufshcd_async_scan, hba);
+	/* wait for async scan to be completed */
+	async_synchronize_cookie(++cookie);
+	if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL)
+		err = -EIO;
+out:
+	if (err)
+		dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err);
+
+	return err;
+}
+
+/**
+ * ufshcd_reset_and_restore - reset and re-initialize host/device
+ * @hba: per-adapter instance
+ *
+ * Reset and recover device, host and re-establish link. This
+ * is helpful to recover the communication in fatal error conditions.
+ *
+ * Returns zero on success, non-zero on failure
+ */
+static int ufshcd_reset_and_restore(struct ufs_hba *hba)
+{
+	int err = 0;
+	unsigned long flags;
+
+	err = ufshcd_host_reset_and_restore(hba);
+
+	/*
+	 * After reset the door-bell might be cleared, complete
+	 * outstanding requests in s/w here.
+	 */
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	ufshcd_transfer_req_compl(hba);
+	ufshcd_tmc_handler(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	return err;
+}
+
+/**
+ * ufshcd_eh_host_reset_handler - host reset handler registered to scsi layer
+ * @cmd - SCSI command pointer
+ *
+ * Returns SUCCESS/FAILED
+ */
+static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
+{
+	int err;
+	unsigned long flags;
+	struct ufs_hba *hba;
+
+	hba = shost_priv(cmd->device->host);
+
+	/*
+	 * Check if there is any race with fatal error handling.
+	 * If so, wait for it to complete. Even though fatal error
+	 * handling does reset and restore in some cases, don't assume
+	 * anything out of it. We are just avoiding race here.
+	 */
+	do {
+		spin_lock_irqsave(hba->host->host_lock, flags);
+		if (!(work_pending(&hba->feh_workq) ||
+				hba->ufshcd_state == UFSHCD_STATE_RESET))
+			break;
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+		dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
+		flush_work_sync(&hba->feh_workq);
+	} while (1);
+
+	hba->ufshcd_state = UFSHCD_STATE_RESET;
+	ufshcd_set_eh_in_progress(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	err = ufshcd_reset_and_restore(hba);
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	if (!err) {
+		err = SUCCESS;
+		hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+	} else {
+		err = FAILED;
+		hba->ufshcd_state = UFSHCD_STATE_ERROR;
+	}
+	ufshcd_clear_eh_in_progress(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	return err;
+}
+
+/**
  * ufshcd_async_scan - asynchronous execution for link startup
  * @data: data pointer to pass to this function
  * @cookie: cookie data
@@ -2601,8 +2730,13 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie)
 		goto out;
 
 	ufshcd_force_reset_auto_bkops(hba);
-	scsi_scan_host(hba->host);
-	pm_runtime_put_sync(hba->dev);
+	hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+
+	/* If we are in error handling context no need to scan the host */
+	if (!ufshcd_eh_in_progress(hba)) {
+		scsi_scan_host(hba->host);
+		pm_runtime_put_sync(hba->dev);
+	}
 out:
 	return;
 }
@@ -2615,8 +2749,8 @@ static struct scsi_host_template ufshcd_driver_template = {
 	.slave_alloc		= ufshcd_slave_alloc,
 	.slave_destroy		= ufshcd_slave_destroy,
 	.eh_abort_handler	= ufshcd_abort,
-	.eh_device_reset_handler = ufshcd_device_reset,
-	.eh_host_reset_handler	= ufshcd_host_reset,
+	.eh_device_reset_handler = ufshcd_eh_device_reset_handler,
+	.eh_host_reset_handler   = ufshcd_eh_host_reset_handler,
 	.this_id		= -1,
 	.sg_tablesize		= SG_ALL,
 	.cmd_per_lun		= UFSHCD_CMD_PER_LUN,
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index fe7c947..1e0585c 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -179,6 +179,7 @@ struct ufs_dev_cmd {
  * @tm_condition: condition variable for task management
  * @tm_slots_in_use: bit map of task management request slots in use
  * @ufshcd_state: UFSHCD states
+ * @eh_flags: Error handling flags
  * @intr_mask: Interrupt Mask Bits
  * @ee_ctrl_mask: Exception event control mask
  * @feh_workq: Work queue for fatal controller error handling
@@ -224,6 +225,7 @@ struct ufs_hba {
 	unsigned long tm_slots_in_use;
 
 	u32 ufshcd_state;
+	u32 eh_flags;
 	u32 intr_mask;
 	u16 ee_ctrl_mask;