diff mbox series

[v1,1/1] scsi: ufs: Add command logging infrastructure

Message ID 1571808560-3965-1-git-send-email-cang@codeaurora.org (mailing list archive)
State Changes Requested
Headers show
Series [v1,1/1] scsi: ufs: Add command logging infrastructure | expand

Commit Message

Can Guo Oct. 23, 2019, 5:29 a.m. UTC
Add the necessary infrastructure to keep timestamp history
of commands, events and other useful info for debugging
complex issues. This helps in diagnosing events leading
upto failure.

Signed-off-by: Can Guo <cang@codeaurora.org>
---
 drivers/scsi/ufs/Kconfig  |  12 +++
 drivers/scsi/ufs/ufshcd.c | 214 +++++++++++++++++++++++++++++++++++++++-------
 drivers/scsi/ufs/ufshcd.h |  24 +++++-
 3 files changed, 218 insertions(+), 32 deletions(-)

Comments

Winkler, Tomas Oct. 23, 2019, 7:25 a.m. UTC | #1
> Add the necessary infrastructure to keep timestamp history of commands,
> events and other useful info for debugging complex issues. This helps in
> diagnosing events leading upto failure.

Why not use tracepoints, for that?
Thanks
Tomas

> Signed-off-by: Can Guo <cang@codeaurora.org>
> ---
>  drivers/scsi/ufs/Kconfig  |  12 +++
>  drivers/scsi/ufs/ufshcd.c | 214
> +++++++++++++++++++++++++++++++++++++++-------
>  drivers/scsi/ufs/ufshcd.h |  24 +++++-
>  3 files changed, 218 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig index
> 0b845ab..afc70cb 100644
> --- a/drivers/scsi/ufs/Kconfig
> +++ b/drivers/scsi/ufs/Kconfig
> @@ -50,6 +50,18 @@ config SCSI_UFSHCD
>  	  However, do not compile this as a module if your root file system
>  	  (the one containing the directory /) is located on a UFS device.
> 
> +config SCSI_UFSHCD_CMD_LOGGING
> +	bool "Universal Flash Storage host controller driver layer command
> logging support"
> +	depends on SCSI_UFSHCD
> +	help
> +	  This selects the UFS host controller driver layer command logging.
> +	  UFS host controller driver layer command logging records all the
> +	  command information sent from UFS host controller for debugging
> +	  purpose.
> +
> +	  Select this if you want above mentioned debug information captured.
> +	  If unsure, say N.
> +
>  config SCSI_UFSHCD_PCI
>  	tristate "PCI bus based UFS Controller support"
>  	depends on SCSI_UFSHCD && PCI
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index
> c28c144..f3faa85 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -91,6 +91,9 @@
>  /* default delay of autosuspend: 2000 ms */  #define
> RPM_AUTOSUSPEND_DELAY_MS 2000
> 
> +/* Maximum command logging entries */
> +#define UFSHCD_MAX_CMD_LOGGING	20
> +
>  #define ufshcd_toggle_vreg(_dev, _vreg, _on)				\
>  	({                                                              \
>  		int _ret;                                               \
> @@ -328,14 +331,135 @@ static void ufshcd_add_tm_upiu_trace(struct
> ufs_hba *hba, unsigned int tag,
>  			&descp->input_param1);
>  }
> 
> -static void ufshcd_add_command_trace(struct ufs_hba *hba,
> -		unsigned int tag, const char *str)
> +static inline void ufshcd_add_command_trace(struct ufs_hba *hba,
> +			struct ufshcd_cmd_log_entry *entry)
> +{
> +	if (trace_ufshcd_command_enabled()) {
> +		u32 intr = ufshcd_readl(hba, REG_INTERRUPT_STATUS);
> +
> +		trace_ufshcd_command(dev_name(hba->dev), entry->str,
> entry->tag,
> +				     entry->doorbell, entry->transfer_len, intr,
> +				     entry->lba, entry->cmd_id);
> +	}
> +}
> +
> +#ifdef CONFIG_SCSI_UFSHCD_CMD_LOGGING
> +static void ufshcd_cmd_log_init(struct ufs_hba *hba) {
> +	/* Allocate log entries */
> +	if (!hba->cmd_log.entries) {
> +		hba->cmd_log.entries =
> kcalloc(UFSHCD_MAX_CMD_LOGGING,
> +			sizeof(struct ufshcd_cmd_log_entry), GFP_KERNEL);
> +		if (!hba->cmd_log.entries)
> +			return;
> +		dev_dbg(hba->dev, "%s: cmd_log.entries initialized\n",
> +				__func__);
> +	}
> +}
> +
> +static void __ufshcd_cmd_log(struct ufs_hba *hba, char *str, char *cmd_type,
> +			     unsigned int tag, u8 cmd_id, u8 idn, u8 lun,
> +			     sector_t lba, int transfer_len) {
> +	struct ufshcd_cmd_log_entry *entry;
> +
> +	if (!hba->cmd_log.entries)
> +		return;
> +
> +	entry = &hba->cmd_log.entries[hba->cmd_log.pos];
> +	entry->lun = lun;
> +	entry->str = str;
> +	entry->cmd_type = cmd_type;
> +	entry->cmd_id = cmd_id;
> +	entry->lba = lba;
> +	entry->transfer_len = transfer_len;
> +	entry->idn = idn;
> +	entry->doorbell = ufshcd_readl(hba,
> REG_UTP_TRANSFER_REQ_DOOR_BELL);
> +	entry->tag = tag;
> +	entry->tstamp = ktime_get();
> +	entry->outstanding_reqs = hba->outstanding_reqs;
> +	entry->seq_num = hba->cmd_log.seq_num;
> +	hba->cmd_log.seq_num++;
> +	hba->cmd_log.pos = (hba->cmd_log.pos + 1) %
> UFSHCD_MAX_CMD_LOGGING;
> +
> +	ufshcd_add_command_trace(hba, entry);
> +}
> +
> +static void ufshcd_cmd_log(struct ufs_hba *hba, char *str, char *cmd_type,
> +	unsigned int tag, u8 cmd_id, u8 idn)
> +{
> +	__ufshcd_cmd_log(hba, str, cmd_type, tag, cmd_id, idn, 0, 0, 0); }
> +
> +static void ufshcd_dme_cmd_log(struct ufs_hba *hba, char *str, u8
> +cmd_id) {
> +	ufshcd_cmd_log(hba, str, "dme", 0, cmd_id, 0); }
> +
> +static void ufshcd_print_cmd_log(struct ufs_hba *hba) {
> +	int i;
> +	int pos;
> +	struct ufshcd_cmd_log_entry *p;
> +
> +	if (!hba->cmd_log.entries)
> +		return;
> +
> +	pos = hba->cmd_log.pos;
> +	for (i = 0; i < UFSHCD_MAX_CMD_LOGGING; i++) {
> +		p = &hba->cmd_log.entries[pos];
> +		pos = (pos + 1) % UFSHCD_MAX_CMD_LOGGING;
> +
> +		if (ktime_to_us(p->tstamp)) {
> +			pr_err("%s: %s: seq_no=%u lun=0x%x cmd_id=0x%02x
> lba=0x%llx txfer_len=%d tag=%u, doorbell=0x%x outstanding=0x%x idn=%d
> time=%lld us\n",
> +				p->cmd_type, p->str, p->seq_num,
> +				p->lun, p->cmd_id, (unsigned long long)p->lba,
> +				p->transfer_len, p->tag, p->doorbell,
> +				p->outstanding_reqs, p->idn,
> +				ktime_to_us(p->tstamp));
> +				usleep_range(1000, 1100);
> +		}
> +	}
> +}
> +#else
> +static void ufshcd_cmd_log_init(struct ufs_hba *hba) { }
> +
> +static void __ufshcd_cmd_log(struct ufs_hba *hba, char *str, char *cmd_type,
> +			     unsigned int tag, u8 cmd_id, u8 idn, u8 lun,
> +			     sector_t lba, int transfer_len) {
> +	struct ufshcd_cmd_log_entry entry;
> +
> +	entry.str = str;
> +	entry.lba = lba;
> +	entry.cmd_id = cmd_id;
> +	entry.transfer_len = transfer_len;
> +	entry.doorbell = ufshcd_readl(hba,
> REG_UTP_TRANSFER_REQ_DOOR_BELL);
> +	entry.tag = tag;
> +
> +	ufshcd_add_command_trace(hba, &entry); }
> +
> +static void ufshcd_dme_cmd_log(struct ufs_hba *hba, char *str, u8
> +cmd_id) { }
> +
> +static void ufshcd_print_cmd_log(struct ufs_hba *hba) { } #endif
> +
> +static inline void ufshcd_cond_add_cmd_trace(struct ufs_hba *hba,
> +					unsigned int tag, const char *str)
>  {
> -	sector_t lba = -1;
> -	u8 opcode = 0;
> -	u32 intr, doorbell;
>  	struct ufshcd_lrb *lrbp = &hba->lrb[tag];
> -	int transfer_len = -1;
> +	char *cmd_type = NULL;
> +	u8 opcode = 0;
> +	u8 cmd_id = 0, idn = 0;
> +	sector_t lba = 0;
> +	int transfer_len = 0;
> 
>  	if (!trace_ufshcd_command_enabled()) {
>  		/* trace UPIU W/O tracing command */
> @@ -361,10 +485,23 @@ static void ufshcd_add_command_trace(struct
> ufs_hba *hba,
>  		}
>  	}
> 
> -	intr = ufshcd_readl(hba, REG_INTERRUPT_STATUS);
> -	doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
> -	trace_ufshcd_command(dev_name(hba->dev), str, tag,
> -				doorbell, transfer_len, intr, lba, opcode);
> +	if (lrbp->cmd && ((lrbp->command_type == UTP_CMD_TYPE_SCSI) ||
> +			  (lrbp->command_type ==
> UTP_CMD_TYPE_UFS_STORAGE))) {
> +		cmd_type = "scsi";
> +		cmd_id = (u8)(*lrbp->cmd->cmnd);
> +	} else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE) {
> +		if (hba->dev_cmd.type == DEV_CMD_TYPE_NOP) {
> +			cmd_type = "nop";
> +			cmd_id = 0;
> +		} else if (hba->dev_cmd.type == DEV_CMD_TYPE_QUERY) {
> +			cmd_type = "query";
> +			cmd_id = hba-
> >dev_cmd.query.request.upiu_req.opcode;
> +			idn = hba->dev_cmd.query.request.upiu_req.idn;
> +		}
> +	}
> +
> +	__ufshcd_cmd_log(hba, (char *) str, cmd_type, tag, cmd_id, idn,
> +			 lrbp->lun, lba, transfer_len);
>  }
> 
>  static void ufshcd_print_clk_freqs(struct ufs_hba *hba) @@ -1886,7 +2023,8
> @@ void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
>  	ufshcd_writel(hba, 1 << task_tag,
> REG_UTP_TRANSFER_REQ_DOOR_BELL);
>  	/* Make sure that doorbell is committed immediately */
>  	wmb();
> -	ufshcd_add_command_trace(hba, task_tag, "send");
> +	ufshcd_cond_add_cmd_trace(hba, task_tag,
> +			hba->lrb[task_tag].cmd ? "scsi_send" :
> "dev_cmd_send");
>  }
> 
>  /**
> @@ -2001,6 +2139,7 @@ static inline u8 ufshcd_get_upmcrs(struct ufs_hba
> *hba)
> 
>  	hba->active_uic_cmd = uic_cmd;
> 
> +	ufshcd_dme_cmd_log(hba, "dme_send", hba->active_uic_cmd-
> >command);
>  	/* Write Args */
>  	ufshcd_writel(hba, uic_cmd->argument1,
> REG_UIC_COMMAND_ARG_1);
>  	ufshcd_writel(hba, uic_cmd->argument2,
> REG_UIC_COMMAND_ARG_2); @@ -2031,6 +2170,8 @@ static inline u8
> ufshcd_get_upmcrs(struct ufs_hba *hba)
>  	else
>  		ret = -ETIMEDOUT;
> 
> +	ufshcd_dme_cmd_log(hba, "dme_cmpl_1", hba->active_uic_cmd-
> >command);
> +
>  	spin_lock_irqsave(hba->host->host_lock, flags);
>  	hba->active_uic_cmd = NULL;
>  	spin_unlock_irqrestore(hba->host->host_lock, flags); @@ -3797,11
> +3938,14 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct
> uic_command *cmd)
>  			cmd->command, status);
>  		ret = (status != PWR_OK) ? status : -1;
>  	}
> +	ufshcd_dme_cmd_log(hba, "dme_cmpl_2", hba->active_uic_cmd-
> >command);
> +
>  out:
>  	if (ret) {
>  		ufshcd_print_host_state(hba);
>  		ufshcd_print_pwr_info(hba);
>  		ufshcd_print_host_regs(hba);
> +		ufshcd_print_cmd_log(hba);
>  	}
> 
>  	spin_lock_irqsave(hba->host->host_lock, flags); @@ -4712,6 +4856,7
> @@ static void ufshcd_slave_destroy(struct scsi_device *sdev)
>  	int result = 0;
>  	int scsi_status;
>  	int ocs;
> +	bool print_prdt;
> 
>  	/* overall command status of utrd */
>  	ocs = ufshcd_get_tr_ocs(lrbp);
> @@ -4787,8 +4932,11 @@ static void ufshcd_slave_destroy(struct scsi_device
> *sdev)
>  		break;
>  	} /* end of switch */
> 
> -	if (host_byte(result) != DID_OK)
> -		ufshcd_print_trs(hba, 1 << lrbp->task_tag, true);
> +	if (host_byte(result) != DID_OK) {
> +		print_prdt = (ocs == OCS_INVALID_PRDT_ATTR ||
> +				ocs == OCS_MISMATCH_DATA_BUF_SIZE);
> +		ufshcd_print_trs(hba, 1 << lrbp->task_tag, print_prdt);
> +	}
>  	return result;
>  }
> 
> @@ -4828,7 +4976,7 @@ static void __ufshcd_transfer_req_compl(struct
> ufs_hba *hba,
>  		lrbp = &hba->lrb[index];
>  		cmd = lrbp->cmd;
>  		if (cmd) {
> -			ufshcd_add_command_trace(hba, index, "complete");
> +			ufshcd_cond_add_cmd_trace(hba, index, "scsi_cmpl");
>  			result = ufshcd_transfer_rsp_status(hba, lrbp);
>  			scsi_dma_unmap(cmd);
>  			cmd->result = result;
> @@ -4841,8 +4989,8 @@ static void __ufshcd_transfer_req_compl(struct
> ufs_hba *hba,
>  		} else if (lrbp->command_type ==
> UTP_CMD_TYPE_DEV_MANAGE ||
>  			lrbp->command_type ==
> UTP_CMD_TYPE_UFS_STORAGE) {
>  			if (hba->dev_cmd.complete) {
> -				ufshcd_add_command_trace(hba, index,
> -						"dev_complete");
> +				ufshcd_cond_add_cmd_trace(hba, index,
> +						"dev_cmd_cmpl");
>  				complete(hba->dev_cmd.complete);
>  			}
>  		}
> @@ -5307,6 +5455,23 @@ static void ufshcd_err_handler(struct work_struct
> *work)
>  		if (!ret)
>  			goto skip_err_handling;
>  	}
> +
> +	/*
> +	 * Dump controller state before resetting. Transfer requests state
> +	 * will be dump as part of the request completion.
> +	 */
> +	if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR)) {
> +		dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err
> 0x%x\n",
> +			__func__, hba->saved_err, hba->saved_uic_err);
> +		spin_unlock_irqrestore(hba->host->host_lock, flags);
> +		ufshcd_print_host_regs(hba);
> +		ufshcd_print_host_state(hba);
> +		ufshcd_print_pwr_info(hba);
> +		ufshcd_print_tmrs(hba, hba->outstanding_tasks);
> +		ufshcd_print_cmd_log(hba);
> +		spin_lock_irqsave(hba->host->host_lock, flags);
> +	}
> +
>  	if ((hba->saved_err & INT_FATAL_ERRORS) ||
>  	    (hba->saved_err & UFSHCD_UIC_HIBERN8_MASK) ||
>  	    ((hba->saved_err & UIC_ERROR) &&
> @@ -5523,21 +5688,6 @@ static void ufshcd_check_errors(struct ufs_hba
> *hba)
> 
>  			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED;
> 
> -			/* dump controller state before resetting */
> -			if (hba->saved_err & (INT_FATAL_ERRORS |
> UIC_ERROR)) {
> -				bool pr_prdt = !!(hba->saved_err &
> -						SYSTEM_BUS_FATAL_ERROR);
> -
> -				dev_err(hba->dev, "%s: saved_err 0x%x
> saved_uic_err 0x%x\n",
> -					__func__, hba->saved_err,
> -					hba->saved_uic_err);
> -
> -				ufshcd_print_host_regs(hba);
> -				ufshcd_print_pwr_info(hba);
> -				ufshcd_print_tmrs(hba, hba-
> >outstanding_tasks);
> -				ufshcd_print_trs(hba, hba->outstanding_reqs,
> -							pr_prdt);
> -			}
>  			schedule_work(&hba->eh_work);
>  		}
>  	}
> @@ -8425,6 +8575,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem
> *mmio_base, unsigned int irq)
>  	 */
>  	ufshcd_set_ufs_dev_active(hba);
> 
> +	ufshcd_cmd_log_init(hba);
> +
>  	async_schedule(ufshcd_async_scan, hba);
>  	ufs_sysfs_add_nodes(hba->dev);
> 
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index
> e0fe247..97cc5b0 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -414,7 +414,7 @@ struct ufs_init_prefetch {
>  	u32 icc_level;
>  };
> 
> -#define UFS_ERR_REG_HIST_LENGTH 8
> +#define UIC_ERR_REG_HIST_LENGTH 20
>  /**
>   * struct ufs_err_reg_hist - keeps history of errors
>   * @pos: index to indicate cyclic buffer position @@ -471,6 +471,27 @@ struct
> ufs_stats {
>  	struct ufs_err_reg_hist task_abort;
>  };
> 
> +struct ufshcd_cmd_log_entry {
> +	char *str;	/* context like "send", "complete" */
> +	char *cmd_type;	/* "scsi", "query", "nop", "dme" */
> +	u8 lun;
> +	u8 cmd_id;
> +	sector_t lba;
> +	int transfer_len;
> +	u8 idn;		/* used only for query idn */
> +	u32 doorbell;
> +	u32 outstanding_reqs;
> +	u32 seq_num;
> +	unsigned int tag;
> +	ktime_t tstamp;
> +};
> +
> +struct ufshcd_cmd_log {
> +	struct ufshcd_cmd_log_entry *entries;
> +	int pos;
> +	u32 seq_num;
> +};
> +
>  /**
>   * struct ufs_hba - per adapter private structure
>   * @mmio_base: UFSHCI base register address @@ -692,6 +713,7 @@ struct
> ufs_hba {
>  	struct ufs_pwr_mode_info max_pwr_info;
> 
>  	struct ufs_clk_gating clk_gating;
> +	struct ufshcd_cmd_log cmd_log;
>  	/* Control to enable/disable host capabilities */
>  	u32 caps;
>  	/* Allow dynamic clk gating */
> --
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project
Avri Altman Oct. 23, 2019, 10:33 a.m. UTC | #2
> 
> > Add the necessary infrastructure to keep timestamp history of
> > commands, events and other useful info for debugging complex issues.
> > This helps in diagnosing events leading upto failure.
> 
> Why not use tracepoints, for that?
Ack on Tomas's comment.
Are there any pieces of information that you need not provided by the upiu tracer?

Thanks,
Avri
Can Guo Oct. 29, 2019, 2:37 a.m. UTC | #3
On 2019-10-23 18:33, Avri Altman wrote:
>> 
>> > Add the necessary infrastructure to keep timestamp history of
>> > commands, events and other useful info for debugging complex issues.
>> > This helps in diagnosing events leading upto failure.
>> 
>> Why not use tracepoints, for that?
> Ack on Tomas's comment.
> Are there any pieces of information that you need not provided by the
> upiu tracer?
> 
> Thanks,
> Avri

In extreme cases, when the UFS runs into bad state, system may crash. 
There may not be a chance to collect trace. If trace is not collected 
and failure is hard to be reproduced, some command logs prints would be 
very helpful to help understand what was going on before we run into 
failure.

Thanks,
Can Guo
Bjorn Andersson Oct. 29, 2019, 5:46 a.m. UTC | #4
On Mon 28 Oct 19:37 PDT 2019, cang@codeaurora.org wrote:

> On 2019-10-23 18:33, Avri Altman wrote:
> > > 
> > > > Add the necessary infrastructure to keep timestamp history of
> > > > commands, events and other useful info for debugging complex issues.
> > > > This helps in diagnosing events leading upto failure.
> > > 
> > > Why not use tracepoints, for that?
> > Ack on Tomas's comment.
> > Are there any pieces of information that you need not provided by the
> > upiu tracer?
> > 
> > Thanks,
> > Avri
> 
> In extreme cases, when the UFS runs into bad state, system may crash. There
> may not be a chance to collect trace. If trace is not collected and failure
> is hard to be reproduced, some command logs prints would be very helpful to
> help understand what was going on before we run into failure.
> 

This is a common problem shared among many/all subsystems, so it's
better to rely on a generic solution for this; such as using tracepoints
dumped into pstore/ramoops.

Regards,
Bjorn
Asutosh Das (asd) Jan. 16, 2020, 11:03 p.m. UTC | #5
On 10/28/2019 10:46 PM, Bjorn Andersson wrote:
> On Mon 28 Oct 19:37 PDT 2019, cang@codeaurora.org wrote:
> 
>> On 2019-10-23 18:33, Avri Altman wrote:
>>>>
>>>>> Add the necessary infrastructure to keep timestamp history of
>>>>> commands, events and other useful info for debugging complex issues.
>>>>> This helps in diagnosing events leading upto failure.
>>>>
>>>> Why not use tracepoints, for that?
>>> Ack on Tomas's comment.
>>> Are there any pieces of information that you need not provided by the
>>> upiu tracer?
>>>
>>> Thanks,
>>> Avri
>>
>> In extreme cases, when the UFS runs into bad state, system may crash. There
>> may not be a chance to collect trace. If trace is not collected and failure
>> is hard to be reproduced, some command logs prints would be very helpful to
>> help understand what was going on before we run into failure.
>>
> 
> This is a common problem shared among many/all subsystems, so it's
> better to rely on a generic solution for this; such as using tracepoints
> dumped into pstore/ramoops.
> 
> Regards,
> Bjorn
> 

Reviving this discussion.

Another issue with using ftrace is that several subsystems use it.
Consider a situation in which we store a history of 64 commands, 
responses and some other required info in ftrace.

Say there's a command that's stuck for seconds until the software times 
out. In this case, the other ftrace events are still enabled and keep 
writing to ftrace buffer thus overwriting some/most of the ufs's command 
history; thus the history is more or less lost. And there's a need to 
reproduce the issue with other tracing disabled.

So what we need is a client specific logging mechanism, which is 
lightweight as ftrace and can be parsed from ramdumps.

I'm open to ideas but ftrace in its current form may not be suitable for 
this.
Bart Van Assche Jan. 17, 2020, 3:43 a.m. UTC | #6
On 2020-01-16 15:03, Asutosh Das (asd) wrote:
> On 10/28/2019 10:46 PM, Bjorn Andersson wrote:
>> On Mon 28 Oct 19:37 PDT 2019, cang@codeaurora.org wrote:
>>
>>> On 2019-10-23 18:33, Avri Altman wrote:
>>>>>
>>>>>> Add the necessary infrastructure to keep timestamp history of
>>>>>> commands, events and other useful info for debugging complex issues.
>>>>>> This helps in diagnosing events leading upto failure.
>>>>>
>>>>> Why not use tracepoints, for that?
>>>> Ack on Tomas's comment.
>>>> Are there any pieces of information that you need not provided by the
>>>> upiu tracer?
>>>>
>>>> Thanks,
>>>> Avri
>>>
>>> In extreme cases, when the UFS runs into bad state, system may crash.
>>> There
>>> may not be a chance to collect trace. If trace is not collected and
>>> failure
>>> is hard to be reproduced, some command logs prints would be very
>>> helpful to
>>> help understand what was going on before we run into failure.
>>>
>>
>> This is a common problem shared among many/all subsystems, so it's
>> better to rely on a generic solution for this; such as using tracepoints
>> dumped into pstore/ramoops.
>>
>> Regards,
>> Bjorn
>>
> 
> Reviving this discussion.
> 
> Another issue with using ftrace is that several subsystems use it.
> Consider a situation in which we store a history of 64 commands,
> responses and some other required info in ftrace.
> 
> Say there's a command that's stuck for seconds until the software times
> out. In this case, the other ftrace events are still enabled and keep
> writing to ftrace buffer thus overwriting some/most of the ufs's command
> history; thus the history is more or less lost. And there's a need to
> reproduce the issue with other tracing disabled.
> 
> So what we need is a client specific logging mechanism, which is
> lightweight as ftrace and can be parsed from ramdumps.
> 
> I'm open to ideas but ftrace in its current form may not be suitable for
> this.

Hi Asutosh,

Are you aware that it is already possible today to figure out which SCSI
commands are pending? Are you familiar with the files under
/sys/kernel/debug/block/? An example:

$ (cd /sys/kernel/debug/block && grep -aH . */*/busy)
nvme0n1/hctx7/busy:0000000006f07009 {.op=READ, .cmd_flags=META|PRIO,
.rq_flags=DONTPREP|IO_STAT, .state=in_flight, .tag=275, .internal_tag=-1}

Bart.
Asutosh Das (asd) Jan. 20, 2020, 6:46 p.m. UTC | #7
On 1/16/2020 7:43 PM, Bart Van Assche wrote:
> On 2020-01-16 15:03, Asutosh Das (asd) wrote:
>> On 10/28/2019 10:46 PM, Bjorn Andersson wrote:
>>> On Mon 28 Oct 19:37 PDT 2019, cang@codeaurora.org wrote:
>>>
>>>> On 2019-10-23 18:33, Avri Altman wrote:
>>>>>>
>>>>>>> Add the necessary infrastructure to keep timestamp history of
>>>>>>> commands, events and other useful info for debugging complex issues.
>>>>>>> This helps in diagnosing events leading upto failure.
>>>>>>
>>>>>> Why not use tracepoints, for that?
>>>>> Ack on Tomas's comment.
>>>>> Are there any pieces of information that you need not provided by the
>>>>> upiu tracer?
>>>>>
>>>>> Thanks,
>>>>> Avri
>>>>
>>>> In extreme cases, when the UFS runs into bad state, system may crash.
>>>> There
>>>> may not be a chance to collect trace. If trace is not collected and
>>>> failure
>>>> is hard to be reproduced, some command logs prints would be very
>>>> helpful to
>>>> help understand what was going on before we run into failure.
>>>>
>>>
>>> This is a common problem shared among many/all subsystems, so it's
>>> better to rely on a generic solution for this; such as using tracepoints
>>> dumped into pstore/ramoops.
>>>
>>> Regards,
>>> Bjorn
>>>
>>
>> Reviving this discussion.
>>
>> Another issue with using ftrace is that several subsystems use it.
>> Consider a situation in which we store a history of 64 commands,
>> responses and some other required info in ftrace.
>>
>> Say there's a command that's stuck for seconds until the software times
>> out. In this case, the other ftrace events are still enabled and keep
>> writing to ftrace buffer thus overwriting some/most of the ufs's command
>> history; thus the history is more or less lost. And there's a need to
>> reproduce the issue with other tracing disabled.
>>
>> So what we need is a client specific logging mechanism, which is
>> lightweight as ftrace and can be parsed from ramdumps.
>>
>> I'm open to ideas but ftrace in its current form may not be suitable for
>> this.
> 
> Hi Asutosh,
> 
> Are you aware that it is already possible today to figure out which SCSI
> commands are pending? Are you familiar with the files under
> /sys/kernel/debug/block/? An example:
> 
> $ (cd /sys/kernel/debug/block && grep -aH . */*/busy)
> nvme0n1/hctx7/busy:0000000006f07009 {.op=READ, .cmd_flags=META|PRIO,
> .rq_flags=DONTPREP|IO_STAT, .state=in_flight, .tag=275, .internal_tag=-1}
> 
> Bart.
> 
> 

Hi Bart,

No I was not aware of this. Thanks for letting me know.

However, it doesn't look like this stores the _history_ of commands that 
were already executed.

To put it in perspective, below is the output of the current command 
logging -:

[     461.142124354s] dme: dme_send: seq_no=  738272 lun=0 cmd_id=0x018 
lba=        0 txfer_len=       0 tag= 0 doorbell=0x0000 
outstanding=0x0000 idn= 0
[     461.142384823s] dme: dme_cmpl_2: seq_no=  738273 lun=0 
cmd_id=0x018 lba=        0 txfer_len=       0 tag= 0 doorbell=0x0000 
outstanding=0x0000 idn= 0
[     461.142453052s] scsi: scsi_send: seq_no=  738274 lun=0 
cmd_id=0x028 lba=  1360888 txfer_len=   77824 tag= 0 doorbell=0x0001 
outstanding=0x0001 idn= 0
[     461.142473000s] scsi: scsi_send: seq_no=  738275 lun=0 
cmd_id=0x028 lba=  1361056 txfer_len=   12288 tag= 1 doorbell=0x0003 
outstanding=0x0003 idn= 0
[     461.142482270s] scsi: scsi_send: seq_no=  738276 lun=0 
cmd_id=0x028 lba=  1361088 txfer_len=    4096 tag= 2 doorbell=0x0007 
outstanding=0x0007 idn= 0
[     461.142491020s] scsi: scsi_send: seq_no=  738277 lun=0 
cmd_id=0x028 lba=  1361120 txfer_len=    4096 tag= 3 doorbell=0x000F 
outstanding=0x000F idn= 0
[     461.142538208s] scsi: scsi_send: seq_no=  738278 lun=0 
cmd_id=0x028 lba=  1361144 txfer_len=    4096 tag= 4 doorbell=0x001F 
outstanding=0x001F idn= 0
[     461.142780083s] scsi: scsi_send: seq_no=  738279 lun=0 
cmd_id=0x028 lba=  1361184 txfer_len=   20480 tag= 5 doorbell=0x003F 
outstanding=0x003F idn= 0
[     461.142922791s] scsi: scsi_cmpl: seq_no=  738280 lun=0 
cmd_id=0x028 lba=  1360888 txfer_len=   77824 tag= 0 doorbell=0x003E 
outstanding=0x003F idn= 0
[     461.142982323s] scsi: scsi_cmpl: seq_no=  738281 lun=0 
cmd_id=0x028 lba=  1361056 txfer_len=   12288 tag= 1 doorbell=0x003C 
outstanding=0x003E idn= 0
[     461.142996958s] scsi: scsi_cmpl: seq_no=  738282 lun=0 
cmd_id=0x028 lba=  1361088 txfer_len=    4096 tag= 2 doorbell=0x0038 
outstanding=0x003C idn= 0
[     461.143059406s] scsi: scsi_cmpl: seq_no=  738283 lun=0 
cmd_id=0x028 lba=  1361120 txfer_len=    4096 tag= 3 doorbell=0x0020 
outstanding=0x0038 idn= 0
[     461.143064666s] scsi: scsi_cmpl: seq_no=  738284 lun=0 
cmd_id=0x028 lba=  1361144 txfer_len=    4096 tag= 4 doorbell=0x0020 
outstanding=0x0038 idn= 0

[...]

This output was extracted from ramdumps after a crash.
There're custom logs that come up as well, for e.g. scaling start and 
end etc.

In a nutshell, the high-level requirements of the logging would be -:
1. Light-weight
2. Extract from ram-dumps
3. Eliminate the need to reproduce an issue as much as possible

Ftrace ticks all the boxes except 3. There's a high chance of this data 
being overwritten by other loggers. And when this happens, the issue 
needs to be reproduced with ftrace logging enabled only for ufs.

*Motivation for this logging*:

There are several (read - all) UFS device vendors who need the sequence 
of commands leading up to the specific failure. They've asked for it 
every time. They still do.

There were 2 ways of doing this - either in device or in host.

1. In-Device-:
	The device stores a ring buffer of commands that were received.
	Upon error, host sends a command to device to read this ring 	buffer 
and dumps it out or can be extracted from ramdumps. It'd not be a 
universal solution anyway and would be very vendor-specific. Difficult 
to maintain etc. May have to be added to JEDEC to standardize it.

2. In-host-:
	Ftrace was tried out and the previously highlighted issues popped up. 
Hence the need for a UFS (client)-specific implementation.

If there's a way to have a client-specific ftrace logging, it'd be the 
ideal thing to do.

How about modifying ftrace to make it client-specific?

Any suggestions to modify the current design such that it can use ftrace 
or any other better ideas or methods, please let me know.

-asd
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig
index 0b845ab..afc70cb 100644
--- a/drivers/scsi/ufs/Kconfig
+++ b/drivers/scsi/ufs/Kconfig
@@ -50,6 +50,18 @@  config SCSI_UFSHCD
 	  However, do not compile this as a module if your root file system
 	  (the one containing the directory /) is located on a UFS device.
 
+config SCSI_UFSHCD_CMD_LOGGING
+	bool "Universal Flash Storage host controller driver layer command logging support"
+	depends on SCSI_UFSHCD
+	help
+	  This selects the UFS host controller driver layer command logging.
+	  UFS host controller driver layer command logging records all the
+	  command information sent from UFS host controller for debugging
+	  purpose.
+
+	  Select this if you want above mentioned debug information captured.
+	  If unsure, say N.
+
 config SCSI_UFSHCD_PCI
 	tristate "PCI bus based UFS Controller support"
 	depends on SCSI_UFSHCD && PCI
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index c28c144..f3faa85 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -91,6 +91,9 @@ 
 /* default delay of autosuspend: 2000 ms */
 #define RPM_AUTOSUSPEND_DELAY_MS 2000
 
+/* Maximum command logging entries */
+#define UFSHCD_MAX_CMD_LOGGING	20
+
 #define ufshcd_toggle_vreg(_dev, _vreg, _on)				\
 	({                                                              \
 		int _ret;                                               \
@@ -328,14 +331,135 @@  static void ufshcd_add_tm_upiu_trace(struct ufs_hba *hba, unsigned int tag,
 			&descp->input_param1);
 }
 
-static void ufshcd_add_command_trace(struct ufs_hba *hba,
-		unsigned int tag, const char *str)
+static inline void ufshcd_add_command_trace(struct ufs_hba *hba,
+			struct ufshcd_cmd_log_entry *entry)
+{
+	if (trace_ufshcd_command_enabled()) {
+		u32 intr = ufshcd_readl(hba, REG_INTERRUPT_STATUS);
+
+		trace_ufshcd_command(dev_name(hba->dev), entry->str, entry->tag,
+				     entry->doorbell, entry->transfer_len, intr,
+				     entry->lba, entry->cmd_id);
+	}
+}
+
+#ifdef CONFIG_SCSI_UFSHCD_CMD_LOGGING
+static void ufshcd_cmd_log_init(struct ufs_hba *hba)
+{
+	/* Allocate log entries */
+	if (!hba->cmd_log.entries) {
+		hba->cmd_log.entries = kcalloc(UFSHCD_MAX_CMD_LOGGING,
+			sizeof(struct ufshcd_cmd_log_entry), GFP_KERNEL);
+		if (!hba->cmd_log.entries)
+			return;
+		dev_dbg(hba->dev, "%s: cmd_log.entries initialized\n",
+				__func__);
+	}
+}
+
+static void __ufshcd_cmd_log(struct ufs_hba *hba, char *str, char *cmd_type,
+			     unsigned int tag, u8 cmd_id, u8 idn, u8 lun,
+			     sector_t lba, int transfer_len)
+{
+	struct ufshcd_cmd_log_entry *entry;
+
+	if (!hba->cmd_log.entries)
+		return;
+
+	entry = &hba->cmd_log.entries[hba->cmd_log.pos];
+	entry->lun = lun;
+	entry->str = str;
+	entry->cmd_type = cmd_type;
+	entry->cmd_id = cmd_id;
+	entry->lba = lba;
+	entry->transfer_len = transfer_len;
+	entry->idn = idn;
+	entry->doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
+	entry->tag = tag;
+	entry->tstamp = ktime_get();
+	entry->outstanding_reqs = hba->outstanding_reqs;
+	entry->seq_num = hba->cmd_log.seq_num;
+	hba->cmd_log.seq_num++;
+	hba->cmd_log.pos = (hba->cmd_log.pos + 1) % UFSHCD_MAX_CMD_LOGGING;
+
+	ufshcd_add_command_trace(hba, entry);
+}
+
+static void ufshcd_cmd_log(struct ufs_hba *hba, char *str, char *cmd_type,
+	unsigned int tag, u8 cmd_id, u8 idn)
+{
+	__ufshcd_cmd_log(hba, str, cmd_type, tag, cmd_id, idn, 0, 0, 0);
+}
+
+static void ufshcd_dme_cmd_log(struct ufs_hba *hba, char *str, u8 cmd_id)
+{
+	ufshcd_cmd_log(hba, str, "dme", 0, cmd_id, 0);
+}
+
+static void ufshcd_print_cmd_log(struct ufs_hba *hba)
+{
+	int i;
+	int pos;
+	struct ufshcd_cmd_log_entry *p;
+
+	if (!hba->cmd_log.entries)
+		return;
+
+	pos = hba->cmd_log.pos;
+	for (i = 0; i < UFSHCD_MAX_CMD_LOGGING; i++) {
+		p = &hba->cmd_log.entries[pos];
+		pos = (pos + 1) % UFSHCD_MAX_CMD_LOGGING;
+
+		if (ktime_to_us(p->tstamp)) {
+			pr_err("%s: %s: seq_no=%u lun=0x%x cmd_id=0x%02x lba=0x%llx txfer_len=%d tag=%u, doorbell=0x%x outstanding=0x%x idn=%d time=%lld us\n",
+				p->cmd_type, p->str, p->seq_num,
+				p->lun, p->cmd_id, (unsigned long long)p->lba,
+				p->transfer_len, p->tag, p->doorbell,
+				p->outstanding_reqs, p->idn,
+				ktime_to_us(p->tstamp));
+				usleep_range(1000, 1100);
+		}
+	}
+}
+#else
+static void ufshcd_cmd_log_init(struct ufs_hba *hba)
+{
+}
+
+static void __ufshcd_cmd_log(struct ufs_hba *hba, char *str, char *cmd_type,
+			     unsigned int tag, u8 cmd_id, u8 idn, u8 lun,
+			     sector_t lba, int transfer_len)
+{
+	struct ufshcd_cmd_log_entry entry;
+
+	entry.str = str;
+	entry.lba = lba;
+	entry.cmd_id = cmd_id;
+	entry.transfer_len = transfer_len;
+	entry.doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
+	entry.tag = tag;
+
+	ufshcd_add_command_trace(hba, &entry);
+}
+
+static void ufshcd_dme_cmd_log(struct ufs_hba *hba, char *str, u8 cmd_id)
+{
+}
+
+static void ufshcd_print_cmd_log(struct ufs_hba *hba)
+{
+}
+#endif
+
+static inline void ufshcd_cond_add_cmd_trace(struct ufs_hba *hba,
+					unsigned int tag, const char *str)
 {
-	sector_t lba = -1;
-	u8 opcode = 0;
-	u32 intr, doorbell;
 	struct ufshcd_lrb *lrbp = &hba->lrb[tag];
-	int transfer_len = -1;
+	char *cmd_type = NULL;
+	u8 opcode = 0;
+	u8 cmd_id = 0, idn = 0;
+	sector_t lba = 0;
+	int transfer_len = 0;
 
 	if (!trace_ufshcd_command_enabled()) {
 		/* trace UPIU W/O tracing command */
@@ -361,10 +485,23 @@  static void ufshcd_add_command_trace(struct ufs_hba *hba,
 		}
 	}
 
-	intr = ufshcd_readl(hba, REG_INTERRUPT_STATUS);
-	doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
-	trace_ufshcd_command(dev_name(hba->dev), str, tag,
-				doorbell, transfer_len, intr, lba, opcode);
+	if (lrbp->cmd && ((lrbp->command_type == UTP_CMD_TYPE_SCSI) ||
+			  (lrbp->command_type == UTP_CMD_TYPE_UFS_STORAGE))) {
+		cmd_type = "scsi";
+		cmd_id = (u8)(*lrbp->cmd->cmnd);
+	} else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE) {
+		if (hba->dev_cmd.type == DEV_CMD_TYPE_NOP) {
+			cmd_type = "nop";
+			cmd_id = 0;
+		} else if (hba->dev_cmd.type == DEV_CMD_TYPE_QUERY) {
+			cmd_type = "query";
+			cmd_id = hba->dev_cmd.query.request.upiu_req.opcode;
+			idn = hba->dev_cmd.query.request.upiu_req.idn;
+		}
+	}
+
+	__ufshcd_cmd_log(hba, (char *) str, cmd_type, tag, cmd_id, idn,
+			 lrbp->lun, lba, transfer_len);
 }
 
 static void ufshcd_print_clk_freqs(struct ufs_hba *hba)
@@ -1886,7 +2023,8 @@  void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
 	ufshcd_writel(hba, 1 << task_tag, REG_UTP_TRANSFER_REQ_DOOR_BELL);
 	/* Make sure that doorbell is committed immediately */
 	wmb();
-	ufshcd_add_command_trace(hba, task_tag, "send");
+	ufshcd_cond_add_cmd_trace(hba, task_tag,
+			hba->lrb[task_tag].cmd ? "scsi_send" : "dev_cmd_send");
 }
 
 /**
@@ -2001,6 +2139,7 @@  static inline u8 ufshcd_get_upmcrs(struct ufs_hba *hba)
 
 	hba->active_uic_cmd = uic_cmd;
 
+	ufshcd_dme_cmd_log(hba, "dme_send", hba->active_uic_cmd->command);
 	/* Write Args */
 	ufshcd_writel(hba, uic_cmd->argument1, REG_UIC_COMMAND_ARG_1);
 	ufshcd_writel(hba, uic_cmd->argument2, REG_UIC_COMMAND_ARG_2);
@@ -2031,6 +2170,8 @@  static inline u8 ufshcd_get_upmcrs(struct ufs_hba *hba)
 	else
 		ret = -ETIMEDOUT;
 
+	ufshcd_dme_cmd_log(hba, "dme_cmpl_1", hba->active_uic_cmd->command);
+
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	hba->active_uic_cmd = NULL;
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
@@ -3797,11 +3938,14 @@  static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd)
 			cmd->command, status);
 		ret = (status != PWR_OK) ? status : -1;
 	}
+	ufshcd_dme_cmd_log(hba, "dme_cmpl_2", hba->active_uic_cmd->command);
+
 out:
 	if (ret) {
 		ufshcd_print_host_state(hba);
 		ufshcd_print_pwr_info(hba);
 		ufshcd_print_host_regs(hba);
+		ufshcd_print_cmd_log(hba);
 	}
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
@@ -4712,6 +4856,7 @@  static void ufshcd_slave_destroy(struct scsi_device *sdev)
 	int result = 0;
 	int scsi_status;
 	int ocs;
+	bool print_prdt;
 
 	/* overall command status of utrd */
 	ocs = ufshcd_get_tr_ocs(lrbp);
@@ -4787,8 +4932,11 @@  static void ufshcd_slave_destroy(struct scsi_device *sdev)
 		break;
 	} /* end of switch */
 
-	if (host_byte(result) != DID_OK)
-		ufshcd_print_trs(hba, 1 << lrbp->task_tag, true);
+	if (host_byte(result) != DID_OK) {
+		print_prdt = (ocs == OCS_INVALID_PRDT_ATTR ||
+				ocs == OCS_MISMATCH_DATA_BUF_SIZE);
+		ufshcd_print_trs(hba, 1 << lrbp->task_tag, print_prdt);
+	}
 	return result;
 }
 
@@ -4828,7 +4976,7 @@  static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
 		lrbp = &hba->lrb[index];
 		cmd = lrbp->cmd;
 		if (cmd) {
-			ufshcd_add_command_trace(hba, index, "complete");
+			ufshcd_cond_add_cmd_trace(hba, index, "scsi_cmpl");
 			result = ufshcd_transfer_rsp_status(hba, lrbp);
 			scsi_dma_unmap(cmd);
 			cmd->result = result;
@@ -4841,8 +4989,8 @@  static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
 		} else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE ||
 			lrbp->command_type == UTP_CMD_TYPE_UFS_STORAGE) {
 			if (hba->dev_cmd.complete) {
-				ufshcd_add_command_trace(hba, index,
-						"dev_complete");
+				ufshcd_cond_add_cmd_trace(hba, index,
+						"dev_cmd_cmpl");
 				complete(hba->dev_cmd.complete);
 			}
 		}
@@ -5307,6 +5455,23 @@  static void ufshcd_err_handler(struct work_struct *work)
 		if (!ret)
 			goto skip_err_handling;
 	}
+
+	/*
+	 * Dump controller state before resetting. Transfer requests state
+	 * will be dump as part of the request completion.
+	 */
+	if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR)) {
+		dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n",
+			__func__, hba->saved_err, hba->saved_uic_err);
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+		ufshcd_print_host_regs(hba);
+		ufshcd_print_host_state(hba);
+		ufshcd_print_pwr_info(hba);
+		ufshcd_print_tmrs(hba, hba->outstanding_tasks);
+		ufshcd_print_cmd_log(hba);
+		spin_lock_irqsave(hba->host->host_lock, flags);
+	}
+
 	if ((hba->saved_err & INT_FATAL_ERRORS) ||
 	    (hba->saved_err & UFSHCD_UIC_HIBERN8_MASK) ||
 	    ((hba->saved_err & UIC_ERROR) &&
@@ -5523,21 +5688,6 @@  static void ufshcd_check_errors(struct ufs_hba *hba)
 
 			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED;
 
-			/* dump controller state before resetting */
-			if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR)) {
-				bool pr_prdt = !!(hba->saved_err &
-						SYSTEM_BUS_FATAL_ERROR);
-
-				dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n",
-					__func__, hba->saved_err,
-					hba->saved_uic_err);
-
-				ufshcd_print_host_regs(hba);
-				ufshcd_print_pwr_info(hba);
-				ufshcd_print_tmrs(hba, hba->outstanding_tasks);
-				ufshcd_print_trs(hba, hba->outstanding_reqs,
-							pr_prdt);
-			}
 			schedule_work(&hba->eh_work);
 		}
 	}
@@ -8425,6 +8575,8 @@  int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	 */
 	ufshcd_set_ufs_dev_active(hba);
 
+	ufshcd_cmd_log_init(hba);
+
 	async_schedule(ufshcd_async_scan, hba);
 	ufs_sysfs_add_nodes(hba->dev);
 
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index e0fe247..97cc5b0 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -414,7 +414,7 @@  struct ufs_init_prefetch {
 	u32 icc_level;
 };
 
-#define UFS_ERR_REG_HIST_LENGTH 8
+#define UIC_ERR_REG_HIST_LENGTH 20
 /**
  * struct ufs_err_reg_hist - keeps history of errors
  * @pos: index to indicate cyclic buffer position
@@ -471,6 +471,27 @@  struct ufs_stats {
 	struct ufs_err_reg_hist task_abort;
 };
 
+struct ufshcd_cmd_log_entry {
+	char *str;	/* context like "send", "complete" */
+	char *cmd_type;	/* "scsi", "query", "nop", "dme" */
+	u8 lun;
+	u8 cmd_id;
+	sector_t lba;
+	int transfer_len;
+	u8 idn;		/* used only for query idn */
+	u32 doorbell;
+	u32 outstanding_reqs;
+	u32 seq_num;
+	unsigned int tag;
+	ktime_t tstamp;
+};
+
+struct ufshcd_cmd_log {
+	struct ufshcd_cmd_log_entry *entries;
+	int pos;
+	u32 seq_num;
+};
+
 /**
  * struct ufs_hba - per adapter private structure
  * @mmio_base: UFSHCI base register address
@@ -692,6 +713,7 @@  struct ufs_hba {
 	struct ufs_pwr_mode_info max_pwr_info;
 
 	struct ufs_clk_gating clk_gating;
+	struct ufshcd_cmd_log cmd_log;
 	/* Control to enable/disable host capabilities */
 	u32 caps;
 	/* Allow dynamic clk gating */