diff mbox series

[10/25] lpfc: Move SCSI and NVME Stats to hardware queue structures

Message ID 20181226233334.27518-11-jsmart2021@gmail.com (mailing list archive)
State Superseded
Headers show
Series lpfc updates for 12.2.0.0 | expand

Commit Message

James Smart Dec. 26, 2018, 11:33 p.m. UTC
Many io statics were being sampled and saved using adapter-based
data structures. This was creating a lot of contention and cache
thrashing in the I/O path.

Move the statistics to the hardware queue data structures.
Given the per queue data structures, use of atomic types is
lessened.

Add new syfs and debugfs stat routines to collate the per
hardware queue values and report at an adapter level.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
---
 drivers/scsi/lpfc/lpfc.h         |   9 +--
 drivers/scsi/lpfc/lpfc_attr.c    |  68 ++++++++++++++---
 drivers/scsi/lpfc/lpfc_debugfs.c | 158 +++++++++++++++++++++++++++++++++++++--
 drivers/scsi/lpfc/lpfc_debugfs.h |   3 +
 drivers/scsi/lpfc/lpfc_init.c    |  40 ++++++----
 drivers/scsi/lpfc/lpfc_nvme.c    |  57 +++++---------
 drivers/scsi/lpfc/lpfc_nvme.h    |  11 +--
 drivers/scsi/lpfc/lpfc_scsi.c    |  47 ++++++++----
 drivers/scsi/lpfc/lpfc_scsi.h    |   3 +
 drivers/scsi/lpfc/lpfc_sli4.h    |  11 +++
 10 files changed, 304 insertions(+), 103 deletions(-)

Comments

Hannes Reinecke Dec. 28, 2018, 9:18 a.m. UTC | #1
On 12/27/18 12:33 AM, James Smart wrote:
> Many io statics were being sampled and saved using adapter-based
> data structures. This was creating a lot of contention and cache
> thrashing in the I/O path.
> 
> Move the statistics to the hardware queue data structures.
> Given the per queue data structures, use of atomic types is
> lessened.
> 
> Add new syfs and debugfs stat routines to collate the per
> hardware queue values and report at an adapter level.
> 
> Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
> Signed-off-by: James Smart <jsmart2021@gmail.com>
> ---
>   drivers/scsi/lpfc/lpfc.h         |   9 +--
>   drivers/scsi/lpfc/lpfc_attr.c    |  68 ++++++++++++++---
>   drivers/scsi/lpfc/lpfc_debugfs.c | 158 +++++++++++++++++++++++++++++++++++++--
>   drivers/scsi/lpfc/lpfc_debugfs.h |   3 +
>   drivers/scsi/lpfc/lpfc_init.c    |  40 ++++++----
>   drivers/scsi/lpfc/lpfc_nvme.c    |  57 +++++---------
>   drivers/scsi/lpfc/lpfc_nvme.h    |  11 +--
>   drivers/scsi/lpfc/lpfc_scsi.c    |  47 ++++++++----
>   drivers/scsi/lpfc/lpfc_scsi.h    |   3 +
>   drivers/scsi/lpfc/lpfc_sli4.h    |  11 +++
>   10 files changed, 304 insertions(+), 103 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.com>

Cheers,

Hannes
diff mbox series

Patch

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 310437b6b51a..9262c52e32d6 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -479,6 +479,7 @@  struct lpfc_vport {
 	struct dentry *debug_disc_trc;
 	struct dentry *debug_nodelist;
 	struct dentry *debug_nvmestat;
+	struct dentry *debug_scsistat;
 	struct dentry *debug_nvmektime;
 	struct dentry *debug_cpucheck;
 	struct dentry *vport_debugfs_root;
@@ -946,14 +947,6 @@  struct lpfc_hba {
 	struct timer_list eratt_poll;
 	uint32_t eratt_poll_interval;
 
-	/*
-	 * stat  counters
-	 */
-	atomic_t fc4ScsiInputRequests;
-	atomic_t fc4ScsiOutputRequests;
-	atomic_t fc4ScsiControlRequests;
-	atomic_t fc4ScsiIoCmpls;
-
 	uint64_t bg_guard_err_cnt;
 	uint64_t bg_apptag_err_cnt;
 	uint64_t bg_reftag_err_cnt;
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 1671d9371d3b..e10d930fcb6a 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -64,9 +64,6 @@ 
 #define LPFC_MIN_MRQ_POST	512
 #define LPFC_MAX_MRQ_POST	2048
 
-#define LPFC_MAX_NVME_INFO_TMP_LEN	100
-#define LPFC_NVME_INFO_MORE_STR		"\nCould be more info...\n"
-
 /*
  * Write key size should be multiple of 4. If write key is changed
  * make sure that library write key is also changed.
@@ -155,7 +152,7 @@  lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 	struct lpfc_nvme_rport *rport;
 	struct lpfc_nodelist *ndlp;
 	struct nvme_fc_remote_port *nrport;
-	struct lpfc_nvme_ctrl_stat *cstat;
+	struct lpfc_fc4_ctrl_stat *cstat;
 	uint64_t data1, data2, data3;
 	uint64_t totin, totout, tot;
 	char *statep;
@@ -457,12 +454,12 @@  lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 	totin = 0;
 	totout = 0;
 	for (i = 0; i < phba->cfg_hdw_queue; i++) {
-		cstat = &lport->cstat[i];
-		tot = atomic_read(&cstat->fc4NvmeIoCmpls);
+		cstat = &phba->sli4_hba.hdwq[i].nvme_cstat;
+		tot = cstat->io_cmpls;
 		totin += tot;
-		data1 = atomic_read(&cstat->fc4NvmeInputRequests);
-		data2 = atomic_read(&cstat->fc4NvmeOutputRequests);
-		data3 = atomic_read(&cstat->fc4NvmeControlRequests);
+		data1 = cstat->input_requests;
+		data2 = cstat->output_requests;
+		data3 = cstat->control_requests;
 		totout += (data1 + data2 + data3);
 	}
 	scnprintf(tmp, sizeof(tmp),
@@ -509,6 +506,57 @@  lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 }
 
 static ssize_t
+lpfc_scsi_stat_show(struct device *dev, struct device_attribute *attr,
+		    char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct lpfc_vport *vport = shost_priv(shost);
+	struct lpfc_hba *phba = vport->phba;
+	int len;
+	struct lpfc_fc4_ctrl_stat *cstat;
+	u64 data1, data2, data3;
+	u64 tot, totin, totout;
+	int i;
+	char tmp[LPFC_MAX_SCSI_INFO_TMP_LEN] = {0};
+
+	if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) ||
+	    (phba->sli_rev != LPFC_SLI_REV4))
+		return 0;
+
+	scnprintf(buf, PAGE_SIZE, "SCSI HDWQ Statistics\n");
+
+	totin = 0;
+	totout = 0;
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		cstat = &phba->sli4_hba.hdwq[i].scsi_cstat;
+		tot = cstat->io_cmpls;
+		totin += tot;
+		data1 = cstat->input_requests;
+		data2 = cstat->output_requests;
+		data3 = cstat->control_requests;
+		totout += (data1 + data2 + data3);
+
+		scnprintf(tmp, sizeof(tmp), "HDWQ (%d): Rd %016llx Wr %016llx "
+			  "IO %016llx ", i, data1, data2, data3);
+		if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
+			goto buffer_done;
+
+		scnprintf(tmp, sizeof(tmp), "Cmpl %016llx OutIO %016llx\n",
+			  tot, ((data1 + data2 + data3) - tot));
+		if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
+			goto buffer_done;
+	}
+	scnprintf(tmp, sizeof(tmp), "Total FCP Cmpl %016llx Issue %016llx "
+		  "OutIO %016llx\n", totin, totout, totout - totin);
+	strlcat(buf, tmp, PAGE_SIZE);
+
+buffer_done:
+	len = strnlen(buf, PAGE_SIZE);
+
+	return len;
+}
+
+static ssize_t
 lpfc_bg_info_show(struct device *dev, struct device_attribute *attr,
 		  char *buf)
 {
@@ -2573,6 +2621,7 @@  lpfc_##attr##_store(struct device *dev, struct device_attribute *attr, \
 
 
 static DEVICE_ATTR(nvme_info, 0444, lpfc_nvme_info_show, NULL);
+static DEVICE_ATTR(scsi_stat, 0444, lpfc_scsi_stat_show, NULL);
 static DEVICE_ATTR(bg_info, S_IRUGO, lpfc_bg_info_show, NULL);
 static DEVICE_ATTR(bg_guard_err, S_IRUGO, lpfc_bg_guard_err_show, NULL);
 static DEVICE_ATTR(bg_apptag_err, S_IRUGO, lpfc_bg_apptag_err_show, NULL);
@@ -5642,6 +5691,7 @@  LPFC_ATTR_RW(enable_dpp, 1, 0, 1, "Enable Direct Packet Push");
 
 struct device_attribute *lpfc_hba_attrs[] = {
 	&dev_attr_nvme_info,
+	&dev_attr_scsi_stat,
 	&dev_attr_bg_info,
 	&dev_attr_bg_guard_err,
 	&dev_attr_bg_apptag_err,
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index d85fe3e4ec12..3487c12ac97a 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -838,7 +838,7 @@  lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 	struct lpfc_nvmet_tgtport *tgtp;
 	struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp;
 	struct nvme_fc_local_port *localport;
-	struct lpfc_nvme_ctrl_stat *cstat;
+	struct lpfc_fc4_ctrl_stat *cstat;
 	struct lpfc_nvme_lport *lport;
 	uint64_t data1, data2, data3;
 	uint64_t tot, totin, totout;
@@ -977,7 +977,7 @@  lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 			return len;
 
 		len += snprintf(buf + len, size - len,
-				"\nNVME Lport Statistics\n");
+				"\nNVME HDWQ Statistics\n");
 
 		len += snprintf(buf + len, size - len,
 				"LS: Xmt %016x Cmpl %016x\n",
@@ -991,12 +991,12 @@  lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 		totin = 0;
 		totout = 0;
 		for (i = 0; i < phba->cfg_hdw_queue; i++) {
-			cstat = &lport->cstat[i];
-			tot = atomic_read(&cstat->fc4NvmeIoCmpls);
+			cstat = &phba->sli4_hba.hdwq[i].nvme_cstat;
+			tot = cstat->io_cmpls;
 			totin += tot;
-			data1 = atomic_read(&cstat->fc4NvmeInputRequests);
-			data2 = atomic_read(&cstat->fc4NvmeOutputRequests);
-			data3 = atomic_read(&cstat->fc4NvmeControlRequests);
+			data1 = cstat->input_requests;
+			data2 = cstat->output_requests;
+			data3 = cstat->control_requests;
 			totout += (data1 + data2 + data3);
 
 			/* Limit to 32, debugfs display buffer limitation */
@@ -1004,7 +1004,7 @@  lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 				continue;
 
 			len += snprintf(buf + len, PAGE_SIZE - len,
-					"FCP (%d): Rd %016llx Wr %016llx "
+					"HDWQ (%d): Rd %016llx Wr %016llx "
 					"IO %016llx ",
 					i, data1, data2, data3);
 			len += snprintf(buf + len, PAGE_SIZE - len,
@@ -1044,6 +1044,66 @@  lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 	return len;
 }
 
+/**
+ * lpfc_debugfs_scsistat_data - Dump target node list to a buffer
+ * @vport: The vport to gather target node info from.
+ * @buf: The buffer to dump log into.
+ * @size: The maximum amount of data to process.
+ *
+ * Description:
+ * This routine dumps the SCSI statistics associated with @vport
+ *
+ * Return Value:
+ * This routine returns the amount of bytes that were dumped into @buf and will
+ * not exceed @size.
+ **/
+static int
+lpfc_debugfs_scsistat_data(struct lpfc_vport *vport, char *buf, int size)
+{
+	int len;
+	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_fc4_ctrl_stat *cstat;
+	u64 data1, data2, data3;
+	u64 tot, totin, totout;
+	int i;
+	char tmp[LPFC_MAX_SCSI_INFO_TMP_LEN] = {0};
+
+	if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) ||
+	    (phba->sli_rev != LPFC_SLI_REV4))
+		return 0;
+
+	scnprintf(buf, size, "SCSI HDWQ Statistics\n");
+
+	totin = 0;
+	totout = 0;
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		cstat = &phba->sli4_hba.hdwq[i].scsi_cstat;
+		tot = cstat->io_cmpls;
+		totin += tot;
+		data1 = cstat->input_requests;
+		data2 = cstat->output_requests;
+		data3 = cstat->control_requests;
+		totout += (data1 + data2 + data3);
+
+		scnprintf(tmp, sizeof(tmp), "HDWQ (%d): Rd %016llx Wr %016llx "
+			  "IO %016llx ", i, data1, data2, data3);
+		if (strlcat(buf, tmp, size) >= size)
+			goto buffer_done;
+
+		scnprintf(tmp, sizeof(tmp), "Cmpl %016llx OutIO %016llx\n",
+			  tot, ((data1 + data2 + data3) - tot));
+		if (strlcat(buf, tmp, size) >= size)
+			goto buffer_done;
+	}
+	scnprintf(tmp, sizeof(tmp), "Total FCP Cmpl %016llx Issue %016llx "
+		  "OutIO %016llx\n", totin, totout, totout - totin);
+	strlcat(buf, tmp, size);
+
+buffer_done:
+	len = strnlen(buf, size);
+
+	return len;
+}
 
 /**
  * lpfc_debugfs_nvmektime_data - Dump target node list to a buffer
@@ -2210,6 +2270,64 @@  lpfc_debugfs_nvmestat_write(struct file *file, const char __user *buf,
 }
 
 static int
+lpfc_debugfs_scsistat_open(struct inode *inode, struct file *file)
+{
+	struct lpfc_vport *vport = inode->i_private;
+	struct lpfc_debug *debug;
+	int rc = -ENOMEM;
+
+	debug = kmalloc(sizeof(*debug), GFP_KERNEL);
+	if (!debug)
+		goto out;
+
+	 /* Round to page boundary */
+	debug->buffer = kzalloc(LPFC_SCSISTAT_SIZE, GFP_KERNEL);
+	if (!debug->buffer) {
+		kfree(debug);
+		goto out;
+	}
+
+	debug->len = lpfc_debugfs_scsistat_data(vport, debug->buffer,
+		LPFC_SCSISTAT_SIZE);
+
+	debug->i_private = inode->i_private;
+	file->private_data = debug;
+
+	rc = 0;
+out:
+	return rc;
+}
+
+static ssize_t
+lpfc_debugfs_scsistat_write(struct file *file, const char __user *buf,
+			    size_t nbytes, loff_t *ppos)
+{
+	struct lpfc_debug *debug = file->private_data;
+	struct lpfc_vport *vport = (struct lpfc_vport *)debug->i_private;
+	struct lpfc_hba *phba = vport->phba;
+	char mybuf[6] = {0};
+	int i;
+
+	/* Protect copy from user */
+	if (!access_ok(VERIFY_READ, buf, nbytes))
+		return -EFAULT;
+
+	if (copy_from_user(mybuf, buf, (nbytes >= sizeof(mybuf)) ?
+				       (sizeof(mybuf) - 1) : nbytes))
+		return -EFAULT;
+
+	if ((strncmp(&mybuf[0], "reset", strlen("reset")) == 0) ||
+	    (strncmp(&mybuf[0], "zero", strlen("zero")) == 0)) {
+		for (i = 0; i < phba->cfg_hdw_queue; i++) {
+			memset(&phba->sli4_hba.hdwq[i].scsi_cstat, 0,
+			       sizeof(phba->sli4_hba.hdwq[i].scsi_cstat));
+		}
+	}
+
+	return nbytes;
+}
+
+static int
 lpfc_debugfs_nvmektime_open(struct inode *inode, struct file *file)
 {
 	struct lpfc_vport *vport = inode->i_private;
@@ -4970,6 +5088,16 @@  static const struct file_operations lpfc_debugfs_op_nvmestat = {
 	.release =      lpfc_debugfs_release,
 };
 
+#undef lpfc_debugfs_op_scsistat
+static const struct file_operations lpfc_debugfs_op_scsistat = {
+	.owner =        THIS_MODULE,
+	.open =         lpfc_debugfs_scsistat_open,
+	.llseek =       lpfc_debugfs_lseek,
+	.read =         lpfc_debugfs_read,
+	.write =	lpfc_debugfs_scsistat_write,
+	.release =      lpfc_debugfs_release,
+};
+
 #undef lpfc_debugfs_op_nvmektime
 static const struct file_operations lpfc_debugfs_op_nvmektime = {
 	.owner =        THIS_MODULE,
@@ -5724,6 +5852,17 @@  lpfc_debugfs_initialize(struct lpfc_vport *vport)
 		goto debug_failed;
 	}
 
+	snprintf(name, sizeof(name), "scsistat");
+	vport->debug_scsistat =
+		debugfs_create_file(name, 0644,
+				    vport->vport_debugfs_root,
+				    vport, &lpfc_debugfs_op_scsistat);
+	if (!vport->debug_scsistat) {
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
+				 "0811 Cannot create debugfs scsistat\n");
+		goto debug_failed;
+	}
+
 	snprintf(name, sizeof(name), "nvmektime");
 	vport->debug_nvmektime =
 		debugfs_create_file(name, 0644,
@@ -5918,6 +6057,9 @@  lpfc_debugfs_terminate(struct lpfc_vport *vport)
 	debugfs_remove(vport->debug_nvmestat); /* nvmestat */
 	vport->debug_nvmestat = NULL;
 
+	debugfs_remove(vport->debug_scsistat); /* scsistat */
+	vport->debug_scsistat = NULL;
+
 	debugfs_remove(vport->debug_nvmektime); /* nvmektime */
 	vport->debug_nvmektime = NULL;
 
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h
index c904fa74dfbc..2360ec8fb848 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.h
+++ b/drivers/scsi/lpfc/lpfc_debugfs.h
@@ -50,6 +50,9 @@ 
 #define LPFC_CPUCHECK_SIZE 8192
 #define LPFC_NVMEIO_TRC_SIZE 8192
 
+/* scsistat output buffer size */
+#define LPFC_SCSISTAT_SIZE 8192
+
 #define LPFC_DEBUG_OUT_LINE_SZ	80
 
 /*
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 78c9b6b18ae6..3c430eb0c700 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1278,7 +1278,7 @@  lpfc_hb_timeout_handler(struct lpfc_hba *phba)
 	struct lpfc_register reg_data;
 	struct nvme_fc_local_port *localport;
 	struct lpfc_nvme_lport *lport;
-	struct lpfc_nvme_ctrl_stat *cstat;
+	struct lpfc_fc4_ctrl_stat *cstat;
 	void __iomem *eqdreg = phba->sli4_hba.u.if_type2.EQDregaddr;
 
 	vports = lpfc_create_vport_work_array(phba);
@@ -1320,16 +1320,13 @@  lpfc_hb_timeout_handler(struct lpfc_hba *phba)
 				tot = 0;
 				for (i = 0;
 					i < phba->cfg_hdw_queue; i++) {
-					cstat = &lport->cstat[i];
-					data1 = atomic_read(
-						&cstat->fc4NvmeInputRequests);
-					data2 = atomic_read(
-						&cstat->fc4NvmeOutputRequests);
-					data3 = atomic_read(
-						&cstat->fc4NvmeControlRequests);
+					cstat =
+					     &phba->sli4_hba.hdwq[i].nvme_cstat;
+					data1 = cstat->input_requests;
+					data2 = cstat->output_requests;
+					data3 = cstat->control_requests;
 					tot += (data1 + data2 + data3);
-					tot -= atomic_read(
-						&cstat->fc4NvmeIoCmpls);
+					tot -= cstat->io_cmpls;
 				}
 			}
 		}
@@ -7217,10 +7214,6 @@  lpfc_create_shost(struct lpfc_hba *phba)
 	phba->fc_arbtov = FF_DEF_ARBTOV;
 
 	atomic_set(&phba->sdev_cnt, 0);
-	atomic_set(&phba->fc4ScsiInputRequests, 0);
-	atomic_set(&phba->fc4ScsiOutputRequests, 0);
-	atomic_set(&phba->fc4ScsiControlRequests, 0);
-	atomic_set(&phba->fc4ScsiIoCmpls, 0);
 	vport = lpfc_create_port(phba, phba->brd_no, &phba->pcidev->dev);
 	if (!vport)
 		return -ENODEV;
@@ -8772,6 +8765,25 @@  lpfc_sli4_queue_create(struct lpfc_hba *phba)
 			phba->sli4_hba.nvmet_mrq_data[idx] = qdesc;
 		}
 	}
+
+#if defined(BUILD_NVME)
+	/* Clear NVME stats */
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+		for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+			memset(&phba->sli4_hba.hdwq[idx].nvme_cstat, 0,
+			       sizeof(phba->sli4_hba.hdwq[idx].nvme_cstat));
+		}
+	}
+#endif
+
+	/* Clear SCSI stats */
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
+		for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+			memset(&phba->sli4_hba.hdwq[idx].scsi_cstat, 0,
+			       sizeof(phba->sli4_hba.hdwq[idx].scsi_cstat));
+		}
+	}
+
 	return 0;
 
 out_error:
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index fe0190b48abd..c13638a3c0e7 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -964,7 +964,6 @@  lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
 	struct lpfc_nodelist *ndlp;
 	struct lpfc_nvme_fcpreq_priv *freqpriv;
 	struct lpfc_nvme_lport *lport;
-	struct lpfc_nvme_ctrl_stat *cstat;
 	uint32_t code, status, idx, cpu;
 	uint16_t cid, sqhd, data;
 	uint32_t *ptr;
@@ -991,17 +990,15 @@  lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
 	nCmd = lpfc_ncmd->nvmeCmd;
 	status = bf_get(lpfc_wcqe_c_status, wcqe);
 
+	idx = lpfc_ncmd->cur_iocbq.hba_wqidx;
+	phba->sli4_hba.hdwq[idx].nvme_cstat.io_cmpls++;
+
 	if (vport->localport) {
 		lport = (struct lpfc_nvme_lport *)vport->localport->private;
-		if (lport) {
-			idx = lpfc_ncmd->cur_iocbq.hba_wqidx;
-			cstat = &lport->cstat[idx];
-			atomic_inc(&cstat->fc4NvmeIoCmpls);
-			if (status) {
-				if (bf_get(lpfc_wcqe_c_xb, wcqe))
-					atomic_inc(&lport->cmpl_fcp_xb);
-				atomic_inc(&lport->cmpl_fcp_err);
-			}
+		if (lport && status) {
+			if (bf_get(lpfc_wcqe_c_xb, wcqe))
+				atomic_inc(&lport->cmpl_fcp_xb);
+			atomic_inc(&lport->cmpl_fcp_err);
 		}
 	}
 
@@ -1186,7 +1183,7 @@  static int
 lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
 		      struct lpfc_nvme_buf *lpfc_ncmd,
 		      struct lpfc_nodelist *pnode,
-		      struct lpfc_nvme_ctrl_stat *cstat)
+		      struct lpfc_fc4_ctrl_stat *cstat)
 {
 	struct lpfc_hba *phba = vport->phba;
 	struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd;
@@ -1224,7 +1221,7 @@  lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
 			} else {
 				wqe->fcp_iwrite.initial_xfer_len = 0;
 			}
-			atomic_inc(&cstat->fc4NvmeOutputRequests);
+			cstat->output_requests++;
 		} else {
 			/* From the iread template, initialize words 7 - 11 */
 			memcpy(&wqe->words[7],
@@ -1237,13 +1234,13 @@  lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
 			/* Word 5 */
 			wqe->fcp_iread.rsrvd5 = 0;
 
-			atomic_inc(&cstat->fc4NvmeInputRequests);
+			cstat->input_requests++;
 		}
 	} else {
 		/* From the icmnd template, initialize words 4 - 11 */
 		memcpy(&wqe->words[4], &lpfc_icmnd_cmd_template.words[4],
 		       sizeof(uint32_t) * 8);
-		atomic_inc(&cstat->fc4NvmeControlRequests);
+		cstat->control_requests++;
 	}
 	/*
 	 * Finish initializing those WQE fields that are independent
@@ -1427,7 +1424,7 @@  lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 	int expedite = 0;
 	int idx, cpu;
 	struct lpfc_nvme_lport *lport;
-	struct lpfc_nvme_ctrl_stat *cstat;
+	struct lpfc_fc4_ctrl_stat *cstat;
 	struct lpfc_vport *vport;
 	struct lpfc_hba *phba;
 	struct lpfc_nodelist *ndlp;
@@ -1590,7 +1587,7 @@  lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 	 */
 	idx = lpfc_queue_info->index;
 	lpfc_ncmd->cur_iocbq.hba_wqidx = idx;
-	cstat = &lport->cstat[idx];
+	cstat = &phba->sli4_hba.hdwq[idx].nvme_cstat;
 
 	lpfc_nvme_prep_io_cmd(vport, lpfc_ncmd, ndlp, cstat);
 	ret = lpfc_nvme_prep_io_dma(vport, lpfc_ncmd);
@@ -1643,11 +1640,11 @@  lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
  out_free_nvme_buf:
 	if (lpfc_ncmd->nvmeCmd->sg_cnt) {
 		if (lpfc_ncmd->nvmeCmd->io_dir == NVMEFC_FCP_WRITE)
-			atomic_dec(&cstat->fc4NvmeOutputRequests);
+			cstat->output_requests--;
 		else
-			atomic_dec(&cstat->fc4NvmeInputRequests);
+			cstat->input_requests--;
 	} else
-		atomic_dec(&cstat->fc4NvmeControlRequests);
+		cstat->control_requests--;
 	lpfc_release_nvme_buf(phba, lpfc_ncmd);
  out_fail:
 	return ret;
@@ -2079,8 +2076,6 @@  lpfc_nvme_create_localport(struct lpfc_vport *vport)
 	struct nvme_fc_port_info nfcp_info;
 	struct nvme_fc_local_port *localport;
 	struct lpfc_nvme_lport *lport;
-	struct lpfc_nvme_ctrl_stat *cstat;
-	int i;
 
 	/* Initialize this localport instance.  The vport wwn usage ensures
 	 * that NPIV is accounted for.
@@ -2097,11 +2092,6 @@  lpfc_nvme_create_localport(struct lpfc_vport *vport)
 	lpfc_nvme_template.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1;
 	lpfc_nvme_template.max_hw_queues = phba->cfg_hdw_queue;
 
-	cstat = kmalloc((sizeof(struct lpfc_nvme_ctrl_stat) *
-			phba->cfg_hdw_queue), GFP_KERNEL);
-	if (!cstat)
-		return -ENOMEM;
-
 	/* localport is allocated from the stack, but the registration
 	 * call allocates heap memory as well as the private area.
 	 */
@@ -2124,7 +2114,6 @@  lpfc_nvme_create_localport(struct lpfc_vport *vport)
 		lport = (struct lpfc_nvme_lport *)localport->private;
 		vport->localport = localport;
 		lport->vport = vport;
-		lport->cstat = cstat;
 		vport->nvmei_support = 1;
 
 		atomic_set(&lport->xmt_fcp_noxri, 0);
@@ -2141,16 +2130,7 @@  lpfc_nvme_create_localport(struct lpfc_vport *vport)
 		atomic_set(&lport->cmpl_ls_err, 0);
 		atomic_set(&lport->fc4NvmeLsRequests, 0);
 		atomic_set(&lport->fc4NvmeLsCmpls, 0);
-
-		for (i = 0; i < phba->cfg_hdw_queue; i++) {
-			cstat = &lport->cstat[i];
-			atomic_set(&cstat->fc4NvmeInputRequests, 0);
-			atomic_set(&cstat->fc4NvmeOutputRequests, 0);
-			atomic_set(&cstat->fc4NvmeControlRequests, 0);
-			atomic_set(&cstat->fc4NvmeIoCmpls, 0);
-		}
-	} else
-		kfree(cstat);
+	}
 
 	return ret;
 }
@@ -2212,7 +2192,6 @@  lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 #if (IS_ENABLED(CONFIG_NVME_FC))
 	struct nvme_fc_local_port *localport;
 	struct lpfc_nvme_lport *lport;
-	struct lpfc_nvme_ctrl_stat *cstat;
 	int ret;
 
 	if (vport->nvmei_support == 0)
@@ -2221,7 +2200,6 @@  lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 	localport = vport->localport;
 	vport->localport = NULL;
 	lport = (struct lpfc_nvme_lport *)localport->private;
-	cstat = lport->cstat;
 
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
 			 "6011 Destroying NVME localport %p\n",
@@ -2237,7 +2215,6 @@  lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 	 * indefinitely or succeeds
 	 */
 	lpfc_nvme_lport_unreg_wait(vport, lport);
-	kfree(cstat);
 
 	/* Regardless of the unregister upcall response, clear
 	 * nvmei_support.  All rports are unregistered and the
diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h
index 4a020b9c8fbf..974fbec7dd04 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.h
+++ b/drivers/scsi/lpfc/lpfc_nvme.h
@@ -30,6 +30,9 @@ 
 #define LPFC_NVME_FB_SHIFT		9
 #define LPFC_NVME_MAX_FB		(1 << 20)	/* 1M */
 
+#define LPFC_MAX_NVME_INFO_TMP_LEN	100
+#define LPFC_NVME_INFO_MORE_STR		"\nCould be more info...\n"
+
 #define lpfc_ndlp_get_nrport(ndlp)					\
 	((!ndlp->nrport || (ndlp->upcall_flags & NLP_WAIT_FOR_UNREG))	\
 	? NULL : ndlp->nrport)
@@ -40,19 +43,11 @@  struct lpfc_nvme_qhandle {
 	uint32_t cpu_id;	/* current cpu id at time of create */
 };
 
-struct lpfc_nvme_ctrl_stat {
-	atomic_t fc4NvmeInputRequests;
-	atomic_t fc4NvmeOutputRequests;
-	atomic_t fc4NvmeControlRequests;
-	atomic_t fc4NvmeIoCmpls;
-};
-
 /* Declare nvme-based local and remote port definitions. */
 struct lpfc_nvme_lport {
 	struct lpfc_vport *vport;
 	struct completion lport_unreg_done;
 	/* Add stats counters here */
-	struct lpfc_nvme_ctrl_stat *cstat;
 	atomic_t fc4NvmeLsRequests;
 	atomic_t fc4NvmeLsCmpls;
 	atomic_t xmt_fcp_noxri;
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index f1545ee3da7c..b80a57d78b1c 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -3721,14 +3721,18 @@  lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
 	unsigned long flags;
 	struct lpfc_fast_path_event *fast_path_evt;
 	struct Scsi_Host *shost;
+	int idx;
 	uint32_t logit = LOG_FCP;
 
-	atomic_inc(&phba->fc4ScsiIoCmpls);
-
 	/* Sanity check on return of outstanding command */
 	cmd = lpfc_cmd->pCmd;
 	if (!cmd)
 		return;
+
+	idx = lpfc_cmd->cur_iocbq.hba_wqidx;
+	if (phba->sli4_hba.hdwq)
+		phba->sli4_hba.hdwq[idx].scsi_cstat.io_cmpls++;
+
 	shost = cmd->device->host;
 
 	lpfc_cmd->result = (pIocbOut->iocb.un.ulpWord[4] & IOERR_PARAM_MASK);
@@ -3987,7 +3991,9 @@  lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 	struct fcp_cmnd *fcp_cmnd = lpfc_cmd->fcp_cmnd;
 	IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb;
 	struct lpfc_iocbq *piocbq = &(lpfc_cmd->cur_iocbq);
+	struct lpfc_sli4_hdw_queue *hdwq = NULL;
 	int datadir = scsi_cmnd->sc_data_direction;
+	int idx;
 	uint8_t *ptr;
 	bool sli4;
 	uint32_t fcpdl;
@@ -4013,6 +4019,9 @@  lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 
 	sli4 = (phba->sli_rev == LPFC_SLI_REV4);
 	piocbq->iocb.un.fcpi.fcpi_XRdy = 0;
+	idx = lpfc_cmd->hdwq;
+	if (phba->sli4_hba.hdwq)
+		hdwq = &phba->sli4_hba.hdwq[idx];
 
 	/*
 	 * There are three possibilities here - use scatter-gather segment, use
@@ -4034,19 +4043,22 @@  lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 						vport->cfg_first_burst_size;
 			}
 			fcp_cmnd->fcpCntl3 = WRITE_DATA;
-			atomic_inc(&phba->fc4ScsiOutputRequests);
+			if (hdwq)
+				hdwq->scsi_cstat.output_requests++;
 		} else {
 			iocb_cmd->ulpCommand = CMD_FCP_IREAD64_CR;
 			iocb_cmd->ulpPU = PARM_READ_CHECK;
 			fcp_cmnd->fcpCntl3 = READ_DATA;
-			atomic_inc(&phba->fc4ScsiInputRequests);
+			if (hdwq)
+				hdwq->scsi_cstat.input_requests++;
 		}
 	} else {
 		iocb_cmd->ulpCommand = CMD_FCP_ICMND64_CR;
 		iocb_cmd->un.fcpi.fcpi_parm = 0;
 		iocb_cmd->ulpPU = 0;
 		fcp_cmnd->fcpCntl3 = 0;
-		atomic_inc(&phba->fc4ScsiControlRequests);
+		if (hdwq)
+			hdwq->scsi_cstat.control_requests++;
 	}
 	if (phba->sli_rev == 3 &&
 	    !(phba->sli3_options & LPFC_SLI3_BG_ENABLED))
@@ -4398,7 +4410,7 @@  lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 	struct lpfc_nodelist *ndlp;
 	struct lpfc_scsi_buf *lpfc_cmd;
 	struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
-	int err;
+	int err, idx;
 
 	rdata = lpfc_rport_data_from_scsi_device(cmnd->device);
 
@@ -4533,16 +4545,6 @@  lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 				 (uint32_t)
 				 (cmnd->request->timeout / 1000));
 
-		switch (lpfc_cmd->fcp_cmnd->fcpCntl3) {
-		case WRITE_DATA:
-			atomic_dec(&phba->fc4ScsiOutputRequests);
-			break;
-		case READ_DATA:
-			atomic_dec(&phba->fc4ScsiInputRequests);
-			break;
-		default:
-			atomic_dec(&phba->fc4ScsiControlRequests);
-		}
 		goto out_host_busy_free_buf;
 	}
 	if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) {
@@ -4556,7 +4558,20 @@  lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 	return 0;
 
  out_host_busy_free_buf:
+	idx = lpfc_cmd->hdwq;
 	lpfc_scsi_unprep_dma_buf(phba, lpfc_cmd);
+	if (phba->sli4_hba.hdwq) {
+		switch (lpfc_cmd->fcp_cmnd->fcpCntl3) {
+		case WRITE_DATA:
+			phba->sli4_hba.hdwq[idx].scsi_cstat.output_requests--;
+			break;
+		case READ_DATA:
+			phba->sli4_hba.hdwq[idx].scsi_cstat.input_requests--;
+			break;
+		default:
+			phba->sli4_hba.hdwq[idx].scsi_cstat.control_requests--;
+		}
+	}
 	lpfc_release_scsi_buf(phba, lpfc_cmd);
  out_host_busy:
 	return SCSI_MLQUEUE_HOST_BUSY;
diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h
index ebb5d5860cdb..68e6c4ad3936 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.h
+++ b/drivers/scsi/lpfc/lpfc_scsi.h
@@ -196,5 +196,8 @@  struct lpfc_scsi_buf {
 
 #define TXRDY_PAYLOAD_LEN	12
 
+/* For sysfs/debugfs tmp string max len */
+#define LPFC_MAX_SCSI_INFO_TMP_LEN	79
+
 int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba,
 				  struct lpfc_scsi_buf *lpfc_cmd);
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 8e3e99d52f75..a33fb6715329 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -533,6 +533,13 @@  struct lpfc_vector_map_info {
 };
 #define LPFC_VECTOR_MAP_EMPTY	0xffff
 
+struct lpfc_fc4_ctrl_stat {
+	u32 input_requests;
+	u32 output_requests;
+	u32 control_requests;
+	u32 io_cmpls;
+};
+
 /* SLI4 HBA data structure entries */
 struct lpfc_sli4_hdw_queue {
 	/* Pointers to the constructed SLI4 queues */
@@ -560,6 +567,10 @@  struct lpfc_sli4_hdw_queue {
 	uint32_t abts_scsi_io_bufs;
 	uint32_t abts_nvme_io_bufs;
 
+	/* FC-4 Stats counters */
+	struct lpfc_fc4_ctrl_stat nvme_cstat;
+	struct lpfc_fc4_ctrl_stat scsi_cstat;
+
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 #define LPFC_CHECK_CPU_CNT    128
 	uint32_t cpucheck_rcv_io[LPFC_CHECK_CPU_CNT];