@@ -84,8 +84,6 @@ struct lpfc_sli2_slim;
#define LPFC_HB_MBOX_INTERVAL 5 /* Heart beat interval in seconds. */
#define LPFC_HB_MBOX_TIMEOUT 30 /* Heart beat timeout in seconds. */
-#define LPFC_LOOK_AHEAD_OFF 0 /* Look ahead logic is turned off */
-
/* Error Attention event polling interval */
#define LPFC_ERATT_POLL_INTERVAL 5 /* EATT poll interval in seconds */
@@ -821,6 +819,7 @@ struct lpfc_hba {
uint32_t cfg_fcp_imax;
uint32_t cfg_fcp_cpu_map;
uint32_t cfg_hdw_queue;
+ uint32_t cfg_irq_chann;
uint32_t cfg_suppress_rsp;
uint32_t cfg_nvme_oas;
uint32_t cfg_nvme_embed_cmd;
@@ -1043,6 +1042,9 @@ struct lpfc_hba {
struct dentry *debug_nvmeio_trc;
struct lpfc_debugfs_nvmeio_trc *nvmeio_trc;
struct dentry *debug_hdwqinfo;
+#ifdef LPFC_HDWQ_LOCK_STAT
+ struct dentry *debug_lockstat;
+#endif
atomic_t nvmeio_trc_cnt;
uint32_t nvmeio_trc_size;
uint32_t nvmeio_trc_output_idx;
@@ -1162,6 +1164,7 @@ struct lpfc_hba {
#define LPFC_CHECK_NVME_IO 1
#define LPFC_CHECK_NVMET_RCV 2
#define LPFC_CHECK_NVMET_IO 4
+#define LPFC_CHECK_SCSI_IO 8
uint16_t ktime_on;
uint64_t ktime_data_samples;
uint64_t ktime_status_samples;
@@ -4958,7 +4958,7 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr,
phba->cfg_fcp_imax = (uint32_t)val;
phba->initial_imax = phba->cfg_fcp_imax;
- for (i = 0; i < phba->cfg_hdw_queue; i += LPFC_MAX_EQ_DELAY_EQID_CNT)
+ for (i = 0; i < phba->cfg_irq_chann; i += LPFC_MAX_EQ_DELAY_EQID_CNT)
lpfc_modify_hba_eq_delay(phba, i, LPFC_MAX_EQ_DELAY_EQID_CNT,
val);
@@ -5059,13 +5059,6 @@ lpfc_fcp_cpu_map_show(struct device *dev, struct device_attribute *attr,
phba->cfg_fcp_cpu_map,
phba->sli4_hba.num_online_cpu);
break;
- case 2:
- len += snprintf(buf + len, PAGE_SIZE-len,
- "fcp_cpu_map: Driver centric mapping (%d): "
- "%d online CPUs\n",
- phba->cfg_fcp_cpu_map,
- phba->sli4_hba.num_online_cpu);
- break;
}
while (phba->sli4_hba.curr_disp_cpu < phba->sli4_hba.num_present_cpu) {
@@ -5076,35 +5069,35 @@ lpfc_fcp_cpu_map_show(struct device *dev, struct device_attribute *attr,
len += snprintf(
buf + len, PAGE_SIZE - len,
"CPU %02d hdwq None "
- "physid %d coreid %d\n",
+ "physid %d coreid %d ht %d\n",
phba->sli4_hba.curr_disp_cpu,
cpup->phys_id,
- cpup->core_id);
+ cpup->core_id, cpup->hyper);
else
len += snprintf(
buf + len, PAGE_SIZE - len,
- "CPU %02d hdwq %04d "
- "physid %d coreid %d\n",
+ "CPU %02d EQ %04d hdwq %04d "
+ "physid %d coreid %d ht %d\n",
phba->sli4_hba.curr_disp_cpu,
- cpup->hdwq, cpup->phys_id,
- cpup->core_id);
+ cpup->eq, cpup->hdwq, cpup->phys_id,
+ cpup->core_id, cpup->hyper);
} else {
if (cpup->hdwq == LPFC_VECTOR_MAP_EMPTY)
len += snprintf(
buf + len, PAGE_SIZE - len,
"CPU %02d hdwq None "
- "physid %d coreid %d IRQ %d\n",
+ "physid %d coreid %d ht %d IRQ %d\n",
phba->sli4_hba.curr_disp_cpu,
cpup->phys_id,
- cpup->core_id, cpup->irq);
+ cpup->core_id, cpup->hyper, cpup->irq);
else
len += snprintf(
buf + len, PAGE_SIZE - len,
- "CPU %02d hdwq %04d "
- "physid %d coreid %d IRQ %d\n",
+ "CPU %02d EQ %04d hdwq %04d "
+ "physid %d coreid %d ht %d IRQ %d\n",
phba->sli4_hba.curr_disp_cpu,
- cpup->hdwq, cpup->phys_id,
- cpup->core_id, cpup->irq);
+ cpup->eq, cpup->hdwq, cpup->phys_id,
+ cpup->core_id, cpup->hyper, cpup->irq);
}
phba->sli4_hba.curr_disp_cpu++;
@@ -5146,14 +5139,13 @@ lpfc_fcp_cpu_map_store(struct device *dev, struct device_attribute *attr,
# lpfc_fcp_cpu_map: Defines how to map CPUs to IRQ vectors
# for the HBA.
#
-# Value range is [0 to 2]. Default value is LPFC_DRIVER_CPU_MAP (2).
+# Value range is [0 to 1]. Default value is LPFC_HBA_CPU_MAP (1).
# 0 - Do not affinitize IRQ vectors
# 1 - Affinitize HBA vectors with respect to each HBA
# (start with CPU0 for each HBA)
-# 2 - Affintize HBA vectors with respect to the entire driver
-# (round robin thru all CPUs across all HBAs)
+# This also defines how Hardware Queues are mapped to specific CPUs.
*/
-static int lpfc_fcp_cpu_map = LPFC_DRIVER_CPU_MAP;
+static int lpfc_fcp_cpu_map = LPFC_HBA_CPU_MAP;
module_param(lpfc_fcp_cpu_map, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(lpfc_fcp_cpu_map,
"Defines how to map CPUs to IRQ vectors per HBA");
@@ -5187,7 +5179,7 @@ lpfc_fcp_cpu_map_init(struct lpfc_hba *phba, int val)
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"3326 lpfc_fcp_cpu_map: %d out of range, using "
"default\n", val);
- phba->cfg_fcp_cpu_map = LPFC_DRIVER_CPU_MAP;
+ phba->cfg_fcp_cpu_map = LPFC_HBA_CPU_MAP;
return 0;
}
@@ -5308,7 +5300,7 @@ LPFC_ATTR_R(xri_rebalancing, 1, 0, 1, "Enable/Disable XRI rebalancing");
* CPU. Otherwise, the default 0 (Round Robin) scheduling of FCP/NVME I/Os
* through WQs will be used.
*/
-LPFC_ATTR_RW(fcp_io_sched, LPFC_FCP_SCHED_BY_HDWQ,
+LPFC_ATTR_RW(fcp_io_sched, LPFC_FCP_SCHED_BY_CPU,
LPFC_FCP_SCHED_BY_HDWQ,
LPFC_FCP_SCHED_BY_CPU,
"Determine scheduling algorithm for "
@@ -5474,18 +5466,18 @@ LPFC_ATTR_RW(nvme_embed_cmd, 1, 0, 2,
"Embed NVME Command in WQE");
/*
- * lpfc_hdw_queue: Set the number of IO channels the driver
+ * lpfc_hdw_queue: Set the number of Hardware Queues the driver
* will advertise it supports to the NVME and SCSI layers. This also
- * will map to the number of EQ/CQ/WQs the driver will create.
+ * will map to the number of CQ/WQ pairs the driver will create.
*
* The NVME Layer will try to create this many, plus 1 administrative
* hardware queue. The administrative queue will always map to WQ 0
- * A hardware IO queue maps (qidx) to a specific driver WQ.
+ * A hardware IO queue maps (qidx) to a specific driver CQ/WQ.
*
* 0 = Configure the number of hdw queues to the number of active CPUs.
- * 1,64 = Manually specify how many hdw queues to use.
+ * 1,128 = Manually specify how many hdw queues to use.
*
- * Value range is [0,64]. Default value is 0.
+ * Value range is [0,128]. Default value is 0.
*/
LPFC_ATTR_R(hdw_queue,
LPFC_HBA_HDWQ_DEF,
@@ -5493,6 +5485,22 @@ LPFC_ATTR_R(hdw_queue,
"Set the number of I/O Hardware Queues");
/*
+ * lpfc_irq_chann: Set the number of IRQ vectors that are available
+ * for Hardware Queues to utilize. This also will map to the number
+ * of EQ / MSI-X vectors the driver will create. This should never be
+ * more than the number of Hardware Queues.
+ *
+ * 0 = Configure number of IRQ Channels to the number of active CPUs.
+ * 1,128 = Manually specify how many IRQ Channels to use.
+ *
+ * Value range is [0,128]. Default value is 0.
+ */
+LPFC_ATTR_R(irq_chann,
+ LPFC_HBA_HDWQ_DEF,
+ LPFC_HBA_HDWQ_MIN, LPFC_HBA_HDWQ_MAX,
+ "Set the number of I/O IRQ Channels");
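For orientation, a minimal sketch (not part of the patch; the helper name and the present_cpus argument are invented) of how the two parameters end up constrained by lpfc_get_cfgparam() and lpfc_nvme_mod_param_dep() further down:

static unsigned int
lpfc_effective_irq_chann(unsigned int irq_chann, unsigned int hdw_queue,
			 unsigned int present_cpus)
{
	if (hdw_queue == 0)		/* 0 means one queue per present CPU */
		hdw_queue = present_cpus;
	if (irq_chann == 0)		/* 0 means one vector per present CPU */
		irq_chann = present_cpus;
	if (irq_chann > hdw_queue)	/* never more EQ/MSI-X vectors than hdwqs */
		irq_chann = hdw_queue;
	return irq_chann;
}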
+
+/*
# lpfc_enable_hba_reset: Allow or prevent HBA resets to the hardware.
# 0 = HBA resets disabled
# 1 = HBA resets enabled (default)
@@ -5533,16 +5541,6 @@ LPFC_ATTR_RW(XLanePriority, 0, 0x0, 0x7f, "CS_CTL for Express Lane Feature.");
LPFC_ATTR_R(enable_bg, 0, 0, 1, "Enable BlockGuard Support");
/*
-# lpfc_fcp_look_ahead: Look ahead for completions in FCP start routine
-# 0 = disabled (default)
-# 1 = enabled
-# Value range is [0,1]. Default value is 0.
-#
-# This feature in under investigation and may be supported in the future.
-*/
-unsigned int lpfc_fcp_look_ahead = LPFC_LOOK_AHEAD_OFF;
-
-/*
# lpfc_prot_mask: i
# - Bit mask of host protection capabilities used to register with the
# SCSI mid-layer
@@ -5796,6 +5794,7 @@ struct device_attribute *lpfc_hba_attrs[] = {
&dev_attr_lpfc_fcp_imax,
&dev_attr_lpfc_fcp_cpu_map,
&dev_attr_lpfc_hdw_queue,
+ &dev_attr_lpfc_irq_chann,
&dev_attr_lpfc_suppress_rsp,
&dev_attr_lpfc_nvmet_mrq,
&dev_attr_lpfc_nvmet_mrq_post,
@@ -6876,6 +6875,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
lpfc_nvme_enable_fb_init(phba, lpfc_nvme_enable_fb);
lpfc_nvmet_fb_size_init(phba, lpfc_nvmet_fb_size);
lpfc_hdw_queue_init(phba, lpfc_hdw_queue);
+ lpfc_irq_chann_init(phba, lpfc_irq_chann);
lpfc_enable_bbcr_init(phba, lpfc_enable_bbcr);
lpfc_enable_dpp_init(phba, lpfc_enable_dpp);
lpfc_enable_scsi_mq_init(phba, lpfc_enable_scsi_mq);
@@ -6901,6 +6901,10 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
/* A value of 0 means use the number of CPUs found in the system */
if (phba->cfg_hdw_queue == 0)
phba->cfg_hdw_queue = phba->sli4_hba.num_present_cpu;
+ if (phba->cfg_irq_chann == 0)
+ phba->cfg_irq_chann = phba->sli4_hba.num_present_cpu;
+ if (phba->cfg_irq_chann > phba->cfg_hdw_queue)
+ phba->cfg_irq_chann = phba->cfg_hdw_queue;
phba->cfg_soft_wwnn = 0L;
phba->cfg_soft_wwpn = 0L;
@@ -6943,6 +6947,10 @@ lpfc_nvme_mod_param_dep(struct lpfc_hba *phba)
{
if (phba->cfg_hdw_queue > phba->sli4_hba.num_present_cpu)
phba->cfg_hdw_queue = phba->sli4_hba.num_present_cpu;
+ if (phba->cfg_irq_chann > phba->sli4_hba.num_present_cpu)
+ phba->cfg_irq_chann = phba->sli4_hba.num_present_cpu;
+ if (phba->cfg_irq_chann > phba->cfg_hdw_queue)
+ phba->cfg_irq_chann = phba->cfg_hdw_queue;
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME &&
phba->nvmet_support) {
@@ -6963,11 +6971,11 @@ lpfc_nvme_mod_param_dep(struct lpfc_hba *phba)
}
if (!phba->cfg_nvmet_mrq)
- phba->cfg_nvmet_mrq = phba->cfg_hdw_queue;
+ phba->cfg_nvmet_mrq = phba->cfg_irq_chann;
/* Adjust lpfc_nvmet_mrq to avoid running out of WQE slots */
- if (phba->cfg_nvmet_mrq > phba->cfg_hdw_queue) {
- phba->cfg_nvmet_mrq = phba->cfg_hdw_queue;
+ if (phba->cfg_nvmet_mrq > phba->cfg_irq_chann) {
+ phba->cfg_nvmet_mrq = phba->cfg_irq_chann;
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC,
"6018 Adjust lpfc_nvmet_mrq to %d\n",
phba->cfg_nvmet_mrq);
@@ -440,7 +440,6 @@ extern spinlock_t _dump_buf_lock;
extern int _dump_buf_done;
extern spinlock_t pgcnt_lock;
extern unsigned int pgcnt;
-extern unsigned int lpfc_fcp_look_ahead;
/* Interface exported by fabric iocb scheduler */
void lpfc_fabric_abort_nport(struct lpfc_nodelist *);
@@ -378,6 +378,67 @@ lpfc_debugfs_hbqinfo_data(struct lpfc_hba *phba, char *buf, int size)
return len;
}
+static int lpfc_debugfs_last_xripool;
+
+/**
+ * lpfc_debugfs_commonxripools_data - Dump Hardware Queue info to a buffer
+ * @phba: The HBA to gather host buffer info from.
+ * @buf: The buffer to dump log into.
+ * @size: The maximum amount of data to process.
+ *
+ * Description:
+ * This routine dumps the Hardware Queue info from the @phba to @buf up to
+ * @size number of bytes. A header that describes the current hdwq state will be
+ * dumped to @buf first and then info on each hdwq entry will be dumped to @buf
+ * until @size bytes have been dumped or all the hdwq info has been dumped.
+ *
+ * Notes:
+ * This routine will rotate through each configured Hardware Queue each
+ * time called.
+ *
+ * Return Value:
+ * This routine returns the amount of bytes that were dumped into @buf and will
+ * not exceed @size.
+ **/
+static int
+lpfc_debugfs_commonxripools_data(struct lpfc_hba *phba, char *buf, int size)
+{
+ struct lpfc_sli4_hdw_queue *qp;
+ int len = 0;
+ int i, out;
+ unsigned long iflag;
+
+ for (i = 0; i < phba->cfg_hdw_queue; i++) {
+ if (len > (LPFC_DUMP_MULTIXRIPOOL_SIZE - 80))
+ break;
+ qp = &phba->sli4_hba.hdwq[lpfc_debugfs_last_xripool];
+
+ len += snprintf(buf + len, size - len, "HdwQ %d Info ", i);
+ spin_lock_irqsave(&qp->abts_scsi_buf_list_lock, iflag);
+ spin_lock(&qp->abts_nvme_buf_list_lock);
+ spin_lock(&qp->io_buf_list_get_lock);
+ spin_lock(&qp->io_buf_list_put_lock);
+ out = qp->total_io_bufs - (qp->get_io_bufs + qp->put_io_bufs +
+ qp->abts_scsi_io_bufs + qp->abts_nvme_io_bufs);
+ len += snprintf(buf + len, size - len,
+ "tot:%d get:%d put:%d mt:%d "
+ "ABTS scsi:%d nvme:%d Out:%d\n",
+ qp->total_io_bufs, qp->get_io_bufs, qp->put_io_bufs,
+ qp->empty_io_bufs, qp->abts_scsi_io_bufs,
+ qp->abts_nvme_io_bufs, out);
+ spin_unlock(&qp->io_buf_list_put_lock);
+ spin_unlock(&qp->io_buf_list_get_lock);
+ spin_unlock(&qp->abts_nvme_buf_list_lock);
+ spin_unlock_irqrestore(&qp->abts_scsi_buf_list_lock, iflag);
+
+ lpfc_debugfs_last_xripool++;
+ if (lpfc_debugfs_last_xripool >= phba->cfg_hdw_queue)
+ lpfc_debugfs_last_xripool = 0;
+ }
+
+ return len;
+}
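For illustration (values invented), each Hardware Queue contributes one line of the form:

	HdwQ 3 Info tot:512 get:200 put:280 mt:0 ABTS scsi:2 nvme:0 Out:30

where Out is tot minus everything on the get/put/abort lists, i.e. 512 - (200 + 280 + 2 + 0) = 30.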
+
/**
* lpfc_debugfs_multixripools_data - Display multi-XRI pools information
* @phba: The HBA to gather host buffer info from.
@@ -405,6 +466,17 @@ lpfc_debugfs_multixripools_data(struct lpfc_hba *phba, char *buf, int size)
u32 txcmplq_cnt;
char tmp[LPFC_DEBUG_OUT_LINE_SZ] = {0};
+ if (phba->sli_rev != LPFC_SLI_REV4)
+ return 0;
+
+ if (!phba->sli4_hba.hdwq)
+ return 0;
+
+ if (!phba->cfg_xri_rebalancing) {
+ i = lpfc_debugfs_commonxripools_data(phba, buf, size);
+ return i;
+ }
+
/*
* Pbl: Current number of free XRIs in public pool
* Pvt: Current number of free XRIs in private pool
@@ -498,10 +570,12 @@ lpfc_debugfs_multixripools_data(struct lpfc_hba *phba, char *buf, int size)
return strnlen(buf, size);
}
-static int lpfc_debugfs_last_hdwq;
+
+#ifdef LPFC_HDWQ_LOCK_STAT
+static int lpfc_debugfs_last_lock;
/**
- * lpfc_debugfs_hdwqinfo_data - Dump Hardware Queue info to a buffer
+ * lpfc_debugfs_lockstat_data - Dump Hardware Queue lock statistics to a buffer
* @phba: The HBA to gather host buffer info from.
* @buf: The buffer to dump log into.
* @size: The maximum amount of data to process.
@@ -521,12 +595,11 @@ static int lpfc_debugfs_last_hdwq;
* not exceed @size.
**/
static int
-lpfc_debugfs_hdwqinfo_data(struct lpfc_hba *phba, char *buf, int size)
+lpfc_debugfs_lockstat_data(struct lpfc_hba *phba, char *buf, int size)
{
struct lpfc_sli4_hdw_queue *qp;
int len = 0;
- int i, out;
- unsigned long iflag;
+ int i;
if (phba->sli_rev != LPFC_SLI_REV4)
return 0;
@@ -535,35 +608,40 @@ lpfc_debugfs_hdwqinfo_data(struct lpfc_hba *phba, char *buf, int size)
return 0;
for (i = 0; i < phba->cfg_hdw_queue; i++) {
- if (len > (LPFC_HDWQINFO_SIZE - 80))
+ if (len > (LPFC_HDWQINFO_SIZE - 100))
break;
- qp = &phba->sli4_hba.hdwq[lpfc_debugfs_last_hdwq];
+ qp = &phba->sli4_hba.hdwq[lpfc_debugfs_last_lock];
- len += snprintf(buf + len, size - len, "HdwQ %d Info ", i);
- spin_lock_irqsave(&qp->abts_scsi_buf_list_lock, iflag);
- spin_lock(&qp->abts_nvme_buf_list_lock);
- spin_lock(&qp->io_buf_list_get_lock);
- spin_lock(&qp->io_buf_list_put_lock);
- out = qp->total_io_bufs - (qp->get_io_bufs + qp->put_io_bufs +
- qp->abts_scsi_io_bufs + qp->abts_nvme_io_bufs);
- len += snprintf(buf + len, size - len,
- "tot:%d get:%d put:%d mt:%d "
- "ABTS scsi:%d nvme:%d Out:%d\n",
- qp->total_io_bufs, qp->get_io_bufs, qp->put_io_bufs,
- qp->empty_io_bufs, qp->abts_scsi_io_bufs,
- qp->abts_nvme_io_bufs, out);
- spin_unlock(&qp->io_buf_list_put_lock);
- spin_unlock(&qp->io_buf_list_get_lock);
- spin_unlock(&qp->abts_nvme_buf_list_lock);
- spin_unlock_irqrestore(&qp->abts_scsi_buf_list_lock, iflag);
+ len += snprintf(buf + len, size - len, "HdwQ %03d Lock ", i);
+ if (phba->cfg_xri_rebalancing) {
+ len += snprintf(buf + len, size - len,
+ "get_pvt:%d mv_pvt:%d "
+ "mv2pub:%d mv2pvt:%d "
+ "put_pvt:%d put_pub:%d wq:%d\n",
+ qp->lock_conflict.alloc_pvt_pool,
+ qp->lock_conflict.mv_from_pvt_pool,
+ qp->lock_conflict.mv_to_pub_pool,
+ qp->lock_conflict.mv_to_pvt_pool,
+ qp->lock_conflict.free_pvt_pool,
+ qp->lock_conflict.free_pub_pool,
+ qp->lock_conflict.wq_access);
+ } else {
+ len += snprintf(buf + len, size - len,
+ "get:%d put:%d free:%d wq:%d\n",
+ qp->lock_conflict.alloc_xri_get,
+ qp->lock_conflict.alloc_xri_put,
+ qp->lock_conflict.free_xri,
+ qp->lock_conflict.wq_access);
+ }
- lpfc_debugfs_last_hdwq++;
- if (lpfc_debugfs_last_hdwq >= phba->cfg_hdw_queue)
- lpfc_debugfs_last_hdwq = 0;
+ lpfc_debugfs_last_lock++;
+ if (lpfc_debugfs_last_lock >= phba->cfg_hdw_queue)
+ lpfc_debugfs_last_lock = 0;
}
return len;
}
+#endif
static int lpfc_debugfs_last_hba_slim_off;
@@ -962,7 +1040,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
struct lpfc_nvme_lport *lport;
uint64_t data1, data2, data3;
uint64_t tot, totin, totout;
- int cnt, i, maxch;
+ int cnt, i;
int len = 0;
if (phba->nvmet_support) {
@@ -1104,10 +1182,6 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
atomic_read(&lport->fc4NvmeLsRequests),
atomic_read(&lport->fc4NvmeLsCmpls));
- if (phba->cfg_hdw_queue < LPFC_HBA_HDWQ_MAX)
- maxch = phba->cfg_hdw_queue;
- else
- maxch = LPFC_HBA_HDWQ_MAX;
totin = 0;
totout = 0;
for (i = 0; i < phba->cfg_hdw_queue; i++) {
@@ -1545,7 +1619,7 @@ lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size)
{
struct lpfc_hba *phba = vport->phba;
struct lpfc_sli4_hdw_queue *qp;
- int i, j;
+ int i, j, max_cnt;
int len = 0;
uint32_t tot_xmt;
uint32_t tot_rcv;
@@ -1563,6 +1637,7 @@ lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size)
} else {
len += snprintf(buf + len, PAGE_SIZE - len, "\n");
}
+ max_cnt = size - LPFC_DEBUG_OUT_LINE_SZ;
for (i = 0; i < phba->cfg_hdw_queue; i++) {
qp = &phba->sli4_hba.hdwq[i];
@@ -1604,6 +1679,11 @@ lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size)
}
len += snprintf(buf + len, PAGE_SIZE - len,
"Total: %x\n", tot_xmt);
+ if (len >= max_cnt) {
+ len += snprintf(buf + len, PAGE_SIZE - len,
+ "Truncated ...\n");
+ return len;
+ }
}
return len;
}
@@ -1902,11 +1982,8 @@ lpfc_debugfs_multixripools_open(struct inode *inode, struct file *file)
goto out;
}
- if (phba->cfg_xri_rebalancing)
- debug->len = lpfc_debugfs_multixripools_data(
- phba, debug->buffer, LPFC_DUMP_MULTIXRIPOOL_SIZE);
- else
- debug->len = 0;
+ debug->len = lpfc_debugfs_multixripools_data(
+ phba, debug->buffer, LPFC_DUMP_MULTIXRIPOOL_SIZE);
debug->i_private = inode->i_private;
file->private_data = debug;
@@ -1916,8 +1993,9 @@ lpfc_debugfs_multixripools_open(struct inode *inode, struct file *file)
return rc;
}
+#ifdef LPFC_HDWQ_LOCK_STAT
/**
- * lpfc_debugfs_hdwqinfo_open - Open the hdwqinfo debugfs buffer
+ * lpfc_debugfs_lockstat_open - Open the lockstat debugfs buffer
* @inode: The inode pointer that contains a vport pointer.
* @file: The file pointer to attach the log output.
*
@@ -1932,7 +2010,7 @@ lpfc_debugfs_multixripools_open(struct inode *inode, struct file *file)
* error value.
**/
static int
-lpfc_debugfs_hdwqinfo_open(struct inode *inode, struct file *file)
+lpfc_debugfs_lockstat_open(struct inode *inode, struct file *file)
{
struct lpfc_hba *phba = inode->i_private;
struct lpfc_debug *debug;
@@ -1949,7 +2027,7 @@ lpfc_debugfs_hdwqinfo_open(struct inode *inode, struct file *file)
goto out;
}
- debug->len = lpfc_debugfs_hdwqinfo_data(phba, debug->buffer,
+ debug->len = lpfc_debugfs_lockstat_data(phba, debug->buffer,
LPFC_HBQINFO_SIZE);
file->private_data = debug;
@@ -1958,6 +2036,48 @@ lpfc_debugfs_hdwqinfo_open(struct inode *inode, struct file *file)
return rc;
}
+static ssize_t
+lpfc_debugfs_lockstat_write(struct file *file, const char __user *buf,
+ size_t nbytes, loff_t *ppos)
+{
+ struct lpfc_debug *debug = file->private_data;
+ struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private;
+ struct lpfc_sli4_hdw_queue *qp;
+ char mybuf[64];
+ char *pbuf;
+ int i;
+
+ /* Protect copy from user */
+ if (!access_ok(VERIFY_READ, buf, nbytes))
+ return -EFAULT;
+
+	memset(mybuf, 0, sizeof(mybuf));
+
+	/* Clamp the copy so a long write cannot overflow mybuf */
+	if (nbytes > sizeof(mybuf) - 1)
+		nbytes = sizeof(mybuf) - 1;
+
+	if (copy_from_user(mybuf, buf, nbytes))
+ return -EFAULT;
+ pbuf = &mybuf[0];
+
+ if ((strncmp(pbuf, "reset", strlen("reset")) == 0) ||
+ (strncmp(pbuf, "zero", strlen("zero")) == 0)) {
+ for (i = 0; i < phba->cfg_hdw_queue; i++) {
+ qp = &phba->sli4_hba.hdwq[i];
+ qp->lock_conflict.alloc_xri_get = 0;
+ qp->lock_conflict.alloc_xri_put = 0;
+ qp->lock_conflict.free_xri = 0;
+ qp->lock_conflict.wq_access = 0;
+ qp->lock_conflict.alloc_pvt_pool = 0;
+ qp->lock_conflict.mv_from_pvt_pool = 0;
+ qp->lock_conflict.mv_to_pub_pool = 0;
+ qp->lock_conflict.mv_to_pvt_pool = 0;
+ qp->lock_conflict.free_pvt_pool = 0;
+ qp->lock_conflict.free_pub_pool = 0;
+ }
+ }
+ return nbytes;
+}
+#endif
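The counters zeroed above are incremented at the driver's hardware-queue lock sites; as a hedged sketch only (the macro name and exact shape are assumptions, not the driver's actual helper), an LPFC_HDWQ_LOCK_STAT build implies wrappers of roughly this form:

#ifdef LPFC_HDWQ_LOCK_STAT
/* Sketch: count a conflict whenever the lock is found already held */
#define lpfc_qp_spin_lock(lock, qp, counter)			\
	do {							\
		if (!spin_trylock(lock)) {			\
			(qp)->lock_conflict.counter++;		\
			spin_lock(lock);			\
		}						\
	} while (0)
#else
#define lpfc_qp_spin_lock(lock, qp, counter)	spin_lock(lock)
#endif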
+
/**
* lpfc_debugfs_dumpHBASlim_open - Open the Dump HBA SLIM debugfs buffer
* @inode: The inode pointer that contains a vport pointer.
@@ -2814,7 +2934,7 @@ lpfc_debugfs_cpucheck_open(struct inode *inode, struct file *file)
}
debug->len = lpfc_debugfs_cpucheck_data(vport, debug->buffer,
- LPFC_NVMEKTIME_SIZE);
+ LPFC_CPUCHECK_SIZE);
debug->i_private = inode->i_private;
file->private_data = debug;
@@ -2849,8 +2969,18 @@ lpfc_debugfs_cpucheck_write(struct file *file, const char __user *buf,
if (phba->nvmet_support)
phba->cpucheck_on |= LPFC_CHECK_NVMET_IO;
else
+ phba->cpucheck_on |= (LPFC_CHECK_NVME_IO |
+ LPFC_CHECK_SCSI_IO);
+ return strlen(pbuf);
+ } else if ((strncmp(pbuf, "nvme_on", sizeof("nvme_on") - 1) == 0)) {
+ if (phba->nvmet_support)
+ phba->cpucheck_on |= LPFC_CHECK_NVMET_IO;
+ else
phba->cpucheck_on |= LPFC_CHECK_NVME_IO;
return strlen(pbuf);
+ } else if ((strncmp(pbuf, "scsi_on", sizeof("scsi_on") - 1) == 0)) {
+ phba->cpucheck_on |= LPFC_CHECK_SCSI_IO;
+ return strlen(pbuf);
} else if ((strncmp(pbuf, "rcv",
sizeof("rcv") - 1) == 0)) {
if (phba->nvmet_support)
@@ -3730,46 +3860,38 @@ lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer,
int *len, int max_cnt, int eqidx, int eq_id)
{
struct lpfc_queue *qp;
- int qidx, rc;
+ int rc;
- for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
- qp = phba->sli4_hba.hdwq[qidx].fcp_cq;
- if (qp->assoc_qid != eq_id)
- continue;
+ qp = phba->sli4_hba.hdwq[eqidx].fcp_cq;
- *len = __lpfc_idiag_print_cq(qp, "FCP", pbuffer, *len);
+ *len = __lpfc_idiag_print_cq(qp, "FCP", pbuffer, *len);
- /* Reset max counter */
- qp->CQ_max_cqe = 0;
+ /* Reset max counter */
+ qp->CQ_max_cqe = 0;
- if (*len >= max_cnt)
- return 1;
+ if (*len >= max_cnt)
+ return 1;
- rc = lpfc_idiag_wqs_for_cq(phba, "FCP", pbuffer, len,
- max_cnt, qp->queue_id);
- if (rc)
- return 1;
- }
+ rc = lpfc_idiag_wqs_for_cq(phba, "FCP", pbuffer, len,
+ max_cnt, qp->queue_id);
+ if (rc)
+ return 1;
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
- for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
- qp = phba->sli4_hba.hdwq[qidx].nvme_cq;
- if (qp->assoc_qid != eq_id)
- continue;
+ qp = phba->sli4_hba.hdwq[eqidx].nvme_cq;
- *len = __lpfc_idiag_print_cq(qp, "NVME", pbuffer, *len);
+ *len = __lpfc_idiag_print_cq(qp, "NVME", pbuffer, *len);
- /* Reset max counter */
- qp->CQ_max_cqe = 0;
+ /* Reset max counter */
+ qp->CQ_max_cqe = 0;
- if (*len >= max_cnt)
- return 1;
+ if (*len >= max_cnt)
+ return 1;
- rc = lpfc_idiag_wqs_for_cq(phba, "NVME", pbuffer, len,
- max_cnt, qp->queue_id);
- if (rc)
- return 1;
- }
+ rc = lpfc_idiag_wqs_for_cq(phba, "NVME", pbuffer, len,
+ max_cnt, qp->queue_id);
+ if (rc)
+ return 1;
}
if ((eqidx < phba->cfg_nvmet_mrq) && phba->nvmet_support) {
@@ -3810,9 +3932,10 @@ __lpfc_idiag_print_eq(struct lpfc_queue *qp, char *eqtype,
(unsigned long long)qp->q_cnt_4, qp->q_mode);
len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
"EQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
- "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
+ "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d] AFFIN[%03d]",
qp->queue_id, qp->entry_count, qp->entry_size,
- qp->host_index, qp->hba_index, qp->entry_repost);
+ qp->host_index, qp->hba_index, qp->entry_repost,
+ qp->chann);
len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
return len;
@@ -3867,7 +3990,7 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes,
phba->lpfc_idiag_last_eq = 0;
len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
- "EQ %d out of %d HBA EQs\n",
+ "HDWQ %d out of %d HBA HDWQs\n",
x, phba->cfg_hdw_queue);
/* Fast-path EQ */
@@ -5297,14 +5420,17 @@ static const struct file_operations lpfc_debugfs_op_hbqinfo = {
.release = lpfc_debugfs_release,
};
-#undef lpfc_debugfs_op_hdwqinfo
-static const struct file_operations lpfc_debugfs_op_hdwqinfo = {
+#ifdef LPFC_HDWQ_LOCK_STAT
+#undef lpfc_debugfs_op_lockstat
+static const struct file_operations lpfc_debugfs_op_lockstat = {
.owner = THIS_MODULE,
- .open = lpfc_debugfs_hdwqinfo_open,
+ .open = lpfc_debugfs_lockstat_open,
.llseek = lpfc_debugfs_lseek,
.read = lpfc_debugfs_read,
+ .write = lpfc_debugfs_lockstat_write,
.release = lpfc_debugfs_release,
};
+#endif
#undef lpfc_debugfs_op_dumpHBASlim
static const struct file_operations lpfc_debugfs_op_dumpHBASlim = {
@@ -5769,17 +5895,19 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
goto debug_failed;
}
- /* Setup hdwqinfo */
- snprintf(name, sizeof(name), "hdwqinfo");
- phba->debug_hdwqinfo =
+#ifdef LPFC_HDWQ_LOCK_STAT
+ /* Setup lockstat */
+ snprintf(name, sizeof(name), "lockstat");
+ phba->debug_lockstat =
debugfs_create_file(name, S_IFREG | 0644,
phba->hba_debugfs_root,
- phba, &lpfc_debugfs_op_hdwqinfo);
- if (!phba->debug_hdwqinfo) {
+ phba, &lpfc_debugfs_op_lockstat);
+ if (!phba->debug_lockstat) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
- "0511 Cant create debugfs hdwqinfo\n");
+ "0913 Cant create debugfs lockstat\n");
goto debug_failed;
}
+#endif
/* Setup dumpHBASlim */
if (phba->sli_rev < LPFC_SLI_REV4) {
@@ -6118,7 +6246,7 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
vport, &lpfc_debugfs_op_scsistat);
if (!vport->debug_scsistat) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
- "0811 Cannot create debugfs scsistat\n");
+ "0914 Cannot create debugfs scsistat\n");
goto debug_failed;
}
@@ -6339,9 +6467,10 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
debugfs_remove(phba->debug_hbqinfo); /* hbqinfo */
phba->debug_hbqinfo = NULL;
- debugfs_remove(phba->debug_hdwqinfo); /* hdwqinfo */
- phba->debug_hdwqinfo = NULL;
-
+#ifdef LPFC_HDWQ_LOCK_STAT
+ debugfs_remove(phba->debug_lockstat); /* lockstat */
+ phba->debug_lockstat = NULL;
+#endif
debugfs_remove(phba->debug_dumpHBASlim); /* HBASlim */
phba->debug_dumpHBASlim = NULL;
@@ -290,9 +290,6 @@ struct lpfc_idiag {
/* multixripool output buffer size */
#define LPFC_DUMP_MULTIXRIPOOL_SIZE 8192
-/* hdwqinfo output buffer size */
-#define LPFC_HDWQINFO_SIZE 8192
-
enum {
DUMP_FCP,
DUMP_NVME,
@@ -211,9 +211,8 @@ struct lpfc_sli_intf {
#define LPFC_DEF_IMAX 150000
#define LPFC_MIN_CPU_MAP 0
-#define LPFC_MAX_CPU_MAP 2
+#define LPFC_MAX_CPU_MAP 1
#define LPFC_HBA_CPU_MAP 1
-#define LPFC_DRIVER_CPU_MAP 2 /* Default */
/* PORT_CAPABILITIES constants. */
#define LPFC_MAX_SUPPORTED_PAGES 8
@@ -37,6 +37,7 @@
#include <linux/miscdevice.h>
#include <linux/percpu.h>
#include <linux/msi.h>
+#include <linux/irq.h>
#include <linux/bitops.h>
#include <scsi/scsi.h>
@@ -92,6 +93,8 @@ static void lpfc_sli4_cq_event_release_all(struct lpfc_hba *);
static void lpfc_sli4_disable_intr(struct lpfc_hba *);
static uint32_t lpfc_sli4_enable_intr(struct lpfc_hba *, uint32_t);
static void lpfc_sli4_oas_verify(struct lpfc_hba *phba);
+static uint16_t lpfc_find_eq_handle(struct lpfc_hba *, uint16_t);
+static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int);
static struct scsi_transport_template *lpfc_transport_template = NULL;
static struct scsi_transport_template *lpfc_vport_transport_template = NULL;
@@ -1363,13 +1366,13 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
}
/* Interrupts per sec per EQ */
- val = phba->cfg_fcp_imax / phba->cfg_hdw_queue;
+ val = phba->cfg_fcp_imax / phba->cfg_irq_chann;
tick_cqe = val / CONFIG_HZ; /* Per tick per EQ */
/* Assume 1 CQE/ISR, calc max CQEs allowed for time duration */
max_cqe = time_elapsed * tick_cqe;
- for (i = 0; i < phba->cfg_hdw_queue; i++) {
+ for (i = 0; i < phba->cfg_irq_chann; i++) {
/* Fast-path EQ */
qp = phba->sli4_hba.hdwq[i].hba_eq;
if (!qp)
@@ -1393,7 +1396,7 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
if (val) {
/* First, interrupts per sec per EQ */
val = phba->cfg_fcp_imax /
- phba->cfg_hdw_queue;
+ phba->cfg_irq_chann;
/* us delay between each interrupt */
val = LPFC_SEC_TO_USEC / val;
@@ -4331,9 +4334,16 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
shost->max_lun = vport->cfg_max_luns;
shost->this_id = -1;
shost->max_cmd_len = 16;
- if (shost_use_blk_mq(shost) && phba->cfg_enable_scsi_mq) {
+
+ /* Advertise how many hw queues we support based on fcp_io_sched
+ * and if SCSI mq is turned on.
+ */
+ if ((phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) &&
+ (shost_use_blk_mq(shost) && phba->cfg_enable_scsi_mq))
shost->nr_hw_queues = phba->cfg_hdw_queue;
- }
+ else
+ shost->nr_hw_queues = phba->sli4_hba.num_present_cpu;
+
if (phba->sli_rev == LPFC_SLI_REV4) {
shost->dma_boundary =
phba->sli4_hba.pc_sli4_params.sge_supp_len-1;
@@ -6813,7 +6823,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
goto out_remove_rpi_hdrs;
}
- phba->sli4_hba.hba_eq_hdl = kcalloc(phba->cfg_hdw_queue,
+ phba->sli4_hba.hba_eq_hdl = kcalloc(phba->cfg_irq_chann,
sizeof(struct lpfc_hba_eq_hdl),
GFP_KERNEL);
if (!phba->sli4_hba.hba_eq_hdl) {
@@ -8251,7 +8261,7 @@ lpfc_sli4_read_config(struct lpfc_hba *phba)
struct lpfc_rsrc_desc_fcfcoe *desc;
char *pdesc_0;
uint16_t forced_link_speed;
- uint32_t if_type;
+ uint32_t if_type, qmin;
int length, i, rc = 0, rc2;
pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
@@ -8356,40 +8366,44 @@ lpfc_sli4_read_config(struct lpfc_hba *phba)
phba->sli4_hba.max_cfg_param.max_rq);
/*
- * Calculate NVME queue resources based on how
- * many WQ/CQs are available.
+ * Calculate queue resources based on how
+ * many WQ/CQ/EQs are available.
*/
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
- length = phba->sli4_hba.max_cfg_param.max_wq;
- if (phba->sli4_hba.max_cfg_param.max_cq <
- phba->sli4_hba.max_cfg_param.max_wq)
- length = phba->sli4_hba.max_cfg_param.max_cq;
+ qmin = phba->sli4_hba.max_cfg_param.max_wq;
+ if (phba->sli4_hba.max_cfg_param.max_cq < qmin)
+ qmin = phba->sli4_hba.max_cfg_param.max_cq;
+ if (phba->sli4_hba.max_cfg_param.max_eq < qmin)
+ qmin = phba->sli4_hba.max_cfg_param.max_eq;
+ /*
+	 * What's left after this can go toward NVME / FCP.
+ * The minus 4 accounts for ELS, NVME LS, MBOX
+ * plus one extra. When configured for
+ * NVMET, FCP io channel WQs are not created.
+ */
+ qmin -= 4;
- /*
- * Whats left after this can go toward NVME.
- * The minus 6 accounts for ELS, NVME LS, MBOX
- * plus a couple extra. When configured for
- * NVMET, FCP io channel WQs are not created.
- */
- length -= 6;
-
- /* Take off FCP queues */
- if (!phba->nvmet_support)
- length -= phba->cfg_hdw_queue;
-
- /* Check to see if there is enough for NVME */
- if (phba->cfg_hdw_queue > length) {
- lpfc_printf_log(
- phba, KERN_ERR, LOG_SLI,
- "2005 Reducing NVME IO channel to %d: "
- "WQ %d CQ %d CommonIO %d\n",
- length,
+ /* If NVME is configured double the number of CQ/WQs needed */
+ if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
+ !phba->nvmet_support)
+ qmin /= 2;
+
+	/* Check to see if there are enough queue resources */
+ if ((phba->cfg_irq_chann > qmin) ||
+ (phba->cfg_hdw_queue > qmin)) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "2005 Reducing Queues: "
+ "WQ %d CQ %d EQ %d: min %d: "
+ "IRQ %d HDWQ %d\n",
phba->sli4_hba.max_cfg_param.max_wq,
phba->sli4_hba.max_cfg_param.max_cq,
+ phba->sli4_hba.max_cfg_param.max_eq,
+ qmin, phba->cfg_irq_chann,
phba->cfg_hdw_queue);
- phba->cfg_hdw_queue = length;
- }
+ if (phba->cfg_irq_chann > qmin)
+ phba->cfg_irq_chann = qmin;
+ if (phba->cfg_hdw_queue > qmin)
+ phba->cfg_hdw_queue = qmin;
}
}
@@ -8606,25 +8620,17 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba)
* device parameters
*/
- if (phba->cfg_hdw_queue > phba->sli4_hba.max_cfg_param.max_eq) {
- lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "2575 Reducing IO channels to match number of "
- "available EQs: from %d to %d\n",
- phba->cfg_hdw_queue,
- phba->sli4_hba.max_cfg_param.max_eq);
- phba->cfg_hdw_queue = phba->sli4_hba.max_cfg_param.max_eq;
- }
-
if (phba->nvmet_support) {
- if (phba->cfg_hdw_queue < phba->cfg_nvmet_mrq)
- phba->cfg_nvmet_mrq = phba->cfg_hdw_queue;
+ if (phba->cfg_irq_chann < phba->cfg_nvmet_mrq)
+ phba->cfg_nvmet_mrq = phba->cfg_irq_chann;
}
if (phba->cfg_nvmet_mrq > LPFC_NVMET_MRQ_MAX)
phba->cfg_nvmet_mrq = LPFC_NVMET_MRQ_MAX;
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "2574 IO channels: hdwQ %d MRQ: %d\n",
- phba->cfg_hdw_queue, phba->cfg_nvmet_mrq);
+ "2574 IO channels: hdwQ %d IRQ %d MRQ: %d\n",
+ phba->cfg_hdw_queue, phba->cfg_irq_chann,
+ phba->cfg_nvmet_mrq);
/* Get EQ depth from module parameter, fake the default for now */
phba->sli4_hba.eq_esize = LPFC_EQE_SIZE_4B;
@@ -8652,6 +8658,7 @@ lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx)
}
qdesc->qe_valid = 1;
qdesc->hdwq = wqidx;
+ qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
phba->sli4_hba.hdwq[wqidx].nvme_cq = qdesc;
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
@@ -8663,6 +8670,7 @@ lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx)
return 1;
}
qdesc->hdwq = wqidx;
+ qdesc->chann = wqidx;
phba->sli4_hba.hdwq[wqidx].nvme_wq = qdesc;
list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
return 0;
@@ -8692,6 +8700,7 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
}
qdesc->qe_valid = 1;
qdesc->hdwq = wqidx;
+ qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
phba->sli4_hba.hdwq[wqidx].fcp_cq = qdesc;
/* Create Fast Path FCP WQs */
@@ -8714,6 +8723,7 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
return 1;
}
qdesc->hdwq = wqidx;
+ qdesc->chann = wqidx;
phba->sli4_hba.hdwq[wqidx].fcp_wq = qdesc;
list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
return 0;
@@ -8737,7 +8747,7 @@ int
lpfc_sli4_queue_create(struct lpfc_hba *phba)
{
struct lpfc_queue *qdesc;
- int idx;
+ int idx, eqidx;
struct lpfc_sli4_hdw_queue *qp;
/*
@@ -8823,7 +8833,18 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
/* Create HBA Event Queues (EQs) */
for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
- /* Create EQs */
+ /*
+		 * If there are more Hardware Queues than available
+		 * EQs, multiple Hardware Queues may share a common EQ.
+ */
+ if (idx >= phba->cfg_irq_chann) {
+ /* Share an existing EQ */
+ eqidx = lpfc_find_eq_handle(phba, idx);
+ phba->sli4_hba.hdwq[idx].hba_eq =
+ phba->sli4_hba.hdwq[eqidx].hba_eq;
+ continue;
+ }
+ /* Create an EQ */
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.eq_esize,
phba->sli4_hba.eq_ecount);
@@ -8834,20 +8855,27 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
}
qdesc->qe_valid = 1;
qdesc->hdwq = idx;
+
+		/* Save the CPU this EQ is affinitized to */
+ eqidx = lpfc_find_eq_handle(phba, idx);
+ qdesc->chann = lpfc_find_cpu_handle(phba, eqidx,
+ LPFC_FIND_BY_EQ);
phba->sli4_hba.hdwq[idx].hba_eq = qdesc;
}
/* Allocate SCSI SLI4 CQ/WQs */
- for (idx = 0; idx < phba->cfg_hdw_queue; idx++)
+ for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
if (lpfc_alloc_fcp_wq_cq(phba, idx))
goto out_error;
+ }
/* Allocate NVME SLI4 CQ/WQs */
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
- for (idx = 0; idx < phba->cfg_hdw_queue; idx++)
+ for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
if (lpfc_alloc_nvme_wq_cq(phba, idx))
goto out_error;
+ }
if (phba->nvmet_support) {
for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
@@ -8865,6 +8893,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
}
qdesc->qe_valid = 1;
qdesc->hdwq = idx;
+ qdesc->chann = idx;
phba->sli4_hba.nvmet_cqset[idx] = qdesc;
}
}
@@ -8896,6 +8925,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
goto out_error;
}
qdesc->qe_valid = 1;
+ qdesc->chann = 0;
phba->sli4_hba.els_cq = qdesc;
@@ -8913,6 +8943,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
"0505 Failed allocate slow-path MQ\n");
goto out_error;
}
+ qdesc->chann = 0;
phba->sli4_hba.mbx_wq = qdesc;
/*
@@ -8928,6 +8959,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
"0504 Failed allocate slow-path ELS WQ\n");
goto out_error;
}
+ qdesc->chann = 0;
phba->sli4_hba.els_wq = qdesc;
list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
@@ -8941,6 +8973,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
"6079 Failed allocate NVME LS CQ\n");
goto out_error;
}
+ qdesc->chann = 0;
qdesc->qe_valid = 1;
phba->sli4_hba.nvmels_cq = qdesc;
@@ -8953,6 +8986,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
"6080 Failed allocate NVME LS WQ\n");
goto out_error;
}
+ qdesc->chann = 0;
phba->sli4_hba.nvmels_wq = qdesc;
list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
}
@@ -9079,17 +9113,21 @@ lpfc_sli4_release_queues(struct lpfc_queue ***qs, int max)
}
static inline void
-lpfc_sli4_release_hdwq(struct lpfc_sli4_hdw_queue *hdwq, int max)
+lpfc_sli4_release_hdwq(struct lpfc_hba *phba)
{
+ struct lpfc_sli4_hdw_queue *hdwq;
uint32_t idx;
- for (idx = 0; idx < max; idx++) {
- lpfc_sli4_queue_free(hdwq[idx].hba_eq);
+ hdwq = phba->sli4_hba.hdwq;
+ for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+ if (idx < phba->cfg_irq_chann)
+ lpfc_sli4_queue_free(hdwq[idx].hba_eq);
+ hdwq[idx].hba_eq = NULL;
+
lpfc_sli4_queue_free(hdwq[idx].fcp_cq);
lpfc_sli4_queue_free(hdwq[idx].nvme_cq);
lpfc_sli4_queue_free(hdwq[idx].fcp_wq);
lpfc_sli4_queue_free(hdwq[idx].nvme_wq);
- hdwq[idx].hba_eq = NULL;
hdwq[idx].fcp_cq = NULL;
hdwq[idx].nvme_cq = NULL;
hdwq[idx].fcp_wq = NULL;
@@ -9114,8 +9152,7 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
{
/* Release HBA eqs */
if (phba->sli4_hba.hdwq)
- lpfc_sli4_release_hdwq(phba->sli4_hba.hdwq,
- phba->cfg_hdw_queue);
+ lpfc_sli4_release_hdwq(phba);
if (phba->nvmet_support) {
lpfc_sli4_release_queues(&phba->sli4_hba.nvmet_cqset,
@@ -9196,7 +9233,6 @@ lpfc_create_wq_cq(struct lpfc_hba *phba, struct lpfc_queue *eq,
qidx, (uint32_t)rc);
return rc;
}
- cq->chann = qidx;
if (qtype != LPFC_MBOX) {
/* Setup cq_map for fast lookup */
@@ -9216,7 +9252,6 @@ lpfc_create_wq_cq(struct lpfc_hba *phba, struct lpfc_queue *eq,
/* no need to tear down cq - caller will do so */
return rc;
}
- wq->chann = qidx;
/* Bind this CQ/WQ to the NVME ring */
pring = wq->pring;
@@ -9246,6 +9281,38 @@ lpfc_create_wq_cq(struct lpfc_hba *phba, struct lpfc_queue *eq,
}
/**
+ * lpfc_setup_cq_lookup - Setup the CQ lookup table
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine will populate the cq_lookup table by all
+ * available CQ queue_id's.
+ **/
+void
+lpfc_setup_cq_lookup(struct lpfc_hba *phba)
+{
+ struct lpfc_queue *eq, *childq;
+ struct lpfc_sli4_hdw_queue *qp;
+ int qidx;
+
+ qp = phba->sli4_hba.hdwq;
+ memset(phba->sli4_hba.cq_lookup, 0,
+ (sizeof(struct lpfc_queue *) * (phba->sli4_hba.cq_max + 1)));
+ for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
+ eq = qp[qidx].hba_eq;
+ if (!eq)
+ continue;
+ list_for_each_entry(childq, &eq->child_list, list) {
+ if (childq->queue_id > phba->sli4_hba.cq_max)
+ continue;
+ if ((childq->subtype == LPFC_FCP) ||
+ (childq->subtype == LPFC_NVME))
+ phba->sli4_hba.cq_lookup[childq->queue_id] =
+ childq;
+ }
+ }
+}
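The payoff shows up in the lpfc_sli4_hba_handle_eqe() hunk later in this patch: rather than comparing the CQID from the EQE against each hardware queue's fcp_cq_map/nvme_cq_map, the completion queue is resolved with a single array index, roughly:

	/* consumer side: O(1) CQID-to-CQ resolution in the fast path */
	if (cqid <= phba->sli4_hba.cq_max)
		cq = phba->sli4_hba.cq_lookup[cqid];
	if (cq)
		goto work_cq;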
+
+/**
* lpfc_sli4_queue_setup - Set up all the SLI4 queues
* @phba: pointer to lpfc hba data structure.
*
@@ -9325,7 +9392,7 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
rc = -ENOMEM;
goto out_error;
}
- for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+ for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
if (!qp[qidx].hba_eq) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"0522 Fast-path EQ (%d) not "
@@ -9572,11 +9639,23 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
phba->sli4_hba.dat_rq->queue_id,
phba->sli4_hba.els_cq->queue_id);
- for (qidx = 0; qidx < phba->cfg_hdw_queue;
+ for (qidx = 0; qidx < phba->cfg_irq_chann;
qidx += LPFC_MAX_EQ_DELAY_EQID_CNT)
lpfc_modify_hba_eq_delay(phba, qidx, LPFC_MAX_EQ_DELAY_EQID_CNT,
phba->cfg_fcp_imax);
+ if (phba->sli4_hba.cq_max) {
+ kfree(phba->sli4_hba.cq_lookup);
+ phba->sli4_hba.cq_lookup = kcalloc((phba->sli4_hba.cq_max + 1),
+ sizeof(struct lpfc_queue *), GFP_KERNEL);
+ if (!phba->sli4_hba.cq_lookup) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "0549 Failed setup of CQ Lookup table: "
+ "size 0x%x\n", phba->sli4_hba.cq_max);
+ goto out_destroy;
+ }
+ lpfc_setup_cq_lookup(phba);
+ }
return 0;
out_destroy:
@@ -9658,9 +9737,14 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba)
lpfc_wq_destroy(phba, qp->nvme_wq);
lpfc_cq_destroy(phba, qp->fcp_cq);
lpfc_cq_destroy(phba, qp->nvme_cq);
- lpfc_eq_destroy(phba, qp->hba_eq);
+ if (qidx < phba->cfg_irq_chann)
+ lpfc_eq_destroy(phba, qp->hba_eq);
}
}
+
+ kfree(phba->sli4_hba.cq_lookup);
+ phba->sli4_hba.cq_lookup = NULL;
+ phba->sli4_hba.cq_max = 0;
}
/**
@@ -10440,22 +10524,198 @@ lpfc_sli_disable_intr(struct lpfc_hba *phba)
}
/**
+ * lpfc_find_cpu_handle - Find the CPU that matches the specified EQ or Hardware Queue
+ * @phba: pointer to lpfc hba data structure.
+ * @id: EQ vector index or Hardware Queue index
+ * @match: LPFC_FIND_BY_EQ = match by EQ
+ * LPFC_FIND_BY_HDWQ = match by Hardware Queue
+ */
+static uint16_t
+lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match)
+{
+ struct lpfc_vector_map_info *cpup;
+ int cpu;
+
+	/* Find the CPU that matches the specified EQ or Hardware Queue */
+ cpup = phba->sli4_hba.cpu_map;
+ for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+ if ((match == LPFC_FIND_BY_EQ) &&
+ (cpup->irq != LPFC_VECTOR_MAP_EMPTY) &&
+ (cpup->eq == id))
+ return cpu;
+ if ((match == LPFC_FIND_BY_HDWQ) && (cpup->hdwq == id))
+ return cpu;
+ cpup++;
+ }
+ return 0;
+}
+
+/**
+ * lpfc_find_eq_handle - Find the EQ that corresponds to the specified
+ * Hardware Queue
+ * @phba: pointer to lpfc hba data structure.
+ * @hdwq: Hardware Queue index
+ */
+static uint16_t
+lpfc_find_eq_handle(struct lpfc_hba *phba, uint16_t hdwq)
+{
+ struct lpfc_vector_map_info *cpup;
+ int cpu;
+
+	/* Find the EQ that the specified Hardware Queue is assigned to */
+ cpup = phba->sli4_hba.cpu_map;
+ for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+ if (cpup->hdwq == hdwq)
+ return cpup->eq;
+ cpup++;
+ }
+ return 0;
+}
+
+/**
+ * lpfc_find_phys_id_eq - Find the next EQ that corresponds to the specified
+ * Physical Id.
+ * @phba: pointer to lpfc hba data structure.
+ * @eqidx: EQ index
+ * @phys_id: CPU package physical id
+ */
+static uint16_t
+lpfc_find_phys_id_eq(struct lpfc_hba *phba, uint16_t eqidx, uint16_t phys_id)
+{
+ struct lpfc_vector_map_info *cpup;
+ int cpu, desired_phys_id;
+
+ desired_phys_id = LPFC_VECTOR_MAP_EMPTY;
+
+ /* Find the desired phys_id for the specified EQ */
+ cpup = phba->sli4_hba.cpu_map;
+ for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+ if ((cpup->irq != LPFC_VECTOR_MAP_EMPTY) &&
+ (cpup->eq == eqidx)) {
+ desired_phys_id = cpup->phys_id;
+ break;
+ }
+ cpup++;
+ }
+ if (phys_id == desired_phys_id)
+ return eqidx;
+
+	/* Find an EQ that's on the specified phys_id */
+ cpup = phba->sli4_hba.cpu_map;
+ for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+ if ((cpup->irq != LPFC_VECTOR_MAP_EMPTY) &&
+ (cpup->phys_id == phys_id))
+ return cpup->eq;
+ cpup++;
+ }
+ return 0;
+}
+
+/**
+ * lpfc_find_cpu_map - Find next available CPU map entry that matches the
+ * phys_id and core_id.
+ * @phba: pointer to lpfc hba data structure.
+ * @phys_id: CPU package physical id
+ * @core_id: CPU core id
+ * @hdwqidx: Hardware Queue index
+ * @eqidx: EQ index
+ * @isr_avail: Should an IRQ be associated with this entry
+ */
+static struct lpfc_vector_map_info *
+lpfc_find_cpu_map(struct lpfc_hba *phba, uint16_t phys_id, uint16_t core_id,
+ uint16_t hdwqidx, uint16_t eqidx, int isr_avail)
+{
+ struct lpfc_vector_map_info *cpup;
+ int cpu;
+
+ cpup = phba->sli4_hba.cpu_map;
+ for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+ /* Does the cpup match the one we are looking for */
+ if ((cpup->phys_id == phys_id) &&
+ (cpup->core_id == core_id)) {
+ /* If it has been already assigned, then skip it */
+ if (cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) {
+ cpup++;
+ continue;
+ }
+ /* Ensure we are on the same phys_id as the first one */
+ if (!isr_avail)
+ cpup->eq = lpfc_find_phys_id_eq(phba, eqidx,
+ phys_id);
+ else
+ cpup->eq = eqidx;
+
+ cpup->hdwq = hdwqidx;
+ if (isr_avail) {
+ cpup->irq =
+ pci_irq_vector(phba->pcidev, eqidx);
+
+ /* Now affinitize to the selected CPU */
+ irq_set_affinity_hint(cpup->irq,
+ get_cpu_mask(cpu));
+ irq_set_status_flags(cpup->irq,
+ IRQ_NO_BALANCING);
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "3330 Set Affinity: CPU %d "
+ "EQ %d irq %d (HDWQ %x)\n",
+ cpu, cpup->eq,
+ cpup->irq, cpup->hdwq);
+ }
+ return cpup;
+ }
+ cpup++;
+ }
+	return NULL;
+}
+
+#ifdef CONFIG_X86
+/**
+ * lpfc_find_hyper - Determine if the CPU map entry is hyper-threaded
+ * @phba: pointer to lpfc hba data structure.
+ * @cpu: CPU map index
+ * @phys_id: CPU package physical id
+ * @core_id: CPU core id
+ */
+static int
+lpfc_find_hyper(struct lpfc_hba *phba, int cpu,
+ uint16_t phys_id, uint16_t core_id)
+{
+ struct lpfc_vector_map_info *cpup;
+ int idx;
+
+ cpup = phba->sli4_hba.cpu_map;
+ for (idx = 0; idx < phba->sli4_hba.num_present_cpu; idx++) {
+ /* Does the cpup match the one we are looking for */
+ if ((cpup->phys_id == phys_id) &&
+ (cpup->core_id == core_id) &&
+ (cpu != idx)) {
+ return 1;
+ }
+ cpup++;
+ }
+ return 0;
+}
+#endif
+
+/**
* lpfc_cpu_affinity_check - Check vector CPU affinity mappings
* @phba: pointer to lpfc hba data structure.
+ * @vectors: number of msix vectors allocated.
*
* The routine will figure out the CPU affinity assignment for every
- * MSI-X vector allocated for the HBA. The hba_eq_hdl will be updated
- * with a pointer to the CPU mask that defines ALL the CPUs this vector
- * can be associated with. If the vector can be unquely associated with
- * a single CPU, that CPU will be recorded in hba_eq_hdl[index].cpu.
+ * MSI-X vector allocated for the HBA.
* In addition, the CPU to IO channel mapping will be calculated
* and the phba->sli4_hba.cpu_map array will reflect this.
*/
static void
-lpfc_cpu_affinity_check(struct lpfc_hba *phba)
+lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
{
+ int i, j, idx, phys_id;
+ int max_phys_id, min_phys_id;
+ int max_core_id, min_core_id;
struct lpfc_vector_map_info *cpup;
- int cpu, idx;
+ int cpu, eqidx, hdwqidx, isr_avail;
#ifdef CONFIG_X86
struct cpuinfo_x86 *cpuinfo;
#endif
@@ -10465,6 +10725,12 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba)
(sizeof(struct lpfc_vector_map_info) *
phba->sli4_hba.num_present_cpu));
+ max_phys_id = 0;
+ min_phys_id = 0xffff;
+ max_core_id = 0;
+ min_core_id = 0xffff;
+ phys_id = 0;
+
/* Update CPU map with physical id and core id of each CPU */
cpup = phba->sli4_hba.cpu_map;
for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
@@ -10472,34 +10738,91 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba)
cpuinfo = &cpu_data(cpu);
cpup->phys_id = cpuinfo->phys_proc_id;
cpup->core_id = cpuinfo->cpu_core_id;
+ cpup->hyper = lpfc_find_hyper(phba, cpu,
+ cpup->phys_id, cpup->core_id);
#else
/* No distinction between CPUs for other platforms */
cpup->phys_id = 0;
- cpup->core_id = 0;
+ cpup->core_id = cpu;
+ cpup->hyper = 0;
#endif
+
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"3328 CPU physid %d coreid %d\n",
cpup->phys_id, cpup->core_id);
+
+ if (cpup->phys_id > max_phys_id)
+ max_phys_id = cpup->phys_id;
+ if (cpup->phys_id < min_phys_id)
+ min_phys_id = cpup->phys_id;
+
+ if (cpup->core_id > max_core_id)
+ max_core_id = cpup->core_id;
+ if (cpup->core_id < min_core_id)
+ min_core_id = cpup->core_id;
+
cpup++;
}
- for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
- cpup = &phba->sli4_hba.cpu_map[idx];
- cpup->irq = pci_irq_vector(phba->pcidev, idx);
+ /*
+ * If the number of IRQ vectors == number of CPUs,
+ * mapping is pretty simple: 1 to 1.
+ * This is the desired path if NVME is enabled.
+ */
+ if (vectors == phba->sli4_hba.num_present_cpu) {
+ cpup = phba->sli4_hba.cpu_map;
+ for (idx = 0; idx < vectors; idx++) {
+ cpup->eq = idx;
+ cpup->hdwq = idx;
+ cpup->irq = pci_irq_vector(phba->pcidev, idx);
+
+ /* Now affinitize to the selected CPU */
+ irq_set_affinity_hint(
+ pci_irq_vector(phba->pcidev, idx),
+ get_cpu_mask(idx));
+ irq_set_status_flags(cpup->irq, IRQ_NO_BALANCING);
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "3336 Set Affinity: CPU %d "
+ "EQ %d irq %d\n",
+ idx, cpup->eq,
+ pci_irq_vector(phba->pcidev, idx));
+ cpup++;
+ }
+ return;
+ }
- /* For now assume vector N maps to CPU N */
- irq_set_affinity_hint(cpup->irq, get_cpu_mask(idx));
- cpup->hdwq = idx;
+ idx = 0;
+ isr_avail = 1;
+ eqidx = 0;
+ hdwqidx = 0;
- lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
- "3336 Set Affinity: CPU %d "
- "hdwq %d irq %d\n",
- cpu, cpup->hdwq, cpup->irq);
+ /* Mapping is more complicated for this case. Hardware Queues are
+	 * assigned in a "ping pong" fashion, ping-ponging between the
+	 * available phys_ids.
+ */
+ while (idx < phba->sli4_hba.num_present_cpu) {
+ for (i = min_core_id; i <= max_core_id; i++) {
+ for (j = min_phys_id; j <= max_phys_id; j++) {
+ cpup = lpfc_find_cpu_map(phba, j, i, hdwqidx,
+ eqidx, isr_avail);
+ if (!cpup)
+ continue;
+ idx++;
+ hdwqidx++;
+ if (hdwqidx >= phba->cfg_hdw_queue)
+ hdwqidx = 0;
+ eqidx++;
+ if (eqidx >= phba->cfg_irq_chann) {
+ isr_avail = 0;
+ eqidx = 0;
+ }
+ }
+ }
}
return;
}
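As a hypothetical walk-through of the ping-pong pass above (topology and module settings invented for illustration): with 2 sockets of 4 cores each, cfg_irq_chann = 4 and cfg_hdw_queue = 8, the loop would assign:

	core0/socket0 -> hdwq 0, EQ 0 (IRQ vector affinitized)
	core0/socket1 -> hdwq 1, EQ 1
	core1/socket0 -> hdwq 2, EQ 2
	core1/socket1 -> hdwq 3, EQ 3
	core2/socket0 -> hdwq 4, shares EQ 0 (vectors exhausted, isr_avail = 0)
	core2/socket1 -> hdwq 5, shares EQ 1
	core3/socket0 -> hdwq 6, shares EQ 2
	core3/socket1 -> hdwq 7, shares EQ 3

so every CPU keeps its own hardware queue while EQs are shared only within the same physical socket (via lpfc_find_phys_id_eq()).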
-
/**
* lpfc_sli4_enable_msix - Enable MSI-X interrupt mode to SLI-4 device
* @phba: pointer to lpfc hba data structure.
@@ -10518,7 +10841,7 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba)
char *name;
/* Set up MSI-X multi-message vectors */
- vectors = phba->cfg_hdw_queue;
+ vectors = phba->cfg_irq_chann;
rc = pci_alloc_irq_vectors(phba->pcidev,
(phba->nvmet_support) ? 1 : 2,
@@ -10539,7 +10862,6 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba)
phba->sli4_hba.hba_eq_hdl[index].idx = index;
phba->sli4_hba.hba_eq_hdl[index].phba = phba;
- atomic_set(&phba->sli4_hba.hba_eq_hdl[index].hba_eq_in_use, 1);
rc = request_irq(pci_irq_vector(phba->pcidev, index),
&lpfc_sli4_hba_intr_handler, 0,
name,
@@ -10552,17 +10874,16 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba)
}
}
- if (vectors != phba->cfg_hdw_queue) {
+ if (vectors != phba->cfg_irq_chann) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"3238 Reducing IO channels to match number of "
"MSI-X vectors, requested %d got %d\n",
- phba->cfg_hdw_queue, vectors);
- if (phba->cfg_hdw_queue > vectors)
- phba->cfg_hdw_queue = vectors;
+ phba->cfg_irq_chann, vectors);
+ if (phba->cfg_irq_chann > vectors)
+ phba->cfg_irq_chann = vectors;
if (phba->cfg_nvmet_mrq > vectors)
phba->cfg_nvmet_mrq = vectors;
}
- lpfc_cpu_affinity_check(phba);
return rc;
@@ -10617,7 +10938,7 @@ lpfc_sli4_enable_msi(struct lpfc_hba *phba)
return rc;
}
- for (index = 0; index < phba->cfg_hdw_queue; index++) {
+ for (index = 0; index < phba->cfg_irq_chann; index++) {
phba->sli4_hba.hba_eq_hdl[index].idx = index;
phba->sli4_hba.hba_eq_hdl[index].phba = phba;
}
@@ -10682,11 +11003,10 @@ lpfc_sli4_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode)
phba->intr_type = INTx;
intr_mode = 0;
- for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+ for (idx = 0; idx < phba->cfg_irq_chann; idx++) {
eqhdl = &phba->sli4_hba.hba_eq_hdl[idx];
eqhdl->idx = idx;
eqhdl->phba = phba;
- atomic_set(&eqhdl->hba_eq_in_use, 1);
}
}
}
@@ -10710,7 +11030,7 @@ lpfc_sli4_disable_intr(struct lpfc_hba *phba)
int index;
/* Free up MSI-X multi-message vectors */
- for (index = 0; index < phba->cfg_hdw_queue; index++) {
+ for (index = 0; index < phba->cfg_irq_chann; index++) {
irq_set_affinity_hint(
pci_irq_vector(phba->pcidev, index),
NULL);
@@ -12089,12 +12409,13 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
}
/* Default to single EQ for non-MSI-X */
if (phba->intr_type != MSIX) {
- phba->cfg_hdw_queue = 1;
+ phba->cfg_irq_chann = 1;
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
if (phba->nvmet_support)
phba->cfg_nvmet_mrq = 1;
}
}
+ lpfc_cpu_affinity_check(phba, phba->cfg_irq_chann);
/* Create SCSI host to the physical port */
error = lpfc_create_shost(phba);
@@ -239,7 +239,7 @@ lpfc_nvme_create_queue(struct nvme_fc_local_port *pnvme_lport,
if (qidx) {
str = "IO "; /* IO queue */
qhandle->index = ((qidx - 1) %
- vport->phba->cfg_hdw_queue);
+ lpfc_nvme_template.max_hw_queues);
} else {
str = "ADM"; /* Admin queue */
qhandle->index = qidx;
@@ -1546,14 +1546,12 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
}
}
+ /* Lookup Hardware Queue index based on fcp_io_sched module parameter */
if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) {
idx = lpfc_queue_info->index;
} else {
cpu = smp_processor_id();
- if (cpu < phba->cfg_hdw_queue)
- idx = cpu;
- else
- idx = cpu % phba->cfg_hdw_queue;
+ idx = phba->sli4_hba.cpu_map[cpu].hdwq;
}
lpfc_ncmd = lpfc_get_nvme_buf(phba, ndlp, idx, expedite);
@@ -2060,7 +2058,13 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
* allocate + 3, one for cmd, one for rsp and one for this alignment
*/
lpfc_nvme_template.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1;
- lpfc_nvme_template.max_hw_queues = phba->cfg_hdw_queue;
+
+ /* Advertise how many hw queues we support based on fcp_io_sched */
+ if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ)
+ lpfc_nvme_template.max_hw_queues = phba->cfg_hdw_queue;
+ else
+ lpfc_nvme_template.max_hw_queues =
+ phba->sli4_hba.num_present_cpu;
/* localport is allocated from the stack, but the registration
* call allocates heap memory as well as the private area.
@@ -2554,6 +2558,8 @@ lpfc_nvme_wait_for_io_drain(struct lpfc_hba *phba)
* WQEs have been removed from the txcmplqs.
*/
for (i = 0; i < phba->cfg_hdw_queue; i++) {
+ if (!phba->sli4_hba.hdwq[i].nvme_wq)
+ continue;
pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
if (!pring)
@@ -688,16 +688,17 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
int tag;
cpu = smp_processor_id();
+
+ /* Lookup Hardware Queue index based on fcp_io_sched module parameter
+ * and if SCSI mq is turned on.
+ */
if (cmnd && shost_use_blk_mq(cmnd->device->host) &&
phba->cfg_enable_scsi_mq &&
(phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ)) {
tag = blk_mq_unique_tag(cmnd->request);
idx = blk_mq_unique_tag_to_hwq(tag);
} else {
- if (cpu < phba->cfg_hdw_queue)
- idx = cpu;
- else
- idx = cpu % phba->cfg_hdw_queue;
+ idx = phba->sli4_hba.cpu_map[cpu].hdwq;
}
lpfc_cmd = lpfc_get_io_buf(phba, ndlp, idx,
@@ -3651,6 +3652,9 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
struct Scsi_Host *shost;
int idx;
uint32_t logit = LOG_FCP;
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+ int cpu;
+#endif
/* Sanity check on return of outstanding command */
cmd = lpfc_cmd->pCmd;
@@ -3661,6 +3665,13 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
if (phba->sli4_hba.hdwq)
phba->sli4_hba.hdwq[idx].scsi_cstat.io_cmpls++;
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+ if (phba->cpucheck_on & LPFC_CHECK_SCSI_IO) {
+ cpu = smp_processor_id();
+ if (cpu < LPFC_CHECK_CPU_CNT)
+ phba->sli4_hba.hdwq[idx].cpucheck_cmpl_io[cpu]++;
+ }
+#endif
shost = cmd->device->host;
lpfc_cmd->result = (pIocbOut->iocb.un.ulpWord[4] & IOERR_PARAM_MASK);
@@ -4339,6 +4350,9 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
struct lpfc_io_buf *lpfc_cmd;
struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
int err, idx;
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+ int cpu;
+#endif
rdata = lpfc_rport_data_from_scsi_device(cmnd->device);
@@ -4453,6 +4467,16 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp);
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+ if (phba->cpucheck_on & LPFC_CHECK_SCSI_IO) {
+ cpu = smp_processor_id();
+ if (cpu < LPFC_CHECK_CPU_CNT) {
+ struct lpfc_sli4_hdw_queue *hdwq =
+ &phba->sli4_hba.hdwq[lpfc_cmd->hdwq_no];
+ hdwq->cpucheck_xmt_io[cpu]++;
+ }
+ }
+#endif
err = lpfc_sli_issue_iocb(phba, LPFC_FCP_RING,
&lpfc_cmd->cur_iocbq, SLI_IOCB_RET_IOCB);
if (err) {
@@ -5587,7 +5587,7 @@ lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba)
LPFC_QUEUE_REARM);
}
- for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++)
+ for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++)
sli4_hba->sli4_eq_release(qp[qidx].hba_eq,
LPFC_QUEUE_REARM);
}
@@ -7879,7 +7879,7 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba)
/* Find the eq associated with the mcq */
if (sli4_hba->hdwq)
- for (eqidx = 0; eqidx < phba->cfg_hdw_queue; eqidx++)
+ for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++)
if (sli4_hba->hdwq[eqidx].hba_eq->queue_id ==
sli4_hba->mbx_cq->assoc_qid) {
fpeq = sli4_hba->hdwq[eqidx].hba_eq;
@@ -10059,12 +10059,9 @@ int
lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
struct lpfc_iocbq *piocb, uint32_t flag)
{
- struct lpfc_hba_eq_hdl *hba_eq_hdl;
struct lpfc_sli_ring *pring;
- struct lpfc_queue *fpeq;
- struct lpfc_eqe *eqe;
unsigned long iflags;
- int rc, idx;
+ int rc;
if (phba->sli_rev == LPFC_SLI_REV4) {
pring = lpfc_sli4_calc_ring(phba, piocb);
@@ -10074,34 +10071,6 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
spin_lock_irqsave(&pring->ring_lock, iflags);
rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
spin_unlock_irqrestore(&pring->ring_lock, iflags);
-
- if (lpfc_fcp_look_ahead && (piocb->iocb_flag & LPFC_IO_FCP)) {
- idx = piocb->hba_wqidx;
- hba_eq_hdl = &phba->sli4_hba.hba_eq_hdl[idx];
-
- if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use)) {
-
- /* Get associated EQ with this index */
- fpeq = phba->sli4_hba.hdwq[idx].hba_eq;
-
- /* Turn off interrupts from this EQ */
- phba->sli4_hba.sli4_eq_clr_intr(fpeq);
-
- /*
- * Process all the events on FCP EQ
- */
- while ((eqe = lpfc_sli4_eq_get(fpeq))) {
- lpfc_sli4_hba_handle_eqe(phba,
- eqe, idx);
- fpeq->EQ_processed++;
- }
-
- /* Always clear and re-arm the EQ */
- phba->sli4_hba.sli4_eq_release(fpeq,
- LPFC_QUEUE_REARM);
- }
- atomic_inc(&hba_eq_hdl->hba_eq_in_use);
- }
} else {
/* For now, SLI2/3 will still use hbalock */
spin_lock_irqsave(&phba->hbalock, iflags);
@@ -13652,7 +13621,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
/* Save EQ associated with this CQ */
cq->assoc_qp = speq;
- if (!queue_work(phba->wq, &cq->spwork))
+ if (!queue_work_on(cq->chann, phba->wq, &cq->spwork))
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
"0390 Cannot schedule soft IRQ "
"for CQ eqcqid=%d, cqid=%d on CPU %d\n",
@@ -14058,18 +14027,11 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
/* Get the reference to the corresponding CQ */
cqid = bf_get_le32(lpfc_eqe_resource_id, eqe);
- /* First check for NVME/SCSI completion */
- if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
- (cqid == phba->sli4_hba.hdwq[qidx].nvme_cq_map)) {
- /* Process NVME / NVMET command completion */
- cq = phba->sli4_hba.hdwq[qidx].nvme_cq;
- goto process_cq;
- }
-
- if (cqid == phba->sli4_hba.hdwq[qidx].fcp_cq_map) {
- /* Process FCP command completion */
- cq = phba->sli4_hba.hdwq[qidx].fcp_cq;
- goto process_cq;
+ /* Use the fast lookup method first */
+ if (cqid <= phba->sli4_hba.cq_max) {
+ cq = phba->sli4_hba.cq_lookup[cqid];
+ if (cq)
+ goto work_cq;
}
/* Next check for NVMET completion */
@@ -14104,9 +14066,7 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
return;
}
- /* Save EQ associated with this CQ */
- cq->assoc_qp = phba->sli4_hba.hdwq[qidx].hba_eq;
-
+work_cq:
if (!queue_work_on(cq->chann, phba->wq, &cq->irqwork))
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
"0363 Cannot schedule soft IRQ "
@@ -14234,15 +14194,6 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
if (unlikely(!fpeq))
return IRQ_NONE;
- if (lpfc_fcp_look_ahead) {
- if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use))
- phba->sli4_hba.sli4_eq_clr_intr(fpeq);
- else {
- atomic_inc(&hba_eq_hdl->hba_eq_in_use);
- return IRQ_NONE;
- }
- }
-
/* Check device state for handling interrupt */
if (unlikely(lpfc_intr_state_check(phba))) {
/* Check again for link_state with lock held */
@@ -14251,8 +14202,6 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
/* Flush, clear interrupt, and rearm the EQ */
lpfc_sli4_eq_flush(phba, fpeq);
spin_unlock_irqrestore(&phba->hbalock, iflag);
- if (lpfc_fcp_look_ahead)
- atomic_inc(&hba_eq_hdl->hba_eq_in_use);
return IRQ_NONE;
}
@@ -14275,12 +14224,6 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
if (unlikely(ecount == 0)) {
fpeq->EQ_no_entry++;
-
- if (lpfc_fcp_look_ahead) {
- atomic_inc(&hba_eq_hdl->hba_eq_in_use);
- return IRQ_NONE;
- }
-
if (phba->intr_type == MSIX)
/* MSI-X treated interrupt served as no EQ share INT */
lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
@@ -14290,9 +14233,6 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
return IRQ_NONE;
}
- if (lpfc_fcp_look_ahead)
- atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-
return IRQ_HANDLED;
} /* lpfc_sli4_hba_intr_handler */
@@ -14330,7 +14270,7 @@ lpfc_sli4_intr_handler(int irq, void *dev_id)
/*
* Invoke fast-path host attention interrupt handling as appropriate.
*/
- for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+ for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
hba_irq_rc = lpfc_sli4_hba_intr_handler(irq,
&phba->sli4_hba.hba_eq_hdl[qidx]);
if (hba_irq_rc == IRQ_HANDLED)
@@ -14517,7 +14457,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
union lpfc_sli4_cfg_shdr *shdr;
uint16_t dmult;
- if (startq >= phba->cfg_hdw_queue)
+ if (startq >= phba->cfg_irq_chann)
return 0;
mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
@@ -14531,7 +14471,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
eq_delay = &mbox->u.mqe.un.eq_delay;
	/* Calculate delay multiplier from maximum interrupt per second */
- result = imax / phba->cfg_hdw_queue;
+ result = imax / phba->cfg_irq_chann;
if (result > LPFC_DMULT_CONST || result == 0)
dmult = 0;
else
@@ -14540,7 +14480,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
dmult = LPFC_DMULT_MAX;
cnt = 0;
- for (qidx = startq; qidx < phba->cfg_hdw_queue; qidx++) {
+ for (qidx = startq; qidx < phba->cfg_irq_chann; qidx++) {
eq = phba->sli4_hba.hdwq[qidx].hba_eq;
if (!eq)
continue;
@@ -14558,7 +14498,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
val = phba->cfg_fcp_imax;
if (val) {
/* First, interrupts per sec per EQ */
- val = phba->cfg_fcp_imax / phba->cfg_hdw_queue;
+ val = phba->cfg_fcp_imax / phba->cfg_irq_chann;
/* us delay between each interrupt */
val = LPFC_SEC_TO_USEC / val;
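
Because the interrupt budget is now split across cfg_irq_chann rather than cfg_hdw_queue, the per-EQ coalescing delay is simply LPFC_SEC_TO_USEC divided by the per-EQ interrupt rate. A small worked example with illustrative numbers only (150,000 total interrupts/sec over 8 IRQ channels comes out to about 53 us between interrupts on each EQ):

	#include <stdio.h>

	#define SEC_TO_USEC 1000000		/* mirrors LPFC_SEC_TO_USEC */

	int main(void)
	{
		unsigned int imax = 150000;	/* stand-in for cfg_fcp_imax: total ints/sec */
		unsigned int irq_chann = 8;	/* stand-in for cfg_irq_chann */
		unsigned int per_eq = imax / irq_chann;		/* 18750 ints/sec per EQ */
		unsigned int delay_us = SEC_TO_USEC / per_eq;	/* 53 us between interrupts */

		printf("per-EQ rate %u ints/s -> %u us coalescing delay\n",
		       per_eq, delay_us);
		return 0;
	}
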
@@ -14853,10 +14793,13 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
cq->subtype = subtype;
cq->queue_id = bf_get(lpfc_mbx_cq_create_q_id, &cq_create->u.response);
cq->assoc_qid = eq->queue_id;
+ cq->assoc_qp = eq;
cq->host_index = 0;
cq->hba_index = 0;
cq->entry_repost = LPFC_CQ_REPOST;
+ if (cq->queue_id > phba->sli4_hba.cq_max)
+ phba->sli4_hba.cq_max = cq->queue_id;
out:
mempool_free(mbox, phba->mbox_mem_pool);
return status;
@@ -15062,6 +15005,7 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
cq->type = type;
cq->subtype = subtype;
cq->assoc_qid = eq->queue_id;
+ cq->assoc_qp = eq;
cq->host_index = 0;
cq->hba_index = 0;
cq->entry_repost = LPFC_CQ_REPOST;
@@ -15102,6 +15046,8 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
for (idx = 0; idx < numcq; idx++) {
cq = cqp[idx];
cq->queue_id = rc + idx;
+ if (cq->queue_id > phba->sli4_hba.cq_max)
+ phba->sli4_hba.cq_max = cq->queue_id;
}
out:
@@ -19665,7 +19611,8 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
/* NVME_LS and NVME_LS ABTS requests. */
if (pwqe->iocb_flag & LPFC_IO_NVME_LS) {
pring = phba->sli4_hba.nvmels_wq->pring;
- spin_lock_irqsave(&pring->ring_lock, iflags);
+ lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+ qp, wq_access);
sglq = __lpfc_sli_get_els_sglq(phba, pwqe);
if (!sglq) {
spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19698,7 +19645,8 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->nvme_cq_map);
- spin_lock_irqsave(&pring->ring_lock, iflags);
+ lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+ qp, wq_access);
ret = lpfc_sli4_wq_put(wq, wqe);
if (ret) {
spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19725,7 +19673,8 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
pwqe->sli4_xritag);
bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->nvme_cq_map);
- spin_lock_irqsave(&pring->ring_lock, iflags);
+ lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+ qp, wq_access);
ret = lpfc_sli4_wq_put(wq, wqe);
if (ret) {
spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19873,18 +19822,20 @@ void lpfc_move_xri_pvt_to_pbl(struct lpfc_hba *phba, u32 hwqid)
{
struct lpfc_pbl_pool *pbl_pool;
struct lpfc_pvt_pool *pvt_pool;
+ struct lpfc_sli4_hdw_queue *qp;
struct lpfc_io_buf *lpfc_ncmd;
struct lpfc_io_buf *lpfc_ncmd_next;
unsigned long iflag;
struct list_head tmp_list;
u32 tmp_count;
- pbl_pool = &phba->sli4_hba.hdwq[hwqid].p_multixri_pool->pbl_pool;
- pvt_pool = &phba->sli4_hba.hdwq[hwqid].p_multixri_pool->pvt_pool;
+ qp = &phba->sli4_hba.hdwq[hwqid];
+ pbl_pool = &qp->p_multixri_pool->pbl_pool;
+ pvt_pool = &qp->p_multixri_pool->pvt_pool;
tmp_count = 0;
- spin_lock_irqsave(&pbl_pool->lock, iflag);
- spin_lock(&pvt_pool->lock);
+ lpfc_qp_spin_lock_irqsave(&pbl_pool->lock, iflag, qp, mv_to_pub_pool);
+ lpfc_qp_spin_lock(&pvt_pool->lock, qp, mv_from_pvt_pool);
if (pvt_pool->count > pvt_pool->low_watermark) {
/* Step 1: move (all - low_watermark) from pvt_pool
@@ -19937,7 +19888,8 @@ void lpfc_move_xri_pvt_to_pbl(struct lpfc_hba *phba, u32 hwqid)
* false - if the specified pbl_pool is empty or locked by someone else
**/
static bool
-_lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, struct lpfc_pbl_pool *pbl_pool,
+_lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
+ struct lpfc_pbl_pool *pbl_pool,
struct lpfc_pvt_pool *pvt_pool, u32 count)
{
struct lpfc_io_buf *lpfc_ncmd;
@@ -19949,7 +19901,7 @@ _lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, struct lpfc_pbl_pool *pbl_pool,
if (ret) {
if (pbl_pool->count) {
/* Move a batch of XRIs from public to private pool */
- spin_lock(&pvt_pool->lock);
+ lpfc_qp_spin_lock(&pvt_pool->lock, qp, mv_to_pvt_pool);
list_for_each_entry_safe(lpfc_ncmd,
lpfc_ncmd_next,
&pbl_pool->list,
@@ -19991,16 +19943,18 @@ void lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, u32 hwqid, u32 count)
struct lpfc_multixri_pool *next_multixri_pool;
struct lpfc_pvt_pool *pvt_pool;
struct lpfc_pbl_pool *pbl_pool;
+ struct lpfc_sli4_hdw_queue *qp;
u32 next_hwqid;
u32 hwq_count;
int ret;
- multixri_pool = phba->sli4_hba.hdwq[hwqid].p_multixri_pool;
+ qp = &phba->sli4_hba.hdwq[hwqid];
+ multixri_pool = qp->p_multixri_pool;
pvt_pool = &multixri_pool->pvt_pool;
pbl_pool = &multixri_pool->pbl_pool;
/* Check if local pbl_pool is available */
- ret = _lpfc_move_xri_pbl_to_pvt(phba, pbl_pool, pvt_pool, count);
+ ret = _lpfc_move_xri_pbl_to_pvt(phba, qp, pbl_pool, pvt_pool, count);
if (ret) {
#ifdef LPFC_MXP_STAT
multixri_pool->local_pbl_hit_count++;
@@ -20023,7 +19977,7 @@ void lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, u32 hwqid, u32 count)
/* Check if the public free xri pool is available */
ret = _lpfc_move_xri_pbl_to_pvt(
- phba, pbl_pool, pvt_pool, count);
+ phba, qp, pbl_pool, pvt_pool, count);
/* Exit while-loop if success or all hwqid are checked */
} while (!ret && next_hwqid != multixri_pool->rrb_next_hwqid);
@@ -20139,20 +20093,23 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd,
if ((pvt_pool->count < pvt_pool->low_watermark) ||
(xri_owned < xri_limit &&
pvt_pool->count < pvt_pool->high_watermark)) {
- spin_lock_irqsave(&pvt_pool->lock, iflag);
+ lpfc_qp_spin_lock_irqsave(&pvt_pool->lock, iflag,
+ qp, free_pvt_pool);
list_add_tail(&lpfc_ncmd->list,
&pvt_pool->list);
pvt_pool->count++;
spin_unlock_irqrestore(&pvt_pool->lock, iflag);
} else {
- spin_lock_irqsave(&pbl_pool->lock, iflag);
+ lpfc_qp_spin_lock_irqsave(&pbl_pool->lock, iflag,
+ qp, free_pub_pool);
list_add_tail(&lpfc_ncmd->list,
&pbl_pool->list);
pbl_pool->count++;
spin_unlock_irqrestore(&pbl_pool->lock, iflag);
}
} else {
- spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag);
+ lpfc_qp_spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag,
+ qp, free_xri);
list_add_tail(&lpfc_ncmd->list,
&qp->lpfc_io_buf_list_put);
qp->put_io_bufs++;
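
The placement rule at the top of this hunk decides where a freed XRI goes: back to the hardware queue's private pool while that pool is under its low watermark, or while the owner is within its XRI budget and the private pool is still under its high watermark; everything else returns to the public pool. A compact sketch of that predicate, using illustrative field names rather than the driver's structures:

	#include <stdbool.h>
	#include <stdio.h>

	struct pvt_state {
		unsigned int count;
		unsigned int low_watermark;
		unsigned int high_watermark;
	};

	static bool release_to_private(const struct pvt_state *pvt,
				       unsigned int xri_owned, unsigned int xri_limit)
	{
		if (pvt->count < pvt->low_watermark)
			return true;		/* starving: refill the private pool */
		if (xri_owned < xri_limit && pvt->count < pvt->high_watermark)
			return true;		/* still within the XRI budget */
		return false;			/* otherwise the public pool takes it */
	}

	int main(void)
	{
		struct pvt_state pvt = { .count = 10, .low_watermark = 16,
					 .high_watermark = 64 };

		printf("%s\n", release_to_private(&pvt, 100, 512) ?
		       "private pool" : "public pool");
		return 0;
	}
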
@@ -20175,6 +20132,7 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd,
**/
static struct lpfc_io_buf *
lpfc_get_io_buf_from_private_pool(struct lpfc_hba *phba,
+ struct lpfc_sli4_hdw_queue *qp,
struct lpfc_pvt_pool *pvt_pool,
struct lpfc_nodelist *ndlp)
{
@@ -20182,7 +20140,7 @@ lpfc_get_io_buf_from_private_pool(struct lpfc_hba *phba,
struct lpfc_io_buf *lpfc_ncmd_next;
unsigned long iflag;
- spin_lock_irqsave(&pvt_pool->lock, iflag);
+ lpfc_qp_spin_lock_irqsave(&pvt_pool->lock, iflag, qp, alloc_pvt_pool);
list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
&pvt_pool->list, list) {
if (lpfc_test_rrq_active(
@@ -20277,7 +20235,7 @@ lpfc_get_io_buf_from_multixri_pools(struct lpfc_hba *phba,
lpfc_move_xri_pbl_to_pvt(phba, hwqid, XRI_BATCH);
/* Get one XRI from private free xri pool */
- lpfc_ncmd = lpfc_get_io_buf_from_private_pool(phba, pvt_pool, ndlp);
+ lpfc_ncmd = lpfc_get_io_buf_from_private_pool(phba, qp, pvt_pool, ndlp);
if (lpfc_ncmd) {
lpfc_ncmd->hdwq = qp;
@@ -20350,11 +20308,13 @@ struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba,
lpfc_cmd = lpfc_get_io_buf_from_multixri_pools(
phba, ndlp, hwqid, expedite);
else {
- spin_lock_irqsave(&qp->io_buf_list_get_lock, iflag);
+ lpfc_qp_spin_lock_irqsave(&qp->io_buf_list_get_lock, iflag,
+ qp, alloc_xri_get);
if (qp->get_io_bufs > LPFC_NVME_EXPEDITE_XRICNT || expedite)
lpfc_cmd = lpfc_io_buf(phba, ndlp, hwqid);
if (!lpfc_cmd) {
- spin_lock(&qp->io_buf_list_put_lock);
+ lpfc_qp_spin_lock(&qp->io_buf_list_put_lock,
+ qp, alloc_xri_put);
list_splice(&qp->lpfc_io_buf_list_put,
&qp->lpfc_io_buf_list_get);
qp->get_io_bufs += qp->put_io_bufs;
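
The allocation hunk above keeps the existing two-list scheme: allocators drain the get list under io_buf_list_get_lock and only take io_buf_list_put_lock long enough to splice freed buffers across when the get side runs dry, so the free path rarely contends with allocation. A userspace pthread sketch of that two-list pattern (all names made up for the example):

	#include <pthread.h>

	struct buf {
		struct buf *next;
	};

	struct two_list_pool {
		pthread_mutex_t get_lock;
		pthread_mutex_t put_lock;
		struct buf *get_list;		/* drained by allocators */
		struct buf *put_list;		/* filled by the free path */
	};

	static struct buf *pool_get(struct two_list_pool *p)
	{
		struct buf *b;

		pthread_mutex_lock(&p->get_lock);
		if (!p->get_list) {
			/* Get side empty: splice over whatever the free path parked. */
			pthread_mutex_lock(&p->put_lock);
			p->get_list = p->put_list;
			p->put_list = NULL;
			pthread_mutex_unlock(&p->put_lock);
		}
		b = p->get_list;
		if (b)
			p->get_list = b->next;
		pthread_mutex_unlock(&p->get_lock);
		return b;
	}

	static void pool_put(struct two_list_pool *p, struct buf *b)
	{
		pthread_mutex_lock(&p->put_lock);	/* never touches get_lock */
		b->next = p->put_list;
		p->put_list = b;
		pthread_mutex_unlock(&p->put_lock);
	}

	int main(void)
	{
		struct two_list_pool p = {
			.get_lock = PTHREAD_MUTEX_INITIALIZER,
			.put_lock = PTHREAD_MUTEX_INITIALIZER,
		};
		struct buf b;

		pool_put(&p, &b);
		return pool_get(&p) == &b ? 0 : 1;
	}
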
@@ -41,7 +41,7 @@
/* Multi-queue arrangement for FCP EQ/CQ/WQ tuples */
#define LPFC_HBA_HDWQ_MIN 0
-#define LPFC_HBA_HDWQ_MAX 64
+#define LPFC_HBA_HDWQ_MAX 128
#define LPFC_HBA_HDWQ_DEF 0
/* Common buffer size to accommodate SCSI and NVME IO buffers */
@@ -166,16 +166,19 @@ struct lpfc_queue {
uint32_t assoc_qid; /* Queue ID associated with, for CQ/WQ/MQ */
uint32_t host_index; /* The host's index for putting or getting */
uint32_t hba_index; /* The last known hba index for get or put */
+ uint32_t q_mode;
struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */
struct lpfc_rqb *rqbp; /* ptr to RQ buffers */
- uint32_t q_mode;
uint16_t page_count; /* Number of pages allocated for this queue */
uint16_t page_size; /* size of page allocated for this queue */
#define LPFC_EXPANDED_PAGE_SIZE 16384
#define LPFC_DEFAULT_PAGE_SIZE 4096
- uint16_t chann; /* IO channel this queue is associated with */
+ uint16_t chann; /* Hardware Queue association WQ/CQ */
+ /* CPU affinity for EQ */
+#define LPFC_FIND_BY_EQ 0
+#define LPFC_FIND_BY_HDWQ 1
uint8_t db_format;
#define LPFC_DB_RING_FORMAT 0x01
#define LPFC_DB_LIST_FORMAT 0x02
@@ -431,11 +434,6 @@ struct lpfc_hba_eq_hdl {
uint32_t idx;
char handler_name[LPFC_SLI4_HANDLER_NAME_SZ];
struct lpfc_hba *phba;
- atomic_t hba_eq_in_use;
- struct cpumask *cpumask;
- /* CPU affinitsed to or 0xffffffff if multiple */
- uint32_t cpu;
-#define LPFC_MULTI_CPU_AFFINITY 0xffffffff
};
/*BB Credit recovery value*/
@@ -529,7 +527,9 @@ struct lpfc_vector_map_info {
uint16_t phys_id;
uint16_t core_id;
uint16_t irq;
+ uint16_t eq;
uint16_t hdwq;
+ uint16_t hyper;
};
#define LPFC_VECTOR_MAP_EMPTY 0xffff
@@ -593,6 +593,21 @@ struct lpfc_fc4_ctrl_stat {
u32 io_cmpls;
};
+#ifdef LPFC_HDWQ_LOCK_STAT
+struct lpfc_lock_stat {
+ uint32_t alloc_xri_get;
+ uint32_t alloc_xri_put;
+ uint32_t free_xri;
+ uint32_t wq_access;
+ uint32_t alloc_pvt_pool;
+ uint32_t mv_from_pvt_pool;
+ uint32_t mv_to_pub_pool;
+ uint32_t mv_to_pvt_pool;
+ uint32_t free_pub_pool;
+ uint32_t free_pvt_pool;
+};
+#endif
+
/* SLI4 HBA data structure entries */
struct lpfc_sli4_hdw_queue {
/* Pointers to the constructed SLI4 queues */
@@ -626,6 +641,9 @@ struct lpfc_sli4_hdw_queue {
/* FC-4 Stats counters */
struct lpfc_fc4_ctrl_stat nvme_cstat;
struct lpfc_fc4_ctrl_stat scsi_cstat;
+#ifdef LPFC_HDWQ_LOCK_STAT
+ struct lpfc_lock_stat lock_conflict;
+#endif
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
#define LPFC_CHECK_CPU_CNT 128
@@ -635,6 +653,34 @@ struct lpfc_sli4_hdw_queue {
#endif
};
+#ifdef LPFC_HDWQ_LOCK_STAT
+/* compile time trylock stats */
+#define lpfc_qp_spin_lock_irqsave(lock, flag, qp, lstat) \
+ { \
+ int only_once = 1; \
+ while (spin_trylock_irqsave(lock, flag) == 0) { \
+ if (only_once) { \
+ only_once = 0; \
+ qp->lock_conflict.lstat++; \
+ } \
+ } \
+ }
+#define lpfc_qp_spin_lock(lock, qp, lstat) \
+ { \
+ int only_once = 1; \
+ while (spin_trylock(lock) == 0) { \
+ if (only_once) { \
+ only_once = 0; \
+ qp->lock_conflict.lstat++; \
+ } \
+ } \
+ }
+#else
+#define lpfc_qp_spin_lock_irqsave(lock, flag, qp, lstat) \
+ spin_lock_irqsave(lock, flag)
+#define lpfc_qp_spin_lock(lock, qp, lstat) spin_lock(lock)
+#endif
+
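
When LPFC_HDWQ_LOCK_STAT is defined, the macros above still always take the lock; they just spin on trylock and charge one conflict to the named per-queue counter the first time the trylock fails for a given acquisition. A userspace pthread-spinlock analogue of the same counting trick (illustrative only, not the driver macro):

	#include <pthread.h>
	#include <stdio.h>

	struct qstats {
		unsigned long wq_access;	/* stands in for lock_conflict.wq_access */
	};

	/* Take the lock unconditionally, but record one conflict the first
	 * time the trylock fails (mirrors the macro's only_once flag).
	 */
	static void lock_with_stat(pthread_spinlock_t *lock, struct qstats *qs)
	{
		int only_once = 1;

		while (pthread_spin_trylock(lock) != 0) {
			if (only_once) {
				only_once = 0;
				qs->wq_access++;
			}
		}
	}

	int main(void)
	{
		pthread_spinlock_t lock;
		struct qstats qs = { 0 };

		pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
		lock_with_stat(&lock, &qs);	/* uncontended: no conflict counted */
		pthread_spin_unlock(&lock);
		printf("wq_access conflicts: %lu\n", qs.wq_access);
		pthread_spin_destroy(&lock);
		return 0;
	}
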
struct lpfc_sli4_hba {
void __iomem *conf_regs_memmap_p; /* Kernel memory mapped address for
* config space registers
@@ -764,6 +810,8 @@ struct lpfc_sli4_hba {
uint16_t nvmet_xri_cnt;
uint16_t nvmet_io_wait_cnt;
uint16_t nvmet_io_wait_total;
+ uint16_t cq_max;
+ struct lpfc_queue **cq_lookup;
struct list_head lpfc_els_sgl_list;
struct list_head lpfc_abts_els_sgl_list;
spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */