@@ -84,8 +84,6 @@ struct lpfc_sli2_slim;
#define LPFC_HB_MBOX_INTERVAL 5 /* Heart beat interval in seconds. */
#define LPFC_HB_MBOX_TIMEOUT 30 /* Heart beat timeout in seconds. */
-#define LPFC_LOOK_AHEAD_OFF 0 /* Look ahead logic is turned off */
-
/* Error Attention event polling interval */
#define LPFC_ERATT_POLL_INTERVAL 5 /* EATT poll interval in seconds */
@@ -821,6 +819,7 @@ struct lpfc_hba {
uint32_t cfg_fcp_imax;
uint32_t cfg_fcp_cpu_map;
uint32_t cfg_hdw_queue;
+ uint32_t cfg_irq_chann;
uint32_t cfg_suppress_rsp;
uint32_t cfg_nvme_oas;
uint32_t cfg_nvme_embed_cmd;
@@ -1043,6 +1042,9 @@ struct lpfc_hba {
struct dentry *debug_nvmeio_trc;
struct lpfc_debugfs_nvmeio_trc *nvmeio_trc;
struct dentry *debug_hdwqinfo;
+#ifdef LPFC_HDWQ_LOCK_STAT
+ struct dentry *debug_lockstat;
+#endif
atomic_t nvmeio_trc_cnt;
uint32_t nvmeio_trc_size;
uint32_t nvmeio_trc_output_idx;
@@ -1162,6 +1164,7 @@ struct lpfc_hba {
#define LPFC_CHECK_NVME_IO 1
#define LPFC_CHECK_NVMET_RCV 2
#define LPFC_CHECK_NVMET_IO 4
+#define LPFC_CHECK_SCSI_IO 8
uint16_t ktime_on;
uint64_t ktime_data_samples;
uint64_t ktime_status_samples;
@@ -4958,7 +4958,7 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr,
phba->cfg_fcp_imax = (uint32_t)val;
phba->initial_imax = phba->cfg_fcp_imax;
- for (i = 0; i < phba->cfg_hdw_queue; i += LPFC_MAX_EQ_DELAY_EQID_CNT)
+ for (i = 0; i < phba->cfg_irq_chann; i += LPFC_MAX_EQ_DELAY_EQID_CNT)
lpfc_modify_hba_eq_delay(phba, i, LPFC_MAX_EQ_DELAY_EQID_CNT,
val);
@@ -5059,13 +5059,6 @@ lpfc_fcp_cpu_map_show(struct device *dev, struct device_attribute *attr,
phba->cfg_fcp_cpu_map,
phba->sli4_hba.num_online_cpu);
break;
- case 2:
- len += snprintf(buf + len, PAGE_SIZE-len,
- "fcp_cpu_map: Driver centric mapping (%d): "
- "%d online CPUs\n",
- phba->cfg_fcp_cpu_map,
- phba->sli4_hba.num_online_cpu);
- break;
}
while (phba->sli4_hba.curr_disp_cpu < phba->sli4_hba.num_present_cpu) {
@@ -5076,35 +5069,35 @@ lpfc_fcp_cpu_map_show(struct device *dev, struct device_attribute *attr,
len += snprintf(
buf + len, PAGE_SIZE - len,
"CPU %02d hdwq None "
- "physid %d coreid %d\n",
+ "physid %d coreid %d ht %d\n",
phba->sli4_hba.curr_disp_cpu,
cpup->phys_id,
- cpup->core_id);
+ cpup->core_id, cpup->hyper);
else
len += snprintf(
buf + len, PAGE_SIZE - len,
- "CPU %02d hdwq %04d "
- "physid %d coreid %d\n",
+ "CPU %02d EQ %04d hdwq %04d "
+ "physid %d coreid %d ht %d\n",
phba->sli4_hba.curr_disp_cpu,
- cpup->hdwq, cpup->phys_id,
- cpup->core_id);
+ cpup->eq, cpup->hdwq, cpup->phys_id,
+ cpup->core_id, cpup->hyper);
} else {
if (cpup->hdwq == LPFC_VECTOR_MAP_EMPTY)
len += snprintf(
buf + len, PAGE_SIZE - len,
"CPU %02d hdwq None "
- "physid %d coreid %d IRQ %d\n",
+ "physid %d coreid %d ht %d IRQ %d\n",
phba->sli4_hba.curr_disp_cpu,
cpup->phys_id,
- cpup->core_id, cpup->irq);
+ cpup->core_id, cpup->hyper, cpup->irq);
else
len += snprintf(
buf + len, PAGE_SIZE - len,
- "CPU %02d hdwq %04d "
- "physid %d coreid %d IRQ %d\n",
+ "CPU %02d EQ %04d hdwq %04d "
+ "physid %d coreid %d ht %d IRQ %d\n",
phba->sli4_hba.curr_disp_cpu,
- cpup->hdwq, cpup->phys_id,
- cpup->core_id, cpup->irq);
+ cpup->eq, cpup->hdwq, cpup->phys_id,
+ cpup->core_id, cpup->hyper, cpup->irq);
}
phba->sli4_hba.curr_disp_cpu++;
@@ -5146,14 +5139,13 @@ lpfc_fcp_cpu_map_store(struct device *dev, struct device_attribute *attr,
# lpfc_fcp_cpu_map: Defines how to map CPUs to IRQ vectors
# for the HBA.
#
-# Value range is [0 to 2]. Default value is LPFC_DRIVER_CPU_MAP (2).
+# Value range is [0 to 1]. Default value is LPFC_HBA_CPU_MAP (1).
# 0 - Do not affinitize IRQ vectors
# 1 - Affinitize HBA vectors with respect to each HBA
# (start with CPU0 for each HBA)
-# 2 - Affintize HBA vectors with respect to the entire driver
-# (round robin thru all CPUs across all HBAs)
+# This also defines how Hardware Queues are mapped to specific CPUs.
*/
-static int lpfc_fcp_cpu_map = LPFC_DRIVER_CPU_MAP;
+static int lpfc_fcp_cpu_map = LPFC_HBA_CPU_MAP;
module_param(lpfc_fcp_cpu_map, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(lpfc_fcp_cpu_map,
"Defines how to map CPUs to IRQ vectors per HBA");
@@ -5187,7 +5179,7 @@ lpfc_fcp_cpu_map_init(struct lpfc_hba *phba, int val)
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"3326 lpfc_fcp_cpu_map: %d out of range, using "
"default\n", val);
- phba->cfg_fcp_cpu_map = LPFC_DRIVER_CPU_MAP;
+ phba->cfg_fcp_cpu_map = LPFC_HBA_CPU_MAP;
return 0;
}
@@ -5308,7 +5300,7 @@ LPFC_ATTR_R(xri_rebalancing, 1, 0, 1, "Enable/Disable XRI rebalancing");
* CPU. Otherwise, the default 0 (Round Robin) scheduling of FCP/NVME I/Os
* through WQs will be used.
*/
-LPFC_ATTR_RW(fcp_io_sched, LPFC_FCP_SCHED_BY_HDWQ,
+LPFC_ATTR_RW(fcp_io_sched, LPFC_FCP_SCHED_BY_CPU,
LPFC_FCP_SCHED_BY_HDWQ,
LPFC_FCP_SCHED_BY_CPU,
"Determine scheduling algorithm for "
@@ -5474,18 +5466,18 @@ LPFC_ATTR_RW(nvme_embed_cmd, 1, 0, 2,
"Embed NVME Command in WQE");
/*
- * lpfc_hdw_queue: Set the number of IO channels the driver
+ * lpfc_hdw_queue: Set the number of Hardware Queues the driver
* will advertise it supports to the NVME and SCSI layers. This also
- * will map to the number of EQ/CQ/WQs the driver will create.
+ * will map to the number of CQ/WQ pairs the driver will create.
*
* The NVME Layer will try to create this many, plus 1 administrative
* hardware queue. The administrative queue will always map to WQ 0
- * A hardware IO queue maps (qidx) to a specific driver WQ.
+ * A hardware IO queue maps (qidx) to a specific driver CQ/WQ.
*
* 0 = Configure the number of hdw queues to the number of active CPUs.
- * 1,64 = Manually specify how many hdw queues to use.
+ * 1,128 = Manually specify how many hdw queues to use.
*
- * Value range is [0,64]. Default value is 0.
+ * Value range is [0,128]. Default value is 0.
*/
LPFC_ATTR_R(hdw_queue,
LPFC_HBA_HDWQ_DEF,
@@ -5493,6 +5485,22 @@ LPFC_ATTR_R(hdw_queue,
"Set the number of I/O Hardware Queues");
/*
+ * lpfc_irq_chann: Set the number of IRQ vectors that are available
+ * for Hardware Queues to utilize. This also will map to the number
+ * of EQ / MSI-X vectors the driver will create. This should never be
+ * more than the number of Hardware Queues.
+ *
+ * 0 = Configure number of IRQ Channels to the number of active CPUs.
+ * 1,128 = Manually specify how many IRQ Channels to use.
+ *
+ * Value range is [0,128]. Default value is 0.
+ */
+LPFC_ATTR_R(irq_chann,
+ LPFC_HBA_HDWQ_DEF,
+ LPFC_HBA_HDWQ_MIN, LPFC_HBA_HDWQ_MAX,
+ "Set the number of I/O IRQ Channels");
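For orientation, a minimal sketch (not part of the patch; the helper name and the present_cpus argument are invented) of how the two parameters end up constrained by lpfc_get_cfgparam() and lpfc_nvme_mod_param_dep() further down:

static unsigned int
lpfc_effective_irq_chann(unsigned int irq_chann, unsigned int hdw_queue,
			 unsigned int present_cpus)
{
	if (hdw_queue == 0)		/* 0 means one queue per present CPU */
		hdw_queue = present_cpus;
	if (irq_chann == 0)		/* 0 means one vector per present CPU */
		irq_chann = present_cpus;
	if (irq_chann > hdw_queue)	/* never more EQ/MSI-X vectors than hdwqs */
		irq_chann = hdw_queue;
	return irq_chann;
}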
+
+/*
# lpfc_enable_hba_reset: Allow or prevent HBA resets to the hardware.
# 0 = HBA resets disabled
# 1 = HBA resets enabled (default)
@@ -5533,16 +5541,6 @@ LPFC_ATTR_RW(XLanePriority, 0, 0x0, 0x7f, "CS_CTL for Express Lane Feature.");
LPFC_ATTR_R(enable_bg, 0, 0, 1, "Enable BlockGuard Support");
/*
-# lpfc_fcp_look_ahead: Look ahead for completions in FCP start routine
-# 0 = disabled (default)
-# 1 = enabled
-# Value range is [0,1]. Default value is 0.
-#
-# This feature in under investigation and may be supported in the future.
-*/
-unsigned int lpfc_fcp_look_ahead = LPFC_LOOK_AHEAD_OFF;
-
-/*
# lpfc_prot_mask: i
# - Bit mask of host protection capabilities used to register with the
# SCSI mid-layer
@@ -5796,6 +5794,7 @@ struct device_attribute *lpfc_hba_attrs[] = {
&dev_attr_lpfc_fcp_imax,
&dev_attr_lpfc_fcp_cpu_map,
&dev_attr_lpfc_hdw_queue,
+ &dev_attr_lpfc_irq_chann,
&dev_attr_lpfc_suppress_rsp,
&dev_attr_lpfc_nvmet_mrq,
&dev_attr_lpfc_nvmet_mrq_post,
@@ -6876,6 +6875,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
lpfc_nvme_enable_fb_init(phba, lpfc_nvme_enable_fb);
lpfc_nvmet_fb_size_init(phba, lpfc_nvmet_fb_size);
lpfc_hdw_queue_init(phba, lpfc_hdw_queue);
+ lpfc_irq_chann_init(phba, lpfc_irq_chann);
lpfc_enable_bbcr_init(phba, lpfc_enable_bbcr);
lpfc_enable_dpp_init(phba, lpfc_enable_dpp);
lpfc_enable_scsi_mq_init(phba, lpfc_enable_scsi_mq);
@@ -6901,6 +6901,10 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
/* A value of 0 means use the number of CPUs found in the system */
if (phba->cfg_hdw_queue == 0)
phba->cfg_hdw_queue = phba->sli4_hba.num_present_cpu;
+ if (phba->cfg_irq_chann == 0)
+ phba->cfg_irq_chann = phba->sli4_hba.num_present_cpu;
+ if (phba->cfg_irq_chann > phba->cfg_hdw_queue)
+ phba->cfg_irq_chann = phba->cfg_hdw_queue;
phba->cfg_soft_wwnn = 0L;
phba->cfg_soft_wwpn = 0L;
@@ -6943,6 +6947,10 @@ lpfc_nvme_mod_param_dep(struct lpfc_hba *phba)
{
if (phba->cfg_hdw_queue > phba->sli4_hba.num_present_cpu)
phba->cfg_hdw_queue = phba->sli4_hba.num_present_cpu;
+ if (phba->cfg_irq_chann > phba->sli4_hba.num_present_cpu)
+ phba->cfg_irq_chann = phba->sli4_hba.num_present_cpu;
+ if (phba->cfg_irq_chann > phba->cfg_hdw_queue)
+ phba->cfg_irq_chann = phba->cfg_hdw_queue;
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME &&
phba->nvmet_support) {
@@ -6963,11 +6971,11 @@ lpfc_nvme_mod_param_dep(struct lpfc_hba *phba)
}
if (!phba->cfg_nvmet_mrq)
- phba->cfg_nvmet_mrq = phba->cfg_hdw_queue;
+ phba->cfg_nvmet_mrq = phba->cfg_irq_chann;
/* Adjust lpfc_nvmet_mrq to avoid running out of WQE slots */
- if (phba->cfg_nvmet_mrq > phba->cfg_hdw_queue) {
- phba->cfg_nvmet_mrq = phba->cfg_hdw_queue;
+ if (phba->cfg_nvmet_mrq > phba->cfg_irq_chann) {
+ phba->cfg_nvmet_mrq = phba->cfg_irq_chann;
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC,
"6018 Adjust lpfc_nvmet_mrq to %d\n",
phba->cfg_nvmet_mrq);
@@ -440,7 +440,6 @@ extern spinlock_t _dump_buf_lock;
extern int _dump_buf_done;
extern spinlock_t pgcnt_lock;
extern unsigned int pgcnt;
-extern unsigned int lpfc_fcp_look_ahead;
/* Interface exported by fabric iocb scheduler */
void lpfc_fabric_abort_nport(struct lpfc_nodelist *);
@@ -378,6 +378,67 @@ lpfc_debugfs_hbqinfo_data(struct lpfc_hba *phba, char *buf, int size)
return len;
}
+static int lpfc_debugfs_last_xripool;
+
+/**
+ * lpfc_debugfs_commonxripools_data - Dump Hardware Queue info to a buffer
+ * @phba: The HBA to gather host buffer info from.
+ * @buf: The buffer to dump log into.
+ * @size: The maximum amount of data to process.
+ *
+ * Description:
+ * This routine dumps the Hardware Queue info from the @phba to @buf up to
+ * @size number of bytes. A header that describes the current hdwq state will be
+ * dumped to @buf first and then info on each hdwq entry will be dumped to @buf
+ * until @size bytes have been dumped or all the hdwq info has been dumped.
+ *
+ * Notes:
+ * This routine will rotate through each configured Hardware Queue each
+ * time called.
+ *
+ * Return Value:
+ * This routine returns the amount of bytes that were dumped into @buf and will
+ * not exceed @size.
+ **/
+static int
+lpfc_debugfs_commonxripools_data(struct lpfc_hba *phba, char *buf, int size)
+{
+ struct lpfc_sli4_hdw_queue *qp;
+ int len = 0;
+ int i, out;
+ unsigned long iflag;
+
+ for (i = 0; i < phba->cfg_hdw_queue; i++) {
+ if (len > (LPFC_DUMP_MULTIXRIPOOL_SIZE - 80))
+ break;
+ qp = &phba->sli4_hba.hdwq[lpfc_debugfs_last_xripool];
+
+ len += snprintf(buf + len, size - len, "HdwQ %d Info ", i);
+ spin_lock_irqsave(&qp->abts_scsi_buf_list_lock, iflag);
+ spin_lock(&qp->abts_nvme_buf_list_lock);
+ spin_lock(&qp->io_buf_list_get_lock);
+ spin_lock(&qp->io_buf_list_put_lock);
+ out = qp->total_io_bufs - (qp->get_io_bufs + qp->put_io_bufs +
+ qp->abts_scsi_io_bufs + qp->abts_nvme_io_bufs);
+ len += snprintf(buf + len, size - len,
+ "tot:%d get:%d put:%d mt:%d "
+ "ABTS scsi:%d nvme:%d Out:%d\n",
+ qp->total_io_bufs, qp->get_io_bufs, qp->put_io_bufs,
+ qp->empty_io_bufs, qp->abts_scsi_io_bufs,
+ qp->abts_nvme_io_bufs, out);
+ spin_unlock(&qp->io_buf_list_put_lock);
+ spin_unlock(&qp->io_buf_list_get_lock);
+ spin_unlock(&qp->abts_nvme_buf_list_lock);
+ spin_unlock_irqrestore(&qp->abts_scsi_buf_list_lock, iflag);
+
+ lpfc_debugfs_last_xripool++;
+ if (lpfc_debugfs_last_xripool >= phba->cfg_hdw_queue)
+ lpfc_debugfs_last_xripool = 0;
+ }
+
+ return len;
+}
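For illustration (values invented), each Hardware Queue contributes one line of the form:

	HdwQ 3 Info tot:512 get:200 put:280 mt:0 ABTS scsi:2 nvme:0 Out:30

where Out is tot minus everything on the get/put/abort lists, i.e. 512 - (200 + 280 + 2 + 0) = 30.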
+
/**
* lpfc_debugfs_multixripools_data - Display multi-XRI pools information
* @phba: The HBA to gather host buffer info from.
@@ -405,6 +466,17 @@ lpfc_debugfs_multixripools_data(struct lpfc_hba *phba, char *buf, int size)
u32 txcmplq_cnt;
char tmp[LPFC_DEBUG_OUT_LINE_SZ] = {0};
+ if (phba->sli_rev != LPFC_SLI_REV4)
+ return 0;
+
+ if (!phba->sli4_hba.hdwq)
+ return 0;
+
+ if (!phba->cfg_xri_rebalancing) {
+ i = lpfc_debugfs_commonxripools_data(phba, buf, size);
+ return i;
+ }
+
/*
* Pbl: Current number of free XRIs in public pool
* Pvt: Current number of free XRIs in private pool
@@ -498,10 +570,12 @@ lpfc_debugfs_multixripools_data(struct lpfc_hba *phba, char *buf, int size)
return strnlen(buf, size);
}
-static int lpfc_debugfs_last_hdwq;
+
+#ifdef LPFC_HDWQ_LOCK_STAT
+static int lpfc_debugfs_last_lock;
/**
- * lpfc_debugfs_hdwqinfo_data - Dump Hardware Queue info to a buffer
+ * lpfc_debugfs_lockstat_data - Dump Hardware Queue lock statistics to a buffer
* @phba: The HBA to gather host buffer info from.
* @buf: The buffer to dump log into.
* @size: The maximum amount of data to process.
@@ -521,12 +595,11 @@ static int lpfc_debugfs_last_hdwq;
* not exceed @size.
**/
static int
-lpfc_debugfs_hdwqinfo_data(struct lpfc_hba *phba, char *buf, int size)
+lpfc_debugfs_lockstat_data(struct lpfc_hba *phba, char *buf, int size)
{
struct lpfc_sli4_hdw_queue *qp;
int len = 0;
- int i, out;
- unsigned long iflag;
+ int i;
if (phba->sli_rev != LPFC_SLI_REV4)
return 0;
@@ -535,35 +608,40 @@ lpfc_debugfs_hdwqinfo_data(struct lpfc_hba *phba, char *buf, int size)
return 0;
for (i = 0; i < phba->cfg_hdw_queue; i++) {
- if (len > (LPFC_HDWQINFO_SIZE - 80))
+ if (len > (LPFC_HDWQINFO_SIZE - 100))
break;
- qp = &phba->sli4_hba.hdwq[lpfc_debugfs_last_hdwq];
+ qp = &phba->sli4_hba.hdwq[lpfc_debugfs_last_lock];
- len += snprintf(buf + len, size - len, "HdwQ %d Info ", i);
- spin_lock_irqsave(&qp->abts_scsi_buf_list_lock, iflag);
- spin_lock(&qp->abts_nvme_buf_list_lock);
- spin_lock(&qp->io_buf_list_get_lock);
- spin_lock(&qp->io_buf_list_put_lock);
- out = qp->total_io_bufs - (qp->get_io_bufs + qp->put_io_bufs +
- qp->abts_scsi_io_bufs + qp->abts_nvme_io_bufs);
- len += snprintf(buf + len, size - len,
- "tot:%d get:%d put:%d mt:%d "
- "ABTS scsi:%d nvme:%d Out:%d\n",
- qp->total_io_bufs, qp->get_io_bufs, qp->put_io_bufs,
- qp->empty_io_bufs, qp->abts_scsi_io_bufs,
- qp->abts_nvme_io_bufs, out);
- spin_unlock(&qp->io_buf_list_put_lock);
- spin_unlock(&qp->io_buf_list_get_lock);
- spin_unlock(&qp->abts_nvme_buf_list_lock);
- spin_unlock_irqrestore(&qp->abts_scsi_buf_list_lock, iflag);
+ len += snprintf(buf + len, size - len, "HdwQ %03d Lock ", i);
+ if (phba->cfg_xri_rebalancing) {
+ len += snprintf(buf + len, size - len,
+ "get_pvt:%d mv_pvt:%d "
+ "mv2pub:%d mv2pvt:%d "
+ "put_pvt:%d put_pub:%d wq:%d\n",
+ qp->lock_conflict.alloc_pvt_pool,
+ qp->lock_conflict.mv_from_pvt_pool,
+ qp->lock_conflict.mv_to_pub_pool,
+ qp->lock_conflict.mv_to_pvt_pool,
+ qp->lock_conflict.free_pvt_pool,
+ qp->lock_conflict.free_pub_pool,
+ qp->lock_conflict.wq_access);
+ } else {
+ len += snprintf(buf + len, size - len,
+ "get:%d put:%d free:%d wq:%d\n",
+ qp->lock_conflict.alloc_xri_get,
+ qp->lock_conflict.alloc_xri_put,
+ qp->lock_conflict.free_xri,
+ qp->lock_conflict.wq_access);
+ }
- lpfc_debugfs_last_hdwq++;
- if (lpfc_debugfs_last_hdwq >= phba->cfg_hdw_queue)
- lpfc_debugfs_last_hdwq = 0;
+ lpfc_debugfs_last_lock++;
+ if (lpfc_debugfs_last_lock >= phba->cfg_hdw_queue)
+ lpfc_debugfs_last_lock = 0;
}
return len;
}
+#endif
static int lpfc_debugfs_last_hba_slim_off;
@@ -962,7 +1040,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
struct lpfc_nvme_lport *lport;
uint64_t data1, data2, data3;
uint64_t tot, totin, totout;
- int cnt, i, maxch;
+ int cnt, i;
int len = 0;
if (phba->nvmet_support) {
@@ -1104,10 +1182,6 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
atomic_read(&lport->fc4NvmeLsRequests),
atomic_read(&lport->fc4NvmeLsCmpls));
- if (phba->cfg_hdw_queue < LPFC_HBA_HDWQ_MAX)
- maxch = phba->cfg_hdw_queue;
- else
- maxch = LPFC_HBA_HDWQ_MAX;
totin = 0;
totout = 0;
for (i = 0; i < phba->cfg_hdw_queue; i++) {
@@ -1545,7 +1619,7 @@ lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size)
{
struct lpfc_hba *phba = vport->phba;
struct lpfc_sli4_hdw_queue *qp;
- int i, j;
+ int i, j, max_cnt;
int len = 0;
uint32_t tot_xmt;
uint32_t tot_rcv;
@@ -1563,6 +1637,7 @@ lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size)
} else {
len += snprintf(buf + len, PAGE_SIZE - len, "\n");
}
+ max_cnt = size - LPFC_DEBUG_OUT_LINE_SZ;
for (i = 0; i < phba->cfg_hdw_queue; i++) {
qp = &phba->sli4_hba.hdwq[i];
@@ -1604,6 +1679,11 @@ lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size)
}
len += snprintf(buf + len, PAGE_SIZE - len,
"Total: %x\n", tot_xmt);
+ if (len >= max_cnt) {
+ len += snprintf(buf + len, PAGE_SIZE - len,
+ "Truncated ...\n");
+ return len;
+ }
}
return len;
}
@@ -1902,11 +1982,8 @@ lpfc_debugfs_multixripools_open(struct inode *inode, struct file *file)
goto out;
}
- if (phba->cfg_xri_rebalancing)
- debug->len = lpfc_debugfs_multixripools_data(
- phba, debug->buffer, LPFC_DUMP_MULTIXRIPOOL_SIZE);
- else
- debug->len = 0;
+ debug->len = lpfc_debugfs_multixripools_data(
+ phba, debug->buffer, LPFC_DUMP_MULTIXRIPOOL_SIZE);
debug->i_private = inode->i_private;
file->private_data = debug;
@@ -1916,8 +1993,9 @@ lpfc_debugfs_multixripools_open(struct inode *inode, struct file *file)
return rc;
}
+#ifdef LPFC_HDWQ_LOCK_STAT
/**
- * lpfc_debugfs_hdwqinfo_open - Open the hdwqinfo debugfs buffer
+ * lpfc_debugfs_lockstat_open - Open the lockstat debugfs buffer
* @inode: The inode pointer that contains a vport pointer.
* @file: The file pointer to attach the log output.
*
@@ -1932,7 +2010,7 @@ lpfc_debugfs_multixripools_open(struct inode *inode, struct file *file)
* error value.
**/
static int
-lpfc_debugfs_hdwqinfo_open(struct inode *inode, struct file *file)
+lpfc_debugfs_lockstat_open(struct inode *inode, struct file *file)
{
struct lpfc_hba *phba = inode->i_private;
struct lpfc_debug *debug;
@@ -1949,7 +2027,7 @@ lpfc_debugfs_hdwqinfo_open(struct inode *inode, struct file *file)
goto out;
}
- debug->len = lpfc_debugfs_hdwqinfo_data(phba, debug->buffer,
+ debug->len = lpfc_debugfs_lockstat_data(phba, debug->buffer,
LPFC_HBQINFO_SIZE);
file->private_data = debug;
@@ -1958,6 +2036,48 @@ lpfc_debugfs_hdwqinfo_open(struct inode *inode, struct file *file)
return rc;
}
+static ssize_t
+lpfc_debugfs_lockstat_write(struct file *file, const char __user *buf,
+ size_t nbytes, loff_t *ppos)
+{
+ struct lpfc_debug *debug = file->private_data;
+ struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private;
+ struct lpfc_sli4_hdw_queue *qp;
+ char mybuf[64];
+ char *pbuf;
+ int i;
+
+ /* Protect copy from user */
+ if (!access_ok(VERIFY_READ, buf, nbytes))
+ return -EFAULT;
+
+	memset(mybuf, 0, sizeof(mybuf));
+
+	/* Clamp the copy so a long write cannot overflow mybuf */
+	if (nbytes > sizeof(mybuf) - 1)
+		nbytes = sizeof(mybuf) - 1;
+
+	if (copy_from_user(mybuf, buf, nbytes))
+ return -EFAULT;
+ pbuf = &mybuf[0];
+
+ if ((strncmp(pbuf, "reset", strlen("reset")) == 0) ||
+ (strncmp(pbuf, "zero", strlen("zero")) == 0)) {
+ for (i = 0; i < phba->cfg_hdw_queue; i++) {
+ qp = &phba->sli4_hba.hdwq[i];
+ qp->lock_conflict.alloc_xri_get = 0;
+ qp->lock_conflict.alloc_xri_put = 0;
+ qp->lock_conflict.free_xri = 0;
+ qp->lock_conflict.wq_access = 0;
+ qp->lock_conflict.alloc_pvt_pool = 0;
+ qp->lock_conflict.mv_from_pvt_pool = 0;
+ qp->lock_conflict.mv_to_pub_pool = 0;
+ qp->lock_conflict.mv_to_pvt_pool = 0;
+ qp->lock_conflict.free_pvt_pool = 0;
+ qp->lock_conflict.free_pub_pool = 0;
+ }
+ }
+ return nbytes;
+}
+#endif
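The counters zeroed above are incremented at the driver's hardware-queue lock sites; as a hedged sketch only (the macro name and exact shape are assumptions, not the driver's actual helper), an LPFC_HDWQ_LOCK_STAT build implies wrappers of roughly this form:

#ifdef LPFC_HDWQ_LOCK_STAT
/* Sketch: count a conflict whenever the lock is found already held */
#define lpfc_qp_spin_lock(lock, qp, counter)			\
	do {							\
		if (!spin_trylock(lock)) {			\
			(qp)->lock_conflict.counter++;		\
			spin_lock(lock);			\
		}						\
	} while (0)
#else
#define lpfc_qp_spin_lock(lock, qp, counter)	spin_lock(lock)
#endif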
+
/**
* lpfc_debugfs_dumpHBASlim_open - Open the Dump HBA SLIM debugfs buffer
* @inode: The inode pointer that contains a vport pointer.
@@ -2814,7 +2934,7 @@ lpfc_debugfs_cpucheck_open(struct inode *inode, struct file *file)
}
debug->len = lpfc_debugfs_cpucheck_data(vport, debug->buffer,
- LPFC_NVMEKTIME_SIZE);
+ LPFC_CPUCHECK_SIZE);
debug->i_private = inode->i_private;
file->private_data = debug;
@@ -2849,8 +2969,18 @@ lpfc_debugfs_cpucheck_write(struct file *file, const char __user *buf,
if (phba->nvmet_support)
phba->cpucheck_on |= LPFC_CHECK_NVMET_IO;
else
+ phba->cpucheck_on |= (LPFC_CHECK_NVME_IO |
+ LPFC_CHECK_SCSI_IO);
+ return strlen(pbuf);
+ } else if ((strncmp(pbuf, "nvme_on", sizeof("nvme_on") - 1) == 0)) {
+ if (phba->nvmet_support)
+ phba->cpucheck_on |= LPFC_CHECK_NVMET_IO;
+ else
phba->cpucheck_on |= LPFC_CHECK_NVME_IO;
return strlen(pbuf);
+ } else if ((strncmp(pbuf, "scsi_on", sizeof("scsi_on") - 1) == 0)) {
+ phba->cpucheck_on |= LPFC_CHECK_SCSI_IO;
+ return strlen(pbuf);
} else if ((strncmp(pbuf, "rcv",
sizeof("rcv") - 1) == 0)) {
if (phba->nvmet_support)
@@ -3730,46 +3860,38 @@ lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer,
int *len, int max_cnt, int eqidx, int eq_id)
{
struct lpfc_queue *qp;
- int qidx, rc;
+ int rc;
- for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
- qp = phba->sli4_hba.hdwq[qidx].fcp_cq;
- if (qp->assoc_qid != eq_id)
- continue;
+ qp = phba->sli4_hba.hdwq[eqidx].fcp_cq;
- *len = __lpfc_idiag_print_cq(qp, "FCP", pbuffer, *len);
+ *len = __lpfc_idiag_print_cq(qp, "FCP", pbuffer, *len);
- /* Reset max counter */
- qp->CQ_max_cqe = 0;
+ /* Reset max counter */
+ qp->CQ_max_cqe = 0;
- if (*len >= max_cnt)
- return 1;
+ if (*len >= max_cnt)
+ return 1;
- rc = lpfc_idiag_wqs_for_cq(phba, "FCP", pbuffer, len,
- max_cnt, qp->queue_id);
- if (rc)
- return 1;
- }
+ rc = lpfc_idiag_wqs_for_cq(phba, "FCP", pbuffer, len,
+ max_cnt, qp->queue_id);
+ if (rc)
+ return 1;
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
- for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
- qp = phba->sli4_hba.hdwq[qidx].nvme_cq;
- if (qp->assoc_qid != eq_id)
- continue;
+ qp = phba->sli4_hba.hdwq[eqidx].nvme_cq;
- *len = __lpfc_idiag_print_cq(qp, "NVME", pbuffer, *len);
+ *len = __lpfc_idiag_print_cq(qp, "NVME", pbuffer, *len);
- /* Reset max counter */
- qp->CQ_max_cqe = 0;
+ /* Reset max counter */
+ qp->CQ_max_cqe = 0;
- if (*len >= max_cnt)
- return 1;
+ if (*len >= max_cnt)
+ return 1;
- rc = lpfc_idiag_wqs_for_cq(phba, "NVME", pbuffer, len,
- max_cnt, qp->queue_id);
- if (rc)
- return 1;
- }
+ rc = lpfc_idiag_wqs_for_cq(phba, "NVME", pbuffer, len,
+ max_cnt, qp->queue_id);
+ if (rc)
+ return 1;
}
if ((eqidx < phba->cfg_nvmet_mrq) && phba->nvmet_support) {
@@ -3810,9 +3932,10 @@ __lpfc_idiag_print_eq(struct lpfc_queue *qp, char *eqtype,
(unsigned long long)qp->q_cnt_4, qp->q_mode);
len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
"EQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
- "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
+ "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d] AFFIN[%03d]",
qp->queue_id, qp->entry_count, qp->entry_size,
- qp->host_index, qp->hba_index, qp->entry_repost);
+ qp->host_index, qp->hba_index, qp->entry_repost,
+ qp->chann);
len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
return len;
@@ -3867,7 +3990,7 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes,
phba->lpfc_idiag_last_eq = 0;
len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
- "EQ %d out of %d HBA EQs\n",
+ "HDWQ %d out of %d HBA HDWQs\n",
x, phba->cfg_hdw_queue);
/* Fast-path EQ */
@@ -5297,14 +5420,17 @@ static const struct file_operations lpfc_debugfs_op_hbqinfo = {
.release = lpfc_debugfs_release,
};
-#undef lpfc_debugfs_op_hdwqinfo
-static const struct file_operations lpfc_debugfs_op_hdwqinfo = {
+#ifdef LPFC_HDWQ_LOCK_STAT
+#undef lpfc_debugfs_op_lockstat
+static const struct file_operations lpfc_debugfs_op_lockstat = {
.owner = THIS_MODULE,
- .open = lpfc_debugfs_hdwqinfo_open,
+ .open = lpfc_debugfs_lockstat_open,
.llseek = lpfc_debugfs_lseek,
.read = lpfc_debugfs_read,
+ .write = lpfc_debugfs_lockstat_write,
.release = lpfc_debugfs_release,
};
+#endif
#undef lpfc_debugfs_op_dumpHBASlim
static const struct file_operations lpfc_debugfs_op_dumpHBASlim = {
@@ -5769,17 +5895,19 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
goto debug_failed;
}
- /* Setup hdwqinfo */
- snprintf(name, sizeof(name), "hdwqinfo");
- phba->debug_hdwqinfo =
+#ifdef LPFC_HDWQ_LOCK_STAT
+ /* Setup lockstat */
+ snprintf(name, sizeof(name), "lockstat");
+ phba->debug_lockstat =
debugfs_create_file(name, S_IFREG | 0644,
phba->hba_debugfs_root,
- phba, &lpfc_debugfs_op_hdwqinfo);
- if (!phba->debug_hdwqinfo) {
+ phba, &lpfc_debugfs_op_lockstat);
+ if (!phba->debug_lockstat) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
- "0511 Cant create debugfs hdwqinfo\n");
+ "0913 Cant create debugfs lockstat\n");
goto debug_failed;
}
+#endif
/* Setup dumpHBASlim */
if (phba->sli_rev < LPFC_SLI_REV4) {
@@ -6118,7 +6246,7 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
vport, &lpfc_debugfs_op_scsistat);
if (!vport->debug_scsistat) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
- "0811 Cannot create debugfs scsistat\n");
+ "0914 Cannot create debugfs scsistat\n");
goto debug_failed;
}
@@ -6339,9 +6467,10 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
debugfs_remove(phba->debug_hbqinfo); /* hbqinfo */
phba->debug_hbqinfo = NULL;
- debugfs_remove(phba->debug_hdwqinfo); /* hdwqinfo */
- phba->debug_hdwqinfo = NULL;
-
+#ifdef LPFC_HDWQ_LOCK_STAT
+ debugfs_remove(phba->debug_lockstat); /* lockstat */
+ phba->debug_lockstat = NULL;
+#endif
debugfs_remove(phba->debug_dumpHBASlim); /* HBASlim */
phba->debug_dumpHBASlim = NULL;
@@ -290,9 +290,6 @@ struct lpfc_idiag {
/* multixripool output buffer size */
#define LPFC_DUMP_MULTIXRIPOOL_SIZE 8192
-/* hdwqinfo output buffer size */
-#define LPFC_HDWQINFO_SIZE 8192
-
enum {
DUMP_FCP,
DUMP_NVME,
@@ -211,9 +211,8 @@ struct lpfc_sli_intf {
#define LPFC_DEF_IMAX 150000
#define LPFC_MIN_CPU_MAP 0
-#define LPFC_MAX_CPU_MAP 2
+#define LPFC_MAX_CPU_MAP 1
#define LPFC_HBA_CPU_MAP 1
-#define LPFC_DRIVER_CPU_MAP 2 /* Default */
/* PORT_CAPABILITIES constants. */
#define LPFC_MAX_SUPPORTED_PAGES 8
@@ -37,6 +37,7 @@
#include <linux/miscdevice.h>
#include <linux/percpu.h>
#include <linux/msi.h>
+#include <linux/irq.h>
#include <linux/bitops.h>
#include <scsi/scsi.h>
@@ -92,6 +93,8 @@ static void lpfc_sli4_cq_event_release_all(struct lpfc_hba *);
static void lpfc_sli4_disable_intr(struct lpfc_hba *);
static uint32_t lpfc_sli4_enable_intr(struct lpfc_hba *, uint32_t);
static void lpfc_sli4_oas_verify(struct lpfc_hba *phba);
+static uint16_t lpfc_find_eq_handle(struct lpfc_hba *, uint16_t);
+static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int);
static struct scsi_transport_template *lpfc_transport_template = NULL;
static struct scsi_transport_template *lpfc_vport_transport_template = NULL;
@@ -1363,13 +1366,13 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
}
/* Interrupts per sec per EQ */
- val = phba->cfg_fcp_imax / phba->cfg_hdw_queue;
+ val = phba->cfg_fcp_imax / phba->cfg_irq_chann;
tick_cqe = val / CONFIG_HZ; /* Per tick per EQ */
/* Assume 1 CQE/ISR, calc max CQEs allowed for time duration */
max_cqe = time_elapsed * tick_cqe;
- for (i = 0; i < phba->cfg_hdw_queue; i++) {
+ for (i = 0; i < phba->cfg_irq_chann; i++) {
/* Fast-path EQ */
qp = phba->sli4_hba.hdwq[i].hba_eq;
if (!qp)
@@ -1393,7 +1396,7 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
if (val) {
/* First, interrupts per sec per EQ */
val = phba->cfg_fcp_imax /
- phba->cfg_hdw_queue;
+ phba->cfg_irq_chann;
/* us delay between each interrupt */
val = LPFC_SEC_TO_USEC / val;
@@ -4331,9 +4334,16 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
shost->max_lun = vport->cfg_max_luns;
shost->this_id = -1;
shost->max_cmd_len = 16;
- if (shost_use_blk_mq(shost) && phba->cfg_enable_scsi_mq) {
+
+ /* Advertise how many hw queues we support based on fcp_io_sched
+ * and if SCSI mq is turned on.
+ */
+ if ((phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) &&
+ (shost_use_blk_mq(shost) && phba->cfg_enable_scsi_mq))
shost->nr_hw_queues = phba->cfg_hdw_queue;
- }
+ else
+ shost->nr_hw_queues = phba->sli4_hba.num_present_cpu;
+
if (phba->sli_rev == LPFC_SLI_REV4) {
shost->dma_boundary =
phba->sli4_hba.pc_sli4_params.sge_supp_len-1;
@@ -6813,7 +6823,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
goto out_remove_rpi_hdrs;
}
- phba->sli4_hba.hba_eq_hdl = kcalloc(phba->cfg_hdw_queue,
+ phba->sli4_hba.hba_eq_hdl = kcalloc(phba->cfg_irq_chann,
sizeof(struct lpfc_hba_eq_hdl),
GFP_KERNEL);
if (!phba->sli4_hba.hba_eq_hdl) {
@@ -8251,7 +8261,7 @@ lpfc_sli4_read_config(struct lpfc_hba *phba)
struct lpfc_rsrc_desc_fcfcoe *desc;
char *pdesc_0;
uint16_t forced_link_speed;
- uint32_t if_type;
+ uint32_t if_type, qmin;
int length, i, rc = 0, rc2;
pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
@@ -8356,40 +8366,44 @@ lpfc_sli4_read_config(struct lpfc_hba *phba)
phba->sli4_hba.max_cfg_param.max_rq);
/*
- * Calculate NVME queue resources based on how
- * many WQ/CQs are available.
+ * Calculate queue resources based on how
+ * many WQ/CQ/EQs are available.
*/
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
- length = phba->sli4_hba.max_cfg_param.max_wq;
- if (phba->sli4_hba.max_cfg_param.max_cq <
- phba->sli4_hba.max_cfg_param.max_wq)
- length = phba->sli4_hba.max_cfg_param.max_cq;
+ qmin = phba->sli4_hba.max_cfg_param.max_wq;
+ if (phba->sli4_hba.max_cfg_param.max_cq < qmin)
+ qmin = phba->sli4_hba.max_cfg_param.max_cq;
+ if (phba->sli4_hba.max_cfg_param.max_eq < qmin)
+ qmin = phba->sli4_hba.max_cfg_param.max_eq;
+ /*
+	 * What's left after this can go toward NVME / FCP.
+ * The minus 4 accounts for ELS, NVME LS, MBOX
+ * plus one extra. When configured for
+ * NVMET, FCP io channel WQs are not created.
+ */
+ qmin -= 4;
- /*
- * Whats left after this can go toward NVME.
- * The minus 6 accounts for ELS, NVME LS, MBOX
- * plus a couple extra. When configured for
- * NVMET, FCP io channel WQs are not created.
- */
- length -= 6;
-
- /* Take off FCP queues */
- if (!phba->nvmet_support)
- length -= phba->cfg_hdw_queue;
-
- /* Check to see if there is enough for NVME */
- if (phba->cfg_hdw_queue > length) {
- lpfc_printf_log(
- phba, KERN_ERR, LOG_SLI,
- "2005 Reducing NVME IO channel to %d: "
- "WQ %d CQ %d CommonIO %d\n",
- length,
+ /* If NVME is configured double the number of CQ/WQs needed */
+ if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
+ !phba->nvmet_support)
+ qmin /= 2;
+
+	/* Check to see if there are enough queue resources */
+ if ((phba->cfg_irq_chann > qmin) ||
+ (phba->cfg_hdw_queue > qmin)) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "2005 Reducing Queues: "
+ "WQ %d CQ %d EQ %d: min %d: "
+ "IRQ %d HDWQ %d\n",
phba->sli4_hba.max_cfg_param.max_wq,
phba->sli4_hba.max_cfg_param.max_cq,
+ phba->sli4_hba.max_cfg_param.max_eq,
+ qmin, phba->cfg_irq_chann,
phba->cfg_hdw_queue);
- phba->cfg_hdw_queue = length;
- }
+ if (phba->cfg_irq_chann > qmin)
+ phba->cfg_irq_chann = qmin;
+ if (phba->cfg_hdw_queue > qmin)
+ phba->cfg_hdw_queue = qmin;
}
}
@@ -8606,25 +8620,17 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba)
* device parameters
*/
- if (phba->cfg_hdw_queue > phba->sli4_hba.max_cfg_param.max_eq) {
- lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "2575 Reducing IO channels to match number of "
- "available EQs: from %d to %d\n",
- phba->cfg_hdw_queue,
- phba->sli4_hba.max_cfg_param.max_eq);
- phba->cfg_hdw_queue = phba->sli4_hba.max_cfg_param.max_eq;
- }
-
if (phba->nvmet_support) {
- if (phba->cfg_hdw_queue < phba->cfg_nvmet_mrq)
- phba->cfg_nvmet_mrq = phba->cfg_hdw_queue;
+ if (phba->cfg_irq_chann < phba->cfg_nvmet_mrq)
+ phba->cfg_nvmet_mrq = phba->cfg_irq_chann;
}
if (phba->cfg_nvmet_mrq > LPFC_NVMET_MRQ_MAX)
phba->cfg_nvmet_mrq = LPFC_NVMET_MRQ_MAX;
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "2574 IO channels: hdwQ %d MRQ: %d\n",
- phba->cfg_hdw_queue, phba->cfg_nvmet_mrq);
+ "2574 IO channels: hdwQ %d IRQ %d MRQ: %d\n",
+ phba->cfg_hdw_queue, phba->cfg_irq_chann,
+ phba->cfg_nvmet_mrq);
/* Get EQ depth from module parameter, fake the default for now */
phba->sli4_hba.eq_esize = LPFC_EQE_SIZE_4B;
@@ -8652,6 +8658,7 @@ lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx)
}
qdesc->qe_valid = 1;
qdesc->hdwq = wqidx;
+ qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
phba->sli4_hba.hdwq[wqidx].nvme_cq = qdesc;
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE,
@@ -8663,6 +8670,7 @@ lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx)
return 1;
}
qdesc->hdwq = wqidx;
+ qdesc->chann = wqidx;
phba->sli4_hba.hdwq[wqidx].nvme_wq = qdesc;
list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
return 0;
@@ -8692,6 +8700,7 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
}
qdesc->qe_valid = 1;
qdesc->hdwq = wqidx;
+ qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ);
phba->sli4_hba.hdwq[wqidx].fcp_cq = qdesc;
/* Create Fast Path FCP WQs */
@@ -8714,6 +8723,7 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx)
return 1;
}
qdesc->hdwq = wqidx;
+ qdesc->chann = wqidx;
phba->sli4_hba.hdwq[wqidx].fcp_wq = qdesc;
list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
return 0;
@@ -8737,7 +8747,7 @@ int
lpfc_sli4_queue_create(struct lpfc_hba *phba)
{
struct lpfc_queue *qdesc;
- int idx;
+ int idx, eqidx;
struct lpfc_sli4_hdw_queue *qp;
/*
@@ -8823,7 +8833,18 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
/* Create HBA Event Queues (EQs) */
for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
- /* Create EQs */
+ /*
+		 * If there are more Hardware Queues than available
+		 * EQs, multiple Hardware Queues may share a common EQ.
+ */
+ if (idx >= phba->cfg_irq_chann) {
+ /* Share an existing EQ */
+ eqidx = lpfc_find_eq_handle(phba, idx);
+ phba->sli4_hba.hdwq[idx].hba_eq =
+ phba->sli4_hba.hdwq[eqidx].hba_eq;
+ continue;
+ }
+ /* Create an EQ */
qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE,
phba->sli4_hba.eq_esize,
phba->sli4_hba.eq_ecount);
@@ -8834,20 +8855,27 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
}
qdesc->qe_valid = 1;
qdesc->hdwq = idx;
+
+		/* Save the CPU this EQ is affinitized to */
+ eqidx = lpfc_find_eq_handle(phba, idx);
+ qdesc->chann = lpfc_find_cpu_handle(phba, eqidx,
+ LPFC_FIND_BY_EQ);
phba->sli4_hba.hdwq[idx].hba_eq = qdesc;
}
/* Allocate SCSI SLI4 CQ/WQs */
- for (idx = 0; idx < phba->cfg_hdw_queue; idx++)
+ for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
if (lpfc_alloc_fcp_wq_cq(phba, idx))
goto out_error;
+ }
/* Allocate NVME SLI4 CQ/WQs */
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
- for (idx = 0; idx < phba->cfg_hdw_queue; idx++)
+ for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
if (lpfc_alloc_nvme_wq_cq(phba, idx))
goto out_error;
+ }
if (phba->nvmet_support) {
for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) {
@@ -8865,6 +8893,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
}
qdesc->qe_valid = 1;
qdesc->hdwq = idx;
+ qdesc->chann = idx;
phba->sli4_hba.nvmet_cqset[idx] = qdesc;
}
}
@@ -8896,6 +8925,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
goto out_error;
}
qdesc->qe_valid = 1;
+ qdesc->chann = 0;
phba->sli4_hba.els_cq = qdesc;
@@ -8913,6 +8943,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
"0505 Failed allocate slow-path MQ\n");
goto out_error;
}
+ qdesc->chann = 0;
phba->sli4_hba.mbx_wq = qdesc;
/*
@@ -8928,6 +8959,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
"0504 Failed allocate slow-path ELS WQ\n");
goto out_error;
}
+ qdesc->chann = 0;
phba->sli4_hba.els_wq = qdesc;
list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
@@ -8941,6 +8973,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
"6079 Failed allocate NVME LS CQ\n");
goto out_error;
}
+ qdesc->chann = 0;
qdesc->qe_valid = 1;
phba->sli4_hba.nvmels_cq = qdesc;
@@ -8953,6 +8986,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
"6080 Failed allocate NVME LS WQ\n");
goto out_error;
}
+ qdesc->chann = 0;
phba->sli4_hba.nvmels_wq = qdesc;
list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list);
}
@@ -9079,17 +9113,21 @@ lpfc_sli4_release_queues(struct lpfc_queue ***qs, int max)
}
static inline void
-lpfc_sli4_release_hdwq(struct lpfc_sli4_hdw_queue *hdwq, int max)
+lpfc_sli4_release_hdwq(struct lpfc_hba *phba)
{
+ struct lpfc_sli4_hdw_queue *hdwq;
uint32_t idx;
- for (idx = 0; idx < max; idx++) {
- lpfc_sli4_queue_free(hdwq[idx].hba_eq);
+ hdwq = phba->sli4_hba.hdwq;
+ for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+ if (idx < phba->cfg_irq_chann)
+ lpfc_sli4_queue_free(hdwq[idx].hba_eq);
+ hdwq[idx].hba_eq = NULL;
+
lpfc_sli4_queue_free(hdwq[idx].fcp_cq);
lpfc_sli4_queue_free(hdwq[idx].nvme_cq);
lpfc_sli4_queue_free(hdwq[idx].fcp_wq);
lpfc_sli4_queue_free(hdwq[idx].nvme_wq);
- hdwq[idx].hba_eq = NULL;
hdwq[idx].fcp_cq = NULL;
hdwq[idx].nvme_cq = NULL;
hdwq[idx].fcp_wq = NULL;
@@ -9114,8 +9152,7 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
{
/* Release HBA eqs */
if (phba->sli4_hba.hdwq)
- lpfc_sli4_release_hdwq(phba->sli4_hba.hdwq,
- phba->cfg_hdw_queue);
+ lpfc_sli4_release_hdwq(phba);
if (phba->nvmet_support) {
lpfc_sli4_release_queues(&phba->sli4_hba.nvmet_cqset,
@@ -9196,7 +9233,6 @@ lpfc_create_wq_cq(struct lpfc_hba *phba, struct lpfc_queue *eq,
qidx, (uint32_t)rc);
return rc;
}
- cq->chann = qidx;
if (qtype != LPFC_MBOX) {
/* Setup cq_map for fast lookup */
@@ -9216,7 +9252,6 @@ lpfc_create_wq_cq(struct lpfc_hba *phba, struct lpfc_queue *eq,
/* no need to tear down cq - caller will do so */
return rc;
}
- wq->chann = qidx;
/* Bind this CQ/WQ to the NVME ring */
pring = wq->pring;
@@ -9246,6 +9281,38 @@ lpfc_create_wq_cq(struct lpfc_hba *phba, struct lpfc_queue *eq,
}
/**
+ * lpfc_setup_cq_lookup - Setup the CQ lookup table
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine will populate the cq_lookup table by all
+ * available CQ queue_id's.
+ **/
+void
+lpfc_setup_cq_lookup(struct lpfc_hba *phba)
+{
+ struct lpfc_queue *eq, *childq;
+ struct lpfc_sli4_hdw_queue *qp;
+ int qidx;
+
+ qp = phba->sli4_hba.hdwq;
+ memset(phba->sli4_hba.cq_lookup, 0,
+ (sizeof(struct lpfc_queue *) * (phba->sli4_hba.cq_max + 1)));
+ for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
+ eq = qp[qidx].hba_eq;
+ if (!eq)
+ continue;
+ list_for_each_entry(childq, &eq->child_list, list) {
+ if (childq->queue_id > phba->sli4_hba.cq_max)
+ continue;
+ if ((childq->subtype == LPFC_FCP) ||
+ (childq->subtype == LPFC_NVME))
+ phba->sli4_hba.cq_lookup[childq->queue_id] =
+ childq;
+ }
+ }
+}
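The payoff shows up in the lpfc_sli4_hba_handle_eqe() hunk later in this patch: rather than comparing the CQID from the EQE against each hardware queue's fcp_cq_map/nvme_cq_map, the completion queue is resolved with a single array index, roughly:

	/* consumer side: O(1) CQID-to-CQ resolution in the fast path */
	if (cqid <= phba->sli4_hba.cq_max)
		cq = phba->sli4_hba.cq_lookup[cqid];
	if (cq)
		goto work_cq;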
+
+/**
* lpfc_sli4_queue_setup - Set up all the SLI4 queues
* @phba: pointer to lpfc hba data structure.
*
@@ -9325,7 +9392,7 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
rc = -ENOMEM;
goto out_error;
}
- for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+ for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
if (!qp[qidx].hba_eq) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"0522 Fast-path EQ (%d) not "
@@ -9572,11 +9639,23 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
phba->sli4_hba.dat_rq->queue_id,
phba->sli4_hba.els_cq->queue_id);
- for (qidx = 0; qidx < phba->cfg_hdw_queue;
+ for (qidx = 0; qidx < phba->cfg_irq_chann;
qidx += LPFC_MAX_EQ_DELAY_EQID_CNT)
lpfc_modify_hba_eq_delay(phba, qidx, LPFC_MAX_EQ_DELAY_EQID_CNT,
phba->cfg_fcp_imax);
+ if (phba->sli4_hba.cq_max) {
+ kfree(phba->sli4_hba.cq_lookup);
+ phba->sli4_hba.cq_lookup = kcalloc((phba->sli4_hba.cq_max + 1),
+ sizeof(struct lpfc_queue *), GFP_KERNEL);
+ if (!phba->sli4_hba.cq_lookup) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "0549 Failed setup of CQ Lookup table: "
+ "size 0x%x\n", phba->sli4_hba.cq_max);
+ goto out_destroy;
+ }
+ lpfc_setup_cq_lookup(phba);
+ }
return 0;
out_destroy:
@@ -9658,9 +9737,14 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba)
lpfc_wq_destroy(phba, qp->nvme_wq);
lpfc_cq_destroy(phba, qp->fcp_cq);
lpfc_cq_destroy(phba, qp->nvme_cq);
- lpfc_eq_destroy(phba, qp->hba_eq);
+ if (qidx < phba->cfg_irq_chann)
+ lpfc_eq_destroy(phba, qp->hba_eq);
}
}
+
+ kfree(phba->sli4_hba.cq_lookup);
+ phba->sli4_hba.cq_lookup = NULL;
+ phba->sli4_hba.cq_max = 0;
}
/**
@@ -10440,22 +10524,198 @@ lpfc_sli_disable_intr(struct lpfc_hba *phba)
}
/**
+ * lpfc_find_cpu_handle - Find the CPU that matches the specified EQ or Hardware Queue
+ * @phba: pointer to lpfc hba data structure.
+ * @id: EQ vector index or Hardware Queue index
+ * @match: LPFC_FIND_BY_EQ = match by EQ
+ * LPFC_FIND_BY_HDWQ = match by Hardware Queue
+ */
+static uint16_t
+lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match)
+{
+ struct lpfc_vector_map_info *cpup;
+ int cpu;
+
+	/* Find the CPU that matches the specified EQ or Hardware Queue */
+ cpup = phba->sli4_hba.cpu_map;
+ for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+ if ((match == LPFC_FIND_BY_EQ) &&
+ (cpup->irq != LPFC_VECTOR_MAP_EMPTY) &&
+ (cpup->eq == id))
+ return cpu;
+ if ((match == LPFC_FIND_BY_HDWQ) && (cpup->hdwq == id))
+ return cpu;
+ cpup++;
+ }
+ return 0;
+}
+
+/**
+ * lpfc_find_eq_handle - Find the EQ that corresponds to the specified
+ * Hardware Queue
+ * @phba: pointer to lpfc hba data structure.
+ * @hdwq: Hardware Queue index
+ */
+static uint16_t
+lpfc_find_eq_handle(struct lpfc_hba *phba, uint16_t hdwq)
+{
+ struct lpfc_vector_map_info *cpup;
+ int cpu;
+
+	/* Find the EQ that the specified Hardware Queue is assigned to */
+ cpup = phba->sli4_hba.cpu_map;
+ for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+ if (cpup->hdwq == hdwq)
+ return cpup->eq;
+ cpup++;
+ }
+ return 0;
+}
+
+/**
+ * lpfc_find_phys_id_eq - Find the next EQ that corresponds to the specified
+ * Physical Id.
+ * @phba: pointer to lpfc hba data structure.
+ * @eqidx: EQ index
+ * @phys_id: CPU package physical id
+ */
+static uint16_t
+lpfc_find_phys_id_eq(struct lpfc_hba *phba, uint16_t eqidx, uint16_t phys_id)
+{
+ struct lpfc_vector_map_info *cpup;
+ int cpu, desired_phys_id;
+
+ desired_phys_id = LPFC_VECTOR_MAP_EMPTY;
+
+ /* Find the desired phys_id for the specified EQ */
+ cpup = phba->sli4_hba.cpu_map;
+ for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+ if ((cpup->irq != LPFC_VECTOR_MAP_EMPTY) &&
+ (cpup->eq == eqidx)) {
+ desired_phys_id = cpup->phys_id;
+ break;
+ }
+ cpup++;
+ }
+ if (phys_id == desired_phys_id)
+ return eqidx;
+
+	/* Find an EQ that's on the specified phys_id */
+ cpup = phba->sli4_hba.cpu_map;
+ for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+ if ((cpup->irq != LPFC_VECTOR_MAP_EMPTY) &&
+ (cpup->phys_id == phys_id))
+ return cpup->eq;
+ cpup++;
+ }
+ return 0;
+}
+
+/**
+ * lpfc_find_cpu_map - Find next available CPU map entry that matches the
+ * phys_id and core_id.
+ * @phba: pointer to lpfc hba data structure.
+ * @phys_id: CPU package physical id
+ * @core_id: CPU core id
+ * @hdwqidx: Hardware Queue index
+ * @eqidx: EQ index
+ * @isr_avail: Should an IRQ be associated with this entry
+ */
+static struct lpfc_vector_map_info *
+lpfc_find_cpu_map(struct lpfc_hba *phba, uint16_t phys_id, uint16_t core_id,
+ uint16_t hdwqidx, uint16_t eqidx, int isr_avail)
+{
+ struct lpfc_vector_map_info *cpup;
+ int cpu;
+
+ cpup = phba->sli4_hba.cpu_map;
+ for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+ /* Does the cpup match the one we are looking for */
+ if ((cpup->phys_id == phys_id) &&
+ (cpup->core_id == core_id)) {
+ /* If it has been already assigned, then skip it */
+ if (cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) {
+ cpup++;
+ continue;
+ }
+ /* Ensure we are on the same phys_id as the first one */
+ if (!isr_avail)
+ cpup->eq = lpfc_find_phys_id_eq(phba, eqidx,
+ phys_id);
+ else
+ cpup->eq = eqidx;
+
+ cpup->hdwq = hdwqidx;
+ if (isr_avail) {
+ cpup->irq =
+ pci_irq_vector(phba->pcidev, eqidx);
+
+ /* Now affinitize to the selected CPU */
+ irq_set_affinity_hint(cpup->irq,
+ get_cpu_mask(cpu));
+ irq_set_status_flags(cpup->irq,
+ IRQ_NO_BALANCING);
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "3330 Set Affinity: CPU %d "
+ "EQ %d irq %d (HDWQ %x)\n",
+ cpu, cpup->eq,
+ cpup->irq, cpup->hdwq);
+ }
+ return cpup;
+ }
+ cpup++;
+ }
+	return NULL;
+}
+
+#ifdef CONFIG_X86
+/**
+ * lpfc_find_hyper - Determine if the CPU map entry is hyper-threaded
+ * @phba: pointer to lpfc hba data structure.
+ * @cpu: CPU map index
+ * @phys_id: CPU package physical id
+ * @core_id: CPU core id
+ */
+static int
+lpfc_find_hyper(struct lpfc_hba *phba, int cpu,
+ uint16_t phys_id, uint16_t core_id)
+{
+ struct lpfc_vector_map_info *cpup;
+ int idx;
+
+ cpup = phba->sli4_hba.cpu_map;
+ for (idx = 0; idx < phba->sli4_hba.num_present_cpu; idx++) {
+ /* Does the cpup match the one we are looking for */
+ if ((cpup->phys_id == phys_id) &&
+ (cpup->core_id == core_id) &&
+ (cpu != idx)) {
+ return 1;
+ }
+ cpup++;
+ }
+ return 0;
+}
+#endif
+
+/**
* lpfc_cpu_affinity_check - Check vector CPU affinity mappings
* @phba: pointer to lpfc hba data structure.
+ * @vectors: number of msix vectors allocated.
*
* The routine will figure out the CPU affinity assignment for every
- * MSI-X vector allocated for the HBA. The hba_eq_hdl will be updated
- * with a pointer to the CPU mask that defines ALL the CPUs this vector
- * can be associated with. If the vector can be unquely associated with
- * a single CPU, that CPU will be recorded in hba_eq_hdl[index].cpu.
+ * MSI-X vector allocated for the HBA.
* In addition, the CPU to IO channel mapping will be calculated
* and the phba->sli4_hba.cpu_map array will reflect this.
*/
static void
-lpfc_cpu_affinity_check(struct lpfc_hba *phba)
+lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
{
+ int i, j, idx, phys_id;
+ int max_phys_id, min_phys_id;
+ int max_core_id, min_core_id;
struct lpfc_vector_map_info *cpup;
- int cpu, idx;
+ int cpu, eqidx, hdwqidx, isr_avail;
#ifdef CONFIG_X86
struct cpuinfo_x86 *cpuinfo;
#endif
@@ -10465,6 +10725,12 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba)
(sizeof(struct lpfc_vector_map_info) *
phba->sli4_hba.num_present_cpu));
+ max_phys_id = 0;
+ min_phys_id = 0xffff;
+ max_core_id = 0;
+ min_core_id = 0xffff;
+ phys_id = 0;
+
/* Update CPU map with physical id and core id of each CPU */
cpup = phba->sli4_hba.cpu_map;
for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
@@ -10472,34 +10738,91 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba)
cpuinfo = &cpu_data(cpu);
cpup->phys_id = cpuinfo->phys_proc_id;
cpup->core_id = cpuinfo->cpu_core_id;
+ cpup->hyper = lpfc_find_hyper(phba, cpu,
+ cpup->phys_id, cpup->core_id);
#else
/* No distinction between CPUs for other platforms */
cpup->phys_id = 0;
- cpup->core_id = 0;
+ cpup->core_id = cpu;
+ cpup->hyper = 0;
#endif
+
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"3328 CPU physid %d coreid %d\n",
cpup->phys_id, cpup->core_id);
+
+ if (cpup->phys_id > max_phys_id)
+ max_phys_id = cpup->phys_id;
+ if (cpup->phys_id < min_phys_id)
+ min_phys_id = cpup->phys_id;
+
+ if (cpup->core_id > max_core_id)
+ max_core_id = cpup->core_id;
+ if (cpup->core_id < min_core_id)
+ min_core_id = cpup->core_id;
+
cpup++;
}
- for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
- cpup = &phba->sli4_hba.cpu_map[idx];
- cpup->irq = pci_irq_vector(phba->pcidev, idx);
+ /*
+ * If the number of IRQ vectors == number of CPUs,
+ * mapping is pretty simple: 1 to 1.
+ * This is the desired path if NVME is enabled.
+ */
+ if (vectors == phba->sli4_hba.num_present_cpu) {
+ cpup = phba->sli4_hba.cpu_map;
+ for (idx = 0; idx < vectors; idx++) {
+ cpup->eq = idx;
+ cpup->hdwq = idx;
+ cpup->irq = pci_irq_vector(phba->pcidev, idx);
+
+ /* Now affinitize to the selected CPU */
+ irq_set_affinity_hint(
+ pci_irq_vector(phba->pcidev, idx),
+ get_cpu_mask(idx));
+ irq_set_status_flags(cpup->irq, IRQ_NO_BALANCING);
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "3336 Set Affinity: CPU %d "
+ "EQ %d irq %d\n",
+ idx, cpup->eq,
+ pci_irq_vector(phba->pcidev, idx));
+ cpup++;
+ }
+ return;
+ }
- /* For now assume vector N maps to CPU N */
- irq_set_affinity_hint(cpup->irq, get_cpu_mask(idx));
- cpup->hdwq = idx;
+ idx = 0;
+ isr_avail = 1;
+ eqidx = 0;
+ hdwqidx = 0;
- lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
- "3336 Set Affinity: CPU %d "
- "hdwq %d irq %d\n",
- cpu, cpup->hdwq, cpup->irq);
+ /* Mapping is more complicated for this case. Hardware Queues are
+	 * assigned in a "ping pong" fashion, ping-ponging between the
+	 * available phys_ids.
+ */
+ while (idx < phba->sli4_hba.num_present_cpu) {
+ for (i = min_core_id; i <= max_core_id; i++) {
+ for (j = min_phys_id; j <= max_phys_id; j++) {
+ cpup = lpfc_find_cpu_map(phba, j, i, hdwqidx,
+ eqidx, isr_avail);
+ if (!cpup)
+ continue;
+ idx++;
+ hdwqidx++;
+ if (hdwqidx >= phba->cfg_hdw_queue)
+ hdwqidx = 0;
+ eqidx++;
+ if (eqidx >= phba->cfg_irq_chann) {
+ isr_avail = 0;
+ eqidx = 0;
+ }
+ }
+ }
}
return;
}
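As a hypothetical walk-through of the ping-pong pass above (topology and module settings invented for illustration): with 2 sockets of 4 cores each, cfg_irq_chann = 4 and cfg_hdw_queue = 8, the loop would assign:

	core0/socket0 -> hdwq 0, EQ 0 (IRQ vector affinitized)
	core0/socket1 -> hdwq 1, EQ 1
	core1/socket0 -> hdwq 2, EQ 2
	core1/socket1 -> hdwq 3, EQ 3
	core2/socket0 -> hdwq 4, shares EQ 0 (vectors exhausted, isr_avail = 0)
	core2/socket1 -> hdwq 5, shares EQ 1
	core3/socket0 -> hdwq 6, shares EQ 2
	core3/socket1 -> hdwq 7, shares EQ 3

so every CPU keeps its own hardware queue while EQs are shared only within the same physical socket (via lpfc_find_phys_id_eq()).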
-
/**
* lpfc_sli4_enable_msix - Enable MSI-X interrupt mode to SLI-4 device
* @phba: pointer to lpfc hba data structure.
@@ -10518,7 +10841,7 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba)
char *name;
/* Set up MSI-X multi-message vectors */
- vectors = phba->cfg_hdw_queue;
+ vectors = phba->cfg_irq_chann;
rc = pci_alloc_irq_vectors(phba->pcidev,
(phba->nvmet_support) ? 1 : 2,
@@ -10539,7 +10862,6 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba)
phba->sli4_hba.hba_eq_hdl[index].idx = index;
phba->sli4_hba.hba_eq_hdl[index].phba = phba;
- atomic_set(&phba->sli4_hba.hba_eq_hdl[index].hba_eq_in_use, 1);
rc = request_irq(pci_irq_vector(phba->pcidev, index),
&lpfc_sli4_hba_intr_handler, 0,
name,
@@ -10552,17 +10874,16 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba)
}
}
- if (vectors != phba->cfg_hdw_queue) {
+ if (vectors != phba->cfg_irq_chann) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"3238 Reducing IO channels to match number of "
"MSI-X vectors, requested %d got %d\n",
- phba->cfg_hdw_queue, vectors);
- if (phba->cfg_hdw_queue > vectors)
- phba->cfg_hdw_queue = vectors;
+ phba->cfg_irq_chann, vectors);
+ if (phba->cfg_irq_chann > vectors)
+ phba->cfg_irq_chann = vectors;
if (phba->cfg_nvmet_mrq > vectors)
phba->cfg_nvmet_mrq = vectors;
}
- lpfc_cpu_affinity_check(phba);
return rc;
@@ -10617,7 +10938,7 @@ lpfc_sli4_enable_msi(struct lpfc_hba *phba)
return rc;
}
- for (index = 0; index < phba->cfg_hdw_queue; index++) {
+ for (index = 0; index < phba->cfg_irq_chann; index++) {
phba->sli4_hba.hba_eq_hdl[index].idx = index;
phba->sli4_hba.hba_eq_hdl[index].phba = phba;
}
@@ -10682,11 +11003,10 @@ lpfc_sli4_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode)
phba->intr_type = INTx;
intr_mode = 0;
- for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+ for (idx = 0; idx < phba->cfg_irq_chann; idx++) {
eqhdl = &phba->sli4_hba.hba_eq_hdl[idx];
eqhdl->idx = idx;
eqhdl->phba = phba;
- atomic_set(&eqhdl->hba_eq_in_use, 1);
}
}
}
@@ -10710,7 +11030,7 @@ lpfc_sli4_disable_intr(struct lpfc_hba *phba)
int index;
/* Free up MSI-X multi-message vectors */
- for (index = 0; index < phba->cfg_hdw_queue; index++) {
+ for (index = 0; index < phba->cfg_irq_chann; index++) {
irq_set_affinity_hint(
pci_irq_vector(phba->pcidev, index),
NULL);
@@ -12089,12 +12409,13 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
}
/* Default to single EQ for non-MSI-X */
if (phba->intr_type != MSIX) {
- phba->cfg_hdw_queue = 1;
+ phba->cfg_irq_chann = 1;
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
if (phba->nvmet_support)
phba->cfg_nvmet_mrq = 1;
}
}
+ lpfc_cpu_affinity_check(phba, phba->cfg_irq_chann);
/* Create SCSI host to the physical port */
error = lpfc_create_shost(phba);
@@ -239,7 +239,7 @@ lpfc_nvme_create_queue(struct nvme_fc_local_port *pnvme_lport,
if (qidx) {
str = "IO "; /* IO queue */
qhandle->index = ((qidx - 1) %
- vport->phba->cfg_hdw_queue);
+ lpfc_nvme_template.max_hw_queues);
} else {
str = "ADM"; /* Admin queue */
qhandle->index = qidx;
@@ -1546,14 +1546,12 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
}
}
+ /* Lookup Hardware Queue index based on fcp_io_sched module parameter */
if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) {
idx = lpfc_queue_info->index;
} else {
cpu = smp_processor_id();
- if (cpu < phba->cfg_hdw_queue)
- idx = cpu;
- else
- idx = cpu % phba->cfg_hdw_queue;
+ idx = phba->sli4_hba.cpu_map[cpu].hdwq;
}
lpfc_ncmd = lpfc_get_nvme_buf(phba, ndlp, idx, expedite);
@@ -2060,7 +2058,13 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
* allocate + 3, one for cmd, one for rsp and one for this alignment
*/
lpfc_nvme_template.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1;
- lpfc_nvme_template.max_hw_queues = phba->cfg_hdw_queue;
+
+ /* Advertise how many hw queues we support based on fcp_io_sched */
+ if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ)
+ lpfc_nvme_template.max_hw_queues = phba->cfg_hdw_queue;
+ else
+ lpfc_nvme_template.max_hw_queues =
+ phba->sli4_hba.num_present_cpu;
/* localport is allocated from the stack, but the registration
* call allocates heap memory as well as the private area.
@@ -2554,6 +2558,8 @@ lpfc_nvme_wait_for_io_drain(struct lpfc_hba *phba)
* WQEs have been removed from the txcmplqs.
*/
for (i = 0; i < phba->cfg_hdw_queue; i++) {
+ if (!phba->sli4_hba.hdwq[i].nvme_wq)
+ continue;
pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
if (!pring)
@@ -688,16 +688,17 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
int tag;
cpu = smp_processor_id();
+
+ /* Lookup Hardware Queue index based on fcp_io_sched module parameter
+ * and if SCSI mq is turned on.
+ */
if (cmnd && shost_use_blk_mq(cmnd->device->host) &&
phba->cfg_enable_scsi_mq &&
(phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ)) {
tag = blk_mq_unique_tag(cmnd->request);
idx = blk_mq_unique_tag_to_hwq(tag);
} else {
- if (cpu < phba->cfg_hdw_queue)
- idx = cpu;
- else
- idx = cpu % phba->cfg_hdw_queue;
+ idx = phba->sli4_hba.cpu_map[cpu].hdwq;
}
lpfc_cmd = lpfc_get_io_buf(phba, ndlp, idx,
@@ -3651,6 +3652,9 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
struct Scsi_Host *shost;
int idx;
uint32_t logit = LOG_FCP;
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+ int cpu;
+#endif
/* Sanity check on return of outstanding command */
cmd = lpfc_cmd->pCmd;
@@ -3661,6 +3665,13 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
if (phba->sli4_hba.hdwq)
phba->sli4_hba.hdwq[idx].scsi_cstat.io_cmpls++;
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+ if (phba->cpucheck_on & LPFC_CHECK_SCSI_IO) {
+ cpu = smp_processor_id();
+ if (cpu < LPFC_CHECK_CPU_CNT)
+ phba->sli4_hba.hdwq[idx].cpucheck_cmpl_io[cpu]++;
+ }
+#endif
shost = cmd->device->host;
lpfc_cmd->result = (pIocbOut->iocb.un.ulpWord[4] & IOERR_PARAM_MASK);
@@ -4339,6 +4350,9 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
struct lpfc_io_buf *lpfc_cmd;
struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
int err, idx;
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+ int cpu;
+#endif
rdata = lpfc_rport_data_from_scsi_device(cmnd->device);
@@ -4453,6 +4467,16 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp);
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+ if (phba->cpucheck_on & LPFC_CHECK_SCSI_IO) {
+ cpu = smp_processor_id();
+ if (cpu < LPFC_CHECK_CPU_CNT) {
+ struct lpfc_sli4_hdw_queue *hdwq =
+ &phba->sli4_hba.hdwq[lpfc_cmd->hdwq_no];
+ hdwq->cpucheck_xmt_io[cpu]++;
+ }
+ }
+#endif
err = lpfc_sli_issue_iocb(phba, LPFC_FCP_RING,
&lpfc_cmd->cur_iocbq, SLI_IOCB_RET_IOCB);
if (err) {
@@ -5587,7 +5587,7 @@ lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba)
LPFC_QUEUE_REARM);
}
- for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++)
+ for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++)
sli4_hba->sli4_eq_release(qp[qidx].hba_eq,
LPFC_QUEUE_REARM);
}
@@ -7879,7 +7879,7 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba)
/* Find the eq associated with the mcq */
if (sli4_hba->hdwq)
- for (eqidx = 0; eqidx < phba->cfg_hdw_queue; eqidx++)
+ for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++)
if (sli4_hba->hdwq[eqidx].hba_eq->queue_id ==
sli4_hba->mbx_cq->assoc_qid) {
fpeq = sli4_hba->hdwq[eqidx].hba_eq;
@@ -10059,12 +10059,9 @@ int
lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
struct lpfc_iocbq *piocb, uint32_t flag)
{
- struct lpfc_hba_eq_hdl *hba_eq_hdl;
struct lpfc_sli_ring *pring;
- struct lpfc_queue *fpeq;
- struct lpfc_eqe *eqe;
unsigned long iflags;
- int rc, idx;
+ int rc;
if (phba->sli_rev == LPFC_SLI_REV4) {
pring = lpfc_sli4_calc_ring(phba, piocb);
@@ -10074,34 +10071,6 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
spin_lock_irqsave(&pring->ring_lock, iflags);
rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
spin_unlock_irqrestore(&pring->ring_lock, iflags);
-
- if (lpfc_fcp_look_ahead && (piocb->iocb_flag & LPFC_IO_FCP)) {
- idx = piocb->hba_wqidx;
- hba_eq_hdl = &phba->sli4_hba.hba_eq_hdl[idx];
-
- if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use)) {
-
- /* Get associated EQ with this index */
- fpeq = phba->sli4_hba.hdwq[idx].hba_eq;
-
- /* Turn off interrupts from this EQ */
- phba->sli4_hba.sli4_eq_clr_intr(fpeq);
-
- /*
- * Process all the events on FCP EQ
- */
- while ((eqe = lpfc_sli4_eq_get(fpeq))) {
- lpfc_sli4_hba_handle_eqe(phba,
- eqe, idx);
- fpeq->EQ_processed++;
- }
-
- /* Always clear and re-arm the EQ */
- phba->sli4_hba.sli4_eq_release(fpeq,
- LPFC_QUEUE_REARM);
- }
- atomic_inc(&hba_eq_hdl->hba_eq_in_use);
- }
} else {
/* For now, SLI2/3 will still use hbalock */
spin_lock_irqsave(&phba->hbalock, iflags);
@@ -13652,7 +13621,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
/* Save EQ associated with this CQ */
cq->assoc_qp = speq;
- if (!queue_work(phba->wq, &cq->spwork))
+ if (!queue_work_on(cq->chann, phba->wq, &cq->spwork))
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
"0390 Cannot schedule soft IRQ "
"for CQ eqcqid=%d, cqid=%d on CPU %d\n",
@@ -14058,18 +14027,11 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
/* Get the reference to the corresponding CQ */
cqid = bf_get_le32(lpfc_eqe_resource_id, eqe);
- /* First check for NVME/SCSI completion */
- if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
- (cqid == phba->sli4_hba.hdwq[qidx].nvme_cq_map)) {
- /* Process NVME / NVMET command completion */
- cq = phba->sli4_hba.hdwq[qidx].nvme_cq;
- goto process_cq;
- }
-
- if (cqid == phba->sli4_hba.hdwq[qidx].fcp_cq_map) {
- /* Process FCP command completion */
- cq = phba->sli4_hba.hdwq[qidx].fcp_cq;
- goto process_cq;
+ /* Use the fast lookup method first */
+ if (cqid <= phba->sli4_hba.cq_max) {
+ cq = phba->sli4_hba.cq_lookup[cqid];
+ if (cq)
+ goto work_cq;
}
/* Next check for NVMET completion */
@@ -14104,9 +14066,7 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
return;
}
- /* Save EQ associated with this CQ */
- cq->assoc_qp = phba->sli4_hba.hdwq[qidx].hba_eq;
-
+work_cq:
if (!queue_work_on(cq->chann, phba->wq, &cq->irqwork))
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
"0363 Cannot schedule soft IRQ "
@@ -14234,15 +14194,6 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
if (unlikely(!fpeq))
return IRQ_NONE;
- if (lpfc_fcp_look_ahead) {
- if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use))
- phba->sli4_hba.sli4_eq_clr_intr(fpeq);
- else {
- atomic_inc(&hba_eq_hdl->hba_eq_in_use);
- return IRQ_NONE;
- }
- }
-
/* Check device state for handling interrupt */
if (unlikely(lpfc_intr_state_check(phba))) {
/* Check again for link_state with lock held */
@@ -14251,8 +14202,6 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
/* Flush, clear interrupt, and rearm the EQ */
lpfc_sli4_eq_flush(phba, fpeq);
spin_unlock_irqrestore(&phba->hbalock, iflag);
- if (lpfc_fcp_look_ahead)
- atomic_inc(&hba_eq_hdl->hba_eq_in_use);
return IRQ_NONE;
}
@@ -14275,12 +14224,6 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
if (unlikely(ecount == 0)) {
fpeq->EQ_no_entry++;
-
- if (lpfc_fcp_look_ahead) {
- atomic_inc(&hba_eq_hdl->hba_eq_in_use);
- return IRQ_NONE;
- }
-
if (phba->intr_type == MSIX)
/* MSI-X treated interrupt served as no EQ share INT */
lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
@@ -14290,9 +14233,6 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
return IRQ_NONE;
}
- if (lpfc_fcp_look_ahead)
- atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-
return IRQ_HANDLED;
} /* lpfc_sli4_hba_intr_handler */
@@ -14330,7 +14270,7 @@ lpfc_sli4_intr_handler(int irq, void *dev_id)
/*
* Invoke fast-path host attention interrupt handling as appropriate.
*/
- for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+ for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
hba_irq_rc = lpfc_sli4_hba_intr_handler(irq,
&phba->sli4_hba.hba_eq_hdl[qidx]);
if (hba_irq_rc == IRQ_HANDLED)
@@ -14517,7 +14457,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
union lpfc_sli4_cfg_shdr *shdr;
uint16_t dmult;
- if (startq >= phba->cfg_hdw_queue)
+ if (startq >= phba->cfg_irq_chann)
return 0;
mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
@@ -14531,7 +14471,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
eq_delay = &mbox->u.mqe.un.eq_delay;
	/* Calculate delay multiplier from maximum interrupt per second */
- result = imax / phba->cfg_hdw_queue;
+ result = imax / phba->cfg_irq_chann;
if (result > LPFC_DMULT_CONST || result == 0)
dmult = 0;
else
@@ -14540,7 +14480,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
dmult = LPFC_DMULT_MAX;
cnt = 0;
- for (qidx = startq; qidx < phba->cfg_hdw_queue; qidx++) {
+ for (qidx = startq; qidx < phba->cfg_irq_chann; qidx++) {
eq = phba->sli4_hba.hdwq[qidx].hba_eq;
if (!eq)
continue;
@@ -14558,7 +14498,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
val = phba->cfg_fcp_imax;
if (val) {
/* First, interrupts per sec per EQ */
- val = phba->cfg_fcp_imax / phba->cfg_hdw_queue;
+ val = phba->cfg_fcp_imax / phba->cfg_irq_chann;
/* us delay between each interrupt */
val = LPFC_SEC_TO_USEC / val;
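
Because the interrupt budget is now split across cfg_irq_chann rather than cfg_hdw_queue, the per-EQ coalescing delay is simply LPFC_SEC_TO_USEC divided by the per-EQ interrupt rate. A small worked example with illustrative numbers only (150,000 total interrupts/sec over 8 IRQ channels comes out to about 53 us between interrupts on each EQ):

	#include <stdio.h>

	#define SEC_TO_USEC 1000000		/* mirrors LPFC_SEC_TO_USEC */

	int main(void)
	{
		unsigned int imax = 150000;	/* stand-in for cfg_fcp_imax: total ints/sec */
		unsigned int irq_chann = 8;	/* stand-in for cfg_irq_chann */
		unsigned int per_eq = imax / irq_chann;		/* 18750 ints/sec per EQ */
		unsigned int delay_us = SEC_TO_USEC / per_eq;	/* 53 us between interrupts */

		printf("per-EQ rate %u ints/s -> %u us coalescing delay\n",
		       per_eq, delay_us);
		return 0;
	}
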
@@ -14853,10 +14793,13 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
cq->subtype = subtype;
cq->queue_id = bf_get(lpfc_mbx_cq_create_q_id, &cq_create->u.response);
cq->assoc_qid = eq->queue_id;
+ cq->assoc_qp = eq;
cq->host_index = 0;
cq->hba_index = 0;
cq->entry_repost = LPFC_CQ_REPOST;
+ if (cq->queue_id > phba->sli4_hba.cq_max)
+ phba->sli4_hba.cq_max = cq->queue_id;
out:
mempool_free(mbox, phba->mbox_mem_pool);
return status;
@@ -15062,6 +15005,7 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
cq->type = type;
cq->subtype = subtype;
cq->assoc_qid = eq->queue_id;
+ cq->assoc_qp = eq;
cq->host_index = 0;
cq->hba_index = 0;
cq->entry_repost = LPFC_CQ_REPOST;
@@ -15102,6 +15046,8 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
for (idx = 0; idx < numcq; idx++) {
cq = cqp[idx];
cq->queue_id = rc + idx;
+ if (cq->queue_id > phba->sli4_hba.cq_max)
+ phba->sli4_hba.cq_max = cq->queue_id;
}
out:
@@ -19665,7 +19611,8 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
/* NVME_LS and NVME_LS ABTS requests. */
if (pwqe->iocb_flag & LPFC_IO_NVME_LS) {
pring = phba->sli4_hba.nvmels_wq->pring;
- spin_lock_irqsave(&pring->ring_lock, iflags);
+ lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+ qp, wq_access);
sglq = __lpfc_sli_get_els_sglq(phba, pwqe);
if (!sglq) {
spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19698,7 +19645,8 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->nvme_cq_map);
- spin_lock_irqsave(&pring->ring_lock, iflags);
+ lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+ qp, wq_access);
ret = lpfc_sli4_wq_put(wq, wqe);
if (ret) {
spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19725,7 +19673,8 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
pwqe->sli4_xritag);
bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->nvme_cq_map);
- spin_lock_irqsave(&pring->ring_lock, iflags);
+ lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+ qp, wq_access);
ret = lpfc_sli4_wq_put(wq, wqe);
if (ret) {
spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19873,18 +19822,20 @@ void lpfc_move_xri_pvt_to_pbl(struct lpfc_hba *phba, u32 hwqid)
{
struct lpfc_pbl_pool *pbl_pool;
struct lpfc_pvt_pool *pvt_pool;
+ struct lpfc_sli4_hdw_queue *qp;
struct lpfc_io_buf *lpfc_ncmd;
struct lpfc_io_buf *lpfc_ncmd_next;
unsigned long iflag;
struct list_head tmp_list;
u32 tmp_count;
- pbl_pool = &phba->sli4_hba.hdwq[hwqid].p_multixri_pool->pbl_pool;
- pvt_pool = &phba->sli4_hba.hdwq[hwqid].p_multixri_pool->pvt_pool;
+ qp = &phba->sli4_hba.hdwq[hwqid];
+ pbl_pool = &qp->p_multixri_pool->pbl_pool;
+ pvt_pool = &qp->p_multixri_pool->pvt_pool;
tmp_count = 0;
- spin_lock_irqsave(&pbl_pool->lock, iflag);
- spin_lock(&pvt_pool->lock);
+ lpfc_qp_spin_lock_irqsave(&pbl_pool->lock, iflag, qp, mv_to_pub_pool);
+ lpfc_qp_spin_lock(&pvt_pool->lock, qp, mv_from_pvt_pool);
if (pvt_pool->count > pvt_pool->low_watermark) {
/* Step 1: move (all - low_watermark) from pvt_pool
@@ -19937,7 +19888,8 @@ void lpfc_move_xri_pvt_to_pbl(struct lpfc_hba *phba, u32 hwqid)
* false - if the specified pbl_pool is empty or locked by someone else
**/
static bool
-_lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, struct lpfc_pbl_pool *pbl_pool,
+_lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
+ struct lpfc_pbl_pool *pbl_pool,
struct lpfc_pvt_pool *pvt_pool, u32 count)
{
struct lpfc_io_buf *lpfc_ncmd;
@@ -19949,7 +19901,7 @@ _lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, struct lpfc_pbl_pool *pbl_pool,
if (ret) {
if (pbl_pool->count) {
/* Move a batch of XRIs from public to private pool */
- spin_lock(&pvt_pool->lock);
+ lpfc_qp_spin_lock(&pvt_pool->lock, qp, mv_to_pvt_pool);
list_for_each_entry_safe(lpfc_ncmd,
lpfc_ncmd_next,
&pbl_pool->list,
@@ -19991,16 +19943,18 @@ void lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, u32 hwqid, u32 count)
struct lpfc_multixri_pool *next_multixri_pool;
struct lpfc_pvt_pool *pvt_pool;
struct lpfc_pbl_pool *pbl_pool;
+ struct lpfc_sli4_hdw_queue *qp;
u32 next_hwqid;
u32 hwq_count;
int ret;
- multixri_pool = phba->sli4_hba.hdwq[hwqid].p_multixri_pool;
+ qp = &phba->sli4_hba.hdwq[hwqid];
+ multixri_pool = qp->p_multixri_pool;
pvt_pool = &multixri_pool->pvt_pool;
pbl_pool = &multixri_pool->pbl_pool;
/* Check if local pbl_pool is available */
- ret = _lpfc_move_xri_pbl_to_pvt(phba, pbl_pool, pvt_pool, count);
+ ret = _lpfc_move_xri_pbl_to_pvt(phba, qp, pbl_pool, pvt_pool, count);
if (ret) {
#ifdef LPFC_MXP_STAT
multixri_pool->local_pbl_hit_count++;
@@ -20023,7 +19977,7 @@ void lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, u32 hwqid, u32 count)
/* Check if the public free xri pool is available */
ret = _lpfc_move_xri_pbl_to_pvt(
- phba, pbl_pool, pvt_pool, count);
+ phba, qp, pbl_pool, pvt_pool, count);
/* Exit while-loop if success or all hwqid are checked */
} while (!ret && next_hwqid != multixri_pool->rrb_next_hwqid);
@@ -20139,20 +20093,23 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd,
if ((pvt_pool->count < pvt_pool->low_watermark) ||
(xri_owned < xri_limit &&
pvt_pool->count < pvt_pool->high_watermark)) {
- spin_lock_irqsave(&pvt_pool->lock, iflag);
+ lpfc_qp_spin_lock_irqsave(&pvt_pool->lock, iflag,
+ qp, free_pvt_pool);
list_add_tail(&lpfc_ncmd->list,
&pvt_pool->list);
pvt_pool->count++;
spin_unlock_irqrestore(&pvt_pool->lock, iflag);
} else {
- spin_lock_irqsave(&pbl_pool->lock, iflag);
+ lpfc_qp_spin_lock_irqsave(&pbl_pool->lock, iflag,
+ qp, free_pub_pool);
list_add_tail(&lpfc_ncmd->list,
&pbl_pool->list);
pbl_pool->count++;
spin_unlock_irqrestore(&pbl_pool->lock, iflag);
}
} else {
- spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag);
+ lpfc_qp_spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag,
+ qp, free_xri);
list_add_tail(&lpfc_ncmd->list,
&qp->lpfc_io_buf_list_put);
qp->put_io_bufs++;
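
The placement rule at the top of this hunk decides where a freed XRI goes: back to the hardware queue's private pool while that pool is under its low watermark, or while the owner is within its XRI budget and the private pool is still under its high watermark; everything else returns to the public pool. A compact sketch of that predicate, using illustrative field names rather than the driver's structures:

	#include <stdbool.h>
	#include <stdio.h>

	struct pvt_state {
		unsigned int count;
		unsigned int low_watermark;
		unsigned int high_watermark;
	};

	static bool release_to_private(const struct pvt_state *pvt,
				       unsigned int xri_owned, unsigned int xri_limit)
	{
		if (pvt->count < pvt->low_watermark)
			return true;		/* starving: refill the private pool */
		if (xri_owned < xri_limit && pvt->count < pvt->high_watermark)
			return true;		/* still within the XRI budget */
		return false;			/* otherwise the public pool takes it */
	}

	int main(void)
	{
		struct pvt_state pvt = { .count = 10, .low_watermark = 16,
					 .high_watermark = 64 };

		printf("%s\n", release_to_private(&pvt, 100, 512) ?
		       "private pool" : "public pool");
		return 0;
	}
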
@@ -20175,6 +20132,7 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd,
**/
static struct lpfc_io_buf *
lpfc_get_io_buf_from_private_pool(struct lpfc_hba *phba,
+ struct lpfc_sli4_hdw_queue *qp,
struct lpfc_pvt_pool *pvt_pool,
struct lpfc_nodelist *ndlp)
{
@@ -20182,7 +20140,7 @@ lpfc_get_io_buf_from_private_pool(struct lpfc_hba *phba,
struct lpfc_io_buf *lpfc_ncmd_next;
unsigned long iflag;
- spin_lock_irqsave(&pvt_pool->lock, iflag);
+ lpfc_qp_spin_lock_irqsave(&pvt_pool->lock, iflag, qp, alloc_pvt_pool);
list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
&pvt_pool->list, list) {
if (lpfc_test_rrq_active(
@@ -20277,7 +20235,7 @@ lpfc_get_io_buf_from_multixri_pools(struct lpfc_hba *phba,
lpfc_move_xri_pbl_to_pvt(phba, hwqid, XRI_BATCH);
/* Get one XRI from private free xri pool */
- lpfc_ncmd = lpfc_get_io_buf_from_private_pool(phba, pvt_pool, ndlp);
+ lpfc_ncmd = lpfc_get_io_buf_from_private_pool(phba, qp, pvt_pool, ndlp);
if (lpfc_ncmd) {
lpfc_ncmd->hdwq = qp;
@@ -20350,11 +20308,13 @@ struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba,
lpfc_cmd = lpfc_get_io_buf_from_multixri_pools(
phba, ndlp, hwqid, expedite);
else {
- spin_lock_irqsave(&qp->io_buf_list_get_lock, iflag);
+ lpfc_qp_spin_lock_irqsave(&qp->io_buf_list_get_lock, iflag,
+ qp, alloc_xri_get);
if (qp->get_io_bufs > LPFC_NVME_EXPEDITE_XRICNT || expedite)
lpfc_cmd = lpfc_io_buf(phba, ndlp, hwqid);
if (!lpfc_cmd) {
- spin_lock(&qp->io_buf_list_put_lock);
+ lpfc_qp_spin_lock(&qp->io_buf_list_put_lock,
+ qp, alloc_xri_put);
list_splice(&qp->lpfc_io_buf_list_put,
&qp->lpfc_io_buf_list_get);
qp->get_io_bufs += qp->put_io_bufs;
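
The allocation hunk above keeps the existing two-list scheme: allocators drain the get list under io_buf_list_get_lock and only take io_buf_list_put_lock long enough to splice freed buffers across when the get side runs dry, so the free path rarely contends with allocation. A userspace pthread sketch of that two-list pattern (all names made up for the example):

	#include <pthread.h>

	struct buf {
		struct buf *next;
	};

	struct two_list_pool {
		pthread_mutex_t get_lock;
		pthread_mutex_t put_lock;
		struct buf *get_list;		/* drained by allocators */
		struct buf *put_list;		/* filled by the free path */
	};

	static struct buf *pool_get(struct two_list_pool *p)
	{
		struct buf *b;

		pthread_mutex_lock(&p->get_lock);
		if (!p->get_list) {
			/* Get side empty: splice over whatever the free path parked. */
			pthread_mutex_lock(&p->put_lock);
			p->get_list = p->put_list;
			p->put_list = NULL;
			pthread_mutex_unlock(&p->put_lock);
		}
		b = p->get_list;
		if (b)
			p->get_list = b->next;
		pthread_mutex_unlock(&p->get_lock);
		return b;
	}

	static void pool_put(struct two_list_pool *p, struct buf *b)
	{
		pthread_mutex_lock(&p->put_lock);	/* never touches get_lock */
		b->next = p->put_list;
		p->put_list = b;
		pthread_mutex_unlock(&p->put_lock);
	}

	int main(void)
	{
		struct two_list_pool p = {
			.get_lock = PTHREAD_MUTEX_INITIALIZER,
			.put_lock = PTHREAD_MUTEX_INITIALIZER,
		};
		struct buf b;

		pool_put(&p, &b);
		return pool_get(&p) == &b ? 0 : 1;
	}
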
@@ -41,7 +41,7 @@
/* Multi-queue arrangement for FCP EQ/CQ/WQ tuples */
#define LPFC_HBA_HDWQ_MIN 0
-#define LPFC_HBA_HDWQ_MAX 64
+#define LPFC_HBA_HDWQ_MAX 128
#define LPFC_HBA_HDWQ_DEF 0
/* Common buffer size to accommodate SCSI and NVME IO buffers */
@@ -166,16 +166,19 @@ struct lpfc_queue {
uint32_t assoc_qid; /* Queue ID associated with, for CQ/WQ/MQ */
uint32_t host_index; /* The host's index for putting or getting */
uint32_t hba_index; /* The last known hba index for get or put */
+ uint32_t q_mode;
struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */
struct lpfc_rqb *rqbp; /* ptr to RQ buffers */
- uint32_t q_mode;
uint16_t page_count; /* Number of pages allocated for this queue */
uint16_t page_size; /* size of page allocated for this queue */
#define LPFC_EXPANDED_PAGE_SIZE 16384
#define LPFC_DEFAULT_PAGE_SIZE 4096
- uint16_t chann; /* IO channel this queue is associated with */
+ uint16_t chann; /* Hardware Queue association WQ/CQ */
+ /* CPU affinity for EQ */
+#define LPFC_FIND_BY_EQ 0
+#define LPFC_FIND_BY_HDWQ 1
uint8_t db_format;
#define LPFC_DB_RING_FORMAT 0x01
#define LPFC_DB_LIST_FORMAT 0x02
@@ -431,11 +434,6 @@ struct lpfc_hba_eq_hdl {
uint32_t idx;
char handler_name[LPFC_SLI4_HANDLER_NAME_SZ];
struct lpfc_hba *phba;
- atomic_t hba_eq_in_use;
- struct cpumask *cpumask;
- /* CPU affinitsed to or 0xffffffff if multiple */
- uint32_t cpu;
-#define LPFC_MULTI_CPU_AFFINITY 0xffffffff
};
/*BB Credit recovery value*/
@@ -529,7 +527,9 @@ struct lpfc_vector_map_info {
uint16_t phys_id;
uint16_t core_id;
uint16_t irq;
+ uint16_t eq;
uint16_t hdwq;
+ uint16_t hyper;
};
#define LPFC_VECTOR_MAP_EMPTY 0xffff
@@ -593,6 +593,21 @@ struct lpfc_fc4_ctrl_stat {
u32 io_cmpls;
};
+#ifdef LPFC_HDWQ_LOCK_STAT
+struct lpfc_lock_stat {
+ uint32_t alloc_xri_get;
+ uint32_t alloc_xri_put;
+ uint32_t free_xri;
+ uint32_t wq_access;
+ uint32_t alloc_pvt_pool;
+ uint32_t mv_from_pvt_pool;
+ uint32_t mv_to_pub_pool;
+ uint32_t mv_to_pvt_pool;
+ uint32_t free_pub_pool;
+ uint32_t free_pvt_pool;
+};
+#endif
+
/* SLI4 HBA data structure entries */
struct lpfc_sli4_hdw_queue {
/* Pointers to the constructed SLI4 queues */
@@ -626,6 +641,9 @@ struct lpfc_sli4_hdw_queue {
/* FC-4 Stats counters */
struct lpfc_fc4_ctrl_stat nvme_cstat;
struct lpfc_fc4_ctrl_stat scsi_cstat;
+#ifdef LPFC_HDWQ_LOCK_STAT
+ struct lpfc_lock_stat lock_conflict;
+#endif
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
#define LPFC_CHECK_CPU_CNT 128
@@ -635,6 +653,34 @@ struct lpfc_sli4_hdw_queue {
#endif
};
+#ifdef LPFC_HDWQ_LOCK_STAT
+/* compile time trylock stats */
+#define lpfc_qp_spin_lock_irqsave(lock, flag, qp, lstat) \
+ { \
+ int only_once = 1; \
+ while (spin_trylock_irqsave(lock, flag) == 0) { \
+ if (only_once) { \
+ only_once = 0; \
+ qp->lock_conflict.lstat++; \
+ } \
+ } \
+ }
+#define lpfc_qp_spin_lock(lock, qp, lstat) \
+ { \
+ int only_once = 1; \
+ while (spin_trylock(lock) == 0) { \
+ if (only_once) { \
+ only_once = 0; \
+ qp->lock_conflict.lstat++; \
+ } \
+ } \
+ }
+#else
+#define lpfc_qp_spin_lock_irqsave(lock, flag, qp, lstat) \
+ spin_lock_irqsave(lock, flag)
+#define lpfc_qp_spin_lock(lock, qp, lstat) spin_lock(lock)
+#endif
+
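
When LPFC_HDWQ_LOCK_STAT is defined, the macros above still always take the lock; they just spin on trylock and charge one conflict to the named per-queue counter the first time the trylock fails for a given acquisition. A userspace pthread-spinlock analogue of the same counting trick (illustrative only, not the driver macro):

	#include <pthread.h>
	#include <stdio.h>

	struct qstats {
		unsigned long wq_access;	/* stands in for lock_conflict.wq_access */
	};

	/* Take the lock unconditionally, but record one conflict the first
	 * time the trylock fails (mirrors the macro's only_once flag).
	 */
	static void lock_with_stat(pthread_spinlock_t *lock, struct qstats *qs)
	{
		int only_once = 1;

		while (pthread_spin_trylock(lock) != 0) {
			if (only_once) {
				only_once = 0;
				qs->wq_access++;
			}
		}
	}

	int main(void)
	{
		pthread_spinlock_t lock;
		struct qstats qs = { 0 };

		pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
		lock_with_stat(&lock, &qs);	/* uncontended: no conflict counted */
		pthread_spin_unlock(&lock);
		printf("wq_access conflicts: %lu\n", qs.wq_access);
		pthread_spin_destroy(&lock);
		return 0;
	}
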
struct lpfc_sli4_hba {
void __iomem *conf_regs_memmap_p; /* Kernel memory mapped address for
* config space registers
@@ -764,6 +810,8 @@ struct lpfc_sli4_hba {
uint16_t nvmet_xri_cnt;
uint16_t nvmet_io_wait_cnt;
uint16_t nvmet_io_wait_total;
+ uint16_t cq_max;
+ struct lpfc_queue **cq_lookup;
struct list_head lpfc_els_sgl_list;
struct list_head lpfc_abts_els_sgl_list;
spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */