diff mbox series

[v2,07/13] qla2xxx: performance tweak

Message ID 20200902072548.11491-8-njavali@marvell.com (mailing list archive)
State Superseded
Headers show
Series qla2xxx misc features and bug fixes | expand

Commit Message

Nilesh Javali Sept. 2, 2020, 7:25 a.m. UTC
From: Quinn Tran <qutran@marvell.com>

This patch move statistic fields from vha struct to qpair
to reduce memory thrashing.

Signed-off-by: Quinn Tran <qutran@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
---
 drivers/scsi/qla2xxx/qla_attr.c | 46 ++++++++++++++++++++++++++++-----
 drivers/scsi/qla2xxx/qla_def.h  | 32 +++++++++++++++++------
 drivers/scsi/qla2xxx/qla_init.c |  5 ++--
 drivers/scsi/qla2xxx/qla_iocb.c | 18 +++++++------
 drivers/scsi/qla2xxx/qla_isr.c  |  8 +++---
 drivers/scsi/qla2xxx/qla_mid.c  |  4 +--
 drivers/scsi/qla2xxx/qla_nvme.c |  8 +++---
 drivers/scsi/qla2xxx/qla_os.c   | 10 ++++---
 8 files changed, 92 insertions(+), 39 deletions(-)

Comments

Himanshu Madhani Sept. 2, 2020, 3:54 p.m. UTC | #1
> On Sep 2, 2020, at 2:25 AM, Nilesh Javali <njavali@marvell.com> wrote:
> 
> From: Quinn Tran <qutran@marvell.com>
> 
> This patch move statistic fields from vha struct to qpair
> to reduce memory thrashing.
> 
> Signed-off-by: Quinn Tran <qutran@marvell.com>
> Signed-off-by: Nilesh Javali <njavali@marvell.com>
> ---
> drivers/scsi/qla2xxx/qla_attr.c | 46 ++++++++++++++++++++++++++++-----
> drivers/scsi/qla2xxx/qla_def.h  | 32 +++++++++++++++++------
> drivers/scsi/qla2xxx/qla_init.c |  5 ++--
> drivers/scsi/qla2xxx/qla_iocb.c | 18 +++++++------
> drivers/scsi/qla2xxx/qla_isr.c  |  8 +++---
> drivers/scsi/qla2xxx/qla_mid.c  |  4 +--
> drivers/scsi/qla2xxx/qla_nvme.c |  8 +++---
> drivers/scsi/qla2xxx/qla_os.c   | 10 ++++---
> 8 files changed, 92 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
> index d006ae193677..1ee747ba4ecc 100644
> --- a/drivers/scsi/qla2xxx/qla_attr.c
> +++ b/drivers/scsi/qla2xxx/qla_attr.c
> @@ -2726,6 +2726,9 @@ qla2x00_get_fc_host_stats(struct Scsi_Host *shost)
> 	struct link_statistics *stats;
> 	dma_addr_t stats_dma;
> 	struct fc_host_statistics *p = &vha->fc_host_stat;
> +	struct qla_qpair *qpair;
> +	int i;
> +	u64 ib = 0, ob = 0, ir = 0, or = 0;
> 
> 	memset(p, -1, sizeof(*p));
> 
> @@ -2762,6 +2765,27 @@ qla2x00_get_fc_host_stats(struct Scsi_Host *shost)
> 	if (rval != QLA_SUCCESS)
> 		goto done_free;
> 
> +	/* --- */
> +	for (i = 0; i < vha->hw->max_qpairs; i++) {
> +		qpair = vha->hw->queue_pair_map[i];
> +		if (!qpair)
> +			continue;
> +		ir += qpair->counters.input_requests;
> +		or += qpair->counters.output_requests;
> +		ib += qpair->counters.input_bytes;
> +		ob += qpair->counters.output_bytes;
> +	}
> +	ir += ha->base_qpair->counters.input_requests;
> +	or += ha->base_qpair->counters.output_requests;
> +	ib += ha->base_qpair->counters.input_bytes;
> +	ob += ha->base_qpair->counters.output_bytes;
> +
> +	ir += vha->qla_stats.input_requests;
> +	or += vha->qla_stats.output_requests;
> +	ib += vha->qla_stats.input_bytes;
> +	ob += vha->qla_stats.output_bytes;
> +	/* --- */
> +
> 	p->link_failure_count = le32_to_cpu(stats->link_fail_cnt);
> 	p->loss_of_sync_count = le32_to_cpu(stats->loss_sync_cnt);
> 	p->loss_of_signal_count = le32_to_cpu(stats->loss_sig_cnt);
> @@ -2781,15 +2805,16 @@ qla2x00_get_fc_host_stats(struct Scsi_Host *shost)
> 			p->rx_words = le64_to_cpu(stats->fpm_recv_word_cnt);
> 			p->tx_words = le64_to_cpu(stats->fpm_xmit_word_cnt);
> 		} else {
> -			p->rx_words = vha->qla_stats.input_bytes;
> -			p->tx_words = vha->qla_stats.output_bytes;
> +			p->rx_words = ib >> 2;
> +			p->tx_words = ob >> 2;
> 		}
> 	}
> +
> 	p->fcp_control_requests = vha->qla_stats.control_requests;
> -	p->fcp_input_requests = vha->qla_stats.input_requests;
> -	p->fcp_output_requests = vha->qla_stats.output_requests;
> -	p->fcp_input_megabytes = vha->qla_stats.input_bytes >> 20;
> -	p->fcp_output_megabytes = vha->qla_stats.output_bytes >> 20;
> +	p->fcp_input_requests = ir;
> +	p->fcp_output_requests = or;
> +	p->fcp_input_megabytes  = ib >> 20;
> +	p->fcp_output_megabytes = ob >> 20;
> 	p->seconds_since_last_reset =
> 	    get_jiffies_64() - vha->qla_stats.jiffies_at_last_reset;
> 	do_div(p->seconds_since_last_reset, HZ);
> @@ -2809,9 +2834,18 @@ qla2x00_reset_host_stats(struct Scsi_Host *shost)
> 	struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
> 	struct link_statistics *stats;
> 	dma_addr_t stats_dma;
> +	int i;
> +	struct qla_qpair *qpair;
> 
> 	memset(&vha->qla_stats, 0, sizeof(vha->qla_stats));
> 	memset(&vha->fc_host_stat, 0, sizeof(vha->fc_host_stat));
> +	for (i = 0; i < vha->hw->max_qpairs; i++) {
> +		qpair = vha->hw->queue_pair_map[i];
> +		if (!qpair)
> +			continue;
> +		memset(&qpair->counters, 0, sizeof(qpair->counters));
> +	}
> +	memset(&ha->base_qpair->counters, 0, sizeof(qpair->counters));
> 
> 	vha->qla_stats.jiffies_at_last_reset = get_jiffies_64();
> 
> diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
> index ad4a0ba7203c..3ca8665638c4 100644
> --- a/drivers/scsi/qla2xxx/qla_def.h
> +++ b/drivers/scsi/qla2xxx/qla_def.h
> @@ -2443,12 +2443,6 @@ typedef struct fc_port {
> 	struct list_head list;
> 	struct scsi_qla_host *vha;
> 
> -	uint8_t node_name[WWN_SIZE];
> -	uint8_t port_name[WWN_SIZE];
> -	port_id_t d_id;
> -	uint16_t loop_id;
> -	uint16_t old_loop_id;
> -
> 	unsigned int conf_compl_supported:1;
> 	unsigned int deleted:2;
> 	unsigned int free_pending:1;
> @@ -2465,6 +2459,13 @@ typedef struct fc_port {
> 	unsigned int n2n_flag:1;
> 	unsigned int explicit_logout:1;
> 	unsigned int prli_pend_timer:1;
> +	uint8_t nvme_flag;
> +
> +	uint8_t node_name[WWN_SIZE];
> +	uint8_t port_name[WWN_SIZE];
> +	port_id_t d_id;
> +	uint16_t loop_id;
> +	uint16_t old_loop_id;
> 
> 	struct completion nvme_del_done;
> 	uint32_t nvme_prli_service_param;
> @@ -2473,7 +2474,7 @@ typedef struct fc_port {
> #define NVME_PRLI_SP_TARGET     BIT_4
> #define NVME_PRLI_SP_DISCOVERY  BIT_3
> #define NVME_PRLI_SP_FIRST_BURST	BIT_0
> -	uint8_t nvme_flag;
> +
> 	uint32_t nvme_first_burst_size;
> #define NVME_FLAG_REGISTERED 4
> #define NVME_FLAG_DELETING 2
> @@ -3510,6 +3511,14 @@ struct qla_tgt_counters {
> 	uint64_t num_term_xchg_sent;
> };
> 
> +struct qla_counters {
> +	uint64_t input_bytes;
> +	uint64_t input_requests;
> +	uint64_t output_bytes;
> +	uint64_t output_requests;
> +
> +};
> +
> struct qla_qpair;
> 
> /* Response queue data structure */
> @@ -3594,6 +3603,7 @@ struct qla_qpair {
> 	uint32_t enable_class_2:1;
> 	uint32_t enable_explicit_conf:1;
> 	uint32_t use_shadow_reg:1;
> +	uint32_t rcv_intr:1;
> 
> 	uint16_t id;			/* qp number used with FW */
> 	uint16_t vp_idx;		/* vport ID */
> @@ -3609,13 +3619,16 @@ struct qla_qpair {
> 	struct qla_msix_entry *msix; /* point to &ha->msix_entries[x] */
> 	struct qla_hw_data *hw;
> 	struct work_struct q_work;
> +	struct qla_counters counters;
> +
> 	struct list_head qp_list_elem; /* vha->qp_list */
> 	struct list_head hints_list;
> -	uint16_t cpuid;
> +
> 	uint16_t retry_term_cnt;
> 	__le32	retry_term_exchg_addr;
> 	uint64_t retry_term_jiff;
> 	struct qla_tgt_counters tgt_counters;
> +	uint16_t cpuid;
> };
> 
> /* Place holder for FW buffer parameters */
> @@ -4129,6 +4142,9 @@ struct qla_hw_data {
> #define USE_ASYNC_SCAN(ha) (IS_QLA25XX(ha) || IS_QLA81XX(ha) ||\
> 	IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))
> 
> +#define IS_ZIO_THRESHOLD_CAPABLE(ha) \
> +	(IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))
> +

Why not fold in "ha->zio_mode == QLA_ZIO_MODE_6” check as well in the above macro. 

My understanding is that only time ZIO_THREASHOLD_CAPABLE will be used is when
zio_mode == ZIO_MODE_6. Makes code easy to read. 


> 	/* HBA serial number */
> 	uint8_t		serial0;
> 	uint8_t		serial1;
> diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
> index b4d53eb4e53e..99f322fb74ab 100644
> --- a/drivers/scsi/qla2xxx/qla_init.c
> +++ b/drivers/scsi/qla2xxx/qla_init.c
> @@ -3690,9 +3690,8 @@ qla2x00_setup_chip(scsi_qla_host_t *vha)
> 					goto execute_fw_with_lr;
> 				}
> 
> -				if ((IS_QLA83XX(ha) || IS_QLA27XX(ha) ||
> -				    IS_QLA28XX(ha)) &&
> -				    (ha->zio_mode == QLA_ZIO_MODE_6))
> +				if (IS_ZIO_THRESHOLD_CAPABLE(ha) &&
> +				    ha->zio_mode == QLA_ZIO_MODE_6)

See above comment. This line would change to IS_ZIO_THRESHOLD_CAPABLE()

> 					qla27xx_set_zio_threshold(vha,
> 					    ha->last_zio_threshold);
> 
> diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
> index e3d2dea0b057..d69e16e844aa 100644
> --- a/drivers/scsi/qla2xxx/qla_iocb.c
> +++ b/drivers/scsi/qla2xxx/qla_iocb.c
> @@ -594,6 +594,7 @@ qla24xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt,
> 	uint32_t dsd_list_len;
> 	struct dsd_dma *dsd_ptr;
> 	struct ct6_dsd *ctx;
> +	struct qla_qpair *qpair = sp->qpair;
> 
> 	cmd = GET_CMD_SP(sp);
> 
> @@ -612,12 +613,12 @@ qla24xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt,
> 	/* Set transfer direction */
> 	if (cmd->sc_data_direction == DMA_TO_DEVICE) {
> 		cmd_pkt->control_flags = cpu_to_le16(CF_WRITE_DATA);
> -		vha->qla_stats.output_bytes += scsi_bufflen(cmd);
> -		vha->qla_stats.output_requests++;
> +		qpair->counters.output_bytes += scsi_bufflen(cmd);
> +		qpair->counters.output_requests++;
> 	} else if (cmd->sc_data_direction == DMA_FROM_DEVICE) {
> 		cmd_pkt->control_flags = cpu_to_le16(CF_READ_DATA);
> -		vha->qla_stats.input_bytes += scsi_bufflen(cmd);
> -		vha->qla_stats.input_requests++;
> +		qpair->counters.input_bytes += scsi_bufflen(cmd);
> +		qpair->counters.input_requests++;
> 	}
> 
> 	cur_seg = scsi_sglist(cmd);
> @@ -704,6 +705,7 @@ qla24xx_build_scsi_iocbs(srb_t *sp, struct cmd_type_7 *cmd_pkt,
> 	struct scsi_cmnd *cmd;
> 	struct scatterlist *sg;
> 	int i;
> +	struct qla_qpair *qpair = sp->qpair;
> 
> 	cmd = GET_CMD_SP(sp);
> 
> @@ -721,12 +723,12 @@ qla24xx_build_scsi_iocbs(srb_t *sp, struct cmd_type_7 *cmd_pkt,
> 	/* Set transfer direction */
> 	if (cmd->sc_data_direction == DMA_TO_DEVICE) {
> 		cmd_pkt->task_mgmt_flags = cpu_to_le16(TMF_WRITE_DATA);
> -		vha->qla_stats.output_bytes += scsi_bufflen(cmd);
> -		vha->qla_stats.output_requests++;
> +		qpair->counters.output_bytes += scsi_bufflen(cmd);
> +		qpair->counters.output_requests++;
> 	} else if (cmd->sc_data_direction == DMA_FROM_DEVICE) {
> 		cmd_pkt->task_mgmt_flags = cpu_to_le16(TMF_READ_DATA);
> -		vha->qla_stats.input_bytes += scsi_bufflen(cmd);
> -		vha->qla_stats.input_requests++;
> +		qpair->counters.input_bytes += scsi_bufflen(cmd);
> +		qpair->counters.input_requests++;
> 	}
> 
> 	/* One DSD is available in the Command Type 3 IOCB */
> diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
> index d38dd6520b53..a63f2000fadf 100644
> --- a/drivers/scsi/qla2xxx/qla_isr.c
> +++ b/drivers/scsi/qla2xxx/qla_isr.c
> @@ -3414,8 +3414,10 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
> 	if (!ha->flags.fw_started)
> 		return;
> 
> -	if (rsp->qpair->cpuid != smp_processor_id())
> +	if (rsp->qpair->cpuid != smp_processor_id() || !rsp->qpair->rcv_intr) {
> +		rsp->qpair->rcv_intr = 1;
> 		qla_cpu_update(rsp->qpair, smp_processor_id());
> +	}
> 
> 	while (rsp->ring_ptr->signature != RESPONSE_PROCESSED) {
> 		pkt = (struct sts_entry_24xx *)rsp->ring_ptr;
> @@ -3865,7 +3867,7 @@ qla2xxx_msix_rsp_q(int irq, void *dev_id)
> 	}
> 	ha = qpair->hw;
> 
> -	queue_work(ha->wq, &qpair->q_work);
> +	queue_work_on(smp_processor_id(), ha->wq, &qpair->q_work);
> 
> 	return IRQ_HANDLED;
> }
> @@ -3891,7 +3893,7 @@ qla2xxx_msix_rsp_q_hs(int irq, void *dev_id)
> 	wrt_reg_dword(&reg->hccr, HCCRX_CLR_RISC_INT);
> 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
> 
> -	queue_work(ha->wq, &qpair->q_work);
> +	queue_work_on(smp_processor_id(), ha->wq, &qpair->q_work);
> 
> 	return IRQ_HANDLED;
> }
> diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
> index 15efe2f04b86..08cfe043ac66 100644
> --- a/drivers/scsi/qla2xxx/qla_mid.c
> +++ b/drivers/scsi/qla2xxx/qla_mid.c
> @@ -808,11 +808,9 @@ static void qla_do_work(struct work_struct *work)
> {
> 	unsigned long flags;
> 	struct qla_qpair *qpair = container_of(work, struct qla_qpair, q_work);
> -	struct scsi_qla_host *vha;
> -	struct qla_hw_data *ha = qpair->hw;
> +	struct scsi_qla_host *vha = qpair->vha;
> 
> 	spin_lock_irqsave(&qpair->qp_lock, flags);
> -	vha = pci_get_drvdata(ha->pdev);
> 	qla24xx_process_response_queue(vha, qpair->rsp);
> 	spin_unlock_irqrestore(&qpair->qp_lock, flags);
> 
> diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
> index b05e4545ef5f..b0c13144c21a 100644
> --- a/drivers/scsi/qla2xxx/qla_nvme.c
> +++ b/drivers/scsi/qla2xxx/qla_nvme.c
> @@ -428,8 +428,8 @@ static inline int qla2x00_start_nvme_mq(srb_t *sp)
> 	/* No data transfer how do we check buffer len == 0?? */
> 	if (fd->io_dir == NVMEFC_FCP_READ) {
> 		cmd_pkt->control_flags = cpu_to_le16(CF_READ_DATA);
> -		vha->qla_stats.input_bytes += fd->payload_length;
> -		vha->qla_stats.input_requests++;
> +		qpair->counters.input_bytes += fd->payload_length;
> +		qpair->counters.input_requests++;
> 	} else if (fd->io_dir == NVMEFC_FCP_WRITE) {
> 		cmd_pkt->control_flags = cpu_to_le16(CF_WRITE_DATA);
> 		if ((vha->flags.nvme_first_burst) &&
> @@ -441,8 +441,8 @@ static inline int qla2x00_start_nvme_mq(srb_t *sp)
> 				cmd_pkt->control_flags |=
> 					cpu_to_le16(CF_NVME_FIRST_BURST_ENABLE);
> 		}
> -		vha->qla_stats.output_bytes += fd->payload_length;
> -		vha->qla_stats.output_requests++;
> +		qpair->counters.output_bytes += fd->payload_length;
> +		qpair->counters.output_requests++;
> 	} else if (fd->io_dir == 0) {
> 		cmd_pkt->control_flags = 0;
> 	}
> diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
> index 31bfc0c088b7..c53cc31cd068 100644
> --- a/drivers/scsi/qla2xxx/qla_os.c
> +++ b/drivers/scsi/qla2xxx/qla_os.c
> @@ -7198,8 +7198,10 @@ qla2x00_timer(struct timer_list *t)
> 	 * FC-NVME
> 	 * see if the active AEN count has changed from what was last reported.
> 	 */
> +	index = atomic_read(&ha->nvme_active_aen_cnt);
> 	if (!vha->vp_idx &&
> -	    (atomic_read(&ha->nvme_active_aen_cnt) != ha->nvme_last_rptd_aen) &&
> +	    (index != ha->nvme_last_rptd_aen) &&
> +	    (index >= DEFAULT_ZIO_THRESHOLD) &&
> 	    ha->zio_mode == QLA_ZIO_MODE_6 &&
> 	    !ha->flags.host_shutting_down) {
> 		ql_log(ql_log_info, vha, 0x3002,
> @@ -7211,9 +7213,9 @@ qla2x00_timer(struct timer_list *t)
> 	}
> 
> 	if (!vha->vp_idx &&
> -	    (atomic_read(&ha->zio_threshold) != ha->last_zio_threshold) &&
> -	    (ha->zio_mode == QLA_ZIO_MODE_6) &&
> -	    (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))) {
> +	    atomic_read(&ha->zio_threshold) != ha->last_zio_threshold &&
> +	    ha->zio_mode == QLA_ZIO_MODE_6 &&
> +	    IS_ZIO_THRESHOLD_CAPABLE(ha)) {

Ditto. This would just use IS_ZIO_THRESHOLD_CAPABLE() macro. 

> 		ql_log(ql_log_info, vha, 0x3002,
> 		    "Sched: Set ZIO exchange threshold to %d.\n",
> 		    ha->last_zio_threshold);
> -- 
> 2.19.0.rc0
> 

--
Himanshu Madhani	 Oracle Linux Engineering
diff mbox series

Patch

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index d006ae193677..1ee747ba4ecc 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -2726,6 +2726,9 @@  qla2x00_get_fc_host_stats(struct Scsi_Host *shost)
 	struct link_statistics *stats;
 	dma_addr_t stats_dma;
 	struct fc_host_statistics *p = &vha->fc_host_stat;
+	struct qla_qpair *qpair;
+	int i;
+	u64 ib = 0, ob = 0, ir = 0, or = 0;
 
 	memset(p, -1, sizeof(*p));
 
@@ -2762,6 +2765,27 @@  qla2x00_get_fc_host_stats(struct Scsi_Host *shost)
 	if (rval != QLA_SUCCESS)
 		goto done_free;
 
+	/* --- */
+	for (i = 0; i < vha->hw->max_qpairs; i++) {
+		qpair = vha->hw->queue_pair_map[i];
+		if (!qpair)
+			continue;
+		ir += qpair->counters.input_requests;
+		or += qpair->counters.output_requests;
+		ib += qpair->counters.input_bytes;
+		ob += qpair->counters.output_bytes;
+	}
+	ir += ha->base_qpair->counters.input_requests;
+	or += ha->base_qpair->counters.output_requests;
+	ib += ha->base_qpair->counters.input_bytes;
+	ob += ha->base_qpair->counters.output_bytes;
+
+	ir += vha->qla_stats.input_requests;
+	or += vha->qla_stats.output_requests;
+	ib += vha->qla_stats.input_bytes;
+	ob += vha->qla_stats.output_bytes;
+	/* --- */
+
 	p->link_failure_count = le32_to_cpu(stats->link_fail_cnt);
 	p->loss_of_sync_count = le32_to_cpu(stats->loss_sync_cnt);
 	p->loss_of_signal_count = le32_to_cpu(stats->loss_sig_cnt);
@@ -2781,15 +2805,16 @@  qla2x00_get_fc_host_stats(struct Scsi_Host *shost)
 			p->rx_words = le64_to_cpu(stats->fpm_recv_word_cnt);
 			p->tx_words = le64_to_cpu(stats->fpm_xmit_word_cnt);
 		} else {
-			p->rx_words = vha->qla_stats.input_bytes;
-			p->tx_words = vha->qla_stats.output_bytes;
+			p->rx_words = ib >> 2;
+			p->tx_words = ob >> 2;
 		}
 	}
+
 	p->fcp_control_requests = vha->qla_stats.control_requests;
-	p->fcp_input_requests = vha->qla_stats.input_requests;
-	p->fcp_output_requests = vha->qla_stats.output_requests;
-	p->fcp_input_megabytes = vha->qla_stats.input_bytes >> 20;
-	p->fcp_output_megabytes = vha->qla_stats.output_bytes >> 20;
+	p->fcp_input_requests = ir;
+	p->fcp_output_requests = or;
+	p->fcp_input_megabytes  = ib >> 20;
+	p->fcp_output_megabytes = ob >> 20;
 	p->seconds_since_last_reset =
 	    get_jiffies_64() - vha->qla_stats.jiffies_at_last_reset;
 	do_div(p->seconds_since_last_reset, HZ);
@@ -2809,9 +2834,18 @@  qla2x00_reset_host_stats(struct Scsi_Host *shost)
 	struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
 	struct link_statistics *stats;
 	dma_addr_t stats_dma;
+	int i;
+	struct qla_qpair *qpair;
 
 	memset(&vha->qla_stats, 0, sizeof(vha->qla_stats));
 	memset(&vha->fc_host_stat, 0, sizeof(vha->fc_host_stat));
+	for (i = 0; i < vha->hw->max_qpairs; i++) {
+		qpair = vha->hw->queue_pair_map[i];
+		if (!qpair)
+			continue;
+		memset(&qpair->counters, 0, sizeof(qpair->counters));
+	}
+	memset(&ha->base_qpair->counters, 0, sizeof(qpair->counters));
 
 	vha->qla_stats.jiffies_at_last_reset = get_jiffies_64();
 
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index ad4a0ba7203c..3ca8665638c4 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2443,12 +2443,6 @@  typedef struct fc_port {
 	struct list_head list;
 	struct scsi_qla_host *vha;
 
-	uint8_t node_name[WWN_SIZE];
-	uint8_t port_name[WWN_SIZE];
-	port_id_t d_id;
-	uint16_t loop_id;
-	uint16_t old_loop_id;
-
 	unsigned int conf_compl_supported:1;
 	unsigned int deleted:2;
 	unsigned int free_pending:1;
@@ -2465,6 +2459,13 @@  typedef struct fc_port {
 	unsigned int n2n_flag:1;
 	unsigned int explicit_logout:1;
 	unsigned int prli_pend_timer:1;
+	uint8_t nvme_flag;
+
+	uint8_t node_name[WWN_SIZE];
+	uint8_t port_name[WWN_SIZE];
+	port_id_t d_id;
+	uint16_t loop_id;
+	uint16_t old_loop_id;
 
 	struct completion nvme_del_done;
 	uint32_t nvme_prli_service_param;
@@ -2473,7 +2474,7 @@  typedef struct fc_port {
 #define NVME_PRLI_SP_TARGET     BIT_4
 #define NVME_PRLI_SP_DISCOVERY  BIT_3
 #define NVME_PRLI_SP_FIRST_BURST	BIT_0
-	uint8_t nvme_flag;
+
 	uint32_t nvme_first_burst_size;
 #define NVME_FLAG_REGISTERED 4
 #define NVME_FLAG_DELETING 2
@@ -3510,6 +3511,14 @@  struct qla_tgt_counters {
 	uint64_t num_term_xchg_sent;
 };
 
+struct qla_counters {
+	uint64_t input_bytes;
+	uint64_t input_requests;
+	uint64_t output_bytes;
+	uint64_t output_requests;
+
+};
+
 struct qla_qpair;
 
 /* Response queue data structure */
@@ -3594,6 +3603,7 @@  struct qla_qpair {
 	uint32_t enable_class_2:1;
 	uint32_t enable_explicit_conf:1;
 	uint32_t use_shadow_reg:1;
+	uint32_t rcv_intr:1;
 
 	uint16_t id;			/* qp number used with FW */
 	uint16_t vp_idx;		/* vport ID */
@@ -3609,13 +3619,16 @@  struct qla_qpair {
 	struct qla_msix_entry *msix; /* point to &ha->msix_entries[x] */
 	struct qla_hw_data *hw;
 	struct work_struct q_work;
+	struct qla_counters counters;
+
 	struct list_head qp_list_elem; /* vha->qp_list */
 	struct list_head hints_list;
-	uint16_t cpuid;
+
 	uint16_t retry_term_cnt;
 	__le32	retry_term_exchg_addr;
 	uint64_t retry_term_jiff;
 	struct qla_tgt_counters tgt_counters;
+	uint16_t cpuid;
 };
 
 /* Place holder for FW buffer parameters */
@@ -4129,6 +4142,9 @@  struct qla_hw_data {
 #define USE_ASYNC_SCAN(ha) (IS_QLA25XX(ha) || IS_QLA81XX(ha) ||\
 	IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))
 
+#define IS_ZIO_THRESHOLD_CAPABLE(ha) \
+	(IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))
+
 	/* HBA serial number */
 	uint8_t		serial0;
 	uint8_t		serial1;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index b4d53eb4e53e..99f322fb74ab 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -3690,9 +3690,8 @@  qla2x00_setup_chip(scsi_qla_host_t *vha)
 					goto execute_fw_with_lr;
 				}
 
-				if ((IS_QLA83XX(ha) || IS_QLA27XX(ha) ||
-				    IS_QLA28XX(ha)) &&
-				    (ha->zio_mode == QLA_ZIO_MODE_6))
+				if (IS_ZIO_THRESHOLD_CAPABLE(ha) &&
+				    ha->zio_mode == QLA_ZIO_MODE_6)
 					qla27xx_set_zio_threshold(vha,
 					    ha->last_zio_threshold);
 
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index e3d2dea0b057..d69e16e844aa 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -594,6 +594,7 @@  qla24xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt,
 	uint32_t dsd_list_len;
 	struct dsd_dma *dsd_ptr;
 	struct ct6_dsd *ctx;
+	struct qla_qpair *qpair = sp->qpair;
 
 	cmd = GET_CMD_SP(sp);
 
@@ -612,12 +613,12 @@  qla24xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt,
 	/* Set transfer direction */
 	if (cmd->sc_data_direction == DMA_TO_DEVICE) {
 		cmd_pkt->control_flags = cpu_to_le16(CF_WRITE_DATA);
-		vha->qla_stats.output_bytes += scsi_bufflen(cmd);
-		vha->qla_stats.output_requests++;
+		qpair->counters.output_bytes += scsi_bufflen(cmd);
+		qpair->counters.output_requests++;
 	} else if (cmd->sc_data_direction == DMA_FROM_DEVICE) {
 		cmd_pkt->control_flags = cpu_to_le16(CF_READ_DATA);
-		vha->qla_stats.input_bytes += scsi_bufflen(cmd);
-		vha->qla_stats.input_requests++;
+		qpair->counters.input_bytes += scsi_bufflen(cmd);
+		qpair->counters.input_requests++;
 	}
 
 	cur_seg = scsi_sglist(cmd);
@@ -704,6 +705,7 @@  qla24xx_build_scsi_iocbs(srb_t *sp, struct cmd_type_7 *cmd_pkt,
 	struct scsi_cmnd *cmd;
 	struct scatterlist *sg;
 	int i;
+	struct qla_qpair *qpair = sp->qpair;
 
 	cmd = GET_CMD_SP(sp);
 
@@ -721,12 +723,12 @@  qla24xx_build_scsi_iocbs(srb_t *sp, struct cmd_type_7 *cmd_pkt,
 	/* Set transfer direction */
 	if (cmd->sc_data_direction == DMA_TO_DEVICE) {
 		cmd_pkt->task_mgmt_flags = cpu_to_le16(TMF_WRITE_DATA);
-		vha->qla_stats.output_bytes += scsi_bufflen(cmd);
-		vha->qla_stats.output_requests++;
+		qpair->counters.output_bytes += scsi_bufflen(cmd);
+		qpair->counters.output_requests++;
 	} else if (cmd->sc_data_direction == DMA_FROM_DEVICE) {
 		cmd_pkt->task_mgmt_flags = cpu_to_le16(TMF_READ_DATA);
-		vha->qla_stats.input_bytes += scsi_bufflen(cmd);
-		vha->qla_stats.input_requests++;
+		qpair->counters.input_bytes += scsi_bufflen(cmd);
+		qpair->counters.input_requests++;
 	}
 
 	/* One DSD is available in the Command Type 3 IOCB */
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index d38dd6520b53..a63f2000fadf 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -3414,8 +3414,10 @@  void qla24xx_process_response_queue(struct scsi_qla_host *vha,
 	if (!ha->flags.fw_started)
 		return;
 
-	if (rsp->qpair->cpuid != smp_processor_id())
+	if (rsp->qpair->cpuid != smp_processor_id() || !rsp->qpair->rcv_intr) {
+		rsp->qpair->rcv_intr = 1;
 		qla_cpu_update(rsp->qpair, smp_processor_id());
+	}
 
 	while (rsp->ring_ptr->signature != RESPONSE_PROCESSED) {
 		pkt = (struct sts_entry_24xx *)rsp->ring_ptr;
@@ -3865,7 +3867,7 @@  qla2xxx_msix_rsp_q(int irq, void *dev_id)
 	}
 	ha = qpair->hw;
 
-	queue_work(ha->wq, &qpair->q_work);
+	queue_work_on(smp_processor_id(), ha->wq, &qpair->q_work);
 
 	return IRQ_HANDLED;
 }
@@ -3891,7 +3893,7 @@  qla2xxx_msix_rsp_q_hs(int irq, void *dev_id)
 	wrt_reg_dword(&reg->hccr, HCCRX_CLR_RISC_INT);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
-	queue_work(ha->wq, &qpair->q_work);
+	queue_work_on(smp_processor_id(), ha->wq, &qpair->q_work);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index 15efe2f04b86..08cfe043ac66 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -808,11 +808,9 @@  static void qla_do_work(struct work_struct *work)
 {
 	unsigned long flags;
 	struct qla_qpair *qpair = container_of(work, struct qla_qpair, q_work);
-	struct scsi_qla_host *vha;
-	struct qla_hw_data *ha = qpair->hw;
+	struct scsi_qla_host *vha = qpair->vha;
 
 	spin_lock_irqsave(&qpair->qp_lock, flags);
-	vha = pci_get_drvdata(ha->pdev);
 	qla24xx_process_response_queue(vha, qpair->rsp);
 	spin_unlock_irqrestore(&qpair->qp_lock, flags);
 
diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
index b05e4545ef5f..b0c13144c21a 100644
--- a/drivers/scsi/qla2xxx/qla_nvme.c
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
@@ -428,8 +428,8 @@  static inline int qla2x00_start_nvme_mq(srb_t *sp)
 	/* No data transfer how do we check buffer len == 0?? */
 	if (fd->io_dir == NVMEFC_FCP_READ) {
 		cmd_pkt->control_flags = cpu_to_le16(CF_READ_DATA);
-		vha->qla_stats.input_bytes += fd->payload_length;
-		vha->qla_stats.input_requests++;
+		qpair->counters.input_bytes += fd->payload_length;
+		qpair->counters.input_requests++;
 	} else if (fd->io_dir == NVMEFC_FCP_WRITE) {
 		cmd_pkt->control_flags = cpu_to_le16(CF_WRITE_DATA);
 		if ((vha->flags.nvme_first_burst) &&
@@ -441,8 +441,8 @@  static inline int qla2x00_start_nvme_mq(srb_t *sp)
 				cmd_pkt->control_flags |=
 					cpu_to_le16(CF_NVME_FIRST_BURST_ENABLE);
 		}
-		vha->qla_stats.output_bytes += fd->payload_length;
-		vha->qla_stats.output_requests++;
+		qpair->counters.output_bytes += fd->payload_length;
+		qpair->counters.output_requests++;
 	} else if (fd->io_dir == 0) {
 		cmd_pkt->control_flags = 0;
 	}
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 31bfc0c088b7..c53cc31cd068 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -7198,8 +7198,10 @@  qla2x00_timer(struct timer_list *t)
 	 * FC-NVME
 	 * see if the active AEN count has changed from what was last reported.
 	 */
+	index = atomic_read(&ha->nvme_active_aen_cnt);
 	if (!vha->vp_idx &&
-	    (atomic_read(&ha->nvme_active_aen_cnt) != ha->nvme_last_rptd_aen) &&
+	    (index != ha->nvme_last_rptd_aen) &&
+	    (index >= DEFAULT_ZIO_THRESHOLD) &&
 	    ha->zio_mode == QLA_ZIO_MODE_6 &&
 	    !ha->flags.host_shutting_down) {
 		ql_log(ql_log_info, vha, 0x3002,
@@ -7211,9 +7213,9 @@  qla2x00_timer(struct timer_list *t)
 	}
 
 	if (!vha->vp_idx &&
-	    (atomic_read(&ha->zio_threshold) != ha->last_zio_threshold) &&
-	    (ha->zio_mode == QLA_ZIO_MODE_6) &&
-	    (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))) {
+	    atomic_read(&ha->zio_threshold) != ha->last_zio_threshold &&
+	    ha->zio_mode == QLA_ZIO_MODE_6 &&
+	    IS_ZIO_THRESHOLD_CAPABLE(ha)) {
 		ql_log(ql_log_info, vha, 0x3002,
 		    "Sched: Set ZIO exchange threshold to %d.\n",
 		    ha->last_zio_threshold);