Message ID | 20210908072846.10011-9-njavali@marvell.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | qla2xxx driver bug fixes | expand |
> On Sep 8, 2021, at 2:28 AM, Nilesh Javali <njavali@marvell.com> wrote: > > From: Manish Rangankar <mrangankar@marvell.com> > > DPC thread gets restricted due to a no-op mailbox, which is a blocking call > and has a high execution frequency. To free up the DPC thread we move no-op > handling to the workqueue. Also, modified qla_do_hb to send no-op MBC if > we don’t have any active interrupts, but there are still IOs outstanding > with firmware. > > Fixes: d94d8158e184 ("scsi: qla2xxx: Add heartbeat check") Cc: stable@vger.kernel.org > Signed-off-by: Manish Rangankar <mrangankar@marvell.com> > Signed-off-by: Nilesh Javali <njavali@marvell.com> > --- > drivers/scsi/qla2xxx/qla_def.h | 4 +- > drivers/scsi/qla2xxx/qla_init.c | 2 + > drivers/scsi/qla2xxx/qla_os.c | 74 +++++++++++++++------------------ > 3 files changed, 38 insertions(+), 42 deletions(-) > > diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h > index be2eb75ee1a3..d6e131bf1824 100644 > --- a/drivers/scsi/qla2xxx/qla_def.h > +++ b/drivers/scsi/qla2xxx/qla_def.h > @@ -3750,6 +3750,7 @@ struct qla_qpair { > struct qla_fw_resources fwres ____cacheline_aligned; > u32 cmd_cnt; > u32 cmd_completion_cnt; > + u32 prev_completion_cnt; > }; > > /* Place holder for FW buffer parameters */ > @@ -4607,6 +4608,7 @@ struct qla_hw_data { > struct qla_chip_state_84xx *cs84xx; > struct isp_operations *isp_ops; > struct workqueue_struct *wq; > + struct work_struct hb_work; Use heartbeat_work (which is not awfully long and reads better. > struct qlfc_fw fw_buf; > > /* FCP_CMND priority support */ > @@ -4708,7 +4710,6 @@ struct qla_hw_data { > > struct qla_hw_data_stat stat; > pci_error_state_t pci_error_state; > - u64 prev_cmd_cnt; > struct dma_pool *purex_dma_pool; > struct btree_head32 host_map; > > @@ -4854,7 +4855,6 @@ typedef struct scsi_qla_host { > #define SET_ZIO_THRESHOLD_NEEDED 32 > #define ISP_ABORT_TO_ROM 33 > #define VPORT_DELETE 34 > -#define HEARTBEAT_CHK 38 > > #define PROCESS_PUREX_IOCB 63 > > diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c > index c6b3d0e7489e..a9a4243cb15a 100644 > --- a/drivers/scsi/qla2xxx/qla_init.c > +++ b/drivers/scsi/qla2xxx/qla_init.c > @@ -7025,12 +7025,14 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) > ha->chip_reset++; > ha->base_qpair->chip_reset = ha->chip_reset; > ha->base_qpair->cmd_cnt = ha->base_qpair->cmd_completion_cnt = 0; > + ha->base_qpair->prev_completion_cnt = 0; > for (i = 0; i < ha->max_qpairs; i++) { > if (ha->queue_pair_map[i]) { > ha->queue_pair_map[i]->chip_reset = > ha->base_qpair->chip_reset; > ha->queue_pair_map[i]->cmd_cnt = > ha->queue_pair_map[i]->cmd_completion_cnt = 0; > + ha->base_qpair->prev_completion_cnt = 0; > } > } > > diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c > index a1e861ecfc01..0454f79a8047 100644 > --- a/drivers/scsi/qla2xxx/qla_os.c > +++ b/drivers/scsi/qla2xxx/qla_os.c > @@ -2794,6 +2794,16 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time) > return atomic_read(&vha->loop_state) == LOOP_READY; > } > > +static void qla_hb_work_fn(struct work_struct *work) ^^^^^^^^^^^^^^ Use qla_heartbeat_work_fn for better readability > +{ > + struct qla_hw_data *ha = container_of(work, > + struct qla_hw_data, hb_work); > + struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); > + > + if (!ha->flags.mbox_busy && base_vha->flags.init_done) > + qla_no_op_mb(base_vha); > +} > + > static void qla2x00_iocb_work_fn(struct work_struct *work) > { > struct scsi_qla_host *vha = container_of(work, > @@ -3232,6 +3242,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) > host->transportt, sht->vendor_id); > > INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn); > + INIT_WORK(&ha->hb_work, qla_hb_work_fn); > > /* Set up the irqs */ > ret = qla2x00_request_irqs(ha, rsp); > @@ -7118,17 +7129,6 @@ qla2x00_do_dpc(void *data) > qla2x00_lip_reset(base_vha); > } > > - if (test_bit(HEARTBEAT_CHK, &base_vha->dpc_flags)) { > - /* > - * if there is a mb in progress then that's > - * enough of a check to see if fw is still ticking. > - */ > - if (!ha->flags.mbox_busy && base_vha->flags.init_done) > - qla_no_op_mb(base_vha); > - > - clear_bit(HEARTBEAT_CHK, &base_vha->dpc_flags); > - } > - > ha->dpc_active = 0; > end_loop: > set_current_state(TASK_INTERRUPTIBLE); > @@ -7187,57 +7187,51 @@ qla2x00_rst_aen(scsi_qla_host_t *vha) > > static bool qla_do_heartbeat(struct scsi_qla_host *vha) > { > - u64 cmd_cnt, prev_cmd_cnt; > - bool do_hb = false; > struct qla_hw_data *ha = vha->hw; > - int i; > + u32 cmpl_cnt; > + u16 i; > + bool do_hb = false; > use do_heartbeat which is self-explanatory and reads better > - /* if cmds are still pending down in fw, then do hb */ > - if (ha->base_qpair->cmd_cnt != ha->base_qpair->cmd_completion_cnt) { > + /* > + * Allow do_hb only if we don’t have any active interrupts, > + * but there are still IOs outstanding with firmware. > + */ > + cmpl_cnt = ha->base_qpair->cmd_completion_cnt; > + if (cmpl_cnt == ha->base_qpair->prev_completion_cnt && > + cmpl_cnt != ha->base_qpair->cmd_cnt) { > do_hb = true; > goto skip; > } > + ha->base_qpair->prev_completion_cnt = cmpl_cnt; > > for (i = 0; i < ha->max_qpairs; i++) { > - if (ha->queue_pair_map[i] && > - ha->queue_pair_map[i]->cmd_cnt != > - ha->queue_pair_map[i]->cmd_completion_cnt) { > - do_hb = true; > - break; > + if (ha->queue_pair_map[i]) { > + cmpl_cnt = ha->queue_pair_map[i]->cmd_completion_cnt; > + if (cmpl_cnt == ha->queue_pair_map[i]->prev_completion_cnt && > + cmpl_cnt != ha->queue_pair_map[i]->cmd_cnt) { > + do_hb = true; > + break; > + } > + ha->queue_pair_map[i]->prev_completion_cnt = cmpl_cnt; > } > } > > skip: > - prev_cmd_cnt = ha->prev_cmd_cnt; > - cmd_cnt = ha->base_qpair->cmd_cnt; > - for (i = 0; i < ha->max_qpairs; i++) { > - if (ha->queue_pair_map[i]) > - cmd_cnt += ha->queue_pair_map[i]->cmd_cnt; > - } > - ha->prev_cmd_cnt = cmd_cnt; > - > - if (!do_hb && ((cmd_cnt - prev_cmd_cnt) > 50)) > - /* > - * IOs are completing before periodic hb check. > - * IOs seems to be running, do hb for sanity check. > - */ > - do_hb = true; > - > return do_hb; > } > > static void qla_heart_beat(struct scsi_qla_host *vha) > { > + struct qla_hw_data *ha = vha->hw; > + > if (vha->vp_idx) > return; > > if (vha->hw->flags.eeh_busy || qla2x00_chip_is_down(vha)) > return; > > - if (qla_do_heartbeat(vha)) { > - set_bit(HEARTBEAT_CHK, &vha->dpc_flags); > - qla2xxx_wake_dpc(vha); > - } > + if (qla_do_heartbeat(vha)) > + queue_work(ha->wq, &ha->hb_work); > } > > /************************************************************************** > -- > 2.19.0.rc0 > -- Himanshu Madhani Oracle Linux Engineering
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index be2eb75ee1a3..d6e131bf1824 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3750,6 +3750,7 @@ struct qla_qpair { struct qla_fw_resources fwres ____cacheline_aligned; u32 cmd_cnt; u32 cmd_completion_cnt; + u32 prev_completion_cnt; }; /* Place holder for FW buffer parameters */ @@ -4607,6 +4608,7 @@ struct qla_hw_data { struct qla_chip_state_84xx *cs84xx; struct isp_operations *isp_ops; struct workqueue_struct *wq; + struct work_struct hb_work; struct qlfc_fw fw_buf; /* FCP_CMND priority support */ @@ -4708,7 +4710,6 @@ struct qla_hw_data { struct qla_hw_data_stat stat; pci_error_state_t pci_error_state; - u64 prev_cmd_cnt; struct dma_pool *purex_dma_pool; struct btree_head32 host_map; @@ -4854,7 +4855,6 @@ typedef struct scsi_qla_host { #define SET_ZIO_THRESHOLD_NEEDED 32 #define ISP_ABORT_TO_ROM 33 #define VPORT_DELETE 34 -#define HEARTBEAT_CHK 38 #define PROCESS_PUREX_IOCB 63 diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index c6b3d0e7489e..a9a4243cb15a 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -7025,12 +7025,14 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) ha->chip_reset++; ha->base_qpair->chip_reset = ha->chip_reset; ha->base_qpair->cmd_cnt = ha->base_qpair->cmd_completion_cnt = 0; + ha->base_qpair->prev_completion_cnt = 0; for (i = 0; i < ha->max_qpairs; i++) { if (ha->queue_pair_map[i]) { ha->queue_pair_map[i]->chip_reset = ha->base_qpair->chip_reset; ha->queue_pair_map[i]->cmd_cnt = ha->queue_pair_map[i]->cmd_completion_cnt = 0; + ha->base_qpair->prev_completion_cnt = 0; } } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index a1e861ecfc01..0454f79a8047 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2794,6 +2794,16 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time) return atomic_read(&vha->loop_state) == LOOP_READY; } +static void qla_hb_work_fn(struct work_struct *work) +{ + struct qla_hw_data *ha = container_of(work, + struct qla_hw_data, hb_work); + struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); + + if (!ha->flags.mbox_busy && base_vha->flags.init_done) + qla_no_op_mb(base_vha); +} + static void qla2x00_iocb_work_fn(struct work_struct *work) { struct scsi_qla_host *vha = container_of(work, @@ -3232,6 +3242,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) host->transportt, sht->vendor_id); INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn); + INIT_WORK(&ha->hb_work, qla_hb_work_fn); /* Set up the irqs */ ret = qla2x00_request_irqs(ha, rsp); @@ -7118,17 +7129,6 @@ qla2x00_do_dpc(void *data) qla2x00_lip_reset(base_vha); } - if (test_bit(HEARTBEAT_CHK, &base_vha->dpc_flags)) { - /* - * if there is a mb in progress then that's - * enough of a check to see if fw is still ticking. - */ - if (!ha->flags.mbox_busy && base_vha->flags.init_done) - qla_no_op_mb(base_vha); - - clear_bit(HEARTBEAT_CHK, &base_vha->dpc_flags); - } - ha->dpc_active = 0; end_loop: set_current_state(TASK_INTERRUPTIBLE); @@ -7187,57 +7187,51 @@ qla2x00_rst_aen(scsi_qla_host_t *vha) static bool qla_do_heartbeat(struct scsi_qla_host *vha) { - u64 cmd_cnt, prev_cmd_cnt; - bool do_hb = false; struct qla_hw_data *ha = vha->hw; - int i; + u32 cmpl_cnt; + u16 i; + bool do_hb = false; - /* if cmds are still pending down in fw, then do hb */ - if (ha->base_qpair->cmd_cnt != ha->base_qpair->cmd_completion_cnt) { + /* + * Allow do_hb only if we don’t have any active interrupts, + * but there are still IOs outstanding with firmware. + */ + cmpl_cnt = ha->base_qpair->cmd_completion_cnt; + if (cmpl_cnt == ha->base_qpair->prev_completion_cnt && + cmpl_cnt != ha->base_qpair->cmd_cnt) { do_hb = true; goto skip; } + ha->base_qpair->prev_completion_cnt = cmpl_cnt; for (i = 0; i < ha->max_qpairs; i++) { - if (ha->queue_pair_map[i] && - ha->queue_pair_map[i]->cmd_cnt != - ha->queue_pair_map[i]->cmd_completion_cnt) { - do_hb = true; - break; + if (ha->queue_pair_map[i]) { + cmpl_cnt = ha->queue_pair_map[i]->cmd_completion_cnt; + if (cmpl_cnt == ha->queue_pair_map[i]->prev_completion_cnt && + cmpl_cnt != ha->queue_pair_map[i]->cmd_cnt) { + do_hb = true; + break; + } + ha->queue_pair_map[i]->prev_completion_cnt = cmpl_cnt; } } skip: - prev_cmd_cnt = ha->prev_cmd_cnt; - cmd_cnt = ha->base_qpair->cmd_cnt; - for (i = 0; i < ha->max_qpairs; i++) { - if (ha->queue_pair_map[i]) - cmd_cnt += ha->queue_pair_map[i]->cmd_cnt; - } - ha->prev_cmd_cnt = cmd_cnt; - - if (!do_hb && ((cmd_cnt - prev_cmd_cnt) > 50)) - /* - * IOs are completing before periodic hb check. - * IOs seems to be running, do hb for sanity check. - */ - do_hb = true; - return do_hb; } static void qla_heart_beat(struct scsi_qla_host *vha) { + struct qla_hw_data *ha = vha->hw; + if (vha->vp_idx) return; if (vha->hw->flags.eeh_busy || qla2x00_chip_is_down(vha)) return; - if (qla_do_heartbeat(vha)) { - set_bit(HEARTBEAT_CHK, &vha->dpc_flags); - qla2xxx_wake_dpc(vha); - } + if (qla_do_heartbeat(vha)) + queue_work(ha->wq, &ha->hb_work); } /**************************************************************************