diff mbox series

[v2,4/7] qla2xxx: Fix hang in task management

Message ID 20230428075339.32551-5-njavali@marvell.com (mailing list archive)
State Accepted
Headers show
Series qla2xxx driver update | expand

Commit Message

Nilesh Javali April 28, 2023, 7:53 a.m. UTC
From: Quinn Tran <qutran@marvell.com>

Task management command hangs where a side
band chip reset failed to nudge the TMF
from it's current send path.

Add additional error check to block TMF
from entering during chip reset and along
the TMF path to cause it to bail out, skip
over abort of marker.

Cc: stable@vger.kernel.org
Signed-off-by: Quinn Tran <qutran@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
---
 drivers/scsi/qla2xxx/qla_def.h  |  4 +++
 drivers/scsi/qla2xxx/qla_init.c | 60 +++++++++++++++++++++++++++++++--
 2 files changed, 61 insertions(+), 3 deletions(-)

Comments

Himanshu Madhani May 1, 2023, 3:08 p.m. UTC | #1
> On Apr 28, 2023, at 12:53 AM, Nilesh Javali <njavali@marvell.com> wrote:
> 
> From: Quinn Tran <qutran@marvell.com>
> 
> Task management command hangs where a side
> band chip reset failed to nudge the TMF
> from it's current send path.
> 
> Add additional error check to block TMF
> from entering during chip reset and along
> the TMF path to cause it to bail out, skip
> over abort of marker.
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Quinn Tran <qutran@marvell.com>
> Signed-off-by: Nilesh Javali <njavali@marvell.com>
> ---
> drivers/scsi/qla2xxx/qla_def.h  |  4 +++
> drivers/scsi/qla2xxx/qla_init.c | 60 +++++++++++++++++++++++++++++++--
> 2 files changed, 61 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
> index 0971150953a9..3e0be0136cad 100644
> --- a/drivers/scsi/qla2xxx/qla_def.h
> +++ b/drivers/scsi/qla2xxx/qla_def.h
> @@ -5516,4 +5516,8 @@ struct ql_vnd_tgt_stats_resp {
> _fp->disc_state, _fp->scan_state, _fp->loop_id, _fp->deleted, \
> _fp->flags
> 
> +#define TMF_NOT_READY(_fcport) \
> + (!_fcport || IS_SESSION_DELETED(_fcport) || atomic_read(&_fcport->state) != FCS_ONLINE || \
> + !_fcport->vha->hw->flags.fw_started)
> +
> #endif
> diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
> index 17649cf9c39d..b61597d6882c 100644
> --- a/drivers/scsi/qla2xxx/qla_init.c
> +++ b/drivers/scsi/qla2xxx/qla_init.c
> @@ -1996,6 +1996,11 @@ qla2x00_tmf_iocb_timeout(void *data)
> int rc, h;
> unsigned long flags;
> 
> + if (sp->type == SRB_MARKER) {
> + complete(&tmf->u.tmf.comp);
> + return;
> + }
> +
> rc = qla24xx_async_abort_cmd(sp, false);
> if (rc) {
> spin_lock_irqsave(sp->qpair->qp_lock_ptr, flags);
> @@ -2023,6 +2028,7 @@ static void qla_marker_sp_done(srb_t *sp, int res)
>    sp->handle, sp->fcport->d_id.b24, sp->u.iocb_cmd.u.tmf.flags,
>    sp->u.iocb_cmd.u.tmf.lun, sp->qpair->id);
> 
> + sp->u.iocb_cmd.u.tmf.data = res;
> complete(&tmf->u.tmf.comp);
> }
> 
> @@ -2039,6 +2045,11 @@ static void qla_marker_sp_done(srb_t *sp, int res)
> } while (cnt); \
> }
> 
> +/**
> + * qla26xx_marker: send marker IOCB and wait for the completion of it.
> + * @arg: pointer to argument list.
> + *    It is assume caller will provide an fcport pointer and modifier
> + */
> static int
> qla26xx_marker(struct tmf_arg *arg)
> {
> @@ -2048,6 +2059,14 @@ qla26xx_marker(struct tmf_arg *arg)
> int rval = QLA_FUNCTION_FAILED;
> fc_port_t *fcport = arg->fcport;
> 
> + if (TMF_NOT_READY(arg->fcport)) {
> + ql_dbg(ql_dbg_taskm, vha, 0x8039,
> +    "FC port not ready for marker loop-id=%x portid=%06x modifier=%x lun=%lld qp=%d.\n",
> +    fcport->loop_id, fcport->d_id.b24,
> +    arg->modifier, arg->lun, arg->qpair->id);
> + return QLA_SUSPENDED;
> + }
> +
> /* ref: INIT */
> sp = qla2xxx_get_qpair_sp(vha, arg->qpair, fcport, GFP_KERNEL);
> if (!sp)
> @@ -2074,11 +2093,19 @@ qla26xx_marker(struct tmf_arg *arg)
> 
> if (rval != QLA_SUCCESS) {
> ql_log(ql_log_warn, vha, 0x8031,
> -    "Marker IOCB failed (%x).\n", rval);
> +    "Marker IOCB send failure (%x).\n", rval);
> goto done_free_sp;
> }
> 
> wait_for_completion(&tm_iocb->u.tmf.comp);
> + rval = tm_iocb->u.tmf.data;
> +
> + if (rval != QLA_SUCCESS) {
> + ql_log(ql_log_warn, vha, 0x8019,
> +    "Marker failed hdl=%x loop-id=%x portid=%06x modifier=%x lun=%lld qp=%d rval %d.\n",
> +    sp->handle, fcport->loop_id, fcport->d_id.b24,
> +    arg->modifier, arg->lun, sp->qpair->id, rval);
> + }
> 
> done_free_sp:
> /* ref: INIT */
> @@ -2091,6 +2118,8 @@ static void qla2x00_tmf_sp_done(srb_t *sp, int res)
> {
> struct srb_iocb *tmf = &sp->u.iocb_cmd;
> 
> + if (res)
> + tmf->u.tmf.data = res;
> complete(&tmf->u.tmf.comp);
> }
> 
> @@ -2104,6 +2133,14 @@ __qla2x00_async_tm_cmd(struct tmf_arg *arg)
> 
> fc_port_t *fcport = arg->fcport;
> 
> + if (TMF_NOT_READY(arg->fcport)) {
> + ql_dbg(ql_dbg_taskm, vha, 0x8032,
> +    "FC port not ready for TM command loop-id=%x portid=%06x modifier=%x lun=%lld qp=%d.\n",
> +    fcport->loop_id, fcport->d_id.b24,
> +    arg->modifier, arg->lun, arg->qpair->id);
> + return QLA_SUSPENDED;
> + }
> +
> /* ref: INIT */
> sp = qla2xxx_get_qpair_sp(vha, arg->qpair, fcport, GFP_KERNEL);
> if (!sp)
> @@ -2178,7 +2215,9 @@ int qla_get_tmf(fc_port_t *fcport)
> msleep(1);
> 
> spin_lock_irqsave(&ha->tgt.sess_lock, flags);
> - if (fcport->deleted) {
> + if (TMF_NOT_READY(fcport)) {
> + ql_log(ql_log_warn, vha, 0x802c,
> +    "Unable to acquire TM resource due to disruption.\n");
> rc = EIO;
> break;
> }
> @@ -2204,7 +2243,10 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint64_t lun,
> struct scsi_qla_host *vha = fcport->vha;
> struct qla_qpair *qpair;
> struct tmf_arg a;
> - int i, rval;
> + int i, rval = QLA_SUCCESS;
> +
> + if (TMF_NOT_READY(fcport))
> + return QLA_SUSPENDED;
> 
> a.vha = fcport->vha;
> a.fcport = fcport;
> @@ -2223,6 +2265,14 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint64_t lun,
> qpair = vha->hw->queue_pair_map[i];
> if (!qpair)
> continue;
> +
> + if (TMF_NOT_READY(fcport)) {
> + ql_log(ql_log_warn, vha, 0x8026,
> +    "Unable to send TM due to disruption.\n");
> + rval = QLA_SUSPENDED;
> + break;
> + }
> +
> a.qpair = qpair;
> a.flags = flags|TCF_NOTMCMD_TO_TARGET;
> rval = __qla2x00_async_tm_cmd(&a);
> @@ -2231,10 +2281,14 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint64_t lun,
> }
> }
> 
> + if (rval)
> + goto bailout;
> +
> a.qpair = vha->hw->base_qpair;
> a.flags = flags;
> rval = __qla2x00_async_tm_cmd(&a);
> 
> +bailout:
> if (a.modifier == MK_SYNC_ID_LUN)
> qla_put_tmf(fcport);
> 
> -- 
> 2.23.1
> 

Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
diff mbox series

Patch

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 0971150953a9..3e0be0136cad 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -5516,4 +5516,8 @@  struct ql_vnd_tgt_stats_resp {
 	_fp->disc_state, _fp->scan_state, _fp->loop_id, _fp->deleted, \
 	_fp->flags
 
+#define TMF_NOT_READY(_fcport) \
+	(!_fcport || IS_SESSION_DELETED(_fcport) || atomic_read(&_fcport->state) != FCS_ONLINE || \
+	!_fcport->vha->hw->flags.fw_started)
+
 #endif
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 17649cf9c39d..b61597d6882c 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -1996,6 +1996,11 @@  qla2x00_tmf_iocb_timeout(void *data)
 	int rc, h;
 	unsigned long flags;
 
+	if (sp->type == SRB_MARKER) {
+		complete(&tmf->u.tmf.comp);
+		return;
+	}
+
 	rc = qla24xx_async_abort_cmd(sp, false);
 	if (rc) {
 		spin_lock_irqsave(sp->qpair->qp_lock_ptr, flags);
@@ -2023,6 +2028,7 @@  static void qla_marker_sp_done(srb_t *sp, int res)
 		    sp->handle, sp->fcport->d_id.b24, sp->u.iocb_cmd.u.tmf.flags,
 		    sp->u.iocb_cmd.u.tmf.lun, sp->qpair->id);
 
+	sp->u.iocb_cmd.u.tmf.data = res;
 	complete(&tmf->u.tmf.comp);
 }
 
@@ -2039,6 +2045,11 @@  static void qla_marker_sp_done(srb_t *sp, int res)
 	} while (cnt); \
 }
 
+/**
+ * qla26xx_marker: send marker IOCB and wait for the completion of it.
+ * @arg: pointer to argument list.
+ *    It is assume caller will provide an fcport pointer and modifier
+ */
 static int
 qla26xx_marker(struct tmf_arg *arg)
 {
@@ -2048,6 +2059,14 @@  qla26xx_marker(struct tmf_arg *arg)
 	int rval = QLA_FUNCTION_FAILED;
 	fc_port_t *fcport = arg->fcport;
 
+	if (TMF_NOT_READY(arg->fcport)) {
+		ql_dbg(ql_dbg_taskm, vha, 0x8039,
+		    "FC port not ready for marker loop-id=%x portid=%06x modifier=%x lun=%lld qp=%d.\n",
+		    fcport->loop_id, fcport->d_id.b24,
+		    arg->modifier, arg->lun, arg->qpair->id);
+		return QLA_SUSPENDED;
+	}
+
 	/* ref: INIT */
 	sp = qla2xxx_get_qpair_sp(vha, arg->qpair, fcport, GFP_KERNEL);
 	if (!sp)
@@ -2074,11 +2093,19 @@  qla26xx_marker(struct tmf_arg *arg)
 
 	if (rval != QLA_SUCCESS) {
 		ql_log(ql_log_warn, vha, 0x8031,
-		    "Marker IOCB failed (%x).\n", rval);
+		    "Marker IOCB send failure (%x).\n", rval);
 		goto done_free_sp;
 	}
 
 	wait_for_completion(&tm_iocb->u.tmf.comp);
+	rval = tm_iocb->u.tmf.data;
+
+	if (rval != QLA_SUCCESS) {
+		ql_log(ql_log_warn, vha, 0x8019,
+		    "Marker failed hdl=%x loop-id=%x portid=%06x modifier=%x lun=%lld qp=%d rval %d.\n",
+		    sp->handle, fcport->loop_id, fcport->d_id.b24,
+		    arg->modifier, arg->lun, sp->qpair->id, rval);
+	}
 
 done_free_sp:
 	/* ref: INIT */
@@ -2091,6 +2118,8 @@  static void qla2x00_tmf_sp_done(srb_t *sp, int res)
 {
 	struct srb_iocb *tmf = &sp->u.iocb_cmd;
 
+	if (res)
+		tmf->u.tmf.data = res;
 	complete(&tmf->u.tmf.comp);
 }
 
@@ -2104,6 +2133,14 @@  __qla2x00_async_tm_cmd(struct tmf_arg *arg)
 
 	fc_port_t *fcport = arg->fcport;
 
+	if (TMF_NOT_READY(arg->fcport)) {
+		ql_dbg(ql_dbg_taskm, vha, 0x8032,
+		    "FC port not ready for TM command loop-id=%x portid=%06x modifier=%x lun=%lld qp=%d.\n",
+		    fcport->loop_id, fcport->d_id.b24,
+		    arg->modifier, arg->lun, arg->qpair->id);
+		return QLA_SUSPENDED;
+	}
+
 	/* ref: INIT */
 	sp = qla2xxx_get_qpair_sp(vha, arg->qpair, fcport, GFP_KERNEL);
 	if (!sp)
@@ -2178,7 +2215,9 @@  int qla_get_tmf(fc_port_t *fcport)
 		msleep(1);
 
 		spin_lock_irqsave(&ha->tgt.sess_lock, flags);
-		if (fcport->deleted) {
+		if (TMF_NOT_READY(fcport)) {
+			ql_log(ql_log_warn, vha, 0x802c,
+			    "Unable to acquire TM resource due to disruption.\n");
 			rc = EIO;
 			break;
 		}
@@ -2204,7 +2243,10 @@  qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint64_t lun,
 	struct scsi_qla_host *vha = fcport->vha;
 	struct qla_qpair *qpair;
 	struct tmf_arg a;
-	int i, rval;
+	int i, rval = QLA_SUCCESS;
+
+	if (TMF_NOT_READY(fcport))
+		return QLA_SUSPENDED;
 
 	a.vha = fcport->vha;
 	a.fcport = fcport;
@@ -2223,6 +2265,14 @@  qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint64_t lun,
 			qpair = vha->hw->queue_pair_map[i];
 			if (!qpair)
 				continue;
+
+			if (TMF_NOT_READY(fcport)) {
+				ql_log(ql_log_warn, vha, 0x8026,
+				    "Unable to send TM due to disruption.\n");
+				rval = QLA_SUSPENDED;
+				break;
+			}
+
 			a.qpair = qpair;
 			a.flags = flags|TCF_NOTMCMD_TO_TARGET;
 			rval = __qla2x00_async_tm_cmd(&a);
@@ -2231,10 +2281,14 @@  qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint64_t lun,
 		}
 	}
 
+	if (rval)
+		goto bailout;
+
 	a.qpair = vha->hw->base_qpair;
 	a.flags = flags;
 	rval = __qla2x00_async_tm_cmd(&a);
 
+bailout:
 	if (a.modifier == MK_SYNC_ID_LUN)
 		qla_put_tmf(fcport);