Message ID | 20191105150657.8092-3-hmadhani@marvell.com (mailing list archive) |
---|---|
State | Mainlined |
Commit | 71c80b75ce8f08c0978ce9a9816b81b5c3ce5e12 |
Headers | show |
Series | qla2xxx: Bug Fixes for the driver | expand |
On Tue, 2019-11-05 at 07:06 -0800, Himanshu Madhani wrote: > From: Quinn Tran <qutran@marvell.com> > > On switch, fabric and mgt command timeout, driver > send Abort to tell FW to return the original command. > If abort is timeout, then return both Abort and > original command for cleanup. > > Fixes: 219d27d7147e0 ("scsi: qla2xxx: Fix race conditions in the code for aborting SCSI commands") > Cc: stable@vger.kernel.org # 5.2 > Signed-off-by: Quinn Tran <qutran@marvell.com> > Signed-off-by: Himanshu Madhani <hmadhani@marvell.com> > --- > drivers/scsi/qla2xxx/qla_def.h | 1 + > drivers/scsi/qla2xxx/qla_init.c | 18 ++++++++++++++++++ > 2 files changed, 19 insertions(+) > > diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h > index 721ee7f09b39..ef9bb3c7ad6f 100644 > --- a/drivers/scsi/qla2xxx/qla_def.h > +++ b/drivers/scsi/qla2xxx/qla_def.h > @@ -604,6 +604,7 @@ typedef struct srb { > const char *name; > int iocbs; > struct qla_qpair *qpair; > + struct srb *cmd_sp; > struct list_head elem; > u32 gen1; /* scratch */ > u32 gen2; /* scratch */ > diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c > index 5db8ad832893..7fdbe041cc19 100644 > --- a/drivers/scsi/qla2xxx/qla_init.c > +++ b/drivers/scsi/qla2xxx/qla_init.c > @@ -101,8 +101,22 @@ static void qla24xx_abort_iocb_timeout(void *data) > u32 handle; > unsigned long flags; > > + if (sp->cmd_sp) > + ql_dbg(ql_dbg_async, sp->vha, 0x507c, > + "Abort timeout - cmd hdl=%x, cmd type=%x hdl=%x, type=%x\n", > + sp->cmd_sp->handle, sp->cmd_sp->type, > + sp->handle, sp->type); > + else > + ql_dbg(ql_dbg_async, sp->vha, 0x507c, > + "Abort timeout 2 - hdl=%x, type=%x\n", > + sp->handle, sp->type); > + > spin_lock_irqsave(qpair->qp_lock_ptr, flags); > for (handle = 1; handle < qpair->req->num_outstanding_cmds; handle++) { > + if (sp->cmd_sp && (qpair->req->outstanding_cmds[handle] == > + sp->cmd_sp)) > + qpair->req->outstanding_cmds[handle] = NULL; > + > /* removing the abort */ > if (qpair->req->outstanding_cmds[handle] == sp) { > qpair->req->outstanding_cmds[handle] = NULL; > @@ -111,6 +125,9 @@ static void qla24xx_abort_iocb_timeout(void *data) > } > spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); > > + if (sp->cmd_sp) > + sp->cmd_sp->done(sp->cmd_sp, QLA_OS_TIMER_EXPIRED); > + > abt->u.abt.comp_status = CS_TIMEOUT; > sp->done(sp, QLA_OS_TIMER_EXPIRED); > } > @@ -142,6 +159,7 @@ static int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) > sp->type = SRB_ABT_CMD; > sp->name = "abort"; > sp->qpair = cmd_sp->qpair; > + sp->cmd_sp = cmd_sp; > if (wait) > sp->flags = SRB_WAKEUP_ON_COMP; > Reviewed-by: Ewan D. Milne <emilne@redhat.com>
On 11/5/19 7:06 AM, Himanshu Madhani wrote: > From: Quinn Tran <qutran@marvell.com> > > On switch, fabric and mgt command timeout, driver > send Abort to tell FW to return the original command. > If abort is timeout, then return both Abort and > original command for cleanup. > > Fixes: 219d27d7147e0 ("scsi: qla2xxx: Fix race conditions in the code for aborting SCSI commands") > Cc: stable@vger.kernel.org # 5.2 > Signed-off-by: Quinn Tran <qutran@marvell.com> > Signed-off-by: Himanshu Madhani <hmadhani@marvell.com> > --- > drivers/scsi/qla2xxx/qla_def.h | 1 + > drivers/scsi/qla2xxx/qla_init.c | 18 ++++++++++++++++++ > 2 files changed, 19 insertions(+) > > diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h > index 721ee7f09b39..ef9bb3c7ad6f 100644 > --- a/drivers/scsi/qla2xxx/qla_def.h > +++ b/drivers/scsi/qla2xxx/qla_def.h > @@ -604,6 +604,7 @@ typedef struct srb { > const char *name; > int iocbs; > struct qla_qpair *qpair; > + struct srb *cmd_sp; > struct list_head elem; > u32 gen1; /* scratch */ > u32 gen2; /* scratch */ > diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c > index 5db8ad832893..7fdbe041cc19 100644 > --- a/drivers/scsi/qla2xxx/qla_init.c > +++ b/drivers/scsi/qla2xxx/qla_init.c > @@ -101,8 +101,22 @@ static void qla24xx_abort_iocb_timeout(void *data) > u32 handle; > unsigned long flags; > > + if (sp->cmd_sp) > + ql_dbg(ql_dbg_async, sp->vha, 0x507c, > + "Abort timeout - cmd hdl=%x, cmd type=%x hdl=%x, type=%x\n", > + sp->cmd_sp->handle, sp->cmd_sp->type, > + sp->handle, sp->type); > + else > + ql_dbg(ql_dbg_async, sp->vha, 0x507c, > + "Abort timeout 2 - hdl=%x, type=%x\n", > + sp->handle, sp->type); > + > spin_lock_irqsave(qpair->qp_lock_ptr, flags); > for (handle = 1; handle < qpair->req->num_outstanding_cmds; handle++) { > + if (sp->cmd_sp && (qpair->req->outstanding_cmds[handle] == > + sp->cmd_sp)) > + qpair->req->outstanding_cmds[handle] = NULL; > + > /* removing the abort */ > if (qpair->req->outstanding_cmds[handle] == sp) { > qpair->req->outstanding_cmds[handle] = NULL; > @@ -111,6 +125,9 @@ static void qla24xx_abort_iocb_timeout(void *data) > } > spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); > > + if (sp->cmd_sp) > + sp->cmd_sp->done(sp->cmd_sp, QLA_OS_TIMER_EXPIRED); > + > abt->u.abt.comp_status = CS_TIMEOUT; > sp->done(sp, QLA_OS_TIMER_EXPIRED); > } > @@ -142,6 +159,7 @@ static int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) > sp->type = SRB_ABT_CMD; > sp->name = "abort"; > sp->qpair = cmd_sp->qpair; > + sp->cmd_sp = cmd_sp; > if (wait) > sp->flags = SRB_WAKEUP_ON_COMP; > After an abort SRB has been submitted it can happen that the command that should be aborted (cmd_sp) completes before the abort SRB completes. I think in that case sp->cmd_sp should be cleared. However, I don't see the code that does that in the above patch. Since the block layer already keeps track of which commands are outstanding, is it really necessary to add the 'cmd_sp' pointer in struct srb? Has it been considered to use blk_mq_tagset_busy_iter() instead of iterating over qpair->req->num_outstanding_cmds in qla24xx_abort_iocb_timeout()? Thanks, Bart.
Bart, > On Nov 5, 2019, at 10:57 AM, Bart Van Assche <bvanassche@acm.org> wrote: > > External Email > > ---------------------------------------------------------------------- > On 11/5/19 7:06 AM, Himanshu Madhani wrote: >> From: Quinn Tran <qutran@marvell.com> >> On switch, fabric and mgt command timeout, driver >> send Abort to tell FW to return the original command. >> If abort is timeout, then return both Abort and >> original command for cleanup. >> Fixes: 219d27d7147e0 ("scsi: qla2xxx: Fix race conditions in the code for aborting SCSI commands") >> Cc: stable@vger.kernel.org # 5.2 >> Signed-off-by: Quinn Tran <qutran@marvell.com> >> Signed-off-by: Himanshu Madhani <hmadhani@marvell.com> >> --- >> drivers/scsi/qla2xxx/qla_def.h | 1 + >> drivers/scsi/qla2xxx/qla_init.c | 18 ++++++++++++++++++ >> 2 files changed, 19 insertions(+) >> diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h >> index 721ee7f09b39..ef9bb3c7ad6f 100644 >> --- a/drivers/scsi/qla2xxx/qla_def.h >> +++ b/drivers/scsi/qla2xxx/qla_def.h >> @@ -604,6 +604,7 @@ typedef struct srb { >> const char *name; >> int iocbs; >> struct qla_qpair *qpair; >> + struct srb *cmd_sp; >> struct list_head elem; >> u32 gen1; /* scratch */ >> u32 gen2; /* scratch */ >> diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c >> index 5db8ad832893..7fdbe041cc19 100644 >> --- a/drivers/scsi/qla2xxx/qla_init.c >> +++ b/drivers/scsi/qla2xxx/qla_init.c >> @@ -101,8 +101,22 @@ static void qla24xx_abort_iocb_timeout(void *data) >> u32 handle; >> unsigned long flags; >> + if (sp->cmd_sp) >> + ql_dbg(ql_dbg_async, sp->vha, 0x507c, >> + "Abort timeout - cmd hdl=%x, cmd type=%x hdl=%x, type=%x\n", >> + sp->cmd_sp->handle, sp->cmd_sp->type, >> + sp->handle, sp->type); >> + else >> + ql_dbg(ql_dbg_async, sp->vha, 0x507c, >> + "Abort timeout 2 - hdl=%x, type=%x\n", >> + sp->handle, sp->type); >> + >> spin_lock_irqsave(qpair->qp_lock_ptr, flags); >> for (handle = 1; handle < qpair->req->num_outstanding_cmds; handle++) { >> + if (sp->cmd_sp && (qpair->req->outstanding_cmds[handle] == >> + sp->cmd_sp)) >> + qpair->req->outstanding_cmds[handle] = NULL; >> + >> /* removing the abort */ >> if (qpair->req->outstanding_cmds[handle] == sp) { >> qpair->req->outstanding_cmds[handle] = NULL; >> @@ -111,6 +125,9 @@ static void qla24xx_abort_iocb_timeout(void *data) >> } >> spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); >> + if (sp->cmd_sp) >> + sp->cmd_sp->done(sp->cmd_sp, QLA_OS_TIMER_EXPIRED); >> + >> abt->u.abt.comp_status = CS_TIMEOUT; >> sp->done(sp, QLA_OS_TIMER_EXPIRED); >> } >> @@ -142,6 +159,7 @@ static int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) >> sp->type = SRB_ABT_CMD; >> sp->name = "abort"; >> sp->qpair = cmd_sp->qpair; >> + sp->cmd_sp = cmd_sp; >> if (wait) >> sp->flags = SRB_WAKEUP_ON_COMP; >> > > After an abort SRB has been submitted it can happen that the command that should be aborted (cmd_sp) completes before the abort SRB completes. I think in that case sp->cmd_sp should be cleared. However, I don't see the code that does that in the above patch. > Good point there and agree that there is a small window where we are not clearing the cmd_sp when the original command is completed. However for this series, I would like to have this patch merged as-is and we’ll send a follow up patch to cover that window. We are doing more cleanup in the abort handling code path to make it more robust. This would be good candidate for that series. > Since the block layer already keeps track of which commands are outstanding, is it really necessary to add the 'cmd_sp' pointer in struct srb? Has it been considered to use blk_mq_tagset_busy_iter() instead of iterating over qpair->req->num_outstanding_cmds in qla24xx_abort_iocb_timeout()? > Your suggestion to consider blk_mq_tagset_busy_iter() is correct for the commands from block layer. However, qpair->req->num_outstanding_cmds is the qla2xxx driver’s commend array to keep track of which commands that driver needs to process and so we do need to iterate over it and track the commands that we need to send aborts for. - Himanshu > Thanks, > > Bart. > >
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 721ee7f09b39..ef9bb3c7ad6f 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -604,6 +604,7 @@ typedef struct srb { const char *name; int iocbs; struct qla_qpair *qpair; + struct srb *cmd_sp; struct list_head elem; u32 gen1; /* scratch */ u32 gen2; /* scratch */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 5db8ad832893..7fdbe041cc19 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -101,8 +101,22 @@ static void qla24xx_abort_iocb_timeout(void *data) u32 handle; unsigned long flags; + if (sp->cmd_sp) + ql_dbg(ql_dbg_async, sp->vha, 0x507c, + "Abort timeout - cmd hdl=%x, cmd type=%x hdl=%x, type=%x\n", + sp->cmd_sp->handle, sp->cmd_sp->type, + sp->handle, sp->type); + else + ql_dbg(ql_dbg_async, sp->vha, 0x507c, + "Abort timeout 2 - hdl=%x, type=%x\n", + sp->handle, sp->type); + spin_lock_irqsave(qpair->qp_lock_ptr, flags); for (handle = 1; handle < qpair->req->num_outstanding_cmds; handle++) { + if (sp->cmd_sp && (qpair->req->outstanding_cmds[handle] == + sp->cmd_sp)) + qpair->req->outstanding_cmds[handle] = NULL; + /* removing the abort */ if (qpair->req->outstanding_cmds[handle] == sp) { qpair->req->outstanding_cmds[handle] = NULL; @@ -111,6 +125,9 @@ static void qla24xx_abort_iocb_timeout(void *data) } spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); + if (sp->cmd_sp) + sp->cmd_sp->done(sp->cmd_sp, QLA_OS_TIMER_EXPIRED); + abt->u.abt.comp_status = CS_TIMEOUT; sp->done(sp, QLA_OS_TIMER_EXPIRED); } @@ -142,6 +159,7 @@ static int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) sp->type = SRB_ABT_CMD; sp->name = "abort"; sp->qpair = cmd_sp->qpair; + sp->cmd_sp = cmd_sp; if (wait) sp->flags = SRB_WAKEUP_ON_COMP;