Message ID | 1449535747-2850-11-git-send-email-himanshu.madhani@qlogic.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
> -void qlt_abort_cmd(struct qla_tgt_cmd *cmd) > +int qlt_abort_cmd(struct qla_tgt_cmd *cmd) > { > struct qla_tgt *tgt = cmd->tgt; > struct scsi_qla_host *vha = tgt->vha; > struct se_cmd *se_cmd = &cmd->se_cmd; > + unsigned long flags,refcount; > > ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014, > "qla_target(%d): terminating exchange for aborted cmd=%p " > "(se_cmd=%p, tag=%llu)", vha->vp_idx, cmd, &cmd->se_cmd, > se_cmd->tag); > > + spin_lock_irqsave(&cmd->cmd_lock, flags); > + if (cmd->aborted) { > + spin_unlock_irqrestore(&cmd->cmd_lock, flags); > + > + /* It's normal to see 2 calls in this path: > + * 1) XFER Rdy completion + CMD_T_ABORT > + * 2) TCM TMR - drain_state_list > + */ > + refcount = atomic_read(&cmd->se_cmd.cmd_kref.refcount); > + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, > + "multiple abort. %p refcount %lx" > + "transport_state %x, t_state %x, se_cmd_flags %x \n", > + cmd, refcount,cmd->se_cmd.transport_state, > + cmd->se_cmd.t_state,cmd->se_cmd.se_cmd_flags); > + > + return EIO; > + } Err, no. Looking into the refcount inside a kref is never the right thing to do. > +typedef enum { > + /* > + * BIT_0 - Atio Arrival / schedule to work > + * BIT_1 - qlt_do_work > + * BIT_2 - qlt_do work failed > + * BIT_3 - xfer rdy/tcm_qla2xxx_write_pending > + * BIT_4 - read respond/tcm_qla2xx_queue_data_in > + * BIT_5 - status respond / tcm_qla2xx_queue_status > + * BIT_6 - tcm request to abort/Term exchange. > + * pre_xmit_response->qlt_send_term_exchange > + * BIT_7 - SRR received (qlt_handle_srr->qlt_xmit_response) > + * BIT_8 - SRR received (qlt_handle_srr->qlt_rdy_to_xfer) > + * BIT_9 - SRR received (qla_handle_srr->qlt_send_term_exchange) > + * BIT_10 - Data in - hanlde_data->tcm_qla2xxx_handle_data > + > + * BIT_12 - good completion - qlt_ctio_do_completion -->free_cmd > + * BIT_13 - Bad completion - > + * qlt_ctio_do_completion --> qlt_term_ctio_exchange > + * BIT_14 - Back end data received/sent. > + * BIT_15 - SRR prepare ctio > + * BIT_16 - complete free > + * BIT_17 - flush - qlt_abort_cmd_on_host_reset > + * BIT_18 - completion w/abort status > + * BIT_19 - completion w/unknown status > + * BIT_20 - tcm_qla2xxx_free_cmd Please use descriptive names for these flags in the source code! > + BUG_ON(cmd->cmd_flags & BIT_20); > + cmd->cmd_flags |= BIT_20; > + And no crazieness like this. While we're at it: what synchronizes access to ->cmd_flags? > @@ -466,13 +484,25 @@ static int tcm_qla2xxx_handle_cmd(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd, > static void tcm_qla2xxx_handle_data_work(struct work_struct *work) > { > struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work); > + unsigned long flags; > > /* > * Ensure that the complete FCP WRITE payload has been received. > * Otherwise return an exception via CHECK_CONDITION status. > */ > cmd->cmd_in_wq = 0; > - cmd->cmd_flags |= BIT_11; > + > + spin_lock_irqsave(&cmd->cmd_lock, flags); > + cmd->cmd_flags |= CMD_FLAG_DATA_WORK; > + if (cmd->aborted) { > + cmd->cmd_flags |= CMD_FLAG_DATA_WORK_FREE; > + spin_unlock_irqrestore(&cmd->cmd_lock, flags); > + > + tcm_qla2xxx_free_cmd(cmd); > + return; > + } > + spin_unlock_irqrestore(&cmd->cmd_lock, flags); All these abort flag hacks look very suspicios. Can you explain the exact theory of operation behind them? -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12/08/2015 01:48 AM, Himanshu Madhani wrote: > From: Quinn Tran <quinn.tran@qlogic.com> > > During lun reset, TMR thread from TCM would issue abort > to qla driver. At abort time, each command is in different > state. Depending on the state, qla will use the TMR thread > to trigger a command free(cmd_kref--) if command is not > down at firmware. > > Signed-off-by: Quinn Tran <quinn.tran@qlogic.com> > Signed-off-by: Himanshu Madhani <himanshu.madhani@qlogic.com> > --- > drivers/scsi/qla2xxx/qla_target.c | 60 +++++++++++++++++++++-------- > drivers/scsi/qla2xxx/qla_target.h | 59 +++++++++++++++++------------ > drivers/scsi/qla2xxx/tcm_qla2xxx.c | 73 ++++++++++++++++++++++++++++++++++- > 3 files changed, 147 insertions(+), 45 deletions(-) > > diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c > index 638940f..4d42b79 100644 > --- a/drivers/scsi/qla2xxx/qla_target.c > +++ b/drivers/scsi/qla2xxx/qla_target.c > @@ -105,7 +105,7 @@ static void qlt_response_pkt(struct scsi_qla_host *ha, response_t *pkt); > static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, > int fn, void *iocb, int flags); > static void qlt_send_term_exchange(struct scsi_qla_host *ha, struct qla_tgt_cmd > - *cmd, struct atio_from_isp *atio, int ha_locked); > + *cmd, struct atio_from_isp *atio, int ha_locked, int ul_abort); > static void qlt_reject_free_srr_imm(struct scsi_qla_host *ha, > struct qla_tgt_srr_imm *imm, int ha_lock); > static void qlt_abort_cmd_on_host_reset(struct scsi_qla_host *vha, > @@ -2646,7 +2646,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, > /* no need to terminate. FW already freed exchange. */ > qlt_abort_cmd_on_host_reset(cmd->vha, cmd); > else > - qlt_send_term_exchange(vha, cmd, &cmd->atio, 1); > + qlt_send_term_exchange(vha, cmd, &cmd->atio, 1, 0); > spin_unlock_irqrestore(&ha->hardware_lock, flags); > return 0; > } > @@ -3154,7 +3154,8 @@ static int __qlt_send_term_exchange(struct scsi_qla_host *vha, > } > > static void qlt_send_term_exchange(struct scsi_qla_host *vha, > - struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked) > + struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked, > + int ul_abort) > { > unsigned long flags = 0; > int rc; > @@ -3174,8 +3175,7 @@ static void qlt_send_term_exchange(struct scsi_qla_host *vha, > qlt_alloc_qfull_cmd(vha, atio, 0, 0); > > done: > - if (cmd && (!cmd->aborted || > - !cmd->cmd_sent_to_fw)) { > + if (cmd && !ul_abort && !cmd->aborted) { > if (cmd->sg_mapped) > qlt_unmap_sg(vha, cmd); > vha->hw->tgt.tgt_ops->free_cmd(cmd); > @@ -3234,21 +3234,43 @@ static void qlt_chk_exch_leak_thresh_hold(struct scsi_qla_host *vha) > > } > > -void qlt_abort_cmd(struct qla_tgt_cmd *cmd) > +int qlt_abort_cmd(struct qla_tgt_cmd *cmd) > { > struct qla_tgt *tgt = cmd->tgt; > struct scsi_qla_host *vha = tgt->vha; > struct se_cmd *se_cmd = &cmd->se_cmd; > + unsigned long flags,refcount; > > ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014, > "qla_target(%d): terminating exchange for aborted cmd=%p " > "(se_cmd=%p, tag=%llu)", vha->vp_idx, cmd, &cmd->se_cmd, > se_cmd->tag); > > + spin_lock_irqsave(&cmd->cmd_lock, flags); > + if (cmd->aborted) { > + spin_unlock_irqrestore(&cmd->cmd_lock, flags); > + > + /* It's normal to see 2 calls in this path: > + * 1) XFER Rdy completion + CMD_T_ABORT > + * 2) TCM TMR - drain_state_list > + */ > + refcount = atomic_read(&cmd->se_cmd.cmd_kref.refcount); > + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, > + "multiple abort. %p refcount %lx" > + "transport_state %x, t_state %x, se_cmd_flags %x \n", > + cmd, refcount,cmd->se_cmd.transport_state, > + cmd->se_cmd.t_state,cmd->se_cmd.se_cmd_flags); > + > + return EIO; > + } > + > cmd->aborted = 1; > cmd->cmd_flags |= BIT_6; > + spin_unlock_irqrestore(&cmd->cmd_lock, flags); > + > + qlt_send_term_exchange(vha, cmd, &cmd->atio, 0, 1); > > - qlt_send_term_exchange(vha, cmd, &cmd->atio, 0); > + return 0; > } > EXPORT_SYMBOL(qlt_abort_cmd); > > @@ -3263,6 +3285,9 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd) > > BUG_ON(cmd->cmd_in_wq); > > + if (cmd->sg_mapped) > + qlt_unmap_sg(cmd->vha, cmd); > + > if (!cmd->q_full) > qlt_decr_num_pend_cmds(cmd->vha); > > @@ -3380,7 +3405,7 @@ static int qlt_term_ctio_exchange(struct scsi_qla_host *vha, void *ctio, > term = 1; > > if (term) > - qlt_send_term_exchange(vha, cmd, &cmd->atio, 1); > + qlt_send_term_exchange(vha, cmd, &cmd->atio, 1, 0); > > return term; > } > @@ -3735,6 +3760,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) > goto out_term; > } > > + spin_lock_init(&cmd->cmd_lock); > cdb = &atio->u.isp24.fcp_cmnd.cdb[0]; > cmd->se_cmd.tag = atio->u.isp24.exchange_addr; > cmd->unpacked_lun = scsilun_to_int( > @@ -3777,7 +3803,7 @@ out_term: > */ > cmd->cmd_flags |= BIT_2; > spin_lock_irqsave(&ha->hardware_lock, flags); > - qlt_send_term_exchange(vha, NULL, &cmd->atio, 1); > + qlt_send_term_exchange(vha, NULL, &cmd->atio, 1, 0); > > qlt_decr_num_pend_cmds(vha); > percpu_ida_free(&sess->se_sess->sess_tag_pool, cmd->se_cmd.map_tag); > @@ -3896,7 +3922,7 @@ static void qlt_create_sess_from_atio(struct work_struct *work) > > out_term: > spin_lock_irqsave(&ha->hardware_lock, flags); > - qlt_send_term_exchange(vha, NULL, &op->atio, 1); > + qlt_send_term_exchange(vha, NULL, &op->atio, 1, 0); > spin_unlock_irqrestore(&ha->hardware_lock, flags); > kfree(op); > > @@ -4722,7 +4748,7 @@ out_reject: > dump_stack(); > } else { > cmd->cmd_flags |= BIT_9; > - qlt_send_term_exchange(vha, cmd, &cmd->atio, 1); > + qlt_send_term_exchange(vha, cmd, &cmd->atio, 1, 0); > } > spin_unlock_irqrestore(&ha->hardware_lock, flags); > } > @@ -4901,7 +4927,7 @@ static void qlt_prepare_srr_imm(struct scsi_qla_host *vha, > sctio, sctio->srr_id); > list_del(&sctio->srr_list_entry); > qlt_send_term_exchange(vha, sctio->cmd, > - &sctio->cmd->atio, 1); > + &sctio->cmd->atio, 1, 0); > kfree(sctio); > } > } > @@ -5071,7 +5097,7 @@ static int __qlt_send_busy(struct scsi_qla_host *vha, > sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, > atio->u.isp24.fcp_hdr.s_id); > if (!sess) { > - qlt_send_term_exchange(vha, NULL, atio, 1); > + qlt_send_term_exchange(vha, NULL, atio, 1, 0); > return 0; > } > /* Sending marker isn't necessary, since we called from ISR */ > @@ -5345,7 +5371,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha, > #if 1 /* With TERM EXCHANGE some FC cards refuse to boot */ > qlt_send_busy(vha, atio, SAM_STAT_BUSY); > #else > - qlt_send_term_exchange(vha, NULL, atio, 1); > + qlt_send_term_exchange(vha, NULL, atio, 1, 0); > #endif > } else { > if (tgt->tgt_stop) { > @@ -5446,7 +5472,7 @@ static void qlt_response_pkt(struct scsi_qla_host *vha, response_t *pkt) > #if 1 /* With TERM EXCHANGE some FC cards refuse to boot */ > qlt_send_busy(vha, atio, 0); > #else > - qlt_send_term_exchange(vha, NULL, atio, 1); > + qlt_send_term_exchange(vha, NULL, atio, 1, 0); > #endif > } else { > if (tgt->tgt_stop) { > @@ -5455,7 +5481,7 @@ static void qlt_response_pkt(struct scsi_qla_host *vha, response_t *pkt) > "command to target, sending TERM " > "EXCHANGE for rsp\n"); > qlt_send_term_exchange(vha, NULL, > - atio, 1); > + atio, 1, 0); > } else { > ql_dbg(ql_dbg_tgt, vha, 0xe060, > "qla_target(%d): Unable to send " > @@ -5875,7 +5901,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt, > return; > > out_term: > - qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1); > + qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1, 0); > if (sess) > ha->tgt.tgt_ops->put_sess(sess); > spin_unlock_irqrestore(&ha->hardware_lock, flags); > diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h > index f5dbeab..a079c237 100644 > --- a/drivers/scsi/qla2xxx/qla_target.h > +++ b/drivers/scsi/qla2xxx/qla_target.h > @@ -941,6 +941,36 @@ struct qla_tgt_sess { > qlt_plogi_ack_t *plogi_link[QLT_PLOGI_LINK_MAX]; > }; > > +typedef enum { > + /* > + * BIT_0 - Atio Arrival / schedule to work > + * BIT_1 - qlt_do_work > + * BIT_2 - qlt_do work failed > + * BIT_3 - xfer rdy/tcm_qla2xxx_write_pending > + * BIT_4 - read respond/tcm_qla2xx_queue_data_in > + * BIT_5 - status respond / tcm_qla2xx_queue_status > + * BIT_6 - tcm request to abort/Term exchange. > + * pre_xmit_response->qlt_send_term_exchange > + * BIT_7 - SRR received (qlt_handle_srr->qlt_xmit_response) > + * BIT_8 - SRR received (qlt_handle_srr->qlt_rdy_to_xfer) > + * BIT_9 - SRR received (qla_handle_srr->qlt_send_term_exchange) > + * BIT_10 - Data in - hanlde_data->tcm_qla2xxx_handle_data > + > + * BIT_12 - good completion - qlt_ctio_do_completion -->free_cmd > + * BIT_13 - Bad completion - > + * qlt_ctio_do_completion --> qlt_term_ctio_exchange > + * BIT_14 - Back end data received/sent. > + * BIT_15 - SRR prepare ctio > + * BIT_16 - complete free > + * BIT_17 - flush - qlt_abort_cmd_on_host_reset > + * BIT_18 - completion w/abort status > + * BIT_19 - completion w/unknown status > + * BIT_20 - tcm_qla2xxx_free_cmd > + */ > + CMD_FLAG_DATA_WORK = BIT_11, > + CMD_FLAG_DATA_WORK_FREE = BIT_21, > +} cmd_flags_t; > + > struct qla_tgt_cmd { > struct se_cmd se_cmd; > struct qla_tgt_sess *sess; > @@ -950,6 +980,7 @@ struct qla_tgt_cmd { > /* Sense buffer that will be mapped into outgoing status */ > unsigned char sense_buffer[TRANSPORT_SENSE_BUFFER]; > > + spinlock_t cmd_lock; > /* to save extra sess dereferences */ > unsigned int conf_compl_supported:1; > unsigned int sg_mapped:1; > @@ -984,30 +1015,8 @@ struct qla_tgt_cmd { > > uint64_t jiffies_at_alloc; > uint64_t jiffies_at_free; > - /* BIT_0 - Atio Arrival / schedule to work > - * BIT_1 - qlt_do_work > - * BIT_2 - qlt_do work failed > - * BIT_3 - xfer rdy/tcm_qla2xxx_write_pending > - * BIT_4 - read respond/tcm_qla2xx_queue_data_in > - * BIT_5 - status respond / tcm_qla2xx_queue_status > - * BIT_6 - tcm request to abort/Term exchange. > - * pre_xmit_response->qlt_send_term_exchange > - * BIT_7 - SRR received (qlt_handle_srr->qlt_xmit_response) > - * BIT_8 - SRR received (qlt_handle_srr->qlt_rdy_to_xfer) > - * BIT_9 - SRR received (qla_handle_srr->qlt_send_term_exchange) > - * BIT_10 - Data in - hanlde_data->tcm_qla2xxx_handle_data > - * BIT_11 - Data actually going to TCM : tcm_qla2xx_handle_data_work > - * BIT_12 - good completion - qlt_ctio_do_completion -->free_cmd > - * BIT_13 - Bad completion - > - * qlt_ctio_do_completion --> qlt_term_ctio_exchange > - * BIT_14 - Back end data received/sent. > - * BIT_15 - SRR prepare ctio > - * BIT_16 - complete free > - * BIT_17 - flush - qlt_abort_cmd_on_host_reset > - * BIT_18 - completion w/abort status > - * BIT_19 - completion w/unknown status > - */ > - uint32_t cmd_flags; > + > + cmd_flags_t cmd_flags; > }; > > struct qla_tgt_sess_work_param { > @@ -1146,7 +1155,7 @@ static inline void sid_to_portid(const uint8_t *s_id, port_id_t *p) > extern void qlt_response_pkt_all_vps(struct scsi_qla_host *, response_t *); > extern int qlt_rdy_to_xfer(struct qla_tgt_cmd *); > extern int qlt_xmit_response(struct qla_tgt_cmd *, int, uint8_t); > -extern void qlt_abort_cmd(struct qla_tgt_cmd *); > +extern int qlt_abort_cmd(struct qla_tgt_cmd *); > extern void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *); > extern void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *); > extern void qlt_free_cmd(struct qla_tgt_cmd *cmd); > diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c > index 366142a..842fcca 100644 > --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c > +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c > @@ -298,6 +298,10 @@ static void tcm_qla2xxx_free_cmd(struct qla_tgt_cmd *cmd) > { > cmd->vha->tgt_counters.core_qla_free_cmd++; > cmd->cmd_in_wq = 1; > + > + BUG_ON(cmd->cmd_flags & BIT_20); > + cmd->cmd_flags |= BIT_20; > + > INIT_WORK(&cmd->work, tcm_qla2xxx_complete_free); > queue_work(tcm_qla2xxx_free_wq, &cmd->work); > } Why not test_and_set_bit()? > @@ -375,6 +379,20 @@ static int tcm_qla2xxx_write_pending(struct se_cmd *se_cmd) > { > struct qla_tgt_cmd *cmd = container_of(se_cmd, > struct qla_tgt_cmd, se_cmd); > + > + if (cmd->aborted) { > + /* Cmd can loop during Q-full. tcm_qla2xxx_aborted_task > + * can get ahead of this cmd. tcm_qla2xxx_aborted_task > + * already kick start the free. > + */ > + pr_debug("write_pending aborted cmd[%p] refcount %d " > + "transport_state %x, t_state %x, se_cmd_flags %x\n", > + cmd,cmd->se_cmd.cmd_kref.refcount.counter, > + cmd->se_cmd.transport_state, > + cmd->se_cmd.t_state, > + cmd->se_cmd.se_cmd_flags); > + return 0; > + } > cmd->cmd_flags |= BIT_3; > cmd->bufflen = se_cmd->data_length; > cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); > @@ -406,7 +424,7 @@ static int tcm_qla2xxx_write_pending_status(struct se_cmd *se_cmd) > se_cmd->t_state == TRANSPORT_COMPLETE_QF_WP) { > spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); > wait_for_completion_timeout(&se_cmd->t_transport_stop_comp, > - 3 * HZ); > + 50); > return 0; > } > spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); > @@ -466,13 +484,25 @@ static int tcm_qla2xxx_handle_cmd(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd, > static void tcm_qla2xxx_handle_data_work(struct work_struct *work) > { > struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work); > + unsigned long flags; > > /* > * Ensure that the complete FCP WRITE payload has been received. > * Otherwise return an exception via CHECK_CONDITION status. > */ > cmd->cmd_in_wq = 0; > - cmd->cmd_flags |= BIT_11; > + > + spin_lock_irqsave(&cmd->cmd_lock, flags); > + cmd->cmd_flags |= CMD_FLAG_DATA_WORK; > + if (cmd->aborted) { > + cmd->cmd_flags |= CMD_FLAG_DATA_WORK_FREE; > + spin_unlock_irqrestore(&cmd->cmd_lock, flags); > + > + tcm_qla2xxx_free_cmd(cmd); > + return; > + } > + spin_unlock_irqrestore(&cmd->cmd_lock, flags); > + > cmd->vha->tgt_counters.qla_core_ret_ctio++; > if (!cmd->write_data_transferred) { > /* > @@ -547,6 +577,20 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) > struct qla_tgt_cmd *cmd = container_of(se_cmd, > struct qla_tgt_cmd, se_cmd); > > + if (cmd->aborted) { > + /* Cmd can loop during Q-full. tcm_qla2xxx_aborted_task > + * can get ahead of this cmd. tcm_qla2xxx_aborted_task > + * already kick start the free. > + */ > + pr_debug("queue_data_in aborted cmd[%p] refcount %d " > + "transport_state %x, t_state %x, se_cmd_flags %x\n", > + cmd,cmd->se_cmd.cmd_kref.refcount.counter, > + cmd->se_cmd.transport_state, > + cmd->se_cmd.t_state, > + cmd->se_cmd.se_cmd_flags); > + return 0; > + } > + > cmd->cmd_flags |= BIT_4; > cmd->bufflen = se_cmd->data_length; > cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); > @@ -638,11 +682,34 @@ static void tcm_qla2xxx_queue_tm_rsp(struct se_cmd *se_cmd) > qlt_xmit_tm_rsp(mcmd); > } > > + > +#define DATA_WORK_NOT_FREE(_flags) \ > + (( _flags & (CMD_FLAG_DATA_WORK|CMD_FLAG_DATA_WORK_FREE)) == \ > + CMD_FLAG_DATA_WORK) > static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd) > { > struct qla_tgt_cmd *cmd = container_of(se_cmd, > struct qla_tgt_cmd, se_cmd); > - qlt_abort_cmd(cmd); > + unsigned long flags; > + > + if (qlt_abort_cmd(cmd)) > + return; > + > + spin_lock_irqsave(&cmd->cmd_lock, flags); > + if ((cmd->state == QLA_TGT_STATE_NEW)|| > + ((cmd->state == QLA_TGT_STATE_DATA_IN) && > + DATA_WORK_NOT_FREE(cmd->cmd_flags)) ) { > + > + cmd->cmd_flags |= CMD_FLAG_DATA_WORK_FREE; > + spin_unlock_irqrestore(&cmd->cmd_lock, flags); > + /* Cmd have not reached firmware. > + * Use this trigger to free it. */ > + tcm_qla2xxx_free_cmd(cmd); > + return; > + } > + spin_unlock_irqrestore(&cmd->cmd_lock, flags); > + return; > + > } > > static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *, > Have you considered moving to bit ops when modifying cmd_flags? I guess you can also move the ->aborted bit into the bit field, and could get rid of some of the spinlocks ... Cheers, Hannes
On 12/7/15, 6:37 PM, "target-devel-owner@vger.kernel.org on behalf of Christoph Hellwig" <target-devel-owner@vger.kernel.org on behalf of hch@infradead.org> wrote: >> -void qlt_abort_cmd(struct qla_tgt_cmd *cmd) >> +int qlt_abort_cmd(struct qla_tgt_cmd *cmd) >> { >> struct qla_tgt *tgt = cmd->tgt; >> struct scsi_qla_host *vha = tgt->vha; >> struct se_cmd *se_cmd = &cmd->se_cmd; >> + unsigned long flags,refcount; >> >> ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014, >> "qla_target(%d): terminating exchange for aborted cmd=%p " >> "(se_cmd=%p, tag=%llu)", vha->vp_idx, cmd, &cmd->se_cmd, >> se_cmd->tag); >> >> + spin_lock_irqsave(&cmd->cmd_lock, flags); >> + if (cmd->aborted) { >> + spin_unlock_irqrestore(&cmd->cmd_lock, flags); >> + >> + /* It's normal to see 2 calls in this path: >> + * 1) XFER Rdy completion + CMD_T_ABORT >> + * 2) TCM TMR - drain_state_list >> + */ >> + refcount = atomic_read(&cmd->se_cmd.cmd_kref.refcount); >> + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, >> + "multiple abort. %p refcount %lx" >> + "transport_state %x, t_state %x, se_cmd_flags %x \n", >> + cmd, refcount,cmd->se_cmd.transport_state, >> + cmd->se_cmd.t_state,cmd->se_cmd.se_cmd_flags); >> + >> + return EIO; >> + } > >Err, no. Looking into the refcount inside a kref is never the >right thing to do. QT> even for debug purpose?? > >> +typedef enum { >> + /* >> + * BIT_0 - Atio Arrival / schedule to work >> + * BIT_1 - qlt_do_work >> + * BIT_2 - qlt_do work failed >> + * BIT_3 - xfer rdy/tcm_qla2xxx_write_pending >> + * BIT_4 - read respond/tcm_qla2xx_queue_data_in >> + * BIT_5 - status respond / tcm_qla2xx_queue_status >> + * BIT_6 - tcm request to abort/Term exchange. >> + * pre_xmit_response->qlt_send_term_exchange >> + * BIT_7 - SRR received (qlt_handle_srr->qlt_xmit_response) >> + * BIT_8 - SRR received (qlt_handle_srr->qlt_rdy_to_xfer) >> + * BIT_9 - SRR received (qla_handle_srr->qlt_send_term_exchange) >> + * BIT_10 - Data in - hanlde_data->tcm_qla2xxx_handle_data >> + >> + * BIT_12 - good completion - qlt_ctio_do_completion -->free_cmd >> + * BIT_13 - Bad completion - >> + * qlt_ctio_do_completion --> qlt_term_ctio_exchange >> + * BIT_14 - Back end data received/sent. >> + * BIT_15 - SRR prepare ctio >> + * BIT_16 - complete free >> + * BIT_17 - flush - qlt_abort_cmd_on_host_reset >> + * BIT_18 - completion w/abort status >> + * BIT_19 - completion w/unknown status >> + * BIT_20 - tcm_qla2xxx_free_cmd > >Please use descriptive names for these flags in the source code! QT> ACK. We¹ll change the bits to more descriptive name in a ³follow on² patch. > >> + BUG_ON(cmd->cmd_flags & BIT_20); >> + cmd->cmd_flags |= BIT_20; >> + > >And no crazieness like this. While we're at it: what synchronizes >access to ->cmd_flags? QT> These bits provide indication as to where the command has traversed in the QLA code. Each bit is set one time. Due to the async nature of the TMR code, it triggers QLA driver to repeat this specific free path in the double free case. This BUG_ON allows us trap it early on. In one of the corner case (below), I need to overloaded it + lock for the cleanup process. > >> @@ -466,13 +484,25 @@ static int tcm_qla2xxx_handle_cmd(scsi_qla_host_t >>*vha, struct qla_tgt_cmd *cmd, >> static void tcm_qla2xxx_handle_data_work(struct work_struct *work) >> { >> struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, >>work); >> + unsigned long flags; >> >> /* >> * Ensure that the complete FCP WRITE payload has been received. >> * Otherwise return an exception via CHECK_CONDITION status. >> */ >> cmd->cmd_in_wq = 0; >> - cmd->cmd_flags |= BIT_11; >> + >> + spin_lock_irqsave(&cmd->cmd_lock, flags); >> + cmd->cmd_flags |= CMD_FLAG_DATA_WORK; >> + if (cmd->aborted) { >> + cmd->cmd_flags |= CMD_FLAG_DATA_WORK_FREE; >> + spin_unlock_irqrestore(&cmd->cmd_lock, flags); >> + >> + tcm_qla2xxx_free_cmd(cmd); >> + return; >> + } >> + spin_unlock_irqrestore(&cmd->cmd_lock, flags); > >All these abort flag hacks look very suspicios. Can you explain the >exact theory of operation behind them? QT> The cmd->aborted flag is used to track the CMD_T_ABORT flag at TCM level. If the command have been requested to be aborted by TCM or already aborted, we advance it to the ³free" state because our hardware have already started freeing up resources associated to this command/exchange. In this specific case(above), a XFER RDY was aborted by the TMR. Returning the cmd to TCM to generate SCSI Status would generate erroneous HW error due to freed resource. > >-- >To unsubscribe from this list: send the line "unsubscribe target-devel" in >the body of a message to majordomo@vger.kernel.org >More majordomo info at http://vger.kernel.org/majordomo-info.html
Hannes, ACK. We¹ll move the flags to bitops in the "follow on" patch to clean it up. Those flags was introduced from a different patch. Will move the few overloaded flag to bit field. However, getting rid of the spin lock would prove tricky because the code is trying to serialize the cleanup. With out the lock, we kept hitting multiple free problem. Regards, Quinn Tran On 12/8/15, 11:01 PM, "target-devel-owner@vger.kernel.org on behalf of Hannes Reinecke" <target-devel-owner@vger.kernel.org on behalf of hare@suse.de> wrote: >>+ >> } >> >> static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *, >Have you considered moving to bit ops when modifying cmd_flags? >I guess you can also move the ->aborted bit into the bit field, and >could get rid of some of the spinlocks ... > >Cheers, > >Hannes >-- >Dr. Hannes Reinecke zSeries & Storage >hare@suse.de +49 911 74053 688 >SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg >GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg) >--
On Wed, Dec 09, 2015 at 10:07:32PM +0000, Quinn Tran wrote: > >Err, no. Looking into the refcount inside a kref is never the > >right thing to do. > > QT> even for debug purpose?? No. Please treat struct kref as opaque. > QT> These bits provide indication as to where the command has traversed in > the QLA code. Each bit is set one time. Due to the async nature of the > TMR code, it triggers QLA driver to repeat this specific free path in the > double free case. This BUG_ON allows us trap it early on. > > In one of the corner case (below), I need to overloaded it + lock for the > cleanup process. Setting bits fundamentaly is a read/modify/write cycle. You either need to use {set,clear,test}_bit or lock around these manipulations. > QT> The cmd->aborted flag is used to track the CMD_T_ABORT flag at TCM > level. If the command have been requested to be aborted by TCM or already > aborted, we advance it to the ?free" state because our hardware have > already started freeing up resources associated to this command/exchange. > In this specific case(above), a XFER RDY was aborted by the TMR. > Returning the cmd to TCM to generate SCSI Status would generate erroneous > HW error due to freed resource. I really think this nees to be updated on top of Bat's changes as a start and re-reviewed. The amoutn of special casing and second guessing here is simply not sustainable in the long run. -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Christoph, Thanks for reviewing. I¹ll withdraw this patch. Will rework with new code and submit at a later time. Regards, Quinn Tran On 12/14/15, 2:34 AM, "Christoph Hellwig" <hch@infradead.org> wrote: >On Wed, Dec 09, 2015 at 10:07:32PM +0000, Quinn Tran wrote: >> >Err, no. Looking into the refcount inside a kref is never the >> >right thing to do. >> >> QT> even for debug purpose?? > >No. Please treat struct kref as opaque. > >> QT> These bits provide indication as to where the command has traversed >>in >> the QLA code. Each bit is set one time. Due to the async nature of the >> TMR code, it triggers QLA driver to repeat this specific free path in >>the >> double free case. This BUG_ON allows us trap it early on. >> >> In one of the corner case (below), I need to overloaded it + lock for >>the >> cleanup process. > >Setting bits fundamentaly is a read/modify/write cycle. You either >need to use {set,clear,test}_bit or lock around these manipulations. > >> QT> The cmd->aborted flag is used to track the CMD_T_ABORT flag at TCM >> level. If the command have been requested to be aborted by TCM or >>already >> aborted, we advance it to the ?free" state because our hardware have >> already started freeing up resources associated to this >>command/exchange. >> In this specific case(above), a XFER RDY was aborted by the TMR. >> Returning the cmd to TCM to generate SCSI Status would generate >>erroneous >> HW error due to freed resource. > >I really think this nees to be updated on top of Bat's changes as a >start and re-reviewed. The amoutn of special casing and second guessing >here is simply not sustainable in the long run.
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 638940f..4d42b79 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -105,7 +105,7 @@ static void qlt_response_pkt(struct scsi_qla_host *ha, response_t *pkt); static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, int fn, void *iocb, int flags); static void qlt_send_term_exchange(struct scsi_qla_host *ha, struct qla_tgt_cmd - *cmd, struct atio_from_isp *atio, int ha_locked); + *cmd, struct atio_from_isp *atio, int ha_locked, int ul_abort); static void qlt_reject_free_srr_imm(struct scsi_qla_host *ha, struct qla_tgt_srr_imm *imm, int ha_lock); static void qlt_abort_cmd_on_host_reset(struct scsi_qla_host *vha, @@ -2646,7 +2646,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, /* no need to terminate. FW already freed exchange. */ qlt_abort_cmd_on_host_reset(cmd->vha, cmd); else - qlt_send_term_exchange(vha, cmd, &cmd->atio, 1); + qlt_send_term_exchange(vha, cmd, &cmd->atio, 1, 0); spin_unlock_irqrestore(&ha->hardware_lock, flags); return 0; } @@ -3154,7 +3154,8 @@ static int __qlt_send_term_exchange(struct scsi_qla_host *vha, } static void qlt_send_term_exchange(struct scsi_qla_host *vha, - struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked) + struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked, + int ul_abort) { unsigned long flags = 0; int rc; @@ -3174,8 +3175,7 @@ static void qlt_send_term_exchange(struct scsi_qla_host *vha, qlt_alloc_qfull_cmd(vha, atio, 0, 0); done: - if (cmd && (!cmd->aborted || - !cmd->cmd_sent_to_fw)) { + if (cmd && !ul_abort && !cmd->aborted) { if (cmd->sg_mapped) qlt_unmap_sg(vha, cmd); vha->hw->tgt.tgt_ops->free_cmd(cmd); @@ -3234,21 +3234,43 @@ static void qlt_chk_exch_leak_thresh_hold(struct scsi_qla_host *vha) } -void qlt_abort_cmd(struct qla_tgt_cmd *cmd) +int qlt_abort_cmd(struct qla_tgt_cmd *cmd) { struct qla_tgt *tgt = cmd->tgt; struct scsi_qla_host *vha = tgt->vha; struct se_cmd *se_cmd = &cmd->se_cmd; + unsigned long flags,refcount; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014, "qla_target(%d): terminating exchange for aborted cmd=%p " "(se_cmd=%p, tag=%llu)", vha->vp_idx, cmd, &cmd->se_cmd, se_cmd->tag); + spin_lock_irqsave(&cmd->cmd_lock, flags); + if (cmd->aborted) { + spin_unlock_irqrestore(&cmd->cmd_lock, flags); + + /* It's normal to see 2 calls in this path: + * 1) XFER Rdy completion + CMD_T_ABORT + * 2) TCM TMR - drain_state_list + */ + refcount = atomic_read(&cmd->se_cmd.cmd_kref.refcount); + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "multiple abort. %p refcount %lx" + "transport_state %x, t_state %x, se_cmd_flags %x \n", + cmd, refcount,cmd->se_cmd.transport_state, + cmd->se_cmd.t_state,cmd->se_cmd.se_cmd_flags); + + return EIO; + } + cmd->aborted = 1; cmd->cmd_flags |= BIT_6; + spin_unlock_irqrestore(&cmd->cmd_lock, flags); + + qlt_send_term_exchange(vha, cmd, &cmd->atio, 0, 1); - qlt_send_term_exchange(vha, cmd, &cmd->atio, 0); + return 0; } EXPORT_SYMBOL(qlt_abort_cmd); @@ -3263,6 +3285,9 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd) BUG_ON(cmd->cmd_in_wq); + if (cmd->sg_mapped) + qlt_unmap_sg(cmd->vha, cmd); + if (!cmd->q_full) qlt_decr_num_pend_cmds(cmd->vha); @@ -3380,7 +3405,7 @@ static int qlt_term_ctio_exchange(struct scsi_qla_host *vha, void *ctio, term = 1; if (term) - qlt_send_term_exchange(vha, cmd, &cmd->atio, 1); + qlt_send_term_exchange(vha, cmd, &cmd->atio, 1, 0); return term; } @@ -3735,6 +3760,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) goto out_term; } + spin_lock_init(&cmd->cmd_lock); cdb = &atio->u.isp24.fcp_cmnd.cdb[0]; cmd->se_cmd.tag = atio->u.isp24.exchange_addr; cmd->unpacked_lun = scsilun_to_int( @@ -3777,7 +3803,7 @@ out_term: */ cmd->cmd_flags |= BIT_2; spin_lock_irqsave(&ha->hardware_lock, flags); - qlt_send_term_exchange(vha, NULL, &cmd->atio, 1); + qlt_send_term_exchange(vha, NULL, &cmd->atio, 1, 0); qlt_decr_num_pend_cmds(vha); percpu_ida_free(&sess->se_sess->sess_tag_pool, cmd->se_cmd.map_tag); @@ -3896,7 +3922,7 @@ static void qlt_create_sess_from_atio(struct work_struct *work) out_term: spin_lock_irqsave(&ha->hardware_lock, flags); - qlt_send_term_exchange(vha, NULL, &op->atio, 1); + qlt_send_term_exchange(vha, NULL, &op->atio, 1, 0); spin_unlock_irqrestore(&ha->hardware_lock, flags); kfree(op); @@ -4722,7 +4748,7 @@ out_reject: dump_stack(); } else { cmd->cmd_flags |= BIT_9; - qlt_send_term_exchange(vha, cmd, &cmd->atio, 1); + qlt_send_term_exchange(vha, cmd, &cmd->atio, 1, 0); } spin_unlock_irqrestore(&ha->hardware_lock, flags); } @@ -4901,7 +4927,7 @@ static void qlt_prepare_srr_imm(struct scsi_qla_host *vha, sctio, sctio->srr_id); list_del(&sctio->srr_list_entry); qlt_send_term_exchange(vha, sctio->cmd, - &sctio->cmd->atio, 1); + &sctio->cmd->atio, 1, 0); kfree(sctio); } } @@ -5071,7 +5097,7 @@ static int __qlt_send_busy(struct scsi_qla_host *vha, sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, atio->u.isp24.fcp_hdr.s_id); if (!sess) { - qlt_send_term_exchange(vha, NULL, atio, 1); + qlt_send_term_exchange(vha, NULL, atio, 1, 0); return 0; } /* Sending marker isn't necessary, since we called from ISR */ @@ -5345,7 +5371,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha, #if 1 /* With TERM EXCHANGE some FC cards refuse to boot */ qlt_send_busy(vha, atio, SAM_STAT_BUSY); #else - qlt_send_term_exchange(vha, NULL, atio, 1); + qlt_send_term_exchange(vha, NULL, atio, 1, 0); #endif } else { if (tgt->tgt_stop) { @@ -5446,7 +5472,7 @@ static void qlt_response_pkt(struct scsi_qla_host *vha, response_t *pkt) #if 1 /* With TERM EXCHANGE some FC cards refuse to boot */ qlt_send_busy(vha, atio, 0); #else - qlt_send_term_exchange(vha, NULL, atio, 1); + qlt_send_term_exchange(vha, NULL, atio, 1, 0); #endif } else { if (tgt->tgt_stop) { @@ -5455,7 +5481,7 @@ static void qlt_response_pkt(struct scsi_qla_host *vha, response_t *pkt) "command to target, sending TERM " "EXCHANGE for rsp\n"); qlt_send_term_exchange(vha, NULL, - atio, 1); + atio, 1, 0); } else { ql_dbg(ql_dbg_tgt, vha, 0xe060, "qla_target(%d): Unable to send " @@ -5875,7 +5901,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt, return; out_term: - qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1); + qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1, 0); if (sess) ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->hardware_lock, flags); diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index f5dbeab..a079c237 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -941,6 +941,36 @@ struct qla_tgt_sess { qlt_plogi_ack_t *plogi_link[QLT_PLOGI_LINK_MAX]; }; +typedef enum { + /* + * BIT_0 - Atio Arrival / schedule to work + * BIT_1 - qlt_do_work + * BIT_2 - qlt_do work failed + * BIT_3 - xfer rdy/tcm_qla2xxx_write_pending + * BIT_4 - read respond/tcm_qla2xx_queue_data_in + * BIT_5 - status respond / tcm_qla2xx_queue_status + * BIT_6 - tcm request to abort/Term exchange. + * pre_xmit_response->qlt_send_term_exchange + * BIT_7 - SRR received (qlt_handle_srr->qlt_xmit_response) + * BIT_8 - SRR received (qlt_handle_srr->qlt_rdy_to_xfer) + * BIT_9 - SRR received (qla_handle_srr->qlt_send_term_exchange) + * BIT_10 - Data in - hanlde_data->tcm_qla2xxx_handle_data + + * BIT_12 - good completion - qlt_ctio_do_completion -->free_cmd + * BIT_13 - Bad completion - + * qlt_ctio_do_completion --> qlt_term_ctio_exchange + * BIT_14 - Back end data received/sent. + * BIT_15 - SRR prepare ctio + * BIT_16 - complete free + * BIT_17 - flush - qlt_abort_cmd_on_host_reset + * BIT_18 - completion w/abort status + * BIT_19 - completion w/unknown status + * BIT_20 - tcm_qla2xxx_free_cmd + */ + CMD_FLAG_DATA_WORK = BIT_11, + CMD_FLAG_DATA_WORK_FREE = BIT_21, +} cmd_flags_t; + struct qla_tgt_cmd { struct se_cmd se_cmd; struct qla_tgt_sess *sess; @@ -950,6 +980,7 @@ struct qla_tgt_cmd { /* Sense buffer that will be mapped into outgoing status */ unsigned char sense_buffer[TRANSPORT_SENSE_BUFFER]; + spinlock_t cmd_lock; /* to save extra sess dereferences */ unsigned int conf_compl_supported:1; unsigned int sg_mapped:1; @@ -984,30 +1015,8 @@ struct qla_tgt_cmd { uint64_t jiffies_at_alloc; uint64_t jiffies_at_free; - /* BIT_0 - Atio Arrival / schedule to work - * BIT_1 - qlt_do_work - * BIT_2 - qlt_do work failed - * BIT_3 - xfer rdy/tcm_qla2xxx_write_pending - * BIT_4 - read respond/tcm_qla2xx_queue_data_in - * BIT_5 - status respond / tcm_qla2xx_queue_status - * BIT_6 - tcm request to abort/Term exchange. - * pre_xmit_response->qlt_send_term_exchange - * BIT_7 - SRR received (qlt_handle_srr->qlt_xmit_response) - * BIT_8 - SRR received (qlt_handle_srr->qlt_rdy_to_xfer) - * BIT_9 - SRR received (qla_handle_srr->qlt_send_term_exchange) - * BIT_10 - Data in - hanlde_data->tcm_qla2xxx_handle_data - * BIT_11 - Data actually going to TCM : tcm_qla2xx_handle_data_work - * BIT_12 - good completion - qlt_ctio_do_completion -->free_cmd - * BIT_13 - Bad completion - - * qlt_ctio_do_completion --> qlt_term_ctio_exchange - * BIT_14 - Back end data received/sent. - * BIT_15 - SRR prepare ctio - * BIT_16 - complete free - * BIT_17 - flush - qlt_abort_cmd_on_host_reset - * BIT_18 - completion w/abort status - * BIT_19 - completion w/unknown status - */ - uint32_t cmd_flags; + + cmd_flags_t cmd_flags; }; struct qla_tgt_sess_work_param { @@ -1146,7 +1155,7 @@ static inline void sid_to_portid(const uint8_t *s_id, port_id_t *p) extern void qlt_response_pkt_all_vps(struct scsi_qla_host *, response_t *); extern int qlt_rdy_to_xfer(struct qla_tgt_cmd *); extern int qlt_xmit_response(struct qla_tgt_cmd *, int, uint8_t); -extern void qlt_abort_cmd(struct qla_tgt_cmd *); +extern int qlt_abort_cmd(struct qla_tgt_cmd *); extern void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *); extern void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *); extern void qlt_free_cmd(struct qla_tgt_cmd *cmd); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 366142a..842fcca 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -298,6 +298,10 @@ static void tcm_qla2xxx_free_cmd(struct qla_tgt_cmd *cmd) { cmd->vha->tgt_counters.core_qla_free_cmd++; cmd->cmd_in_wq = 1; + + BUG_ON(cmd->cmd_flags & BIT_20); + cmd->cmd_flags |= BIT_20; + INIT_WORK(&cmd->work, tcm_qla2xxx_complete_free); queue_work(tcm_qla2xxx_free_wq, &cmd->work); } @@ -375,6 +379,20 @@ static int tcm_qla2xxx_write_pending(struct se_cmd *se_cmd) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); + + if (cmd->aborted) { + /* Cmd can loop during Q-full. tcm_qla2xxx_aborted_task + * can get ahead of this cmd. tcm_qla2xxx_aborted_task + * already kick start the free. + */ + pr_debug("write_pending aborted cmd[%p] refcount %d " + "transport_state %x, t_state %x, se_cmd_flags %x\n", + cmd,cmd->se_cmd.cmd_kref.refcount.counter, + cmd->se_cmd.transport_state, + cmd->se_cmd.t_state, + cmd->se_cmd.se_cmd_flags); + return 0; + } cmd->cmd_flags |= BIT_3; cmd->bufflen = se_cmd->data_length; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); @@ -406,7 +424,7 @@ static int tcm_qla2xxx_write_pending_status(struct se_cmd *se_cmd) se_cmd->t_state == TRANSPORT_COMPLETE_QF_WP) { spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); wait_for_completion_timeout(&se_cmd->t_transport_stop_comp, - 3 * HZ); + 50); return 0; } spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); @@ -466,13 +484,25 @@ static int tcm_qla2xxx_handle_cmd(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd, static void tcm_qla2xxx_handle_data_work(struct work_struct *work) { struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work); + unsigned long flags; /* * Ensure that the complete FCP WRITE payload has been received. * Otherwise return an exception via CHECK_CONDITION status. */ cmd->cmd_in_wq = 0; - cmd->cmd_flags |= BIT_11; + + spin_lock_irqsave(&cmd->cmd_lock, flags); + cmd->cmd_flags |= CMD_FLAG_DATA_WORK; + if (cmd->aborted) { + cmd->cmd_flags |= CMD_FLAG_DATA_WORK_FREE; + spin_unlock_irqrestore(&cmd->cmd_lock, flags); + + tcm_qla2xxx_free_cmd(cmd); + return; + } + spin_unlock_irqrestore(&cmd->cmd_lock, flags); + cmd->vha->tgt_counters.qla_core_ret_ctio++; if (!cmd->write_data_transferred) { /* @@ -547,6 +577,20 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); + if (cmd->aborted) { + /* Cmd can loop during Q-full. tcm_qla2xxx_aborted_task + * can get ahead of this cmd. tcm_qla2xxx_aborted_task + * already kick start the free. + */ + pr_debug("queue_data_in aborted cmd[%p] refcount %d " + "transport_state %x, t_state %x, se_cmd_flags %x\n", + cmd,cmd->se_cmd.cmd_kref.refcount.counter, + cmd->se_cmd.transport_state, + cmd->se_cmd.t_state, + cmd->se_cmd.se_cmd_flags); + return 0; + } + cmd->cmd_flags |= BIT_4; cmd->bufflen = se_cmd->data_length; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); @@ -638,11 +682,34 @@ static void tcm_qla2xxx_queue_tm_rsp(struct se_cmd *se_cmd) qlt_xmit_tm_rsp(mcmd); } + +#define DATA_WORK_NOT_FREE(_flags) \ + (( _flags & (CMD_FLAG_DATA_WORK|CMD_FLAG_DATA_WORK_FREE)) == \ + CMD_FLAG_DATA_WORK) static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); - qlt_abort_cmd(cmd); + unsigned long flags; + + if (qlt_abort_cmd(cmd)) + return; + + spin_lock_irqsave(&cmd->cmd_lock, flags); + if ((cmd->state == QLA_TGT_STATE_NEW)|| + ((cmd->state == QLA_TGT_STATE_DATA_IN) && + DATA_WORK_NOT_FREE(cmd->cmd_flags)) ) { + + cmd->cmd_flags |= CMD_FLAG_DATA_WORK_FREE; + spin_unlock_irqrestore(&cmd->cmd_lock, flags); + /* Cmd have not reached firmware. + * Use this trigger to free it. */ + tcm_qla2xxx_free_cmd(cmd); + return; + } + spin_unlock_irqrestore(&cmd->cmd_lock, flags); + return; + } static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *,