Message ID | 1726715161-18941-6-git-send-email-selvin.xavier@broadcom.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | RDMA/bnxt_re: Bug fixes for 6.12 kernel | expand |
在 2024/9/19 11:06, Selvin Xavier 写道: > There is a race between the CREQ tasklet and destroy > qp when accessing the qp-handle table. There is > a chance of reading a valid qp-handle in the > CREQ tasklet handler while the QP is already moving > ahead with the destruction. > > Fixing this race by implementing a table-lock to > synchronize the access. > > Fixes: f218d67ef004 ("RDMA/bnxt_re: Allow posting when QPs are in error") > Fixes: 84cf229f4001 ("RDMA/bnxt_re: Fix the qp table indexing") > Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> > Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com> > --- > drivers/infiniband/hw/bnxt_re/qplib_fp.c | 4 ++++ > drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 13 +++++++++---- > drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 2 ++ > 3 files changed, 15 insertions(+), 4 deletions(-) > > diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c > index 42e98e5..bc358bd 100644 > --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c > +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c > @@ -1527,9 +1527,11 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, > u32 tbl_indx; > int rc; > > + spin_lock_bh(&rcfw->tbl_lock); > tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw); > rcfw->qp_tbl[tbl_indx].qp_id = BNXT_QPLIB_QP_ID_INVALID; > rcfw->qp_tbl[tbl_indx].qp_handle = NULL; > + spin_unlock_bh(&rcfw->tbl_lock); > > bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, > CMDQ_BASE_OPCODE_DESTROY_QP, > @@ -1540,8 +1542,10 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, > sizeof(resp), 0); > rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); > if (rc) { > + spin_lock_bh(&rcfw->tbl_lock); > rcfw->qp_tbl[tbl_indx].qp_id = qp->id; > rcfw->qp_tbl[tbl_indx].qp_handle = qp; > + spin_unlock_bh(&rcfw->tbl_lock); > return rc; > } > > diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c > index 5bef9b4..85bfedc 100644 > --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c > +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c > @@ -634,17 +634,21 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, > case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION: > err_event = (struct creq_qp_error_notification *)qp_event; > qp_id = le32_to_cpu(err_event->xid); > + spin_lock_nested(&rcfw->tbl_lock, SINGLE_DEPTH_NESTING); > tbl_indx = map_qp_id_to_tbl_indx(qp_id, rcfw); > qp = rcfw->qp_tbl[tbl_indx].qp_handle; > + if (!qp) { > + spin_unlock(&rcfw->tbl_lock); > + break; > + } > + bnxt_qplib_mark_qp_error(qp); > + rc = rcfw->creq.aeq_handler(rcfw, qp_event, qp); > + spin_unlock(&rcfw->tbl_lock); > dev_dbg(&pdev->dev, "Received QP error notification\n"); > dev_dbg(&pdev->dev, > "qpid 0x%x, req_err=0x%x, resp_err=0x%x\n", > qp_id, err_event->req_err_state_reason, > err_event->res_err_state_reason); > - if (!qp) > - break; > - bnxt_qplib_mark_qp_error(qp); > - rc = rcfw->creq.aeq_handler(rcfw, qp_event, qp); > break; > default: > /* > @@ -973,6 +977,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, > GFP_KERNEL); > if (!rcfw->qp_tbl) > goto fail; > + spin_lock_init(&rcfw->tbl_lock); Thanks. I am fine with this spin_lock_bh and spin_lock_init. Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev> Zhu Yanjun > > rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout; > > diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h > index 45996e6..07779ae 100644 > --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h > +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h > @@ -224,6 +224,8 @@ struct bnxt_qplib_rcfw { > struct bnxt_qplib_crsqe *crsqe_tbl; > int qp_tbl_size; > struct bnxt_qplib_qp_node *qp_tbl; > + /* To synchronize the qp-handle hash table */ > + spinlock_t tbl_lock; > u64 oos_prev; > u32 init_oos_stats; > u32 cmdq_depth;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 42e98e5..bc358bd 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -1527,9 +1527,11 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, u32 tbl_indx; int rc; + spin_lock_bh(&rcfw->tbl_lock); tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw); rcfw->qp_tbl[tbl_indx].qp_id = BNXT_QPLIB_QP_ID_INVALID; rcfw->qp_tbl[tbl_indx].qp_handle = NULL; + spin_unlock_bh(&rcfw->tbl_lock); bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_DESTROY_QP, @@ -1540,8 +1542,10 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); if (rc) { + spin_lock_bh(&rcfw->tbl_lock); rcfw->qp_tbl[tbl_indx].qp_id = qp->id; rcfw->qp_tbl[tbl_indx].qp_handle = qp; + spin_unlock_bh(&rcfw->tbl_lock); return rc; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 5bef9b4..85bfedc 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -634,17 +634,21 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION: err_event = (struct creq_qp_error_notification *)qp_event; qp_id = le32_to_cpu(err_event->xid); + spin_lock_nested(&rcfw->tbl_lock, SINGLE_DEPTH_NESTING); tbl_indx = map_qp_id_to_tbl_indx(qp_id, rcfw); qp = rcfw->qp_tbl[tbl_indx].qp_handle; + if (!qp) { + spin_unlock(&rcfw->tbl_lock); + break; + } + bnxt_qplib_mark_qp_error(qp); + rc = rcfw->creq.aeq_handler(rcfw, qp_event, qp); + spin_unlock(&rcfw->tbl_lock); dev_dbg(&pdev->dev, "Received QP error notification\n"); dev_dbg(&pdev->dev, "qpid 0x%x, req_err=0x%x, resp_err=0x%x\n", qp_id, err_event->req_err_state_reason, err_event->res_err_state_reason); - if (!qp) - break; - bnxt_qplib_mark_qp_error(qp); - rc = rcfw->creq.aeq_handler(rcfw, qp_event, qp); break; default: /* @@ -973,6 +977,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, GFP_KERNEL); if (!rcfw->qp_tbl) goto fail; + spin_lock_init(&rcfw->tbl_lock); rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 45996e6..07779ae 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -224,6 +224,8 @@ struct bnxt_qplib_rcfw { struct bnxt_qplib_crsqe *crsqe_tbl; int qp_tbl_size; struct bnxt_qplib_qp_node *qp_tbl; + /* To synchronize the qp-handle hash table */ + spinlock_t tbl_lock; u64 oos_prev; u32 init_oos_stats; u32 cmdq_depth;