diff mbox series

[for-next] RDMA/bnxt_re: Add resize_cq support

Message ID 1678868215-23626-1-git-send-email-selvin.xavier@broadcom.com (mailing list archive)
State Accepted
Headers show
Series [for-next] RDMA/bnxt_re: Add resize_cq support | expand

Commit Message

Selvin Xavier March 15, 2023, 8:16 a.m. UTC
Add resize_cq verb support for user space CQs. Resize operation for
kernel CQs are not supported now.

Driver should free the current CQ only after user library polls
for all the completions and switch to new CQ. So after the resize_cq
is returned from the driver, user libray polls for existing completions
and store it as temporary data. Once library reaps all completions in the
current CQ, it invokes the ibv_cmd_poll_cq to inform the driver about
the resize_cq completion. Adding a check for user CQs in driver's
poll_cq and complete the resize operation for user CQs.
Updating uverbs_cmd_mask with poll_cq to support this.

User library changes are available in this pull request.
https://github.com/linux-rdma/rdma-core/pull/1315

Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 109 +++++++++++++++++++++++++++++++
 drivers/infiniband/hw/bnxt_re/ib_verbs.h |   3 +
 drivers/infiniband/hw/bnxt_re/main.c     |   2 +
 drivers/infiniband/hw/bnxt_re/qplib_fp.c |  44 +++++++++++++
 drivers/infiniband/hw/bnxt_re/qplib_fp.h |   5 ++
 include/uapi/rdma/bnxt_re-abi.h          |   4 ++
 6 files changed, 167 insertions(+)

Comments

Yunsheng Lin March 17, 2023, 9:08 a.m. UTC | #1
On 2023/3/15 16:16, Selvin Xavier wrote:
> Add resize_cq verb support for user space CQs. Resize operation for
> kernel CQs are not supported now.
> 
> Driver should free the current CQ only after user library polls
> for all the completions and switch to new CQ. So after the resize_cq
> is returned from the driver, user libray polls for existing completions

libray -> library

> and store it as temporary data. Once library reaps all completions in the
> current CQ, it invokes the ibv_cmd_poll_cq to inform the driver about
> the resize_cq completion. Adding a check for user CQs in driver's
> poll_cq and complete the resize operation for user CQs.
> Updating uverbs_cmd_mask with poll_cq to support this.
> 
> User library changes are available in this pull request.
> https://github.com/linux-rdma/rdma-core/pull/1315
> 
> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
> ---
>  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 109 +++++++++++++++++++++++++++++++
>  drivers/infiniband/hw/bnxt_re/ib_verbs.h |   3 +
>  drivers/infiniband/hw/bnxt_re/main.c     |   2 +
>  drivers/infiniband/hw/bnxt_re/qplib_fp.c |  44 +++++++++++++
>  drivers/infiniband/hw/bnxt_re/qplib_fp.h |   5 ++
>  include/uapi/rdma/bnxt_re-abi.h          |   4 ++
>  6 files changed, 167 insertions(+)
> 
> diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> index 989edc7..e86afec 100644
> --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> @@ -2912,6 +2912,106 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
>  	return rc;
>  }
>  
> +static void bnxt_re_resize_cq_complete(struct bnxt_re_cq *cq)
> +{
> +	struct bnxt_re_dev *rdev = cq->rdev;
> +
> +	bnxt_qplib_resize_cq_complete(&rdev->qplib_res, &cq->qplib_cq);
> +
> +	cq->qplib_cq.max_wqe = cq->resize_cqe;
> +	if (cq->resize_umem) {
> +		ib_umem_release(cq->umem);
> +		cq->umem = cq->resize_umem;
> +		cq->resize_umem = NULL;
> +		cq->resize_cqe = 0;
> +	}
> +}
> +
> +int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
> +{
> +	struct bnxt_qplib_sg_info sg_info = {};
> +	struct bnxt_qplib_dpi *orig_dpi = NULL;
> +	struct bnxt_qplib_dev_attr *dev_attr;
> +	struct bnxt_re_ucontext *uctx = NULL;
> +	struct bnxt_re_resize_cq_req req;
> +	struct bnxt_re_dev *rdev;
> +	struct bnxt_re_cq *cq;
> +	int rc, entries;
> +
> +	cq =  container_of(ibcq, struct bnxt_re_cq, ib_cq);
> +	rdev = cq->rdev;
> +	dev_attr = &rdev->dev_attr;
> +	if (!ibcq->uobject) {
> +		ibdev_err(&rdev->ibdev, "Kernel CQ Resize not supported");
> +		return -EOPNOTSUPP;
> +	}
> +
> +	if (cq->resize_umem) {
> +		ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - Busy",
> +			  cq->qplib_cq.id);
> +		return -EBUSY;
> +	}

Does above cq->resize_umem checking has any conconcurrent protection
again the bnxt_re_resize_cq_complete() called by bnxt_re_poll_cq()?

bnxt_re_resize_cq() seems like a control path operation, while
bnxt_re_poll_cq() seems like a data path operation, I am not sure
there is any conconcurrent protection between them.

> +
> +	/* Check the requested cq depth out of supported depth */
> +	if (cqe < 1 || cqe > dev_attr->max_cq_wqes) {
> +		ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - out of range cqe %d",
> +			  cq->qplib_cq.id, cqe);
> +		return -EINVAL;
> +	}
> +
> +	entries = roundup_pow_of_two(cqe + 1);
> +	if (entries > dev_attr->max_cq_wqes + 1)
> +		entries = dev_attr->max_cq_wqes + 1;
> +
> +	uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext,
> +					 ib_uctx);
> +	/* uverbs consumer */
> +	if (ib_copy_from_udata(&req, udata, sizeof(req))) {
> +		rc = -EFAULT;
> +		goto fail;
> +	}
> +
> +	cq->resize_umem = ib_umem_get(&rdev->ibdev, req.cq_va,
> +				      entries * sizeof(struct cq_base),
> +				      IB_ACCESS_LOCAL_WRITE);
> +	if (IS_ERR(cq->resize_umem)) {
> +		rc = PTR_ERR(cq->resize_umem);
> +		cq->resize_umem = NULL;
> +		ibdev_err(&rdev->ibdev, "%s: ib_umem_get failed! rc = %d\n",
> +			  __func__, rc);
> +		goto fail;
> +	}
> +	cq->resize_cqe = entries;
> +	memcpy(&sg_info, &cq->qplib_cq.sg_info, sizeof(sg_info));
> +	orig_dpi = cq->qplib_cq.dpi;
> +
> +	cq->qplib_cq.sg_info.umem = cq->resize_umem;
> +	cq->qplib_cq.sg_info.pgsize = PAGE_SIZE;
> +	cq->qplib_cq.sg_info.pgshft = PAGE_SHIFT;
> +	cq->qplib_cq.dpi = &uctx->dpi;
> +
> +	rc = bnxt_qplib_resize_cq(&rdev->qplib_res, &cq->qplib_cq, entries);
> +	if (rc) {
> +		ibdev_err(&rdev->ibdev, "Resize HW CQ %#x failed!",
> +			  cq->qplib_cq.id);
> +		goto fail;
> +	}
> +
> +	cq->ib_cq.cqe = cq->resize_cqe;
> +
> +	return 0;
> +
> +fail:
> +	if (cq->resize_umem) {
> +		ib_umem_release(cq->resize_umem);
> +		cq->resize_umem = NULL;
> +		cq->resize_cqe = 0;
> +		memcpy(&cq->qplib_cq.sg_info, &sg_info, sizeof(sg_info));
> +		cq->qplib_cq.dpi = orig_dpi;
> +	}
> +	return rc;
> +}
> +
>  static u8 __req_to_ib_wc_status(u8 qstatus)
>  {
>  	switch (qstatus) {
> @@ -3425,6 +3525,15 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
>  	struct bnxt_re_sqp_entries *sqp_entry = NULL;
>  	unsigned long flags;
>  
> +	/* User CQ; the only processing we do is to
> +	 * complete any pending CQ resize operation.
> +	 */
> +	if (cq->umem) {
> +		if (cq->resize_umem)
> +			bnxt_re_resize_cq_complete(cq);
> +		return 0;
> +	}
> +
>  	spin_lock_irqsave(&cq->cq_lock, flags);
>  	budget = min_t(u32, num_entries, cq->max_cql);
>  	num_entries = budget;
> diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
> index 9432626..31f7e34 100644
> --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
> +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
> @@ -104,6 +104,8 @@ struct bnxt_re_cq {
>  #define MAX_CQL_PER_POLL	1024
>  	u32			max_cql;
>  	struct ib_umem		*umem;
> +	struct ib_umem		*resize_umem;
> +	int			resize_cqe;
>  };
>  
>  struct bnxt_re_mr {
> @@ -191,6 +193,7 @@ int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
>  		      const struct ib_recv_wr **bad_recv_wr);
>  int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
>  		      struct ib_udata *udata);
> +int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
>  int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
>  int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
>  int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
> diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
> index c5867e7..48bbba7 100644
> --- a/drivers/infiniband/hw/bnxt_re/main.c
> +++ b/drivers/infiniband/hw/bnxt_re/main.c
> @@ -553,6 +553,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
>  	.query_srq = bnxt_re_query_srq,
>  	.reg_user_mr = bnxt_re_reg_user_mr,
>  	.req_notify_cq = bnxt_re_req_notify_cq,
> +	.resize_cq = bnxt_re_resize_cq,
>  	INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah),
>  	INIT_RDMA_OBJ_SIZE(ib_cq, bnxt_re_cq, ib_cq),
>  	INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd),
> @@ -584,6 +585,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
>  		return ret;
>  
>  	dma_set_max_seg_size(&rdev->en_dev->pdev->dev, UINT_MAX);
> +	ibdev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ);
>  	return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev);
>  }
>  
> diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
> index 96e581c..1d769a3 100644
> --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
> +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
> @@ -2100,6 +2100,50 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
>  	return rc;
>  }
>  
> +void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res,
> +				   struct bnxt_qplib_cq *cq)
> +{
> +	bnxt_qplib_free_hwq(res, &cq->hwq);
> +	memcpy(&cq->hwq, &cq->resize_hwq, sizeof(cq->hwq));
> +}
> +
> +int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq,
> +			 int new_cqes)
> +{
> +	struct bnxt_qplib_hwq_attr hwq_attr = {};
> +	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
> +	struct creq_resize_cq_resp resp = {};
> +	struct cmdq_resize_cq req = {};
> +	struct bnxt_qplib_pbl *pbl;
> +	u32 pg_sz, lvl, new_sz;
> +	u16 cmd_flags = 0;
> +	int rc;
> +
> +	RCFW_CMD_PREP(req, RESIZE_CQ, cmd_flags);
> +	hwq_attr.sginfo = &cq->sg_info;
> +	hwq_attr.res = res;
> +	hwq_attr.depth = new_cqes;
> +	hwq_attr.stride = sizeof(struct cq_base);
> +	hwq_attr.type = HWQ_TYPE_QUEUE;
> +	rc = bnxt_qplib_alloc_init_hwq(&cq->resize_hwq, &hwq_attr);
> +	if (rc)
> +		return rc;
> +
> +	req.cq_cid = cpu_to_le32(cq->id);
> +	pbl = &cq->resize_hwq.pbl[PBL_LVL_0];
> +	pg_sz = bnxt_qplib_base_pg_size(&cq->resize_hwq);
> +	lvl = (cq->resize_hwq.level << CMDQ_RESIZE_CQ_LVL_SFT) &
> +				       CMDQ_RESIZE_CQ_LVL_MASK;
> +	new_sz = (new_cqes << CMDQ_RESIZE_CQ_NEW_CQ_SIZE_SFT) &
> +		  CMDQ_RESIZE_CQ_NEW_CQ_SIZE_MASK;
> +	req.new_cq_size_pg_size_lvl = cpu_to_le32(new_sz | pg_sz | lvl);
> +	req.new_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
> +
> +	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
> +					  (void *)&resp, NULL, 0);
> +	return rc;

	return bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
					    (void *)&resp, NULL, 0);

> +}
> +
>  int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
>  {
>  	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
> diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
> index 0375019..d74d5ea 100644
> --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
> +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
> @@ -400,6 +400,7 @@ struct bnxt_qplib_cq {
>  	u16				count;
>  	u16				period;
>  	struct bnxt_qplib_hwq		hwq;
> +	struct bnxt_qplib_hwq		resize_hwq;
>  	u32				cnq_hw_ring_id;
>  	struct bnxt_qplib_nq		*nq;
>  	bool				resize_in_progress;
> @@ -532,6 +533,10 @@ void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp);
>  int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
>  			 struct bnxt_qplib_swqe *wqe);
>  int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
> +int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq,
> +			 int new_cqes);
> +void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res,
> +				   struct bnxt_qplib_cq *cq);
>  int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
>  int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
>  		       int num, struct bnxt_qplib_qp **qp);
> diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
> index b1de99b..f761e75 100644
> --- a/include/uapi/rdma/bnxt_re-abi.h
> +++ b/include/uapi/rdma/bnxt_re-abi.h
> @@ -96,6 +96,10 @@ struct bnxt_re_cq_resp {
>  	__u32 rsvd;
>  };
>  
> +struct bnxt_re_resize_cq_req {
> +	__u64 cq_va;
> +};
> +
>  struct bnxt_re_qp_req {
>  	__aligned_u64 qpsva;
>  	__aligned_u64 qprva;
>
Selvin Xavier March 17, 2023, 10:17 a.m. UTC | #2
On Fri, Mar 17, 2023 at 2:40 PM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>
> On 2023/3/15 16:16, Selvin Xavier wrote:
> > Add resize_cq verb support for user space CQs. Resize operation for
> > kernel CQs are not supported now.
> >
> > Driver should free the current CQ only after user library polls
> > for all the completions and switch to new CQ. So after the resize_cq
> > is returned from the driver, user libray polls for existing completions
>
> libray -> library
>
> > and store it as temporary data. Once library reaps all completions in the
> > current CQ, it invokes the ibv_cmd_poll_cq to inform the driver about
> > the resize_cq completion. Adding a check for user CQs in driver's
> > poll_cq and complete the resize operation for user CQs.
> > Updating uverbs_cmd_mask with poll_cq to support this.
> >
> > User library changes are available in this pull request.
> > https://github.com/linux-rdma/rdma-core/pull/1315
> >
> > Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
> > ---
> >  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 109 +++++++++++++++++++++++++++++++
> >  drivers/infiniband/hw/bnxt_re/ib_verbs.h |   3 +
> >  drivers/infiniband/hw/bnxt_re/main.c     |   2 +
> >  drivers/infiniband/hw/bnxt_re/qplib_fp.c |  44 +++++++++++++
> >  drivers/infiniband/hw/bnxt_re/qplib_fp.h |   5 ++
> >  include/uapi/rdma/bnxt_re-abi.h          |   4 ++
> >  6 files changed, 167 insertions(+)
> >
> > diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> > index 989edc7..e86afec 100644
> > --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> > +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> > @@ -2912,6 +2912,106 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> >       return rc;
> >  }
> >
> > +static void bnxt_re_resize_cq_complete(struct bnxt_re_cq *cq)
> > +{
> > +     struct bnxt_re_dev *rdev = cq->rdev;
> > +
> > +     bnxt_qplib_resize_cq_complete(&rdev->qplib_res, &cq->qplib_cq);
> > +
> > +     cq->qplib_cq.max_wqe = cq->resize_cqe;
> > +     if (cq->resize_umem) {
> > +             ib_umem_release(cq->umem);
> > +             cq->umem = cq->resize_umem;
> > +             cq->resize_umem = NULL;
> > +             cq->resize_cqe = 0;
> > +     }
> > +}
> > +
> > +int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
> > +{
> > +     struct bnxt_qplib_sg_info sg_info = {};
> > +     struct bnxt_qplib_dpi *orig_dpi = NULL;
> > +     struct bnxt_qplib_dev_attr *dev_attr;
> > +     struct bnxt_re_ucontext *uctx = NULL;
> > +     struct bnxt_re_resize_cq_req req;
> > +     struct bnxt_re_dev *rdev;
> > +     struct bnxt_re_cq *cq;
> > +     int rc, entries;
> > +
> > +     cq =  container_of(ibcq, struct bnxt_re_cq, ib_cq);
> > +     rdev = cq->rdev;
> > +     dev_attr = &rdev->dev_attr;
> > +     if (!ibcq->uobject) {
> > +             ibdev_err(&rdev->ibdev, "Kernel CQ Resize not supported");
> > +             return -EOPNOTSUPP;
> > +     }
> > +
> > +     if (cq->resize_umem) {
> > +             ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - Busy",
> > +                       cq->qplib_cq.id);
> > +             return -EBUSY;
> > +     }
>
> Does above cq->resize_umem checking has any conconcurrent protection
> again the bnxt_re_resize_cq_complete() called by bnxt_re_poll_cq()?
>
> bnxt_re_resize_cq() seems like a control path operation, while
> bnxt_re_poll_cq() seems like a data path operation, I am not sure
> there is any conconcurrent protection between them.
>
The previous check is to prevent simultaneous  resize_cq context from
the user application.

 if you see the library implementation (PR
https://github.com/linux-rdma/rdma-core/pull/1315), entire operation
is done in the single resize_cq context from application
i.e.
bnxt_re_resize_cq
 -> ibv_cmd_resize_cq
                  call driver bnxt_re_resize_cq and return
  -> poll out the current completions and store it in a user lib list
  -> issue an ibv_cmd_poll_cq.
        This will invoke bnxt_re_poll_cq in the kernel driver. We free
the previous cq resources.

So the synchronization between resize_cq and poll_cq is happening in
the user lib. We can free the old CQ  only after user land sees the
final Completion on the previous CQ. So we return from the driver's
bnxt_re_resize_cq and then poll for all completions and then use
ibv_cmd_poll_cq as a hook to reach the kernel driver and free the old
CQ's resource.

Summarize, driver's bnxt_re_poll_cq for user space CQs will be called
only from resize_cq context and no concurrent protection required in
the driver.

> > +
> > +     /* Check the requested cq depth out of supported depth */
> > +     if (cqe < 1 || cqe > dev_attr->max_cq_wqes) {
> > +             ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - out of range cqe %d",
> > +                       cq->qplib_cq.id, cqe);
> > +             return -EINVAL;
> > +     }
> > +
> > +     entries = roundup_pow_of_two(cqe + 1);
> > +     if (entries > dev_attr->max_cq_wqes + 1)
> > +             entries = dev_attr->max_cq_wqes + 1;
> > +
> > +     uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext,
> > +                                      ib_uctx);
> > +     /* uverbs consumer */
> > +     if (ib_copy_from_udata(&req, udata, sizeof(req))) {
> > +             rc = -EFAULT;
> > +             goto fail;
> > +     }
> > +
> > +     cq->resize_umem = ib_umem_get(&rdev->ibdev, req.cq_va,
> > +                                   entries * sizeof(struct cq_base),
> > +                                   IB_ACCESS_LOCAL_WRITE);
> > +     if (IS_ERR(cq->resize_umem)) {
> > +             rc = PTR_ERR(cq->resize_umem);
> > +             cq->resize_umem = NULL;
> > +             ibdev_err(&rdev->ibdev, "%s: ib_umem_get failed! rc = %d\n",
> > +                       __func__, rc);
> > +             goto fail;
> > +     }
> > +     cq->resize_cqe = entries;
> > +     memcpy(&sg_info, &cq->qplib_cq.sg_info, sizeof(sg_info));
> > +     orig_dpi = cq->qplib_cq.dpi;
> > +
> > +     cq->qplib_cq.sg_info.umem = cq->resize_umem;
> > +     cq->qplib_cq.sg_info.pgsize = PAGE_SIZE;
> > +     cq->qplib_cq.sg_info.pgshft = PAGE_SHIFT;
> > +     cq->qplib_cq.dpi = &uctx->dpi;
> > +
> > +     rc = bnxt_qplib_resize_cq(&rdev->qplib_res, &cq->qplib_cq, entries);
> > +     if (rc) {
> > +             ibdev_err(&rdev->ibdev, "Resize HW CQ %#x failed!",
> > +                       cq->qplib_cq.id);
> > +             goto fail;
> > +     }
> > +
> > +     cq->ib_cq.cqe = cq->resize_cqe;
> > +
> > +     return 0;
> > +
> > +fail:
> > +     if (cq->resize_umem) {
> > +             ib_umem_release(cq->resize_umem);
> > +             cq->resize_umem = NULL;
> > +             cq->resize_cqe = 0;
> > +             memcpy(&cq->qplib_cq.sg_info, &sg_info, sizeof(sg_info));
> > +             cq->qplib_cq.dpi = orig_dpi;
> > +     }
> > +     return rc;
> > +}
> > +
> >  static u8 __req_to_ib_wc_status(u8 qstatus)
> >  {
> >       switch (qstatus) {
> > @@ -3425,6 +3525,15 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
> >       struct bnxt_re_sqp_entries *sqp_entry = NULL;
> >       unsigned long flags;
> >
> > +     /* User CQ; the only processing we do is to
> > +      * complete any pending CQ resize operation.
> > +      */
> > +     if (cq->umem) {
> > +             if (cq->resize_umem)
> > +                     bnxt_re_resize_cq_complete(cq);
> > +             return 0;
> > +     }
> > +
> >       spin_lock_irqsave(&cq->cq_lock, flags);
> >       budget = min_t(u32, num_entries, cq->max_cql);
> >       num_entries = budget;
> > diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
> > index 9432626..31f7e34 100644
> > --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
> > +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
> > @@ -104,6 +104,8 @@ struct bnxt_re_cq {
> >  #define MAX_CQL_PER_POLL     1024
> >       u32                     max_cql;
> >       struct ib_umem          *umem;
> > +     struct ib_umem          *resize_umem;
> > +     int                     resize_cqe;
> >  };
> >
> >  struct bnxt_re_mr {
> > @@ -191,6 +193,7 @@ int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
> >                     const struct ib_recv_wr **bad_recv_wr);
> >  int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> >                     struct ib_udata *udata);
> > +int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
> >  int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
> >  int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
> >  int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
> > diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
> > index c5867e7..48bbba7 100644
> > --- a/drivers/infiniband/hw/bnxt_re/main.c
> > +++ b/drivers/infiniband/hw/bnxt_re/main.c
> > @@ -553,6 +553,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
> >       .query_srq = bnxt_re_query_srq,
> >       .reg_user_mr = bnxt_re_reg_user_mr,
> >       .req_notify_cq = bnxt_re_req_notify_cq,
> > +     .resize_cq = bnxt_re_resize_cq,
> >       INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah),
> >       INIT_RDMA_OBJ_SIZE(ib_cq, bnxt_re_cq, ib_cq),
> >       INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd),
> > @@ -584,6 +585,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
> >               return ret;
> >
> >       dma_set_max_seg_size(&rdev->en_dev->pdev->dev, UINT_MAX);
> > +     ibdev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ);
> >       return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev);
> >  }
> >
> > diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
> > index 96e581c..1d769a3 100644
> > --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
> > +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
> > @@ -2100,6 +2100,50 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
> >       return rc;
> >  }
> >
> > +void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res,
> > +                                struct bnxt_qplib_cq *cq)
> > +{
> > +     bnxt_qplib_free_hwq(res, &cq->hwq);
> > +     memcpy(&cq->hwq, &cq->resize_hwq, sizeof(cq->hwq));
> > +}
> > +
> > +int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq,
> > +                      int new_cqes)
> > +{
> > +     struct bnxt_qplib_hwq_attr hwq_attr = {};
> > +     struct bnxt_qplib_rcfw *rcfw = res->rcfw;
> > +     struct creq_resize_cq_resp resp = {};
> > +     struct cmdq_resize_cq req = {};
> > +     struct bnxt_qplib_pbl *pbl;
> > +     u32 pg_sz, lvl, new_sz;
> > +     u16 cmd_flags = 0;
> > +     int rc;
> > +
> > +     RCFW_CMD_PREP(req, RESIZE_CQ, cmd_flags);
> > +     hwq_attr.sginfo = &cq->sg_info;
> > +     hwq_attr.res = res;
> > +     hwq_attr.depth = new_cqes;
> > +     hwq_attr.stride = sizeof(struct cq_base);
> > +     hwq_attr.type = HWQ_TYPE_QUEUE;
> > +     rc = bnxt_qplib_alloc_init_hwq(&cq->resize_hwq, &hwq_attr);
> > +     if (rc)
> > +             return rc;
> > +
> > +     req.cq_cid = cpu_to_le32(cq->id);
> > +     pbl = &cq->resize_hwq.pbl[PBL_LVL_0];
> > +     pg_sz = bnxt_qplib_base_pg_size(&cq->resize_hwq);
> > +     lvl = (cq->resize_hwq.level << CMDQ_RESIZE_CQ_LVL_SFT) &
> > +                                    CMDQ_RESIZE_CQ_LVL_MASK;
> > +     new_sz = (new_cqes << CMDQ_RESIZE_CQ_NEW_CQ_SIZE_SFT) &
> > +               CMDQ_RESIZE_CQ_NEW_CQ_SIZE_MASK;
> > +     req.new_cq_size_pg_size_lvl = cpu_to_le32(new_sz | pg_sz | lvl);
> > +     req.new_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
> > +
> > +     rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
> > +                                       (void *)&resp, NULL, 0);
> > +     return rc;
>
>         return bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
>                                             (void *)&resp, NULL, 0);
>
> > +}
> > +
> >  int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
> >  {
> >       struct bnxt_qplib_rcfw *rcfw = res->rcfw;
> > diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
> > index 0375019..d74d5ea 100644
> > --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
> > +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
> > @@ -400,6 +400,7 @@ struct bnxt_qplib_cq {
> >       u16                             count;
> >       u16                             period;
> >       struct bnxt_qplib_hwq           hwq;
> > +     struct bnxt_qplib_hwq           resize_hwq;
> >       u32                             cnq_hw_ring_id;
> >       struct bnxt_qplib_nq            *nq;
> >       bool                            resize_in_progress;
> > @@ -532,6 +533,10 @@ void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp);
> >  int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
> >                        struct bnxt_qplib_swqe *wqe);
> >  int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
> > +int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq,
> > +                      int new_cqes);
> > +void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res,
> > +                                struct bnxt_qplib_cq *cq);
> >  int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
> >  int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
> >                      int num, struct bnxt_qplib_qp **qp);
> > diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
> > index b1de99b..f761e75 100644
> > --- a/include/uapi/rdma/bnxt_re-abi.h
> > +++ b/include/uapi/rdma/bnxt_re-abi.h
> > @@ -96,6 +96,10 @@ struct bnxt_re_cq_resp {
> >       __u32 rsvd;
> >  };
> >
> > +struct bnxt_re_resize_cq_req {
> > +     __u64 cq_va;
> > +};
> > +
> >  struct bnxt_re_qp_req {
> >       __aligned_u64 qpsva;
> >       __aligned_u64 qprva;
> >
Yunsheng Lin March 17, 2023, 12:23 p.m. UTC | #3
On 2023/3/17 18:17, Selvin Xavier wrote:
> On Fri, Mar 17, 2023 at 2:40 PM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>>
>> On 2023/3/15 16:16, Selvin Xavier wrote:
>>> Add resize_cq verb support for user space CQs. Resize operation for
>>> kernel CQs are not supported now.
>>>
>>> Driver should free the current CQ only after user library polls
>>> for all the completions and switch to new CQ. So after the resize_cq
>>> is returned from the driver, user libray polls for existing completions
>>
>> libray -> library
>>
>>> and store it as temporary data. Once library reaps all completions in the
>>> current CQ, it invokes the ibv_cmd_poll_cq to inform the driver about
>>> the resize_cq completion. Adding a check for user CQs in driver's
>>> poll_cq and complete the resize operation for user CQs.
>>> Updating uverbs_cmd_mask with poll_cq to support this.
>>>
>>> User library changes are available in this pull request.
>>> https://github.com/linux-rdma/rdma-core/pull/1315
>>>
>>> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
>>> ---
>>>  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 109 +++++++++++++++++++++++++++++++
>>>  drivers/infiniband/hw/bnxt_re/ib_verbs.h |   3 +
>>>  drivers/infiniband/hw/bnxt_re/main.c     |   2 +
>>>  drivers/infiniband/hw/bnxt_re/qplib_fp.c |  44 +++++++++++++
>>>  drivers/infiniband/hw/bnxt_re/qplib_fp.h |   5 ++
>>>  include/uapi/rdma/bnxt_re-abi.h          |   4 ++
>>>  6 files changed, 167 insertions(+)
>>>
>>> diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
>>> index 989edc7..e86afec 100644
>>> --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
>>> +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
>>> @@ -2912,6 +2912,106 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
>>>       return rc;
>>>  }
>>>
>>> +static void bnxt_re_resize_cq_complete(struct bnxt_re_cq *cq)
>>> +{
>>> +     struct bnxt_re_dev *rdev = cq->rdev;
>>> +
>>> +     bnxt_qplib_resize_cq_complete(&rdev->qplib_res, &cq->qplib_cq);
>>> +
>>> +     cq->qplib_cq.max_wqe = cq->resize_cqe;
>>> +     if (cq->resize_umem) {
>>> +             ib_umem_release(cq->umem);
>>> +             cq->umem = cq->resize_umem;
>>> +             cq->resize_umem = NULL;
>>> +             cq->resize_cqe = 0;
>>> +     }
>>> +}
>>> +
>>> +int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
>>> +{
>>> +     struct bnxt_qplib_sg_info sg_info = {};
>>> +     struct bnxt_qplib_dpi *orig_dpi = NULL;
>>> +     struct bnxt_qplib_dev_attr *dev_attr;
>>> +     struct bnxt_re_ucontext *uctx = NULL;
>>> +     struct bnxt_re_resize_cq_req req;
>>> +     struct bnxt_re_dev *rdev;
>>> +     struct bnxt_re_cq *cq;
>>> +     int rc, entries;
>>> +
>>> +     cq =  container_of(ibcq, struct bnxt_re_cq, ib_cq);
>>> +     rdev = cq->rdev;
>>> +     dev_attr = &rdev->dev_attr;
>>> +     if (!ibcq->uobject) {
>>> +             ibdev_err(&rdev->ibdev, "Kernel CQ Resize not supported");
>>> +             return -EOPNOTSUPP;
>>> +     }
>>> +
>>> +     if (cq->resize_umem) {
>>> +             ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - Busy",
>>> +                       cq->qplib_cq.id);
>>> +             return -EBUSY;
>>> +     }
>>
>> Does above cq->resize_umem checking has any conconcurrent protection
>> again the bnxt_re_resize_cq_complete() called by bnxt_re_poll_cq()?
>>
>> bnxt_re_resize_cq() seems like a control path operation, while
>> bnxt_re_poll_cq() seems like a data path operation, I am not sure
>> there is any conconcurrent protection between them.
>>
> The previous check is to prevent simultaneous  resize_cq context from
> the user application.
> 
>  if you see the library implementation (PR
> https://github.com/linux-rdma/rdma-core/pull/1315), entire operation
> is done in the single resize_cq context from application
> i.e.
> bnxt_re_resize_cq
>  -> ibv_cmd_resize_cq
>                   call driver bnxt_re_resize_cq and return
>   -> poll out the current completions and store it in a user lib list
>   -> issue an ibv_cmd_poll_cq.
>         This will invoke bnxt_re_poll_cq in the kernel driver. We free
> the previous cq resources.
> 
> So the synchronization between resize_cq and poll_cq is happening in
> the user lib. We can free the old CQ  only after user land sees the
> final Completion on the previous CQ. So we return from the driver's
> bnxt_re_resize_cq and then poll for all completions and then use
> ibv_cmd_poll_cq as a hook to reach the kernel driver and free the old
> CQ's resource.
> 
> Summarize, driver's bnxt_re_poll_cq for user space CQs will be called
> only from resize_cq context and no concurrent protection required in
> the driver.

Is it acceptable to depend on the user space code to ensure the correct
order in the kernel space?
Isn't that may utilized by some malicious user to crash the kernel?
Selvin Xavier March 19, 2023, 2:07 a.m. UTC | #4
On Fri, Mar 17, 2023 at 5:53 PM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>
> On 2023/3/17 18:17, Selvin Xavier wrote:
> > On Fri, Mar 17, 2023 at 2:40 PM Yunsheng Lin <linyunsheng@huawei.com> wrote:
> >>
> >> On 2023/3/15 16:16, Selvin Xavier wrote:
> >>> Add resize_cq verb support for user space CQs. Resize operation for
> >>> kernel CQs are not supported now.
> >>>
> >>> Driver should free the current CQ only after user library polls
> >>> for all the completions and switch to new CQ. So after the resize_cq
> >>> is returned from the driver, user libray polls for existing completions
> >>
> >> libray -> library
> >>
> >>> and store it as temporary data. Once library reaps all completions in the
> >>> current CQ, it invokes the ibv_cmd_poll_cq to inform the driver about
> >>> the resize_cq completion. Adding a check for user CQs in driver's
> >>> poll_cq and complete the resize operation for user CQs.
> >>> Updating uverbs_cmd_mask with poll_cq to support this.
> >>>
> >>> User library changes are available in this pull request.
> >>> https://github.com/linux-rdma/rdma-core/pull/1315
> >>>
> >>> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
> >>> ---
> >>>  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 109 +++++++++++++++++++++++++++++++
> >>>  drivers/infiniband/hw/bnxt_re/ib_verbs.h |   3 +
> >>>  drivers/infiniband/hw/bnxt_re/main.c     |   2 +
> >>>  drivers/infiniband/hw/bnxt_re/qplib_fp.c |  44 +++++++++++++
> >>>  drivers/infiniband/hw/bnxt_re/qplib_fp.h |   5 ++
> >>>  include/uapi/rdma/bnxt_re-abi.h          |   4 ++
> >>>  6 files changed, 167 insertions(+)
> >>>
> >>> diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> >>> index 989edc7..e86afec 100644
> >>> --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> >>> +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> >>> @@ -2912,6 +2912,106 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> >>>       return rc;
> >>>  }
> >>>
> >>> +static void bnxt_re_resize_cq_complete(struct bnxt_re_cq *cq)
> >>> +{
> >>> +     struct bnxt_re_dev *rdev = cq->rdev;
> >>> +
> >>> +     bnxt_qplib_resize_cq_complete(&rdev->qplib_res, &cq->qplib_cq);
> >>> +
> >>> +     cq->qplib_cq.max_wqe = cq->resize_cqe;
> >>> +     if (cq->resize_umem) {
> >>> +             ib_umem_release(cq->umem);
> >>> +             cq->umem = cq->resize_umem;
> >>> +             cq->resize_umem = NULL;
> >>> +             cq->resize_cqe = 0;
> >>> +     }
> >>> +}
> >>> +
> >>> +int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
> >>> +{
> >>> +     struct bnxt_qplib_sg_info sg_info = {};
> >>> +     struct bnxt_qplib_dpi *orig_dpi = NULL;
> >>> +     struct bnxt_qplib_dev_attr *dev_attr;
> >>> +     struct bnxt_re_ucontext *uctx = NULL;
> >>> +     struct bnxt_re_resize_cq_req req;
> >>> +     struct bnxt_re_dev *rdev;
> >>> +     struct bnxt_re_cq *cq;
> >>> +     int rc, entries;
> >>> +
> >>> +     cq =  container_of(ibcq, struct bnxt_re_cq, ib_cq);
> >>> +     rdev = cq->rdev;
> >>> +     dev_attr = &rdev->dev_attr;
> >>> +     if (!ibcq->uobject) {
> >>> +             ibdev_err(&rdev->ibdev, "Kernel CQ Resize not supported");
> >>> +             return -EOPNOTSUPP;
> >>> +     }
> >>> +
> >>> +     if (cq->resize_umem) {
> >>> +             ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - Busy",
> >>> +                       cq->qplib_cq.id);
> >>> +             return -EBUSY;
> >>> +     }
> >>
> >> Does above cq->resize_umem checking has any conconcurrent protection
> >> again the bnxt_re_resize_cq_complete() called by bnxt_re_poll_cq()?
> >>
> >> bnxt_re_resize_cq() seems like a control path operation, while
> >> bnxt_re_poll_cq() seems like a data path operation, I am not sure
> >> there is any conconcurrent protection between them.
> >>
> > The previous check is to prevent simultaneous  resize_cq context from
> > the user application.
> >
> >  if you see the library implementation (PR
> > https://github.com/linux-rdma/rdma-core/pull/1315), entire operation
> > is done in the single resize_cq context from application
> > i.e.
> > bnxt_re_resize_cq
> >  -> ibv_cmd_resize_cq
> >                   call driver bnxt_re_resize_cq and return
> >   -> poll out the current completions and store it in a user lib list
> >   -> issue an ibv_cmd_poll_cq.
> >         This will invoke bnxt_re_poll_cq in the kernel driver. We free
> > the previous cq resources.
> >
> > So the synchronization between resize_cq and poll_cq is happening in
> > the user lib. We can free the old CQ  only after user land sees the
> > final Completion on the previous CQ. So we return from the driver's
> > bnxt_re_resize_cq and then poll for all completions and then use
> > ibv_cmd_poll_cq as a hook to reach the kernel driver and free the old
> > CQ's resource.
> >
> > Summarize, driver's bnxt_re_poll_cq for user space CQs will be called
> > only from resize_cq context and no concurrent protection required in
> > the driver.
>
> Is it acceptable to depend on the user space code to ensure the correct
> order in the kernel space?
> Isn't that may utilized by some malicious user to crash the kernel?
>
The user space code I mentioned is implemented in the provider/bnxt_re
user library just
like any other verb interface. Apps shall always go through this
library to send request
to the provider driver.
Leon Romanovsky March 19, 2023, 12:02 p.m. UTC | #5
On Sun, Mar 19, 2023 at 07:37:45AM +0530, Selvin Xavier wrote:
> On Fri, Mar 17, 2023 at 5:53 PM Yunsheng Lin <linyunsheng@huawei.com> wrote:
> >
> > On 2023/3/17 18:17, Selvin Xavier wrote:
> > > On Fri, Mar 17, 2023 at 2:40 PM Yunsheng Lin <linyunsheng@huawei.com> wrote:
> > >>
> > >> On 2023/3/15 16:16, Selvin Xavier wrote:
> > >>> Add resize_cq verb support for user space CQs. Resize operation for
> > >>> kernel CQs are not supported now.
> > >>>
> > >>> Driver should free the current CQ only after user library polls
> > >>> for all the completions and switch to new CQ. So after the resize_cq
> > >>> is returned from the driver, user libray polls for existing completions
> > >>
> > >> libray -> library
> > >>
> > >>> and store it as temporary data. Once library reaps all completions in the
> > >>> current CQ, it invokes the ibv_cmd_poll_cq to inform the driver about
> > >>> the resize_cq completion. Adding a check for user CQs in driver's
> > >>> poll_cq and complete the resize operation for user CQs.
> > >>> Updating uverbs_cmd_mask with poll_cq to support this.
> > >>>
> > >>> User library changes are available in this pull request.
> > >>> https://github.com/linux-rdma/rdma-core/pull/1315
> > >>>
> > >>> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
> > >>> ---
> > >>>  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 109 +++++++++++++++++++++++++++++++
> > >>>  drivers/infiniband/hw/bnxt_re/ib_verbs.h |   3 +
> > >>>  drivers/infiniband/hw/bnxt_re/main.c     |   2 +
> > >>>  drivers/infiniband/hw/bnxt_re/qplib_fp.c |  44 +++++++++++++
> > >>>  drivers/infiniband/hw/bnxt_re/qplib_fp.h |   5 ++
> > >>>  include/uapi/rdma/bnxt_re-abi.h          |   4 ++
> > >>>  6 files changed, 167 insertions(+)
> > >>>
> > >>> diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> > >>> index 989edc7..e86afec 100644
> > >>> --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> > >>> +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> > >>> @@ -2912,6 +2912,106 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> > >>>       return rc;
> > >>>  }
> > >>>
> > >>> +static void bnxt_re_resize_cq_complete(struct bnxt_re_cq *cq)
> > >>> +{
> > >>> +     struct bnxt_re_dev *rdev = cq->rdev;
> > >>> +
> > >>> +     bnxt_qplib_resize_cq_complete(&rdev->qplib_res, &cq->qplib_cq);
> > >>> +
> > >>> +     cq->qplib_cq.max_wqe = cq->resize_cqe;
> > >>> +     if (cq->resize_umem) {
> > >>> +             ib_umem_release(cq->umem);
> > >>> +             cq->umem = cq->resize_umem;
> > >>> +             cq->resize_umem = NULL;
> > >>> +             cq->resize_cqe = 0;
> > >>> +     }
> > >>> +}
> > >>> +
> > >>> +int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
> > >>> +{
> > >>> +     struct bnxt_qplib_sg_info sg_info = {};
> > >>> +     struct bnxt_qplib_dpi *orig_dpi = NULL;
> > >>> +     struct bnxt_qplib_dev_attr *dev_attr;
> > >>> +     struct bnxt_re_ucontext *uctx = NULL;
> > >>> +     struct bnxt_re_resize_cq_req req;
> > >>> +     struct bnxt_re_dev *rdev;
> > >>> +     struct bnxt_re_cq *cq;
> > >>> +     int rc, entries;
> > >>> +
> > >>> +     cq =  container_of(ibcq, struct bnxt_re_cq, ib_cq);
> > >>> +     rdev = cq->rdev;
> > >>> +     dev_attr = &rdev->dev_attr;
> > >>> +     if (!ibcq->uobject) {
> > >>> +             ibdev_err(&rdev->ibdev, "Kernel CQ Resize not supported");
> > >>> +             return -EOPNOTSUPP;
> > >>> +     }
> > >>> +
> > >>> +     if (cq->resize_umem) {
> > >>> +             ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - Busy",
> > >>> +                       cq->qplib_cq.id);
> > >>> +             return -EBUSY;
> > >>> +     }
> > >>
> > >> Does above cq->resize_umem checking has any conconcurrent protection
> > >> again the bnxt_re_resize_cq_complete() called by bnxt_re_poll_cq()?
> > >>
> > >> bnxt_re_resize_cq() seems like a control path operation, while
> > >> bnxt_re_poll_cq() seems like a data path operation, I am not sure
> > >> there is any conconcurrent protection between them.
> > >>
> > > The previous check is to prevent simultaneous  resize_cq context from
> > > the user application.
> > >
> > >  if you see the library implementation (PR
> > > https://github.com/linux-rdma/rdma-core/pull/1315), entire operation
> > > is done in the single resize_cq context from application
> > > i.e.
> > > bnxt_re_resize_cq
> > >  -> ibv_cmd_resize_cq
> > >                   call driver bnxt_re_resize_cq and return
> > >   -> poll out the current completions and store it in a user lib list
> > >   -> issue an ibv_cmd_poll_cq.
> > >         This will invoke bnxt_re_poll_cq in the kernel driver. We free
> > > the previous cq resources.
> > >
> > > So the synchronization between resize_cq and poll_cq is happening in
> > > the user lib. We can free the old CQ  only after user land sees the
> > > final Completion on the previous CQ. So we return from the driver's
> > > bnxt_re_resize_cq and then poll for all completions and then use
> > > ibv_cmd_poll_cq as a hook to reach the kernel driver and free the old
> > > CQ's resource.
> > >
> > > Summarize, driver's bnxt_re_poll_cq for user space CQs will be called
> > > only from resize_cq context and no concurrent protection required in
> > > the driver.
> >
> > Is it acceptable to depend on the user space code to ensure the correct
> > order in the kernel space?
> > Isn't that may utilized by some malicious user to crash the kernel?
> >
> The user space code I mentioned is implemented in the provider/bnxt_re
> user library just
> like any other verb interface. Apps shall always go through this
> library to send request
> to the provider driver.

And if I use my own version of provider/bnxt_re, will kernel crash?

Thanks
Selvin Xavier March 19, 2023, 5:11 p.m. UTC | #6
On Sun, Mar 19, 2023 at 5:32 PM Leon Romanovsky <leon@kernel.org> wrote:
>
> On Sun, Mar 19, 2023 at 07:37:45AM +0530, Selvin Xavier wrote:
> > On Fri, Mar 17, 2023 at 5:53 PM Yunsheng Lin <linyunsheng@huawei.com> wrote:
> > >
> > > On 2023/3/17 18:17, Selvin Xavier wrote:
> > > > On Fri, Mar 17, 2023 at 2:40 PM Yunsheng Lin <linyunsheng@huawei.com> wrote:
> > > >>
> > > >> On 2023/3/15 16:16, Selvin Xavier wrote:
> > > >>> Add resize_cq verb support for user space CQs. Resize operation for
> > > >>> kernel CQs are not supported now.
> > > >>>
> > > >>> Driver should free the current CQ only after user library polls
> > > >>> for all the completions and switch to new CQ. So after the resize_cq
> > > >>> is returned from the driver, user libray polls for existing completions
> > > >>
> > > >> libray -> library
> > > >>
> > > >>> and store it as temporary data. Once library reaps all completions in the
> > > >>> current CQ, it invokes the ibv_cmd_poll_cq to inform the driver about
> > > >>> the resize_cq completion. Adding a check for user CQs in driver's
> > > >>> poll_cq and complete the resize operation for user CQs.
> > > >>> Updating uverbs_cmd_mask with poll_cq to support this.
> > > >>>
> > > >>> User library changes are available in this pull request.
> > > >>> https://github.com/linux-rdma/rdma-core/pull/1315
> > > >>>
> > > >>> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
> > > >>> ---
> > > >>>  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 109 +++++++++++++++++++++++++++++++
> > > >>>  drivers/infiniband/hw/bnxt_re/ib_verbs.h |   3 +
> > > >>>  drivers/infiniband/hw/bnxt_re/main.c     |   2 +
> > > >>>  drivers/infiniband/hw/bnxt_re/qplib_fp.c |  44 +++++++++++++
> > > >>>  drivers/infiniband/hw/bnxt_re/qplib_fp.h |   5 ++
> > > >>>  include/uapi/rdma/bnxt_re-abi.h          |   4 ++
> > > >>>  6 files changed, 167 insertions(+)
> > > >>>
> > > >>> diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> > > >>> index 989edc7..e86afec 100644
> > > >>> --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> > > >>> +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> > > >>> @@ -2912,6 +2912,106 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> > > >>>       return rc;
> > > >>>  }
> > > >>>
> > > >>> +static void bnxt_re_resize_cq_complete(struct bnxt_re_cq *cq)
> > > >>> +{
> > > >>> +     struct bnxt_re_dev *rdev = cq->rdev;
> > > >>> +
> > > >>> +     bnxt_qplib_resize_cq_complete(&rdev->qplib_res, &cq->qplib_cq);
> > > >>> +
> > > >>> +     cq->qplib_cq.max_wqe = cq->resize_cqe;
> > > >>> +     if (cq->resize_umem) {
> > > >>> +             ib_umem_release(cq->umem);
> > > >>> +             cq->umem = cq->resize_umem;
> > > >>> +             cq->resize_umem = NULL;
> > > >>> +             cq->resize_cqe = 0;
> > > >>> +     }
> > > >>> +}
> > > >>> +
> > > >>> +int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
> > > >>> +{
> > > >>> +     struct bnxt_qplib_sg_info sg_info = {};
> > > >>> +     struct bnxt_qplib_dpi *orig_dpi = NULL;
> > > >>> +     struct bnxt_qplib_dev_attr *dev_attr;
> > > >>> +     struct bnxt_re_ucontext *uctx = NULL;
> > > >>> +     struct bnxt_re_resize_cq_req req;
> > > >>> +     struct bnxt_re_dev *rdev;
> > > >>> +     struct bnxt_re_cq *cq;
> > > >>> +     int rc, entries;
> > > >>> +
> > > >>> +     cq =  container_of(ibcq, struct bnxt_re_cq, ib_cq);
> > > >>> +     rdev = cq->rdev;
> > > >>> +     dev_attr = &rdev->dev_attr;
> > > >>> +     if (!ibcq->uobject) {
> > > >>> +             ibdev_err(&rdev->ibdev, "Kernel CQ Resize not supported");
> > > >>> +             return -EOPNOTSUPP;
> > > >>> +     }
> > > >>> +
> > > >>> +     if (cq->resize_umem) {
> > > >>> +             ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - Busy",
> > > >>> +                       cq->qplib_cq.id);
> > > >>> +             return -EBUSY;
> > > >>> +     }
> > > >>
> > > >> Does above cq->resize_umem checking has any conconcurrent protection
> > > >> again the bnxt_re_resize_cq_complete() called by bnxt_re_poll_cq()?
> > > >>
> > > >> bnxt_re_resize_cq() seems like a control path operation, while
> > > >> bnxt_re_poll_cq() seems like a data path operation, I am not sure
> > > >> there is any conconcurrent protection between them.
> > > >>
> > > > The previous check is to prevent simultaneous  resize_cq context from
> > > > the user application.
> > > >
> > > >  if you see the library implementation (PR
> > > > https://github.com/linux-rdma/rdma-core/pull/1315), entire operation
> > > > is done in the single resize_cq context from application
> > > > i.e.
> > > > bnxt_re_resize_cq
> > > >  -> ibv_cmd_resize_cq
> > > >                   call driver bnxt_re_resize_cq and return
> > > >   -> poll out the current completions and store it in a user lib list
> > > >   -> issue an ibv_cmd_poll_cq.
> > > >         This will invoke bnxt_re_poll_cq in the kernel driver. We free
> > > > the previous cq resources.
> > > >
> > > > So the synchronization between resize_cq and poll_cq is happening in
> > > > the user lib. We can free the old CQ  only after user land sees the
> > > > final Completion on the previous CQ. So we return from the driver's
> > > > bnxt_re_resize_cq and then poll for all completions and then use
> > > > ibv_cmd_poll_cq as a hook to reach the kernel driver and free the old
> > > > CQ's resource.
> > > >
> > > > Summarize, driver's bnxt_re_poll_cq for user space CQs will be called
> > > > only from resize_cq context and no concurrent protection required in
> > > > the driver.
> > >
> > > Is it acceptable to depend on the user space code to ensure the correct
> > > order in the kernel space?
> > > Isn't that may utilized by some malicious user to crash the kernel?
> > >
> > The user space code I mentioned is implemented in the provider/bnxt_re
> > user library just
> > like any other verb interface. Apps shall always go through this
> > library to send request
> > to the provider driver.
>
> And if I use my own version of provider/bnxt_re, will kernel crash?
No.. it will not. It can cause a memory leak if the provider/bnxt_re
doesn't call ibv_cmd_poll_cq after the ibv_cmd_resize_cq.
As per the HW design, freeing the old CQ resources can happen only
after the HW generates a cut off completion entry
on the CQ which is getting resized.  This needs to be polled by the
user library. Once the user lib sees the cut off completion, we
issue an ibv_cmd_poll_cq from the library to invoke the driver and
free the old CQ resource.
>
> Thanks
Leon Romanovsky March 29, 2023, 7:05 a.m. UTC | #7
On Wed, Mar 15, 2023 at 01:16:55AM -0700, Selvin Xavier wrote:
> Add resize_cq verb support for user space CQs. Resize operation for
> kernel CQs are not supported now.
> 
> Driver should free the current CQ only after user library polls
> for all the completions and switch to new CQ. So after the resize_cq
> is returned from the driver, user libray polls for existing completions
> and store it as temporary data. Once library reaps all completions in the
> current CQ, it invokes the ibv_cmd_poll_cq to inform the driver about
> the resize_cq completion. Adding a check for user CQs in driver's
> poll_cq and complete the resize operation for user CQs.
> Updating uverbs_cmd_mask with poll_cq to support this.
> 
> User library changes are available in this pull request.
> https://github.com/linux-rdma/rdma-core/pull/1315
> 
> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
> ---
>  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 109 +++++++++++++++++++++++++++++++
>  drivers/infiniband/hw/bnxt_re/ib_verbs.h |   3 +
>  drivers/infiniband/hw/bnxt_re/main.c     |   2 +
>  drivers/infiniband/hw/bnxt_re/qplib_fp.c |  44 +++++++++++++
>  drivers/infiniband/hw/bnxt_re/qplib_fp.h |   5 ++
>  include/uapi/rdma/bnxt_re-abi.h          |   4 ++
>  6 files changed, 167 insertions(+)

<...>

> +struct bnxt_re_resize_cq_req {
> +	__u64 cq_va;

This should be __aligned_u64, see commit:
26b9906612c3 ("RDMA: Change all uapi headers to use __aligned_u64 instead of __u64")

Fixed locally and applied.

Thanks

> +};
> +
>  struct bnxt_re_qp_req {
>  	__aligned_u64 qpsva;
>  	__aligned_u64 qprva;
> -- 
> 2.5.5
>
Leon Romanovsky March 29, 2023, 7:09 a.m. UTC | #8
On Wed, 15 Mar 2023 01:16:55 -0700, Selvin Xavier wrote:
> Add resize_cq verb support for user space CQs. Resize operation for
> kernel CQs are not supported now.
> 
> Driver should free the current CQ only after user library polls
> for all the completions and switch to new CQ. So after the resize_cq
> is returned from the driver, user libray polls for existing completions
> and store it as temporary data. Once library reaps all completions in the
> current CQ, it invokes the ibv_cmd_poll_cq to inform the driver about
> the resize_cq completion. Adding a check for user CQs in driver's
> poll_cq and complete the resize operation for user CQs.
> Updating uverbs_cmd_mask with poll_cq to support this.
> 
> [...]

Applied, thanks!

[1/1] RDMA/bnxt_re: Add resize_cq support
      https://git.kernel.org/rdma/rdma/c/d54bd5abf4d26e

Best regards,
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 989edc7..e86afec 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -2912,6 +2912,106 @@  int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	return rc;
 }
 
+static void bnxt_re_resize_cq_complete(struct bnxt_re_cq *cq)
+{
+	struct bnxt_re_dev *rdev = cq->rdev;
+
+	bnxt_qplib_resize_cq_complete(&rdev->qplib_res, &cq->qplib_cq);
+
+	cq->qplib_cq.max_wqe = cq->resize_cqe;
+	if (cq->resize_umem) {
+		ib_umem_release(cq->umem);
+		cq->umem = cq->resize_umem;
+		cq->resize_umem = NULL;
+		cq->resize_cqe = 0;
+	}
+}
+
+int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
+{
+	struct bnxt_qplib_sg_info sg_info = {};
+	struct bnxt_qplib_dpi *orig_dpi = NULL;
+	struct bnxt_qplib_dev_attr *dev_attr;
+	struct bnxt_re_ucontext *uctx = NULL;
+	struct bnxt_re_resize_cq_req req;
+	struct bnxt_re_dev *rdev;
+	struct bnxt_re_cq *cq;
+	int rc, entries;
+
+	cq =  container_of(ibcq, struct bnxt_re_cq, ib_cq);
+	rdev = cq->rdev;
+	dev_attr = &rdev->dev_attr;
+	if (!ibcq->uobject) {
+		ibdev_err(&rdev->ibdev, "Kernel CQ Resize not supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (cq->resize_umem) {
+		ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - Busy",
+			  cq->qplib_cq.id);
+		return -EBUSY;
+	}
+
+	/* Check the requested cq depth out of supported depth */
+	if (cqe < 1 || cqe > dev_attr->max_cq_wqes) {
+		ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - out of range cqe %d",
+			  cq->qplib_cq.id, cqe);
+		return -EINVAL;
+	}
+
+	entries = roundup_pow_of_two(cqe + 1);
+	if (entries > dev_attr->max_cq_wqes + 1)
+		entries = dev_attr->max_cq_wqes + 1;
+
+	uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext,
+					 ib_uctx);
+	/* uverbs consumer */
+	if (ib_copy_from_udata(&req, udata, sizeof(req))) {
+		rc = -EFAULT;
+		goto fail;
+	}
+
+	cq->resize_umem = ib_umem_get(&rdev->ibdev, req.cq_va,
+				      entries * sizeof(struct cq_base),
+				      IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(cq->resize_umem)) {
+		rc = PTR_ERR(cq->resize_umem);
+		cq->resize_umem = NULL;
+		ibdev_err(&rdev->ibdev, "%s: ib_umem_get failed! rc = %d\n",
+			  __func__, rc);
+		goto fail;
+	}
+	cq->resize_cqe = entries;
+	memcpy(&sg_info, &cq->qplib_cq.sg_info, sizeof(sg_info));
+	orig_dpi = cq->qplib_cq.dpi;
+
+	cq->qplib_cq.sg_info.umem = cq->resize_umem;
+	cq->qplib_cq.sg_info.pgsize = PAGE_SIZE;
+	cq->qplib_cq.sg_info.pgshft = PAGE_SHIFT;
+	cq->qplib_cq.dpi = &uctx->dpi;
+
+	rc = bnxt_qplib_resize_cq(&rdev->qplib_res, &cq->qplib_cq, entries);
+	if (rc) {
+		ibdev_err(&rdev->ibdev, "Resize HW CQ %#x failed!",
+			  cq->qplib_cq.id);
+		goto fail;
+	}
+
+	cq->ib_cq.cqe = cq->resize_cqe;
+
+	return 0;
+
+fail:
+	if (cq->resize_umem) {
+		ib_umem_release(cq->resize_umem);
+		cq->resize_umem = NULL;
+		cq->resize_cqe = 0;
+		memcpy(&cq->qplib_cq.sg_info, &sg_info, sizeof(sg_info));
+		cq->qplib_cq.dpi = orig_dpi;
+	}
+	return rc;
+}
+
 static u8 __req_to_ib_wc_status(u8 qstatus)
 {
 	switch (qstatus) {
@@ -3425,6 +3525,15 @@  int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
 	struct bnxt_re_sqp_entries *sqp_entry = NULL;
 	unsigned long flags;
 
+	/* User CQ; the only processing we do is to
+	 * complete any pending CQ resize operation.
+	 */
+	if (cq->umem) {
+		if (cq->resize_umem)
+			bnxt_re_resize_cq_complete(cq);
+		return 0;
+	}
+
 	spin_lock_irqsave(&cq->cq_lock, flags);
 	budget = min_t(u32, num_entries, cq->max_cql);
 	num_entries = budget;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 9432626..31f7e34 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -104,6 +104,8 @@  struct bnxt_re_cq {
 #define MAX_CQL_PER_POLL	1024
 	u32			max_cql;
 	struct ib_umem		*umem;
+	struct ib_umem		*resize_umem;
+	int			resize_cqe;
 };
 
 struct bnxt_re_mr {
@@ -191,6 +193,7 @@  int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
 		      const struct ib_recv_wr **bad_recv_wr);
 int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		      struct ib_udata *udata);
+int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
 int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
 int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
 int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index c5867e7..48bbba7 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -553,6 +553,7 @@  static const struct ib_device_ops bnxt_re_dev_ops = {
 	.query_srq = bnxt_re_query_srq,
 	.reg_user_mr = bnxt_re_reg_user_mr,
 	.req_notify_cq = bnxt_re_req_notify_cq,
+	.resize_cq = bnxt_re_resize_cq,
 	INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah),
 	INIT_RDMA_OBJ_SIZE(ib_cq, bnxt_re_cq, ib_cq),
 	INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd),
@@ -584,6 +585,7 @@  static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 		return ret;
 
 	dma_set_max_seg_size(&rdev->en_dev->pdev->dev, UINT_MAX);
+	ibdev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ);
 	return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev);
 }
 
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 96e581c..1d769a3 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -2100,6 +2100,50 @@  int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 	return rc;
 }
 
+void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res,
+				   struct bnxt_qplib_cq *cq)
+{
+	bnxt_qplib_free_hwq(res, &cq->hwq);
+	memcpy(&cq->hwq, &cq->resize_hwq, sizeof(cq->hwq));
+}
+
+int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq,
+			 int new_cqes)
+{
+	struct bnxt_qplib_hwq_attr hwq_attr = {};
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	struct creq_resize_cq_resp resp = {};
+	struct cmdq_resize_cq req = {};
+	struct bnxt_qplib_pbl *pbl;
+	u32 pg_sz, lvl, new_sz;
+	u16 cmd_flags = 0;
+	int rc;
+
+	RCFW_CMD_PREP(req, RESIZE_CQ, cmd_flags);
+	hwq_attr.sginfo = &cq->sg_info;
+	hwq_attr.res = res;
+	hwq_attr.depth = new_cqes;
+	hwq_attr.stride = sizeof(struct cq_base);
+	hwq_attr.type = HWQ_TYPE_QUEUE;
+	rc = bnxt_qplib_alloc_init_hwq(&cq->resize_hwq, &hwq_attr);
+	if (rc)
+		return rc;
+
+	req.cq_cid = cpu_to_le32(cq->id);
+	pbl = &cq->resize_hwq.pbl[PBL_LVL_0];
+	pg_sz = bnxt_qplib_base_pg_size(&cq->resize_hwq);
+	lvl = (cq->resize_hwq.level << CMDQ_RESIZE_CQ_LVL_SFT) &
+				       CMDQ_RESIZE_CQ_LVL_MASK;
+	new_sz = (new_cqes << CMDQ_RESIZE_CQ_NEW_CQ_SIZE_SFT) &
+		  CMDQ_RESIZE_CQ_NEW_CQ_SIZE_MASK;
+	req.new_cq_size_pg_size_lvl = cpu_to_le32(new_sz | pg_sz | lvl);
+	req.new_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	return rc;
+}
+
 int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 0375019..d74d5ea 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -400,6 +400,7 @@  struct bnxt_qplib_cq {
 	u16				count;
 	u16				period;
 	struct bnxt_qplib_hwq		hwq;
+	struct bnxt_qplib_hwq		resize_hwq;
 	u32				cnq_hw_ring_id;
 	struct bnxt_qplib_nq		*nq;
 	bool				resize_in_progress;
@@ -532,6 +533,10 @@  void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp);
 int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
 			 struct bnxt_qplib_swqe *wqe);
 int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
+int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq,
+			 int new_cqes);
+void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res,
+				   struct bnxt_qplib_cq *cq);
 int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 		       int num, struct bnxt_qplib_qp **qp);
diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
index b1de99b..f761e75 100644
--- a/include/uapi/rdma/bnxt_re-abi.h
+++ b/include/uapi/rdma/bnxt_re-abi.h
@@ -96,6 +96,10 @@  struct bnxt_re_cq_resp {
 	__u32 rsvd;
 };
 
+struct bnxt_re_resize_cq_req {
+	__u64 cq_va;
+};
+
 struct bnxt_re_qp_req {
 	__aligned_u64 qpsva;
 	__aligned_u64 qprva;