diff mbox series

[for-next] RDMA/hns: Use the reserved loopback QPs to free MR before destroying MPT

Message ID 20220225095654.24684-1-liangwenpeng@huawei.com (mailing list archive)
State Superseded
Delegated to: Leon Romanovsky
Headers show
Series [for-next] RDMA/hns: Use the reserved loopback QPs to free MR before destroying MPT | expand

Commit Message

Wenpeng Liang Feb. 25, 2022, 9:56 a.m. UTC
From: Yixing Liu <liuyixing1@huawei.com>

Before destroying MPT, the reserved loopback QPs send loopback IOs (one
write operation per SL). Completing these loopback IOs represents that
there isn't any outstanding request in MPT, then it's safe to destroy MPT.

Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |   2 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 334 +++++++++++++++++++-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  20 ++
 drivers/infiniband/hw/hns/hns_roce_mr.c     |   6 +-
 4 files changed, 358 insertions(+), 4 deletions(-)

Comments

Leon Romanovsky Feb. 28, 2022, 12:01 p.m. UTC | #1
On Fri, Feb 25, 2022 at 05:56:54PM +0800, Wenpeng Liang wrote:
> From: Yixing Liu <liuyixing1@huawei.com>
> 
> Before destroying MPT, the reserved loopback QPs send loopback IOs (one
> write operation per SL). Completing these loopback IOs represents that
> there isn't any outstanding request in MPT, then it's safe to destroy MPT.
> 
> Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
> Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_device.h |   2 +
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 334 +++++++++++++++++++-
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  20 ++
>  drivers/infiniband/hw/hns/hns_roce_mr.c     |   6 +-
>  4 files changed, 358 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
> index 1e0bae136997..da0b4b310aab 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
> @@ -624,6 +624,7 @@ struct hns_roce_qp {
>  	u32			next_sge;
>  	enum ib_mtu		path_mtu;
>  	u32			max_inline_data;
> +	u8			free_mr_en;
>  
>  	/* 0: flush needed, 1: unneeded */
>  	unsigned long		flush_flag;
> @@ -882,6 +883,7 @@ struct hns_roce_hw {
>  			 enum ib_qp_state new_state);
>  	int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev,
>  			 struct hns_roce_qp *hr_qp);
> +	void (*dereg_mr)(struct hns_roce_dev *hr_dev);
>  	int (*init_eq)(struct hns_roce_dev *hr_dev);
>  	void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
>  	int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf);
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index b33e948fd060..62ee9c0bba74 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -2664,6 +2664,217 @@ static void free_dip_list(struct hns_roce_dev *hr_dev)
>  	spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
>  }
>  
> +static int free_mr_alloc_pd(struct hns_roce_dev *hr_dev,
> +			    struct hns_roce_v2_free_mr *free_mr)
> +{

You chose very non-intuitive name "free_mr...", but I don't have anything
concrete to suggest.

> +	struct ib_device *ibdev = &hr_dev->ib_dev;
> +	struct ib_pd *pd;
> +
> +	pd = ib_alloc_pd(ibdev, 0);
> +	if (IS_ERR(pd)) {
> +		ibdev_err(ibdev, "failed to create pd for free mr.\n");
> +		return PTR_ERR(pd);
> +	}
> +
> +	free_mr->rsv_pd = pd;
> +	return 0;
> +}
> +
> +static int free_mr_create_cq(struct hns_roce_dev *hr_dev,
> +			     struct hns_roce_v2_free_mr *free_mr)
> +{
> +	struct ib_device *ibdev = &hr_dev->ib_dev;
> +	struct ib_cq_init_attr cq_init_attr = {};
> +	struct ib_cq *cq;
> +
> +	cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM;
> +
> +	cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_init_attr);
> +	if (IS_ERR(cq)) {
> +		ibdev_err(ibdev, "failed to create cq for free mr.\n");
> +		return PTR_ERR(cq);
> +	}
> +
> +	free_mr->rsv_cq = cq;
> +	return 0;
> +}
> +
> +static int free_mr_create_loopback_qp(struct hns_roce_dev *hr_dev,
> +				      struct hns_roce_v2_free_mr *free_mr,
> +				      int sl_num)
> +{
> +	struct ib_device *ibdev = &hr_dev->ib_dev;
> +	struct ib_qp_init_attr init_attr = {};
> +	struct ib_pd *pd = free_mr->rsv_pd;
> +	struct ib_cq *cq = free_mr->rsv_cq;
> +	struct ib_qp *qp;
> +
> +	init_attr.qp_type		= IB_QPT_RC;
> +	init_attr.sq_sig_type		= IB_SIGNAL_ALL_WR;
> +	init_attr.send_cq		= cq;
> +	init_attr.recv_cq		= cq;
> +	init_attr.cap.max_send_wr	= HNS_ROCE_FREE_MR_USED_SQWQE_NUM;
> +	init_attr.cap.max_send_sge	= HNS_ROCE_FREE_MR_USED_SQSGE_NUM;
> +	init_attr.cap.max_recv_wr	= HNS_ROCE_FREE_MR_USED_RQWQE_NUM;
> +	init_attr.cap.max_recv_sge	= HNS_ROCE_FREE_MR_USED_RQSGE_NUM;

No vertical alignment in new code, please.

> +
> +	qp = ib_create_qp(pd, &init_attr);
> +	if (IS_ERR(qp)) {
> +		ibdev_err(ibdev, "failed to create qp for free mr.\n");
> +		return PTR_ERR(qp);
> +	}
> +
> +	free_mr->rsv_qp[sl_num] = qp;
> +	return 0;
> +}
> +
> +static int free_mr_create_qp(struct hns_roce_dev *hr_dev,
> +			     struct hns_roce_v2_free_mr *free_mr)
> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++)
> +		if (free_mr_create_loopback_qp(hr_dev, free_mr, i))
> +			return -ENOMEM;

Please don't overwrite returned error code - in all places.

> +
> +	return 0;
> +}
> +
> +static void free_mr_init_qp_attr(struct ib_qp_attr *attr)
> +{
> +	rdma_ah_set_grh(&attr->ah_attr, NULL, 0, 0, 1, 0);
> +	rdma_ah_set_static_rate(&attr->ah_attr, 3);
> +	rdma_ah_set_port_num(&attr->ah_attr, 1);
> +}
> +
> +static int free_mr_modify_loopback_qp(struct hns_roce_dev *hr_dev,
> +				      struct ib_qp_attr *attr, int sl_num)
> +{
> +	struct hns_roce_v2_priv *priv = hr_dev->priv;
> +	struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
> +	struct ib_device *ibdev = &hr_dev->ib_dev;
> +	struct hns_roce_qp *hr_qp;
> +	int loopback;
> +	int mask;
> +	int ret;
> +
> +	hr_qp = to_hr_qp(free_mr->rsv_qp[sl_num]);
> +	hr_qp->free_mr_en = 1;
> +
> +	mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS;
> +	attr->qp_state		= IB_QPS_INIT;
> +	attr->port_num		= 1;
> +	attr->qp_access_flags	= IB_ACCESS_REMOTE_WRITE;
> +	ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
> +	if (ret) {
> +		ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n",
> +			  ret);
> +		return ret;
> +	}
> +
> +	loopback = hr_dev->loop_idc;
> +	/* Set qpc lbi = 1 to indicate loopback IO */
> +	hr_dev->loop_idc = 1;
> +
> +	mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN |
> +	       IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
> +	attr->qp_state		= IB_QPS_RTR;
> +	attr->ah_attr.type	= RDMA_AH_ATTR_TYPE_ROCE;
> +	attr->path_mtu		= IB_MTU_256;
> +	attr->dest_qp_num	= hr_qp->qpn;
> +	attr->rq_psn		= HNS_ROCE_FREE_MR_USED_PSN;
> +
> +	rdma_ah_set_sl(&attr->ah_attr, (u8)sl_num);
> +
> +	ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
> +	hr_dev->loop_idc = loopback;
> +	if (ret) {
> +		ibdev_err(ibdev, "failed to modify qp to rtr, ret = %d.\n",
> +			  ret);
> +		return ret;
> +	}
> +
> +	mask = IB_QP_STATE | IB_QP_SQ_PSN | IB_QP_RETRY_CNT | IB_QP_TIMEOUT |
> +	       IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC;
> +	attr->qp_state		= IB_QPS_RTS;
> +	attr->sq_psn		= HNS_ROCE_FREE_MR_USED_PSN;
> +	attr->retry_cnt		= HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT;
> +	attr->timeout		= HNS_ROCE_FREE_MR_USED_QP_TIMEOUT;
> +	ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
> +	if (ret)
> +		ibdev_err(ibdev, "failed to modify qp to rts, ret = %d.\n",
> +			  ret);
> +
> +	return ret;
> +}
> +
> +static int free_mr_modify_qp(struct hns_roce_dev *hr_dev,
> +			     struct hns_roce_v2_free_mr *free_mr)
> +{
> +	struct ib_qp_attr attr = {};
> +	int ret;
> +	int i;
> +
> +	free_mr_init_qp_attr(&attr);
> +
> +	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
> +		ret = free_mr_modify_loopback_qp(hr_dev, &attr, i);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static void free_mr_exit(struct hns_roce_dev *hr_dev)
> +{
> +	struct hns_roce_v2_priv *priv = hr_dev->priv;
> +	struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
> +		if (free_mr->rsv_qp[i]) {
> +			(void)ib_destroy_qp(free_mr->rsv_qp[i]);

Please don't cast. It is not kernel coding style.

Thanks
Wenpeng Liang March 2, 2022, 12:44 p.m. UTC | #2
On 2022/2/28 20:01, Leon Romanovsky wrote:
> On Fri, Feb 25, 2022 at 05:56:54PM +0800, Wenpeng Liang wrote:
>> From: Yixing Liu <liuyixing1@huawei.com>
>>
>> Before destroying MPT, the reserved loopback QPs send loopback IOs (one
>> write operation per SL). Completing these loopback IOs represents that
>> there isn't any outstanding request in MPT, then it's safe to destroy MPT.
>>
>> Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
>> Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
>> ---
>>  drivers/infiniband/hw/hns/hns_roce_device.h |   2 +
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 334 +++++++++++++++++++-
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  20 ++
>>  drivers/infiniband/hw/hns/hns_roce_mr.c     |   6 +-
>>  4 files changed, 358 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
>> index 1e0bae136997..da0b4b310aab 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
>> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
>> @@ -624,6 +624,7 @@ struct hns_roce_qp {
>>  	u32			next_sge;
>>  	enum ib_mtu		path_mtu;
>>  	u32			max_inline_data;
>> +	u8			free_mr_en;
>>  
>>  	/* 0: flush needed, 1: unneeded */
>>  	unsigned long		flush_flag;
>> @@ -882,6 +883,7 @@ struct hns_roce_hw {
>>  			 enum ib_qp_state new_state);
>>  	int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev,
>>  			 struct hns_roce_qp *hr_qp);
>> +	void (*dereg_mr)(struct hns_roce_dev *hr_dev);
>>  	int (*init_eq)(struct hns_roce_dev *hr_dev);
>>  	void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
>>  	int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf);
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> index b33e948fd060..62ee9c0bba74 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> @@ -2664,6 +2664,217 @@ static void free_dip_list(struct hns_roce_dev *hr_dev)
>>  	spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
>>  }
>>  
>> +static int free_mr_alloc_pd(struct hns_roce_dev *hr_dev,
>> +			    struct hns_roce_v2_free_mr *free_mr)
>> +{
> 
> You chose very non-intuitive name "free_mr...", but I don't have anything
> concrete to suggest.
> 

Thank you for your advice. There are two alternative names for this event,
which are DRAIN_RESIDUAL_WR or DRAIN_WR. It is hard to decide which one is
better. Could you give me some suggestions for the naming?

Thanks,
Wenpeng

>> +	struct ib_device *ibdev = &hr_dev->ib_dev;
>> +	struct ib_pd *pd;
>> +
>> +	pd = ib_alloc_pd(ibdev, 0);
>> +	if (IS_ERR(pd)) {
>> +		ibdev_err(ibdev, "failed to create pd for free mr.\n");
>> +		return PTR_ERR(pd);
>> +	}
>> +
>> +	free_mr->rsv_pd = pd;
>> +	return 0;
>> +}
>> +
>> +static int free_mr_create_cq(struct hns_roce_dev *hr_dev,
>> +			     struct hns_roce_v2_free_mr *free_mr)
>> +{
>> +	struct ib_device *ibdev = &hr_dev->ib_dev;
>> +	struct ib_cq_init_attr cq_init_attr = {};
>> +	struct ib_cq *cq;
>> +
>> +	cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM;
>> +
>> +	cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_init_attr);
>> +	if (IS_ERR(cq)) {
>> +		ibdev_err(ibdev, "failed to create cq for free mr.\n");
>> +		return PTR_ERR(cq);
>> +	}
>> +
>> +	free_mr->rsv_cq = cq;
>> +	return 0;
>> +}
>> +
>> +static int free_mr_create_loopback_qp(struct hns_roce_dev *hr_dev,
>> +				      struct hns_roce_v2_free_mr *free_mr,
>> +				      int sl_num)
>> +{
>> +	struct ib_device *ibdev = &hr_dev->ib_dev;
>> +	struct ib_qp_init_attr init_attr = {};
>> +	struct ib_pd *pd = free_mr->rsv_pd;
>> +	struct ib_cq *cq = free_mr->rsv_cq;
>> +	struct ib_qp *qp;
>> +
>> +	init_attr.qp_type		= IB_QPT_RC;
>> +	init_attr.sq_sig_type		= IB_SIGNAL_ALL_WR;
>> +	init_attr.send_cq		= cq;
>> +	init_attr.recv_cq		= cq;
>> +	init_attr.cap.max_send_wr	= HNS_ROCE_FREE_MR_USED_SQWQE_NUM;
>> +	init_attr.cap.max_send_sge	= HNS_ROCE_FREE_MR_USED_SQSGE_NUM;
>> +	init_attr.cap.max_recv_wr	= HNS_ROCE_FREE_MR_USED_RQWQE_NUM;
>> +	init_attr.cap.max_recv_sge	= HNS_ROCE_FREE_MR_USED_RQSGE_NUM;
> 
> No vertical alignment in new code, please.
> 

Fix it in v2.

>> +
>> +	qp = ib_create_qp(pd, &init_attr);
>> +	if (IS_ERR(qp)) {
>> +		ibdev_err(ibdev, "failed to create qp for free mr.\n");
>> +		return PTR_ERR(qp);
>> +	}
>> +
>> +	free_mr->rsv_qp[sl_num] = qp;
>> +	return 0;
>> +}
>> +
>> +static int free_mr_create_qp(struct hns_roce_dev *hr_dev,
>> +			     struct hns_roce_v2_free_mr *free_mr)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++)
>> +		if (free_mr_create_loopback_qp(hr_dev, free_mr, i))
>> +			return -ENOMEM;
> 
> Please don't overwrite returned error code - in all places.
> 

The next version will be revised to the following form:

	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
		ret = free_mr_create_loopback_qp(hr_dev, free_mr, i);
		if (ret)
			return ret;
	}

>> +
>> +	return 0;
>> +}
>> +
>> +static void free_mr_init_qp_attr(struct ib_qp_attr *attr)
>> +{
>> +	rdma_ah_set_grh(&attr->ah_attr, NULL, 0, 0, 1, 0);
>> +	rdma_ah_set_static_rate(&attr->ah_attr, 3);
>> +	rdma_ah_set_port_num(&attr->ah_attr, 1);
>> +}
>> +
>> +static int free_mr_modify_loopback_qp(struct hns_roce_dev *hr_dev,
>> +				      struct ib_qp_attr *attr, int sl_num)
>> +{
>> +	struct hns_roce_v2_priv *priv = hr_dev->priv;
>> +	struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
>> +	struct ib_device *ibdev = &hr_dev->ib_dev;
>> +	struct hns_roce_qp *hr_qp;
>> +	int loopback;
>> +	int mask;
>> +	int ret;
>> +
>> +	hr_qp = to_hr_qp(free_mr->rsv_qp[sl_num]);
>> +	hr_qp->free_mr_en = 1;
>> +
>> +	mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS;
>> +	attr->qp_state		= IB_QPS_INIT;
>> +	attr->port_num		= 1;
>> +	attr->qp_access_flags	= IB_ACCESS_REMOTE_WRITE;
>> +	ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
>> +	if (ret) {
>> +		ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n",
>> +			  ret);
>> +		return ret;
>> +	}
>> +
>> +	loopback = hr_dev->loop_idc;
>> +	/* Set qpc lbi = 1 to indicate loopback IO */
>> +	hr_dev->loop_idc = 1;
>> +
>> +	mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN |
>> +	       IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
>> +	attr->qp_state		= IB_QPS_RTR;
>> +	attr->ah_attr.type	= RDMA_AH_ATTR_TYPE_ROCE;
>> +	attr->path_mtu		= IB_MTU_256;
>> +	attr->dest_qp_num	= hr_qp->qpn;
>> +	attr->rq_psn		= HNS_ROCE_FREE_MR_USED_PSN;
>> +
>> +	rdma_ah_set_sl(&attr->ah_attr, (u8)sl_num);
>> +
>> +	ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
>> +	hr_dev->loop_idc = loopback;
>> +	if (ret) {
>> +		ibdev_err(ibdev, "failed to modify qp to rtr, ret = %d.\n",
>> +			  ret);
>> +		return ret;
>> +	}
>> +
>> +	mask = IB_QP_STATE | IB_QP_SQ_PSN | IB_QP_RETRY_CNT | IB_QP_TIMEOUT |
>> +	       IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC;
>> +	attr->qp_state		= IB_QPS_RTS;
>> +	attr->sq_psn		= HNS_ROCE_FREE_MR_USED_PSN;
>> +	attr->retry_cnt		= HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT;
>> +	attr->timeout		= HNS_ROCE_FREE_MR_USED_QP_TIMEOUT;
>> +	ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
>> +	if (ret)
>> +		ibdev_err(ibdev, "failed to modify qp to rts, ret = %d.\n",
>> +			  ret);
>> +
>> +	return ret;
>> +}
>> +
>> +static int free_mr_modify_qp(struct hns_roce_dev *hr_dev,
>> +			     struct hns_roce_v2_free_mr *free_mr)
>> +{
>> +	struct ib_qp_attr attr = {};
>> +	int ret;
>> +	int i;
>> +
>> +	free_mr_init_qp_attr(&attr);
>> +
>> +	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
>> +		ret = free_mr_modify_loopback_qp(hr_dev, &attr, i);
>> +		if (ret)
>> +			return ret;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static void free_mr_exit(struct hns_roce_dev *hr_dev)
>> +{
>> +	struct hns_roce_v2_priv *priv = hr_dev->priv;
>> +	struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
>> +	int i;
>> +
>> +	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
>> +		if (free_mr->rsv_qp[i]) {
>> +			(void)ib_destroy_qp(free_mr->rsv_qp[i]);
> 
> Please don't cast. It is not kernel coding style.
> 

The next version will be revised to the following form

	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
		if (free_mr->rsv_qp[i]) {
			ret = ib_destroy_qp(free_mr->rsv_qp[i]);
			if (ret)
				ibdev_err(&hr_dev->ib_dev,
					  "failed to destroy qp in free mr.\n");

			free_mr->rsv_qp[i] = NULL;
		}
	}

Thanks,
Wenpeng

> Thanks
> .
>
Leon Romanovsky March 3, 2022, 5:57 p.m. UTC | #3
On Wed, Mar 02, 2022 at 08:44:48PM +0800, Wenpeng Liang wrote:
> On 2022/2/28 20:01, Leon Romanovsky wrote:
> > On Fri, Feb 25, 2022 at 05:56:54PM +0800, Wenpeng Liang wrote:
> >> From: Yixing Liu <liuyixing1@huawei.com>
> >>
> >> Before destroying MPT, the reserved loopback QPs send loopback IOs (one
> >> write operation per SL). Completing these loopback IOs represents that
> >> there isn't any outstanding request in MPT, then it's safe to destroy MPT.
> >>
> >> Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
> >> Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
> >> ---
> >>  drivers/infiniband/hw/hns/hns_roce_device.h |   2 +
> >>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 334 +++++++++++++++++++-
> >>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  20 ++
> >>  drivers/infiniband/hw/hns/hns_roce_mr.c     |   6 +-
> >>  4 files changed, 358 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
> >> index 1e0bae136997..da0b4b310aab 100644
> >> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
> >> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
> >> @@ -624,6 +624,7 @@ struct hns_roce_qp {
> >>  	u32			next_sge;
> >>  	enum ib_mtu		path_mtu;
> >>  	u32			max_inline_data;
> >> +	u8			free_mr_en;
> >>  
> >>  	/* 0: flush needed, 1: unneeded */
> >>  	unsigned long		flush_flag;
> >> @@ -882,6 +883,7 @@ struct hns_roce_hw {
> >>  			 enum ib_qp_state new_state);
> >>  	int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev,
> >>  			 struct hns_roce_qp *hr_qp);
> >> +	void (*dereg_mr)(struct hns_roce_dev *hr_dev);
> >>  	int (*init_eq)(struct hns_roce_dev *hr_dev);
> >>  	void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
> >>  	int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf);
> >> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> >> index b33e948fd060..62ee9c0bba74 100644
> >> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> >> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> >> @@ -2664,6 +2664,217 @@ static void free_dip_list(struct hns_roce_dev *hr_dev)
> >>  	spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
> >>  }
> >>  
> >> +static int free_mr_alloc_pd(struct hns_roce_dev *hr_dev,
> >> +			    struct hns_roce_v2_free_mr *free_mr)
> >> +{
> > 
> > You chose very non-intuitive name "free_mr...", but I don't have anything
> > concrete to suggest.
> > 
> 
> Thank you for your advice. There are two alternative names for this event,
> which are DRAIN_RESIDUAL_WR or DRAIN_WR. It is hard to decide which one is
> better. Could you give me some suggestions for the naming?

mlx5 called to such objects device resource - devr, see mlx5_ib_dev_res_init().
I personally would create something similar to that, one function
without separation to multiple free_mr_alloc_* functions.

Up-to you.

Thanks
Wenpeng Liang March 8, 2022, 8:13 a.m. UTC | #4
On 2022/3/4 1:57, Leon Romanovsky wrote:
> On Wed, Mar 02, 2022 at 08:44:48PM +0800, Wenpeng Liang wrote:
>> On 2022/2/28 20:01, Leon Romanovsky wrote:
>>> On Fri, Feb 25, 2022 at 05:56:54PM +0800, Wenpeng Liang wrote:
>>>> From: Yixing Liu <liuyixing1@huawei.com>
>>>>
>>>> Before destroying MPT, the reserved loopback QPs send loopback IOs (one
>>>> write operation per SL). Completing these loopback IOs represents that
>>>> there isn't any outstanding request in MPT, then it's safe to destroy MPT.
>>>>
>>>> Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
>>>> Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
>>>> ---
>>>>  drivers/infiniband/hw/hns/hns_roce_device.h |   2 +
>>>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 334 +++++++++++++++++++-
>>>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  20 ++
>>>>  drivers/infiniband/hw/hns/hns_roce_mr.c     |   6 +-
>>>>  4 files changed, 358 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
>>>> index 1e0bae136997..da0b4b310aab 100644
>>>> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
>>>> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
>>>> @@ -624,6 +624,7 @@ struct hns_roce_qp {
>>>>  	u32			next_sge;
>>>>  	enum ib_mtu		path_mtu;
>>>>  	u32			max_inline_data;
>>>> +	u8			free_mr_en;
>>>>  
>>>>  	/* 0: flush needed, 1: unneeded */
>>>>  	unsigned long		flush_flag;
>>>> @@ -882,6 +883,7 @@ struct hns_roce_hw {
>>>>  			 enum ib_qp_state new_state);
>>>>  	int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev,
>>>>  			 struct hns_roce_qp *hr_qp);
>>>> +	void (*dereg_mr)(struct hns_roce_dev *hr_dev);
>>>>  	int (*init_eq)(struct hns_roce_dev *hr_dev);
>>>>  	void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
>>>>  	int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf);
>>>> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>>>> index b33e948fd060..62ee9c0bba74 100644
>>>> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>>>> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>>>> @@ -2664,6 +2664,217 @@ static void free_dip_list(struct hns_roce_dev *hr_dev)
>>>>  	spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
>>>>  }
>>>>  
>>>> +static int free_mr_alloc_pd(struct hns_roce_dev *hr_dev,
>>>> +			    struct hns_roce_v2_free_mr *free_mr)
>>>> +{
>>>
>>> You chose very non-intuitive name "free_mr...", but I don't have anything
>>> concrete to suggest.
>>>
>>
>> Thank you for your advice. There are two alternative names for this event,
>> which are DRAIN_RESIDUAL_WR or DRAIN_WR. It is hard to decide which one is
>> better. Could you give me some suggestions for the naming?
> 
> mlx5 called to such objects device resource - devr, see mlx5_ib_dev_res_init().
> I personally would create something similar to that, one function
> without separation to multiple free_mr_alloc_* functions.
> 
> Up-to you.
> 

Thank you for your suggestion.
I will put the creation of resources in one function.

Thanks,
Wenpeng

> Thanks
> .
>
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 1e0bae136997..da0b4b310aab 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -624,6 +624,7 @@  struct hns_roce_qp {
 	u32			next_sge;
 	enum ib_mtu		path_mtu;
 	u32			max_inline_data;
+	u8			free_mr_en;
 
 	/* 0: flush needed, 1: unneeded */
 	unsigned long		flush_flag;
@@ -882,6 +883,7 @@  struct hns_roce_hw {
 			 enum ib_qp_state new_state);
 	int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev,
 			 struct hns_roce_qp *hr_qp);
+	void (*dereg_mr)(struct hns_roce_dev *hr_dev);
 	int (*init_eq)(struct hns_roce_dev *hr_dev);
 	void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
 	int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index b33e948fd060..62ee9c0bba74 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -2664,6 +2664,217 @@  static void free_dip_list(struct hns_roce_dev *hr_dev)
 	spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
 }
 
+static int free_mr_alloc_pd(struct hns_roce_dev *hr_dev,
+			    struct hns_roce_v2_free_mr *free_mr)
+{
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct ib_pd *pd;
+
+	pd = ib_alloc_pd(ibdev, 0);
+	if (IS_ERR(pd)) {
+		ibdev_err(ibdev, "failed to create pd for free mr.\n");
+		return PTR_ERR(pd);
+	}
+
+	free_mr->rsv_pd = pd;
+	return 0;
+}
+
+static int free_mr_create_cq(struct hns_roce_dev *hr_dev,
+			     struct hns_roce_v2_free_mr *free_mr)
+{
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct ib_cq_init_attr cq_init_attr = {};
+	struct ib_cq *cq;
+
+	cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM;
+
+	cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_init_attr);
+	if (IS_ERR(cq)) {
+		ibdev_err(ibdev, "failed to create cq for free mr.\n");
+		return PTR_ERR(cq);
+	}
+
+	free_mr->rsv_cq = cq;
+	return 0;
+}
+
+static int free_mr_create_loopback_qp(struct hns_roce_dev *hr_dev,
+				      struct hns_roce_v2_free_mr *free_mr,
+				      int sl_num)
+{
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct ib_qp_init_attr init_attr = {};
+	struct ib_pd *pd = free_mr->rsv_pd;
+	struct ib_cq *cq = free_mr->rsv_cq;
+	struct ib_qp *qp;
+
+	init_attr.qp_type		= IB_QPT_RC;
+	init_attr.sq_sig_type		= IB_SIGNAL_ALL_WR;
+	init_attr.send_cq		= cq;
+	init_attr.recv_cq		= cq;
+	init_attr.cap.max_send_wr	= HNS_ROCE_FREE_MR_USED_SQWQE_NUM;
+	init_attr.cap.max_send_sge	= HNS_ROCE_FREE_MR_USED_SQSGE_NUM;
+	init_attr.cap.max_recv_wr	= HNS_ROCE_FREE_MR_USED_RQWQE_NUM;
+	init_attr.cap.max_recv_sge	= HNS_ROCE_FREE_MR_USED_RQSGE_NUM;
+
+	qp = ib_create_qp(pd, &init_attr);
+	if (IS_ERR(qp)) {
+		ibdev_err(ibdev, "failed to create qp for free mr.\n");
+		return PTR_ERR(qp);
+	}
+
+	free_mr->rsv_qp[sl_num] = qp;
+	return 0;
+}
+
+static int free_mr_create_qp(struct hns_roce_dev *hr_dev,
+			     struct hns_roce_v2_free_mr *free_mr)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++)
+		if (free_mr_create_loopback_qp(hr_dev, free_mr, i))
+			return -ENOMEM;
+
+	return 0;
+}
+
+static void free_mr_init_qp_attr(struct ib_qp_attr *attr)
+{
+	rdma_ah_set_grh(&attr->ah_attr, NULL, 0, 0, 1, 0);
+	rdma_ah_set_static_rate(&attr->ah_attr, 3);
+	rdma_ah_set_port_num(&attr->ah_attr, 1);
+}
+
+static int free_mr_modify_loopback_qp(struct hns_roce_dev *hr_dev,
+				      struct ib_qp_attr *attr, int sl_num)
+{
+	struct hns_roce_v2_priv *priv = hr_dev->priv;
+	struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct hns_roce_qp *hr_qp;
+	int loopback;
+	int mask;
+	int ret;
+
+	hr_qp = to_hr_qp(free_mr->rsv_qp[sl_num]);
+	hr_qp->free_mr_en = 1;
+
+	mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS;
+	attr->qp_state		= IB_QPS_INIT;
+	attr->port_num		= 1;
+	attr->qp_access_flags	= IB_ACCESS_REMOTE_WRITE;
+	ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
+	if (ret) {
+		ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n",
+			  ret);
+		return ret;
+	}
+
+	loopback = hr_dev->loop_idc;
+	/* Set qpc lbi = 1 to indicate loopback IO */
+	hr_dev->loop_idc = 1;
+
+	mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN |
+	       IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
+	attr->qp_state		= IB_QPS_RTR;
+	attr->ah_attr.type	= RDMA_AH_ATTR_TYPE_ROCE;
+	attr->path_mtu		= IB_MTU_256;
+	attr->dest_qp_num	= hr_qp->qpn;
+	attr->rq_psn		= HNS_ROCE_FREE_MR_USED_PSN;
+
+	rdma_ah_set_sl(&attr->ah_attr, (u8)sl_num);
+
+	ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
+	hr_dev->loop_idc = loopback;
+	if (ret) {
+		ibdev_err(ibdev, "failed to modify qp to rtr, ret = %d.\n",
+			  ret);
+		return ret;
+	}
+
+	mask = IB_QP_STATE | IB_QP_SQ_PSN | IB_QP_RETRY_CNT | IB_QP_TIMEOUT |
+	       IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC;
+	attr->qp_state		= IB_QPS_RTS;
+	attr->sq_psn		= HNS_ROCE_FREE_MR_USED_PSN;
+	attr->retry_cnt		= HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT;
+	attr->timeout		= HNS_ROCE_FREE_MR_USED_QP_TIMEOUT;
+	ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
+	if (ret)
+		ibdev_err(ibdev, "failed to modify qp to rts, ret = %d.\n",
+			  ret);
+
+	return ret;
+}
+
+static int free_mr_modify_qp(struct hns_roce_dev *hr_dev,
+			     struct hns_roce_v2_free_mr *free_mr)
+{
+	struct ib_qp_attr attr = {};
+	int ret;
+	int i;
+
+	free_mr_init_qp_attr(&attr);
+
+	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+		ret = free_mr_modify_loopback_qp(hr_dev, &attr, i);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void free_mr_exit(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_v2_priv *priv = hr_dev->priv;
+	struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+		if (free_mr->rsv_qp[i]) {
+			(void)ib_destroy_qp(free_mr->rsv_qp[i]);
+			free_mr->rsv_qp[i] = NULL;
+		}
+	}
+
+	if (free_mr->rsv_cq) {
+		ib_destroy_cq(free_mr->rsv_cq);
+		free_mr->rsv_cq = NULL;
+	}
+
+	if (free_mr->rsv_pd) {
+		ib_dealloc_pd(free_mr->rsv_pd);
+		free_mr->rsv_pd = NULL;
+	}
+}
+
+static int free_mr_init(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_v2_priv *priv = hr_dev->priv;
+	struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+
+	if (free_mr_alloc_pd(hr_dev, free_mr))
+		return -ENOMEM;
+
+	if (free_mr_create_cq(hr_dev, free_mr))
+		goto create_failed;
+
+	if (free_mr_create_qp(hr_dev, free_mr))
+		goto create_failed;
+
+	if (free_mr_modify_qp(hr_dev, free_mr))
+		goto create_failed;
+
+	return 0;
+
+create_failed:
+	free_mr_exit(hr_dev);
+
+	return -ENOMEM;
+}
+
 static int get_hem_table(struct hns_roce_dev *hr_dev)
 {
 	unsigned int qpc_count;
@@ -3245,6 +3456,98 @@  static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
 	return 0;
 }
 
+static int free_mr_post_send_lp_wqe(struct hns_roce_qp *hr_qp)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	const struct ib_send_wr *bad_wr;
+	struct ib_rdma_wr rdma_wr = {};
+	struct ib_send_wr *send_wr;
+	int ret;
+
+	send_wr = &rdma_wr.wr;
+	send_wr->opcode = IB_WR_RDMA_WRITE;
+
+	ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr);
+	if (ret) {
+		ibdev_err(ibdev, "failed to post wqe for free mr, ret = %d.\n",
+			  ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
+			       struct ib_wc *wc);
+
+static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_v2_priv *priv = hr_dev->priv;
+	struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+	struct ib_wc wc[ARRAY_SIZE(free_mr->rsv_qp)];
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct hns_roce_qp *hr_qp;
+	unsigned long end;
+	int cqe_cnt = 0;
+	int npolled;
+	int ret;
+	int i;
+
+	/*
+	 * If the device initialization is not complete or in the uninstall
+	 * process, then there is no need to execute free mr.
+	 */
+	if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT ||
+	    priv->handle->rinfo.instance_state == HNS_ROCE_STATE_INIT ||
+	    hr_dev->state == HNS_ROCE_DEVICE_STATE_UNINIT)
+		return;
+
+	mutex_lock(&free_mr->mutex);
+
+	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+		hr_qp = to_hr_qp(free_mr->rsv_qp[i]);
+
+		ret = free_mr_post_send_lp_wqe(hr_qp);
+		if (ret) {
+			ibdev_err(ibdev,
+				  "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n",
+				  hr_qp->qpn, ret);
+			break;
+		}
+
+		cqe_cnt++;
+	}
+
+	end = msecs_to_jiffies(HNS_ROCE_V2_FREE_MR_TIMEOUT) + jiffies;
+	while (cqe_cnt) {
+		npolled = hns_roce_v2_poll_cq(free_mr->rsv_cq, cqe_cnt, wc);
+		if (npolled < 0) {
+			ibdev_err(ibdev,
+				  "failed to poll cqe for free mr, remain %d cqe.\n",
+				  cqe_cnt);
+			goto out;
+		}
+
+		if (time_after(jiffies, end)) {
+			ibdev_err(ibdev,
+				  "failed to poll cqe for free mr and timeout, remain %d cqe.\n",
+				  cqe_cnt);
+			goto out;
+		}
+		cqe_cnt -= npolled;
+	}
+
+out:
+	mutex_unlock(&free_mr->mutex);
+}
+
+static void hns_roce_v2_dereg_mr(struct hns_roce_dev *hr_dev)
+{
+	if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+		free_mr_send_cmd_to_hw(hr_dev);
+}
+
 static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
 {
 	return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size);
@@ -4667,6 +4970,18 @@  static int hns_roce_v2_set_path(struct ib_qp *ibqp,
 	u8 hr_port;
 	int ret;
 
+	/*
+	 * A QP with free_mr_en set is one of the reserved loopback QPs
+	 * used by the free-MR flow. Its traffic never leaves the device,
+	 * so in this loopback scenario only the SL field needs to be set.
+	 */
+	if (hr_qp->free_mr_en) {
+		hr_reg_write(context, QPC_SL, rdma_ah_get_sl(&attr->ah_attr));
+		hr_reg_clear(qpc_mask, QPC_SL);
+		hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+		return 0;
+	}
+
 	ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num : hr_qp->port + 1;
 	hr_port = ib_port - 1;
 	is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) &&
@@ -6258,6 +6573,7 @@  static const struct hns_roce_hw hns_roce_hw_v2 = {
 	.set_hem = hns_roce_v2_set_hem,
 	.clear_hem = hns_roce_v2_clear_hem,
 	.modify_qp = hns_roce_v2_modify_qp,
+	.dereg_mr = hns_roce_v2_dereg_mr,
 	.qp_flow_control_init = hns_roce_v2_qp_flow_control_init,
 	.init_eq = hns_roce_v2_init_eq_table,
 	.cleanup_eq = hns_roce_v2_cleanup_eq_table,
@@ -6339,14 +6655,25 @@  static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
 	ret = hns_roce_init(hr_dev);
 	if (ret) {
 		dev_err(hr_dev->dev, "RoCE Engine init failed!\n");
-		goto error_failed_get_cfg;
+		goto error_failed_cfg;
+	}
+
+	if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+		ret = free_mr_init(hr_dev);
+		if (ret) {
+			dev_err(hr_dev->dev, "failed to init free mr!\n");
+			goto error_failed_roce_init;
+		}
 	}
 
 	handle->priv = hr_dev;
 
 	return 0;
 
-error_failed_get_cfg:
+error_failed_roce_init:
+	hns_roce_exit(hr_dev);
+
+error_failed_cfg:
 	kfree(hr_dev->priv);
 
 error_failed_kzalloc:
@@ -6368,6 +6695,9 @@  static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
 	hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT;
 	hns_roce_handle_device_err(hr_dev);
 
+	if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+		free_mr_exit(hr_dev);
+
 	hns_roce_exit(hr_dev);
 	kfree(hr_dev->priv);
 	ib_dealloc_device(&hr_dev->ib_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 12be85f0986e..0d87b627601e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -139,6 +139,18 @@  enum {
 #define CMD_CSQ_DESC_NUM		1024
 #define CMD_CRQ_DESC_NUM		1024
 
+/* Free mr used parameters */
+#define HNS_ROCE_FREE_MR_USED_CQE_NUM		128
+#define HNS_ROCE_FREE_MR_USED_QP_NUM		0x8
+#define HNS_ROCE_FREE_MR_USED_PSN		0x0808
+#define HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT	0x7
+#define HNS_ROCE_FREE_MR_USED_QP_TIMEOUT	0x12
+#define HNS_ROCE_FREE_MR_USED_SQWQE_NUM		128
+#define HNS_ROCE_FREE_MR_USED_SQSGE_NUM		0x2
+#define HNS_ROCE_FREE_MR_USED_RQWQE_NUM		128
+#define HNS_ROCE_FREE_MR_USED_RQSGE_NUM		0x2
+#define HNS_ROCE_V2_FREE_MR_TIMEOUT		4500
+
 enum {
 	NO_ARMED = 0x0,
 	REG_NXT_CEQE = 0x2,
@@ -1418,10 +1430,18 @@  struct hns_roce_link_table {
 #define HNS_ROCE_EXT_LLM_ENTRY(addr, id) (((id) << (64 - 12)) | ((addr) >> 12))
 #define HNS_ROCE_EXT_LLM_MIN_PAGES(que_num) ((que_num) * 4 + 2)
 
+struct hns_roce_v2_free_mr {
+	struct ib_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM];
+	struct ib_cq *rsv_cq;
+	struct ib_pd *rsv_pd;
+	struct mutex mutex;
+};
+
 struct hns_roce_v2_priv {
 	struct hnae3_handle *handle;
 	struct hns_roce_v2_cmq cmq;
 	struct hns_roce_link_table ext_llm;
+	struct hns_roce_v2_free_mr free_mr;
 };
 
 struct hns_roce_dip {
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 2ee06b906b60..5bd3b19d3a05 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -137,8 +137,7 @@  static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
 	hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
 }
 
-static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
-			     struct hns_roce_mr *mr)
+static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
 {
 	struct ib_device *ibdev = &hr_dev->ib_dev;
 	int ret;
@@ -361,6 +360,9 @@  int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 	struct hns_roce_mr *mr = to_hr_mr(ibmr);
 	int ret = 0;
 
+	if (hr_dev->hw->dereg_mr)
+		hr_dev->hw->dereg_mr(hr_dev);
+
 	hns_roce_mr_free(hr_dev, mr);
 	kfree(mr);