diff mbox series

[v4,for-next,2/3] RDMA/hns: Add SCC context clr support for hip08

Message ID 1544002904-28495-3-git-send-email-oulijun@huawei.com (mailing list archive)
State Superseded
Headers show
Series Add DCQCN support for hip08 | expand

Commit Message

Lijun Ou Dec. 5, 2018, 9:41 a.m. UTC
From: Yangyang Li <liyangyang20@huawei.com>

This patch adds SCC context clear support for DCQCN
in kernel space driver.

Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  3 ++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 59 ++++++++++++++++++++++++++++-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  | 17 +++++++++
 drivers/infiniband/hw/hns/hns_roce_qp.c     |  9 +++++
 4 files changed, 87 insertions(+), 1 deletion(-)

Comments

Leon Romanovsky Dec. 5, 2018, 5:06 p.m. UTC | #1
On Wed, Dec 05, 2018 at 05:41:43PM +0800, Lijun Ou wrote:
> From: Yangyang Li <liyangyang20@huawei.com>
>
> This patch adds SCC context clear support for DCQCN
> in kernel space driver.
>
> Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_device.h |  3 ++
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 59 ++++++++++++++++++++++++++++-
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  | 17 +++++++++
>  drivers/infiniband/hw/hns/hns_roce_qp.c     |  9 +++++
>  4 files changed, 87 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
> index 718b415..6fe1871 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
> @@ -202,6 +202,7 @@ enum {
>  	HNS_ROCE_CAP_FLAG_SRQ			= BIT(5),
>  	HNS_ROCE_CAP_FLAG_MW			= BIT(7),
>  	HNS_ROCE_CAP_FLAG_FRMR                  = BIT(8),
> +	HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL		= BIT(9),
>  	HNS_ROCE_CAP_FLAG_ATOMIC		= BIT(10),
>  };
>
> @@ -867,6 +868,8 @@ struct hns_roce_hw {
>  			 int attr_mask, enum ib_qp_state cur_state,
>  			 enum ib_qp_state new_state);
>  	int (*destroy_qp)(struct ib_qp *ibqp);
> +	int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev,
> +			 struct hns_roce_qp *hr_qp);
>  	int (*post_send)(struct ib_qp *ibqp, const struct ib_send_wr *wr,
>  			 const struct ib_send_wr **bad_wr);
>  	int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index 77cfd9b..4520061 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -1392,7 +1392,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
>
>  	if (hr_dev->pci_dev->revision == 0x21) {
>  		caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC |
> -			       HNS_ROCE_CAP_FLAG_SRQ;
> +			       HNS_ROCE_CAP_FLAG_SRQ |
> +			       HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
>  		caps->scc_ctx_entry_sz	= HNS_ROCE_V2_SCC_CTX_ENTRY_SZ;
>  		caps->scc_ctx_ba_pg_sz	= 0;
>  		caps->scc_ctx_buf_pg_sz = 0;
> @@ -4210,6 +4211,61 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp)
>  	return 0;
>  }
>
> +static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
> +						struct hns_roce_qp *hr_qp)
> +{
> +	struct hns_roce_scc_ctx_clr *scc_cxt_clr;
> +	struct hns_roce_scc_ctx_clr_done *resp;
> +	struct hns_roce_scc_ctx_clr_done *rst;
> +	struct hns_roce_cmq_desc desc;
> +	int ret;
> +	int i;
> +
> +	/* set scc ctx clear done flag */
> +	hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCC_CTX,
> +				      false);
> +
> +	rst = (struct hns_roce_scc_ctx_clr_done *)desc.data;
> +	memset(rst, 0, sizeof(*rst));
> +	roce_set_bit(rst->rocee_scc_ctx_clr_done,
> +		     HNS_ROCE_V2_SCC_CTX_DONE_S,
> +		     0);
> +
> +	ret =  hns_roce_cmq_send(hr_dev, &desc, 1);
> +	if (ret)
> +		return ret;
> +
> +	/* clear scc context */
> +	hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_SCC_CTX_CLR,
> +				      false);
> +
> +	scc_cxt_clr = (struct hns_roce_scc_ctx_clr *)desc.data;
> +	memset(scc_cxt_clr, 0, sizeof(*scc_cxt_clr));
> +	scc_cxt_clr->rocee_scc_ctx_clr_qpn = hr_qp->qpn;
> +
> +	ret =  hns_roce_cmq_send(hr_dev, &desc, 1);
> +	if (ret)
> +		return ret;
> +
> +	/* query scc context clear is done or not */
> +	for (i = 0; i <= HNS_ROCE_CMQ_SCC_CLR_DONE_CNT; i++) {
> +		hns_roce_cmq_setup_basic_desc(&desc,
> +					      HNS_ROCE_OPC_QUERY_SCC_CTX, true);
> +		resp = (struct hns_roce_scc_ctx_clr_done *)desc.data;
> +		memset(resp, 0, sizeof(*resp));
> +
> +		ret = hns_roce_cmq_send(hr_dev, &desc, 1);
> +		if (ret)
> +			return ret;
> +
> +		if (resp->rocee_scc_ctx_clr_done == 1)
> +			return 0;

Sorry for my question, but I'm a little bit lost here.

You took pointer "resp", cleared the data and the resp->rocee_scc_ctx_clr_done.
Are you still expecting to see here resp->rocee_scc_ctx_clr_done = 1 ?

Thanks

> +	}
> +
> +	dev_err(hr_dev->dev, "clear scc ctx failure!");
> +	return -EINVAL;
> +}
> +
>  static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
>  {
>  	struct hns_roce_dev *hr_dev = to_hr_dev(cq->device);
> @@ -5740,6 +5796,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
>  	.modify_qp = hns_roce_v2_modify_qp,
>  	.query_qp = hns_roce_v2_query_qp,
>  	.destroy_qp = hns_roce_v2_destroy_qp,
> +	.qp_flow_control_init = hns_roce_v2_qp_flow_control_init,
>  	.modify_cq = hns_roce_v2_modify_cq,
>  	.post_send = hns_roce_v2_post_send,
>  	.post_recv = hns_roce_v2_post_recv,
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> index b92eb30..bd9f086 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> @@ -122,6 +122,8 @@
>  #define HNS_ROCE_CMQ_EN_B		16
>  #define HNS_ROCE_CMQ_ENABLE		BIT(HNS_ROCE_CMQ_EN_B)
>
> +#define HNS_ROCE_CMQ_SCC_CLR_DONE_CNT		100
> +
>  #define check_whether_last_step(hop_num, step_idx) \
>  	((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \
>  	(step_idx == 1 && hop_num == 1) || \
> @@ -227,6 +229,9 @@ enum hns_roce_opcode_type {
>  	HNS_ROCE_OPC_CFG_SGID_TB			= 0x8500,
>  	HNS_ROCE_OPC_CFG_SMAC_TB			= 0x8501,
>  	HNS_ROCE_OPC_CFG_BT_ATTR			= 0x8506,
> +	HNS_ROCE_OPC_SCC_CTX_CLR			= 0x8509,
> +	HNS_ROCE_OPC_QUERY_SCC_CTX			= 0x850a,
> +	HNS_ROCE_OPC_RESET_SCC_CTX			= 0x850b,
>  };
>
>  enum {
> @@ -1726,4 +1731,16 @@ struct hns_roce_wqe_atomic_seg {
>  	__le64          cmp_data;
>  };
>
> +#define HNS_ROCE_V2_SCC_CTX_DONE_S 0
> +
> +struct hns_roce_scc_ctx_clr {
> +	__le32 rocee_scc_ctx_clr_qpn;
> +	__le32 rsv[5];
> +};
> +
> +struct hns_roce_scc_ctx_clr_done {
> +	__le32 rocee_scc_ctx_clr_done;
> +	__le32 rsv[5];
> +};
> +
>  #endif
> diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
> index 895274a..abf29bf 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_qp.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
> @@ -812,6 +812,15 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
>
>  		hr_qp->rdb_en = 1;
>  	}
> +
> +	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
> +		ret = hr_dev->hw->qp_flow_control_init(hr_dev, hr_qp);
> +		if (ret) {
> +			dev_err(hr_dev->dev, "qp flow control init failure!");
> +			goto err_qp;
> +		}
> +	}
> +
>  	hr_qp->event = hns_roce_ib_qp_event;
>
>  	return 0;
> --
> 1.9.1
>
Yangyang Li Dec. 6, 2018, 6:11 a.m. UTC | #2
On 2018/12/6 1:06, Leon Romanovsky wrote:
> On Wed, Dec 05, 2018 at 05:41:43PM +0800, Lijun Ou wrote:
>> From: Yangyang Li <liyangyang20@huawei.com>
>>
>> This patch adds SCC context clear support for DCQCN
>> in kernel space driver.
>>
>> Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
>> ---
>>  drivers/infiniband/hw/hns/hns_roce_device.h |  3 ++
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 59 ++++++++++++++++++++++++++++-
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  | 17 +++++++++
>>  drivers/infiniband/hw/hns/hns_roce_qp.c     |  9 +++++
>>  4 files changed, 87 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
>> index 718b415..6fe1871 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
>> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
>> @@ -202,6 +202,7 @@ enum {
>>  	HNS_ROCE_CAP_FLAG_SRQ			= BIT(5),
>>  	HNS_ROCE_CAP_FLAG_MW			= BIT(7),
>>  	HNS_ROCE_CAP_FLAG_FRMR                  = BIT(8),
>> +	HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL		= BIT(9),
>>  	HNS_ROCE_CAP_FLAG_ATOMIC		= BIT(10),
>>  };
>>
>> @@ -867,6 +868,8 @@ struct hns_roce_hw {
>>  			 int attr_mask, enum ib_qp_state cur_state,
>>  			 enum ib_qp_state new_state);
>>  	int (*destroy_qp)(struct ib_qp *ibqp);
>> +	int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev,
>> +			 struct hns_roce_qp *hr_qp);
>>  	int (*post_send)(struct ib_qp *ibqp, const struct ib_send_wr *wr,
>>  			 const struct ib_send_wr **bad_wr);
>>  	int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> index 77cfd9b..4520061 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> @@ -1392,7 +1392,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
>>
>>  	if (hr_dev->pci_dev->revision == 0x21) {
>>  		caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC |
>> -			       HNS_ROCE_CAP_FLAG_SRQ;
>> +			       HNS_ROCE_CAP_FLAG_SRQ |
>> +			       HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
>>  		caps->scc_ctx_entry_sz	= HNS_ROCE_V2_SCC_CTX_ENTRY_SZ;
>>  		caps->scc_ctx_ba_pg_sz	= 0;
>>  		caps->scc_ctx_buf_pg_sz = 0;
>> @@ -4210,6 +4211,61 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp)
>>  	return 0;
>>  }
>>
>> +static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
>> +						struct hns_roce_qp *hr_qp)
>> +{
>> +	struct hns_roce_scc_ctx_clr *scc_cxt_clr;
>> +	struct hns_roce_scc_ctx_clr_done *resp;
>> +	struct hns_roce_scc_ctx_clr_done *rst;
>> +	struct hns_roce_cmq_desc desc;
>> +	int ret;
>> +	int i;
>> +
>> +	/* set scc ctx clear done flag */
>> +	hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCC_CTX,
>> +				      false);
>> +
>> +	rst = (struct hns_roce_scc_ctx_clr_done *)desc.data;
>> +	memset(rst, 0, sizeof(*rst));
>> +	roce_set_bit(rst->rocee_scc_ctx_clr_done,
>> +		     HNS_ROCE_V2_SCC_CTX_DONE_S,
>> +		     0);
>> +
>> +	ret =  hns_roce_cmq_send(hr_dev, &desc, 1);
>> +	if (ret)
>> +		return ret;
>> +
>> +	/* clear scc context */
>> +	hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_SCC_CTX_CLR,
>> +				      false);
>> +
>> +	scc_cxt_clr = (struct hns_roce_scc_ctx_clr *)desc.data;
>> +	memset(scc_cxt_clr, 0, sizeof(*scc_cxt_clr));
>> +	scc_cxt_clr->rocee_scc_ctx_clr_qpn = hr_qp->qpn;
>> +
>> +	ret =  hns_roce_cmq_send(hr_dev, &desc, 1);
>> +	if (ret)
>> +		return ret;
>> +
>> +	/* query scc context clear is done or not */
>> +	for (i = 0; i <= HNS_ROCE_CMQ_SCC_CLR_DONE_CNT; i++) {
>> +		hns_roce_cmq_setup_basic_desc(&desc,
>> +					      HNS_ROCE_OPC_QUERY_SCC_CTX, true);
>> +		resp = (struct hns_roce_scc_ctx_clr_done *)desc.data;
>> +		memset(resp, 0, sizeof(*resp));
>> +
>> +		ret = hns_roce_cmq_send(hr_dev, &desc, 1);
>> +		if (ret)
>> +			return ret;
>> +
>> +		if (resp->rocee_scc_ctx_clr_done == 1)
>> +			return 0;
> 
> Sorry for my question, but I'm a little bit lost here.
> 
> You took pointer "resp", cleared the data and the resp->rocee_scc_ctx_clr_done.
> Are you still expecting to see here resp->rocee_scc_ctx_clr_done = 1 ?
"resp" point to "desc.data" which will be sent to hardware through
function "hns_roce_cmq_send".
resp->rocee_scc_ctx_clr_done will be set to 1 by hardware if the
scc context clear is successful. If not, the query is retried until the
maximum number of queries, HNS_ROCE_CMQ_SCC_CLR_DONE_CNT, is reached.

Thanks.

> 
> Thanks
> 
>> +	}
>> +
>> +	dev_err(hr_dev->dev, "clear scc ctx failure!");
>> +	return -EINVAL;
>> +}
>> +
>>  static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
>>  {
>>  	struct hns_roce_dev *hr_dev = to_hr_dev(cq->device);
>> @@ -5740,6 +5796,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
>>  	.modify_qp = hns_roce_v2_modify_qp,
>>  	.query_qp = hns_roce_v2_query_qp,
>>  	.destroy_qp = hns_roce_v2_destroy_qp,
>> +	.qp_flow_control_init = hns_roce_v2_qp_flow_control_init,
>>  	.modify_cq = hns_roce_v2_modify_cq,
>>  	.post_send = hns_roce_v2_post_send,
>>  	.post_recv = hns_roce_v2_post_recv,
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
>> index b92eb30..bd9f086 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
>> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
>> @@ -122,6 +122,8 @@
>>  #define HNS_ROCE_CMQ_EN_B		16
>>  #define HNS_ROCE_CMQ_ENABLE		BIT(HNS_ROCE_CMQ_EN_B)
>>
>> +#define HNS_ROCE_CMQ_SCC_CLR_DONE_CNT		100
>> +
>>  #define check_whether_last_step(hop_num, step_idx) \
>>  	((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \
>>  	(step_idx == 1 && hop_num == 1) || \
>> @@ -227,6 +229,9 @@ enum hns_roce_opcode_type {
>>  	HNS_ROCE_OPC_CFG_SGID_TB			= 0x8500,
>>  	HNS_ROCE_OPC_CFG_SMAC_TB			= 0x8501,
>>  	HNS_ROCE_OPC_CFG_BT_ATTR			= 0x8506,
>> +	HNS_ROCE_OPC_SCC_CTX_CLR			= 0x8509,
>> +	HNS_ROCE_OPC_QUERY_SCC_CTX			= 0x850a,
>> +	HNS_ROCE_OPC_RESET_SCC_CTX			= 0x850b,
>>  };
>>
>>  enum {
>> @@ -1726,4 +1731,16 @@ struct hns_roce_wqe_atomic_seg {
>>  	__le64          cmp_data;
>>  };
>>
>> +#define HNS_ROCE_V2_SCC_CTX_DONE_S 0
>> +
>> +struct hns_roce_scc_ctx_clr {
>> +	__le32 rocee_scc_ctx_clr_qpn;
>> +	__le32 rsv[5];
>> +};
>> +
>> +struct hns_roce_scc_ctx_clr_done {
>> +	__le32 rocee_scc_ctx_clr_done;
>> +	__le32 rsv[5];
>> +};
>> +
>>  #endif
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
>> index 895274a..abf29bf 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_qp.c
>> +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
>> @@ -812,6 +812,15 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
>>
>>  		hr_qp->rdb_en = 1;
>>  	}
>> +
>> +	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
>> +		ret = hr_dev->hw->qp_flow_control_init(hr_dev, hr_qp);
>> +		if (ret) {
>> +			dev_err(hr_dev->dev, "qp flow control init failure!");
>> +			goto err_qp;
>> +		}
>> +	}
>> +
>>  	hr_qp->event = hns_roce_ib_qp_event;
>>
>>  	return 0;
>> --
>> 1.9.1
>>
Leon Romanovsky Dec. 6, 2018, 6:49 a.m. UTC | #3
On Thu, Dec 06, 2018 at 02:11:44PM +0800, liyangyang (M) wrote:
> On 2018/12/6 1:06, Leon Romanovsky wrote:
> > On Wed, Dec 05, 2018 at 05:41:43PM +0800, Lijun Ou wrote:
> >> From: Yangyang Li <liyangyang20@huawei.com>
> >>
> >> This patch adds SCC context clear support for DCQCN
> >> in kernel space driver.
> >>
> >> Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
> >> ---
> >>  drivers/infiniband/hw/hns/hns_roce_device.h |  3 ++
> >>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 59 ++++++++++++++++++++++++++++-
> >>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  | 17 +++++++++
> >>  drivers/infiniband/hw/hns/hns_roce_qp.c     |  9 +++++
> >>  4 files changed, 87 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
> >> index 718b415..6fe1871 100644
> >> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
> >> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
> >> @@ -202,6 +202,7 @@ enum {
> >>  	HNS_ROCE_CAP_FLAG_SRQ			= BIT(5),
> >>  	HNS_ROCE_CAP_FLAG_MW			= BIT(7),
> >>  	HNS_ROCE_CAP_FLAG_FRMR                  = BIT(8),
> >> +	HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL		= BIT(9),
> >>  	HNS_ROCE_CAP_FLAG_ATOMIC		= BIT(10),
> >>  };
> >>
> >> @@ -867,6 +868,8 @@ struct hns_roce_hw {
> >>  			 int attr_mask, enum ib_qp_state cur_state,
> >>  			 enum ib_qp_state new_state);
> >>  	int (*destroy_qp)(struct ib_qp *ibqp);
> >> +	int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev,
> >> +			 struct hns_roce_qp *hr_qp);
> >>  	int (*post_send)(struct ib_qp *ibqp, const struct ib_send_wr *wr,
> >>  			 const struct ib_send_wr **bad_wr);
> >>  	int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
> >> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> >> index 77cfd9b..4520061 100644
> >> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> >> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> >> @@ -1392,7 +1392,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
> >>
> >>  	if (hr_dev->pci_dev->revision == 0x21) {
> >>  		caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC |
> >> -			       HNS_ROCE_CAP_FLAG_SRQ;
> >> +			       HNS_ROCE_CAP_FLAG_SRQ |
> >> +			       HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
> >>  		caps->scc_ctx_entry_sz	= HNS_ROCE_V2_SCC_CTX_ENTRY_SZ;
> >>  		caps->scc_ctx_ba_pg_sz	= 0;
> >>  		caps->scc_ctx_buf_pg_sz = 0;
> >> @@ -4210,6 +4211,61 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp)
> >>  	return 0;
> >>  }
> >>
> >> +static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
> >> +						struct hns_roce_qp *hr_qp)
> >> +{
> >> +	struct hns_roce_scc_ctx_clr *scc_cxt_clr;
> >> +	struct hns_roce_scc_ctx_clr_done *resp;
> >> +	struct hns_roce_scc_ctx_clr_done *rst;
> >> +	struct hns_roce_cmq_desc desc;
> >> +	int ret;
> >> +	int i;
> >> +
> >> +	/* set scc ctx clear done flag */
> >> +	hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCC_CTX,
> >> +				      false);
> >> +
> >> +	rst = (struct hns_roce_scc_ctx_clr_done *)desc.data;
> >> +	memset(rst, 0, sizeof(*rst));
> >> +	roce_set_bit(rst->rocee_scc_ctx_clr_done,
> >> +		     HNS_ROCE_V2_SCC_CTX_DONE_S,
> >> +		     0);
> >> +
> >> +	ret =  hns_roce_cmq_send(hr_dev, &desc, 1);
> >> +	if (ret)
> >> +		return ret;
> >> +
> >> +	/* clear scc context */
> >> +	hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_SCC_CTX_CLR,
> >> +				      false);
> >> +
> >> +	scc_cxt_clr = (struct hns_roce_scc_ctx_clr *)desc.data;
> >> +	memset(scc_cxt_clr, 0, sizeof(*scc_cxt_clr));
> >> +	scc_cxt_clr->rocee_scc_ctx_clr_qpn = hr_qp->qpn;
> >> +
> >> +	ret =  hns_roce_cmq_send(hr_dev, &desc, 1);
> >> +	if (ret)
> >> +		return ret;
> >> +
> >> +	/* query scc context clear is done or not */
> >> +	for (i = 0; i <= HNS_ROCE_CMQ_SCC_CLR_DONE_CNT; i++) {
> >> +		hns_roce_cmq_setup_basic_desc(&desc,
> >> +					      HNS_ROCE_OPC_QUERY_SCC_CTX, true);
> >> +		resp = (struct hns_roce_scc_ctx_clr_done *)desc.data;
> >> +		memset(resp, 0, sizeof(*resp));
> >> +
> >> +		ret = hns_roce_cmq_send(hr_dev, &desc, 1);
> >> +		if (ret)
> >> +			return ret;
> >> +
> >> +		if (resp->rocee_scc_ctx_clr_done == 1)
> >> +			return 0;
> >
> > Sorry for my question, but I'm a little bit lost here.
> >
> > You took pointer "resp", cleared the data and the resp->rocee_scc_ctx_clr_done.
> > Are you still expecting to see here resp->rocee_scc_ctx_clr_done = 1 ?
> "resp" point to "desc.data" which will be sent to hardware through
> function "hns_roce_cmq_send".
> resp->rocee_scc_ctx_clr_done will be set to 1 by hardware if
> scc context clear successful. If not, continue the query until the
> maximum number of queries is reached HNS_ROCE_CMQ_SCC_CLR_DONE_CNT.

I'm not expert in this area, but it looks like you need some sort of
memory fence and ensure that memory in-sync between device and CPU.

Thanks
Jason Gunthorpe Dec. 6, 2018, 5:57 p.m. UTC | #4
On Thu, Dec 06, 2018 at 08:49:43AM +0200, Leon Romanovsky wrote:

> > "resp" point to "desc.data" which will be sent to hardware through
> > function "hns_roce_cmq_send".
> > resp->rocee_scc_ctx_clr_done will be set to 1 by hardware if
> > scc context clear successful. If not, continue the query until the
> > maximum number of queries is reached HNS_ROCE_CMQ_SCC_CLR_DONE_CNT.
> 
> I'm not expert in this area, but it looks like you need some sort of
> memory fence and ensure that memory in-sync between device and CPU.

It looks like there is a readl on this path and readl is serializing

Jason
Yangyang Li Dec. 10, 2018, 1:13 p.m. UTC | #5
Hi, Leon and Jason:

Thanks a lot for your replies.

On 2018/12/7 1:57, Jason Gunthorpe wrote:
> On Thu, Dec 06, 2018 at 08:49:43AM +0200, Leon Romanovsky wrote:
> 
>>> "resp" point to "desc.data" which will be sent to hardware through
>>> function "hns_roce_cmq_send".
>>> resp->rocee_scc_ctx_clr_done will be set to 1 by hardware if
>>> scc context clear successful. If not, continue the query until the
>>> maximum number of queries is reached HNS_ROCE_CMQ_SCC_CLR_DONE_CNT.
>>
>> I'm not expert in this area, but it looks like you need some sort of
>> memory fence and ensure that memory in-sync between device and CPU.
> 
> It looks like there is a readl on this path and readl is serializing
> 
> Jason
> 
As Jason said, there is a readl on this path and readl is serializing.
Thanks to your reminder, I found another problem. In a concurrent
scenario, the scc ctx done flag may be overwritten by other threads,
because the hardware does not map the scc ctx done flag to a qpn.
So a spinlock is needed. The adjusted function is as follows:

+static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
+                                               struct hns_roce_qp *hr_qp)
+{
+       struct hns_roce_sccc_clr_done *rst, *resp;
+       struct hns_roce_sccc_clr *clr;
+       struct hns_roce_cmq_desc desc;
+       int ret, i;
+
+       spin_lock_bh(&hr_dev->qp_table.scc_lock);
+
+       /* set scc ctx clear done flag */
+       hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false);
+       rst = (struct hns_roce_sccc_clr_done *)desc.data;
+       ret =  hns_roce_cmq_send(hr_dev, &desc, 1);
+       if (ret) {
+               dev_err(hr_dev->dev, "reset SCC ctx  failed(%d)\n", ret);
+               goto out;
+       }
+
+       /* clear scc context */
+       hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CLR_SCCC, false);
+       clr = (struct hns_roce_sccc_clr *)desc.data;
+       clr->qpn = cpu_to_le32(hr_qp->qpn);
+       ret =  hns_roce_cmq_send(hr_dev, &desc, 1);
+       if (ret) {
+               dev_err(hr_dev->dev, "clear SCC ctx failed(%d)\n", ret);
+               goto out;
+       }
+
+       /* query scc context clear is done or not */
+       resp = (struct hns_roce_sccc_clr_done *)desc.data;
+       for (i = 0; i <= HNS_ROCE_CMQ_SCC_CLR_DONE_CNT; i++) {
+               hns_roce_cmq_setup_basic_desc(&desc,
+                                             HNS_ROCE_OPC_QUERY_SCCC, true);
+               ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+               if (ret) {
+                       dev_err(hr_dev->dev, "query clr cmq failed(%d)\n", ret);
+                       goto out;
+               }
+
+               if (resp->clr_done) {
+                       ret = 0;
+                       goto out;
+               }
+
+               mdelay(1);
+       }
+
+       dev_err(hr_dev->dev, "query SCC clr done flag overtime.\n");
+       ret = -ETIMEDOUT;
+
+out:
+       spin_unlock_bh(&hr_dev->qp_table.scc_lock);
+       return ret;
+}
+

---
v4->v5:
1.Modify some variabile names in hns_roce_v2_qp_flow_control_init.
2.Redundant memset deletion in hns_roce_v2_qp_flow_control_init,
  Because hns_roce_cmq_setup_basic_desc has zeroed out the desc.
3.Add spinlock to hns_roce_v2_qp_flow_control_init.
4.Add mdelay to query_scc_clr_done in hns_roce_v2_qp_flow_control_init.
5.Add dev_err in hns_roce_v2_qp_flow_control_init.
---

Thanks
>
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 718b415..6fe1871 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -202,6 +202,7 @@  enum {
 	HNS_ROCE_CAP_FLAG_SRQ			= BIT(5),
 	HNS_ROCE_CAP_FLAG_MW			= BIT(7),
 	HNS_ROCE_CAP_FLAG_FRMR                  = BIT(8),
+	HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL		= BIT(9),
 	HNS_ROCE_CAP_FLAG_ATOMIC		= BIT(10),
 };
 
@@ -867,6 +868,8 @@  struct hns_roce_hw {
 			 int attr_mask, enum ib_qp_state cur_state,
 			 enum ib_qp_state new_state);
 	int (*destroy_qp)(struct ib_qp *ibqp);
+	int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev,
+			 struct hns_roce_qp *hr_qp);
 	int (*post_send)(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 			 const struct ib_send_wr **bad_wr);
 	int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 77cfd9b..4520061 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1392,7 +1392,8 @@  static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 
 	if (hr_dev->pci_dev->revision == 0x21) {
 		caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC |
-			       HNS_ROCE_CAP_FLAG_SRQ;
+			       HNS_ROCE_CAP_FLAG_SRQ |
+			       HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
 		caps->scc_ctx_entry_sz	= HNS_ROCE_V2_SCC_CTX_ENTRY_SZ;
 		caps->scc_ctx_ba_pg_sz	= 0;
 		caps->scc_ctx_buf_pg_sz = 0;
@@ -4210,6 +4211,61 @@  static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp)
 	return 0;
 }
 
+static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
+						struct hns_roce_qp *hr_qp)
+{
+	struct hns_roce_scc_ctx_clr *scc_cxt_clr;
+	struct hns_roce_scc_ctx_clr_done *resp;
+	struct hns_roce_scc_ctx_clr_done *rst;
+	struct hns_roce_cmq_desc desc;
+	int ret;
+	int i;
+
+	/* set scc ctx clear done flag */
+	hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCC_CTX,
+				      false);
+
+	rst = (struct hns_roce_scc_ctx_clr_done *)desc.data;
+	memset(rst, 0, sizeof(*rst));
+	roce_set_bit(rst->rocee_scc_ctx_clr_done,
+		     HNS_ROCE_V2_SCC_CTX_DONE_S,
+		     0);
+
+	ret =  hns_roce_cmq_send(hr_dev, &desc, 1);
+	if (ret)
+		return ret;
+
+	/* clear scc context */
+	hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_SCC_CTX_CLR,
+				      false);
+
+	scc_cxt_clr = (struct hns_roce_scc_ctx_clr *)desc.data;
+	memset(scc_cxt_clr, 0, sizeof(*scc_cxt_clr));
+	scc_cxt_clr->rocee_scc_ctx_clr_qpn = hr_qp->qpn;
+
+	ret =  hns_roce_cmq_send(hr_dev, &desc, 1);
+	if (ret)
+		return ret;
+
+	/* query scc context clear is done or not */
+	for (i = 0; i <= HNS_ROCE_CMQ_SCC_CLR_DONE_CNT; i++) {
+		hns_roce_cmq_setup_basic_desc(&desc,
+					      HNS_ROCE_OPC_QUERY_SCC_CTX, true);
+		resp = (struct hns_roce_scc_ctx_clr_done *)desc.data;
+		memset(resp, 0, sizeof(*resp));
+
+		ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+		if (ret)
+			return ret;
+
+		if (resp->rocee_scc_ctx_clr_done == 1)
+			return 0;
+	}
+
+	dev_err(hr_dev->dev, "clear scc ctx failure!");
+	return -EINVAL;
+}
+
 static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(cq->device);
@@ -5740,6 +5796,7 @@  static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
 	.modify_qp = hns_roce_v2_modify_qp,
 	.query_qp = hns_roce_v2_query_qp,
 	.destroy_qp = hns_roce_v2_destroy_qp,
+	.qp_flow_control_init = hns_roce_v2_qp_flow_control_init,
 	.modify_cq = hns_roce_v2_modify_cq,
 	.post_send = hns_roce_v2_post_send,
 	.post_recv = hns_roce_v2_post_recv,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index b92eb30..bd9f086 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -122,6 +122,8 @@ 
 #define HNS_ROCE_CMQ_EN_B		16
 #define HNS_ROCE_CMQ_ENABLE		BIT(HNS_ROCE_CMQ_EN_B)
 
+#define HNS_ROCE_CMQ_SCC_CLR_DONE_CNT		100
+
 #define check_whether_last_step(hop_num, step_idx) \
 	((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \
 	(step_idx == 1 && hop_num == 1) || \
@@ -227,6 +229,9 @@  enum hns_roce_opcode_type {
 	HNS_ROCE_OPC_CFG_SGID_TB			= 0x8500,
 	HNS_ROCE_OPC_CFG_SMAC_TB			= 0x8501,
 	HNS_ROCE_OPC_CFG_BT_ATTR			= 0x8506,
+	HNS_ROCE_OPC_SCC_CTX_CLR			= 0x8509,
+	HNS_ROCE_OPC_QUERY_SCC_CTX			= 0x850a,
+	HNS_ROCE_OPC_RESET_SCC_CTX			= 0x850b,
 };
 
 enum {
@@ -1726,4 +1731,16 @@  struct hns_roce_wqe_atomic_seg {
 	__le64          cmp_data;
 };
 
+#define HNS_ROCE_V2_SCC_CTX_DONE_S 0
+
+struct hns_roce_scc_ctx_clr {
+	__le32 rocee_scc_ctx_clr_qpn;
+	__le32 rsv[5];
+};
+
+struct hns_roce_scc_ctx_clr_done {
+	__le32 rocee_scc_ctx_clr_done;
+	__le32 rsv[5];
+};
+
 #endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 895274a..abf29bf 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -812,6 +812,15 @@  static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 
 		hr_qp->rdb_en = 1;
 	}
+
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
+		ret = hr_dev->hw->qp_flow_control_init(hr_dev, hr_qp);
+		if (ret) {
+			dev_err(hr_dev->dev, "qp flow control init failure!");
+			goto err_qp;
+		}
+	}
+
 	hr_qp->event = hns_roce_ib_qp_event;
 
 	return 0;