diff mbox series

[rdma-core,5/5] libhns: Add support for XRC for HIP09

Message ID 1614689424-27154-6-git-send-email-liweihang@huawei.com (mailing list archive)
State Not Applicable
Headers show
Series libhns: Support XRC on HIP09 | expand

Commit Message

Weihang Li March 2, 2021, 12:50 p.m. UTC
From: Wenpeng Liang <liangwenpeng@huawei.com>

The HIP09 supports XRC transport service, it greatly saves the number of
QPs required to connect all processes in a large cluster.

Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 providers/hns/hns_roce_u.c       |   4 ++
 providers/hns/hns_roce_u.h       |  10 ++++
 providers/hns/hns_roce_u_hw_v2.c |  99 ++++++++++++++++++++++++----------
 providers/hns/hns_roce_u_hw_v2.h |   1 +
 providers/hns/hns_roce_u_verbs.c | 114 +++++++++++++++++++++++++++++++++++----
 5 files changed, 190 insertions(+), 38 deletions(-)
diff mbox series

Patch

diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index d103808b..ef8b14a 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -88,6 +88,10 @@  static const struct verbs_context_ops hns_common_ops = {
 	.free_context = hns_roce_free_context,
 	.create_ah = hns_roce_u_create_ah,
 	.destroy_ah = hns_roce_u_destroy_ah,
+	.open_xrcd = hns_roce_u_open_xrcd,
+	.close_xrcd = hns_roce_u_close_xrcd,
+	.open_qp = hns_roce_u_open_qp,
+	.get_srq_num = hns_roce_u_get_srq_num,
 };
 
 static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index de6d0f3..1e6e638 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -285,6 +285,7 @@  struct hns_roce_qp {
 
 	struct hns_roce_rinl_buf	rq_rinl_buf;
 	unsigned long			flags;
+	int				refcnt; /* specially used for XRC */
 };
 
 struct hns_roce_av {
@@ -388,6 +389,7 @@  struct ibv_srq *hns_roce_u_create_srq(struct ibv_pd *pd,
 				      struct ibv_srq_init_attr *srq_init_attr);
 struct ibv_srq *hns_roce_u_create_srq_ex(struct ibv_context *context,
 					 struct ibv_srq_init_attr_ex *attr);
+int hns_roce_u_get_srq_num(struct ibv_srq *ibv_srq, uint32_t *srq_num);
 int hns_roce_u_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr,
 			  int srq_attr_mask);
 int hns_roce_u_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr);
@@ -401,6 +403,9 @@  struct ibv_qp *
 hns_roce_u_create_qp_ex(struct ibv_context *context,
 			struct ibv_qp_init_attr_ex *qp_init_attr_ex);
 
+struct ibv_qp *hns_roce_u_open_qp(struct ibv_context *context,
+				  struct ibv_qp_open_attr *attr);
+
 int hns_roce_u_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
 			int attr_mask, struct ibv_qp_init_attr *init_attr);
 
@@ -408,6 +413,11 @@  struct ibv_ah *hns_roce_u_create_ah(struct ibv_pd *pd,
 				    struct ibv_ah_attr *attr);
 int hns_roce_u_destroy_ah(struct ibv_ah *ah);
 
+struct ibv_xrcd *
+hns_roce_u_open_xrcd(struct ibv_context *context,
+		     struct ibv_xrcd_init_attr *xrcd_init_attr);
+int hns_roce_u_close_xrcd(struct ibv_xrcd *ibv_xrcd);
+
 int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size,
 		       int page_size);
 void hns_roce_free_buf(struct hns_roce_buf *buf);
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 4d990dd..bbb5262 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -206,6 +206,9 @@  static void hns_roce_v2_handle_error_cqe(struct hns_roce_v2_cqe *cqe,
 	case HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR:
 		wc->status = IBV_WC_REM_ABORT_ERR;
 		break;
+	case HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR:
+		wc->status = IBV_WC_REM_INV_RD_REQ_ERR;
+		break;
 	default:
 		wc->status = IBV_WC_GENERAL_ERR;
 		break;
@@ -346,15 +349,17 @@  static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
 		return NULL;
 }
 
-static void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, uint32_t qpn)
+static void hns_roce_v2_clear_qp(struct hns_roce_context *ctx,
+				 struct hns_roce_qp *qp)
 {
-	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+	uint32_t qpn = qp->verbs_qp.qp.qp_num;
+	uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
 
 	pthread_mutex_lock(&ctx->qp_table_mutex);
 
 	if (!--ctx->qp_table[tind].refcnt)
 		free(ctx->qp_table[tind].table);
-	else
+	else if (!--qp->refcnt)
 		ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;
 
 	pthread_mutex_unlock(&ctx->qp_table_mutex);
@@ -510,13 +515,15 @@  static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
 static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
 				struct hns_roce_qp **cur_qp, struct ibv_wc *wc)
 {
-	uint32_t qpn;
-	int is_send;
-	uint16_t wqe_ctr;
+	struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context);
+	struct hns_roce_srq *srq = NULL;
 	struct hns_roce_wq *wq = NULL;
 	struct hns_roce_v2_cqe *cqe;
-	struct hns_roce_srq *srq;
+	uint16_t wqe_ctr;
 	uint32_t opcode;
+	uint32_t srqn;
+	uint32_t qpn;
+	int is_send;
 	int ret;
 
 	/* According to CI, find the relative cqe */
@@ -537,15 +544,23 @@  static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
 
 	/* if qp is zero, it will not get the correct qpn */
 	if (!*cur_qp || qpn != (*cur_qp)->verbs_qp.qp.qp_num) {
-		*cur_qp = hns_roce_v2_find_qp(to_hr_ctx(cq->ibv_cq.context),
-					      qpn);
+		*cur_qp = hns_roce_v2_find_qp(ctx, qpn);
 		if (!*cur_qp)
 			return V2_CQ_POLL_ERR;
 	}
 	wc->qp_num = qpn;
 
-	srq = (*cur_qp)->verbs_qp.qp.srq ?
-				to_hr_srq((*cur_qp)->verbs_qp.qp.srq) : NULL;
+	if ((*cur_qp)->verbs_qp.qp.qp_type == IBV_QPT_XRC_RECV) {
+		srqn = roce_get_field(cqe->byte_12, CQE_BYTE_12_XRC_SRQN_M,
+				      CQE_BYTE_12_XRC_SRQN_S);
+
+		srq = hns_roce_find_srq(ctx, srqn);
+		if (!srq)
+			return V2_CQ_POLL_ERR;
+	} else if ((*cur_qp)->verbs_qp.qp.srq) {
+		srq = to_hr_srq((*cur_qp)->verbs_qp.qp.srq);
+	}
+
 	if (is_send) {
 		wq = &(*cur_qp)->sq;
 		/*
@@ -683,6 +698,21 @@  static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
 	return 0;
 }
 
+static int check_qp_send(struct ibv_qp *qp, struct hns_roce_context *ctx)
+{
+	if (unlikely(qp->qp_type != IBV_QPT_RC &&
+		     qp->qp_type != IBV_QPT_UD) &&
+		     qp->qp_type != IBV_QPT_XRC_SEND)
+		return -EINVAL;
+
+	if (unlikely(qp->state == IBV_QPS_RESET ||
+		     qp->state == IBV_QPS_INIT ||
+		     qp->state == IBV_QPS_RTR))
+		return -EINVAL;
+
+	return 0;
+}
+
 static void set_sge(struct hns_roce_v2_wqe_data_seg *dseg,
 		    struct hns_roce_qp *qp, struct ibv_send_wr *wr,
 		    struct hns_roce_sge_info *sge_info)
@@ -913,8 +943,6 @@  static int set_ud_wqe(void *wqe, struct hns_roce_qp *qp,
 	struct hns_roce_ud_sq_wqe *ud_sq_wqe = wqe;
 	int ret = 0;
 
-	memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe));
-
 	roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_CQE_S,
 		     !!(wr->send_flags & IBV_SEND_SIGNALED));
 	roce_set_bit(ud_sq_wqe->rsv_opcode, UD_SQ_WQE_SE_S,
@@ -1071,8 +1099,6 @@  static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
 	struct hns_roce_v2_wqe_data_seg *dseg;
 	int ret;
 
-	memset(rc_sq_wqe, 0, sizeof(struct hns_roce_rc_sq_wqe));
-
 	ret = check_rc_opcode(rc_sq_wqe, wr);
 	if (ret)
 		return ret;
@@ -1137,16 +1163,15 @@  int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 	struct hns_roce_context *ctx = to_hr_ctx(ibvqp->context);
 	struct hns_roce_qp *qp = to_hr_qp(ibvqp);
 	struct hns_roce_sge_info sge_info = {};
+	struct hns_roce_rc_sq_wqe *wqe;
 	unsigned int wqe_idx, nreq;
 	struct ibv_qp_attr attr;
 	int ret = 0;
-	void *wqe;
 
-	/* check that state is OK to post send */
-	if (ibvqp->state == IBV_QPS_RESET || ibvqp->state == IBV_QPS_INIT ||
-	    ibvqp->state == IBV_QPS_RTR) {
+	ret = check_qp_send(ibvqp, ctx);
+	if (unlikely(ret)) {
 		*bad_wr = wr;
-		return EINVAL;
+		return ret;
 	}
 
 	pthread_spin_lock(&qp->sq.lock);
@@ -1172,6 +1197,12 @@  int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 		qp->sq.wrid[wqe_idx] = wr->wr_id;
 
 		switch (ibvqp->qp_type) {
+		case IBV_QPT_XRC_SEND:
+			roce_set_field(wqe->byte_16,
+				       RC_SQ_WQE_BYTE_16_XRC_SRQN_M,
+				       RC_SQ_WQE_BYTE_16_XRC_SRQN_S,
+				       wr->qp_type.xrc.remote_srqn);
+			SWITCH_FALLTHROUGH;
 		case IBV_QPT_RC:
 			ret = set_rc_wqe(wqe, qp, wr, nreq, &sge_info);
 			break;
@@ -1212,6 +1243,18 @@  out:
 	return ret;
 }
 
+static int check_qp_recv(struct ibv_qp *qp, struct hns_roce_context *ctx)
+{
+	if (unlikely(qp->qp_type != IBV_QPT_RC &&
+		     qp->qp_type != IBV_QPT_UD))
+		return -EINVAL;
+
+	if (qp->state == IBV_QPS_RESET || qp->srq)
+		return -EINVAL;
+
+	return 0;
+}
+
 static void fill_rq_wqe(struct hns_roce_qp *qp, struct ibv_recv_wr *wr,
 			unsigned int wqe_idx)
 {
@@ -1252,10 +1295,10 @@  static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
 	unsigned int max_sge;
 	int ret = 0;
 
-	/* check that state is OK to post receive */
-	if (ibvqp->state == IBV_QPS_RESET) {
+	ret = check_qp_recv(ibvqp, ctx);
+	if (unlikely(ret)) {
 		*bad_wr = wr;
-		return EINVAL;
+		return ret;
 	}
 
 	pthread_spin_lock(&qp->rq.lock);
@@ -1406,9 +1449,11 @@  static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 		qp->state = attr->qp_state;
 
 	if ((attr_mask & IBV_QP_STATE) && attr->qp_state == IBV_QPS_RESET) {
-		hns_roce_v2_cq_clean(to_hr_cq(qp->recv_cq), qp->qp_num,
-				     qp->srq ? to_hr_srq(qp->srq) : NULL);
-		if (qp->send_cq != qp->recv_cq)
+		if (qp->recv_cq)
+			hns_roce_v2_cq_clean(to_hr_cq(qp->recv_cq), qp->qp_num,
+					     qp->srq ? to_hr_srq(qp->srq) :
+					     NULL);
+		if (qp->send_cq && qp->send_cq != qp->recv_cq)
 			hns_roce_v2_cq_clean(to_hr_cq(qp->send_cq), qp->qp_num,
 					     NULL);
 
@@ -1474,7 +1519,7 @@  static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
 	if (ret)
 		return ret;
 
-	hns_roce_v2_clear_qp(ctx, ibqp->qp_num);
+	hns_roce_v2_clear_qp(ctx, qp);
 
 	hns_roce_lock_cqs(ibqp);
 
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index f5e6402..af67b31 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -112,6 +112,7 @@  enum {
 	HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR		= 0x15,
 	HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR		= 0x16,
 	HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR		= 0x22,
+	HNS_ROCE_V2_CQE_XRC_VIOLATION_ERR		= 0x24,
 };
 
 enum {
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index e51b9d5..fc3dd13 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -120,6 +120,41 @@  int hns_roce_u_free_pd(struct ibv_pd *pd)
 	return ret;
 }
 
+struct ibv_xrcd *hns_roce_u_open_xrcd(struct ibv_context *context,
+				      struct ibv_xrcd_init_attr *xrcd_init_attr)
+{
+	struct ib_uverbs_open_xrcd_resp resp = {};
+	struct ibv_open_xrcd cmd = {};
+	struct verbs_xrcd *xrcd;
+	int ret;
+
+	xrcd = calloc(1, sizeof(*xrcd));
+	if (!xrcd)
+		return NULL;
+
+	ret = ibv_cmd_open_xrcd(context, xrcd, sizeof(*xrcd), xrcd_init_attr,
+				&cmd, sizeof(cmd), &resp, sizeof(resp));
+	if (ret) {
+		free(xrcd);
+		return NULL;
+	}
+
+	return &xrcd->xrcd;
+}
+
+int hns_roce_u_close_xrcd(struct ibv_xrcd *ibv_xrcd)
+{
+	struct verbs_xrcd *xrcd =
+			container_of(ibv_xrcd, struct verbs_xrcd, xrcd);
+	int ret;
+
+	ret = ibv_cmd_close_xrcd(xrcd);
+	if (!ret)
+		free(xrcd);
+
+	return ret;
+}
+
 struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 				 uint64_t hca_va, int access)
 {
@@ -637,6 +672,13 @@  struct ibv_srq *hns_roce_u_create_srq_ex(struct ibv_context *context,
 	return create_srq(context, attr);
 }
 
+int hns_roce_u_get_srq_num(struct ibv_srq *ibv_srq, uint32_t *srq_num)
+{
+	*srq_num = to_hr_srq(ibv_srq)->srqn;
+
+	return 0;
+}
+
 int hns_roce_u_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr,
 			  int srq_attr_mask)
 {
@@ -686,14 +728,35 @@  int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq)
 }
 
 enum {
-	CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD,
+	CREATE_QP_SUP_COMP_MASK = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_XRCD,
 };
 
-static int check_qp_create_mask(struct ibv_qp_init_attr_ex *attr)
+static int check_qp_create_mask(struct hns_roce_context *ctx,
+				struct ibv_qp_init_attr_ex *attr)
 {
+	struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
+
 	if (!check_comp_mask(attr->comp_mask, CREATE_QP_SUP_COMP_MASK))
 		return -EOPNOTSUPP;
 
+	switch (attr->qp_type) {
+	case IBV_QPT_UD:
+		if (hr_dev->hw_version < HNS_ROCE_HW_VER3)
+			return -EINVAL;
+		SWITCH_FALLTHROUGH;
+	case IBV_QPT_RC:
+	case IBV_QPT_XRC_SEND:
+		if (!(attr->comp_mask & IBV_QP_INIT_ATTR_PD))
+			return -EINVAL;
+		break;
+	case IBV_QPT_XRC_RECV:
+		if (!(attr->comp_mask & IBV_QP_INIT_ATTR_XRCD))
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -704,8 +767,10 @@  static int verify_qp_create_cap(struct hns_roce_context *ctx,
 	struct ibv_qp_cap *cap = &attr->cap;
 	uint32_t min_wqe_num;
 
-	if (!cap->max_send_wr ||
-	    cap->max_send_wr > ctx->max_qp_wr ||
+	if (!cap->max_send_wr && attr->qp_type != IBV_QPT_XRC_RECV)
+		return -EINVAL;
+
+	if (cap->max_send_wr > ctx->max_qp_wr ||
 	    cap->max_recv_wr > ctx->max_qp_wr ||
 	    cap->max_send_sge > ctx->max_sge  ||
 	    cap->max_recv_sge > ctx->max_sge)
@@ -725,11 +790,6 @@  static int verify_qp_create_cap(struct hns_roce_context *ctx,
 			return -EINVAL;
 	}
 
-	if (!(attr->qp_type == IBV_QPT_RC ||
-	      (attr->qp_type == IBV_QPT_UD &&
-	       hr_dev->hw_version >= HNS_ROCE_HW_VER3)))
-		return -EOPNOTSUPP;
-
 	return 0;
 }
 
@@ -738,7 +798,7 @@  static int verify_qp_create_attr(struct hns_roce_context *ctx,
 {
 	int ret;
 
-	ret = check_qp_create_mask(attr);
+	ret = check_qp_create_mask(ctx, attr);
 	if (ret)
 		return ret;
 
@@ -998,7 +1058,8 @@  static int qp_alloc_db(struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp,
 	return 0;
 }
 
-static int hns_roce_store_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp)
+static int hns_roce_store_qp(struct hns_roce_context *ctx,
+			     struct hns_roce_qp *qp)
 {
 	uint32_t qpn = qp->verbs_qp.qp.qp_num;
 	uint32_t tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
@@ -1013,6 +1074,7 @@  static int hns_roce_store_qp(struct hns_roce_context *ctx, struct hns_roce_qp *q
 		}
 	}
 
+	++qp->refcnt;
 	++ctx->qp_table[tind].refcnt;
 	ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;
 	pthread_mutex_unlock(&ctx->qp_table_mutex);
@@ -1162,6 +1224,36 @@  struct ibv_qp *hns_roce_u_create_qp_ex(struct ibv_context *context,
 	return create_qp(context, attr);
 }
 
+struct ibv_qp *hns_roce_u_open_qp(struct ibv_context *context,
+				  struct ibv_qp_open_attr *attr)
+{
+	struct ib_uverbs_create_qp_resp resp;
+	struct ibv_open_qp cmd;
+	struct hns_roce_qp *qp;
+	int ret;
+
+	qp = calloc(1, sizeof(*qp));
+	if (!qp)
+		return NULL;
+
+	ret = ibv_cmd_open_qp(context, &qp->verbs_qp, sizeof(qp->verbs_qp),
+			      attr, &cmd, sizeof(cmd), &resp, sizeof(resp));
+	if (ret)
+		goto err_buf;
+
+	ret = hns_roce_store_qp(to_hr_ctx(context), qp);
+	if (ret)
+		goto err_cmd;
+
+	return &qp->verbs_qp.qp;
+
+err_cmd:
+	ibv_cmd_destroy_qp(&qp->verbs_qp.qp);
+err_buf:
+	free(qp);
+	return NULL;
+}
+
 int hns_roce_u_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
 			int attr_mask, struct ibv_qp_init_attr *init_attr)
 {