diff mbox series

[for-next,2/6] IB/hfi1: Add OPFN helper functions for TID RDMA feature

Message ID 20190124032036.29585.43677.stgit@scvm10.sc.intel.com (mailing list archive)
State Accepted
Headers show
Series IB/hfi1: Add OPFN | expand

Commit Message

Dennis Dalessandro Jan. 24, 2019, 3:20 a.m. UTC
From: Kaike Wan <kaike.wan@intel.com>

This patch adds the OPFN helper functions to initialize, encode, decode,
and reset OPFN parameters for the TID RDMA feature.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
---
 drivers/infiniband/hw/hfi1/chip.c     |   11 ++
 drivers/infiniband/hw/hfi1/chip.h     |    3 
 drivers/infiniband/hw/hfi1/init.c     |    2 
 drivers/infiniband/hw/hfi1/opfn.h     |    5 +
 drivers/infiniband/hw/hfi1/tid_rdma.c |  202 +++++++++++++++++++++++++++++++++
 drivers/infiniband/hw/hfi1/tid_rdma.h |   28 ++++-
 drivers/infiniband/hw/hfi1/verbs.h    |    5 +
 7 files changed, 254 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index b443642..4d40311 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -5222,6 +5222,17 @@  int is_bx(struct hfi1_devdata *dd)
 	return (chip_rev_minor & 0xF0) == 0x10;
 }
 
+/* return true is kernel urg disabled for rcd */
+bool is_urg_masked(struct hfi1_ctxtdata *rcd)
+{
+	u64 mask;
+	u32 is = IS_RCVURGENT_START + rcd->ctxt;
+	u8 bit = is % 64;
+
+	mask = read_csr(rcd->dd, CCE_INT_MASK + (8 * (is / 64)));
+	return !(mask & BIT_ULL(bit));
+}
+
 /*
  * Append string s to buffer buf.  Arguments curp and len are the current
  * position and remaining length, respectively.
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 6b9c8f1..ba3d99e 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -1,7 +1,7 @@ 
 #ifndef _CHIP_H
 #define _CHIP_H
 /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -804,6 +804,7 @@  void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
 u32 hdrqempty(struct hfi1_ctxtdata *rcd);
 int is_ax(struct hfi1_devdata *dd);
 int is_bx(struct hfi1_devdata *dd);
+bool is_urg_masked(struct hfi1_ctxtdata *rcd);
 u32 read_physical_state(struct hfi1_devdata *dd);
 u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate);
 const char *opa_lstate_name(u32 lstate);
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 0904490..f1d0c95 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -927,6 +927,8 @@  int hfi1_init(struct hfi1_devdata *dd, int reinit)
 		lastfail = hfi1_create_rcvhdrq(dd, rcd);
 		if (!lastfail)
 			lastfail = hfi1_setup_eagerbufs(rcd);
+		if (!lastfail)
+			lastfail = hfi1_kern_exp_rcv_init(rcd, reinit);
 		if (lastfail) {
 			dd_dev_err(dd,
 				   "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
diff --git a/drivers/infiniband/hw/hfi1/opfn.h b/drivers/infiniband/hw/hfi1/opfn.h
index 1a2b344..1927c98 100644
--- a/drivers/infiniband/hw/hfi1/opfn.h
+++ b/drivers/infiniband/hw/hfi1/opfn.h
@@ -50,4 +50,9 @@ 
 /* STL Verbs Extended */
 #define IB_BTHE_E_SHIFT           24
 
+struct hfi1_opfn_data {
+	/* serialize opfn function calls */
+	spinlock_t lock;
+};
+
 #endif /* _HFI1_OPFN_H */
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index da1ecb6..a8fd66f 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -8,6 +8,208 @@ 
 #include "verbs.h"
 #include "tid_rdma.h"
 
+/*
+ * J_KEY for kernel contexts when TID RDMA is used.
+ * See generate_jkey() in hfi.h for more information.
+ */
+#define TID_RDMA_JKEY                   32
+#define HFI1_KERNEL_MIN_JKEY HFI1_ADMIN_JKEY_RANGE
+#define HFI1_KERNEL_MAX_JKEY (2 * HFI1_ADMIN_JKEY_RANGE - 1)
+
+#define TID_RDMA_MAX_READ_SEGS_PER_REQ  6
+#define TID_RDMA_MAX_WRITE_SEGS_PER_REQ 4
+
+#define TID_OPFN_QP_CTXT_MASK 0xff
+#define TID_OPFN_QP_CTXT_SHIFT 56
+#define TID_OPFN_QP_KDETH_MASK 0xff
+#define TID_OPFN_QP_KDETH_SHIFT 48
+#define TID_OPFN_MAX_LEN_MASK 0x7ff
+#define TID_OPFN_MAX_LEN_SHIFT 37
+#define TID_OPFN_TIMEOUT_MASK 0x1f
+#define TID_OPFN_TIMEOUT_SHIFT 32
+#define TID_OPFN_RESERVED_MASK 0x3f
+#define TID_OPFN_RESERVED_SHIFT 26
+#define TID_OPFN_URG_MASK 0x1
+#define TID_OPFN_URG_SHIFT 25
+#define TID_OPFN_VER_MASK 0x7
+#define TID_OPFN_VER_SHIFT 22
+#define TID_OPFN_JKEY_MASK 0x3f
+#define TID_OPFN_JKEY_SHIFT 16
+#define TID_OPFN_MAX_READ_MASK 0x3f
+#define TID_OPFN_MAX_READ_SHIFT 10
+#define TID_OPFN_MAX_WRITE_MASK 0x3f
+#define TID_OPFN_MAX_WRITE_SHIFT 4
+
+/*
+ * OPFN TID layout
+ *
+ * 63               47               31               15
+ * NNNNNNNNKKKKKKKK MMMMMMMMMMMTTTTT DDDDDDUVVVJJJJJJ RRRRRRWWWWWWCCCC
+ * 3210987654321098 7654321098765432 1098765432109876 5432109876543210
+ * N - the context Number
+ * K - the Kdeth_qp
+ * M - Max_len
+ * T - Timeout
+ * D - reserveD
+ * V - version
+ * U - Urg capable
+ * J - Jkey
+ * R - max_Read
+ * W - max_Write
+ * C - Capcode
+ */
+
+static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
+{
+	return
+		(((u64)p->qp & TID_OPFN_QP_CTXT_MASK) <<
+			TID_OPFN_QP_CTXT_SHIFT) |
+		((((u64)p->qp >> 16) & TID_OPFN_QP_KDETH_MASK) <<
+			TID_OPFN_QP_KDETH_SHIFT) |
+		(((u64)((p->max_len >> PAGE_SHIFT) - 1) &
+			TID_OPFN_MAX_LEN_MASK) << TID_OPFN_MAX_LEN_SHIFT) |
+		(((u64)p->timeout & TID_OPFN_TIMEOUT_MASK) <<
+			TID_OPFN_TIMEOUT_SHIFT) |
+		(((u64)p->urg & TID_OPFN_URG_MASK) << TID_OPFN_URG_SHIFT) |
+		(((u64)p->jkey & TID_OPFN_JKEY_MASK) << TID_OPFN_JKEY_SHIFT) |
+		(((u64)p->max_read & TID_OPFN_MAX_READ_MASK) <<
+			TID_OPFN_MAX_READ_SHIFT) |
+		(((u64)p->max_write & TID_OPFN_MAX_WRITE_MASK) <<
+			TID_OPFN_MAX_WRITE_SHIFT);
+}
+
+static void tid_rdma_opfn_decode(struct tid_rdma_params *p, u64 data)
+{
+	p->max_len = (((data >> TID_OPFN_MAX_LEN_SHIFT) &
+		TID_OPFN_MAX_LEN_MASK) + 1) << PAGE_SHIFT;
+	p->jkey = (data >> TID_OPFN_JKEY_SHIFT) & TID_OPFN_JKEY_MASK;
+	p->max_write = (data >> TID_OPFN_MAX_WRITE_SHIFT) &
+		TID_OPFN_MAX_WRITE_MASK;
+	p->max_read = (data >> TID_OPFN_MAX_READ_SHIFT) &
+		TID_OPFN_MAX_READ_MASK;
+	p->qp =
+		((((data >> TID_OPFN_QP_KDETH_SHIFT) & TID_OPFN_QP_KDETH_MASK)
+			<< 16) |
+		((data >> TID_OPFN_QP_CTXT_SHIFT) & TID_OPFN_QP_CTXT_MASK));
+	p->urg = (data >> TID_OPFN_URG_SHIFT) & TID_OPFN_URG_MASK;
+	p->timeout = (data >> TID_OPFN_TIMEOUT_SHIFT) & TID_OPFN_TIMEOUT_MASK;
+}
+
+void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	p->qp = (kdeth_qp << 16) | priv->rcd->ctxt;
+	p->max_len = TID_RDMA_MAX_SEGMENT_SIZE;
+	p->jkey = priv->rcd->jkey;
+	p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ;
+	p->max_write = TID_RDMA_MAX_WRITE_SEGS_PER_REQ;
+	p->timeout = qp->timeout;
+	p->urg = is_urg_masked(priv->rcd);
+}
+
+bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	*data = tid_rdma_opfn_encode(&priv->tid_rdma.local);
+	return true;
+}
+
+bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+	struct tid_rdma_params *remote, *old;
+	bool ret = true;
+
+	old = rcu_dereference_protected(priv->tid_rdma.remote,
+					lockdep_is_held(&priv->opfn.lock));
+	data &= ~0xfULL;
+	/*
+	 * If data passed in is zero, return true so as not to continue the
+	 * negotiation process
+	 */
+	if (!data || !HFI1_CAP_IS_KSET(TID_RDMA))
+		goto null;
+	/*
+	 * If kzalloc fails, return false. This will result in:
+	 * * at the requester a new OPFN request being generated to retry
+	 *   the negotiation
+	 * * at the responder, 0 being returned to the requester so as to
+	 *   disable TID RDMA at both the requester and the responder
+	 */
+	remote = kzalloc(sizeof(*remote), GFP_ATOMIC);
+	if (!remote) {
+		ret = false;
+		goto null;
+	}
+
+	tid_rdma_opfn_decode(remote, data);
+	priv->tid_timer_timeout_jiffies =
+		usecs_to_jiffies((((4096UL * (1UL << remote->timeout)) /
+				   1000UL) << 3) * 7);
+	rcu_assign_pointer(priv->tid_rdma.remote, remote);
+	/*
+	 * A TID RDMA READ request's segment size is not equal to
+	 * remote->max_len only when the request's data length is smaller
+	 * than remote->max_len. In that case, there will be only one segment.
+	 * Therefore, when priv->pkts_ps is used to calculate req->cur_seg
+	 * during retry, it will lead to req->cur_seg = 0, which is exactly
+	 * what is expected.
+	 */
+	priv->pkts_ps = (u16)rvt_div_mtu(qp, remote->max_len);
+	priv->timeout_shift = ilog2(priv->pkts_ps - 1) + 1;
+	goto free;
+null:
+	RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
+	priv->timeout_shift = 0;
+free:
+	if (old)
+		kfree_rcu(old, rcu_head);
+	return ret;
+}
+
+bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data)
+{
+	bool ret;
+
+	ret = tid_rdma_conn_reply(qp, *data);
+	*data = 0;
+	/*
+	 * If tid_rdma_conn_reply() returns error, set *data as 0 to indicate
+	 * TID RDMA could not be enabled. This will result in TID RDMA being
+	 * disabled at the requester too.
+	 */
+	if (ret)
+		(void)tid_rdma_conn_req(qp, data);
+	return ret;
+}
+
+void tid_rdma_conn_error(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+	struct tid_rdma_params *old;
+
+	old = rcu_dereference_protected(priv->tid_rdma.remote,
+					lockdep_is_held(&priv->opfn.lock));
+	RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
+	if (old)
+		kfree_rcu(old, rcu_head);
+}
+
+/* This is called at context initialization time */
+int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit)
+{
+	if (reinit)
+		return 0;
+
+	BUILD_BUG_ON(TID_RDMA_JKEY < HFI1_KERNEL_MIN_JKEY);
+	BUILD_BUG_ON(TID_RDMA_JKEY > HFI1_KERNEL_MAX_JKEY);
+	rcd->jkey = TID_RDMA_JKEY;
+	hfi1_set_ctxt_jkey(rcd->dd, rcd, rcd->jkey);
+	return 0;
+}
+
 /**
  * qp_to_rcd - determine the receive context used by a qp
  * @qp - the qp
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index 6fcd3ad..18c6d43 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -6,8 +6,34 @@ 
 #ifndef HFI1_TID_RDMA_H
 #define HFI1_TID_RDMA_H
 
+#define TID_RDMA_MAX_SEGMENT_SIZE       BIT(18)   /* 256 KiB (for now) */
+
+struct tid_rdma_params {
+	struct rcu_head rcu_head;
+	u32 qp;
+	u32 max_len;
+	u16 jkey;
+	u8 max_read;
+	u8 max_write;
+	u8 timeout;
+	u8 urg;
+	u8 version;
+};
+
+struct tid_rdma_qp_params {
+	struct tid_rdma_params local;
+	struct tid_rdma_params __rcu *remote;
+};
+
+bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data);
+bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data);
+bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data);
+void tid_rdma_conn_error(struct rvt_qp *qp);
+void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p);
+
+int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit);
+
 int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 		      struct ib_qp_init_attr *init_attr);
 
 #endif /* HFI1_TID_RDMA_H */
-
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 8834119..c8baa1e 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -161,8 +161,13 @@  struct hfi1_qp_priv {
 	struct hfi1_ctxtdata *rcd;                /* QP's receive context */
 	u8 s_sc;		                  /* SC[0..4] for next packet */
 	struct iowait s_iowait;
+	struct hfi1_opfn_data opfn;
+	struct tid_rdma_qp_params tid_rdma;
 	struct rvt_qp *owner;
 	u8 hdr_type; /* 9B or 16B */
+	unsigned long tid_timer_timeout_jiffies;
+	u16 pkts_ps;            /* packets per segment */
+	u8 timeout_shift;       /* account for number of packets per segment */
 };
 
 /*