diff mbox series

[for-next,5/6] IB/hfi1: Integrate OPFN into RC transactions

Message ID 20190124032106.29585.31357.stgit@scvm10.sc.intel.com (mailing list archive)
State Accepted
Headers show
Series IB/hfi1: Add OPFN | expand

Commit Message

Dennis Dalessandro Jan. 24, 2019, 3:21 a.m. UTC
From: Kaike Wan <kaike.wan@intel.com>

OPFN parameter negotiation allows a pair of connected RC QPs to exchange
a set of parameters in succession. This negotiation does not commence
till the first ULP request. Because OPFN operations are operations
private to the driver, they do not generate user completions or put the
QP into error when they run out of retries. This patch integrates the
OPFN protocol into the transactions of an RC QP.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
---
 drivers/infiniband/hw/hfi1/init.c     |    7 ++++
 drivers/infiniband/hw/hfi1/qp.c       |   13 +++++++
 drivers/infiniband/hw/hfi1/rc.c       |   58 +++++++++++++++++++++++++++------
 drivers/infiniband/hw/hfi1/tid_rdma.c |   12 +++++++
 drivers/infiniband/hw/hfi1/tid_rdma.h |    1 +
 drivers/infiniband/hw/hfi1/verbs.c    |    1 +
 6 files changed, 82 insertions(+), 10 deletions(-)
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index d772a01..d0b57cc 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1498,6 +1498,12 @@  static int __init hfi1_mod_init(void)
 	/* sanitize link CRC options */
 	link_crc_mask &= SUPPORTED_CRCS;
 
+	ret = opfn_init();
+	if (ret < 0) {
+		pr_err("Failed to allocate opfn_wq");
+		goto bail_dev;
+	}
+
 	/*
 	 * These must be called before the driver is registered with
 	 * the PCI subsystem.
@@ -1528,6 +1534,7 @@  static int __init hfi1_mod_init(void)
 static void __exit hfi1_mod_cleanup(void)
 {
 	pci_unregister_driver(&hfi1_pci_driver);
+	opfn_exit();
 	node_affinity_destroy_all();
 	hfi1_dbg_exit();
 
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 5344e89..f822f92 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -132,6 +132,12 @@  static int iowait_sleep(
 	.qpt_support = BIT(IB_QPT_RC),
 },
 
+[IB_WR_OPFN] = {
+	.length = sizeof(struct ib_atomic_wr),
+	.qpt_support = BIT(IB_QPT_RC),
+	.flags = RVT_OPERATION_USE_RESERVE,
+},
+
 };
 
 static void flush_list_head(struct list_head *l)
@@ -285,6 +291,8 @@  void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
 		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
 		qp_set_16b(qp);
 	}
+
+	opfn_qp_init(qp, attr, attr_mask);
 }
 
 /**
@@ -696,6 +704,7 @@  void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 
+	hfi1_qp_priv_tid_free(rdi, qp);
 	kfree(priv->s_ahg);
 	kfree(priv);
 }
@@ -751,6 +760,10 @@  void notify_qp_reset(struct rvt_qp *qp)
 {
 	qp->r_adefered = 0;
 	clear_ahg(qp);
+
+	/* Clear any OPFN state */
+	if (qp->ibqp.qp_type == IB_QPT_RC)
+		opfn_conn_error(qp);
 }
 
 /*
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 8970fc7..092d5eb 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -57,6 +57,10 @@ 
 /* cut down ridiculously long IB macro names */
 #define OP(x) RC_OP(x)
 
+static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
+					 struct rvt_swqe *wqe,
+					 struct hfi1_ibport *ibp);
+
 static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
 		       u32 psn, u32 pmtu)
 {
@@ -517,10 +521,14 @@  int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 					goto bail;
 				}
 				qp->s_num_rd_atomic++;
-				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
-					qp->s_lsn++;
 			}
-			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+
+			/* FALLTHROUGH */
+		case IB_WR_OPFN:
+			if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
+				qp->s_lsn++;
+			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+			    wqe->wr.opcode == IB_WR_OPFN) {
 				qp->s_state = OP(COMPARE_SWAP);
 				put_ib_ateth_swap(wqe->atomic_wr.swap,
 						  &ohdr->u.atomic_eth);
@@ -1040,6 +1048,7 @@  static void reset_psn(struct rvt_qp *qp, u32 psn)
  */
 void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	struct hfi1_ibport *ibp;
 
@@ -1050,8 +1059,26 @@  void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
 			hfi1_migrate_qp(qp);
 			qp->s_retry = qp->s_retry_cnt;
 		} else if (qp->s_last == qp->s_acked) {
-			rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
-			rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+			/*
+			 * We need special handling for the OPFN request WQEs as
+			 * they are not allowed to generate real user errors
+			 */
+			if (wqe->wr.opcode == IB_WR_OPFN) {
+				struct hfi1_ibport *ibp =
+					to_iport(qp->ibqp.device, qp->port_num);
+				/*
+				 * Call opfn_conn_reply() with capcode and
+				 * remaining data as 0 to close out the
+				 * current request
+				 */
+				opfn_conn_reply(qp, priv->opfn.curr);
+				wqe = do_rc_completion(qp, wqe, ibp);
+				qp->s_flags &= ~RVT_S_WAIT_ACK;
+			} else {
+				rvt_send_complete(qp, wqe,
+						  IB_WC_RETRY_EXC_ERR);
+				rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+			}
 			return;
 		} else { /* need to handle delayed completion */
 			return;
@@ -1363,6 +1390,9 @@  static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 			u64 *vaddr = wqe->sg_list[0].vaddr;
 			*vaddr = val;
 		}
+		if (wqe->wr.opcode == IB_WR_OPFN)
+			opfn_conn_reply(qp, val);
+
 		if (qp->s_num_rd_atomic &&
 		    (wqe->wr.opcode == IB_WR_RDMA_READ ||
 		     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
@@ -2068,6 +2098,7 @@  void hfi1_rc_rcv(struct hfi1_packet *packet)
 		return;
 
 	fecn = process_ecn(qp, packet);
+	opfn_trigger_conn_request(qp, be32_to_cpu(ohdr->bth[1]));
 
 	/*
 	 * Process responses (ACKs) before anything else.  Note that the
@@ -2363,15 +2394,18 @@  void hfi1_rc_rcv(struct hfi1_packet *packet)
 
 	case OP(COMPARE_SWAP):
 	case OP(FETCH_ADD): {
-		struct ib_atomic_eth *ateth;
+		struct ib_atomic_eth *ateth = &ohdr->u.atomic_eth;
+		u64 vaddr = get_ib_ateth_vaddr(ateth);
+		bool opfn = opcode == OP(COMPARE_SWAP) &&
+			vaddr == HFI1_VERBS_E_ATOMIC_VADDR;
 		struct rvt_ack_entry *e;
-		u64 vaddr;
 		atomic64_t *maddr;
 		u64 sdata;
 		u32 rkey;
 		u8 next;
 
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+			     !opfn))
 			goto nack_inv;
 		next = qp->r_head_ack_queue + 1;
 		if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
@@ -2387,8 +2421,11 @@  void hfi1_rc_rcv(struct hfi1_packet *packet)
 			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
-		ateth = &ohdr->u.atomic_eth;
-		vaddr = get_ib_ateth_vaddr(ateth);
+		/* Process OPFN special virtual address */
+		if (opfn) {
+			opfn_conn_response(qp, e, ateth);
+			goto ack;
+		}
 		if (unlikely(vaddr & (sizeof(u64) - 1)))
 			goto nack_inv_unlck;
 		rkey = be32_to_cpu(ateth->rkey);
@@ -2407,6 +2444,7 @@  void hfi1_rc_rcv(struct hfi1_packet *packet)
 				      sdata);
 		rvt_put_mr(qp->r_sge.sge.mr);
 		qp->r_sge.num_sge = 0;
+ack:
 		e->opcode = opcode;
 		e->sent = 0;
 		e->psn = psn;
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index a8fd66f..4ab1cdc 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -246,5 +246,17 @@  int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 
 	qpriv->rcd = qp_to_rcd(rdi, qp);
 
+	spin_lock_init(&qpriv->opfn.lock);
+	INIT_WORK(&qpriv->opfn.opfn_work, opfn_send_conn_request);
+
 	return 0;
 }
+
+void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	if (qp->ibqp.qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA))
+		cancel_work_sync(&priv->opfn.opfn_work);
+}
+
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index 18c6d43..ee81515 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -35,5 +35,6 @@  struct tid_rdma_qp_params {
 
 int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 		      struct ib_qp_init_attr *init_attr);
+void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
 
 #endif /* HFI1_TID_RDMA_H */
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index ec3899c..571bfd5 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1735,6 +1735,7 @@  int hfi1_register_ib_device(struct hfi1_devdata *dd)
 	dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
 	dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
 	dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
+	dd->verbs_dev.rdi.dparms.reserved_operations = 1;
 	dd->verbs_dev.rdi.dparms.extra_rdma_atomic = 1;
 
 	/* post send table */