diff mbox series

[for-next,23/23] IB/hfi1: Prioritize the sending of ACK packets

Message ID 20190124055214.10736.20092.stgit@scvm10.sc.intel.com (mailing list archive)
State Accepted
Delegated to: Doug Ledford
Headers show
Series IB/hfi1: Add TID RDMA Write | expand

Commit Message

Dennis Dalessandro Jan. 24, 2019, 5:52 a.m. UTC
From: Kaike Wan <kaike.wan@intel.com>

ACK packets are generally associated with request completion and resource
release and therefore should be sent first. This patch optimizes the
send engine by using the following policies:
(1) QPs with the RVT_S_ACK_PENDING bit set in qp->s_flags or qpriv->s_flags
should have their priority incremented;
(2) QPs with an ACK or TID-ACK packet queued should have their priority
incremented;
(3) When a QP is queued to the wait list due to resource constraints, it
will be queued to the head if it has an ACK packet to send;
(4) When selecting QPs to run from the wait list, the one with the highest
combined priority and starve_cnt will be selected; each priority level is
equivalent to a fixed number of starve_cnt (16).

Reviewed-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
---
 drivers/infiniband/hw/hfi1/iowait.c      |   34 +++++++++++-
 drivers/infiniband/hw/hfi1/iowait.h      |   87 ++++++++++++++++++++----------
 drivers/infiniband/hw/hfi1/pio.c         |   18 ++++--
 drivers/infiniband/hw/hfi1/qp.c          |   15 +++++
 drivers/infiniband/hw/hfi1/rc.c          |    1 
 drivers/infiniband/hw/hfi1/sdma.c        |   24 +++++---
 drivers/infiniband/hw/hfi1/sdma_txreq.h  |    1 
 drivers/infiniband/hw/hfi1/tid_rdma.c    |    1 
 drivers/infiniband/hw/hfi1/user_sdma.c   |    6 +-
 drivers/infiniband/hw/hfi1/verbs.c       |    1 
 drivers/infiniband/hw/hfi1/verbs_txreq.h |    1 
 drivers/infiniband/hw/hfi1/vnic_sdma.c   |    6 +-
 12 files changed, 144 insertions(+), 51 deletions(-)
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c
index 582f1ba..adb4a1b 100644
--- a/drivers/infiniband/hw/hfi1/iowait.c
+++ b/drivers/infiniband/hw/hfi1/iowait.c
@@ -6,6 +6,9 @@ 
 #include "iowait.h"
 #include "trace_iowait.h"
 
+/* 1 priority == 16 starve_cnt */
+#define IOWAIT_PRIORITY_STARVE_SHIFT 4
+
 void iowait_set_flag(struct iowait *wait, u32 flag)
 {
 	trace_hfi1_iowait_set(wait, flag);
@@ -44,7 +47,8 @@  void iowait_init(struct iowait *wait, u32 tx_limit,
 			      uint seq,
 			      bool pkts_sent),
 		 void (*wakeup)(struct iowait *wait, int reason),
-		 void (*sdma_drained)(struct iowait *wait))
+		 void (*sdma_drained)(struct iowait *wait),
+		 void (*init_priority)(struct iowait *wait))
 {
 	int i;
 
@@ -58,6 +62,7 @@  void iowait_init(struct iowait *wait, u32 tx_limit,
 	wait->sleep = sleep;
 	wait->wakeup = wakeup;
 	wait->sdma_drained = sdma_drained;
+	wait->init_priority = init_priority;
 	wait->flags = 0;
 	for (i = 0; i < IOWAIT_SES; i++) {
 		wait->wait[i].iow = wait;
@@ -92,3 +97,30 @@  int iowait_set_work_flag(struct iowait_work *w)
 	iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
 	return IOWAIT_TID_SE;
 }
+
+/**
+ * iowait_priority_update_top - update the top priority entry
+ * @w: the iowait struct
+ * @top: a pointer to the top priority entry
+ * @idx: the index of the current iowait in an array
+ * @top_idx: the array index for the iowait entry that has the top priority
+ *
+ * Weigh @w against @top, counting each priority level as
+ * 1 << IOWAIT_PRIORITY_STARVE_SHIFT starve counts. Return @idx if @w
+ * weighs strictly more than @top; otherwise return @top_idx.
+ */
+uint iowait_priority_update_top(struct iowait *w,
+				struct iowait *top,
+				uint idx, uint top_idx)
+{
+	uint cnt, tcnt;
+
+	/* Sum in a wide type: priority * 16 + starved_cnt can exceed a u8. */
+	cnt = (w->priority << IOWAIT_PRIORITY_STARVE_SHIFT) + w->starved_cnt;
+	tcnt = (top->priority << IOWAIT_PRIORITY_STARVE_SHIFT) +
+		top->starved_cnt;
+	if (cnt > tcnt)
+		return idx;
+	else
+		return top_idx;
+}
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index bd91370..07847cb 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -100,6 +100,7 @@  struct iowait_work {
  * @sleep: no space callback
  * @wakeup: space callback wakeup
  * @sdma_drained: sdma count drained
+ * @init_priority: callback to manipulate priority
  * @lock: lock protected head of wait queue
  * @iowork: workqueue overhead
  * @wait_dma: wait for sdma_busy == 0
@@ -109,7 +110,7 @@  struct iowait_work {
  * @tx_limit: limit for overflow queuing
  * @tx_count: number of tx entry's in tx_head'ed list
  * @flags: wait flags (one per QP)
- * @wait: SE array
+ * @wait: SE array for multiple legs
  *
  * This is to be embedded in user's state structure
  * (QP or PQ).
@@ -120,10 +121,13 @@  struct iowait_work {
  * are callbacks for the ULP to implement
  * what ever queuing/dequeuing of
  * the embedded iowait and its containing struct
- * when a resource shortage like SDMA ring space is seen.
+ * when a resource shortage like SDMA ring space
+ * or PIO credit space is seen.
  *
  * Both potentially have locks help
- * so sleeping is not allowed.
+ * so sleeping is not allowed and it is not
+ * supported to submit txreqs from the wakeup
+ * call directly because of lock conflicts.
  *
  * The wait_dma member along with the iow
  *
@@ -143,6 +147,7 @@  struct iowait {
 		);
 	void (*wakeup)(struct iowait *wait, int reason);
 	void (*sdma_drained)(struct iowait *wait);
+	void (*init_priority)(struct iowait *wait);
 	seqlock_t *lock;
 	wait_queue_head_t wait_dma;
 	wait_queue_head_t wait_pio;
@@ -152,6 +157,7 @@  struct iowait {
 	u32 tx_limit;
 	u32 tx_count;
 	u8 starved_cnt;
+	u8 priority;
 	unsigned long flags;
 	struct iowait_work wait[IOWAIT_SES];
 };
@@ -171,7 +177,8 @@  void iowait_init(struct iowait *wait, u32 tx_limit,
 			      uint seq,
 			      bool pkts_sent),
 		 void (*wakeup)(struct iowait *wait, int reason),
-		 void (*sdma_drained)(struct iowait *wait));
+		 void (*sdma_drained)(struct iowait *wait),
+		 void (*init_priority)(struct iowait *wait));
 
 /**
  * iowait_schedule() - schedule the default send engine work
@@ -339,6 +346,8 @@  static inline u16 iowait_get_desc(struct iowait_work *w)
 		tx = list_first_entry(&w->tx_head, struct sdma_txreq,
 				      list);
 		num_desc = tx->num_desc;
+		if (tx->flags & SDMA_TXREQ_F_VIP)
+			w->iow->priority++;
 	}
 	return num_desc;
 }
@@ -352,6 +361,40 @@  static inline u32 iowait_get_all_desc(struct iowait *w)
 	return num_desc;
 }
 
+/* Bump the owner's priority if the first txreq queued on @w is VIP. */
+static inline void iowait_update_priority(struct iowait_work *w)
+{
+	struct sdma_txreq *first;
+
+	if (list_empty(&w->tx_head))
+		return;
+	first = list_first_entry(&w->tx_head, struct sdma_txreq, list);
+	if (first->flags & SDMA_TXREQ_F_VIP)
+		w->iow->priority++;
+}
+
+/* Check the first queued txreq of both the IB and TID wait[] entries. */
+static inline void iowait_update_all_priority(struct iowait *w)
+{
+	iowait_update_priority(&w->wait[IOWAIT_IB_SE]);
+	iowait_update_priority(&w->wait[IOWAIT_TID_SE]);
+}
+
+/* Reset the priority, then let the owner's callback (if set) adjust it. */
+static inline void iowait_init_priority(struct iowait *w)
+{
+	w->priority = 0;
+	if (w->init_priority)
+		w->init_priority(w);
+}
+
+/* Recompute the priority: callback contribution plus queued VIP txreqs. */
+static inline void iowait_get_priority(struct iowait *w)
+{
+	iowait_init_priority(w);
+	iowait_update_all_priority(w);
+}
+
 /**
  * iowait_queue - Put the iowait on a wait queue
  * @pkts_sent: have some packets been sent before queuing?
@@ -368,14 +408,18 @@  static inline void iowait_queue(bool pkts_sent, struct iowait *w,
 	/*
 	 * To play fair, insert the iowait at the tail of the wait queue if it
 	 * has already sent some packets; Otherwise, put it at the head.
+	 * However, if it has priority packets to send, also put it at the
+	 * head.
 	 */
-	if (pkts_sent) {
-		list_add_tail(&w->list, wait_head);
+	if (pkts_sent)
 		w->starved_cnt = 0;
-	} else {
-		list_add(&w->list, wait_head);
+	else
 		w->starved_cnt++;
-	}
+
+	if (w->priority > 0 || !pkts_sent)
+		list_add(&w->list, wait_head);
+	else
+		list_add_tail(&w->list, wait_head);
 }
 
 /**
@@ -392,27 +436,10 @@  static inline void iowait_starve_clear(bool pkts_sent, struct iowait *w)
 		w->starved_cnt = 0;
 }
 
-/**
- * iowait_starve_find_max - Find the maximum of the starve count
- * @w: the iowait struct
- * @max: a variable containing the max starve count
- * @idx: the index of the current iowait in an array
- * @max_idx: a variable containing the array index for the
- *         iowait entry that has the max starve count
- *
- * This function is called to compare the starve count of a
- * given iowait with the given max starve count. The max starve
- * count and the index will be updated if the iowait's start
- * count is larger.
- */
-static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
-					  uint idx, uint *max_idx)
-{
-	if (w->starved_cnt > *max) {
-		*max = w->starved_cnt;
-		*max_idx = idx;
-	}
-}
+/* Update the top priority index */
+uint iowait_priority_update_top(struct iowait *w,
+				struct iowait *top,
+				uint idx, uint top_idx);
 
 /**
  * iowait_packet_queued() - determine if a packet is queued
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index dd5a5c0..2f8d6c9 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1599,8 +1599,7 @@  static void sc_piobufavail(struct send_context *sc)
 	struct rvt_qp *qp;
 	struct hfi1_qp_priv *priv;
 	unsigned long flags;
-	uint i, n = 0, max_idx = 0;
-	u8 max_starved_cnt = 0;
+	uint i, n = 0, top_idx = 0;
 
 	if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
 	    dd->send_contexts[sc->sw_index].type != SC_VL15)
@@ -1619,11 +1618,18 @@  static void sc_piobufavail(struct send_context *sc)
 		if (n == ARRAY_SIZE(qps))
 			break;
 		wait = list_first_entry(list, struct iowait, list);
+		iowait_get_priority(wait);
 		qp = iowait_to_qp(wait);
 		priv = qp->priv;
 		list_del_init(&priv->s_iowait.list);
 		priv->s_iowait.lock = NULL;
-		iowait_starve_find_max(wait, &max_starved_cnt, n, &max_idx);
+		if (n) {
+			priv = qps[top_idx]->priv;
+			top_idx = iowait_priority_update_top(wait,
+							     &priv->s_iowait,
+							     n, top_idx);
+		}
+
 		/* refcount held until actual wake up */
 		qps[n++] = qp;
 	}
@@ -1638,12 +1644,12 @@  static void sc_piobufavail(struct send_context *sc)
 	}
 	write_sequnlock_irqrestore(&sc->waitlock, flags);
 
-	/* Wake up the most starved one first */
+	/* Wake up the top-priority one first */
 	if (n)
-		hfi1_qp_wakeup(qps[max_idx],
+		hfi1_qp_wakeup(qps[top_idx],
 			       RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
 	for (i = 0; i < n; i++)
-		if (i != max_idx)
+		if (i != top_idx)
 			hfi1_qp_wakeup(qps[i],
 				       RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
 }
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index cfd598e..d8f7add 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -518,6 +518,7 @@  static int iowait_sleep(
 
 			ibp->rvp.n_dmawait++;
 			qp->s_flags |= RVT_S_WAIT_DMA_DESC;
+			iowait_get_priority(&priv->s_iowait);
 			iowait_queue(pkts_sent, &priv->s_iowait,
 				     &sde->dmawait);
 			priv->s_iowait.lock = &sde->waitlock;
@@ -567,6 +568,17 @@  static void iowait_sdma_drained(struct iowait *wait)
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
+/* init_priority callback: one priority level per pending-ACK flag. */
+static void hfi1_init_priority(struct iowait *w)
+{
+	struct rvt_qp *qp = iowait_to_qp(w);
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	/* qp->s_flags and priv->s_flags are tested independently */
+	w->priority += !!(qp->s_flags & RVT_S_ACK_PENDING);
+	w->priority += !!(priv->s_flags & RVT_S_ACK_PENDING);
+}
+
 /**
  * qp_to_sdma_engine - map a qp to a send engine
  * @qp: the QP
@@ -727,7 +739,8 @@  void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
 		_hfi1_do_tid_send,
 		iowait_sleep,
 		iowait_wakeup,
-		iowait_sdma_drained);
+		iowait_sdma_drained,
+		hfi1_init_priority);
 	return priv;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 82afa77..e6726c1 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -390,6 +390,7 @@  static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 		bth0 = OP(ACKNOWLEDGE) << 24;
 		bth2 = mask_psn(qp->s_ack_psn);
 		qp->s_flags &= ~RVT_S_ACK_PENDING;
+		ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
 		ps->s_txreq->ss = NULL;
 	}
 	qp->s_rdma_ack_cnt++;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index b84356e..5d1491c 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1756,10 +1756,9 @@  static inline u16 sdma_gethead(struct sdma_engine *sde)
  */
 static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
 {
-	struct iowait *wait, *nw;
+	struct iowait *wait, *nw, *twait;
 	struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
-	uint i, n = 0, seq, max_idx = 0;
-	u8 max_starved_cnt = 0;
+	uint i, n = 0, seq, tidx = 0;
 
 #ifdef CONFIG_SDMA_VERBOSITY
 	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
@@ -1784,13 +1783,20 @@  static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
 					continue;
 				if (n == ARRAY_SIZE(waits))
 					break;
+				iowait_init_priority(wait);
 				num_desc = iowait_get_all_desc(wait);
 				if (num_desc > avail)
 					break;
 				avail -= num_desc;
-				/* Find the most starved wait memeber */
-				iowait_starve_find_max(wait, &max_starved_cnt,
-						       n, &max_idx);
+				/* Find the top-priority wait member */
+				if (n) {
+					twait = waits[tidx];
+					tidx =
+					    iowait_priority_update_top(wait,
+								       twait,
+								       n,
+								       tidx);
+				}
 				list_del_init(&wait->list);
 				waits[n++] = wait;
 			}
@@ -1799,12 +1805,12 @@  static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
 		}
 	} while (read_seqretry(&sde->waitlock, seq));
 
-	/* Schedule the most starved one first */
+	/* Schedule the top-priority entry first */
 	if (n)
-		waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON);
+		waits[tidx]->wakeup(waits[tidx], SDMA_AVAIL_REASON);
 
 	for (i = 0; i < n; i++)
-		if (i != max_idx)
+		if (i != tidx)
 			waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
 }
 
diff --git a/drivers/infiniband/hw/hfi1/sdma_txreq.h b/drivers/infiniband/hw/hfi1/sdma_txreq.h
index bf7d777..514a478 100644
--- a/drivers/infiniband/hw/hfi1/sdma_txreq.h
+++ b/drivers/infiniband/hw/hfi1/sdma_txreq.h
@@ -91,6 +91,7 @@  struct sdma_desc {
 #define SDMA_TXREQ_F_URGENT       0x0001
 #define SDMA_TXREQ_F_AHG_COPY     0x0002
 #define SDMA_TXREQ_F_USE_AHG      0x0004
+#define SDMA_TXREQ_F_VIP          0x0010
 
 struct sdma_txreq;
 typedef void (*callback_t)(struct sdma_txreq *, int);
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 25c2f52..a7a1833 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -5292,6 +5292,7 @@  static int make_tid_rdma_ack(struct rvt_qp *qp,
 	ps->s_txreq->ss = NULL;
 	hfi1_make_ruc_header(qp, ohdr, (TID_OP(ACK) << 24), bth1, bth2, middle,
 			     ps);
+	ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
 	return 1;
 bail:
 	/*
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 6764114..8bfbc6d 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -144,8 +144,10 @@  static int defer_packet_queue(
 	 */
 	xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
 	write_seqlock(&sde->waitlock);
-	if (list_empty(&pq->busy.list))
+	if (list_empty(&pq->busy.list)) {
+		iowait_get_priority(&pq->busy);
 		iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
+	}
 	write_sequnlock(&sde->waitlock);
 	return -EBUSY;
 eagain:
@@ -191,7 +193,7 @@  int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
 	pq->mm = fd->mm;
 
 	iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
-		    activate_packet_queue, NULL);
+		    activate_packet_queue, NULL, NULL);
 	pq->reqidx = 0;
 
 	pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index ab97d71..55a56b3 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -945,6 +945,7 @@  static int pio_wait(struct rvt_qp *qp,
 			dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN);
 			qp->s_flags |= flag;
 			was_empty = list_empty(&sc->piowait);
+			iowait_get_priority(&priv->s_iowait);
 			iowait_queue(ps->pkts_sent, &priv->s_iowait,
 				     &sc->piowait);
 			priv->s_iowait.lock = &sc->waitlock;
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 2a77af2..b002e96 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -94,6 +94,7 @@  struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 	tx->txreq.num_desc = 0;
 	/* Set the header type */
 	tx->phdr.hdr.hdr_type = priv->hdr_type;
+	tx->txreq.flags = 0;
 	return tx;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index 1f81c48..af1b1ff 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -240,8 +240,10 @@  static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
 	}
 
 	vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
-	if (list_empty(&vnic_sdma->wait.list))
+	if (list_empty(&vnic_sdma->wait.list)) {
+		iowait_get_priority(wait->iow);
 		iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
+	}
 	write_sequnlock(&sde->waitlock);
 	return -EBUSY;
 }
@@ -281,7 +283,7 @@  void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
 
 		iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
 			    hfi1_vnic_sdma_sleep,
-			    hfi1_vnic_sdma_wakeup, NULL);
+			    hfi1_vnic_sdma_wakeup, NULL, NULL);
 		vnic_sdma->sde = &vinfo->dd->per_sdma[i];
 		vnic_sdma->dd = vinfo->dd;
 		vnic_sdma->vinfo = vinfo;