@@ -6,6 +6,9 @@
#include "iowait.h"
#include "trace_iowait.h"
+/* 1 priority == 16 starve_cnt */
+#define IOWAIT_PRIORITY_STARVE_SHIFT 4
+
void iowait_set_flag(struct iowait *wait, u32 flag)
{
trace_hfi1_iowait_set(wait, flag);
@@ -44,7 +47,8 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
uint seq,
bool pkts_sent),
void (*wakeup)(struct iowait *wait, int reason),
- void (*sdma_drained)(struct iowait *wait))
+ void (*sdma_drained)(struct iowait *wait),
+ void (*init_priority)(struct iowait *wait))
{
int i;
@@ -58,6 +62,7 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
wait->sleep = sleep;
wait->wakeup = wakeup;
wait->sdma_drained = sdma_drained;
+ wait->init_priority = init_priority;
wait->flags = 0;
for (i = 0; i < IOWAIT_SES; i++) {
wait->wait[i].iow = wait;
@@ -92,3 +97,30 @@ int iowait_set_work_flag(struct iowait_work *w)
iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
return IOWAIT_TID_SE;
}
+
+/**
+ * iowait_priority_update_top - update the top priority entry
+ * @w: the iowait struct
+ * @top: a pointer to the top priority entry
+ * @idx: the index of the current iowait in an array
+ * @top_idx: the array index for the iowait entry that has the top priority
+ *
+ * This function is called to compare the priority of a given
+ * iowait with the given top priority entry. The top index will
+ * be returned.
+ */
+uint iowait_priority_update_top(struct iowait *w,
+ struct iowait *top,
+ uint idx, uint top_idx)
+{
+ u8 cnt, tcnt;
+
+ /* Convert priority into starve_cnt and compare the total.*/
+ cnt = (w->priority << IOWAIT_PRIORITY_STARVE_SHIFT) + w->starved_cnt;
+ tcnt = (top->priority << IOWAIT_PRIORITY_STARVE_SHIFT) +
+ top->starved_cnt;
+ if (cnt > tcnt)
+ return idx;
+ else
+ return top_idx;
+}
@@ -100,6 +100,7 @@ struct iowait_work {
* @sleep: no space callback
* @wakeup: space callback wakeup
* @sdma_drained: sdma count drained
+ * @init_priority: callback to manipulate priority
* @lock: lock protected head of wait queue
* @iowork: workqueue overhead
* @wait_dma: wait for sdma_busy == 0
@@ -109,7 +110,7 @@ struct iowait_work {
* @tx_limit: limit for overflow queuing
* @tx_count: number of tx entry's in tx_head'ed list
* @flags: wait flags (one per QP)
- * @wait: SE array
+ * @wait: SE array for multiple legs
*
* This is to be embedded in user's state structure
* (QP or PQ).
@@ -120,10 +121,13 @@ struct iowait_work {
* are callbacks for the ULP to implement
* what ever queuing/dequeuing of
* the embedded iowait and its containing struct
- * when a resource shortage like SDMA ring space is seen.
+ * when a resource shortage like SDMA ring space
+ * or PIO credit space is seen.
*
* Both potentially have locks help
- * so sleeping is not allowed.
+ * so sleeping is not allowed and it is not
+ * supported to submit txreqs from the wakeup
+ * call directly because of lock conflicts.
*
* The wait_dma member along with the iow
*
@@ -143,6 +147,7 @@ struct iowait {
);
void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait);
+ void (*init_priority)(struct iowait *wait);
seqlock_t *lock;
wait_queue_head_t wait_dma;
wait_queue_head_t wait_pio;
@@ -152,6 +157,7 @@ struct iowait {
u32 tx_limit;
u32 tx_count;
u8 starved_cnt;
+ u8 priority;
unsigned long flags;
struct iowait_work wait[IOWAIT_SES];
};
@@ -171,7 +177,8 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
uint seq,
bool pkts_sent),
void (*wakeup)(struct iowait *wait, int reason),
- void (*sdma_drained)(struct iowait *wait));
+ void (*sdma_drained)(struct iowait *wait),
+ void (*init_priority)(struct iowait *wait));
/**
* iowait_schedule() - schedule the default send engine work
@@ -339,6 +346,8 @@ static inline u16 iowait_get_desc(struct iowait_work *w)
tx = list_first_entry(&w->tx_head, struct sdma_txreq,
list);
num_desc = tx->num_desc;
+ if (tx->flags & SDMA_TXREQ_F_VIP)
+ w->iow->priority++;
}
return num_desc;
}
@@ -352,6 +361,37 @@ static inline u32 iowait_get_all_desc(struct iowait *w)
return num_desc;
}
+static inline void iowait_update_priority(struct iowait_work *w)
+{
+ struct sdma_txreq *tx = NULL;
+
+ if (!list_empty(&w->tx_head)) {
+ tx = list_first_entry(&w->tx_head, struct sdma_txreq,
+ list);
+ if (tx->flags & SDMA_TXREQ_F_VIP)
+ w->iow->priority++;
+ }
+}
+
+static inline void iowait_update_all_priority(struct iowait *w)
+{
+ iowait_update_priority(&w->wait[IOWAIT_IB_SE]);
+ iowait_update_priority(&w->wait[IOWAIT_TID_SE]);
+}
+
+static inline void iowait_init_priority(struct iowait *w)
+{
+ w->priority = 0;
+ if (w->init_priority)
+ w->init_priority(w);
+}
+
+static inline void iowait_get_priority(struct iowait *w)
+{
+ iowait_init_priority(w);
+ iowait_update_all_priority(w);
+}
+
/**
* iowait_queue - Put the iowait on a wait queue
* @pkts_sent: have some packets been sent before queuing?
@@ -368,14 +408,18 @@ static inline void iowait_queue(bool pkts_sent, struct iowait *w,
/*
* To play fair, insert the iowait at the tail of the wait queue if it
* has already sent some packets; Otherwise, put it at the head.
+ * However, if it has priority packets to send, also put it at the
+ * head.
*/
- if (pkts_sent) {
- list_add_tail(&w->list, wait_head);
+ if (pkts_sent)
w->starved_cnt = 0;
- } else {
- list_add(&w->list, wait_head);
+ else
w->starved_cnt++;
- }
+
+ if (w->priority > 0 || !pkts_sent)
+ list_add(&w->list, wait_head);
+ else
+ list_add_tail(&w->list, wait_head);
}
/**
@@ -392,27 +436,10 @@ static inline void iowait_starve_clear(bool pkts_sent, struct iowait *w)
w->starved_cnt = 0;
}
-/**
- * iowait_starve_find_max - Find the maximum of the starve count
- * @w: the iowait struct
- * @max: a variable containing the max starve count
- * @idx: the index of the current iowait in an array
- * @max_idx: a variable containing the array index for the
- * iowait entry that has the max starve count
- *
- * This function is called to compare the starve count of a
- * given iowait with the given max starve count. The max starve
- * count and the index will be updated if the iowait's start
- * count is larger.
- */
-static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
- uint idx, uint *max_idx)
-{
- if (w->starved_cnt > *max) {
- *max = w->starved_cnt;
- *max_idx = idx;
- }
-}
+/* Update the top priority index */
+uint iowait_priority_update_top(struct iowait *w,
+ struct iowait *top,
+ uint idx, uint top_idx);
/**
* iowait_packet_queued() - determine if a packet is queued
@@ -1599,8 +1599,7 @@ static void sc_piobufavail(struct send_context *sc)
struct rvt_qp *qp;
struct hfi1_qp_priv *priv;
unsigned long flags;
- uint i, n = 0, max_idx = 0;
- u8 max_starved_cnt = 0;
+ uint i, n = 0, top_idx = 0;
if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
dd->send_contexts[sc->sw_index].type != SC_VL15)
@@ -1619,11 +1618,18 @@ static void sc_piobufavail(struct send_context *sc)
if (n == ARRAY_SIZE(qps))
break;
wait = list_first_entry(list, struct iowait, list);
+ iowait_get_priority(wait);
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
- iowait_starve_find_max(wait, &max_starved_cnt, n, &max_idx);
+ if (n) {
+ priv = qps[top_idx]->priv;
+ top_idx = iowait_priority_update_top(wait,
+ &priv->s_iowait,
+ n, top_idx);
+ }
+
/* refcount held until actual wake up */
qps[n++] = qp;
}
@@ -1638,12 +1644,12 @@ static void sc_piobufavail(struct send_context *sc)
}
write_sequnlock_irqrestore(&sc->waitlock, flags);
- /* Wake up the most starved one first */
+ /* Wake up the top-priority one first */
if (n)
- hfi1_qp_wakeup(qps[max_idx],
+ hfi1_qp_wakeup(qps[top_idx],
RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
for (i = 0; i < n; i++)
- if (i != max_idx)
+ if (i != top_idx)
hfi1_qp_wakeup(qps[i],
RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
}
@@ -518,6 +518,7 @@ static int iowait_sleep(
ibp->rvp.n_dmawait++;
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
+ iowait_get_priority(&priv->s_iowait);
iowait_queue(pkts_sent, &priv->s_iowait,
&sde->dmawait);
priv->s_iowait.lock = &sde->waitlock;
@@ -567,6 +568,17 @@ static void iowait_sdma_drained(struct iowait *wait)
spin_unlock_irqrestore(&qp->s_lock, flags);
}
+static void hfi1_init_priority(struct iowait *w)
+{
+ struct rvt_qp *qp = iowait_to_qp(w);
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ if (qp->s_flags & RVT_S_ACK_PENDING)
+ w->priority++;
+ if (priv->s_flags & RVT_S_ACK_PENDING)
+ w->priority++;
+}
+
/**
* qp_to_sdma_engine - map a qp to a send engine
* @qp: the QP
@@ -727,7 +739,8 @@ void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
_hfi1_do_tid_send,
iowait_sleep,
iowait_wakeup,
- iowait_sdma_drained);
+ iowait_sdma_drained,
+ hfi1_init_priority);
return priv;
}
@@ -390,6 +390,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
bth0 = OP(ACKNOWLEDGE) << 24;
bth2 = mask_psn(qp->s_ack_psn);
qp->s_flags &= ~RVT_S_ACK_PENDING;
+ ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
ps->s_txreq->ss = NULL;
}
qp->s_rdma_ack_cnt++;
@@ -1756,10 +1756,9 @@ static inline u16 sdma_gethead(struct sdma_engine *sde)
*/
static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
{
- struct iowait *wait, *nw;
+ struct iowait *wait, *nw, *twait;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
- uint i, n = 0, seq, max_idx = 0;
- u8 max_starved_cnt = 0;
+ uint i, n = 0, seq, tidx = 0;
#ifdef CONFIG_SDMA_VERBOSITY
dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
@@ -1784,13 +1783,20 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
continue;
if (n == ARRAY_SIZE(waits))
break;
+ iowait_init_priority(wait);
num_desc = iowait_get_all_desc(wait);
if (num_desc > avail)
break;
avail -= num_desc;
- /* Find the most starved wait memeber */
- iowait_starve_find_max(wait, &max_starved_cnt,
- n, &max_idx);
+ /* Find the top-priority wait memeber */
+ if (n) {
+ twait = waits[tidx];
+ tidx =
+ iowait_priority_update_top(wait,
+ twait,
+ n,
+ tidx);
+ }
list_del_init(&wait->list);
waits[n++] = wait;
}
@@ -1799,12 +1805,12 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
}
} while (read_seqretry(&sde->waitlock, seq));
- /* Schedule the most starved one first */
+ /* Schedule the top-priority entry first */
if (n)
- waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON);
+ waits[tidx]->wakeup(waits[tidx], SDMA_AVAIL_REASON);
for (i = 0; i < n; i++)
- if (i != max_idx)
+ if (i != tidx)
waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
}
@@ -91,6 +91,7 @@ struct sdma_desc {
#define SDMA_TXREQ_F_URGENT 0x0001
#define SDMA_TXREQ_F_AHG_COPY 0x0002
#define SDMA_TXREQ_F_USE_AHG 0x0004
+#define SDMA_TXREQ_F_VIP 0x0010
struct sdma_txreq;
typedef void (*callback_t)(struct sdma_txreq *, int);
@@ -5292,6 +5292,7 @@ static int make_tid_rdma_ack(struct rvt_qp *qp,
ps->s_txreq->ss = NULL;
hfi1_make_ruc_header(qp, ohdr, (TID_OP(ACK) << 24), bth1, bth2, middle,
ps);
+ ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
return 1;
bail:
/*
@@ -144,8 +144,10 @@ static int defer_packet_queue(
*/
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
write_seqlock(&sde->waitlock);
- if (list_empty(&pq->busy.list))
+ if (list_empty(&pq->busy.list)) {
+ iowait_get_priority(&pq->busy);
iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
+ }
write_sequnlock(&sde->waitlock);
return -EBUSY;
eagain:
@@ -191,7 +193,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
pq->mm = fd->mm;
iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
- activate_packet_queue, NULL);
+ activate_packet_queue, NULL, NULL);
pq->reqidx = 0;
pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
@@ -945,6 +945,7 @@ static int pio_wait(struct rvt_qp *qp,
dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN);
qp->s_flags |= flag;
was_empty = list_empty(&sc->piowait);
+ iowait_get_priority(&priv->s_iowait);
iowait_queue(ps->pkts_sent, &priv->s_iowait,
&sc->piowait);
priv->s_iowait.lock = &sc->waitlock;
@@ -94,6 +94,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
tx->txreq.num_desc = 0;
/* Set the header type */
tx->phdr.hdr.hdr_type = priv->hdr_type;
+ tx->txreq.flags = 0;
return tx;
}
@@ -240,8 +240,10 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
}
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
- if (list_empty(&vnic_sdma->wait.list))
+ if (list_empty(&vnic_sdma->wait.list)) {
+ iowait_get_priority(wait->iow);
iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
+ }
write_sequnlock(&sde->waitlock);
return -EBUSY;
}
@@ -281,7 +283,7 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
hfi1_vnic_sdma_sleep,
- hfi1_vnic_sdma_wakeup, NULL);
+ hfi1_vnic_sdma_wakeup, NULL, NULL);
vnic_sdma->sde = &vinfo->dd->per_sdma[i];
vnic_sdma->dd = vinfo->dd;
vnic_sdma->vinfo = vinfo;