@@ -304,26 +304,28 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
}
/**
- * hfi1_check_send_wqe - validate wqe
+ * hfi1_setup_wqe - setup the wqe
* @qp - The qp
* @wqe - The built wqe
*
- * validate wqe. This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
+ * Perform setup of the wqe. This is called
+ * prior to inserting the wqe into the ring but after
+ * the wqe has been setup by RDMAVT. This function
+ * allow the driver the opportunity to perform
+ * validation and additional setup of the wqe.
*
* Returns 0 on success, -EINVAL on failure
*
*/
-int hfi1_check_send_wqe(struct rvt_qp *qp,
- struct rvt_swqe *wqe)
+int hfi1_setup_wqe(struct rvt_qp *qp,
+ struct rvt_swqe *wqe)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct rvt_ah *ah;
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
+ hfi1_setup_tid_rdma_wqe(qp, wqe);
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
@@ -4009,6 +4009,88 @@ static void hfi1_tid_retry_timeout(struct timer_list *t)
spin_unlock_irqrestore(&qp->r_lock, flags);
}
+/* Does @sge meet the alignment requirements for tid rdma? */
+static inline bool hfi1_check_sge_align(struct rvt_sge *sge, int num_sge)
+{
+ int i;
+
+ for (i = 0; i < num_sge; i++, sge++)
+ if ((u64)sge->vaddr & ~PAGE_MASK ||
+ sge->sge_length & ~PAGE_MASK)
+ return false;
+ return true;
+}
+
+void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+ struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
+ struct hfi1_swqe_priv *priv = wqe->priv;
+ struct tid_rdma_params *remote;
+ bool do_tid_rdma = false;
+
+ rcu_read_lock();
+ remote = rcu_dereference(qpriv->tid_rdma.remote);
+ /*
+ * If TID RDMA is disabled by the negotiation, don't
+ * use it.
+ */
+ if (!remote)
+ goto exit;
+
+ if (wqe->wr.opcode == IB_WR_RDMA_READ) {
+ if (hfi1_check_sge_align(&wqe->sg_list[0], wqe->wr.num_sge) &&
+ qpriv->tid_rdma.n_read < remote->max_read) {
+ wqe->wr.opcode = IB_WR_TID_RDMA_READ;
+ do_tid_rdma = true;
+ priv->tid_req.n_flows = remote->max_read;
+ qpriv->tid_r_reqs++;
+ }
+ } else if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
+ /*
+ * TID RDMA is enabled for this RDMA WRITE request iff:
+ * 1. The remote address is page-aligned,
+ * 2. The length is larger than the minimum segment size,
+ * 3. The length is page-multiple, and
+ * 4. There are available TID RDMA WRITEs
+ * on the remote end.
+ */
+ if (!(wqe->rdma_wr.remote_addr & ~PAGE_MASK) &&
+ !(wqe->length & ~PAGE_MASK) &&
+ qpriv->tid_rdma.n_write < remote->max_write) {
+ wqe->wr.opcode = IB_WR_TID_RDMA_WRITE;
+ do_tid_rdma = true;
+ }
+ }
+
+ if (do_tid_rdma) {
+ priv->tid_req.seg_len =
+ min_t(u32, remote->max_len, wqe->length);
+ priv->tid_req.total_segs =
+ DIV_ROUND_UP(wqe->length, priv->tid_req.seg_len);
+ /* Compute the last PSN of the request */
+ wqe->lpsn = wqe->psn;
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
+ wqe->lpsn += rvt_div_round_up_mtu(qp, wqe->length) - 1;
+ } else {
+ wqe->lpsn += priv->tid_req.total_segs - 1;
+ atomic_inc(&qpriv->n_requests);
+ }
+ priv->tid_req.cur_seg = 0;
+ priv->tid_req.comp_seg = 0;
+ priv->tid_req.ack_seg = 0;
+ priv->tid_req.state = TID_REQUEST_INACTIVE;
+ /*
+ * Reset acked_tail.
+ * TID RDMA READ does not have ACKs so it does not
+ * update the pointer. We have to reset it so TID RDMA
+ * WRITE does not get confused.
+ */
+ priv->tid_req.acked_tail = priv->tid_req.setup_head;
+ }
+exit:
+ rcu_read_unlock();
+}
+
static bool _hfi1_schedule_tid_send(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
@@ -299,6 +299,17 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
void hfi1_add_tid_retry_timer(struct rvt_qp *qp);
void hfi1_del_tid_retry_timer(struct rvt_qp *qp);
+void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+static inline void hfi1_setup_tid_rdma_wqe(struct rvt_qp *qp,
+ struct rvt_swqe *wqe)
+{
+ if (wqe->priv &&
+ (wqe->wr.opcode == IB_WR_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_RDMA_WRITE) &&
+ wqe->length >= TID_RDMA_MIN_SEGMENT_SIZE)
+ setup_tid_rdma_wqe(qp, wqe);
+}
+
bool hfi1_schedule_tid_send(struct rvt_qp *qp);
int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
@@ -2134,9 +2134,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
- dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup =
hfi1_comp_vect_mappings_lookup;
+ dd->verbs_dev.rdi.driver_f.setup_wqe = hfi1_setup_wqe;
/* completeion queue */
dd->verbs_dev.rdi.ibdev.num_comp_vectors = dd->comp_vect_possible_cpus;
@@ -433,7 +433,7 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
-int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
extern const u32 rc_only_opcode;
extern const u32 uc_only_opcode;
@@ -1588,7 +1588,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files;
dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
- dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe;
+ dd->verbs_dev.rdi.driver_f.setup_wqe = qib_check_send_wqe;
dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
@@ -1831,15 +1831,11 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
wqe->wr.num_sge = j;
}
- /* general part of wqe valid - allow for driver checks */
- if (rdi->driver_f.check_send_wqe) {
- ret = rdi->driver_f.check_send_wqe(qp, wqe);
- if (ret < 0)
- goto bail_inval_free;
- if (ret)
- *call_send = ret;
- }
-
+ /*
+ * Calculate and set SWQE PSN values prior to handing it off
+ * to the driver's check routine. This give the driver the
+ * opportunity to adjust PSN values based on internal checks.
+ */
log_pmtu = qp->log_pmtu;
if (qp->ibqp.qp_type != IB_QPT_UC &&
qp->ibqp.qp_type != IB_QPT_RC) {
@@ -1864,8 +1860,20 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
(wqe->length ?
((wqe->length - 1) >> log_pmtu) :
0);
- qp->s_next_psn = wqe->lpsn + 1;
}
+
+ /* general part of wqe valid - allow for driver checks */
+ if (rdi->driver_f.setup_wqe) {
+ ret = rdi->driver_f.setup_wqe(qp, wqe);
+ if (ret < 0)
+ goto bail_inval_free_ref;
+ if (ret)
+ *call_send = ret;
+ }
+
+ if (!(rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL))
+ qp->s_next_psn = wqe->lpsn + 1;
+
if (unlikely(reserved_op)) {
wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
rvt_qp_wqe_reserve(qp, wqe);
@@ -1879,6 +1887,10 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
return 0;
+bail_inval_free_ref:
+ if (qp->ibqp.qp_type != IB_QPT_UC &&
+ qp->ibqp.qp_type != IB_QPT_RC)
+ atomic_dec(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
bail_inval_free:
/* release mr holds */
while (j) {
@@ -203,7 +203,11 @@ struct rvt_driver_provided {
* check_support() for details.
*/
- /* hot path calldowns in a single cacheline */
+ /*
+ * hot path calldowns in a single cacheline
+ *
+ * Add data path calls below
+ */
/*
* Give the driver a notice that there is send work to do. It is up to
@@ -215,8 +219,14 @@ struct rvt_driver_provided {
bool (*schedule_send)(struct rvt_qp *qp);
bool (*schedule_send_no_lock)(struct rvt_qp *qp);
- /* Driver specific work request checking */
- int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
+ /*
+ * Driver specific work request setup and checking.
+ * This function is allowed to perform any setup, checks, or
+ * adjustments required to the SWQE in order to be usable by
+ * underlying protocols. This includes private data structure
+ * allocations.
+ */
+ int (*setup_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
/*
* Sometimes rdmavt needs to kick the driver's send progress. That is
@@ -224,6 +234,8 @@ struct rvt_driver_provided {
*/
void (*do_send)(struct rvt_qp *qp);
+ /* end of hot path calldowns */
+
/* Passed to ib core registration. Callback to create syfs files */
int (*port_callback)(struct ib_device *, u8, struct kobject *);