From patchwork Tue Jul 10 07:22:08 2018
X-Patchwork-Submitter: Raju Rangoju
X-Patchwork-Id: 10516269
From: Raju Rangoju
To: jgg@mellanox.com, linux-rdma@vger.kernel.org
Cc: swise@opengridcomputing.com, rajur@chelsio.com
Subject: [PATCH rdma-core 2/2] rdma-core/cxgb4: Add support for user mode srqs
Date: Tue, 10 Jul 2018 12:52:08 +0530
Message-Id: <20180710072208.28686-3-rajur@chelsio.com>
X-Mailer: git-send-email 2.13.0
In-Reply-To: <20180710072208.28686-1-rajur@chelsio.com>
References: <20180710072208.28686-1-rajur@chelsio.com>

- Added create/destroy/modify routines to support user-mode SRQs
- Added the post_srq_recv function
- Updated the poll_cq code to deal with SRQs
- Handled user-mode SRQ_LIMIT events
- Handled flushed SRQ buffers

Signed-off-by: Raju Rangoju
Reviewed-by: Steve Wise
---
 providers/cxgb4/cq.c    | 139 +++++++++++++++++++++++++++---
 providers/cxgb4/dev.c   |   2 +
 providers/cxgb4/qp.c    | 129 +++++++++++++++++++++++++++-
 providers/cxgb4/t4.h    |   6 +-
 providers/cxgb4/verbs.c | 219 +++++++++++++++++++++++++++++++++++++++---------
 5 files changed, 437 insertions(+), 58 deletions(-)

diff --git a/providers/cxgb4/cq.c b/providers/cxgb4/cq.c
index bb4f6447..2421e2fb 100644
--- a/providers/cxgb4/cq.c
+++ b/providers/cxgb4/cq.c
@@ -40,7 +40,7 @@
 #include "libcxgb4.h"
 #include "cxgb4-abi.h"
 
-static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
+static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq, u32 srqidx)
 {
 	union t4_cqe cqe = {};
 	__be64 *gen = GEN_ADDR(&cqe);
@@ -53,6 +53,9 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
 		      V_CQE_SWCQE(1) |
 		      V_CQE_QPID(wq->sq.qid));
 	*gen = htobe64(V_CQE_GENBIT((u64)cq->gen));
+	if (srqidx)
+		cqe.b64.u.srcqe.abs_rqe_idx = htobe32(srqidx);
+
 	memcpy(Q_ENTRY(cq->sw_queue, cq->sw_pidx), &cqe, CQE_SIZE(&cqe));
 	t4_swcq_produce(cq);
 }
@@ -66,7 +66,7 @@ int c4iw_flush_rq(struct t4_wq *wq,
struct t4_cq *cq, int count) PDBG("%s wq %p cq %p rq.in_use %u skip count %u\n", __func__, wq, cq, wq->rq.in_use, count); while (in_use--) { - insert_recv_cqe(wq, cq); + insert_recv_cqe(wq, cq, 0); flushed++; } return flushed; @@ -354,6 +357,78 @@ static void dump_cqe(void *arg) } +static void post_pending_srq_wrs(struct t4_srq *srq) +{ + struct t4_srq_pending_wr *pwr; + u16 idx = 0; + + while (srq->pending_in_use) { + + assert(!srq->sw_rq[srq->pidx].valid); + + pwr = &srq->pending_wrs[srq->pending_cidx]; + srq->sw_rq[srq->pidx].wr_id = pwr->wr_id; + srq->sw_rq[srq->pidx].valid = 1; + + PDBG("%s posting pending cidx %u pidx %u wq_pidx %u " + "in_use %u rq_size %u wr_id %llx\n", __func__, + srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, srq->size, + (unsigned long long)pwr->wr_id); + + c4iw_copy_wr_to_srq(srq, &pwr->wqe, pwr->len16); + t4_srq_consume_pending_wr(srq); + t4_srq_produce(srq, pwr->len16); + idx += DIV_ROUND_UP(pwr->len16*16, T4_EQ_ENTRY_SIZE); + } + + if (idx) { + t4_ring_srq_db(srq, idx, pwr->len16, &pwr->wqe); + srq->queue[srq->size].status.host_wq_pidx = + srq->wq_pidx; + } +} + +static u64 reap_srq_cqe(union t4_cqe *hw_cqe, struct t4_srq *srq) +{ + int rel_idx = CQE_ABS_RQE_IDX(&hw_cqe->b64) - srq->rqt_abs_idx; + u64 wr_id; + + BUG_ON(rel_idx >= srq->size); + + assert(srq->sw_rq[rel_idx].valid); + srq->sw_rq[rel_idx].valid = 0; + wr_id = srq->sw_rq[rel_idx].wr_id; + + if (rel_idx == srq->cidx) { + PDBG("%s in order cqe rel_idx %u cidx %u pidx %u wq_pidx %u " + "in_use %u rq_size %u wr_id %llx\n", __func__, + rel_idx, srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, srq->size, + (unsigned long long)srq->sw_rq[rel_idx].wr_id); + t4_srq_consume(srq); + while (srq->ooo_count && !srq->sw_rq[srq->cidx].valid) { + PDBG("%s eat ooo cidx %u pidx %u wq_pidx %u " + "in_use %u rq_size %u ooo_count %u wr_id %llx\n", __func__, + srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, srq->size, srq->ooo_count, + (unsigned long long)srq->sw_rq[srq->cidx].wr_id); + t4_srq_consume_ooo(srq); + } + if (srq->ooo_count == 0 && srq->pending_in_use) + post_pending_srq_wrs(srq); + } else { + BUG_ON(srq->in_use == 0); + PDBG("%s ooo cqe rel_idx %u cidx %u pidx %u wq_pidx %u " + "in_use %u rq_size %u ooo_count %u wr_id %llx\n", __func__, + rel_idx, srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, srq->size, srq->ooo_count, + (unsigned long long)srq->sw_rq[rel_idx].wr_id); + t4_srq_produce_ooo(srq); + } + return wr_id; +} + /* * poll_cq * @@ -370,8 +445,9 @@ static void dump_cqe(void *arg) * -EAGAIN CQE skipped, try again. * -EOVERFLOW CQ overflow detected. */ -static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, union t4_cqe *cqe, - u8 *cqe_flushed, u64 *cookie, u32 *credit) +static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, + union t4_cqe *cqe, u8 *cqe_flushed, + u64 *cookie, u32 *credit, struct t4_srq *srq) { int ret = 0; union t4_cqe *hw_cqe, read_cqe; @@ -495,7 +571,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, union t4_cqe *cqe, * error. */ - if (t4_rq_empty(wq)) { + if (srq ? 
t4_srq_empty(srq) : t4_rq_empty(wq)) { t4_set_wq_in_error(wq); ret = -EAGAIN; goto skip_cqe; @@ -563,11 +639,15 @@ proc_cqe: *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id; t4_sq_consume(wq); } else { - PDBG("%s completing rq idx %u\n", __func__, wq->rq.cidx); - BUG_ON(wq->rq.cidx >= wq->rq.size); - *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; - BUG_ON(t4_rq_empty(wq)); - t4_rq_consume(wq); + if (!srq) { + PDBG("%s completing rq idx %u\n", __func__, wq->rq.cidx); + BUG_ON(wq->rq.cidx >= wq->rq.size); + *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; + BUG_ON(t4_rq_empty(wq)); + t4_rq_consume(wq); + } else + *cookie = reap_srq_cqe(hw_cqe, srq); + wq->rq.msn++; goto skip_cqe; } @@ -590,6 +670,18 @@ skip_cqe: return ret; } +static void generate_srq_limit_event(struct c4iw_srq *srq) +{ + struct ibv_modify_srq cmd; + struct ibv_srq_attr attr = {0}; + int ret; + + srq->armed = 0; + ret = ibv_cmd_modify_srq(&srq->ibv_srq, &attr, 0, &cmd, sizeof cmd); + if (ret) + fprintf(stderr, "Failure to send srq_limit event - ret %d errno %d\n", ret, errno); +} + /* * Get one cq entry from c4iw and map it to openib. * @@ -602,6 +694,7 @@ skip_cqe: static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ibv_wc *wc) { struct c4iw_qp *qhp = NULL; + struct c4iw_srq *srq = NULL; struct t4_cqe_common *com; union t4_cqe uninitialized_var(cqe), *rd_cqe; struct t4_wq *wq; @@ -637,8 +730,12 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ibv_wc *wc) else { pthread_spin_lock(&qhp->lock); wq = &(qhp->wq); + srq = qhp->srq; + if (srq) + pthread_spin_lock(&srq->lock); } - ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); + ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit, + srq ? &srq->wq : NULL); if (ret) goto out; @@ -649,6 +746,13 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ibv_wc *wc) wc->vendor_err = CQE_STATUS(com); wc->wc_flags = 0; + /* + * Simulate a SRQ_LIMIT_REACHED HW notification if required. 
+ */ + if (srq && !(srq->flags & T4_SRQ_LIMIT_SUPPORT) && srq->armed && + srq->wq.in_use < srq->srq_limit) + generate_srq_limit_event(srq); + PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " "lo 0x%x cookie 0x%llx\n", __func__, CQE_QPID(com), CQE_TYPE(com), @@ -747,8 +851,11 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ibv_wc *wc) chp->cq.cqid, CQE_QPID(com), CQE_TYPE(com), CQE_OPCODE(com), CQE_STATUS(com)); out: - if (wq) + if (wq) { pthread_spin_unlock(&qhp->lock); + if (srq) + pthread_spin_unlock(&srq->lock); + } return ret; } @@ -792,3 +899,11 @@ int c4iw_arm_cq(struct ibv_cq *ibcq, int solicited) pthread_spin_unlock(&chp->lock); return ret; } + +void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx) +{ + struct c4iw_cq * rchp = to_c4iw_cq(qhp->ibv_qp.recv_cq); + + /* create a SRQ RECV CQE for srqidx */ + insert_recv_cqe(&qhp->wq, &rchp->cq, srqidx); +} diff --git a/providers/cxgb4/dev.c b/providers/cxgb4/dev.c index b1870219..3479e561 100644 --- a/providers/cxgb4/dev.c +++ b/providers/cxgb4/dev.c @@ -84,6 +84,7 @@ static const struct verbs_context_ops c4iw_ctx_common_ops = { .create_srq = c4iw_create_srq, .modify_srq = c4iw_modify_srq, .destroy_srq = c4iw_destroy_srq, + .query_srq = c4iw_query_srq, .create_qp = c4iw_create_qp, .modify_qp = c4iw_modify_qp, .destroy_qp = c4iw_destroy_qp, @@ -456,6 +457,7 @@ static struct verbs_device *c4iw_device_alloc(struct verbs_sysfs_dev *sysfs_dev) dev->abi_version = sysfs_dev->abi_ver; list_node_init(&dev->list); + list_head_init(&dev->srq_list); PDBG("%s device claimed\n", __FUNCTION__); list_add_tail(&devices, &dev->list); #ifdef STALL_DETECTION diff --git a/providers/cxgb4/qp.c b/providers/cxgb4/qp.c index 5d90510c..eadfc6d5 100644 --- a/providers/cxgb4/qp.c +++ b/providers/cxgb4/qp.c @@ -92,6 +92,23 @@ static void copy_wr_to_rq(struct t4_wq *wq, union t4_recv_wr *wqe, u8 len16) } } +void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16) +{ + u64 *src, *dst; + + src = (u64 *)wqe; + dst = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE); + while (len16) { + *dst++ = *src++; + if (dst >= (u64 *)&srq->queue[srq->size]) + dst = (u64 *)srq->queue; + *dst++ = *src++; + if (dst >= (u64 *)&srq->queue[srq->size]) + dst = (u64 *)srq->queue; + len16--; + } +} + static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, struct ibv_send_wr *wr, int max, u32 *plenp) { @@ -277,6 +294,19 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, return 0; } +static int build_srq_recv(union t4_recv_wr *wqe, struct ibv_recv_wr *wr, + u8 *len16) +{ + int ret; + + ret = build_isgl(&wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); + if (ret) + return ret; + *len16 = DIV_ROUND_UP(sizeof wqe->recv + + wr->num_sge * sizeof(struct fw_ri_sge), 16); + return 0; +} + static void ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 idx) { struct ibv_modify_qp cmd = {}; @@ -406,6 +436,89 @@ int c4iw_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, return err; } +static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe, uint64_t wr_id, u8 len16) +{ + struct t4_srq_pending_wr *pwr = &srq->pending_wrs[srq->pending_pidx]; + + PDBG("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx " + "pending_cidx %u pending_pidx %u pending_in_use %u\n", + __func__, srq->cidx, srq->pidx, srq->wq_pidx, + srq->in_use, srq->ooo_count, (unsigned long long)wr_id, srq->pending_cidx, + srq->pending_pidx, srq->pending_in_use); + pwr->wr_id = wr_id; + pwr->len16 = len16; + memcpy(&pwr->wqe, 
wqe, len16*16); + t4_srq_produce_pending_wr(srq); +} + +int c4iw_post_srq_recv(struct ibv_srq *ibsrq, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr) +{ + int err = 0; + struct c4iw_srq *srq; + union t4_recv_wr *wqe, lwqe; + u32 num_wrs; + u8 len16 = 0; + u16 idx = 0; + + srq = to_c4iw_srq(ibsrq); + pthread_spin_lock(&srq->lock); + INC_STAT(srq_recv); + num_wrs = t4_srq_avail(&srq->wq); + if (num_wrs == 0) { + pthread_spin_unlock(&srq->lock); + return -ENOMEM; + } + while (wr) { + if (wr->num_sge > T4_MAX_RECV_SGE) { + err = -EINVAL; + *bad_wr = wr; + break; + } + wqe = &lwqe; + if (num_wrs) + err = build_srq_recv(wqe, wr, &len16); + else + err = -ENOMEM; + if (err) { + *bad_wr = wr; + break; + } + + wqe->recv.opcode = FW_RI_RECV_WR; + wqe->recv.r1 = 0; + wqe->recv.wrid = srq->wq.pidx; + wqe->recv.r2[0] = 0; + wqe->recv.r2[1] = 0; + wqe->recv.r2[2] = 0; + wqe->recv.len16 = len16; + + if (srq->wq.ooo_count || srq->wq.pending_in_use || srq->wq.sw_rq[srq->wq.pidx].valid) + defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16); + else { + srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id; + srq->wq.sw_rq[srq->wq.pidx].valid = 1; + c4iw_copy_wr_to_srq(&srq->wq, wqe, len16); + PDBG("%s cidx %u pidx %u wq_pidx %u in_use %u " + "wr_id 0x%llx \n", __func__, srq->wq.cidx, + srq->wq.pidx, srq->wq.wq_pidx, srq->wq.in_use, + (unsigned long long)wr->wr_id); + t4_srq_produce(&srq->wq, len16); + idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + } + wr = wr->next; + num_wrs--; + } + + if (idx) { + t4_ring_srq_db(&srq->wq, idx, len16, wqe); + srq->wq.queue[srq->wq.size].status.host_wq_pidx = + srq->wq.wq_pidx; + } + pthread_spin_unlock(&srq->lock); + return err; +} + int c4iw_post_receive(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr) { @@ -491,8 +604,10 @@ static void update_qp_state(struct c4iw_qp *qhp) void c4iw_flush_qp(struct c4iw_qp *qhp) { struct c4iw_cq *rchp, *schp; + u32 srqidx; int count; + srqidx = t4_wq_srqidx(&qhp->wq); rchp = to_c4iw_cq(qhp->ibv_qp.recv_cq); schp = to_c4iw_cq(qhp->ibv_qp.send_cq); @@ -515,16 +630,26 @@ void c4iw_flush_qp(struct c4iw_qp *qhp) qhp->wq.flushed = 1; t4_set_wq_in_error(&qhp->wq); + if (qhp->srq) + pthread_spin_lock(&qhp->srq->lock); + + if (srqidx) + c4iw_flush_srqidx(qhp, srqidx); + update_qp_state(qhp); c4iw_flush_hw_cq(rchp, qhp); - c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); - c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + if (!qhp->srq) { + c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); + c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + } if (schp != rchp) c4iw_flush_hw_cq(schp, qhp); c4iw_flush_sq(qhp); + if (qhp->srq) + pthread_spin_unlock(&qhp->srq->lock); pthread_spin_unlock(&qhp->lock); if (schp != rchp) diff --git a/providers/cxgb4/t4.h b/providers/cxgb4/t4.h index 67f411d9..08f29fa7 100644 --- a/providers/cxgb4/t4.h +++ b/providers/cxgb4/t4.h @@ -373,6 +373,7 @@ struct t4_sq { struct t4_swrqe { u64 wr_id; + int valid; }; struct t4_rq { @@ -440,7 +441,6 @@ static inline void t4_rq_produce(struct t4_wq *wq, u8 len16) static inline void t4_rq_consume(struct t4_wq *wq) { wq->rq.in_use--; - wq->rq.msn++; if (++wq->rq.cidx == wq->rq.size) wq->rq.cidx = 0; assert((wq->rq.cidx != wq->rq.pidx) || wq->rq.in_use == 0); @@ -566,7 +566,7 @@ static inline void t4_srq_consume(struct t4_srq *srq) static inline int t4_wq_in_error(struct t4_wq *wq) { - return wq->error || wq->rq.queue[wq->rq.size].status.qp_err; + return wq->error || *wq->qp_errp; } static inline u32 t4_wq_srqidx(struct t4_wq *wq) @@ -742,7 +742,7 @@ static inline void 
t4_ring_srq_db(struct t4_srq *srq, u16 inc, u8 len16, static inline void t4_set_wq_in_error(struct t4_wq *wq) { - wq->rq.queue[wq->rq.size].status.qp_err = 1; + *wq->qp_errp = 1; } extern int c4iw_abi_version; diff --git a/providers/cxgb4/verbs.c b/providers/cxgb4/verbs.c index a8935def..e43992e8 100644 --- a/providers/cxgb4/verbs.c +++ b/providers/cxgb4/verbs.c @@ -286,24 +286,141 @@ int c4iw_destroy_cq(struct ibv_cq *ibcq) struct ibv_srq *c4iw_create_srq(struct ibv_pd *pd, struct ibv_srq_init_attr *attr) { + struct c4iw_dev *dev = to_c4iw_dev(pd->context->device); + struct uc4iw_create_srq_resp resp; + unsigned long segment_offset; + struct ibv_create_srq cmd; + struct c4iw_srq *srq; + void *dbva; + int ret; + + PDBG("%s enter\n", __func__); + srq = calloc(1, sizeof *srq); + if (!srq) + goto err; + + ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr, &cmd, + sizeof cmd, &resp.ibv_resp, sizeof resp); + if (ret) + goto err_free_srq_mem; + + PDBG("%s srq id 0x%x srq key %" PRIx64 " srq db/gts key %" PRIx64 + " qid_mask 0x%x\n", __func__, + resp.srqid, resp.srq_key, resp.srq_db_gts_key, + resp.qid_mask); + + srq->rhp = dev; + srq->wq.qid = resp.srqid; + srq->wq.size = resp.srq_size; + srq->wq.memsize = resp.srq_memsize; + srq->wq.rqt_abs_idx = resp.rqt_abs_idx; + srq->flags = resp.flags; + pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE); + + dbva = mmap(NULL, c4iw_page_size, PROT_WRITE, MAP_SHARED, + pd->context->cmd_fd, resp.srq_db_gts_key); + if (dbva == MAP_FAILED) + goto err_destroy_srq; + srq->wq.udb = dbva; + + segment_offset = 128 * (srq->wq.qid & resp.qid_mask); + if (segment_offset < c4iw_page_size) { + srq->wq.udb += segment_offset / 4; + srq->wq.wc_reg_available = 1; + } else + srq->wq.bar2_qid = srq->wq.qid & resp.qid_mask; + srq->wq.udb += 2; + + srq->wq.queue = mmap(NULL, srq->wq.memsize, + PROT_WRITE, MAP_SHARED, + pd->context->cmd_fd, resp.srq_key); + if (srq->wq.queue == MAP_FAILED) + goto err_unmap_udb; + + srq->wq.sw_rq = calloc(srq->wq.size, sizeof (struct t4_swrqe)); + if (!srq->wq.sw_rq) + goto err_unmap_queue; + srq->wq.pending_wrs = calloc(srq->wq.size, sizeof *srq->wq.pending_wrs); + if (!srq->wq.pending_wrs) + goto err_free_sw_rq; + + pthread_spin_lock(&dev->lock); + list_add_tail(&dev->srq_list, &srq->list); + pthread_spin_unlock(&dev->lock); + + PDBG("%s srq dbva %p srq qva %p srq depth %u srq memsize %lu\n", + __func__, srq->wq.udb, srq->wq.queue, + srq->wq.size, srq->wq.memsize); + + INC_STAT(srq); + return &srq->ibv_srq; +err_free_sw_rq: + free(srq->wq.sw_rq); +err_unmap_queue: + munmap((void *)srq->wq.queue, srq->wq.memsize); +err_unmap_udb: + munmap(MASKED(srq->wq.udb), c4iw_page_size); +err_destroy_srq: + (void)ibv_cmd_destroy_srq(&srq->ibv_srq); +err_free_srq_mem: + free(srq); +err: + return NULL; } -int c4iw_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr, +int c4iw_modify_srq(struct ibv_srq *ibsrq, struct ibv_srq_attr *attr, int attr_mask) { - return ENOSYS; + struct c4iw_srq *srq = to_c4iw_srq(ibsrq); + struct ibv_modify_srq cmd; + int ret; + + /* XXX no support for this yet */ + if (attr_mask & IBV_SRQ_MAX_WR) + return ENOSYS; + + ret = ibv_cmd_modify_srq(ibsrq, attr, attr_mask, &cmd, sizeof cmd); + if (!ret) { + if (attr_mask & IBV_SRQ_LIMIT) { + srq->armed = 1; + srq->srq_limit = attr->srq_limit; + } + } + return ret; } -int c4iw_destroy_srq(struct ibv_srq *srq) +int c4iw_destroy_srq(struct ibv_srq *ibsrq) { - return ENOSYS; + int ret; + struct c4iw_srq *srq = to_c4iw_srq(ibsrq); + + PDBG("%s enter qp %p\n", __func__, ibsrq); + + ret = 
ibv_cmd_destroy_srq(ibsrq); + if (ret) { + return ret; + } + + pthread_spin_lock(&srq->rhp->lock); + list_del(&srq->list); + pthread_spin_unlock(&srq->rhp->lock); + + munmap(MASKED(srq->wq.udb), c4iw_page_size); + munmap(srq->wq.queue, srq->wq.memsize); + + free(srq->wq.pending_wrs); + free(srq->wq.sw_rq); + free(srq); + return 0; + } -int c4iw_post_srq_recv(struct ibv_srq *ibsrq, struct ibv_recv_wr *wr, - struct ibv_recv_wr **bad_wr) +int c4iw_query_srq(struct ibv_srq *ibsrq, struct ibv_srq_attr *attr) { - return ENOSYS; + struct ibv_query_srq cmd; + + return ibv_cmd_query_srq(ibsrq, attr, &cmd, sizeof cmd); } static struct ibv_qp *create_qp_v0(struct ibv_pd *pd, @@ -372,7 +489,7 @@ static struct ibv_qp *create_qp_v0(struct ibv_pd *pd, if (!qhp->wq.sq.sw_sq) goto err7; - qhp->wq.rq.sw_rq = calloc(qhp->wq.rq.size, sizeof (uint64_t)); + qhp->wq.rq.sw_rq = calloc(qhp->wq.rq.size, sizeof (struct t4_swrqe)); if (!qhp->wq.rq.sw_rq) goto err8; @@ -445,9 +562,12 @@ static struct ibv_qp *create_qp(struct ibv_pd *pd, qhp->wq.sq.flags = resp.flags & C4IW_QPF_ONCHIP ? T4_SQ_ONCHIP : 0; qhp->wq.sq.flush_cidx = -1; qhp->wq.rq.msn = 1; - qhp->wq.rq.qid = resp.rqid; - qhp->wq.rq.size = resp.rq_size; - qhp->wq.rq.memsize = resp.rq_memsize; + qhp->srq = to_c4iw_srq(attr->srq); + if (!attr->srq) { + qhp->wq.rq.qid = resp.rqid; + qhp->wq.rq.size = resp.rq_size; + qhp->wq.rq.memsize = resp.rq_memsize; + } if (ma_wr && resp.sq_memsize < (resp.sq_size + 1) * sizeof *qhp->wq.sq.queue + 16*sizeof(__be64) ) { ma_wr = 0; @@ -479,35 +599,39 @@ static struct ibv_qp *create_qp(struct ibv_pd *pd, if (qhp->wq.sq.queue == MAP_FAILED) goto err4; - dbva = mmap(NULL, c4iw_page_size, PROT_WRITE, MAP_SHARED, - pd->context->cmd_fd, resp.rq_db_gts_key); - if (dbva == MAP_FAILED) - goto err5; - qhp->wq.rq.udb = dbva; - if (!dev_is_t4(qhp->rhp)) { - unsigned long segment_offset = 128 * (qhp->wq.rq.qid & - qhp->wq.qid_mask); - - if (segment_offset < c4iw_page_size) { - qhp->wq.rq.udb += segment_offset / 4; - qhp->wq.rq.wc_reg_available = 1; - } else - qhp->wq.rq.bar2_qid = qhp->wq.rq.qid & qhp->wq.qid_mask; - qhp->wq.rq.udb += 2; + if (!attr->srq) { + dbva = mmap(NULL, c4iw_page_size, PROT_WRITE, MAP_SHARED, + pd->context->cmd_fd, resp.rq_db_gts_key); + if (dbva == MAP_FAILED) + goto err5; + qhp->wq.rq.udb = dbva; + if (!dev_is_t4(qhp->rhp)) { + unsigned long segment_offset = 128 * (qhp->wq.rq.qid & + qhp->wq.qid_mask); + + if (segment_offset < c4iw_page_size) { + qhp->wq.rq.udb += segment_offset / 4; + qhp->wq.rq.wc_reg_available = 1; + } else + qhp->wq.rq.bar2_qid = qhp->wq.rq.qid & qhp->wq.qid_mask; + qhp->wq.rq.udb += 2; + } + qhp->wq.rq.queue = mmap(NULL, qhp->wq.rq.memsize, + PROT_WRITE, MAP_SHARED, + pd->context->cmd_fd, resp.rq_key); + if (qhp->wq.rq.queue == MAP_FAILED) + goto err6; } - qhp->wq.rq.queue = mmap(NULL, qhp->wq.rq.memsize, - PROT_WRITE, MAP_SHARED, - pd->context->cmd_fd, resp.rq_key); - if (qhp->wq.rq.queue == MAP_FAILED) - goto err6; qhp->wq.sq.sw_sq = calloc(qhp->wq.sq.size, sizeof (struct t4_swsqe)); if (!qhp->wq.sq.sw_sq) goto err7; - qhp->wq.rq.sw_rq = calloc(qhp->wq.rq.size, sizeof (uint64_t)); - if (!qhp->wq.rq.sw_rq) - goto err8; + if (!attr->srq) { + qhp->wq.rq.sw_rq = calloc(qhp->wq.rq.size, sizeof (struct t4_swrqe)); + if (!qhp->wq.rq.sw_rq) + goto err8; + } if (t4_sq_onchip(&qhp->wq)) { qhp->wq.sq.ma_sync = mmap(NULL, c4iw_page_size, PROT_WRITE, @@ -520,11 +644,18 @@ static struct ibv_qp *create_qp(struct ibv_pd *pd, if (ctx->status_page_size) { qhp->wq.db_offp = &ctx->status_page->db_off; - } 
else { + } else if (!attr->srq) { qhp->wq.db_offp = &qhp->wq.rq.queue[qhp->wq.rq.size].status.db_off; } + if (!attr->srq) + qhp->wq.qp_errp = &qhp->wq.rq.queue[qhp->wq.rq.size].status.qp_err; + else { + qhp->wq.qp_errp = &qhp->wq.sq.queue[qhp->wq.sq.size].status.qp_err; + qhp->wq.srqidxp = &qhp->wq.sq.queue[qhp->wq.sq.size].status.srqidx; + } + PDBG("%s sq dbva %p sq qva %p sq depth %u sq memsize %lu " " rq dbva %p rq qva %p rq depth %u rq memsize %lu\n", __func__, @@ -541,13 +672,16 @@ static struct ibv_qp *create_qp(struct ibv_pd *pd, INC_STAT(qp); return &qhp->ibv_qp; err9: - free(qhp->wq.rq.sw_rq); + if (!attr->srq) + free(qhp->wq.rq.sw_rq); err8: free(qhp->wq.sq.sw_sq); err7: - munmap((void *)qhp->wq.rq.queue, qhp->wq.rq.memsize); + if (!attr->srq) + munmap((void *)qhp->wq.rq.queue, qhp->wq.rq.memsize); err6: - munmap(MASKED(qhp->wq.rq.udb), c4iw_page_size); + if (!attr->srq) + munmap(MASKED(qhp->wq.rq.udb), c4iw_page_size); err5: munmap((void *)qhp->wq.sq.queue, qhp->wq.sq.memsize); err4: @@ -619,15 +753,18 @@ int c4iw_destroy_qp(struct ibv_qp *ibqp) munmap((void *)qhp->wq.sq.ma_sync, c4iw_page_size); } munmap(MASKED(qhp->wq.sq.udb), c4iw_page_size); - munmap(MASKED(qhp->wq.rq.udb), c4iw_page_size); munmap(qhp->wq.sq.queue, qhp->wq.sq.memsize); - munmap(qhp->wq.rq.queue, qhp->wq.rq.memsize); + if (!qhp->srq) { + munmap(MASKED(qhp->wq.rq.udb), c4iw_page_size); + munmap(qhp->wq.rq.queue, qhp->wq.rq.memsize); + } pthread_spin_lock(&dev->lock); dev->qpid2ptr[qhp->wq.sq.qid] = NULL; pthread_spin_unlock(&dev->lock); - free(qhp->wq.rq.sw_rq); + if (!qhp->srq) + free(qhp->wq.rq.sw_rq); free(qhp->wq.sq.sw_sq); free(qhp); return 0;
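
Not part of the patch itself: for readers following the provider changes above, the sketch below shows, under illustrative assumptions (buffer count, buffer size, limit value, and the helper name setup_srq are made up for the example), how an application would exercise these new code paths through the standard libibverbs API: ibv_create_srq(), ibv_post_srq_recv(), and ibv_modify_srq() with IBV_SRQ_LIMIT.

/*
 * Illustrative consumer-side sketch, not provider code.  Sizes, counts and
 * error handling are arbitrary.
 */
#include <infiniband/verbs.h>
#include <stdint.h>
#include <stdlib.h>

#define NBUF  64	/* arbitrary number of receive buffers for the example */
#define BUFSZ 4096	/* arbitrary receive buffer size */

static struct ibv_srq *setup_srq(struct ibv_pd *pd, struct ibv_mr **mr_out)
{
	struct ibv_srq_init_attr sia = {
		.attr = { .max_wr = NBUF, .max_sge = 1 },
	};
	struct ibv_srq_attr attr = { .srq_limit = 16 };
	struct ibv_srq *srq;
	struct ibv_mr *mr;
	char *bufs;
	int i;

	srq = ibv_create_srq(pd, &sia);		/* -> c4iw_create_srq() */
	if (!srq)
		return NULL;

	bufs = calloc(NBUF, BUFSZ);
	mr = bufs ? ibv_reg_mr(pd, bufs, (size_t)NBUF * BUFSZ,
			       IBV_ACCESS_LOCAL_WRITE) : NULL;
	if (!mr)
		goto err;

	/* Pre-post receive buffers; each WR goes through c4iw_post_srq_recv(). */
	for (i = 0; i < NBUF; i++) {
		struct ibv_sge sge = {
			.addr   = (uintptr_t)bufs + (size_t)i * BUFSZ,
			.length = BUFSZ,
			.lkey   = mr->lkey,
		};
		struct ibv_recv_wr wr = {
			.wr_id = i, .sg_list = &sge, .num_sge = 1,
		};
		struct ibv_recv_wr *bad;

		if (ibv_post_srq_recv(srq, &wr, &bad))
			goto err;
	}

	/*
	 * Arm the SRQ limit: request IBV_EVENT_SRQ_LIMIT_REACHED once fewer
	 * than 16 WRs remain posted.  On hardware without native support the
	 * provider simulates this in c4iw_poll_cq_one() via
	 * generate_srq_limit_event().
	 */
	if (ibv_modify_srq(srq, &attr, IBV_SRQ_LIMIT))	/* -> c4iw_modify_srq() */
		goto err;

	*mr_out = mr;
	return srq;
err:
	if (mr)
		ibv_dereg_mr(mr);
	free(bufs);
	ibv_destroy_srq(srq);			/* -> c4iw_destroy_srq() */
	return NULL;
}

An RC QP would then be created with ibv_create_qp() and struct ibv_qp_init_attr.srq pointing at this SRQ, so create_qp() above skips the per-QP RQ mapping and receive completions are reaped through the new reap_srq_cqe() path in poll_cq().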