From patchwork Tue Apr 24 17:25:22 2018
X-Patchwork-Submitter: Raju Rangoju <rajur@chelsio.com>
X-Patchwork-Id: 10360563
From: Raju Rangoju <rajur@chelsio.com>
To: jgg@mellanox.com, linux-rdma@vger.kernel.org, dledford@redhat.com
Cc: swise@opengridcomputing.com, rajur@chelsio.com, bharat@chelsio.com
Subject: [PATCH WIP rdma-core] cxgb4: Add srq support for Chelsio Adapters
Date: Tue, 24 Apr 2018 22:55:22 +0530
Message-Id: <20180424172522.21028-1-rajur@chelsio.com>
X-Mailer: git-send-email 2.12.0
X-Mailing-List: linux-rdma@vger.kernel.org

References for the corresponding kernel-mode SRQ submission:
https://www.spinics.net/lists/linux-rdma/msg63695.html
https://www.spinics.net/lists/linux-rdma/msg63696.html
https://www.spinics.net/lists/linux-rdma/msg63697.html

This patch adds the changes necessary to support the SRQ feature on
Chelsio adapters.

Signed-off-by: Raju Rangoju <rajur@chelsio.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
---
 kernel-headers/rdma/cxgb4-abi.h |  28 ++++-
 providers/cxgb4/cq.c            | 168 +++++++++++++++++++++++++++--
 providers/cxgb4/cxgb4-abi.h     |   5 +-
 providers/cxgb4/dev.c           |   2 +
 providers/cxgb4/libcxgb4.h      |  22 ++++
 providers/cxgb4/qp.c            | 228 ++++++++++++++++++++++++++++++---------
 providers/cxgb4/t4.h            | 164 +++++++++++++++++++++++++++-
 providers/cxgb4/t4_regs.h       |   4 +
 providers/cxgb4/t4fw_api.h      |   2 +
 providers/cxgb4/t4fw_ri_api.h   |  20 ++++
 providers/cxgb4/verbs.c         | 232 ++++++++++++++++++++++++++++++++--------
 11 files changed, 761 insertions(+), 114 deletions(-)

diff --git a/kernel-headers/rdma/cxgb4-abi.h b/kernel-headers/rdma/cxgb4-abi.h
index 1fefd014..55959158 100644
--- a/kernel-headers/rdma/cxgb4-abi.h
+++ b/kernel-headers/rdma/cxgb4-abi.h
@@ -44,6 +44,16 @@
  * In particular do not use pointer types -- pass pointers in __aligned_u64
  * instead.
*/ + +enum { + C4IW_64B_CQE = (1 << 0) +}; + +struct c4iw_create_cq { + __u32 flags; + __u32 reserved; +}; + struct c4iw_create_cq_resp { __aligned_u64 key; __aligned_u64 gts_key; @@ -51,7 +61,7 @@ struct c4iw_create_cq_resp { __u32 cqid; __u32 size; __u32 qid_mask; - __u32 reserved; /* explicit padding (optional for i386) */ + __u32 flags; }; enum { @@ -84,4 +94,20 @@ struct c4iw_alloc_pd_resp { __u32 pdid; }; +struct c4iw_create_srq_resp { + //struct ibv_create_srq_resp ibv_resp; + __u64 srq_key; + __u64 srq_db_gts_key; + __u64 srq_memsize; + __u32 srqid; + __u32 srq_size; + __u32 rqt_abs_idx; + __u32 qid_mask; + __u32 flags; +}; + +enum { + T4_SRQ_LIMIT_SUPPORT = (1<<0), /* HW supports SRQ_LIMIT_REACHED event */ +}; + #endif /* CXGB4_ABI_USER_H */ diff --git a/providers/cxgb4/cq.c b/providers/cxgb4/cq.c index be6cf2f2..0cdc1c09 100644 --- a/providers/cxgb4/cq.c +++ b/providers/cxgb4/cq.c @@ -40,7 +40,7 @@ #include "libcxgb4.h" #include "cxgb4-abi.h" -static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) +static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq, u32 srqidx) { struct t4_cqe cqe; @@ -53,6 +53,8 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) V_CQE_SWCQE(1) | V_CQE_QPID(wq->sq.qid)); cqe.bits_type_ts = htobe64(V_CQE_GENBIT((u64)cq->gen)); + if (srqidx) + cqe.u.srcqe.abs_rqe_idx = htobe32(srqidx); cq->sw_queue[cq->sw_pidx] = cqe; t4_swcq_produce(cq); } @@ -66,7 +68,7 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count) PDBG("%s wq %p cq %p rq.in_use %u skip count %u\n", __func__, wq, cq, wq->rq.in_use, count); while (in_use--) { - insert_recv_cqe(wq, cq); + insert_recv_cqe(wq, cq, 0); flushed++; } return flushed; @@ -327,6 +329,100 @@ static void dump_cqe(void *arg) (long long)be64toh(p[3])); } +static void post_pending_srq_wrs(struct t4_srq *srq) +{ + struct t4_srq_pending_wr *pwr; + u16 idx = 0; + + while (srq->pending_in_use) { + + assert(!srq->sw_rq[srq->pidx].valid); + + pwr = &srq->pending_wrs[srq->pending_cidx]; + srq->sw_rq[srq->pidx].wr_id = pwr->wr_id; + srq->sw_rq[srq->pidx].valid = 1; + + PDBG("%s posting pending cidx %u pidx %u wq_pidx %u " + "in_use %u rq_size %u wr_id %llx\n", __func__, + srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, srq->size, + (unsigned long long)pwr->wr_id); + + c4iw_copy_wr_to_srq(srq, &pwr->wqe, pwr->len16); + t4_srq_consume_pending_wr(srq); + t4_srq_produce(srq, pwr->len16); + idx += DIV_ROUND_UP(pwr->len16*16, T4_EQ_ENTRY_SIZE); + } + + if (idx) { + t4_ring_srq_db(srq, idx, pwr->len16, &pwr->wqe); + srq->queue[srq->size].status.host_wq_pidx = + srq->wq_pidx; + } +} + +struct t4_srq *find_srq(struct t4_cqe *hw_cqe, struct t4_cq *cq); +struct t4_srq *find_srq(struct t4_cqe *hw_cqe, struct t4_cq *cq) +{ + struct c4iw_cq *chp = container_of(cq, struct c4iw_cq, cq); + struct c4iw_dev *dev = chp->rhp; + struct c4iw_srq *srq = NULL; + struct t4_srq *wq = NULL; + + pthread_spin_lock(&dev->lock); + list_for_each(&dev->srq_list, srq, list) { + if ((CQE_ABS_RQE_IDX(hw_cqe) >= srq->wq.rqt_abs_idx) && + (CQE_ABS_RQE_IDX(hw_cqe) <= srq->wq.rqt_abs_idx + + srq->wq.size - 1)) { + wq = &srq->wq; + PDBG("%s found t4_srq\n", __func__); + break; + } + } + pthread_spin_unlock(&dev->lock); + return wq; +} + +static u64 reap_srq_cqe(struct t4_cqe *hw_cqe, struct t4_srq *srq) +{ + int rel_idx = CQE_ABS_RQE_IDX(hw_cqe) - srq->rqt_abs_idx; + u64 wr_id; + + BUG_ON(rel_idx >= srq->size); + + assert(srq->sw_rq[rel_idx].valid); + srq->sw_rq[rel_idx].valid = 0; + wr_id = srq->sw_rq[rel_idx].wr_id; + + if 
(rel_idx == srq->cidx) { + PDBG("%s in order cqe rel_idx %u cidx %u pidx %u wq_pidx %u " + "in_use %u rq_size %u wr_id %llx\n", __func__, + rel_idx, srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, srq->size, + (unsigned long long)srq->sw_rq[rel_idx].wr_id); + t4_srq_consume(srq); + while (srq->ooo_count && !srq->sw_rq[srq->cidx].valid) { + PDBG("%s eat ooo cidx %u pidx %u wq_pidx %u " + "in_use %u rq_size %u ooo_count %u wr_id %llx\n", __func__, + srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, srq->size, srq->ooo_count, + (unsigned long long)srq->sw_rq[srq->cidx].wr_id); + t4_srq_consume_ooo(srq); + } + if (srq->ooo_count == 0 && srq->pending_in_use) + post_pending_srq_wrs(srq); + } else { + BUG_ON(srq->in_use == 0); + PDBG("%s ooo cqe rel_idx %u cidx %u pidx %u wq_pidx %u " + "in_use %u rq_size %u ooo_count %u wr_id %llx\n", __func__, + rel_idx, srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, srq->size, srq->ooo_count, + (unsigned long long)srq->sw_rq[rel_idx].wr_id); + t4_srq_produce_ooo(srq); + } + return wr_id; +} + /* * poll_cq * @@ -344,7 +440,7 @@ static void dump_cqe(void *arg) * -EOVERFLOW CQ overflow detected. */ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, - u8 *cqe_flushed, u64 *cookie, u32 *credit) + u8 *cqe_flushed, u64 *cookie, u32 *credit, struct t4_srq *srq) { int ret = 0; struct t4_cqe *hw_cqe, read_cqe; @@ -367,6 +463,13 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, * skip cqe's not affiliated with a QP. */ if (wq == NULL) { +#if 0 /* If this is an SRQ CQE then update the srq state. */ + if (CQE_IS_SRQ(hw_cqe) && (srq = find_srq(hw_cqe, cq))) { + PDBG("%s found srq, reaping it, hw_cqe %p srq %p\n", + __func__,hw_cqe, srq); + (void)reap_srq_cqe(hw_cqe, srq); + } +#endif ret = -EAGAIN; goto skip_cqe; } @@ -454,11 +557,15 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, * error. */ - if (t4_rq_empty(wq)) { + //BUG_ON(srq ? t4_srq_empty(srq) : t4_rq_empty(wq)); + if (srq) { + t4_srq_empty(srq); + } else if (t4_rq_empty(wq)) { t4_set_wq_in_error(wq); ret = -EAGAIN; goto skip_cqe; } + if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) { t4_set_wq_in_error(wq); hw_cqe->header |= htobe32(V_CQE_STATUS(T4_ERR_MSN)); @@ -522,11 +629,15 @@ proc_cqe: *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id; t4_sq_consume(wq); } else { - PDBG("%s completing rq idx %u\n", __func__, wq->rq.cidx); - BUG_ON(wq->rq.cidx >= wq->rq.size); - *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; - BUG_ON(t4_rq_empty(wq)); - t4_rq_consume(wq); + if (!srq) { + PDBG("%s completing rq idx %u\n", __func__, wq->rq.cidx); + BUG_ON(wq->rq.cidx >= wq->rq.size); + *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; + BUG_ON(t4_rq_empty(wq)); + t4_rq_consume(wq); + } else + *cookie = reap_srq_cqe(hw_cqe, srq); + wq->rq.msn++; goto skip_cqe; } @@ -549,6 +660,18 @@ skip_cqe: return ret; } +static void generate_srq_limit_event(struct c4iw_srq *srq) +{ + struct ibv_modify_srq cmd; + struct ibv_srq_attr attr = {0}; + int ret; + + srq->armed = 0; + ret = ibv_cmd_modify_srq(&srq->ibv_srq, &attr, 0, &cmd, sizeof cmd); + if (ret) + fprintf(stderr, "Failure to send srq_limit event - ret %d errno %d\n", ret, errno); +} + /* * Get one cq entry from c4iw and map it to openib. 
* @@ -561,6 +684,7 @@ skip_cqe: static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ibv_wc *wc) { struct c4iw_qp *qhp = NULL; + struct c4iw_srq *srq = NULL; struct t4_cqe uninitialized_var(cqe), *rd_cqe; struct t4_wq *wq; u32 credit = 0; @@ -595,8 +719,12 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ibv_wc *wc) else { pthread_spin_lock(&qhp->lock); wq = &(qhp->wq); + srq = qhp->srq; + if (srq) + pthread_spin_lock(&srq->lock); } - ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); + ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit, + srq ? &srq->wq : NULL); if (ret) goto out; @@ -606,6 +734,13 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ibv_wc *wc) wc->vendor_err = CQE_STATUS(&cqe); wc->wc_flags = 0; + /* + * Simulate a SRQ_LIMIT_REACHED HW notification if required. + */ + if (srq && !(srq->flags & T4_SRQ_LIMIT_SUPPORT) && srq->armed && + srq->wq.in_use < srq->srq_limit) + generate_srq_limit_event(srq); + PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " "lo 0x%x cookie 0x%llx\n", __func__, CQE_QPID(&cqe), CQE_TYPE(&cqe), @@ -704,8 +839,11 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ibv_wc *wc) chp->cq.cqid, CQE_QPID(&cqe), CQE_TYPE(&cqe), CQE_OPCODE(&cqe), CQE_STATUS(&cqe)); out: - if (wq) + if (wq) { pthread_spin_unlock(&qhp->lock); + if (srq) + pthread_spin_unlock(&srq->lock); + } return ret; } @@ -749,3 +887,11 @@ int c4iw_arm_cq(struct ibv_cq *ibcq, int solicited) pthread_spin_unlock(&chp->lock); return ret; } + +void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx) +{ + struct c4iw_cq * rchp = to_c4iw_cq(qhp->ibv_qp.recv_cq); + + /* create a SRQ RECV CQE for srqidx */ + insert_recv_cqe(&qhp->wq, &rchp->cq, srqidx); +} diff --git a/providers/cxgb4/cxgb4-abi.h b/providers/cxgb4/cxgb4-abi.h index 14fe8feb..63945719 100644 --- a/providers/cxgb4/cxgb4-abi.h +++ b/providers/cxgb4/cxgb4-abi.h @@ -58,12 +58,13 @@ struct c4iw_create_qp_resp_v0 _c4iw_create_qp_resp_v0; DECLARE_DRV_CMD(uc4iw_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, empty, c4iw_alloc_pd_resp); DECLARE_DRV_CMD(uc4iw_create_cq, IB_USER_VERBS_CMD_CREATE_CQ, - empty, c4iw_create_cq_resp); + c4iw_create_cq, c4iw_create_cq_resp); DECLARE_DRV_CMD(uc4iw_create_qp, IB_USER_VERBS_CMD_CREATE_QP, empty, c4iw_create_qp_resp); DECLARE_DRV_CMD(uc4iw_create_qp_v0, IB_USER_VERBS_CMD_CREATE_QP, empty, c4iw_create_qp_resp_v0); DECLARE_DRV_CMD(uc4iw_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT, empty, c4iw_alloc_ucontext_resp); - +DECLARE_DRV_CMD(uc4iw_create_srq, IB_USER_VERBS_CMD_CREATE_SRQ, + empty, c4iw_create_srq_resp); #endif /* IWCH_ABI_H */ diff --git a/providers/cxgb4/dev.c b/providers/cxgb4/dev.c index b1870219..3479e561 100644 --- a/providers/cxgb4/dev.c +++ b/providers/cxgb4/dev.c @@ -84,6 +84,7 @@ static const struct verbs_context_ops c4iw_ctx_common_ops = { .create_srq = c4iw_create_srq, .modify_srq = c4iw_modify_srq, .destroy_srq = c4iw_destroy_srq, + .query_srq = c4iw_query_srq, .create_qp = c4iw_create_qp, .modify_qp = c4iw_modify_qp, .destroy_qp = c4iw_destroy_qp, @@ -456,6 +457,7 @@ static struct verbs_device *c4iw_device_alloc(struct verbs_sysfs_dev *sysfs_dev) dev->abi_version = sysfs_dev->abi_ver; list_node_init(&dev->list); + list_head_init(&dev->srq_list); PDBG("%s device claimed\n", __FUNCTION__); list_add_tail(&devices, &dev->list); #ifdef STALL_DETECTION diff --git a/providers/cxgb4/libcxgb4.h b/providers/cxgb4/libcxgb4.h index 893bd85d..d1b96791 100644 --- a/providers/cxgb4/libcxgb4.h +++ b/providers/cxgb4/libcxgb4.h @@ -59,6 
+59,7 @@ struct c4iw_dev { struct c4iw_qp **qpid2ptr; int max_cq; struct c4iw_cq **cqid2ptr; + struct list_head srq_list; pthread_spinlock_t lock; struct list_node list; int abi_version; @@ -117,11 +118,29 @@ struct c4iw_qp { struct t4_wq wq; pthread_spinlock_t lock; int sq_sig_all; + struct c4iw_srq *srq; }; #define to_c4iw_xxx(xxx, type) \ container_of(ib##xxx, struct c4iw_##type, ibv_##xxx) +struct c4iw_srq { + struct ibv_srq ibv_srq; + int type; /* must be 2nd in this struct */ + struct c4iw_dev *rhp; + struct t4_srq wq; + struct list_node list; + pthread_spinlock_t lock; + uint32_t srq_limit; + int armed; + __u32 flags; +}; + +static inline struct c4iw_srq *to_c4iw_srq(struct ibv_srq *ibsrq) +{ + return to_c4iw_xxx(srq, srq); +} + static inline struct c4iw_dev *to_c4iw_dev(struct ibv_device *ibdev) { return container_of(ibdev, struct c4iw_dev, ibv_dev.device); @@ -201,6 +220,7 @@ int c4iw_destroy_srq(struct ibv_srq *srq); int c4iw_post_srq_recv(struct ibv_srq *ibsrq, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr); +int c4iw_query_srq(struct ibv_srq *ibsrq, struct ibv_srq_attr *attr); struct ibv_qp *c4iw_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr); @@ -229,6 +249,8 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp); int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count); void c4iw_flush_sq(struct c4iw_qp *qhp); void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count); +void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16); +void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx); #define FW_MAJ 0 #define FW_MIN 0 diff --git a/providers/cxgb4/qp.c b/providers/cxgb4/qp.c index af04e3a1..cb4ea785 100644 --- a/providers/cxgb4/qp.c +++ b/providers/cxgb4/qp.c @@ -92,6 +92,23 @@ static void copy_wr_to_rq(struct t4_wq *wq, union t4_recv_wr *wqe, u8 len16) } } +void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16) +{ + u64 *src, *dst; + + src = (u64 *)wqe; + dst = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE); + while (len16) { + *dst++ = *src++; + if (dst >= (u64 *)&srq->queue[srq->size]) + dst = (u64 *)srq->queue; + *dst++ = *src++; + if (dst >= (u64 *)&srq->queue[srq->size]) + dst = (u64 *)srq->queue; + len16--; + } +} + static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, struct ibv_send_wr *wr, int max, u32 *plenp) { @@ -277,6 +294,20 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, return 0; } +static int build_srq_recv(union t4_recv_wr *wqe, struct ibv_recv_wr *wr, + u8 *len16) +{ + int ret; + + ret = build_isgl(&wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); + if (ret) + return ret; + *len16 = DIV_ROUND_UP(sizeof wqe->recv + + wr->num_sge * sizeof(struct fw_ri_sge), 16); + return 0; +} + + static void ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 idx) { struct ibv_modify_qp cmd = {}; @@ -299,7 +330,7 @@ static void ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 idx) } int c4iw_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, - struct ibv_send_wr **bad_wr) + struct ibv_send_wr **bad_wr) { int err = 0; u8 uninitialized_var(len16); @@ -339,37 +370,37 @@ int c4iw_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, fw_flags |= FW_RI_COMPLETION_FLAG; swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; switch (wr->opcode) { - case IBV_WR_SEND: - INC_STAT(send); - if (wr->send_flags & IBV_SEND_FENCE) - fw_flags |= FW_RI_READ_FENCE_FLAG; - fw_opcode = FW_RI_SEND_WR; - swsqe->opcode = FW_RI_SEND; - err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); 
- break; - case IBV_WR_RDMA_WRITE: - INC_STAT(write); - fw_opcode = FW_RI_RDMA_WRITE_WR; - swsqe->opcode = FW_RI_RDMA_WRITE; - err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16); - break; - case IBV_WR_RDMA_READ: - INC_STAT(read); - fw_opcode = FW_RI_RDMA_READ_WR; - swsqe->opcode = FW_RI_READ_REQ; - fw_flags = 0; - err = build_rdma_read(wqe, wr, &len16); - if (err) + case IBV_WR_SEND: + INC_STAT(send); + if (wr->send_flags & IBV_SEND_FENCE) + fw_flags |= FW_RI_READ_FENCE_FLAG; + fw_opcode = FW_RI_SEND_WR; + swsqe->opcode = FW_RI_SEND; + err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); break; - swsqe->read_len = wr->sg_list ? wr->sg_list[0].length : - 0; - if (!qhp->wq.sq.oldest_read) - qhp->wq.sq.oldest_read = swsqe; - break; - default: - PDBG("%s post of type=%d TBD!\n", __func__, - wr->opcode); - err = -EINVAL; + case IBV_WR_RDMA_WRITE: + INC_STAT(write); + fw_opcode = FW_RI_RDMA_WRITE_WR; + swsqe->opcode = FW_RI_RDMA_WRITE; + err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16); + break; + case IBV_WR_RDMA_READ: + INC_STAT(read); + fw_opcode = FW_RI_RDMA_READ_WR; + swsqe->opcode = FW_RI_READ_REQ; + fw_flags = 0; + err = build_rdma_read(wqe, wr, &len16); + if (err) + break; + swsqe->read_len = wr->sg_list ? wr->sg_list[0].length : + 0; + if (!qhp->wq.sq.oldest_read) + qhp->wq.sq.oldest_read = swsqe; + break; + default: + PDBG("%s post of type=%d TBD!\n", __func__, + wr->opcode); + err = -EINVAL; } if (err) { *bad_wr = wr; @@ -474,6 +505,89 @@ int c4iw_post_receive(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, return err; } +static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe, uint64_t wr_id, u8 len16) +{ + struct t4_srq_pending_wr *pwr = &srq->pending_wrs[srq->pending_pidx]; + + PDBG("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx " + "pending_cidx %u pending_pidx %u pending_in_use %u\n", + __func__, srq->cidx, srq->pidx, srq->wq_pidx, + srq->in_use, srq->ooo_count, (unsigned long long)wr_id, srq->pending_cidx, + srq->pending_pidx, srq->pending_in_use); + pwr->wr_id = wr_id; + pwr->len16 = len16; + memcpy(&pwr->wqe, wqe, len16*16); + t4_srq_produce_pending_wr(srq); +} + +int c4iw_post_srq_recv(struct ibv_srq *ibsrq, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr) +{ + int err = 0; + struct c4iw_srq *srq; + union t4_recv_wr *wqe, lwqe; + u32 num_wrs; + u8 len16 = 0; + u16 idx = 0; + + srq = to_c4iw_srq(ibsrq); + pthread_spin_lock(&srq->lock); + INC_STAT(srq_recv); + num_wrs = t4_srq_avail(&srq->wq); + if (num_wrs == 0) { + pthread_spin_unlock(&srq->lock); + return -ENOMEM; + } + while (wr) { + if (wr->num_sge > T4_MAX_RECV_SGE) { + err = -EINVAL; + *bad_wr = wr; + break; + } + wqe = &lwqe; + if (num_wrs) + err = build_srq_recv(wqe, wr, &len16); + else + err = -ENOMEM; + if (err) { + *bad_wr = wr; + break; + } + + wqe->recv.opcode = FW_RI_RECV_WR; + wqe->recv.r1 = 0; + wqe->recv.wrid = srq->wq.pidx; + wqe->recv.r2[0] = 0; + wqe->recv.r2[1] = 0; + wqe->recv.r2[2] = 0; + wqe->recv.len16 = len16; + + if (srq->wq.ooo_count || srq->wq.pending_in_use || srq->wq.sw_rq[srq->wq.pidx].valid) + defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16); + else { + srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id; + srq->wq.sw_rq[srq->wq.pidx].valid = 1; + c4iw_copy_wr_to_srq(&srq->wq, wqe, len16); + PDBG("%s cidx %u pidx %u wq_pidx %u in_use %u " + "wr_id 0x%llx \n", __func__, srq->wq.cidx, + srq->wq.pidx, srq->wq.wq_pidx, srq->wq.in_use, + (unsigned long long)wr->wr_id); + t4_srq_produce(&srq->wq, len16); + idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + } + wr = 
wr->next; + num_wrs--; + } + + if (idx) { + t4_ring_srq_db(&srq->wq, idx, len16, wqe); + srq->wq.queue[srq->wq.size].status.host_wq_pidx = + srq->wq.wq_pidx; + } + pthread_spin_unlock(&srq->lock); + return err; +} + static void update_qp_state(struct c4iw_qp *qhp) { struct ibv_query_qp cmd; @@ -488,44 +602,56 @@ static void update_qp_state(struct c4iw_qp *qhp) qhp->ibv_qp.state = attr.qp_state; } -/* - * Assumes qhp lock is held. - */ void c4iw_flush_qp(struct c4iw_qp *qhp) { struct c4iw_cq *rchp, *schp; int count; - - if (qhp->wq.flushed) - return; - - update_qp_state(qhp); + u32 srqidx = t4_wq_srqidx(&qhp->wq); rchp = to_c4iw_cq(qhp->ibv_qp.recv_cq); schp = to_c4iw_cq(qhp->ibv_qp.send_cq); PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); - qhp->wq.flushed = 1; - pthread_spin_unlock(&qhp->lock); /* locking heirarchy: cq lock first, then qp lock. */ pthread_spin_lock(&rchp->lock); + if (schp != rchp) + pthread_spin_lock(&schp->lock); pthread_spin_lock(&qhp->lock); - c4iw_flush_hw_cq(rchp); - c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); - c4iw_flush_rq(&qhp->wq, &rchp->cq, count); - pthread_spin_unlock(&qhp->lock); - pthread_spin_unlock(&rchp->lock); - /* locking heirarchy: cq lock first, then qp lock. */ - pthread_spin_lock(&schp->lock); - pthread_spin_lock(&qhp->lock); + if (qhp->wq.flushed) { + pthread_spin_unlock(&qhp->lock); + if (schp != rchp) + pthread_spin_unlock(&schp->lock); + pthread_spin_unlock(&rchp->lock); + return; + } + + qhp->wq.flushed = 1; + t4_set_wq_in_error(&qhp->wq); + + if (qhp->srq) + pthread_spin_lock(&qhp->srq->lock); + + if (srqidx) + c4iw_flush_srqidx(qhp, srqidx); + + update_qp_state(qhp); + c4iw_flush_hw_cq(rchp); + if (!qhp->srq) { + c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); + c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + } if (schp != rchp) c4iw_flush_hw_cq(schp); c4iw_flush_sq(qhp); + + if (qhp->srq) + pthread_spin_unlock(&qhp->srq->lock); pthread_spin_unlock(&qhp->lock); - pthread_spin_unlock(&schp->lock); - pthread_spin_lock(&qhp->lock); + if (schp != rchp) + pthread_spin_unlock(&schp->lock); + pthread_spin_unlock(&rchp->lock); } void c4iw_flush_qps(struct c4iw_dev *dev) diff --git a/providers/cxgb4/t4.h b/providers/cxgb4/t4.h index fb10002b..613b462a 100644 --- a/providers/cxgb4/t4.h +++ b/providers/cxgb4/t4.h @@ -67,7 +67,7 @@ #ifdef DEBUG #define DBGLOG(s) -#define PDBG(fmt, args...) do {syslog(LOG_DEBUG, fmt, ##args); } while (0) +#define PDBG(fmt, args...) do {syslog(LOG_ALERT, fmt, ##args); } while (0) #else #define DBGLOG(s) #define PDBG(fmt, args...) 
do {} while (0) @@ -100,10 +100,12 @@ struct t4_status_page { __be16 pidx; u8 qp_err; /* flit 1 - sw owns */ u8 db_off; - u8 pad; + u8 pad[2]; u16 host_wq_pidx; u16 host_cidx; u16 host_pidx; + u16 pad2; + u32 srqidx; }; #define T4_EQ_ENTRY_SIZE 64 @@ -212,8 +214,14 @@ struct t4_cqe { __be32 wrid_hi; __be32 wrid_low; } gen; + struct { + __be32 stag; + __be32 msn; + __be32 reserved; + __be32 abs_rqe_idx; + } srcqe; } u; - __be64 reserved; + __be64 reserved[4]; __be64 bits_type_ts; }; @@ -263,6 +271,7 @@ struct t4_cqe { /* used for RQ completion processing */ #define CQE_WRID_STAG(x) (be32toh((x)->u.rcqe.stag)) #define CQE_WRID_MSN(x) (be32toh((x)->u.rcqe.msn)) +#define CQE_ABS_RQE_IDX(x) (be32toh((x)->u.srcqe.abs_rqe_idx)) /* used for SQ completion processing */ #define CQE_WRID_SQ_IDX(x) (x)->u.scqe.cidx @@ -291,6 +300,7 @@ struct t4_cqe { #define CQE_OVFBIT(x) ((unsigned)G_CQE_OVFBIT(be64toh((x)->bits_type_ts))) #define CQE_GENBIT(x) ((unsigned)G_CQE_GENBIT(be64toh((x)->bits_type_ts))) #define CQE_TS(x) (G_CQE_TS(be64toh((x)->bits_type_ts))) +//#define CQE_IS_SRQ(x) ((x)->rss.opcode == CPL_RDMA_CQE_SRQ) struct t4_swsqe { u64 wr_id; @@ -331,6 +341,7 @@ struct t4_sq { struct t4_swrqe { u64 wr_id; + int valid; }; struct t4_rq { @@ -359,6 +370,8 @@ struct t4_wq { int error; int flushed; u8 *db_offp; + u8 *qp_errp; + u32 *srqidxp; }; static inline int t4_rqes_posted(struct t4_wq *wq) @@ -396,7 +409,6 @@ static inline void t4_rq_produce(struct t4_wq *wq, u8 len16) static inline void t4_rq_consume(struct t4_wq *wq) { wq->rq.in_use--; - wq->rq.msn++; if (++wq->rq.cidx == wq->rq.size) wq->rq.cidx = 0; assert((wq->rq.cidx != wq->rq.pidx) || wq->rq.in_use == 0); @@ -404,6 +416,122 @@ static inline void t4_rq_consume(struct t4_wq *wq) wq->rq.queue[wq->rq.size].status.host_cidx = wq->rq.cidx; } +struct t4_srq_pending_wr { + u64 wr_id; + union t4_recv_wr wqe; + u8 len16; +}; + +struct t4_srq { + union t4_recv_wr *queue; + struct t4_swrqe *sw_rq; + volatile u32 *udb; + size_t memsize; + u32 qid; + u32 bar2_qid; + u32 msn; + u32 rqt_hwaddr; + u32 rqt_abs_idx; + u16 in_use; + u16 size; + u16 cidx; + u16 pidx; + u16 wq_pidx; + int wc_reg_available; + struct t4_srq_pending_wr *pending_wrs; + u16 pending_cidx; + u16 pending_pidx; + u16 pending_in_use; + u16 ooo_count; +}; + +static inline u32 t4_srq_avail(struct t4_srq *srq) +{ + return srq->size - 1 - srq->in_use; +} + +static inline int t4_srq_empty(struct t4_srq *srq) +{ + return srq->in_use == 0; +} + +static inline int t4_srq_cidx_at_end(struct t4_srq *srq) +{ + assert(srq->cidx != srq->pidx); + if (srq->cidx < srq->pidx) + return srq->cidx == (srq->pidx - 1); + else + return srq->cidx == (srq->size - 1) && srq->pidx == 0; +} + +static inline int t4_srq_wrs_pending(struct t4_srq *srq) +{ + return srq->pending_cidx != srq->pending_pidx; +} + +static inline void t4_srq_produce(struct t4_srq *srq, u8 len16) +{ + srq->in_use++; + assert(srq->in_use < srq->size); + if (++srq->pidx == srq->size) + srq->pidx = 0; + assert(srq->cidx != srq->pidx); /* overflow */ + srq->wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + if (srq->wq_pidx >= srq->size * T4_RQ_NUM_SLOTS) + srq->wq_pidx %= srq->size * T4_RQ_NUM_SLOTS; + srq->queue[srq->size].status.host_pidx = srq->pidx; +} + +static inline void t4_srq_produce_pending_wr(struct t4_srq *srq) +{ + srq->pending_in_use++; + srq->in_use++; + assert(srq->pending_in_use < srq->size); + assert(srq->in_use < srq->size); + assert(srq->pending_pidx < srq->size); + if (++srq->pending_pidx == srq->size) + srq->pending_pidx = 
0; +} + +static inline void t4_srq_consume_pending_wr(struct t4_srq *srq) +{ + assert(srq->pending_in_use > 0); + srq->pending_in_use--; + assert(srq->in_use > 0); + srq->in_use--; + if (++srq->pending_cidx == srq->size) + srq->pending_cidx = 0; + assert((srq->pending_cidx != srq->pending_pidx) || srq->pending_in_use == 0); +} + +static inline void t4_srq_produce_ooo(struct t4_srq *srq) +{ + assert(srq->in_use > 0); + srq->in_use--; + srq->ooo_count++; + assert(srq->ooo_count < srq->size); +} + +static inline void t4_srq_consume_ooo(struct t4_srq *srq) +{ + srq->cidx++; + if (srq->cidx == srq->size) + srq->cidx = 0; + srq->queue[srq->size].status.host_cidx = srq->cidx; + assert(srq->ooo_count > 0); + srq->ooo_count--; +} + +static inline void t4_srq_consume(struct t4_srq *srq) +{ + assert(srq->in_use > 0); + srq->in_use--; + if (++srq->cidx == srq->size) + srq->cidx = 0; + assert((srq->cidx != srq->pidx) || srq->in_use == 0); + srq->queue[srq->size].status.host_cidx = srq->cidx; +} + static inline int t4_sq_empty(struct t4_wq *wq) { return wq->sq.in_use == 0; @@ -471,6 +599,23 @@ static void copy_wqe_to_udb(volatile u32 *udb_offset, void *wqe) } } +static inline void t4_ring_srq_db(struct t4_srq *srq, u16 inc, u8 len16, + union t4_recv_wr *wqe) +{ + mmio_wc_start(); + if (inc == 1 && srq->wc_reg_available) { + PDBG("%s: WC srq->pidx = %d; len16=%d\n", + __func__, srq->pidx, len16); + copy_wqe_to_udb(srq->udb + 14, wqe); + } else { + PDBG("%s: DB srq->pidx = %d; len16=%d\n", + __func__, srq->pidx, len16); + writel(QID_V(srq->bar2_qid) | PIDX_T5_V(inc), srq->udb); + } + mmio_flush_writes(); + return; +} + extern int ma_wr; extern int t5_en_wc; @@ -552,6 +697,17 @@ static inline int t4_wq_in_error(struct t4_wq *wq) return wq->error || wq->rq.queue[wq->rq.size].status.qp_err; } +static inline u32 t4_wq_srqidx(struct t4_wq *wq) +{ + u32 srqidx; + + if (!wq->srqidxp) + return 0; + srqidx = *wq->srqidxp; + wq->srqidxp = 0; + return srqidx; +} + static inline void t4_set_wq_in_error(struct t4_wq *wq) { wq->rq.queue[wq->rq.size].status.qp_err = 1; diff --git a/providers/cxgb4/t4_regs.h b/providers/cxgb4/t4_regs.h index 9fea255c..c0627378 100644 --- a/providers/cxgb4/t4_regs.h +++ b/providers/cxgb4/t4_regs.h @@ -1437,6 +1437,10 @@ #define TP_MIB_DATA_A 0x7e54 #define TP_INT_CAUSE_A 0x7e74 +#define SRQTABLEPERR_S 1 +#define SRQTABLEPERR_V(x) ((x) << SRQTABLEPERR_S) +#define SRQTABLEPERR_F SRQTABLEPERR_V(1U) + #define FLMTXFLSTEMPTY_S 30 #define FLMTXFLSTEMPTY_V(x) ((x) << FLMTXFLSTEMPTY_S) #define FLMTXFLSTEMPTY_F FLMTXFLSTEMPTY_V(1U) diff --git a/providers/cxgb4/t4fw_api.h b/providers/cxgb4/t4fw_api.h index 49bbca18..fabb16c7 100644 --- a/providers/cxgb4/t4fw_api.h +++ b/providers/cxgb4/t4fw_api.h @@ -1152,6 +1152,8 @@ enum fw_params_param_pfvf { FW_PARAMS_PARAM_PFVF_SQRQ_END = 0x16, FW_PARAMS_PARAM_PFVF_CQ_START = 0x17, FW_PARAMS_PARAM_PFVF_CQ_END = 0x18, + FW_PARAMS_PARAM_PFVF_SRQ_START = 0x19, + FW_PARAMS_PARAM_PFVF_SRQ_END = 0x1A, FW_PARAMS_PARAM_PFVF_SCHEDCLASS_ETH = 0x20, FW_PARAMS_PARAM_PFVF_VIID = 0x24, FW_PARAMS_PARAM_PFVF_CPMASK = 0x25, diff --git a/providers/cxgb4/t4fw_ri_api.h b/providers/cxgb4/t4fw_ri_api.h index 1e266697..667e4096 100644 --- a/providers/cxgb4/t4fw_ri_api.h +++ b/providers/cxgb4/t4fw_ri_api.h @@ -263,6 +263,7 @@ enum fw_ri_res_type { FW_RI_RES_TYPE_SQ, FW_RI_RES_TYPE_RQ, FW_RI_RES_TYPE_CQ, + FW_RI_RES_TYPE_SRQ, }; enum fw_ri_res_op { @@ -296,6 +297,21 @@ struct fw_ri_res { __be32 r6_lo; __be64 r7; } cq; + struct fw_ri_res_srq { + __u8 restype; + __u8 op; + __be16 r3; + 
__be32 eqid; + __be32 r4[2]; + __be32 fetchszm_to_iqid; + __be32 dcaen_to_eqsize; + __be64 eqaddr; + __be32 srqid; + __be32 pdid; + __be32 hwsrqsize; + __be32 hwsrqaddr; + } srq; + } u; }; @@ -695,6 +711,10 @@ enum fw_ri_init_p2ptype { FW_RI_INIT_P2PTYPE_DISABLED = 0xf, }; +enum fw_ri_init_rqeqid_srq { + FW_RI_INIT_RQEQID_SRQ = 1 << 31, +}; + struct fw_ri_wr { __be32 op_compl; __be32 flowid_len16; diff --git a/providers/cxgb4/verbs.c b/providers/cxgb4/verbs.c index 3c493697..435bb238 100644 --- a/providers/cxgb4/verbs.c +++ b/providers/cxgb4/verbs.c @@ -168,6 +168,7 @@ int c4iw_dereg_mr(struct ibv_mr *mr) struct ibv_cq *c4iw_create_cq(struct ibv_context *context, int cqe, struct ibv_comp_channel *channel, int comp_vector) { + struct uc4iw_create_cq cmd; struct uc4iw_create_cq_resp resp; struct c4iw_cq *chp; struct c4iw_dev *dev = to_c4iw_dev(context->device); @@ -178,16 +179,22 @@ struct ibv_cq *c4iw_create_cq(struct ibv_context *context, int cqe, return NULL; } - resp.reserved = 0; + resp.flags = 0; + memset(&cmd, 0, sizeof cmd); + cmd.flags = C4IW_64B_CQE; + ret = ibv_cmd_create_cq(context, cqe, channel, comp_vector, - &chp->ibv_cq, NULL, 0, + &chp->ibv_cq, &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof resp); if (ret) goto err1; - if (resp.reserved) - PDBG("%s c4iw_create_cq_resp reserved field modified by kernel\n", - __FUNCTION__); + if (!resp.flags) { + fprintf(stderr, "libcxgb4 FATAL ERROR: downlevel iw_cxgb4 " + "module. Cannot support RDMA with this driver/lib " + "combination. Update your drivers!\n"); + return NULL; + } pthread_spin_init(&chp->lock, PTHREAD_PROCESS_PRIVATE); #ifdef STALL_DETECTION @@ -279,24 +286,139 @@ int c4iw_destroy_cq(struct ibv_cq *ibcq) struct ibv_srq *c4iw_create_srq(struct ibv_pd *pd, struct ibv_srq_init_attr *attr) { + struct ibv_create_srq cmd; + struct uc4iw_create_srq_resp resp; + struct c4iw_srq *srq; + struct c4iw_dev *dev = to_c4iw_dev(pd->context->device); + int ret; + void *dbva; + unsigned long segment_offset; + + PDBG("%s enter\n", __func__); + srq = calloc(1, sizeof *srq); + if (!srq) + goto err; + + ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr, &cmd, + sizeof cmd, &resp.ibv_resp, sizeof resp); + if (ret) + goto err_free_srq_mem; + + PDBG("%s srq id 0x%x srq key %" PRIx64 " srq db/gts key %" PRIx64 + " qid_mask 0x%x\n", __func__, + resp.srqid, resp.srq_key, resp.srq_db_gts_key, + resp.qid_mask); + + srq->rhp = dev; + srq->wq.qid = resp.srqid; + srq->wq.size = resp.srq_size; + srq->wq.memsize = resp.srq_memsize; + srq->wq.rqt_abs_idx = resp.rqt_abs_idx; + srq->flags = resp.flags; + pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE); + + dbva = mmap(NULL, c4iw_page_size, PROT_WRITE, MAP_SHARED, + pd->context->cmd_fd, resp.srq_db_gts_key); + if (dbva == MAP_FAILED) + goto err_destroy_srq; + srq->wq.udb = dbva; + + segment_offset = 128 * (srq->wq.qid & resp.qid_mask); + if (segment_offset < c4iw_page_size) { + srq->wq.udb += segment_offset / 4; + srq->wq.wc_reg_available = 1; + } else + srq->wq.bar2_qid = srq->wq.qid & resp.qid_mask; + srq->wq.udb += 2; + + srq->wq.queue = mmap(NULL, srq->wq.memsize, + PROT_WRITE, MAP_SHARED, + pd->context->cmd_fd, resp.srq_key); + if (srq->wq.queue == MAP_FAILED) + goto err_unmap_udb; + + srq->wq.sw_rq = calloc(srq->wq.size, sizeof (struct t4_swrqe)); + if (!srq->wq.sw_rq) + goto err_unmap_queue; + srq->wq.pending_wrs = calloc(srq->wq.size, sizeof *srq->wq.pending_wrs); + if (!srq->wq.pending_wrs) + goto err_free_sw_rq; + + pthread_spin_lock(&dev->lock); + list_add_tail(&dev->srq_list, &srq->list); + 
pthread_spin_unlock(&dev->lock); + + PDBG("%s srq dbva %p srq qva %p srq depth %u srq memsize %lu\n", + __func__, srq->wq.udb, srq->wq.queue, + srq->wq.size, srq->wq.memsize); + + INC_STAT(srq); + return &srq->ibv_srq; +err_free_sw_rq: + free(srq->wq.sw_rq); +err_unmap_queue: + munmap((void *)srq->wq.queue, srq->wq.memsize); +err_unmap_udb: + munmap(MASKED(srq->wq.udb), c4iw_page_size); +err_destroy_srq: + (void)ibv_cmd_destroy_srq(&srq->ibv_srq); +err_free_srq_mem: + free(srq); +err: return NULL; } -int c4iw_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr, +int c4iw_modify_srq(struct ibv_srq *ibsrq, struct ibv_srq_attr *attr, int attr_mask) { - return ENOSYS; + struct c4iw_srq *srq = to_c4iw_srq(ibsrq); + struct ibv_modify_srq cmd; + int ret; + + /* XXX no support for this yet */ + if (attr_mask & IBV_SRQ_MAX_WR) + return ENOSYS; + + ret = ibv_cmd_modify_srq(ibsrq, attr, attr_mask, &cmd, sizeof cmd); + if (!ret) { + if (attr_mask & IBV_SRQ_LIMIT) { + srq->armed = 1; + srq->srq_limit = attr->srq_limit; + } + } + return ret; } -int c4iw_destroy_srq(struct ibv_srq *srq) +int c4iw_destroy_srq(struct ibv_srq *ibsrq) { - return ENOSYS; + int ret; + struct c4iw_srq *srq = to_c4iw_srq(ibsrq); + + PDBG("%s enter qp %p\n", __func__, ibsrq); + + ret = ibv_cmd_destroy_srq(ibsrq); + if (ret) { + return ret; + } + + pthread_spin_lock(&srq->rhp->lock); + list_del(&srq->list); + pthread_spin_unlock(&srq->rhp->lock); + + munmap(MASKED(srq->wq.udb), c4iw_page_size); + munmap(srq->wq.queue, srq->wq.memsize); + + free(srq->wq.pending_wrs); + free(srq->wq.sw_rq); + free(srq); + return 0; } -int c4iw_post_srq_recv(struct ibv_srq *ibsrq, struct ibv_recv_wr *wr, - struct ibv_recv_wr **bad_wr) +int c4iw_query_srq(struct ibv_srq *ibsrq, struct ibv_srq_attr *attr) { - return ENOSYS; + struct ibv_query_srq cmd; + + return ibv_cmd_query_srq(ibsrq, attr, &cmd, sizeof cmd); } static struct ibv_qp *create_qp_v0(struct ibv_pd *pd, @@ -438,9 +560,12 @@ static struct ibv_qp *create_qp(struct ibv_pd *pd, qhp->wq.sq.flags = resp.flags & C4IW_QPF_ONCHIP ? 
T4_SQ_ONCHIP : 0; qhp->wq.sq.flush_cidx = -1; qhp->wq.rq.msn = 1; - qhp->wq.rq.qid = resp.rqid; - qhp->wq.rq.size = resp.rq_size; - qhp->wq.rq.memsize = resp.rq_memsize; + qhp->srq = to_c4iw_srq(attr->srq); + if (!attr->srq) { + qhp->wq.rq.qid = resp.rqid; + qhp->wq.rq.size = resp.rq_size; + qhp->wq.rq.memsize = resp.rq_memsize; + } if (ma_wr && resp.sq_memsize < (resp.sq_size + 1) * sizeof *qhp->wq.sq.queue + 16*sizeof(__be64) ) { ma_wr = 0; @@ -472,35 +597,39 @@ static struct ibv_qp *create_qp(struct ibv_pd *pd, if (qhp->wq.sq.queue == MAP_FAILED) goto err4; - dbva = mmap(NULL, c4iw_page_size, PROT_WRITE, MAP_SHARED, - pd->context->cmd_fd, resp.rq_db_gts_key); - if (dbva == MAP_FAILED) - goto err5; - qhp->wq.rq.udb = dbva; - if (!dev_is_t4(qhp->rhp)) { - unsigned long segment_offset = 128 * (qhp->wq.rq.qid & - qhp->wq.qid_mask); - - if (segment_offset < c4iw_page_size) { - qhp->wq.rq.udb += segment_offset / 4; - qhp->wq.rq.wc_reg_available = 1; - } else - qhp->wq.rq.bar2_qid = qhp->wq.rq.qid & qhp->wq.qid_mask; - qhp->wq.rq.udb += 2; + if (!attr->srq) { + dbva = mmap(NULL, c4iw_page_size, PROT_WRITE, MAP_SHARED, + pd->context->cmd_fd, resp.rq_db_gts_key); + if (dbva == MAP_FAILED) + goto err5; + qhp->wq.rq.udb = dbva; + if (!dev_is_t4(qhp->rhp)) { + unsigned long segment_offset = 128 * (qhp->wq.rq.qid & + qhp->wq.qid_mask); + + if (segment_offset < c4iw_page_size) { + qhp->wq.rq.udb += segment_offset / 4; + qhp->wq.rq.wc_reg_available = 1; + } else + qhp->wq.rq.bar2_qid = qhp->wq.rq.qid & qhp->wq.qid_mask; + qhp->wq.rq.udb += 2; + } + qhp->wq.rq.queue = mmap(NULL, qhp->wq.rq.memsize, + PROT_WRITE, MAP_SHARED, + pd->context->cmd_fd, resp.rq_key); + if (qhp->wq.rq.queue == MAP_FAILED) + goto err6; } - qhp->wq.rq.queue = mmap(NULL, qhp->wq.rq.memsize, - PROT_WRITE, MAP_SHARED, - pd->context->cmd_fd, resp.rq_key); - if (qhp->wq.rq.queue == MAP_FAILED) - goto err6; qhp->wq.sq.sw_sq = calloc(qhp->wq.sq.size, sizeof (struct t4_swsqe)); if (!qhp->wq.sq.sw_sq) goto err7; - qhp->wq.rq.sw_rq = calloc(qhp->wq.rq.size, sizeof (uint64_t)); - if (!qhp->wq.rq.sw_rq) - goto err8; + if (!attr->srq) { + qhp->wq.rq.sw_rq = calloc(qhp->wq.rq.size, sizeof (uint64_t)); + if (!qhp->wq.rq.sw_rq) + goto err8; + } if (t4_sq_onchip(&qhp->wq)) { qhp->wq.sq.ma_sync = mmap(NULL, c4iw_page_size, PROT_WRITE, @@ -513,11 +642,18 @@ static struct ibv_qp *create_qp(struct ibv_pd *pd, if (ctx->status_page_size) { qhp->wq.db_offp = &ctx->status_page->db_off; - } else { + } else if (!attr->srq) { qhp->wq.db_offp = &qhp->wq.rq.queue[qhp->wq.rq.size].status.db_off; } + if (!attr->srq) + qhp->wq.qp_errp = &qhp->wq.rq.queue[qhp->wq.rq.size].status.qp_err; + else { + qhp->wq.qp_errp = &qhp->wq.sq.queue[qhp->wq.sq.size].status.qp_err; + qhp->wq.srqidxp = &qhp->wq.sq.queue[qhp->wq.sq.size].status.srqidx; + } + PDBG("%s sq dbva %p sq qva %p sq depth %u sq memsize %lu " " rq dbva %p rq qva %p rq depth %u rq memsize %lu\n", __func__, @@ -534,13 +670,16 @@ static struct ibv_qp *create_qp(struct ibv_pd *pd, INC_STAT(qp); return &qhp->ibv_qp; err9: - free(qhp->wq.rq.sw_rq); + if (!attr->srq) + free(qhp->wq.rq.sw_rq); err8: free(qhp->wq.sq.sw_sq); err7: - munmap((void *)qhp->wq.rq.queue, qhp->wq.rq.memsize); + if (!attr->srq) + munmap((void *)qhp->wq.rq.queue, qhp->wq.rq.memsize); err6: - munmap(MASKED(qhp->wq.rq.udb), c4iw_page_size); + if (!attr->srq) + munmap(MASKED(qhp->wq.rq.udb), c4iw_page_size); err5: munmap((void *)qhp->wq.sq.queue, qhp->wq.sq.memsize); err4: @@ -614,15 +753,18 @@ int c4iw_destroy_qp(struct ibv_qp *ibqp) 
munmap((void *)qhp->wq.sq.ma_sync, c4iw_page_size); } munmap(MASKED(qhp->wq.sq.udb), c4iw_page_size); - munmap(MASKED(qhp->wq.rq.udb), c4iw_page_size); munmap(qhp->wq.sq.queue, qhp->wq.sq.memsize); - munmap(qhp->wq.rq.queue, qhp->wq.rq.memsize); + if (!qhp->srq) { + munmap(MASKED(qhp->wq.rq.udb), c4iw_page_size); + munmap(qhp->wq.rq.queue, qhp->wq.rq.memsize); + } pthread_spin_lock(&dev->lock); dev->qpid2ptr[qhp->wq.sq.qid] = NULL; pthread_spin_unlock(&dev->lock); - free(qhp->wq.rq.sw_rq); + if (!qhp->srq) + free(qhp->wq.rq.sw_rq); free(qhp->wq.sq.sw_sq); free(qhp); return 0;
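
For reviewers who want to see how this provider support is reached from an
application, the sketch below is a hypothetical user-space example and is not
part of this patch. It only shows the new c4iw_create_srq(),
c4iw_post_srq_recv() and c4iw_modify_srq() paths being exercised through the
standard libibverbs API; the device selection, queue sizes and error handling
are illustrative assumptions.

/*
 * Hypothetical usage sketch (not part of the patch): exercises the new
 * cxgb4 SRQ provider entry points via the generic verbs API.  Sizes and
 * device selection are arbitrary; error handling is abbreviated.
 */
#include <stdint.h>
#include <stdio.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **devs = ibv_get_device_list(NULL);
	if (!devs || !devs[0])
		return 1;
	struct ibv_context *ctx = ibv_open_device(devs[0]);
	struct ibv_pd *pd = ibv_alloc_pd(ctx);

	/* Served by c4iw_create_srq() in this provider. */
	struct ibv_srq_init_attr init = {
		.attr = { .max_wr = 128, .max_sge = 1 },
	};
	struct ibv_srq *srq = ibv_create_srq(pd, &init);
	if (!srq)
		return 1;

	/* Post a receive buffer to the SRQ (c4iw_post_srq_recv()). */
	static char buf[4096];
	struct ibv_mr *mr = ibv_reg_mr(pd, buf, sizeof(buf),
				       IBV_ACCESS_LOCAL_WRITE);
	struct ibv_sge sge = {
		.addr = (uintptr_t)buf,
		.length = sizeof(buf),
		.lkey = mr->lkey,
	};
	struct ibv_recv_wr wr = { .wr_id = 1, .sg_list = &sge, .num_sge = 1 };
	struct ibv_recv_wr *bad_wr;
	if (ibv_post_srq_recv(srq, &wr, &bad_wr))
		fprintf(stderr, "ibv_post_srq_recv failed\n");

	/*
	 * Arm the SRQ limit (c4iw_modify_srq()).  On hardware without
	 * T4_SRQ_LIMIT_SUPPORT the provider simulates the limit event in
	 * its CQ poll path, as implemented in cq.c above.
	 */
	struct ibv_srq_attr mod = { .srq_limit = 16 };
	ibv_modify_srq(srq, &mod, IBV_SRQ_LIMIT);

	/* A QP would attach to this SRQ via ibv_qp_init_attr.srq. */

	ibv_destroy_srq(srq);
	ibv_dereg_mr(mr);
	ibv_dealloc_pd(pd);
	ibv_close_device(ctx);
	ibv_free_device_list(devs);
	return 0;
}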