Message ID | 20190326125433.475-2-kamalheib1@gmail.com
---|---
State | New, archived
Series | pvrdma: Add support for SRQ
On Tue, Mar 26, 2019 at 02:54:30PM +0200, Kamal Heib wrote:
> Add the required function and definitions for support shared receive

s/function/functions
s/for/to (but not sure about that though)

> queues (SRQs) in the backend layer.
>
> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
> ---
>  hw/rdma/rdma_backend.c      | 116 +++++++++++++++++++++++++++++++++++-
>  hw/rdma/rdma_backend.h      |  12 ++++
>  hw/rdma/rdma_backend_defs.h |   5 ++
>  3 files changed, 131 insertions(+), 2 deletions(-)

[...]

> +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
> +                                RdmaBackendSRQ *srq, struct ibv_sge *sge,
> +                                uint32_t num_sge, void *ctx)
> +{
[...]
> +    bctx = g_malloc0(sizeof(*bctx));
> +    bctx->up_ctx = ctx;
> +    bctx->backend_srq = srq;
> +    bctx->backend_qp = NULL;

g_malloc0 takes care for this (otherwise expecting your touch in
rdma_backend_post_recv and rdma_backend_post_send)

[...]

> +    atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
> +    backend_dev->rdma_dev_res->stats.rx_bufs++;

Suggesting to maintain a dedicated counter for srq_rx, what do you think?

[...]
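The g_malloc0() remark deserves one line of illustration: glib's g_malloc0() returns zero-filled memory, and on the platforms QEMU supports an all-zero pointer reads as NULL, so the explicit bctx->backend_qp = NULL store is redundant. A minimal standalone sketch of that point, using a simplified stand-in struct rather than the real BackendCtx:

#include <assert.h>
#include <glib.h>

typedef struct Ctx {            /* simplified stand-in for BackendCtx */
    void *up_ctx;
    void *backend_qp;
    void *backend_srq;
} Ctx;

int main(void)
{
    Ctx *ctx = g_malloc0(sizeof(*ctx));  /* returns zero-filled memory */
    assert(ctx->backend_qp == NULL);     /* no explicit NULL store needed */
    assert(ctx->backend_srq == NULL);
    g_free(ctx);
    return 0;
}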
On 3/27/19 8:44 AM, Yuval Shaia wrote:
> On Tue, Mar 26, 2019 at 02:54:30PM +0200, Kamal Heib wrote:
>> Add the required function and definitions for support shared receive
>
> s/function/functions
> s/for/to (but not sure about that though)

OK, I'll fix it in v3.

[...]

>> +    bctx = g_malloc0(sizeof(*bctx));
>> +    bctx->up_ctx = ctx;
>> +    bctx->backend_srq = srq;
>> +    bctx->backend_qp = NULL;
>
> g_malloc0 takes care for this (otherwise expecting your touch in
> rdma_backend_post_recv and rdma_backend_post_send)

You are right, I'll fix it in v3.

[...]

>> +    atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
>> +    backend_dev->rdma_dev_res->stats.rx_bufs++;
>
> Suggesting to maintain a dedicated counter for srq_rx, what do you think?

Probably need to maintain both, I mean add a dedicated counter for srq_rx
and maintain the existing rx_bufs, because rx_bufs is very generic.

[...]
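The scheme agreed on above, a dedicated SRQ counter next to the generic rx_bufs, could look like the sketch below. The srq_rx_bufs field name and the helper are assumptions for illustration only; they are not part of this patch:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stats layout: rx_bufs mirrors the existing generic
 * counter; srq_rx_bufs is the proposed addition (assumed name). */
typedef struct StatsSketch {
    uint64_t rx_bufs;      /* existing, counts every posted recv buffer */
    uint64_t srq_rx_bufs;  /* new, counts only SRQ-posted buffers */
} StatsSketch;

/* After a successful ibv_post_srq_recv(), bump both counters: the
 * generic one stays consistent, the SRQ one adds the detail. */
static void account_srq_rx(StatsSketch *stats)
{
    stats->rx_bufs++;
    stats->srq_rx_bufs++;
}

int main(void)
{
    StatsSketch stats = {0};
    account_srq_rx(&stats);
    printf("rx_bufs=%lu srq_rx_bufs=%lu\n",
           (unsigned long)stats.rx_bufs, (unsigned long)stats.srq_rx_bufs);
    return 0;
}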
Add the required function and definitions for support shared receive
queues (SRQs) in the backend layer.

Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
---
 hw/rdma/rdma_backend.c      | 116 +++++++++++++++++++++++++++++++++++-
 hw/rdma/rdma_backend.h      |  12 ++++
 hw/rdma/rdma_backend_defs.h |   5 ++
 3 files changed, 131 insertions(+), 2 deletions(-)

diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index d1660b6474fa..54419c8c58dd 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -40,6 +40,7 @@ typedef struct BackendCtx {
     void *up_ctx;
     struct ibv_sge sge; /* Used to save MAD recv buffer */
     RdmaBackendQP *backend_qp; /* To maintain recv buffers */
+    RdmaBackendSRQ *backend_srq;
 } BackendCtx;
 
 struct backend_umad {
@@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
     int i, ne, total_ne = 0;
     BackendCtx *bctx;
     struct ibv_wc wc[2];
+    RdmaProtectedGSList *cqe_ctx_list;
 
     qemu_mutex_lock(&rdma_dev_res->lock);
     do {
@@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
 
             comp_handler(bctx->up_ctx, &wc[i]);
 
-            rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list,
-                                               wc[i].wr_id);
+            if (bctx->backend_qp) {
+                cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list;
+            } else {
+                cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list;
+            }
+
+            rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id);
             rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
             g_free(bctx);
         }
@@ -662,6 +669,60 @@ err_free_bctx:
     g_free(bctx);
 }
 
+void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
+                                RdmaBackendSRQ *srq, struct ibv_sge *sge,
+                                uint32_t num_sge, void *ctx)
+{
+    BackendCtx *bctx;
+    struct ibv_sge new_sge[MAX_SGE];
+    uint32_t bctx_id;
+    int rc;
+    struct ibv_recv_wr wr = {}, *bad_wr;
+
+    bctx = g_malloc0(sizeof(*bctx));
+    bctx->up_ctx = ctx;
+    bctx->backend_srq = srq;
+    bctx->backend_qp = NULL;
+
+    rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
+    if (unlikely(rc)) {
+        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
+        goto err_free_bctx;
+    }
+
+    rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);
+
+    rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
+                              &backend_dev->rdma_dev_res->stats.rx_bufs_len);
+    if (rc) {
+        complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
+        goto err_dealloc_cqe_ctx;
+    }
+
+    wr.num_sge = num_sge;
+    wr.sg_list = new_sge;
+    wr.wr_id = bctx_id;
+    rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
+    if (rc) {
+        rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d",
+                          srq->ibsrq->handle, rc, errno);
+        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
+        goto err_dealloc_cqe_ctx;
+    }
+
+    atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
+    backend_dev->rdma_dev_res->stats.rx_bufs++;
+
+    return;
+
+err_dealloc_cqe_ctx:
+    backend_dev->rdma_dev_res->stats.rx_bufs_err++;
+    rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id);
+
+err_free_bctx:
+    g_free(bctx);
+}
+
 int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd)
 {
     pd->ibpd = ibv_alloc_pd(backend_dev->context);
@@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res)
     rdma_protected_gslist_destroy(&qp->cqe_ctx_list);
 }
 
+int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
+                            uint32_t max_wr, uint32_t max_sge,
+                            uint32_t srq_limit)
+{
+    struct ibv_srq_init_attr srq_init_attr = {};
+
+    srq_init_attr.attr.max_wr = max_wr;
+    srq_init_attr.attr.max_sge = max_sge;
+    srq_init_attr.attr.srq_limit = srq_limit;
+
+    srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr);
+    if (!srq->ibsrq) {
+        rdma_error_report("ibv_create_srq failed, errno=%d", errno);
+        return -EIO;
+    }
+
+    rdma_protected_gslist_init(&srq->cqe_ctx_list);
+
+    return 0;
+}
+
+int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr)
+{
+    if (!srq->ibsrq) {
+        return -EINVAL;
+    }
+
+    return ibv_query_srq(srq->ibsrq, srq_attr);
+}
+
+int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
+                            int srq_attr_mask)
+{
+    if (!srq->ibsrq) {
+        return -EINVAL;
+    }
+
+    return ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask);
+}
+
+void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources *dev_res)
+{
+    if (srq->ibsrq) {
+        ibv_destroy_srq(srq->ibsrq);
+    }
+    g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res);
+    rdma_protected_gslist_destroy(&srq->cqe_ctx_list);
+}
+
 #define CHK_ATTR(req, dev, member, fmt) ({ \
     trace_rdma_check_dev_attr(#member, dev.member, req->member); \
     if (req->member > dev.member) { \
@@ -960,6 +1070,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
     }
 
     dev_attr->max_sge = MAX_SGE;
+    dev_attr->max_srq_sge = MAX_SGE;
 
     CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64);
     CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d");
@@ -970,6 +1081,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
     CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d");
     CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d");
     CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d");
+    CHK_ATTR(dev_attr, bk_dev_attr, max_srq, "%d");
 
     return 0;
 }
diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
index 38056d97c7fc..cad7956d98e8 100644
--- a/hw/rdma/rdma_backend.h
+++ b/hw/rdma/rdma_backend.h
@@ -114,4 +114,16 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
                             RdmaBackendQP *qp, uint8_t qp_type,
                             struct ibv_sge *sge, uint32_t num_sge, void *ctx);
 
+int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
+                            uint32_t max_wr, uint32_t max_sge,
+                            uint32_t srq_limit);
+int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr);
+int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
+                            int srq_attr_mask);
+void rdma_backend_destroy_srq(RdmaBackendSRQ *srq,
+                              RdmaDeviceResources *dev_res);
+void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
+                                RdmaBackendSRQ *srq, struct ibv_sge *sge,
+                                uint32_t num_sge, void *ctx);
+
 #endif
diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
index 817153dc8cf4..0b55be35038d 100644
--- a/hw/rdma/rdma_backend_defs.h
+++ b/hw/rdma/rdma_backend_defs.h
@@ -68,4 +68,9 @@ typedef struct RdmaBackendQP {
     RdmaProtectedGSList cqe_ctx_list;
 } RdmaBackendQP;
 
+typedef struct RdmaBackendSRQ {
+    struct ibv_srq *ibsrq;
+    RdmaProtectedGSList cqe_ctx_list;
+} RdmaBackendSRQ;
+
 #endif
-- 
2.20.1
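For orientation, here is a hedged sketch of how the backend entry points added by this patch chain together. The helper, its arguments, and the parameter values are assumptions for illustration; in the real device these calls are driven by the pvrdma resource-manager layer, and error handling beyond the create check is elided:

#include "rdma_backend.h"

/* Hypothetical smoke test: create an SRQ on an existing PD, post one
 * receive buffer, then tear the SRQ down. backend_dev, pd, sge and
 * comp_ctx are assumed to be set up by the caller. */
static void srq_smoke_test(RdmaBackendDev *backend_dev, RdmaBackendPD *pd,
                           struct ibv_sge *sge, void *comp_ctx)
{
    RdmaBackendSRQ srq;

    if (rdma_backend_create_srq(&srq, pd, 64 /* max_wr */, 1 /* max_sge */,
                                0 /* srq_limit: no limit event */)) {
        return; /* creation failed; the backend already logged the error */
    }

    /* The completion surfaces later through rdma_poll_cq(), which now
     * walks the SRQ's cqe_ctx_list when bctx->backend_qp is NULL. */
    rdma_backend_post_srq_recv(backend_dev, &srq, sge, 1, comp_ctx);

    rdma_backend_destroy_srq(&srq, backend_dev->rdma_dev_res);
}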