Message ID | 20190528182116.19012.50268.stgit@manet.1015granger.net (mailing list archive)
---|---
State | New, archived
Series | for-5.3 NFS/RDMA patches for review
On May 30, 2019, at 10:05 AM, Anna Schumaker <schumaker.anna@gmail.com> wrote:

Hi Chuck,

On Tue, 2019-05-28 at 14:21 -0400, Chuck Lever wrote:
> Since both the Send and Receive completion queues are processed in
> a workqueue context, it should be safe to DMA unmap and return MRs
> to the free or recycle lists directly in the completion handlers.
>
> Doing this means rpcrdma_frwr no longer needs to track the state
> of each MR... a VALID or FLUSHED MR can no longer appear on an
> xprt's MR free list.
>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
> [... full patch trimmed to the hunk under discussion; the complete
> diff is reproduced at the bottom of this page ...]
>
> -	frwr = NULL;
> -	count = 0;
> 	prev = &first;
> -	list_for_each_entry(mr, mrs, mr_list) {
> -		mr->frwr.fr_state = FRWR_IS_INVALID;
> +	while (!list_empty(&req->rl_registered)) {

Is this list guaranteed to always start full? Because we could potentially
use frwr uninitialized a few lines down if it's not.

net/sunrpc/xprtrdma/frwr_ops.c: In function ‘frwr_unmap_sync’:
net/sunrpc/xprtrdma/frwr_ops.c:582:3: error: ‘frwr’ may be used uninitialized
in this function [-Werror=maybe-uninitialized]
   wait_for_completion(&frwr->fr_linv_done);
   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Thanks,
Anna
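For context, the pattern gcc is objecting to can be reduced to a few
self-contained lines. The sketch below is a hypothetical miniature in plain C,
not the xprtrdma code; drain() stands in for frwr_unmap_sync()'s loop and
"last" for the frwr pointer:

	#include <stdio.h>

	struct node { struct node *next; };

	/* "last" is assigned only inside the loop body, so the compiler
	 * cannot prove it is initialized when the list might start out
	 * empty -- even if every caller guarantees that it never does.
	 */
	static void drain(struct node *head)
	{
		struct node *last;

		while (head) {
			last = head;
			head = head->next;
		}
		printf("last: %p\n", (void *)last); /* -Wmaybe-uninitialized fires here */
	}

	int main(void)
	{
		struct node b = { NULL }, a = { &b };

		drain(&a); /* fine at run time; the warning is about the empty-list path */
		return 0;
	}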
> On May 30, 2019, at 10:05 AM, Anna Schumaker <schumaker.anna@gmail.com> wrote:
>
> Hi Chuck,
>
> On Tue, 2019-05-28 at 14:21 -0400, Chuck Lever wrote:
>> [...]
>> -	list_for_each_entry(mr, mrs, mr_list) {
>> -		mr->frwr.fr_state = FRWR_IS_INVALID;
>> +	while (!list_empty(&req->rl_registered)) {
>
> Is this list guaranteed to always start full? Because we could potentially
> use frwr uninitialized a few lines down if it's not.

The only caller of frwr_unmap_async looks like this:

1353         if (!list_empty(&req->rl_registered))
1354                 frwr_unmap_async(r_xprt, req);

The warning is a false positive. I'll see about rearranging the logic.

> net/sunrpc/xprtrdma/frwr_ops.c: In function ‘frwr_unmap_sync’:
> net/sunrpc/xprtrdma/frwr_ops.c:582:3: error: ‘frwr’ may be used uninitialized
> in this function [-Werror=maybe-uninitialized]
>    wait_for_completion(&frwr->fr_linv_done);
>    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>
> Thanks,
> Anna
>
> [... remainder of the quoted patch trimmed ...]

--
Chuck Lever
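One way to "rearrange the logic" so the compiler can see the caller's
non-empty guarantee is a do/while loop. This is a hypothetical sketch
continuing the miniature model above (it reuses struct node from that
sketch), not the fix that was eventually merged:

	/* The do/while makes the non-empty invariant explicit: the body
	 * runs at least once, so "last" is provably assigned on every
	 * path that reaches the final use, and the warning goes away.
	 */
	static void drain_nonempty(struct node *head)
	{
		struct node *last;

		do {
			last = head;
			head = head->next;
		} while (head);

		printf("last: %p\n", (void *)last); /* no warning */
	}

The patch as posted follows.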
Since both the Send and Receive completion queues are processed in
a workqueue context, it should be safe to DMA unmap and return MRs
to the free or recycle lists directly in the completion handlers.

Doing this means rpcrdma_frwr no longer needs to track the state
of each MR... a VALID or FLUSHED MR can no longer appear on an
xprt's MR free list.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/trace/events/rpcrdma.h  |   19 ----
 net/sunrpc/xprtrdma/frwr_ops.c  |  202 ++++++++++++++++++---------------------
 net/sunrpc/xprtrdma/rpc_rdma.c  |    2
 net/sunrpc/xprtrdma/xprt_rdma.h |   11 --
 4 files changed, 95 insertions(+), 139 deletions(-)

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index a4ab3a2..7fe21ba 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -181,18 +181,6 @@
 		), \
 		TP_ARGS(task, mr, nsegs))

-TRACE_DEFINE_ENUM(FRWR_IS_INVALID);
-TRACE_DEFINE_ENUM(FRWR_IS_VALID);
-TRACE_DEFINE_ENUM(FRWR_FLUSHED_FR);
-TRACE_DEFINE_ENUM(FRWR_FLUSHED_LI);
-
-#define xprtrdma_show_frwr_state(x) \
-	__print_symbolic(x, \
-		{ FRWR_IS_INVALID, "INVALID" }, \
-		{ FRWR_IS_VALID, "VALID" }, \
-		{ FRWR_FLUSHED_FR, "FLUSHED_FR" }, \
-		{ FRWR_FLUSHED_LI, "FLUSHED_LI" })
-
 DECLARE_EVENT_CLASS(xprtrdma_frwr_done,
 	TP_PROTO(
 		const struct ib_wc *wc,
@@ -203,22 +191,19 @@

 	TP_STRUCT__entry(
 		__field(const void *, mr)
-		__field(unsigned int, state)
 		__field(unsigned int, status)
 		__field(unsigned int, vendor_err)
 	),

 	TP_fast_assign(
 		__entry->mr = container_of(frwr, struct rpcrdma_mr, frwr);
-		__entry->state = frwr->fr_state;
 		__entry->status = wc->status;
 		__entry->vendor_err = __entry->status ? wc->vendor_err : 0;
 	),

 	TP_printk(
-		"mr=%p state=%s: %s (%u/0x%x)",
-		__entry->mr, xprtrdma_show_frwr_state(__entry->state),
-		rdma_show_wc_status(__entry->status),
+		"mr=%p: %s (%u/0x%x)",
+		__entry->mr, rdma_show_wc_status(__entry->status),
 		__entry->status, __entry->vendor_err
 	)
 );
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index ac47314..99871fbf 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -168,7 +168,6 @@ int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
 		goto out_list_err;

 	mr->frwr.fr_mr = frmr;
-	mr->frwr.fr_state = FRWR_IS_INVALID;
 	mr->mr_dir = DMA_NONE;
 	INIT_LIST_HEAD(&mr->mr_list);
 	INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker);
@@ -298,65 +297,6 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
 }

 /**
- * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
- * @cq: completion queue (ignored)
- * @wc: completed WR
- *
- */
-static void
-frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
-{
-	struct ib_cqe *cqe = wc->wr_cqe;
-	struct rpcrdma_frwr *frwr =
-		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
-
-	/* WARNING: Only wr_cqe and status are reliable at this point */
-	if (wc->status != IB_WC_SUCCESS)
-		frwr->fr_state = FRWR_FLUSHED_FR;
-	trace_xprtrdma_wc_fastreg(wc, frwr);
-}
-
-/**
- * frwr_wc_localinv - Invoked by RDMA provider for a flushed LocalInv WC
- * @cq: completion queue (ignored)
- * @wc: completed WR
- *
- */
-static void
-frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
-{
-	struct ib_cqe *cqe = wc->wr_cqe;
-	struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
-						 fr_cqe);
-
-	/* WARNING: Only wr_cqe and status are reliable at this point */
-	if (wc->status != IB_WC_SUCCESS)
-		frwr->fr_state = FRWR_FLUSHED_LI;
-	trace_xprtrdma_wc_li(wc, frwr);
-}
-
-/**
- * frwr_wc_localinv_wake - Invoked by RDMA provider for a signaled LocalInv WC
- * @cq: completion queue (ignored)
- * @wc: completed WR
- *
- * Awaken anyone waiting for an MR to finish being fenced.
- */
-static void
-frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
-{
-	struct ib_cqe *cqe = wc->wr_cqe;
-	struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
-						 fr_cqe);
-
-	/* WARNING: Only wr_cqe and status are reliable at this point */
-	if (wc->status != IB_WC_SUCCESS)
-		frwr->fr_state = FRWR_FLUSHED_LI;
-	trace_xprtrdma_wc_li_wake(wc, frwr);
-	complete(&frwr->fr_linv_done);
-}
-
-/**
  * frwr_map - Register a memory region
  * @r_xprt: controlling transport
  * @seg: memory region co-ordinates
@@ -378,23 +318,15 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
-	struct rpcrdma_frwr *frwr;
 	struct rpcrdma_mr *mr;
 	struct ib_mr *ibmr;
 	struct ib_reg_wr *reg_wr;
 	int i, n;
 	u8 key;

-	mr = NULL;
-	do {
-		if (mr)
-			rpcrdma_mr_recycle(mr);
-		mr = rpcrdma_mr_get(r_xprt);
-		if (!mr)
-			goto out_getmr_err;
-	} while (mr->frwr.fr_state != FRWR_IS_INVALID);
-	frwr = &mr->frwr;
-	frwr->fr_state = FRWR_IS_VALID;
+	mr = rpcrdma_mr_get(r_xprt);
+	if (!mr)
+		goto out_getmr_err;

 	if (nsegs > ia->ri_max_frwr_depth)
 		nsegs = ia->ri_max_frwr_depth;
@@ -423,7 +355,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 	if (!mr->mr_nents)
 		goto out_dmamap_err;

-	ibmr = frwr->fr_mr;
+	ibmr = mr->frwr.fr_mr;
 	n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
 	if (unlikely(n != mr->mr_nents))
 		goto out_mapmr_err;
@@ -433,7 +365,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 	key = (u8)(ibmr->rkey & 0x000000FF);
 	ib_update_fast_reg_key(ibmr, ++key);

-	reg_wr = &frwr->fr_regwr;
+	reg_wr = &mr->frwr.fr_regwr;
 	reg_wr->mr = ibmr;
 	reg_wr->key = ibmr->rkey;
 	reg_wr->access = writing ?
@@ -465,6 +397,23 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 }

 /**
+ * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
+ * @cq: completion queue (ignored)
+ * @wc: completed WR
+ *
+ */
+static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct rpcrdma_frwr *frwr =
+		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+
+	/* WARNING: Only wr_cqe and status are reliable at this point */
+	trace_xprtrdma_wc_fastreg(wc, frwr);
+	/* The MR will get recycled when the associated req is retransmitted */
+}
+
+/**
  * frwr_send - post Send WR containing the RPC Call message
  * @ia: interface adapter
  * @req: Prepared RPC Call
@@ -516,65 +465,104 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
 		if (mr->mr_handle == rep->rr_inv_rkey) {
 			list_del_init(&mr->mr_list);
 			trace_xprtrdma_mr_remoteinv(mr);
-			mr->frwr.fr_state = FRWR_IS_INVALID;
 			rpcrdma_mr_unmap_and_put(mr);
 			break;	/* only one invalidated MR per RPC */
 		}
 }

+static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr)
+{
+	if (wc->status != IB_WC_SUCCESS)
+		rpcrdma_mr_recycle(mr);
+	else
+		rpcrdma_mr_unmap_and_put(mr);
+}
+
 /**
- * frwr_unmap_sync - invalidate memory regions that were registered for @req
- * @r_xprt: controlling transport
- * @mrs: list of MRs to process
+ * frwr_wc_localinv - Invoked by RDMA provider for a LOCAL_INV WC
+ * @cq: completion queue (ignored)
+ * @wc: completed WR
  *
- * Sleeps until it is safe for the host CPU to access the
- * previously mapped memory regions.
+ */
+static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct rpcrdma_frwr *frwr =
+		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+	struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+
+	/* WARNING: Only wr_cqe and status are reliable at this point */
+	__frwr_release_mr(wc, mr);
+	trace_xprtrdma_wc_li(wc, frwr);
+}
+
+/**
+ * frwr_wc_localinv_wake - Invoked by RDMA provider for a LOCAL_INV WC
+ * @cq: completion queue (ignored)
+ * @wc: completed WR
  *
- * Caller ensures that @mrs is not empty before the call. This
- * function empties the list.
+ * Awaken anyone waiting for an MR to finish being fenced.
  */
-void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
+static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct rpcrdma_frwr *frwr =
+		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+	struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+
+	/* WARNING: Only wr_cqe and status are reliable at this point */
+	__frwr_release_mr(wc, mr);
+	trace_xprtrdma_wc_li_wake(wc, frwr);
+	complete(&frwr->fr_linv_done);
+}
+
+/**
+ * frwr_unmap_sync - invalidate memory regions that were registered for @req
+ * @r_xprt: controlling transport instance
+ * @req: rpcrdma_req with a non-empty list of MRs to process
+ *
+ * Sleeps until it is safe for the host CPU to access the previously mapped
+ * memory regions.
+ */
+void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 {
 	struct ib_send_wr *first, **prev, *last;
 	const struct ib_send_wr *bad_wr;
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_frwr *frwr;
 	struct rpcrdma_mr *mr;
-	int count, rc;
+	int rc;

 	/* ORDER: Invalidate all of the MRs first
 	 *
 	 * Chain the LOCAL_INV Work Requests and post them with
 	 * a single ib_post_send() call.
 	 */
-	frwr = NULL;
-	count = 0;
 	prev = &first;
-	list_for_each_entry(mr, mrs, mr_list) {
-		mr->frwr.fr_state = FRWR_IS_INVALID;
+	while (!list_empty(&req->rl_registered)) {
+		mr = rpcrdma_mr_pop(&req->rl_registered);

-		frwr = &mr->frwr;
 		trace_xprtrdma_mr_localinv(mr);
+		r_xprt->rx_stats.local_inv_needed++;

+		frwr = &mr->frwr;
 		frwr->fr_cqe.done = frwr_wc_localinv;
 		last = &frwr->fr_invwr;
-		memset(last, 0, sizeof(*last));
+		last->next = NULL;
 		last->wr_cqe = &frwr->fr_cqe;
+		last->sg_list = NULL;
+		last->num_sge = 0;
 		last->opcode = IB_WR_LOCAL_INV;
+		last->send_flags = IB_SEND_SIGNALED;
 		last->ex.invalidate_rkey = mr->mr_handle;
-		count++;

 		*prev = last;
 		prev = &last->next;
 	}
-	if (!frwr)
-		goto unmap;

 	/* Strong send queue ordering guarantees that when the
 	 * last WR in the chain completes, all WRs in the chain
 	 * are complete.
 	 */
-	last->send_flags = IB_SEND_SIGNALED;
 	frwr->fr_cqe.done = frwr_wc_localinv_wake;
 	reinit_completion(&frwr->fr_linv_done);

@@ -582,26 +570,18 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	 * replaces the QP. The RPC reply handler won't call us
 	 * unless ri_id->qp is a valid pointer.
 	 */
-	r_xprt->rx_stats.local_inv_needed++;
 	bad_wr = NULL;
-	rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
-	if (bad_wr != first)
-		wait_for_completion(&frwr->fr_linv_done);
-	if (rc)
-		goto out_release;
+	rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
+	trace_xprtrdma_post_send(req, rc);

-	/* ORDER: Now DMA unmap all of the MRs, and return
-	 * them to the free MR list.
+	/* The final LOCAL_INV WR in the chain is supposed to
+	 * do the wake. If it never gets posted, the wake will
+	 * not happen, so don't wait in that case.
 	 */
-unmap:
-	while (!list_empty(mrs)) {
-		mr = rpcrdma_mr_pop(mrs);
-		rpcrdma_mr_unmap_and_put(mr);
-	}
-	return;
-
-out_release:
-	pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
+	if (bad_wr != first)
+		wait_for_completion(&frwr->fr_linv_done);
+	if (!rc)
+		return;

 	/* Unmap and release the MRs in the LOCAL_INV WRs that did not
 	 * get posted.
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 77fc1e4..6c049fd 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1277,7 +1277,7 @@ void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	 * RPC has relinquished all its Send Queue entries.
 	 */
 	if (!list_empty(&req->rl_registered))
-		frwr_unmap_sync(r_xprt, &req->rl_registered);
+		frwr_unmap_sync(r_xprt, req);

 	/* Ensure that any DMA mapped pages associated with
 	 * the Send of the RPC Call have been unmapped before
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 3c39aa3..a9de116 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -240,17 +240,9 @@ struct rpcrdma_sendctx {
  * An external memory region is any buffer or page that is registered
  * on the fly (ie, not pre-registered).
  */
-enum rpcrdma_frwr_state {
-	FRWR_IS_INVALID,	/* ready to be used */
-	FRWR_IS_VALID,		/* in use */
-	FRWR_FLUSHED_FR,	/* flushed FASTREG WR */
-	FRWR_FLUSHED_LI,	/* flushed LOCALINV WR */
-};
-
 struct rpcrdma_frwr {
 	struct ib_mr		*fr_mr;
 	struct ib_cqe		fr_cqe;
-	enum rpcrdma_frwr_state	fr_state;
 	struct completion	fr_linv_done;
 	union {
 		struct ib_reg_wr	fr_regwr;
@@ -567,8 +559,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 				struct rpcrdma_mr **mr);
 int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
 void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
-void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt,
-		     struct list_head *mrs);
+void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);

 /*
  * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
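For readers new to verbs programming, the pointer-to-pointer chaining idiom
that frwr_unmap_sync() uses ("*prev = last; prev = &last->next;") can be
isolated as follows. This is a generic, hypothetical sketch with stand-in
types, not the real struct ib_send_wr or kernel code:

	#include <stddef.h>

	struct send_wr { struct send_wr *next; };

	/* Build a singly linked chain from an array of WRs. "prev" always
	 * points at the link field to patch next, so the chain is built in
	 * order and "first" ends up holding its head.
	 */
	static struct send_wr *chain_wrs(struct send_wr **wrs, int n)
	{
		struct send_wr *first = NULL, **prev = &first;
		int i;

		for (i = 0; i < n; i++) {
			wrs[i]->next = NULL;
			*prev = wrs[i];		/* patch this WR into the chain */
			prev = &wrs[i]->next;	/* advance cursor to its link field */
		}
		return first;	/* the whole chain then goes to one ib_post_send() */
	}

Posting the chain with a single call is what lets the patch rely on send
queue ordering: when the last (signaled) WR completes, all earlier WRs in
the chain have completed too.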