Message ID | 1443116094-7969-13-git-send-email-sagig@mellanox.com (mailing list archive)
---|---
State | Superseded
> On Sep 24, 2015, at 10:34 AM, Sagi Grimberg <sagig@mellanox.com> wrote:
>
> Instead of maintaining a fastreg page list, keep an sg table
> and convert an array of pages to a sg list. Then call ib_map_mr_sg
> and construct ib_reg_wr.
>
> Note that the next step would be to have NFS work with sg lists
> as it maps well to sk_frags (see comment from hch
> http://marc.info/?l=linux-rdma&m=143677002622296&w=2).

Fwiw, you would need to change tcp_sendpages() first.

One more comment below.

> Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
> Acked-by: Christoph Hellwig <hch@lst.de>
> ---
>  net/sunrpc/xprtrdma/frwr_ops.c  | 113 +++++++++++++++++++++++-----------------
>  net/sunrpc/xprtrdma/xprt_rdma.h |   3 +-
>  2 files changed, 68 insertions(+), 48 deletions(-)

[ ... unchanged hunks trimmed; the full diff appears at the bottom of this page ... ]

> @@ -403,28 +421,29 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
[ ... ]
> -	while (seg1->mr_nsegs--)
> -		rpcrdma_unmap_one(ia->ri_device, seg++);
> 	read_lock(&ia->ri_qplock);
> 	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
> 	read_unlock(&ia->ri_qplock);
> 	if (rc)
> 		goto out_err;
>
> +	ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);

What benefit does moving the DMA unmap after the post_send have?
Remember that the LOCAL_INV WRs may not have started yet, even after
the post_send.

I'm OK with you not moving the DMA unmap into the completion handler
yet, it's a rather complex change to consider. I have a plan and some
patches cooking that will address this problem and the issue of flow
controlling invalidation WRs.

--
Chuck Lever
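For readers skimming the thread, the registration flow the patch moves to can be distilled as follows. This is a minimal sketch, not the patch itself: frwr_map_sketch() and its parameter list are invented for illustration, the DMA direction is hard-coded, and error unwinding is elided; the ib_* calls mirror the ones frwr_op_map() makes above.

```c
/*
 * Minimal sketch of sg-based fast registration (hypothetical helper).
 * Assumes the caller allocated "mr" with ib_alloc_mr(pd,
 * IB_MR_TYPE_MEM_REG, depth) and filled "sg" via
 * sg_set_page()/sg_set_buf().
 */
#include <linux/scatterlist.h>
#include <linux/string.h>
#include <rdma/ib_verbs.h>

static int frwr_map_sketch(struct ib_qp *qp, struct ib_device *device,
			   struct ib_mr *mr, struct scatterlist *sg,
			   int sg_nents, bool writing)
{
	struct ib_reg_wr reg_wr;
	struct ib_send_wr *bad_wr;
	u8 key;
	int n;

	/* DMA-map the scatterlist; this replaces rpcrdma_map_one(). */
	if (!ib_dma_map_sg(device, sg, sg_nents, DMA_BIDIRECTIONAL))
		return -ENOMEM;

	/* Let the driver build the MR's page list from the mapped sg;
	 * this replaces the ib_fast_reg_page_list machinery. */
	n = ib_map_mr_sg(mr, sg, sg_nents, PAGE_SIZE);
	if (n != sg_nents)
		return n < 0 ? n : -EINVAL;

	/* Bump the rkey's generation byte so a stale rkey is caught. */
	key = (u8)(mr->rkey & 0x000000FF);
	ib_update_fast_reg_key(mr, ++key);

	/* Post one IB_WR_REG_MR to make the registration live. */
	memset(&reg_wr, 0, sizeof(reg_wr));
	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.mr = mr;
	reg_wr.key = mr->rkey;
	reg_wr.access = writing ?
			IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
			IB_ACCESS_REMOTE_READ;
	return ib_post_send(qp, &reg_wr.wr, &bad_wr);
}
```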
On 9/25/2015 7:51 PM, Chuck Lever wrote:
>
>> On Sep 24, 2015, at 10:34 AM, Sagi Grimberg <sagig@mellanox.com> wrote:
>>
>> Instead of maintaining a fastreg page list, keep an sg table
>> and convert an array of pages to a sg list. Then call ib_map_mr_sg
>> and construct ib_reg_wr.

[ ... quoted patch trimmed ... ]

>> +	ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);
>
> What benefit does moving the DMA unmap after the post_send have?
> Remember that the LOCAL_INV WRs may not have started yet, even after
> the post_send.

It doesn't have a benefit. I can move it back...
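To make Chuck's point concrete: the HCA is only guaranteed to be finished with the memory once the LOCAL_INV actually completes, so the unmap would ultimately belong in the send-completion path, roughly along the lines below. This is a hypothetical sketch, not anything posted in this thread: the handler name is invented, and it assumes the DMA direction gets stashed in a new frmr field (the patch as posted keeps it in seg1->mr_dir). The wr_id-to-rpcrdma_mw recovery follows the convention the patch itself uses.

```c
/*
 * Hypothetical sketch: DMA-unmap from the LOCAL_INV completion
 * instead of right after ib_post_send().  "fr_dir" is an assumed new
 * field; the posted patch stores the direction in seg1->mr_dir.
 * rpcrdma_mw and rpcrdma_frmr come from xprt_rdma.h.
 */
#include <rdma/ib_verbs.h>

static void frwr_local_inv_done(struct ib_device *device, struct ib_wc *wc)
{
	/* wr_id carries the rpcrdma_mw, same convention as the patch. */
	struct rpcrdma_mw *mw = (struct rpcrdma_mw *)(uintptr_t)wc->wr_id;
	struct rpcrdma_frmr *frmr = &mw->r.frmr;

	if (wc->status != IB_WC_SUCCESS)
		return;		/* MW recovery path elided */

	/* Only now is the HCA guaranteed to be done with the memory,
	 * so this is the earliest safe point to unmap. */
	ib_dma_unmap_sg(device, frmr->sg, frmr->sg_nents, frmr->fr_dir);

	/* ... then return the MW to the pool with rpcrdma_put_mw(). */
}
```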
```diff
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 0d2f46f600b6..b80a82149977 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
 	f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
 	if (IS_ERR(f->fr_mr))
 		goto out_mr_err;
-	f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth);
-	if (IS_ERR(f->fr_pgl))
+
+	f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL);
+	if (!f->sg)
 		goto out_list_err;
+
+	sg_init_table(f->sg, depth);
+
 	return 0;
 
 out_mr_err:
@@ -163,7 +167,7 @@ out_mr_err:
 	return rc;
 
 out_list_err:
-	rc = PTR_ERR(f->fr_pgl);
+	rc = -ENOMEM;
 	dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n",
 		__func__, rc);
 	ib_dereg_mr(f->fr_mr);
@@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r)
 	if (rc)
 		dprintk("RPC: %s: ib_dereg_mr status %i\n",
 			__func__, rc);
-	ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+	kfree(r->r.frmr.sg);
 }
 
 static int
@@ -312,14 +316,11 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	struct rpcrdma_mw *mw;
 	struct rpcrdma_frmr *frmr;
 	struct ib_mr *mr;
-	struct ib_fast_reg_wr fastreg_wr;
+	struct ib_reg_wr reg_wr;
 	struct ib_send_wr *bad_wr;
+	unsigned int dma_nents;
 	u8 key;
-	int len, pageoff;
-	int i, rc;
-	int seg_len;
-	u64 pa;
-	int page_no;
+	int i, rc, len, n;
 
 	mw = seg1->rl_mw;
 	seg1->rl_mw = NULL;
@@ -332,64 +333,81 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	} while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
 	frmr = &mw->r.frmr;
 	frmr->fr_state = FRMR_IS_VALID;
+	mr = frmr->fr_mr;
 
-	pageoff = offset_in_page(seg1->mr_offset);
-	seg1->mr_offset -= pageoff;	/* start of page */
-	seg1->mr_len += pageoff;
-	len = -pageoff;
 	if (nsegs > ia->ri_max_frmr_depth)
 		nsegs = ia->ri_max_frmr_depth;
 
-	for (page_no = i = 0; i < nsegs;) {
-		rpcrdma_map_one(device, seg, direction);
-		pa = seg->mr_dma;
-		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
-			frmr->fr_pgl->page_list[page_no++] = pa;
-			pa += PAGE_SIZE;
-		}
+	for (len = 0, i = 0; i < nsegs;) {
+		if (seg->mr_page)
+			sg_set_page(&frmr->sg[i],
+				    seg->mr_page,
+				    seg->mr_len,
+				    offset_in_page(seg->mr_offset));
+		else
+			sg_set_buf(&frmr->sg[i], seg->mr_offset,
+				   seg->mr_len);
+
 		len += seg->mr_len;
 		++seg;
 		++i;
+
 		/* Check for holes */
 		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
 		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
 			break;
 	}
+	frmr->sg_nents = i;
+
+	dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction);
+	if (!dma_nents) {
+		pr_err("RPC: %s: failed to dma map sg %p sg_nents %d\n",
+		       __func__, frmr->sg, frmr->sg_nents);
+		return -ENOMEM;
+	}
+
+	n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
+	if (unlikely(n != frmr->sg_nents)) {
+		pr_err("RPC: %s: failed to map mr %p (%d/%d)\n",
+		       __func__, frmr->fr_mr, n, frmr->sg_nents);
+		rc = n < 0 ? n : -EINVAL;
+		goto out_senderr;
+	}
+
 	dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n",
-		__func__, mw, i, len);
-
-	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
-	fastreg_wr.wr.wr_id = (unsigned long)(void *)mw;
-	fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR;
-	fastreg_wr.iova_start = seg1->mr_dma + pageoff;
-	fastreg_wr.page_list = frmr->fr_pgl;
-	fastreg_wr.page_shift = PAGE_SHIFT;
-	fastreg_wr.page_list_len = page_no;
-	fastreg_wr.length = len;
-	fastreg_wr.access_flags = writing ?
-				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
-				IB_ACCESS_REMOTE_READ;
-	mr = frmr->fr_mr;
+		__func__, mw, frmr->sg_nents, mr->length);
+
 	key = (u8)(mr->rkey & 0x000000FF);
 	ib_update_fast_reg_key(mr, ++key);
-	fastreg_wr.rkey = mr->rkey;
+
+	reg_wr.wr.next = NULL;
+	reg_wr.wr.opcode = IB_WR_REG_MR;
+	reg_wr.wr.wr_id = (uintptr_t)mw;
+	reg_wr.wr.num_sge = 0;
+	reg_wr.wr.send_flags = 0;
+	reg_wr.mr = mr;
+	reg_wr.key = mr->rkey;
+	reg_wr.access = writing ?
+			IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+			IB_ACCESS_REMOTE_READ;
 
 	DECR_CQCOUNT(&r_xprt->rx_ep);
-	rc = ib_post_send(ia->ri_id->qp, &fastreg_wr.wr, &bad_wr);
+	rc = ib_post_send(ia->ri_id->qp, &reg_wr.wr, &bad_wr);
 	if (rc)
 		goto out_senderr;
 
+	seg1->mr_dir = direction;
 	seg1->rl_mw = mw;
 	seg1->mr_rkey = mr->rkey;
-	seg1->mr_base = seg1->mr_dma + pageoff;
-	seg1->mr_nsegs = i;
-	seg1->mr_len = len;
-	return i;
+	seg1->mr_base = mr->iova;
+	seg1->mr_nsegs = frmr->sg_nents;
+	seg1->mr_len = mr->length;
+
+	return frmr->sg_nents;
 
 out_senderr:
 	dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc);
-	while (i--)
-		rpcrdma_unmap_one(device, --seg);
+	ib_dma_unmap_sg(device, frmr->sg, frmr->sg_nents, direction);
 	__frwr_queue_recovery(mw);
 	return rc;
 }
@@ -403,28 +421,29 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
 	struct rpcrdma_mr_seg *seg1 = seg;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_mw *mw = seg1->rl_mw;
+	struct rpcrdma_frmr *frmr = &mw->r.frmr;
 	struct ib_send_wr invalidate_wr, *bad_wr;
 	int rc, nsegs = seg->mr_nsegs;
 
 	dprintk("RPC: %s: FRMR %p\n", __func__, mw);
 
 	seg1->rl_mw = NULL;
-	mw->r.frmr.fr_state = FRMR_IS_INVALID;
+	frmr->fr_state = FRMR_IS_INVALID;
 
 	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
 	invalidate_wr.wr_id = (unsigned long)(void *)mw;
 	invalidate_wr.opcode = IB_WR_LOCAL_INV;
-	invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey;
+	invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey;
 	DECR_CQCOUNT(&r_xprt->rx_ep);
 
-	while (seg1->mr_nsegs--)
-		rpcrdma_unmap_one(ia->ri_device, seg++);
 	read_lock(&ia->ri_qplock);
 	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
 	read_unlock(&ia->ri_qplock);
 	if (rc)
 		goto out_err;
 
+	ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);
+
 	rpcrdma_put_mw(r_xprt, mw);
 	return nsegs;
 
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index d252457ff21a..00773636d17e 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -195,7 +195,8 @@ enum rpcrdma_frmr_state {
 };
 
 struct rpcrdma_frmr {
-	struct ib_fast_reg_page_list	*fr_pgl;
+	struct scatterlist		*sg;
+	unsigned int			sg_nents;
 	struct ib_mr			*fr_mr;
 	enum rpcrdma_frmr_state		fr_state;
 	struct work_struct		fr_work;
```