Message ID | 20230105223710.973148-1-yanjun.zhu@intel.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | [PATCHv3,for-next,1/1] RDMA/irdma: Add support for dmabuf pin memory regions | expand |
On Thu, Jan 05, 2023 at 05:37:10PM -0500, Zhu Yanjun wrote: > From: Zhu Yanjun <yanjun.zhu@linux.dev> > > This is a followup to the EFA dmabuf[1]. Irdma driver currently does > not support on-demand-paging(ODP). So it uses habanalabs as the > dmabuf exporter, and irdma as the importer to allow for peer2peer > access through libibverbs. > > In this commit, the function ib_umem_dmabuf_get_pinned() is used. > This function is introduced in EFA dmabuf[1] which allows the driver > to get a dmabuf umem which is pinned and does not require move_notify > callback implementation. The returned umem is pinned and DMA mapped > like standard cpu umems, and is released through ib_umem_release(). > > [1]https://lore.kernel.org/lkml/20211007114018.GD2688930@ziepe.ca/t/ > > Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev> > --- > V2->V3: Simplify the function by removing QP and CQ handling; > V1->V2: Fix the build warning by adding a static; > --- > drivers/infiniband/hw/irdma/verbs.c | 97 +++++++++++++++++++++++++++++ > 1 file changed, 97 insertions(+) > > diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c > index f6973ea55eda..7028b8af87b9 100644 > --- a/drivers/infiniband/hw/irdma/verbs.c > +++ b/drivers/infiniband/hw/irdma/verbs.c > @@ -2912,6 +2912,102 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, > return ERR_PTR(err); > } > > +static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start, > + u64 len, u64 virt, > + int fd, int access, > + struct ib_udata *udata) > +{ > + struct irdma_device *iwdev = to_iwdev(pd->device); > + struct irdma_pble_alloc *palloc; > + struct irdma_pbl *iwpbl; > + struct irdma_mr *iwmr; > + u32 stag = 0; > + bool use_pbles = false; > + int err = -EINVAL; > + struct ib_umem_dmabuf *umem_dmabuf; > + > + if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) > + return ERR_PTR(-EINVAL); > + > + if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN) > + return ERR_PTR(-EINVAL); > + > + 
umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd, > + access); > + if (IS_ERR(umem_dmabuf)) { > + err = PTR_ERR(umem_dmabuf); > + ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n", err); > + return ERR_PTR(err); > + } > + > + iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); > + if (!iwmr) { > + ib_umem_release(&umem_dmabuf->umem); > + return ERR_PTR(-ENOMEM); > + } again, please don't duplicate all this code, refactor the mr code so it can be shared. Jason
在 2023/1/5 21:37, Jason Gunthorpe 写道: > On Thu, Jan 05, 2023 at 05:37:10PM -0500, Zhu Yanjun wrote: >> From: Zhu Yanjun <yanjun.zhu@linux.dev> >> >> This is a followup to the EFA dmabuf[1]. Irdma driver currently does >> not support on-demand-paging(ODP). So it uses habanalabs as the >> dmabuf exporter, and irdma as the importer to allow for peer2peer >> access through libibverbs. >> >> In this commit, the function ib_umem_dmabuf_get_pinned() is used. >> This function is introduced in EFA dmabuf[1] which allows the driver >> to get a dmabuf umem which is pinned and does not require move_notify >> callback implementation. The returned umem is pinned and DMA mapped >> like standard cpu umems, and is released through ib_umem_release(). >> >> [1]https://lore.kernel.org/lkml/20211007114018.GD2688930@ziepe.ca/t/ >> >> Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev> >> --- >> V2->V3: Simplify the function by removing QP and CQ handling; >> V1->V2: Fix the build warning by adding a static; >> --- >> drivers/infiniband/hw/irdma/verbs.c | 97 +++++++++++++++++++++++++++++ >> 1 file changed, 97 insertions(+) >> >> diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c >> index f6973ea55eda..7028b8af87b9 100644 >> --- a/drivers/infiniband/hw/irdma/verbs.c >> +++ b/drivers/infiniband/hw/irdma/verbs.c >> @@ -2912,6 +2912,102 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, >> return ERR_PTR(err); >> } >> >> +static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start, >> + u64 len, u64 virt, >> + int fd, int access, >> + struct ib_udata *udata) >> +{ >> + struct irdma_device *iwdev = to_iwdev(pd->device); >> + struct irdma_pble_alloc *palloc; >> + struct irdma_pbl *iwpbl; >> + struct irdma_mr *iwmr; >> + u32 stag = 0; >> + bool use_pbles = false; >> + int err = -EINVAL; >> + struct ib_umem_dmabuf *umem_dmabuf; >> + >> + if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) >> + return ERR_PTR(-EINVAL); >> + 
>> + if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN) >> + return ERR_PTR(-EINVAL); >> + >> + umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd, >> + access); >> + if (IS_ERR(umem_dmabuf)) { >> + err = PTR_ERR(umem_dmabuf); >> + ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n", err); >> + return ERR_PTR(err); >> + } >> + >> + iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); >> + if (!iwmr) { >> + ib_umem_release(&umem_dmabuf->umem); >> + return ERR_PTR(-ENOMEM); >> + } > > again, please don't duplicate all this code, refactor the mr code so > it can be shared. Got it. I will refactor the mr code and split the shared code into several helper functions. After the commits are merged, I will continue to add the support of dmabuf with the helper functions. Zhu Yanjun > > Jason
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index f6973ea55eda..7028b8af87b9 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -2912,6 +2912,115 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
 	return ERR_PTR(err);
 }
 
+/**
+ * irdma_reg_user_mr_dmabuf - register a pinned dmabuf umem as a memory region
+ * @pd: ibpd pointer
+ * @start: start of the region, passed to ib_umem_dmabuf_get_pinned()
+ * @len: length of the region; rejected if above hw_attrs.max_mr_size
+ * @virt: address stored as the mr iova and the pbl user_base
+ * @fd: dmabuf file descriptor, passed to ib_umem_dmabuf_get_pinned()
+ * @access: access flags for the umem and the hardware mr registration
+ * @udata: user data; only its inlen is validated here
+ *
+ * Return: the new ib_mr on success, an ERR_PTR() encoded errno on failure.
+ */
+static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
+					      u64 len, u64 virt,
+					      int fd, int access,
+					      struct ib_udata *udata)
+{
+	struct irdma_device *iwdev = to_iwdev(pd->device);
+	struct irdma_pble_alloc *palloc;
+	struct irdma_pbl *iwpbl;
+	struct irdma_mr *iwmr;
+	u32 stag = 0;
+	bool use_pbles = false;
+	int err = -EINVAL;
+	struct ib_umem_dmabuf *umem_dmabuf;
+
+	if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
+		return ERR_PTR(-EINVAL);
+
+	if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN)
+		return ERR_PTR(-EINVAL);
+
+	umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd,
+						access);
+	if (IS_ERR(umem_dmabuf)) {
+		err = PTR_ERR(umem_dmabuf);
+		ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
+		return ERR_PTR(err);
+	}
+
+	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+	if (!iwmr) {
+		ib_umem_release(&umem_dmabuf->umem);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	iwpbl = &iwmr->iwpbl;
+	iwpbl->iwmr = iwmr;
+	iwmr->region = &umem_dmabuf->umem;
+	iwmr->ibmr.pd = pd;
+	iwmr->ibmr.device = pd->device;
+	iwmr->ibmr.iova = virt;
+
+	iwmr->page_size = ib_umem_find_best_pgsz(iwmr->region,
+						 iwdev->rf->sc_dev.hw_attrs.page_size_cap,
+						 virt);
+	if (unlikely(!iwmr->page_size)) {
+		/* iwmr->region must not be read once iwmr is freed */
+		kfree(iwmr);
+		ib_umem_release(&umem_dmabuf->umem);
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	iwmr->len = iwmr->region->length;
+	iwpbl->user_base = virt;
+	palloc = &iwpbl->pble_alloc;
+	iwmr->type = IRDMA_MEMREG_TYPE_MEM;
+	iwmr->page_cnt = ib_umem_num_dma_blocks(iwmr->region, iwmr->page_size);
+
+	use_pbles = (iwmr->page_cnt != 1);
+
+	err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, false);
+	if (err)
+		goto error;
+
+	if (use_pbles) {
+		err = irdma_check_mr_contiguous(palloc, iwmr->page_size);
+		if (err) {
+			irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
+			iwpbl->pbl_allocated = false;
+		}
+	}
+
+	stag = irdma_create_stag(iwdev);
+	if (!stag) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	iwmr->stag = stag;
+	iwmr->ibmr.rkey = stag;
+	iwmr->ibmr.lkey = stag;
+	err = irdma_hwreg_mr(iwdev, iwmr, access);
+	if (err) {
+		irdma_free_stag(iwdev, stag);
+		goto error;
+	}
+
+	return &iwmr->ibmr;
+
+error:
+	if (palloc->level != PBLE_LEVEL_0 && iwpbl->pbl_allocated)
+		irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
+	kfree(iwmr);
+	ib_umem_release(&umem_dmabuf->umem);
+
+	return ERR_PTR(err);
+}
+
 /**
  * irdma_reg_phys_mr - register kernel physical memory
  * @pd: ibpd pointer
@@ -4418,6 +4527,7 @@ static const struct ib_device_ops irdma_dev_ops = {
 	.query_port = irdma_query_port,
 	.query_qp = irdma_query_qp,
 	.reg_user_mr = irdma_reg_user_mr,
+	.reg_user_mr_dmabuf = irdma_reg_user_mr_dmabuf,
 	.req_notify_cq = irdma_req_notify_cq,
 	.resize_cq = irdma_resize_cq,
 	INIT_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd),