Message ID | 20230201032115.631656-1-yanjun.zhu@intel.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Jason Gunthorpe |
Headers | show |
Series | [PATCHv3,for-next,1/1] RDMA/irdma: Add support for dmabuf pin memory regions | expand |
在 2023/2/1 11:21, Zhu Yanjun 写道: > From: Zhu Yanjun <yanjun.zhu@linux.dev> > > This is a followup to the EFA dmabuf[1]. Irdma driver currently does > not support on-demand-paging(ODP). So it uses habanalabs as the > dmabuf exporter, and irdma as the importer to allow for peer2peer > access through libibverbs. > > In this commit, the function ib_umem_dmabuf_get_pinned() is used. > This function is introduced in EFA dmabuf[1] which allows the driver > to get a dmabuf umem which is pinned and does not require move_notify > callback implementation. The returned umem is pinned and DMA mapped > like standard cpu umems, and is released through ib_umem_release(). > > [1]https://lore.kernel.org/lkml/20211007114018.GD2688930@ziepe.ca/t/ > > Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev> > --- > V2->V3: Remove unnecessary variable initialization; > Use error handler; > V1->V2: Thanks Shiraz Saleem, he gave me a lot of good suggestions. > This commit is based on the shared functions from refactored > irdma_reg_user_mr. Shiraz Saleem is on vacation and will return to the office on Feb. 13, 2023. We can wait for him. 
Zhu Yanjun > --- > drivers/infiniband/hw/irdma/verbs.c | 45 +++++++++++++++++++++++++++++ > 1 file changed, 45 insertions(+) > > diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c > index 6982f38596c8..7525f4cdf6fb 100644 > --- a/drivers/infiniband/hw/irdma/verbs.c > +++ b/drivers/infiniband/hw/irdma/verbs.c > @@ -2977,6 +2977,50 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, > return ERR_PTR(err); > } > > +static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start, > + u64 len, u64 virt, > + int fd, int access, > + struct ib_udata *udata) > +{ > + struct irdma_device *iwdev = to_iwdev(pd->device); > + struct ib_umem_dmabuf *umem_dmabuf; > + struct irdma_mr *iwmr; > + int err; > + > + if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) > + return ERR_PTR(-EINVAL); > + > + if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN) > + return ERR_PTR(-EINVAL); > + > + umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd, access); > + if (IS_ERR(umem_dmabuf)) { > + err = PTR_ERR(umem_dmabuf); > + ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n", err); > + return ERR_PTR(err); > + } > + > + iwmr = irdma_alloc_iwmr(&umem_dmabuf->umem, pd, virt, IRDMA_MEMREG_TYPE_MEM); > + if (IS_ERR(iwmr)) { > + err = PTR_ERR(iwmr); > + goto err_release; > + } > + > + err = irdma_reg_user_mr_type_mem(iwmr, access); > + if (err) > + goto err_iwmr; > + > + return &iwmr->ibmr; > + > +err_iwmr: > + irdma_free_iwmr(iwmr); > + > +err_release: > + ib_umem_release(&umem_dmabuf->umem); > + > + return ERR_PTR(err); > +} > + > /** > * irdma_reg_phys_mr - register kernel physical memory > * @pd: ibpd pointer > @@ -4483,6 +4527,7 @@ static const struct ib_device_ops irdma_dev_ops = { > .query_port = irdma_query_port, > .query_qp = irdma_query_qp, > .reg_user_mr = irdma_reg_user_mr, > + .reg_user_mr_dmabuf = irdma_reg_user_mr_dmabuf, > .req_notify_cq = irdma_req_notify_cq, > .resize_cq = irdma_resize_cq, > 
INIT_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd),
> Subject: [PATCHv3 for-next 1/1] RDMA/irdma: Add support for dmabuf pin > memory regions > > From: Zhu Yanjun <yanjun.zhu@linux.dev> > > This is a followup to the EFA dmabuf[1]. Irdma driver currently does not support > on-demand-paging(ODP). So it uses habanalabs as the dmabuf exporter, and > irdma as the importer to allow for peer2peer access through libibverbs. > > In this commit, the function ib_umem_dmabuf_get_pinned() is used. > This function is introduced in EFA dmabuf[1] which allows the driver to get a > dmabuf umem which is pinned and does not require move_notify callback > implementation. The returned umem is pinned and DMA mapped like standard cpu > umems, and is released through ib_umem_release(). > > [1]https://lore.kernel.org/lkml/20211007114018.GD2688930@ziepe.ca/t/ > > Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev> > --- > V2->V3: Remove unnecessary variable initialization; > Use error handler; > V1->V2: Thanks Shiraz Saleem, he gave me a lot of good suggestions. > This commit is based on the shared functions from refactored > irdma_reg_user_mr. 
> --- > drivers/infiniband/hw/irdma/verbs.c | 45 +++++++++++++++++++++++++++++ > 1 file changed, 45 insertions(+) > > diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c > index 6982f38596c8..7525f4cdf6fb 100644 > --- a/drivers/infiniband/hw/irdma/verbs.c > +++ b/drivers/infiniband/hw/irdma/verbs.c > @@ -2977,6 +2977,50 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd > *pd, u64 start, u64 len, > return ERR_PTR(err); > } > > +static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start, > + u64 len, u64 virt, > + int fd, int access, > + struct ib_udata *udata) > +{ > + struct irdma_device *iwdev = to_iwdev(pd->device); > + struct ib_umem_dmabuf *umem_dmabuf; > + struct irdma_mr *iwmr; > + int err; > + > + if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) > + return ERR_PTR(-EINVAL); > + > + if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN) > + return ERR_PTR(-EINVAL); Do we need this? we don't copy anything from udata. There is no info passed via ABI struct irdma_mem_reg_req. 
> + > + umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd, > access); > + if (IS_ERR(umem_dmabuf)) { > + err = PTR_ERR(umem_dmabuf); > + ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n", > err); > + return ERR_PTR(err); > + } > + > + iwmr = irdma_alloc_iwmr(&umem_dmabuf->umem, pd, virt, > IRDMA_MEMREG_TYPE_MEM); > + if (IS_ERR(iwmr)) { > + err = PTR_ERR(iwmr); > + goto err_release; > + } > + > + err = irdma_reg_user_mr_type_mem(iwmr, access); > + if (err) > + goto err_iwmr; > + > + return &iwmr->ibmr; > + > +err_iwmr: > + irdma_free_iwmr(iwmr); > + > +err_release: > + ib_umem_release(&umem_dmabuf->umem); > + > + return ERR_PTR(err); > +} > + > /** > * irdma_reg_phys_mr - register kernel physical memory > * @pd: ibpd pointer > @@ -4483,6 +4527,7 @@ static const struct ib_device_ops irdma_dev_ops = { > .query_port = irdma_query_port, > .query_qp = irdma_query_qp, > .reg_user_mr = irdma_reg_user_mr, > + .reg_user_mr_dmabuf = irdma_reg_user_mr_dmabuf, > .req_notify_cq = irdma_req_notify_cq, > .resize_cq = irdma_resize_cq, > INIT_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd), > -- Reviewed-by: Shiraz Saleem <shiraz.saleem@intel.com>
On Wed, Feb 01, 2023 at 11:21:15AM +0800, Zhu Yanjun wrote: > From: Zhu Yanjun <yanjun.zhu@linux.dev> > > This is a followup to the EFA dmabuf[1]. Irdma driver currently does > not support on-demand-paging(ODP). So it uses habanalabs as the > dmabuf exporter, and irdma as the importer to allow for peer2peer > access through libibverbs. > > In this commit, the function ib_umem_dmabuf_get_pinned() is used. > This function is introduced in EFA dmabuf[1] which allows the driver > to get a dmabuf umem which is pinned and does not require move_notify > callback implementation. The returned umem is pinned and DMA mapped > like standard cpu umems, and is released through ib_umem_release(). > > [1]https://lore.kernel.org/lkml/20211007114018.GD2688930@ziepe.ca/t/ > > Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev> > Reviewed-by: Shiraz Saleem <shiraz.saleem@intel.com> > --- > V2->V3: Remove unnecessary variable initialization; > Use error handler; > V1->V2: Thanks Shiraz Saleem, he gave me a lot of good suggestions. > This commit is based on the shared functions from refactored > irdma_reg_user_mr. 
> --- > drivers/infiniband/hw/irdma/verbs.c | 45 +++++++++++++++++++++++++++++ > 1 file changed, 45 insertions(+) > > diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c > index 6982f38596c8..7525f4cdf6fb 100644 > --- a/drivers/infiniband/hw/irdma/verbs.c > +++ b/drivers/infiniband/hw/irdma/verbs.c > @@ -2977,6 +2977,50 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, > return ERR_PTR(err); > } > > +static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start, > + u64 len, u64 virt, > + int fd, int access, > + struct ib_udata *udata) > +{ > + struct irdma_device *iwdev = to_iwdev(pd->device); > + struct ib_umem_dmabuf *umem_dmabuf; > + struct irdma_mr *iwmr; > + int err; > + > + if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) > + return ERR_PTR(-EINVAL); > + > + if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN) > + return ERR_PTR(-EINVAL); Shiraz is correct, I'm wondering how this even works. This is a new style uAPI without UVERBS_ATTR_UHW so inlen should always be 0. How did you manage to test this?? Jason
在 2023/2/16 23:03, Jason Gunthorpe 写道: > On Wed, Feb 01, 2023 at 11:21:15AM +0800, Zhu Yanjun wrote: >> From: Zhu Yanjun <yanjun.zhu@linux.dev> >> >> This is a followup to the EFA dmabuf[1]. Irdma driver currently does >> not support on-demand-paging(ODP). So it uses habanalabs as the >> dmabuf exporter, and irdma as the importer to allow for peer2peer >> access through libibverbs. >> >> In this commit, the function ib_umem_dmabuf_get_pinned() is used. >> This function is introduced in EFA dmabuf[1] which allows the driver >> to get a dmabuf umem which is pinned and does not require move_notify >> callback implementation. The returned umem is pinned and DMA mapped >> like standard cpu umems, and is released through ib_umem_release(). >> >> [1]https://lore.kernel.org/lkml/20211007114018.GD2688930@ziepe.ca/t/ >> >> Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev> >> Reviewed-by: Shiraz Saleem <shiraz.saleem@intel.com> >> --- >> V2->V3: Remove unnecessary variable initialization; >> Use error handler; >> V1->V2: Thanks Shiraz Saleem, he gave me a lot of good suggestions. >> This commit is based on the shared functions from refactored >> irdma_reg_user_mr. 
>> --- >> drivers/infiniband/hw/irdma/verbs.c | 45 +++++++++++++++++++++++++++++ >> 1 file changed, 45 insertions(+) >> >> diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c >> index 6982f38596c8..7525f4cdf6fb 100644 >> --- a/drivers/infiniband/hw/irdma/verbs.c >> +++ b/drivers/infiniband/hw/irdma/verbs.c >> @@ -2977,6 +2977,50 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, >> return ERR_PTR(err); >> } >> >> +static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start, >> + u64 len, u64 virt, >> + int fd, int access, >> + struct ib_udata *udata) >> +{ >> + struct irdma_device *iwdev = to_iwdev(pd->device); >> + struct ib_umem_dmabuf *umem_dmabuf; >> + struct irdma_mr *iwmr; >> + int err; >> + >> + if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) >> + return ERR_PTR(-EINVAL); >> + >> + if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN) >> + return ERR_PTR(-EINVAL); > Shiraz is correct, I'm wondering how this even works. This is a new > style uAPI without UVERBS_ATTR_UHW so inlen should always be 0. Got it. Thanks Shiraz and Jason. I will remove the test of inlen in the latest commit. Best Regards, Zhu Yanjun > > How did you manage to test this?? > > Jason
在 2023/2/15 21:38, Saleem, Shiraz 写道: >> Subject: [PATCHv3 for-next 1/1] RDMA/irdma: Add support for dmabuf pin >> memory regions >> >> From: Zhu Yanjun <yanjun.zhu@linux.dev> >> >> This is a followup to the EFA dmabuf[1]. Irdma driver currently does not support >> on-demand-paging(ODP). So it uses habanalabs as the dmabuf exporter, and >> irdma as the importer to allow for peer2peer access through libibverbs. >> >> In this commit, the function ib_umem_dmabuf_get_pinned() is used. >> This function is introduced in EFA dmabuf[1] which allows the driver to get a >> dmabuf umem which is pinned and does not require move_notify callback >> implementation. The returned umem is pinned and DMA mapped like standard cpu >> umems, and is released through ib_umem_release(). >> >> [1]https://lore.kernel.org/lkml/20211007114018.GD2688930@ziepe.ca/t/ >> >> Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev> >> --- >> V2->V3: Remove unnecessary variable initialization; >> Use error handler; >> V1->V2: Thanks Shiraz Saleem, he gave me a lot of good suggestions. >> This commit is based on the shared functions from refactored >> irdma_reg_user_mr. 
>> --- >> drivers/infiniband/hw/irdma/verbs.c | 45 +++++++++++++++++++++++++++++ >> 1 file changed, 45 insertions(+) >> >> diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c >> index 6982f38596c8..7525f4cdf6fb 100644 >> --- a/drivers/infiniband/hw/irdma/verbs.c >> +++ b/drivers/infiniband/hw/irdma/verbs.c >> @@ -2977,6 +2977,50 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd >> *pd, u64 start, u64 len, >> return ERR_PTR(err); >> } >> >> +static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start, >> + u64 len, u64 virt, >> + int fd, int access, >> + struct ib_udata *udata) >> +{ >> + struct irdma_device *iwdev = to_iwdev(pd->device); >> + struct ib_umem_dmabuf *umem_dmabuf; >> + struct irdma_mr *iwmr; >> + int err; >> + >> + if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) >> + return ERR_PTR(-EINVAL); >> + >> + if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN) >> + return ERR_PTR(-EINVAL); > Do we need this? we don't copy anything from udata. There is no info passed via ABI struct irdma_mem_reg_req. Got it. I will remove the inlen test and send a new commit out. 
> >> + >> + umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd, >> access); >> + if (IS_ERR(umem_dmabuf)) { >> + err = PTR_ERR(umem_dmabuf); >> + ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n", >> err); >> + return ERR_PTR(err); >> + } >> + >> + iwmr = irdma_alloc_iwmr(&umem_dmabuf->umem, pd, virt, >> IRDMA_MEMREG_TYPE_MEM); >> + if (IS_ERR(iwmr)) { >> + err = PTR_ERR(iwmr); >> + goto err_release; >> + } >> + >> + err = irdma_reg_user_mr_type_mem(iwmr, access); >> + if (err) >> + goto err_iwmr; >> + >> + return &iwmr->ibmr; >> + >> +err_iwmr: >> + irdma_free_iwmr(iwmr); >> + >> +err_release: >> + ib_umem_release(&umem_dmabuf->umem); >> + >> + return ERR_PTR(err); >> +} >> + >> /** >> * irdma_reg_phys_mr - register kernel physical memory >> * @pd: ibpd pointer >> @@ -4483,6 +4527,7 @@ static const struct ib_device_ops irdma_dev_ops = { >> .query_port = irdma_query_port, >> .query_qp = irdma_query_qp, >> .reg_user_mr = irdma_reg_user_mr, >> + .reg_user_mr_dmabuf = irdma_reg_user_mr_dmabuf, >> .req_notify_cq = irdma_req_notify_cq, >> .resize_cq = irdma_resize_cq, >> INIT_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd), >> -- > Reviewed-by: Shiraz Saleem <shiraz.saleem@intel.com> Thanks. Zhu Yanjun >
On Fri, Feb 17, 2023 at 08:46:52AM +0800, Zhu Yanjun wrote: > > 在 2023/2/16 23:03, Jason Gunthorpe 写道: > > On Wed, Feb 01, 2023 at 11:21:15AM +0800, Zhu Yanjun wrote: > > > From: Zhu Yanjun <yanjun.zhu@linux.dev> > > > > > > This is a followup to the EFA dmabuf[1]. Irdma driver currently does > > > not support on-demand-paging(ODP). So it uses habanalabs as the > > > dmabuf exporter, and irdma as the importer to allow for peer2peer > > > access through libibverbs. > > > > > > In this commit, the function ib_umem_dmabuf_get_pinned() is used. > > > This function is introduced in EFA dmabuf[1] which allows the driver > > > to get a dmabuf umem which is pinned and does not require move_notify > > > callback implementation. The returned umem is pinned and DMA mapped > > > like standard cpu umems, and is released through ib_umem_release(). > > > > > > [1]https://lore.kernel.org/lkml/20211007114018.GD2688930@ziepe.ca/t/ > > > > > > Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev> > > > Reviewed-by: Shiraz Saleem <shiraz.saleem@intel.com> > > > --- > > > V2->V3: Remove unnecessary variable initialization; > > > Use error handler; > > > V1->V2: Thanks Shiraz Saleem, he gave me a lot of good suggestions. > > > This commit is based on the shared functions from refactored > > > irdma_reg_user_mr. 
> > > --- > > > drivers/infiniband/hw/irdma/verbs.c | 45 +++++++++++++++++++++++++++++ > > > 1 file changed, 45 insertions(+) > > > > > > diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c > > > index 6982f38596c8..7525f4cdf6fb 100644 > > > --- a/drivers/infiniband/hw/irdma/verbs.c > > > +++ b/drivers/infiniband/hw/irdma/verbs.c > > > @@ -2977,6 +2977,50 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, > > > return ERR_PTR(err); > > > } > > > +static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start, > > > + u64 len, u64 virt, > > > + int fd, int access, > > > + struct ib_udata *udata) > > > +{ > > > + struct irdma_device *iwdev = to_iwdev(pd->device); > > > + struct ib_umem_dmabuf *umem_dmabuf; > > > + struct irdma_mr *iwmr; > > > + int err; > > > + > > > + if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) > > > + return ERR_PTR(-EINVAL); > > > + > > > + if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN) > > > + return ERR_PTR(-EINVAL); > > Shiraz is correct, I'm wondering how this even works. This is a new > > style uAPI without UVERBS_ATTR_UHW so inlen should always be 0. > > Got it. Thanks Shiraz and Jason. > > I will remove the test of inlen in the latest commit. Please answer: how did you manage to test this? Jason
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 6982f38596c8..7525f4cdf6fb 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2977,6 +2977,50 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, return ERR_PTR(err); } +static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start, + u64 len, u64 virt, + int fd, int access, + struct ib_udata *udata) +{ + struct irdma_device *iwdev = to_iwdev(pd->device); + struct ib_umem_dmabuf *umem_dmabuf; + struct irdma_mr *iwmr; + int err; + + if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) + return ERR_PTR(-EINVAL); + + if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN) + return ERR_PTR(-EINVAL); + + umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd, access); + if (IS_ERR(umem_dmabuf)) { + err = PTR_ERR(umem_dmabuf); + ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n", err); + return ERR_PTR(err); + } + + iwmr = irdma_alloc_iwmr(&umem_dmabuf->umem, pd, virt, IRDMA_MEMREG_TYPE_MEM); + if (IS_ERR(iwmr)) { + err = PTR_ERR(iwmr); + goto err_release; + } + + err = irdma_reg_user_mr_type_mem(iwmr, access); + if (err) + goto err_iwmr; + + return &iwmr->ibmr; + +err_iwmr: + irdma_free_iwmr(iwmr); + +err_release: + ib_umem_release(&umem_dmabuf->umem); + + return ERR_PTR(err); +} + /** * irdma_reg_phys_mr - register kernel physical memory * @pd: ibpd pointer @@ -4483,6 +4527,7 @@ static const struct ib_device_ops irdma_dev_ops = { .query_port = irdma_query_port, .query_qp = irdma_query_qp, .reg_user_mr = irdma_reg_user_mr, + .reg_user_mr_dmabuf = irdma_reg_user_mr_dmabuf, .req_notify_cq = irdma_req_notify_cq, .resize_cq = irdma_resize_cq, INIT_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd),