Message ID | 20230103013433.341997-1-yanjun.zhu@intel.com (mailing list archive)
---|---
State | Superseded
Series | [1/1] RDMA/irdma: Add support for dmabuf pin memory regions
Hi Zhu,

I love your patch! Perhaps something to improve:

[auto build test WARNING on rdma/for-next]
[also build test WARNING on linus/master v6.2-rc2 next-20221226]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Zhu-Yanjun/RDMA-irdma-Add-support-for-dmabuf-pin-memory-regions/20230102-170926
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
patch link:    https://lore.kernel.org/r/20230103013433.341997-1-yanjun.zhu%40intel.com
patch subject: [PATCH 1/1] RDMA/irdma: Add support for dmabuf pin memory regions
config: ia64-allyesconfig
compiler: ia64-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/a8500f730889793bc8ad2a6116a31035fdd34c0e
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Zhu-Yanjun/RDMA-irdma-Add-support-for-dmabuf-pin-memory-regions/20230102-170926
        git checkout a8500f730889793bc8ad2a6116a31035fdd34c0e
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=ia64 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=ia64 SHELL=/bin/bash drivers/infiniband/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/infiniband/hw/irdma/verbs.c:2915:15: warning: no previous prototype for 'irdma_reg_user_mr_dmabuf' [-Wmissing-prototypes]
    2915 | struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
         |               ^~~~~~~~~~~~~~~~~~~~~~~~

vim +/irdma_reg_user_mr_dmabuf +2915 drivers/infiniband/hw/irdma/verbs.c

  2914
> 2915  struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
  2916                                         u64 len, u64 virt,
  2917                                         int fd, int access,
  2918                                         struct ib_udata *udata)

[... full function context elided; the complete body appears in the patch
at the end of this thread ...]
Hi Zhu,

I love your patch! Perhaps something to improve:

[auto build test WARNING on rdma/for-next]
[also build test WARNING on linus/master v6.2-rc2 next-20221226]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Zhu-Yanjun/RDMA-irdma-Add-support-for-dmabuf-pin-memory-regions/20230102-170926
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
patch link:    https://lore.kernel.org/r/20230103013433.341997-1-yanjun.zhu%40intel.com
patch subject: [PATCH 1/1] RDMA/irdma: Add support for dmabuf pin memory regions
config: i386-randconfig-a016-20230102
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/a8500f730889793bc8ad2a6116a31035fdd34c0e
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Zhu-Yanjun/RDMA-irdma-Add-support-for-dmabuf-pin-memory-regions/20230102-170926
        git checkout a8500f730889793bc8ad2a6116a31035fdd34c0e
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/infiniband/hw/irdma/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/infiniband/hw/irdma/verbs.c:2915:15: warning: no previous prototype for function 'irdma_reg_user_mr_dmabuf' [-Wmissing-prototypes]
   struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
                 ^
   drivers/infiniband/hw/irdma/verbs.c:2915:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
   ^
   static
   1 warning generated.
vim +/irdma_reg_user_mr_dmabuf +2915 drivers/infiniband/hw/irdma/verbs.c

[... function context identical to the first report; the complete body
appears in the patch at the end of this thread ...]
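Both robots flag the same root cause: irdma_reg_user_mr_dmabuf() is defined with external linkage but never declared in any header, which W=1 builds reject via -Wmissing-prototypes. A minimal illustration of the warning and its two usual fixes (generic C, not irdma code):

        /* foo.c -- external linkage with no prior declaration triggers
         * -Wmissing-prototypes under gcc and clang W=1 builds. */
        int add_one(int x)              /* warning: no previous prototype */
        {
                return x + 1;
        }

        /* Fix (a): add a declaration to a shared header.
         * Fix (b): give the function internal linkage when, as here, it is
         * only referenced inside its own translation unit (irdma reaches it
         * through the irdma_dev_ops table in the same file). */
        static int add_one_fixed(int x)
        {
                return x + 1;
        }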
On Mon, Jan 02, 2023 at 08:34:33PM -0500, Zhu Yanjun wrote:
> From: Zhu Yanjun <yanjun.zhu@linux.dev>
>
> This is a followup to the EFA dmabuf[1]. Irdma driver currently does
> not support on-demand-paging(ODP). So it uses habanalabs as the
> dmabuf exporter, and irdma as the importer to allow for peer2peer
> access through libibverbs.
>
> In this commit, the function ib_umem_dmabuf_get_pinned() is used.
> This function is introduced in EFA dmabuf[1] which allows the driver
> to get a dmabuf umem which is pinned and does not require move_notify
> callback implementation. The returned umem is pinned and DMA mapped
> like standard cpu umems, and is released through ib_umem_release().
>
> [1]https://lore.kernel.org/lkml/20211007114018.GD2688930@ziepe.ca/t/
>
> Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
> ---
>  drivers/infiniband/hw/irdma/verbs.c | 158 ++++++++++++++++++++++++++++
>  1 file changed, 158 insertions(+)
>
> diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
> index f6973ea55eda..76dc6e65930a 100644
> --- a/drivers/infiniband/hw/irdma/verbs.c
> +++ b/drivers/infiniband/hw/irdma/verbs.c
> @@ -2912,6 +2912,163 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
>  	return ERR_PTR(err);
>  }
>
> +struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
> +				       u64 len, u64 virt,
> +				       int fd, int access,
> +				       struct ib_udata *udata)

kbuild complained about this line, it should be "static struct ..."

And please use target in your patches: rdma-next/rdma-rc.

Thanks
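For reference, a minimal sketch of the change Leon asks for: only the storage-class specifier changes, since the function is referenced solely through the irdma_dev_ops table in the same translation unit (see the patch below).

        /* Internal linkage needs no prior prototype, which resolves the
         * -Wmissing-prototypes warning reported by the robots above. */
        static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
                                                      u64 len, u64 virt,
                                                      int fd, int access,
                                                      struct ib_udata *udata)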
On 2023/1/3 17:37, Leon Romanovsky wrote:
> On Mon, Jan 02, 2023 at 08:34:33PM -0500, Zhu Yanjun wrote:
>> From: Zhu Yanjun <yanjun.zhu@linux.dev>
>>
>> [... commit message trimmed; quoted in full above ...]
>>
>> +struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
>> +				        u64 len, u64 virt,
>> +				        int fd, int access,
>> +				        struct ib_udata *udata)
>
> kbuild complained about this line, it should be "static struct ..."
>
> And please use target in your patches: rdma-next/rdma-rc.

static is added. Please check V2.

Thanks and Regards,
Zhu Yanjun

>
> Thanks
On Mon, Jan 02, 2023 at 08:34:33PM -0500, Zhu Yanjun wrote:
> From: Zhu Yanjun <yanjun.zhu@linux.dev>
>
> [... commit message and most of the quoted patch trimmed; the full
> patch appears at the end of this thread ...]
>
> +	iwpbl = &iwmr->iwpbl;
> +	iwpbl->iwmr = iwmr;
> +	iwmr->region = &umem_dmabuf->umem;
> +	iwmr->ibmr.pd = pd;
> +	iwmr->ibmr.device = pd->device;
> +	iwmr->ibmr.iova = virt;
> +	iwmr->page_size = PAGE_SIZE;
> +
> +	if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) {
> +		iwmr->page_size = ib_umem_find_best_pgsz(iwmr->region,
> +							 iwdev->rf->sc_dev.hw_attrs.page_size_cap,
> +							 virt);

You can't call rdma_umem_for_each_dma_block() without also calling
this function to validate that the page_size passed to
rdma_umem_for_each_dma_block() is correct.

This seems to be an existing bug, please fix it.

Also, is there a reason this code is all duplicated from
irdma_reg_user_mr? Please split things up like the other drivers to
obtain the umem then use shared code to process the umem as required.

Jason
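A sketch of the validation pattern Jason's first point describes. ib_umem_find_best_pgsz(), rdma_umem_for_each_dma_block() and rdma_block_iter_dma_address() are the in-tree ib_umem/ib_verbs API; pgsz_bitmap stands in for the device capability mask (hw_attrs.page_size_cap in this driver), and the loop body is illustrative:

        struct ib_block_iter biter;

        /* ib_umem_find_best_pgsz() returns 0 when no supported page size
         * can express the umem at this iova; walking DMA blocks with an
         * unvalidated size would produce wrong HW mappings. */
        iwmr->page_size = ib_umem_find_best_pgsz(region, pgsz_bitmap, virt);
        if (!iwmr->page_size)
                return -EOPNOTSUPP;

        rdma_umem_for_each_dma_block(region, &biter, iwmr->page_size) {
                /* program rdma_block_iter_dma_address(&biter) into the
                 * HW page table at this block's slot */
        }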
On 2023/1/4 7:11, Jason Gunthorpe wrote:
> On Mon, Jan 02, 2023 at 08:34:33PM -0500, Zhu Yanjun wrote:
>> [... commit message and quoted patch trimmed; the full patch appears
>> at the end of this thread ...]
>>
>> +	if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) {
>> +		iwmr->page_size = ib_umem_find_best_pgsz(iwmr->region,
>> +							 iwdev->rf->sc_dev.hw_attrs.page_size_cap,
>> +							 virt);
>
> You can't call rdma_umem_for_each_dma_block() without also calling
> this function to validate that the page_size passed to
> rdma_umem_for_each_dma_block() is correct.

Got it. I will fix it in the latest commit.

>
> This seems to be an existing bug, please fix it.
>
> Also, is there a reason this code is all duplicated from
> irdma_reg_user_mr? Please split things up like the other drivers to
> obtain the umem then use shared code to process the umem as required.

Got it.

Zhu Yanjun

>
> Jason
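To make Jason's second point concrete, a rough sketch of the suggested split; the shared helper irdma_reg_user_mr_common() is a hypothetical name for illustration, not the actual V2 code. Each entry point obtains its own umem, then hands off to common processing:

        /* Hypothetical refactor sketch; the helper name is illustrative. */
        static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
                                                      u64 len, u64 virt, int fd,
                                                      int access,
                                                      struct ib_udata *udata)
        {
                struct irdma_device *iwdev = to_iwdev(pd->device);
                struct ib_umem_dmabuf *umem_dmabuf;

                if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
                        return ERR_PTR(-EINVAL);

                /* dmabuf-specific step: pin the exporter's buffer */
                umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len,
                                                        fd, access);
                if (IS_ERR(umem_dmabuf))
                        return ERR_CAST(umem_dmabuf);

                /* shared with irdma_reg_user_mr(): page-size selection,
                 * PBLE setup, stag creation and HW registration */
                return irdma_reg_user_mr_common(pd, &umem_dmabuf->umem, virt,
                                                access, udata);
        }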
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index f6973ea55eda..76dc6e65930a 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -2912,6 +2912,163 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
 	return ERR_PTR(err);
 }
 
+struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
+				       u64 len, u64 virt,
+				       int fd, int access,
+				       struct ib_udata *udata)
+{
+	struct irdma_device *iwdev = to_iwdev(pd->device);
+	struct irdma_ucontext *ucontext;
+	struct irdma_pble_alloc *palloc;
+	struct irdma_pbl *iwpbl;
+	struct irdma_mr *iwmr;
+	struct irdma_mem_reg_req req;
+	u32 total, stag = 0;
+	u8 shadow_pgcnt = 1;
+	bool use_pbles = false;
+	unsigned long flags;
+	int err = -EINVAL;
+	struct ib_umem_dmabuf *umem_dmabuf;
+
+	if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
+		return ERR_PTR(-EINVAL);
+
+	if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN)
+		return ERR_PTR(-EINVAL);
+
+	umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd,
+						access);
+	if (IS_ERR(umem_dmabuf)) {
+		err = PTR_ERR(umem_dmabuf);
+		ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
+		return ERR_PTR(err);
+	}
+
+	if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) {
+		ib_umem_release(&umem_dmabuf->umem);
+		return ERR_PTR(-EFAULT);
+	}
+
+	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+	if (!iwmr) {
+		ib_umem_release(&umem_dmabuf->umem);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	iwpbl = &iwmr->iwpbl;
+	iwpbl->iwmr = iwmr;
+	iwmr->region = &umem_dmabuf->umem;
+	iwmr->ibmr.pd = pd;
+	iwmr->ibmr.device = pd->device;
+	iwmr->ibmr.iova = virt;
+	iwmr->page_size = PAGE_SIZE;
+
+	if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) {
+		iwmr->page_size = ib_umem_find_best_pgsz(iwmr->region,
+							 iwdev->rf->sc_dev.hw_attrs.page_size_cap,
+							 virt);
+		if (unlikely(!iwmr->page_size)) {
+			kfree(iwmr);
+			ib_umem_release(iwmr->region);
+			return ERR_PTR(-EOPNOTSUPP);
+		}
+	}
+	iwmr->len = iwmr->region->length;
+	iwpbl->user_base = virt;
+	palloc = &iwpbl->pble_alloc;
+	iwmr->type = req.reg_type;
+	iwmr->page_cnt = ib_umem_num_dma_blocks(iwmr->region, iwmr->page_size);
+
+	switch (req.reg_type) {
+	case IRDMA_MEMREG_TYPE_QP:
+		total = req.sq_pages + req.rq_pages + shadow_pgcnt;
+		if (total > iwmr->page_cnt) {
+			err = -EINVAL;
+			goto error;
+		}
+		total = req.sq_pages + req.rq_pages;
+		use_pbles = (total > 2);
+		err = irdma_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
+		if (err)
+			goto error;
+
+		ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+						     ibucontext);
+		spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+		list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
+		iwpbl->on_list = true;
+		spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+		break;
+	case IRDMA_MEMREG_TYPE_CQ:
+		if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE)
+			shadow_pgcnt = 0;
+		total = req.cq_pages + shadow_pgcnt;
+		if (total > iwmr->page_cnt) {
+			err = -EINVAL;
+			goto error;
+		}
+
+		use_pbles = (req.cq_pages > 1);
+		err = irdma_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
+		if (err)
+			goto error;
+
+		ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+						     ibucontext);
+		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+		list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
+		iwpbl->on_list = true;
+		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+		break;
+	case IRDMA_MEMREG_TYPE_MEM:
+		use_pbles = (iwmr->page_cnt != 1);
+
+		err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, false);
+		if (err)
+			goto error;
+
+		if (use_pbles) {
+			err = irdma_check_mr_contiguous(palloc,
+							iwmr->page_size);
+			if (err) {
+				irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
+				iwpbl->pbl_allocated = false;
+			}
+		}
+
+		stag = irdma_create_stag(iwdev);
+		if (!stag) {
+			err = -ENOMEM;
+			goto error;
+		}
+
+		iwmr->stag = stag;
+		iwmr->ibmr.rkey = stag;
+		iwmr->ibmr.lkey = stag;
+		err = irdma_hwreg_mr(iwdev, iwmr, access);
+		if (err) {
+			irdma_free_stag(iwdev, stag);
+			goto error;
+		}
+
+		break;
+	default:
+		goto error;
+	}
+
+	iwmr->type = req.reg_type;
+
+	return &iwmr->ibmr;
+
+error:
+	if (palloc->level != PBLE_LEVEL_0 && iwpbl->pbl_allocated)
+		irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
+	ib_umem_release(iwmr->region);
+	kfree(iwmr);
+
+	return ERR_PTR(err);
+}
+
 /**
  * irdma_reg_phys_mr - register kernel physical memory
  * @pd: ibpd pointer
@@ -4418,6 +4575,7 @@ static const struct ib_device_ops irdma_dev_ops = {
 	.query_port = irdma_query_port,
 	.query_qp = irdma_query_qp,
 	.reg_user_mr = irdma_reg_user_mr,
+	.reg_user_mr_dmabuf = irdma_reg_user_mr_dmabuf,
 	.req_notify_cq = irdma_req_notify_cq,
 	.resize_cq = irdma_resize_cq,
 	INIT_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd),
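For context on how this verb is exercised from userspace: rdma-core exposes ibv_reg_dmabuf_mr(), which reaches the driver's .reg_user_mr_dmabuf hook registered above. A minimal sketch, assuming dmabuf_fd was exported by a peer device (e.g. habanalabs, per the commit message) and pd is an existing protection domain:

        #include <stdio.h>
        #include <infiniband/verbs.h>

        /* Userspace sketch (rdma-core API); pd, length and dmabuf_fd are
         * assumed to exist already. */
        struct ibv_mr *mr;

        mr = ibv_reg_dmabuf_mr(pd, 0 /* offset into the dmabuf */, length,
                               0 /* iova */, dmabuf_fd,
                               IBV_ACCESS_LOCAL_WRITE |
                               IBV_ACCESS_REMOTE_READ |
                               IBV_ACCESS_REMOTE_WRITE);
        if (!mr)
                /* errno carries the kernel's error, e.g. EOPNOTSUPP */
                perror("ibv_reg_dmabuf_mr");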