[1/1] RDMA/irdma: Add support for dmabuf pin memory regions

Message ID 20230103013433.341997-1-yanjun.zhu@intel.com (mailing list archive)
State Superseded
Series [1/1] RDMA/irdma: Add support for dmabuf pin memory regions

Commit Message

Zhu Yanjun Jan. 3, 2023, 1:34 a.m. UTC
From: Zhu Yanjun <yanjun.zhu@linux.dev>

This is a follow-up to the EFA dmabuf work[1]. The irdma driver does not
currently support on-demand paging (ODP), so it uses habanalabs as the
dmabuf exporter and irdma as the importer to allow peer-to-peer access
through libibverbs.

This commit uses ib_umem_dmabuf_get_pinned(), introduced in the EFA
dmabuf work[1], which allows the driver to get a dmabuf umem that is
pinned and does not require a move_notify callback implementation. The
returned umem is pinned and DMA mapped like a standard CPU umem, and is
released through ib_umem_release().

[1] https://lore.kernel.org/lkml/20211007114018.GD2688930@ziepe.ca/t/

Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
---
 drivers/infiniband/hw/irdma/verbs.c | 158 ++++++++++++++++++++++++++++
 1 file changed, 158 insertions(+)
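
For context, a minimal sketch of the pinned dmabuf import/release pattern
this patch relies on is shown below. The wrapper function is a
hypothetical illustration; ib_umem_dmabuf_get_pinned() and
ib_umem_release() are the in-tree entry points:

	/* Sketch only: obtain a pinned, DMA-mapped dmabuf umem. */
	static struct ib_umem *get_pinned_dmabuf_umem(struct ib_device *device,
						      u64 start, u64 len,
						      int fd, int access)
	{
		struct ib_umem_dmabuf *umem_dmabuf;

		/* Pins and DMA maps the exporter's buffer; a pinned umem
		 * needs no move_notify callback.
		 */
		umem_dmabuf = ib_umem_dmabuf_get_pinned(device, start, len,
							fd, access);
		if (IS_ERR(umem_dmabuf))
			return ERR_CAST(umem_dmabuf);

		/* The embedded umem is then used like a standard CPU umem. */
		return &umem_dmabuf->umem;
	}

Teardown is the usual ib_umem_release(umem) call, which unpins and unmaps
the dmabuf.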

Comments

kernel test robot Jan. 2, 2023, 10:39 a.m. UTC | #1
Hi Zhu,

I love your patch! Perhaps something to improve:

[auto build test WARNING on rdma/for-next]
[also build test WARNING on linus/master v6.2-rc2 next-20221226]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Zhu-Yanjun/RDMA-irdma-Add-support-for-dmabuf-pin-memory-regions/20230102-170926
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
patch link:    https://lore.kernel.org/r/20230103013433.341997-1-yanjun.zhu%40intel.com
patch subject: [PATCH 1/1] RDMA/irdma: Add support for dmabuf pin memory regions
config: ia64-allyesconfig
compiler: ia64-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/a8500f730889793bc8ad2a6116a31035fdd34c0e
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Zhu-Yanjun/RDMA-irdma-Add-support-for-dmabuf-pin-memory-regions/20230102-170926
        git checkout a8500f730889793bc8ad2a6116a31035fdd34c0e
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=ia64 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=ia64 SHELL=/bin/bash drivers/infiniband/

If you fix the issue, kindly add the following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/infiniband/hw/irdma/verbs.c:2915:15: warning: no previous prototype for 'irdma_reg_user_mr_dmabuf' [-Wmissing-prototypes]
    2915 | struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
         |               ^~~~~~~~~~~~~~~~~~~~~~~~


vim +/irdma_reg_user_mr_dmabuf +2915 drivers/infiniband/hw/irdma/verbs.c

  2914	
> 2915	struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
  2916					       u64 len, u64 virt,
  2917					       int fd, int access,
  2918					       struct ib_udata *udata)
  2919	{
  2920		struct irdma_device *iwdev = to_iwdev(pd->device);
  2921		struct irdma_ucontext *ucontext;
  2922		struct irdma_pble_alloc *palloc;
  2923		struct irdma_pbl *iwpbl;
  2924		struct irdma_mr *iwmr;
  2925		struct irdma_mem_reg_req req;
  2926		u32 total, stag = 0;
  2927		u8 shadow_pgcnt = 1;
  2928		bool use_pbles = false;
  2929		unsigned long flags;
  2930		int err = -EINVAL;
  2931		struct ib_umem_dmabuf *umem_dmabuf;
  2932	
  2933		if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
  2934			return ERR_PTR(-EINVAL);
  2935	
  2936		if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN)
  2937			return ERR_PTR(-EINVAL);
  2938	
  2939		umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd,
  2940							access);
  2941		if (IS_ERR(umem_dmabuf)) {
  2942			err = PTR_ERR(umem_dmabuf);
  2943			ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
  2944			return ERR_PTR(err);
  2945		}
  2946	
  2947		if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) {
  2948			ib_umem_release(&umem_dmabuf->umem);
  2949			return ERR_PTR(-EFAULT);
  2950		}
  2951	
  2952		iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
  2953		if (!iwmr) {
  2954			ib_umem_release(&umem_dmabuf->umem);
  2955			return ERR_PTR(-ENOMEM);
  2956		}
  2957	
  2958		iwpbl = &iwmr->iwpbl;
  2959		iwpbl->iwmr = iwmr;
  2960		iwmr->region = &umem_dmabuf->umem;
  2961		iwmr->ibmr.pd = pd;
  2962		iwmr->ibmr.device = pd->device;
  2963		iwmr->ibmr.iova = virt;
  2964		iwmr->page_size = PAGE_SIZE;
  2965	
  2966		if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) {
  2967			iwmr->page_size = ib_umem_find_best_pgsz(iwmr->region,
  2968								 iwdev->rf->sc_dev.hw_attrs.page_size_cap,
  2969								 virt);
  2970			if (unlikely(!iwmr->page_size)) {
  2971			ib_umem_release(iwmr->region);
  2972			kfree(iwmr);
  2973				return ERR_PTR(-EOPNOTSUPP);
  2974			}
  2975		}
  2976		iwmr->len = iwmr->region->length;
  2977		iwpbl->user_base = virt;
  2978		palloc = &iwpbl->pble_alloc;
  2979		iwmr->type = req.reg_type;
  2980		iwmr->page_cnt = ib_umem_num_dma_blocks(iwmr->region, iwmr->page_size);
  2981	
  2982		switch (req.reg_type) {
  2983		case IRDMA_MEMREG_TYPE_QP:
  2984			total = req.sq_pages + req.rq_pages + shadow_pgcnt;
  2985			if (total > iwmr->page_cnt) {
  2986				err = -EINVAL;
  2987				goto error;
  2988			}
  2989			total = req.sq_pages + req.rq_pages;
  2990			use_pbles = (total > 2);
  2991			err = irdma_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
  2992			if (err)
  2993				goto error;
  2994	
  2995			ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
  2996							     ibucontext);
  2997			spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
  2998			list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
  2999			iwpbl->on_list = true;
  3000			spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
  3001			break;
  3002		case IRDMA_MEMREG_TYPE_CQ:
  3003			if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE)
  3004				shadow_pgcnt = 0;
  3005			total = req.cq_pages + shadow_pgcnt;
  3006			if (total > iwmr->page_cnt) {
  3007				err = -EINVAL;
  3008				goto error;
  3009			}
  3010	
  3011			use_pbles = (req.cq_pages > 1);
  3012			err = irdma_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
  3013			if (err)
  3014				goto error;
  3015	
  3016			ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
  3017							     ibucontext);
  3018			spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
  3019			list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
  3020			iwpbl->on_list = true;
  3021			spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
  3022			break;
  3023		case IRDMA_MEMREG_TYPE_MEM:
  3024			use_pbles = (iwmr->page_cnt != 1);
  3025	
  3026			err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, false);
  3027			if (err)
  3028				goto error;
  3029	
  3030			if (use_pbles) {
  3031				err = irdma_check_mr_contiguous(palloc,
  3032								iwmr->page_size);
  3033				if (err) {
  3034					irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
  3035					iwpbl->pbl_allocated = false;
  3036				}
  3037			}
  3038	
  3039			stag = irdma_create_stag(iwdev);
  3040			if (!stag) {
  3041				err = -ENOMEM;
  3042				goto error;
  3043			}
  3044	
  3045			iwmr->stag = stag;
  3046			iwmr->ibmr.rkey = stag;
  3047			iwmr->ibmr.lkey = stag;
  3048			err = irdma_hwreg_mr(iwdev, iwmr, access);
  3049			if (err) {
  3050				irdma_free_stag(iwdev, stag);
  3051				goto error;
  3052			}
  3053	
  3054			break;
  3055		default:
  3056			goto error;
  3057		}
  3058	
  3059		iwmr->type = req.reg_type;
  3060	
  3061		return &iwmr->ibmr;
  3062	
  3063	error:
  3064		if (palloc->level != PBLE_LEVEL_0 && iwpbl->pbl_allocated)
  3065			irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
  3066		ib_umem_release(iwmr->region);
  3067		kfree(iwmr);
  3068	
  3069		return ERR_PTR(err);
  3070	}
  3071
kernel test robot Jan. 2, 2023, 12:40 p.m. UTC | #2
Hi Zhu,

I love your patch! Perhaps something to improve:

[auto build test WARNING on rdma/for-next]
[also build test WARNING on linus/master v6.2-rc2 next-20221226]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Zhu-Yanjun/RDMA-irdma-Add-support-for-dmabuf-pin-memory-regions/20230102-170926
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
patch link:    https://lore.kernel.org/r/20230103013433.341997-1-yanjun.zhu%40intel.com
patch subject: [PATCH 1/1] RDMA/irdma: Add support for dmabuf pin memory regions
config: i386-randconfig-a016-20230102
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/a8500f730889793bc8ad2a6116a31035fdd34c0e
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Zhu-Yanjun/RDMA-irdma-Add-support-for-dmabuf-pin-memory-regions/20230102-170926
        git checkout a8500f730889793bc8ad2a6116a31035fdd34c0e
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/infiniband/hw/irdma/

If you fix the issue, kindly add the following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/infiniband/hw/irdma/verbs.c:2915:15: warning: no previous prototype for function 'irdma_reg_user_mr_dmabuf' [-Wmissing-prototypes]
   struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
                 ^
   drivers/infiniband/hw/irdma/verbs.c:2915:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
   ^
   static 
   1 warning generated.


vim +/irdma_reg_user_mr_dmabuf +2915 drivers/infiniband/hw/irdma/verbs.c

  2914	
> 2915	struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
  2916					       u64 len, u64 virt,
  2917					       int fd, int access,
  2918					       struct ib_udata *udata)
  2919	{
  2920		struct irdma_device *iwdev = to_iwdev(pd->device);
  2921		struct irdma_ucontext *ucontext;
  2922		struct irdma_pble_alloc *palloc;
  2923		struct irdma_pbl *iwpbl;
  2924		struct irdma_mr *iwmr;
  2925		struct irdma_mem_reg_req req;
  2926		u32 total, stag = 0;
  2927		u8 shadow_pgcnt = 1;
  2928		bool use_pbles = false;
  2929		unsigned long flags;
  2930		int err = -EINVAL;
  2931		struct ib_umem_dmabuf *umem_dmabuf;
  2932	
  2933		if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
  2934			return ERR_PTR(-EINVAL);
  2935	
  2936		if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN)
  2937			return ERR_PTR(-EINVAL);
  2938	
  2939		umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd,
  2940							access);
  2941		if (IS_ERR(umem_dmabuf)) {
  2942			err = PTR_ERR(umem_dmabuf);
  2943			ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
  2944			return ERR_PTR(err);
  2945		}
  2946	
  2947		if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) {
  2948			ib_umem_release(&umem_dmabuf->umem);
  2949			return ERR_PTR(-EFAULT);
  2950		}
  2951	
  2952		iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
  2953		if (!iwmr) {
  2954			ib_umem_release(&umem_dmabuf->umem);
  2955			return ERR_PTR(-ENOMEM);
  2956		}
  2957	
  2958		iwpbl = &iwmr->iwpbl;
  2959		iwpbl->iwmr = iwmr;
  2960		iwmr->region = &umem_dmabuf->umem;
  2961		iwmr->ibmr.pd = pd;
  2962		iwmr->ibmr.device = pd->device;
  2963		iwmr->ibmr.iova = virt;
  2964		iwmr->page_size = PAGE_SIZE;
  2965	
  2966		if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) {
  2967			iwmr->page_size = ib_umem_find_best_pgsz(iwmr->region,
  2968								 iwdev->rf->sc_dev.hw_attrs.page_size_cap,
  2969								 virt);
  2970			if (unlikely(!iwmr->page_size)) {
  2971			ib_umem_release(iwmr->region);
  2972			kfree(iwmr);
  2973				return ERR_PTR(-EOPNOTSUPP);
  2974			}
  2975		}
  2976		iwmr->len = iwmr->region->length;
  2977		iwpbl->user_base = virt;
  2978		palloc = &iwpbl->pble_alloc;
  2979		iwmr->type = req.reg_type;
  2980		iwmr->page_cnt = ib_umem_num_dma_blocks(iwmr->region, iwmr->page_size);
  2981	
  2982		switch (req.reg_type) {
  2983		case IRDMA_MEMREG_TYPE_QP:
  2984			total = req.sq_pages + req.rq_pages + shadow_pgcnt;
  2985			if (total > iwmr->page_cnt) {
  2986				err = -EINVAL;
  2987				goto error;
  2988			}
  2989			total = req.sq_pages + req.rq_pages;
  2990			use_pbles = (total > 2);
  2991			err = irdma_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
  2992			if (err)
  2993				goto error;
  2994	
  2995			ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
  2996							     ibucontext);
  2997			spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
  2998			list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
  2999			iwpbl->on_list = true;
  3000			spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
  3001			break;
  3002		case IRDMA_MEMREG_TYPE_CQ:
  3003			if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE)
  3004				shadow_pgcnt = 0;
  3005			total = req.cq_pages + shadow_pgcnt;
  3006			if (total > iwmr->page_cnt) {
  3007				err = -EINVAL;
  3008				goto error;
  3009			}
  3010	
  3011			use_pbles = (req.cq_pages > 1);
  3012			err = irdma_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
  3013			if (err)
  3014				goto error;
  3015	
  3016			ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
  3017							     ibucontext);
  3018			spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
  3019			list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
  3020			iwpbl->on_list = true;
  3021			spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
  3022			break;
  3023		case IRDMA_MEMREG_TYPE_MEM:
  3024			use_pbles = (iwmr->page_cnt != 1);
  3025	
  3026			err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, false);
  3027			if (err)
  3028				goto error;
  3029	
  3030			if (use_pbles) {
  3031				err = irdma_check_mr_contiguous(palloc,
  3032								iwmr->page_size);
  3033				if (err) {
  3034					irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
  3035					iwpbl->pbl_allocated = false;
  3036				}
  3037			}
  3038	
  3039			stag = irdma_create_stag(iwdev);
  3040			if (!stag) {
  3041				err = -ENOMEM;
  3042				goto error;
  3043			}
  3044	
  3045			iwmr->stag = stag;
  3046			iwmr->ibmr.rkey = stag;
  3047			iwmr->ibmr.lkey = stag;
  3048			err = irdma_hwreg_mr(iwdev, iwmr, access);
  3049			if (err) {
  3050				irdma_free_stag(iwdev, stag);
  3051				goto error;
  3052			}
  3053	
  3054			break;
  3055		default:
  3056			goto error;
  3057		}
  3058	
  3059		iwmr->type = req.reg_type;
  3060	
  3061		return &iwmr->ibmr;
  3062	
  3063	error:
  3064		if (palloc->level != PBLE_LEVEL_0 && iwpbl->pbl_allocated)
  3065			irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
  3066		ib_umem_release(iwmr->region);
  3067		kfree(iwmr);
  3068	
  3069		return ERR_PTR(err);
  3070	}
  3071
Leon Romanovsky Jan. 3, 2023, 9:37 a.m. UTC | #3
On Mon, Jan 02, 2023 at 08:34:33PM -0500, Zhu Yanjun wrote:
> From: Zhu Yanjun <yanjun.zhu@linux.dev>
> 
> This is a followup to the EFA dmabuf[1]. Irdma driver currently does
> not support on-demand-paging(ODP). So it uses habanalabs as the
> dmabuf exporter, and irdma as the importer to allow for peer2peer
> access through libibverbs.
> 
> In this commit, the function ib_umem_dmabuf_get_pinned() is used.
> This function is introduced in EFA dmabuf[1] which allows the driver
> to get a dmabuf umem which is pinned and does not require move_notify
> callback implementation. The returned umem is pinned and DMA mapped
> like standard cpu umems, and is released through ib_umem_release().
> 
> [1]https://lore.kernel.org/lkml/20211007114018.GD2688930@ziepe.ca/t/
> 
> Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
> ---
>  drivers/infiniband/hw/irdma/verbs.c | 158 ++++++++++++++++++++++++++++
>  1 file changed, 158 insertions(+)
> 
> diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
> index f6973ea55eda..76dc6e65930a 100644
> --- a/drivers/infiniband/hw/irdma/verbs.c
> +++ b/drivers/infiniband/hw/irdma/verbs.c
> @@ -2912,6 +2912,163 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
>  	return ERR_PTR(err);
>  }
>  
> +struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
> +				       u64 len, u64 virt,
> +				       int fd, int access,
> +				       struct ib_udata *udata)

kbuild complained about this line; it should be "static struct ..."

And please include the target tree in your patches: rdma-next/rdma-rc.
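
A sketch of the suggested fix: mark the definition static; the
ib_device_ops entry added by the patch keeps the function reachable:

	static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd,
						      u64 start, u64 len,
						      u64 virt, int fd,
						      int access,
						      struct ib_udata *udata)
	{
		/* body as in the patch, unchanged */
	}

	static const struct ib_device_ops irdma_dev_ops = {
		/* ... */
		.reg_user_mr_dmabuf = irdma_reg_user_mr_dmabuf,
		/* ... */
	};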

Thanks
Zhu Yanjun Jan. 3, 2023, 1:44 p.m. UTC | #4
On 2023/1/3 17:37, Leon Romanovsky wrote:
> On Mon, Jan 02, 2023 at 08:34:33PM -0500, Zhu Yanjun wrote:
>> From: Zhu Yanjun <yanjun.zhu@linux.dev>
>>
>> This is a followup to the EFA dmabuf[1]. Irdma driver currently does
>> not support on-demand-paging(ODP). So it uses habanalabs as the
>> dmabuf exporter, and irdma as the importer to allow for peer2peer
>> access through libibverbs.
>>
>> In this commit, the function ib_umem_dmabuf_get_pinned() is used.
>> This function is introduced in EFA dmabuf[1] which allows the driver
>> to get a dmabuf umem which is pinned and does not require move_notify
>> callback implementation. The returned umem is pinned and DMA mapped
>> like standard cpu umems, and is released through ib_umem_release().
>>
>> [1]https://lore.kernel.org/lkml/20211007114018.GD2688930@ziepe.ca/t/
>>
>> Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
>> ---
>>   drivers/infiniband/hw/irdma/verbs.c | 158 ++++++++++++++++++++++++++++
>>   1 file changed, 158 insertions(+)
>>
>> diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
>> index f6973ea55eda..76dc6e65930a 100644
>> --- a/drivers/infiniband/hw/irdma/verbs.c
>> +++ b/drivers/infiniband/hw/irdma/verbs.c
>> @@ -2912,6 +2912,163 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
>>   	return ERR_PTR(err);
>>   }
>>   
>> +struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
>> +				       u64 len, u64 virt,
>> +				       int fd, int access,
>> +				       struct ib_udata *udata)
> 
> kbuild complained about this line, it should be "static struct ..."
> 
> And please use target in your patches: rdma-next/rdma-rc.

static is added.

Please check V2.

Thanks and Regards,

Zhu Yanjun

> 
> Thanks
Jason Gunthorpe Jan. 3, 2023, 11:11 p.m. UTC | #5
On Mon, Jan 02, 2023 at 08:34:33PM -0500, Zhu Yanjun wrote:
> From: Zhu Yanjun <yanjun.zhu@linux.dev>
> 
> This is a followup to the EFA dmabuf[1]. Irdma driver currently does
> not support on-demand-paging(ODP). So it uses habanalabs as the
> dmabuf exporter, and irdma as the importer to allow for peer2peer
> access through libibverbs.
> 
> In this commit, the function ib_umem_dmabuf_get_pinned() is used.
> This function is introduced in EFA dmabuf[1] which allows the driver
> to get a dmabuf umem which is pinned and does not require move_notify
> callback implementation. The returned umem is pinned and DMA mapped
> like standard cpu umems, and is released through ib_umem_release().
> 
> [1]https://lore.kernel.org/lkml/20211007114018.GD2688930@ziepe.ca/t/
> 
> Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
> ---
>  drivers/infiniband/hw/irdma/verbs.c | 158 ++++++++++++++++++++++++++++
>  1 file changed, 158 insertions(+)
> 
> diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
> index f6973ea55eda..76dc6e65930a 100644
> --- a/drivers/infiniband/hw/irdma/verbs.c
> +++ b/drivers/infiniband/hw/irdma/verbs.c
> @@ -2912,6 +2912,163 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
>  	return ERR_PTR(err);
>  }
>  
> +struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
> +				       u64 len, u64 virt,
> +				       int fd, int access,
> +				       struct ib_udata *udata)
> +{
> +	struct irdma_device *iwdev = to_iwdev(pd->device);
> +	struct irdma_ucontext *ucontext;
> +	struct irdma_pble_alloc *palloc;
> +	struct irdma_pbl *iwpbl;
> +	struct irdma_mr *iwmr;
> +	struct irdma_mem_reg_req req;
> +	u32 total, stag = 0;
> +	u8 shadow_pgcnt = 1;
> +	bool use_pbles = false;
> +	unsigned long flags;
> +	int err = -EINVAL;
> +	struct ib_umem_dmabuf *umem_dmabuf;
> +
> +	if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
> +		return ERR_PTR(-EINVAL);
> +
> +	if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN)
> +		return ERR_PTR(-EINVAL);
> +
> +	umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd,
> +						access);
> +	if (IS_ERR(umem_dmabuf)) {
> +		err = PTR_ERR(umem_dmabuf);
> +		ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
> +		return ERR_PTR(err);
> +	}
> +
> +	if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) {
> +		ib_umem_release(&umem_dmabuf->umem);
> +		return ERR_PTR(-EFAULT);
> +	}
> +
> +	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
> +	if (!iwmr) {
> +		ib_umem_release(&umem_dmabuf->umem);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	iwpbl = &iwmr->iwpbl;
> +	iwpbl->iwmr = iwmr;
> +	iwmr->region = &umem_dmabuf->umem;
> +	iwmr->ibmr.pd = pd;
> +	iwmr->ibmr.device = pd->device;
> +	iwmr->ibmr.iova = virt;
> +	iwmr->page_size = PAGE_SIZE;
> +
> +	if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) {
> +		iwmr->page_size = ib_umem_find_best_pgsz(iwmr->region,
> +							 iwdev->rf->sc_dev.hw_attrs.page_size_cap,
> +							 virt);

You can't call rdma_umem_for_each_dma_block() without also calling
this function to validate that the page_size passed to
rdma_umem_for_each_dma_block() is correct.

This seems to be an existing bug; please fix it.
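
A sketch of the required ordering, using names from the patch
(irdma_fill_pbl and the pbl argument are hypothetical):

	static int irdma_fill_pbl(struct irdma_mr *iwmr, u64 virt,
				  u64 pgsz_bitmap, u64 *pbl)
	{
		struct ib_block_iter biter;

		/* Returns 0 when no supported page size fits this umem. */
		iwmr->page_size = ib_umem_find_best_pgsz(iwmr->region,
							 pgsz_bitmap, virt);
		if (!iwmr->page_size)
			return -EOPNOTSUPP;

		/* Only a size validated above may drive the block walk. */
		rdma_umem_for_each_dma_block(iwmr->region, &biter,
					     iwmr->page_size)
			*pbl++ = rdma_block_iter_dma_address(&biter);

		return 0;
	}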

Also, is there a reason this code is all duplicated from
irdma_reg_user_mr? Please split things up like the other drivers to
obtain the umem then use shared code to process the umem as required.
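
Roughly the shape being asked for: each registration path obtains its own
umem, then hands off to one shared helper (irdma_reg_user_mr_type_mem is a
hypothetical name for that shared code):

	static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd,
						      u64 start, u64 len,
						      u64 virt, int fd,
						      int access,
						      struct ib_udata *udata)
	{
		struct ib_umem_dmabuf *umem_dmabuf;

		umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start,
							len, fd, access);
		if (IS_ERR(umem_dmabuf))
			return ERR_CAST(umem_dmabuf);

		/* Everything after umem acquisition is common with
		 * irdma_reg_user_mr() and lives in the shared helper.
		 */
		return irdma_reg_user_mr_type_mem(pd, &umem_dmabuf->umem,
						  virt, access, udata);
	}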

Jason
Zhu Yanjun Jan. 4, 2023, 12:58 p.m. UTC | #6
On 2023/1/4 7:11, Jason Gunthorpe wrote:
> On Mon, Jan 02, 2023 at 08:34:33PM -0500, Zhu Yanjun wrote:
>> From: Zhu Yanjun <yanjun.zhu@linux.dev>
>>
>> This is a followup to the EFA dmabuf[1]. Irdma driver currently does
>> not support on-demand-paging(ODP). So it uses habanalabs as the
>> dmabuf exporter, and irdma as the importer to allow for peer2peer
>> access through libibverbs.
>>
>> In this commit, the function ib_umem_dmabuf_get_pinned() is used.
>> This function is introduced in EFA dmabuf[1] which allows the driver
>> to get a dmabuf umem which is pinned and does not require move_notify
>> callback implementation. The returned umem is pinned and DMA mapped
>> like standard cpu umems, and is released through ib_umem_release().
>>
>> [1]https://lore.kernel.org/lkml/20211007114018.GD2688930@ziepe.ca/t/
>>
>> Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
>> ---
>>   drivers/infiniband/hw/irdma/verbs.c | 158 ++++++++++++++++++++++++++++
>>   1 file changed, 158 insertions(+)
>>
>> diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
>> index f6973ea55eda..76dc6e65930a 100644
>> --- a/drivers/infiniband/hw/irdma/verbs.c
>> +++ b/drivers/infiniband/hw/irdma/verbs.c
>> @@ -2912,6 +2912,163 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
>>   	return ERR_PTR(err);
>>   }
>>   
>> +struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
>> +				       u64 len, u64 virt,
>> +				       int fd, int access,
>> +				       struct ib_udata *udata)
>> +{
>> +	struct irdma_device *iwdev = to_iwdev(pd->device);
>> +	struct irdma_ucontext *ucontext;
>> +	struct irdma_pble_alloc *palloc;
>> +	struct irdma_pbl *iwpbl;
>> +	struct irdma_mr *iwmr;
>> +	struct irdma_mem_reg_req req;
>> +	u32 total, stag = 0;
>> +	u8 shadow_pgcnt = 1;
>> +	bool use_pbles = false;
>> +	unsigned long flags;
>> +	int err = -EINVAL;
>> +	struct ib_umem_dmabuf *umem_dmabuf;
>> +
>> +	if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
>> +		return ERR_PTR(-EINVAL);
>> +
>> +	if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN)
>> +		return ERR_PTR(-EINVAL);
>> +
>> +	umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd,
>> +						access);
>> +	if (IS_ERR(umem_dmabuf)) {
>> +		err = PTR_ERR(umem_dmabuf);
>> +		ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
>> +		return ERR_PTR(err);
>> +	}
>> +
>> +	if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) {
>> +		ib_umem_release(&umem_dmabuf->umem);
>> +		return ERR_PTR(-EFAULT);
>> +	}
>> +
>> +	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
>> +	if (!iwmr) {
>> +		ib_umem_release(&umem_dmabuf->umem);
>> +		return ERR_PTR(-ENOMEM);
>> +	}
>> +
>> +	iwpbl = &iwmr->iwpbl;
>> +	iwpbl->iwmr = iwmr;
>> +	iwmr->region = &umem_dmabuf->umem;
>> +	iwmr->ibmr.pd = pd;
>> +	iwmr->ibmr.device = pd->device;
>> +	iwmr->ibmr.iova = virt;
>> +	iwmr->page_size = PAGE_SIZE;
>> +
>> +	if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) {
>> +		iwmr->page_size = ib_umem_find_best_pgsz(iwmr->region,
>> +							 iwdev->rf->sc_dev.hw_attrs.page_size_cap,
>> +							 virt);
> You can't call rdma_umem_for_each_dma_block() without also calling
> this function to validate that the page_size passed to
> rdma_umem_for_each_dma_block() is correct.
Got it. I will fix it in the next version.
>
> This seems to be an existing bug, please fix it.
>
> Also, is there a reason this code is all duplicated from
> irdma_reg_user_mr? Please split things up like the other drivers to
> obtain the umem then use shared code to process the umem as required.

Got it.

Zhu Yanjun

>
> Jason

Patch

diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index f6973ea55eda..76dc6e65930a 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -2912,6 +2912,163 @@  static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
 	return ERR_PTR(err);
 }
 
+struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
+				       u64 len, u64 virt,
+				       int fd, int access,
+				       struct ib_udata *udata)
+{
+	struct irdma_device *iwdev = to_iwdev(pd->device);
+	struct irdma_ucontext *ucontext;
+	struct irdma_pble_alloc *palloc;
+	struct irdma_pbl *iwpbl;
+	struct irdma_mr *iwmr;
+	struct irdma_mem_reg_req req;
+	u32 total, stag = 0;
+	u8 shadow_pgcnt = 1;
+	bool use_pbles = false;
+	unsigned long flags;
+	int err = -EINVAL;
+	struct ib_umem_dmabuf *umem_dmabuf;
+
+	if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
+		return ERR_PTR(-EINVAL);
+
+	if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN)
+		return ERR_PTR(-EINVAL);
+
+	umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd,
+						access);
+	if (IS_ERR(umem_dmabuf)) {
+		err = PTR_ERR(umem_dmabuf);
+		ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
+		return ERR_PTR(err);
+	}
+
+	if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) {
+		ib_umem_release(&umem_dmabuf->umem);
+		return ERR_PTR(-EFAULT);
+	}
+
+	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+	if (!iwmr) {
+		ib_umem_release(&umem_dmabuf->umem);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	iwpbl = &iwmr->iwpbl;
+	iwpbl->iwmr = iwmr;
+	iwmr->region = &umem_dmabuf->umem;
+	iwmr->ibmr.pd = pd;
+	iwmr->ibmr.device = pd->device;
+	iwmr->ibmr.iova = virt;
+	iwmr->page_size = PAGE_SIZE;
+
+	if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) {
+		iwmr->page_size = ib_umem_find_best_pgsz(iwmr->region,
+							 iwdev->rf->sc_dev.hw_attrs.page_size_cap,
+							 virt);
+		if (unlikely(!iwmr->page_size)) {
+		ib_umem_release(iwmr->region);
+		kfree(iwmr);
+			return ERR_PTR(-EOPNOTSUPP);
+		}
+	}
+	iwmr->len = iwmr->region->length;
+	iwpbl->user_base = virt;
+	palloc = &iwpbl->pble_alloc;
+	iwmr->type = req.reg_type;
+	iwmr->page_cnt = ib_umem_num_dma_blocks(iwmr->region, iwmr->page_size);
+
+	switch (req.reg_type) {
+	case IRDMA_MEMREG_TYPE_QP:
+		total = req.sq_pages + req.rq_pages + shadow_pgcnt;
+		if (total > iwmr->page_cnt) {
+			err = -EINVAL;
+			goto error;
+		}
+		total = req.sq_pages + req.rq_pages;
+		use_pbles = (total > 2);
+		err = irdma_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
+		if (err)
+			goto error;
+
+		ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+						     ibucontext);
+		spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+		list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
+		iwpbl->on_list = true;
+		spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+		break;
+	case IRDMA_MEMREG_TYPE_CQ:
+		if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE)
+			shadow_pgcnt = 0;
+		total = req.cq_pages + shadow_pgcnt;
+		if (total > iwmr->page_cnt) {
+			err = -EINVAL;
+			goto error;
+		}
+
+		use_pbles = (req.cq_pages > 1);
+		err = irdma_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
+		if (err)
+			goto error;
+
+		ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+						     ibucontext);
+		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+		list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
+		iwpbl->on_list = true;
+		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+		break;
+	case IRDMA_MEMREG_TYPE_MEM:
+		use_pbles = (iwmr->page_cnt != 1);
+
+		err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, false);
+		if (err)
+			goto error;
+
+		if (use_pbles) {
+			err = irdma_check_mr_contiguous(palloc,
+							iwmr->page_size);
+			if (err) {
+				irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
+				iwpbl->pbl_allocated = false;
+			}
+		}
+
+		stag = irdma_create_stag(iwdev);
+		if (!stag) {
+			err = -ENOMEM;
+			goto error;
+		}
+
+		iwmr->stag = stag;
+		iwmr->ibmr.rkey = stag;
+		iwmr->ibmr.lkey = stag;
+		err = irdma_hwreg_mr(iwdev, iwmr, access);
+		if (err) {
+			irdma_free_stag(iwdev, stag);
+			goto error;
+		}
+
+		break;
+	default:
+		goto error;
+	}
+
+	iwmr->type = req.reg_type;
+
+	return &iwmr->ibmr;
+
+error:
+	if (palloc->level != PBLE_LEVEL_0 && iwpbl->pbl_allocated)
+		irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
+	ib_umem_release(iwmr->region);
+	kfree(iwmr);
+
+	return ERR_PTR(err);
+}
+
 /**
  * irdma_reg_phys_mr - register kernel physical memory
  * @pd: ibpd pointer
@@ -4418,6 +4575,7 @@  static const struct ib_device_ops irdma_dev_ops = {
 	.query_port = irdma_query_port,
 	.query_qp = irdma_query_qp,
 	.reg_user_mr = irdma_reg_user_mr,
+	.reg_user_mr_dmabuf = irdma_reg_user_mr_dmabuf,
 	.req_notify_cq = irdma_req_notify_cq,
 	.resize_cq = irdma_resize_cq,
 	INIT_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd),
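
For completeness, the .reg_user_mr_dmabuf hook added above is what backs
dmabuf MR registration from libibverbs. A minimal userspace sketch
(requires an rdma-core with ibv_reg_dmabuf_mr() support; the access flags
are illustrative):

	#include <infiniband/verbs.h>

	/* Register a dmabuf-backed MR against an existing PD. */
	static struct ibv_mr *reg_dmabuf(struct ibv_pd *pd, uint64_t offset,
					 size_t len, uint64_t iova,
					 int dmabuf_fd)
	{
		return ibv_reg_dmabuf_mr(pd, offset, len, iova, dmabuf_fd,
					 IBV_ACCESS_LOCAL_WRITE |
					 IBV_ACCESS_REMOTE_READ |
					 IBV_ACCESS_REMOTE_WRITE);
	}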