diff mbox series

[rdma-rc,2/2] IB/mlx5: Fix device memory flows

Message ID 20191212100237.330654-3-leon@kernel.org (mailing list archive)
State Accepted
Headers show
Series Prevent device memory VA reuse | expand

Commit Message

Leon Romanovsky Dec. 12, 2019, 10:02 a.m. UTC
From: Yishai Hadas <yishaih@mellanox.com>

Fix device memory flows so that the matching object is destroyed only
once there are no live mmaped VAs to a given allocation.

This prevents a potential scenario in which an existing VA that was mmaped
by one process might still be used after its deallocation, even though the
memory is now owned by another process.

The above is achieved by integrating with the IB core APIs that manage
mmap/munmap. The DM object and its underlying area are freed only once
the refcount drops to 0.

Fixes: 3b113a1ec3d4 ("IB/mlx5: Support device memory type attribute")
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/cmd.c     |  16 ++--
 drivers/infiniband/hw/mlx5/cmd.h     |   2 +-
 drivers/infiniband/hw/mlx5/main.c    | 120 ++++++++++++++++++---------
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  19 ++++-
 4 files changed, 105 insertions(+), 52 deletions(-)

Comments

Leon Romanovsky Dec. 12, 2019, 11:15 a.m. UTC | #1
On Thu, Dec 12, 2019 at 12:02:37PM +0200, Leon Romanovsky wrote:
> From: Yishai Hadas <yishaih@mellanox.com>
>
> Fix device memory flows so that only once there will be no live mmaped
> VA to a given allocation the matching object will be destroyed.
>
> This prevents a potential scenario that existing VA that was mmaped by
> one process might still be used post its deallocation despite that it's
> owned now by other process.
>
> The above is achieved by integrating with IB core APIs to manage
> mmap/munmap. Only once the refcount will become 0 the DM object and its
> underlay area will be freed.
>
> Fixes: 3b113a1ec3d4 ("IB/mlx5: Support device memory type attribute")
> Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> ---
>  drivers/infiniband/hw/mlx5/cmd.c     |  16 ++--
>  drivers/infiniband/hw/mlx5/cmd.h     |   2 +-
>  drivers/infiniband/hw/mlx5/main.c    | 120 ++++++++++++++++++---------
>  drivers/infiniband/hw/mlx5/mlx5_ib.h |  19 ++++-
>  4 files changed, 105 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
> index 4937947400cd..4c26492ab8a3 100644
> --- a/drivers/infiniband/hw/mlx5/cmd.c
> +++ b/drivers/infiniband/hw/mlx5/cmd.c
> @@ -157,7 +157,7 @@ int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
>  	return -ENOMEM;
>  }
>
> -int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
> +void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
>  {
>  	struct mlx5_core_dev *dev = dm->dev;
>  	u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
> @@ -175,15 +175,13 @@ int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
>  	MLX5_SET(dealloc_memic_in, in, memic_size, length);
>
>  	err =  mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
> +	if (err)
> +		return;
>
> -	if (!err) {
> -		spin_lock(&dm->lock);
> -		bitmap_clear(dm->memic_alloc_pages,
> -			     start_page_idx, num_pages);
> -		spin_unlock(&dm->lock);
> -	}
> -
> -	return err;
> +	spin_lock(&dm->lock);
> +	bitmap_clear(dm->memic_alloc_pages,
> +		     start_page_idx, num_pages);
> +	spin_unlock(&dm->lock);
>  }
>
>  int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
> diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
> index 169cab4915e3..945ebce73613 100644
> --- a/drivers/infiniband/hw/mlx5/cmd.h
> +++ b/drivers/infiniband/hw/mlx5/cmd.h
> @@ -46,7 +46,7 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
>  				void *in, int in_size);
>  int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
>  			 u64 length, u32 alignment);
> -int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
> +void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
>  void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
>  void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
>  void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
> diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
> index 2f5a159cbe1c..4d89d85226c2 100644
> --- a/drivers/infiniband/hw/mlx5/main.c
> +++ b/drivers/infiniband/hw/mlx5/main.c
> @@ -2074,6 +2074,24 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
>  			      virt_to_page(dev->mdev->clock_info));
>  }
>
> +static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
> +{
> +	struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
> +	struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
> +	struct mlx5_ib_dm *mdm;
> +
> +	switch (mentry->mmap_flag) {
> +	case MLX5_IB_MMAP_TYPE_MEMIC:
> +		mdm = container_of(mentry, struct mlx5_ib_dm, mentry);
> +		mlx5_cmd_dealloc_memic(&dev->dm, mdm->dev_addr,
> +				       mdm->size);
> +		kfree(mdm);
> +		break;
> +	default:
> +		WARN_ON(true);
> +	}
> +}
> +
>  static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
>  		    struct vm_area_struct *vma,
>  		    struct mlx5_ib_ucontext *context)
> @@ -2186,26 +2204,55 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
>  	return err;
>  }
>
> -static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
> +static int add_dm_mmap_entry(struct ib_ucontext *context,
> +			     struct mlx5_ib_dm *mdm,
> +			     u64 address)
> +{
> +	mdm->mentry.mmap_flag = MLX5_IB_MMAP_TYPE_MEMIC;
> +	mdm->mentry.address = address;
> +	return rdma_user_mmap_entry_insert_range(
> +			context, &mdm->mentry.rdma_entry,
> +			mdm->size,
> +			MLX5_IB_MMAP_DEVICE_MEM << 16,
> +			(MLX5_IB_MMAP_DEVICE_MEM << 16) + (1UL << 16) - 1);
> +}
> +
> +static unsigned long mlx5_vma_to_pgoff(struct vm_area_struct *vma)
> +{
> +	unsigned long idx;
> +	u8 command;
> +
> +	command = get_command(vma->vm_pgoff);
> +	idx = get_extended_index(vma->vm_pgoff);
> +
> +	return (command << 16 | idx);
> +}
> +
> +static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev,
> +			       struct vm_area_struct *vma,
> +			       struct ib_ucontext *ucontext)
>  {
> -	struct mlx5_ib_ucontext *mctx = to_mucontext(context);
> -	struct mlx5_ib_dev *dev = to_mdev(context->device);
> -	u16 page_idx = get_extended_index(vma->vm_pgoff);
> -	size_t map_size = vma->vm_end - vma->vm_start;
> -	u32 npages = map_size >> PAGE_SHIFT;
> +	struct mlx5_user_mmap_entry *mentry;
> +	struct rdma_user_mmap_entry *entry;
> +	unsigned long pgoff;
> +	pgprot_t prot;
>  	phys_addr_t pfn;
> +	int ret;
>
> -	if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
> -	    page_idx + npages)
> +	pgoff = mlx5_vma_to_pgoff(vma);
> +	entry = rdma_user_mmap_entry_get_pgoff(ucontext, pgoff);
> +	if (!entry)
>  		return -EINVAL;
>
> -	pfn = ((dev->mdev->bar_addr +
> -	      MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
> -	      PAGE_SHIFT) +
> -	      page_idx;
> -	return rdma_user_mmap_io(context, vma, pfn, map_size,
> -				 pgprot_writecombine(vma->vm_page_prot),
> -				 NULL);
> +	mentry = to_mmmap(entry);
> +	pfn = (mentry->address >> PAGE_SHIFT);
> +	prot = pgprot_writecombine(vma->vm_page_prot);
> +	ret = rdma_user_mmap_io(ucontext, vma, pfn,
> +				entry->npages * PAGE_SIZE,
> +				prot,
> +				entry);
> +	rdma_user_mmap_entry_put(&mentry->rdma_entry);
> +	return ret;
>  }
>
>  static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
> @@ -2248,11 +2295,8 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
>  	case MLX5_IB_MMAP_CLOCK_INFO:
>  		return mlx5_ib_mmap_clock_info_page(dev, vma, context);
>
> -	case MLX5_IB_MMAP_DEVICE_MEM:
> -		return dm_mmap(ibcontext, vma);
> -
>  	default:
> -		return -EINVAL;
> +		return mlx5_ib_mmap_offset(dev, vma, ibcontext);
>  	}
>
>  	return 0;
> @@ -2288,8 +2332,9 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
>  {
>  	struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
>  	u64 start_offset;
> -	u32 page_idx;
> +	u16 page_idx = 0;

This hunk is not needed.

Thanks
Leon Romanovsky Dec. 12, 2019, 11:21 a.m. UTC | #2
On Thu, Dec 12, 2019 at 11:15:01AM +0000, Leon Romanovsky wrote:
> On Thu, Dec 12, 2019 at 12:02:37PM +0200, Leon Romanovsky wrote:
> > From: Yishai Hadas <yishaih@mellanox.com>
> >
> > Fix device memory flows so that only once there will be no live mmaped
> > VA to a given allocation the matching object will be destroyed.
> >
> > This prevents a potential scenario that existing VA that was mmaped by
> > one process might still be used post its deallocation despite that it's
> > owned now by other process.
> >
> > The above is achieved by integrating with IB core APIs to manage
> > mmap/munmap. Only once the refcount will become 0 the DM object and its
> > underlay area will be freed.
> >
> > Fixes: 3b113a1ec3d4 ("IB/mlx5: Support device memory type attribute")
> > Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
> > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > ---
> >  drivers/infiniband/hw/mlx5/cmd.c     |  16 ++--
> >  drivers/infiniband/hw/mlx5/cmd.h     |   2 +-
> >  drivers/infiniband/hw/mlx5/main.c    | 120 ++++++++++++++++++---------
> >  drivers/infiniband/hw/mlx5/mlx5_ib.h |  19 ++++-
> >  4 files changed, 105 insertions(+), 52 deletions(-)
> >
> > diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
> > index 4937947400cd..4c26492ab8a3 100644
> > --- a/drivers/infiniband/hw/mlx5/cmd.c
> > +++ b/drivers/infiniband/hw/mlx5/cmd.c
> > @@ -157,7 +157,7 @@ int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
> >  	return -ENOMEM;
> >  }
> >
> > -int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
> > +void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
> >  {
> >  	struct mlx5_core_dev *dev = dm->dev;
> >  	u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
> > @@ -175,15 +175,13 @@ int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
> >  	MLX5_SET(dealloc_memic_in, in, memic_size, length);
> >
> >  	err =  mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
> > +	if (err)
> > +		return;
> >
> > -	if (!err) {
> > -		spin_lock(&dm->lock);
> > -		bitmap_clear(dm->memic_alloc_pages,
> > -			     start_page_idx, num_pages);
> > -		spin_unlock(&dm->lock);
> > -	}
> > -
> > -	return err;
> > +	spin_lock(&dm->lock);
> > +	bitmap_clear(dm->memic_alloc_pages,
> > +		     start_page_idx, num_pages);
> > +	spin_unlock(&dm->lock);
> >  }
> >
> >  int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
> > diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
> > index 169cab4915e3..945ebce73613 100644
> > --- a/drivers/infiniband/hw/mlx5/cmd.h
> > +++ b/drivers/infiniband/hw/mlx5/cmd.h
> > @@ -46,7 +46,7 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
> >  				void *in, int in_size);
> >  int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
> >  			 u64 length, u32 alignment);
> > -int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
> > +void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
> >  void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
> >  void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
> >  void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
> > diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
> > index 2f5a159cbe1c..4d89d85226c2 100644
> > --- a/drivers/infiniband/hw/mlx5/main.c
> > +++ b/drivers/infiniband/hw/mlx5/main.c
> > @@ -2074,6 +2074,24 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
> >  			      virt_to_page(dev->mdev->clock_info));
> >  }
> >
> > +static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
> > +{
> > +	struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
> > +	struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
> > +	struct mlx5_ib_dm *mdm;
> > +
> > +	switch (mentry->mmap_flag) {
> > +	case MLX5_IB_MMAP_TYPE_MEMIC:
> > +		mdm = container_of(mentry, struct mlx5_ib_dm, mentry);
> > +		mlx5_cmd_dealloc_memic(&dev->dm, mdm->dev_addr,
> > +				       mdm->size);
> > +		kfree(mdm);
> > +		break;
> > +	default:
> > +		WARN_ON(true);
> > +	}
> > +}
> > +
> >  static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
> >  		    struct vm_area_struct *vma,
> >  		    struct mlx5_ib_ucontext *context)
> > @@ -2186,26 +2204,55 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
> >  	return err;
> >  }
> >
> > -static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
> > +static int add_dm_mmap_entry(struct ib_ucontext *context,
> > +			     struct mlx5_ib_dm *mdm,
> > +			     u64 address)
> > +{
> > +	mdm->mentry.mmap_flag = MLX5_IB_MMAP_TYPE_MEMIC;
> > +	mdm->mentry.address = address;
> > +	return rdma_user_mmap_entry_insert_range(
> > +			context, &mdm->mentry.rdma_entry,
> > +			mdm->size,
> > +			MLX5_IB_MMAP_DEVICE_MEM << 16,
> > +			(MLX5_IB_MMAP_DEVICE_MEM << 16) + (1UL << 16) - 1);
> > +}
> > +
> > +static unsigned long mlx5_vma_to_pgoff(struct vm_area_struct *vma)
> > +{
> > +	unsigned long idx;
> > +	u8 command;
> > +
> > +	command = get_command(vma->vm_pgoff);
> > +	idx = get_extended_index(vma->vm_pgoff);
> > +
> > +	return (command << 16 | idx);
> > +}
> > +
> > +static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev,
> > +			       struct vm_area_struct *vma,
> > +			       struct ib_ucontext *ucontext)
> >  {
> > -	struct mlx5_ib_ucontext *mctx = to_mucontext(context);
> > -	struct mlx5_ib_dev *dev = to_mdev(context->device);
> > -	u16 page_idx = get_extended_index(vma->vm_pgoff);
> > -	size_t map_size = vma->vm_end - vma->vm_start;
> > -	u32 npages = map_size >> PAGE_SHIFT;
> > +	struct mlx5_user_mmap_entry *mentry;
> > +	struct rdma_user_mmap_entry *entry;
> > +	unsigned long pgoff;
> > +	pgprot_t prot;
> >  	phys_addr_t pfn;
> > +	int ret;
> >
> > -	if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
> > -	    page_idx + npages)
> > +	pgoff = mlx5_vma_to_pgoff(vma);
> > +	entry = rdma_user_mmap_entry_get_pgoff(ucontext, pgoff);
> > +	if (!entry)
> >  		return -EINVAL;
> >
> > -	pfn = ((dev->mdev->bar_addr +
> > -	      MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
> > -	      PAGE_SHIFT) +
> > -	      page_idx;
> > -	return rdma_user_mmap_io(context, vma, pfn, map_size,
> > -				 pgprot_writecombine(vma->vm_page_prot),
> > -				 NULL);
> > +	mentry = to_mmmap(entry);
> > +	pfn = (mentry->address >> PAGE_SHIFT);
> > +	prot = pgprot_writecombine(vma->vm_page_prot);
> > +	ret = rdma_user_mmap_io(ucontext, vma, pfn,
> > +				entry->npages * PAGE_SIZE,
> > +				prot,
> > +				entry);
> > +	rdma_user_mmap_entry_put(&mentry->rdma_entry);
> > +	return ret;
> >  }
> >
> >  static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
> > @@ -2248,11 +2295,8 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
> >  	case MLX5_IB_MMAP_CLOCK_INFO:
> >  		return mlx5_ib_mmap_clock_info_page(dev, vma, context);
> >
> > -	case MLX5_IB_MMAP_DEVICE_MEM:
> > -		return dm_mmap(ibcontext, vma);
> > -
> >  	default:
> > -		return -EINVAL;
> > +		return mlx5_ib_mmap_offset(dev, vma, ibcontext);
> >  	}
> >
> >  	return 0;
> > @@ -2288,8 +2332,9 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
> >  {
> >  	struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
> >  	u64 start_offset;
> > -	u32 page_idx;
> > +	u16 page_idx = 0;
>
> This hunk is not needed.

To be clear, my comment applies only to the "= 0" part. The change of the
type is still needed.

Thanks

>
> Thanks
Doug Ledford Dec. 12, 2019, 10 p.m. UTC | #3
On Thu, 2019-12-12 at 11:21 +0000, Leon Romanovsky wrote:
> > > @@ -2288,8 +2332,9 @@ static int handle_alloc_dm_memic(struct
> > > ib_ucontext *ctx,
> > >   {
> > >      struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
> > >      u64 start_offset;
> > > -   u32 page_idx;
> > > +   u16 page_idx = 0;
> > 
> > This hunk is not needed.
> 
> To be clear, I wanted to say this about "= 0" part. The change of the
> type is still needed.
> 
> Thanks

I fixed it up when I took the two patches.  Applied to for-rc, thanks.
Leon Romanovsky Dec. 15, 2019, 6:55 p.m. UTC | #4
On Thu, Dec 12, 2019 at 05:00:49PM -0500, Doug Ledford wrote:
> On Thu, 2019-12-12 at 11:21 +0000, Leon Romanovsky wrote:
> > > > @@ -2288,8 +2332,9 @@ static int handle_alloc_dm_memic(struct
> > > > ib_ucontext *ctx,
> > > >   {
> > > >      struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
> > > >      u64 start_offset;
> > > > -   u32 page_idx;
> > > > +   u16 page_idx = 0;
> > >
> > > This hunk is not needed.
> >
> > To be clear, I wanted to say this about "= 0" part. The change of the
> > type is still needed.
> >
> > Thanks
>
> I fixed it up when I took the two patches.  Applied to for-rc, thanks.

Thanks a lot.

>
> --
> Doug Ledford <dledford@redhat.com>
>     GPG KeyID: B826A3330E572FDD
>     Fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57 2FDD
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 4937947400cd..4c26492ab8a3 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -157,7 +157,7 @@  int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
 	return -ENOMEM;
 }
 
-int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
+void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
 {
 	struct mlx5_core_dev *dev = dm->dev;
 	u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
@@ -175,15 +175,13 @@  int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
 	MLX5_SET(dealloc_memic_in, in, memic_size, length);
 
 	err =  mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return;
 
-	if (!err) {
-		spin_lock(&dm->lock);
-		bitmap_clear(dm->memic_alloc_pages,
-			     start_page_idx, num_pages);
-		spin_unlock(&dm->lock);
-	}
-
-	return err;
+	spin_lock(&dm->lock);
+	bitmap_clear(dm->memic_alloc_pages,
+		     start_page_idx, num_pages);
+	spin_unlock(&dm->lock);
 }
 
 int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 169cab4915e3..945ebce73613 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -46,7 +46,7 @@  int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
 				void *in, int in_size);
 int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
 			 u64 length, u32 alignment);
-int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
+void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
 void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
 void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
 void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 2f5a159cbe1c..4d89d85226c2 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2074,6 +2074,24 @@  static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
 			      virt_to_page(dev->mdev->clock_info));
 }
 
+static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
+{
+	struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
+	struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
+	struct mlx5_ib_dm *mdm;
+
+	switch (mentry->mmap_flag) {
+	case MLX5_IB_MMAP_TYPE_MEMIC:
+		mdm = container_of(mentry, struct mlx5_ib_dm, mentry);
+		mlx5_cmd_dealloc_memic(&dev->dm, mdm->dev_addr,
+				       mdm->size);
+		kfree(mdm);
+		break;
+	default:
+		WARN_ON(true);
+	}
+}
+
 static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
 		    struct vm_area_struct *vma,
 		    struct mlx5_ib_ucontext *context)
@@ -2186,26 +2204,55 @@  static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
 	return err;
 }
 
-static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+static int add_dm_mmap_entry(struct ib_ucontext *context,
+			     struct mlx5_ib_dm *mdm,
+			     u64 address)
+{
+	mdm->mentry.mmap_flag = MLX5_IB_MMAP_TYPE_MEMIC;
+	mdm->mentry.address = address;
+	return rdma_user_mmap_entry_insert_range(
+			context, &mdm->mentry.rdma_entry,
+			mdm->size,
+			MLX5_IB_MMAP_DEVICE_MEM << 16,
+			(MLX5_IB_MMAP_DEVICE_MEM << 16) + (1UL << 16) - 1);
+}
+
+static unsigned long mlx5_vma_to_pgoff(struct vm_area_struct *vma)
+{
+	unsigned long idx;
+	u8 command;
+
+	command = get_command(vma->vm_pgoff);
+	idx = get_extended_index(vma->vm_pgoff);
+
+	return (command << 16 | idx);
+}
+
+static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev,
+			       struct vm_area_struct *vma,
+			       struct ib_ucontext *ucontext)
 {
-	struct mlx5_ib_ucontext *mctx = to_mucontext(context);
-	struct mlx5_ib_dev *dev = to_mdev(context->device);
-	u16 page_idx = get_extended_index(vma->vm_pgoff);
-	size_t map_size = vma->vm_end - vma->vm_start;
-	u32 npages = map_size >> PAGE_SHIFT;
+	struct mlx5_user_mmap_entry *mentry;
+	struct rdma_user_mmap_entry *entry;
+	unsigned long pgoff;
+	pgprot_t prot;
 	phys_addr_t pfn;
+	int ret;
 
-	if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
-	    page_idx + npages)
+	pgoff = mlx5_vma_to_pgoff(vma);
+	entry = rdma_user_mmap_entry_get_pgoff(ucontext, pgoff);
+	if (!entry)
 		return -EINVAL;
 
-	pfn = ((dev->mdev->bar_addr +
-	      MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
-	      PAGE_SHIFT) +
-	      page_idx;
-	return rdma_user_mmap_io(context, vma, pfn, map_size,
-				 pgprot_writecombine(vma->vm_page_prot),
-				 NULL);
+	mentry = to_mmmap(entry);
+	pfn = (mentry->address >> PAGE_SHIFT);
+	prot = pgprot_writecombine(vma->vm_page_prot);
+	ret = rdma_user_mmap_io(ucontext, vma, pfn,
+				entry->npages * PAGE_SIZE,
+				prot,
+				entry);
+	rdma_user_mmap_entry_put(&mentry->rdma_entry);
+	return ret;
 }
 
 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
@@ -2248,11 +2295,8 @@  static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
 	case MLX5_IB_MMAP_CLOCK_INFO:
 		return mlx5_ib_mmap_clock_info_page(dev, vma, context);
 
-	case MLX5_IB_MMAP_DEVICE_MEM:
-		return dm_mmap(ibcontext, vma);
-
 	default:
-		return -EINVAL;
+		return mlx5_ib_mmap_offset(dev, vma, ibcontext);
 	}
 
 	return 0;
@@ -2288,8 +2332,9 @@  static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
 {
 	struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
 	u64 start_offset;
-	u32 page_idx;
+	u16 page_idx = 0;
 	int err;
+	u64 address;
 
 	dm->size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
 
@@ -2298,28 +2343,30 @@  static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
 	if (err)
 		return err;
 
-	page_idx = (dm->dev_addr - pci_resource_start(dm_db->dev->pdev, 0) -
-		    MLX5_CAP64_DEV_MEM(dm_db->dev, memic_bar_start_addr)) >>
-		    PAGE_SHIFT;
+	address = dm->dev_addr & PAGE_MASK;
+	err = add_dm_mmap_entry(ctx, dm, address);
+	if (err)
+		goto err_dealloc;
 
+	page_idx = dm->mentry.rdma_entry.start_pgoff & 0xFFFF;
 	err = uverbs_copy_to(attrs,
 			     MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
-			     &page_idx, sizeof(page_idx));
+			     &page_idx,
+			     sizeof(page_idx));
 	if (err)
-		goto err_dealloc;
+		goto err_copy;
 
 	start_offset = dm->dev_addr & ~PAGE_MASK;
 	err = uverbs_copy_to(attrs,
 			     MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
 			     &start_offset, sizeof(start_offset));
 	if (err)
-		goto err_dealloc;
-
-	bitmap_set(to_mucontext(ctx)->dm_pages, page_idx,
-		   DIV_ROUND_UP(dm->size, PAGE_SIZE));
+		goto err_copy;
 
 	return 0;
 
+err_copy:
+	rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
 err_dealloc:
 	mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
 
@@ -2423,23 +2470,13 @@  int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
 	struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
 	struct mlx5_core_dev *dev = to_mdev(ibdm->device)->mdev;
-	struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm;
 	struct mlx5_ib_dm *dm = to_mdm(ibdm);
-	u32 page_idx;
 	int ret;
 
 	switch (dm->type) {
 	case MLX5_IB_UAPI_DM_TYPE_MEMIC:
-		ret = mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
-		if (ret)
-			return ret;
-
-		page_idx = (dm->dev_addr - pci_resource_start(dev->pdev, 0) -
-			    MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr)) >>
-			    PAGE_SHIFT;
-		bitmap_clear(ctx->dm_pages, page_idx,
-			     DIV_ROUND_UP(dm->size, PAGE_SIZE));
-		break;
+		rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
+		return 0;
 	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
 		ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING,
 					     dm->size, ctx->devx_uid, dm->dev_addr,
@@ -6235,6 +6272,7 @@  static const struct ib_device_ops mlx5_ib_dev_ops = {
 	.map_mr_sg = mlx5_ib_map_mr_sg,
 	.map_mr_sg_pi = mlx5_ib_map_mr_sg_pi,
 	.mmap = mlx5_ib_mmap,
+	.mmap_free = mlx5_ib_mmap_free,
 	.modify_cq = mlx5_ib_modify_cq,
 	.modify_device = mlx5_ib_modify_device,
 	.modify_port = mlx5_ib_modify_port,
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 5986953ec2fa..b06f32ff5748 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -118,6 +118,10 @@  enum {
 	MLX5_MEMIC_BASE_SIZE	= 1 << MLX5_MEMIC_BASE_ALIGN,
 };
 
+enum mlx5_ib_mmap_type {
+	MLX5_IB_MMAP_TYPE_MEMIC = 1,
+};
+
 #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)                                        \
 	(MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
 #define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
@@ -135,7 +139,6 @@  struct mlx5_ib_ucontext {
 	u32			tdn;
 
 	u64			lib_caps;
-	DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
 	u16			devx_uid;
 	/* For RoCE LAG TX affinity */
 	atomic_t		tx_port_affinity;
@@ -556,6 +559,12 @@  enum mlx5_ib_mtt_access_flags {
 	MLX5_IB_MTT_WRITE = (1 << 1),
 };
 
+struct mlx5_user_mmap_entry {
+	struct rdma_user_mmap_entry rdma_entry;
+	u8 mmap_flag;
+	u64 address;
+};
+
 struct mlx5_ib_dm {
 	struct ib_dm		ibdm;
 	phys_addr_t		dev_addr;
@@ -567,6 +576,7 @@  struct mlx5_ib_dm {
 		} icm_dm;
 		/* other dm types specific params should be added here */
 	};
+	struct mlx5_user_mmap_entry mentry;
 };
 
 #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
@@ -1101,6 +1111,13 @@  to_mflow_act(struct ib_flow_action *ibact)
 	return container_of(ibact, struct mlx5_ib_flow_action, ib_action);
 }
 
+static inline struct mlx5_user_mmap_entry *
+to_mmmap(struct rdma_user_mmap_entry *rdma_entry)
+{
+	return container_of(rdma_entry,
+		struct mlx5_user_mmap_entry, rdma_entry);
+}
+
 int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
 			struct ib_udata *udata, unsigned long virt,
 			struct mlx5_db *db);