
[v8,1/5] RDMA/umem: Support importing dma-buf as user memory region

Message ID 1604616489-69267-2-git-send-email-jianxin.xiong@intel.com (mailing list archive)
State New, archived
Series RDMA: Add dma-buf support

Commit Message

Xiong, Jianxin Nov. 5, 2020, 10:48 p.m. UTC
Dma-buf is a standard cross-driver buffer sharing mechanism that can be
used to support peer-to-peer access from RDMA devices.

Device memory exported via dma-buf is associated with a file descriptor.
This is passed to the user space as a property associated with the
buffer allocation. When the buffer is registered as a memory region,
the file descriptor is passed to the RDMA driver along with other
parameters.

Implement the common code for importing a dma-buf object and mapping
dma-buf pages.
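
For illustration, a driver-side sketch of how these helpers are intended to be
used; the my_* names and the minimal move_notify body are assumptions for this
example, not part of the patch:

static void my_dmabuf_move_notify(struct dma_buf_attachment *attach)
{
	struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;

	/* called by the exporter with the reservation lock held; a real
	 * driver must also invalidate its HW translation before unmapping */
	ib_umem_dmabuf_unmap_pages(umem_dmabuf);
}

static const struct dma_buf_attach_ops my_dmabuf_attach_ops = {
	.allow_peer2peer = true,
	.move_notify = my_dmabuf_move_notify,
};

	/* in the driver's MR registration path */
	umem = ib_umem_dmabuf_get(ibdev, offset, length, fd, access,
				  &my_dmabuf_attach_ops);
	if (IS_ERR(umem))
		return PTR_ERR(umem);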

Signed-off-by: Jianxin Xiong <jianxin.xiong@intel.com>
Reviewed-by: Sean Hefty <sean.hefty@intel.com>
Acked-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Acked-by: Christian Koenig <christian.koenig@amd.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/infiniband/core/Makefile      |   2 +-
 drivers/infiniband/core/umem.c        |   4 +
 drivers/infiniband/core/umem_dmabuf.c | 193 ++++++++++++++++++++++++++++++++++
 drivers/infiniband/core/umem_dmabuf.h |  11 ++
 include/rdma/ib_umem.h                |  39 ++++++-
 5 files changed, 247 insertions(+), 2 deletions(-)
 create mode 100644 drivers/infiniband/core/umem_dmabuf.c
 create mode 100644 drivers/infiniband/core/umem_dmabuf.h

Comments

Jason Gunthorpe Nov. 6, 2020, 12:08 a.m. UTC | #1
On Thu, Nov 05, 2020 at 02:48:05PM -0800, Jianxin Xiong wrote:
> +	/* modify the sgl in-place to match umem address and length */
> +
> +	start = ALIGN_DOWN(umem_dmabuf->umem.address, PAGE_SIZE);
> +	end = ALIGN(umem_dmabuf->umem.address + umem_dmabuf->umem.length,
> +		    PAGE_SIZE);
> +	cur = 0;
> +	nmap = 0;
> +	for_each_sgtable_dma_sg(sgt, sg, i) {
> +		if (cur >= end)
> +			break;
> +		if (cur + sg_dma_len(sg) <= start) {
> +			cur += sg_dma_len(sg);
> +			continue;
> +		}

This seems like a strange way to compute intersections

  if (cur <= start && start < cur + sg_dma_len(sg))

> +		if (cur <= start) {
> +			unsigned long offset = start - cur;
> +
> +			umem_dmabuf->first_sg = sg;
> +			umem_dmabuf->first_sg_offset = offset;
> +			sg_dma_address(sg) += offset;
> +			sg_dma_len(sg) -= offset;
> +			if (&sg_dma_len(sg) != &sg->length)
> +				sg->length -= offset;

We don't need to adjust sg->length, only dma_len, so no reason for
this surprising if.

> +			cur += offset;
> +		}
> +		if (cur + sg_dma_len(sg) >= end) {

Same logic here

> +			unsigned long trim = cur + sg_dma_len(sg) - end;
> +
> +			umem_dmabuf->last_sg = sg;
> +			umem_dmabuf->last_sg_trim = trim;
> +			sg_dma_len(sg) -= trim;
> +			if (&sg_dma_len(sg) != &sg->length)
> +				sg->length -= trim;

break, things are done here

> +		}
> +		cur += sg_dma_len(sg);
> +		nmap++;
> +	}
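
Put together, the loop Jason is describing might read roughly as follows; an
untested sketch that uses explicit intersection tests, drops the sg->length
adjustment, and breaks once the tail is trimmed:

	cur = 0;
	nmap = 0;
	for_each_sgtable_dma_sg(sgt, sg, i) {
		/* entries entirely below the aligned start */
		if (cur + sg_dma_len(sg) <= start) {
			cur += sg_dma_len(sg);
			continue;
		}
		/* first entry intersecting start: trim the front */
		if (cur <= start && start < cur + sg_dma_len(sg)) {
			unsigned long offset = start - cur;

			umem_dmabuf->first_sg = sg;
			umem_dmabuf->first_sg_offset = offset;
			sg_dma_address(sg) += offset;
			sg_dma_len(sg) -= offset;
			cur += offset;
		}
		/* last entry intersecting end: trim the tail and stop */
		if (cur < end && end <= cur + sg_dma_len(sg)) {
			unsigned long trim = cur + sg_dma_len(sg) - end;

			umem_dmabuf->last_sg = sg;
			umem_dmabuf->last_sg_trim = trim;
			sg_dma_len(sg) -= trim;
			cur += sg_dma_len(sg);
			nmap++;
			break;
		}
		cur += sg_dma_len(sg);
		nmap++;
	}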

> +	
> +	umem_dmabuf->umem.sg_head.sgl = umem_dmabuf->first_sg;
> +	umem_dmabuf->umem.sg_head.nents = nmap;
> +	umem_dmabuf->umem.nmap = nmap;
> +	umem_dmabuf->sgt = sgt;
> +
> +	page_size = ib_umem_find_best_pgsz(&umem_dmabuf->umem, PAGE_SIZE,
> +					   umem_dmabuf->umem.iova);
> +
> +	if (WARN_ON(cur != end || page_size != PAGE_SIZE)) {

Looks like nothing prevents this WARN_ON from triggering

The user could specify a length that is beyond
the dma buf, can the dma buf length be checked during get?

Also page_size can be 0 because iova is not OK. iova should be checked
for alignment during get as well:

  iova & (PAGE_SIZE-1) == umem->addr & (PAGE_SIZE-1)
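
Expressed as code, that get-time check might look like the following (the
helper name is made up for illustration):

	/* iova must have the same offset within a page as the umem address */
	static inline bool ib_umem_dmabuf_iova_aligned(struct ib_umem *umem)
	{
		return (umem->iova & (PAGE_SIZE - 1)) ==
		       (umem->address & (PAGE_SIZE - 1));
	}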

But yes, this is the right idea

Jason
Xiong, Jianxin Nov. 6, 2020, 4:34 p.m. UTC | #2
> -----Original Message-----
> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Thursday, November 05, 2020 4:09 PM
> To: Xiong, Jianxin <jianxin.xiong@intel.com>
> Cc: linux-rdma@vger.kernel.org; dri-devel@lists.freedesktop.org; Doug Ledford <dledford@redhat.com>; Leon Romanovsky
> <leon@kernel.org>; Sumit Semwal <sumit.semwal@linaro.org>; Christian Koenig <christian.koenig@amd.com>; Vetter, Daniel
> <daniel.vetter@intel.com>
> Subject: Re: [PATCH v8 1/5] RDMA/umem: Support importing dma-buf as user memory region
> 
> On Thu, Nov 05, 2020 at 02:48:05PM -0800, Jianxin Xiong wrote:
> > +	/* modify the sgl in-place to match umem address and length */
> > +
> > +	start = ALIGN_DOWN(umem_dmabuf->umem.address, PAGE_SIZE);
> > +	end = ALIGN(umem_dmabuf->umem.address + umem_dmabuf->umem.length,
> > +		    PAGE_SIZE);
> > +	cur = 0;
> > +	nmap = 0;
> > +	for_each_sgtable_dma_sg(sgt, sg, i) {
> > +		if (cur >= end)
> > +			break;
> > +		if (cur + sg_dma_len(sg) <= start) {
> > +			cur += sg_dma_len(sg);
> > +			continue;
> > +		}
> 
> This seems like a strange way to compute intersections

I can rework that.

> 
>   if (cur <= start && start < cur + sg_dma_len(sg))
> 
> > +		if (cur <= start) {
> > +			unsigned long offset = start - cur;
> > +
> > +			umem_dmabuf->first_sg = sg;
> > +			umem_dmabuf->first_sg_offset = offset;
> > +			sg_dma_address(sg) += offset;
> > +			sg_dma_len(sg) -= offset;
> > +			if (&sg_dma_len(sg) != &sg->length)
> > +				sg->length -= offset;
> 
> We don't need to adjust sg->length, only dma_len, so no reason for this surprising if.
> 
> > +			cur += offset;
> > +		}
> > +		if (cur + sg_dma_len(sg) >= end) {
> 
> Same logic here
> 
> > +			unsigned long trim = cur + sg_dma_len(sg) - end;
> > +
> > +			umem_dmabuf->last_sg = sg;
> > +			umem_dmabuf->last_sg_trim = trim;
> > +			sg_dma_len(sg) -= trim;
> > +			if (&sg_dma_len(sg) != &sg->length)
> > +				sg->length -= trim;
> 
> break, things are done here
> 
> > +		}
> > +		cur += sg_dma_len(sg);
> > +		nmap++;
> > +	}
> 
> > +
> > +	umem_dmabuf->umem.sg_head.sgl = umem_dmabuf->first_sg;
> > +	umem_dmabuf->umem.sg_head.nents = nmap;
> > +	umem_dmabuf->umem.nmap = nmap;
> > +	umem_dmabuf->sgt = sgt;
> > +
> > +	page_size = ib_umem_find_best_pgsz(&umem_dmabuf->umem, PAGE_SIZE,
> > +					   umem_dmabuf->umem.iova);
> > +
> > +	if (WARN_ON(cur != end || page_size != PAGE_SIZE)) {
> 
> Looks like nothing prevents this WARN_ON from triggering
> 
> The user could specify a length that is beyond the dma buf, can the dma buf length be checked during get?

In order to check the length, the buffer needs to be mapped. That can be done.

> 
> Also page_size can be 0 because iova is not OK. iova should be checked for alignment during get as well:
> 
>   iova & (PAGE_SIZE-1) == umem->addr & (PAGE_SIZE-1)
> 

If ib_umem_dmabuf_map_pages is called during get this error is automatically caught.

> But yes, this is the right idea
> 
> Jason
Jason Gunthorpe Nov. 6, 2020, 4:39 p.m. UTC | #3
On Fri, Nov 06, 2020 at 04:34:07PM +0000, Xiong, Jianxin wrote:

> > The user could specify a length that is beyond the dma buf, can
> > the dma buf length be checked during get?
> 
> In order to check the length, the buffer needs to be mapped. That can be done.

Do DMA bufs even have definite, immutable lengths? Going to be a
problem if they can shrink

> > Also page_size can be 0 because iova is not OK. iova should be
> > checked for alignment during get as well:
> > 
> >   iova & (PAGE_SIZE-1) == umem->addr & (PAGE_SIZE-1)
> 
> If ib_umem_dmabuf_map_pages is called during get this error is automatically caught.

The ib_umem_find_best_pgsz() checks this equation, yes.

So you'd map the sgl before allocating the mkey? This then checks the
length and iova?

Jason
Xiong, Jianxin Nov. 6, 2020, 5:01 p.m. UTC | #4
> -----Original Message-----
> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Friday, November 06, 2020 8:40 AM
> To: Xiong, Jianxin <jianxin.xiong@intel.com>
> Cc: linux-rdma@vger.kernel.org; dri-devel@lists.freedesktop.org; Doug Ledford <dledford@redhat.com>; Leon Romanovsky
> <leon@kernel.org>; Sumit Semwal <sumit.semwal@linaro.org>; Christian Koenig <christian.koenig@amd.com>; Vetter, Daniel
> <daniel.vetter@intel.com>
> Subject: Re: [PATCH v8 1/5] RDMA/umem: Support importing dma-buf as user memory region
> 
> On Fri, Nov 06, 2020 at 04:34:07PM +0000, Xiong, Jianxin wrote:
> 
> > > The user could specify a length that is beyond the dma buf, can the
> > > dma buf length be checked during get?
> >
> > In order to check the length, the buffer needs to be mapped. That can be done.
> 
> Do DMA bufs even have definite, immutable lengths? Going to be a problem if they can shrink

Good question. The buffer itself has a fixed size. If for whatever reason the mapping
is not full, it must be temporary. If that does happen, ib_umem_dmabuf_map_pages
will undo the mapping and return an error. It will be retried later via the page-fault handler.

> 
> > > Also page_size can be 0 because iova is not OK. iova should be
> > > checked for alignment during get as well:
> > >
> > >   iova & (PAGE_SIZE-1) == umem->addr & (PAGE_SIZE-1)
> >
> > If ib_umem_dmabuf_map_pages is called during get this error is automatically caught.
> 
> The ib_umem_find_best_pgsz() checks this equation, yes.
> 
> So you'd map the sgl before allocating the mkey? This then checks the length and iova?

Yes

> 
> Jason
Daniel Vetter Nov. 10, 2020, 2:14 p.m. UTC | #5
On Fri, Nov 06, 2020 at 12:39:53PM -0400, Jason Gunthorpe wrote:
> On Fri, Nov 06, 2020 at 04:34:07PM +0000, Xiong, Jianxin wrote:
> 
> > > The user could specify a length that is beyond the dma buf, can
> > > the dma buf length be checked during get?
> > 
> > In order to check the length, the buffer needs to be mapped. That can be done.
> 
> Do DMA bufs even have definite, immutable lengths? Going to be a
> problem if they can shrink

Yup. Unfortunately that's not documented in the structures themselves;
there are just some random comments sprinkled all over that dma-buf size is
invariant, e.g. see the @mmap kerneldoc in dma_buf_ops:

https://dri.freedesktop.org/docs/drm/driver-api/dma-buf.html?highlight=dma_buf_ops#c.dma_buf_ops

"Because dma-buf buffers have invariant size over their lifetime, ..."

Jianxin, can you pls do a kerneldoc patch which updates the comment for
dma_buf.size and dma_buf_export_info.size?
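
Roughly the sort of wording such an update might add for @size in struct
dma_buf and struct dma_buf_export_info (illustrative only, not the actual
follow-up patch):

	/**
	 * @size: size of the buffer in bytes; invariant over the lifetime
	 *	of the buffer, i.e. a dma-buf can never be resized after
	 *	export, and importers may rely on that.
	 */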

Thanks, Daniel

> 
> > > Also page_size can be 0 because iova is not OK. iova should be
> > > checked for alignment during get as well:
> > > 
> > >   iova & (PAGE_SIZE-1) == umem->addr & (PAGE_SIZE-1)
> > 
> > If ib_umem_dmabuf_map_pages is called during get this error is automatically caught.
> 
> The ib_umem_find_best_pgsz() checks this equation, yes.
> 
> So you'd map the sgl before allocating the mkey? This then checks the
> length and iova?
> 
> Jason
Jason Gunthorpe Nov. 10, 2020, 2:27 p.m. UTC | #6
On Tue, Nov 10, 2020 at 03:14:45PM +0100, Daniel Vetter wrote:
> On Fri, Nov 06, 2020 at 12:39:53PM -0400, Jason Gunthorpe wrote:
> > On Fri, Nov 06, 2020 at 04:34:07PM +0000, Xiong, Jianxin wrote:
> > 
> > > > The user could specify a length that is beyond the dma buf, can
> > > > the dma buf length be checked during get?
> > > 
> > > In order to check the length, the buffer needs to be mapped. That can be done.
> > 
> > Do DMA bufs even have definite, immutable lengths? Going to be a
> > problem if they can shrink
> 
> Yup. Unfortunately that's not documented in the structures themselves;
> there are just some random comments sprinkled all over that dma-buf size is
> invariant, e.g. see the @mmap kerneldoc in dma_buf_ops:
> 
> https://dri.freedesktop.org/docs/drm/driver-api/dma-buf.html?highlight=dma_buf_ops#c.dma_buf_ops
> 
> "Because dma-buf buffers have invariant size over their lifetime, ..."
> 
> Jianxin, can you pls do a kerneldoc patch which updates the comment for
> dma_buf.size and dma_buf_export_info.size?

So we can check the size without doing an attachment?

That means the flow should be put back to how it was a series or two
ago where the SGL is only attached during page fault with the entire
programming sequence under the resv lock
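
That earlier flow roughly amounts to doing the mapping on the fault path,
entirely under the reservation lock; a sketch, with the driver-side hook name
being hypothetical:

	/* hypothetical dma-buf MR page-fault handler */
	dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
	err = ib_umem_dmabuf_map_pages(umem_dmabuf);
	if (!err)
		err = driver_update_mr_translation(mr);	/* hypothetical */
	dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);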

Jason
Daniel Vetter Nov. 10, 2020, 2:44 p.m. UTC | #7
On Tue, Nov 10, 2020 at 10:27:57AM -0400, Jason Gunthorpe wrote:
> On Tue, Nov 10, 2020 at 03:14:45PM +0100, Daniel Vetter wrote:
> > On Fri, Nov 06, 2020 at 12:39:53PM -0400, Jason Gunthorpe wrote:
> > > On Fri, Nov 06, 2020 at 04:34:07PM +0000, Xiong, Jianxin wrote:
> > > 
> > > > > The user could specify a length that is beyond the dma buf, can
> > > > > the dma buf length be checked during get?
> > > > 
> > > > In order to check the length, the buffer needs to be mapped. That can be done.
> > > 
> > > Do DMA bufs even have definite, immutable lengths? Going to be a
> > > problem if they can shrink
> > 
> > Yup. Unfortunately that's not documented in the structures themselves;
> > there are just some random comments sprinkled all over that dma-buf size is
> > invariant, e.g. see the @mmap kerneldoc in dma_buf_ops:
> > 
> > https://dri.freedesktop.org/docs/drm/driver-api/dma-buf.html?highlight=dma_buf_ops#c.dma_buf_ops
> > 
> > "Because dma-buf buffers have invariant size over their lifetime, ..."
> > 
> > Jianxin, can you pls do a kerneldoc patch which updates the comment for
> > dma_buf.size and dma_buf_export_info.size?
> 
> So we can check the size without doing an attachment?

Yeah size should be invariant over the lifetime of the dma_buf (it's also
needed for dma_buf_vmap kernel cpu access and dma_buf_mmap userspace mmap
forwarding). No lock or attachment needed. But like I said, would be good
to have the kerneldoc patch to make this clear.
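
Given that, the get-time length check reduces to comparing against
dmabuf->size right after dma_buf_get(); a sketch against the v8 code below,
not the actual respin:

	/* 'end' is offset + size, already computed with check_add_overflow() */
	if (dmabuf->size < end) {
		ret = -EINVAL;
		goto out_release_dmabuf;
	}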

The history here is that the X shm extension suffered badly from
resizeable storage object exploits of the X server; this is why both
dma-buf (and also drm_gem_buffer_object before that generalization) and
memfd are size-sealed.
-Daniel

> That means the flow should be put back to how it was a series or two
> ago where the SGL is only attached during page fault with the entire
> programming sequence under the resv lock
> 
> Jason
Xiong, Jianxin Nov. 10, 2020, 5:23 p.m. UTC | #8
> -----Original Message-----
> From: Daniel Vetter <daniel@ffwll.ch>
> Sent: Tuesday, November 10, 2020 6:44 AM
> To: Jason Gunthorpe <jgg@ziepe.ca>
> Cc: Daniel Vetter <daniel@ffwll.ch>; Xiong, Jianxin <jianxin.xiong@intel.com>; Leon Romanovsky <leon@kernel.org>; linux-
> rdma@vger.kernel.org; dri-devel@lists.freedesktop.org; Doug Ledford <dledford@redhat.com>; Vetter, Daniel <daniel.vetter@intel.com>;
> Christian Koenig <christian.koenig@amd.com>
> Subject: Re: [PATCH v8 1/5] RDMA/umem: Support importing dma-buf as user memory region
> 
> On Tue, Nov 10, 2020 at 10:27:57AM -0400, Jason Gunthorpe wrote:
> > On Tue, Nov 10, 2020 at 03:14:45PM +0100, Daniel Vetter wrote:
> > > On Fri, Nov 06, 2020 at 12:39:53PM -0400, Jason Gunthorpe wrote:
> > > > On Fri, Nov 06, 2020 at 04:34:07PM +0000, Xiong, Jianxin wrote:
> > > >
> > > > > > The user could specify a length that is beyond the dma buf,
> > > > > > can the dma buf length be checked during get?
> > > > >
> > > > > In order to check the length, the buffer needs to be mapped. That can be done.
> > > >
> > > > Do DMA bufs even have definite, immutable lengths? Going to be a
> > > > problem if they can shrink
> > >
> > > Yup. Unfortunately that's not documented in the structures themselves;
> > > there are just some random comments sprinkled all over that dma-buf
> > > size is invariant, e.g. see the @mmap kerneldoc in dma_buf_ops:
> > >
> > > https://dri.freedesktop.org/docs/drm/driver-api/dma-buf.html?highlig
> > > ht=dma_buf_ops#c.dma_buf_ops
> > >
> > > "Because dma-buf buffers have invariant size over their lifetime, ..."
> > >
> > > Jianxin, can you pls do a kerneldoc patch which updates the comment
> > > for dma_buf.size and dma_buf_export_info.size?

Sure. 

> >
> > So we can check the size without doing an attachment?
> 
> Yeah size should be invariant over the lifetime of the dma_buf (it's also needed for dma_buf_vmap kernel cpu access and dma_buf_mmap
> userspace mmap forwarding). No lock or attachment needed. But like I said, would be good to have the kerneldoc patch to make this clear.
> 
> The history here is that the X shm extension suffered badly from resizeable storage object exploits of the X server; this is why both dma-buf
> (and also drm_gem_buffer_object before that generalization) and memfd are size-sealed.
> -Daniel
> 
> > That means the flow should be put back to how it was a series or two
> > ago where the SGL is only attached during page fault with the entire
> > programming sequence under the resv lock

Will do.

> >
> > Jason
> 
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

Patch

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index ccf2670..8ab4eea 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -40,5 +40,5 @@  ib_uverbs-y :=			uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
 				uverbs_std_types_srq.o \
 				uverbs_std_types_wq.o \
 				uverbs_std_types_qp.o
-ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
+ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o
 ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index f1fc7e3..4bc455f 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -2,6 +2,7 @@ 
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2020 Intel Corporation. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -43,6 +44,7 @@ 
 #include <rdma/ib_umem_odp.h>
 
 #include "uverbs.h"
+#include "umem_dmabuf.h"
 
 static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
 {
@@ -278,6 +280,8 @@  void ib_umem_release(struct ib_umem *umem)
 {
 	if (!umem)
 		return;
+	if (umem->is_dmabuf)
+		return ib_umem_dmabuf_release(to_ib_umem_dmabuf(umem));
 	if (umem->is_odp)
 		return ib_umem_odp_release(to_ib_umem_odp(umem));
 
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
new file mode 100644
index 0000000..ee82b7b
--- /dev/null
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -0,0 +1,193 @@ 
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright (c) 2020 Intel Corporation. All rights reserved.
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
+#include <linux/dma-mapping.h>
+
+#include "uverbs.h"
+#include "umem_dmabuf.h"
+
+int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
+{
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	struct dma_fence *fence;
+	unsigned long start, end, cur;
+	unsigned int nmap;
+	unsigned int page_size;
+	int i;
+
+	dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+	sgt = dma_buf_map_attachment(umem_dmabuf->attach,
+				     DMA_BIDIRECTIONAL);
+
+	if (IS_ERR(sgt))
+		return PTR_ERR(sgt);
+
+	/* modify the sgl in-place to match umem address and length */
+
+	start = ALIGN_DOWN(umem_dmabuf->umem.address, PAGE_SIZE);
+	end = ALIGN(umem_dmabuf->umem.address + umem_dmabuf->umem.length,
+		    PAGE_SIZE);
+	cur = 0;
+	nmap = 0;
+	for_each_sgtable_dma_sg(sgt, sg, i) {
+		if (cur >= end)
+			break;
+		if (cur + sg_dma_len(sg) <= start) {
+			cur += sg_dma_len(sg);
+			continue;
+		}
+		if (cur <= start) {
+			unsigned long offset = start - cur;
+
+			umem_dmabuf->first_sg = sg;
+			umem_dmabuf->first_sg_offset = offset;
+			sg_dma_address(sg) += offset;
+			sg_dma_len(sg) -= offset;
+			if (&sg_dma_len(sg) != &sg->length)
+				sg->length -= offset;
+			cur += offset;
+		}
+		if (cur + sg_dma_len(sg) >= end) {
+			unsigned long trim = cur + sg_dma_len(sg) - end;
+
+			umem_dmabuf->last_sg = sg;
+			umem_dmabuf->last_sg_trim = trim;
+			sg_dma_len(sg) -= trim;
+			if (&sg_dma_len(sg) != &sg->length)
+				sg->length -= trim;
+		}
+		cur += sg_dma_len(sg);
+		nmap++;
+	}
+	
+	umem_dmabuf->umem.sg_head.sgl = umem_dmabuf->first_sg;
+	umem_dmabuf->umem.sg_head.nents = nmap;
+	umem_dmabuf->umem.nmap = nmap;
+	umem_dmabuf->sgt = sgt;
+
+	page_size = ib_umem_find_best_pgsz(&umem_dmabuf->umem, PAGE_SIZE,
+					   umem_dmabuf->umem.iova);
+
+	if (WARN_ON(cur != end || page_size != PAGE_SIZE)) {
+		ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+		return -EINVAL;
+	}
+
+	/*
+	 * Although the sg list is valid now, the content of the pages
+	 * may not be up-to-date. Wait for the exporter to finish
+	 * the migration.
+	 */
+	fence = dma_resv_get_excl(umem_dmabuf->attach->dmabuf->resv);
+	if (fence)
+		dma_fence_wait(fence, false);
+
+	return 0;
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_map_pages);
+
+void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf)
+{
+	dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+	if (!umem_dmabuf->sgt)
+		return;
+
+	/* restore the original sgl */
+	sg_dma_address(umem_dmabuf->first_sg) -= umem_dmabuf->first_sg_offset;
+	sg_dma_len(umem_dmabuf->first_sg) += umem_dmabuf->first_sg_offset;
+	sg_dma_len(umem_dmabuf->last_sg) += umem_dmabuf->last_sg_trim;
+	if (&sg_dma_len(umem_dmabuf->first_sg) !=
+	    &umem_dmabuf->first_sg->length) {
+		umem_dmabuf->first_sg->length += umem_dmabuf->first_sg_offset;
+		umem_dmabuf->last_sg->length += umem_dmabuf->last_sg_trim;
+	}
+
+	dma_buf_unmap_attachment(umem_dmabuf->attach, umem_dmabuf->sgt,
+				 DMA_BIDIRECTIONAL);
+	umem_dmabuf->sgt = NULL;
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_unmap_pages);
+
+struct ib_umem *ib_umem_dmabuf_get(struct ib_device *device,
+				   unsigned long offset, size_t size,
+				   int fd, int access,
+				   const struct dma_buf_attach_ops *ops)
+{
+	struct dma_buf *dmabuf;
+	struct ib_umem_dmabuf *umem_dmabuf;
+	struct ib_umem *umem;
+	unsigned long end;
+	long ret;
+
+	if (check_add_overflow(offset, (unsigned long)size, &end))
+		return ERR_PTR(-EINVAL);
+
+	if (unlikely(PAGE_ALIGN(end) < PAGE_SIZE))
+		return ERR_PTR(-EINVAL);
+
+	if (unlikely(!ops || !ops->move_notify))
+		return ERR_PTR(-EINVAL);
+
+	umem_dmabuf = kzalloc(sizeof(*umem_dmabuf), GFP_KERNEL);
+	if (!umem_dmabuf)
+		return ERR_PTR(-ENOMEM);
+
+	umem = &umem_dmabuf->umem;
+	umem->ibdev = device;
+	umem->length = size;
+	umem->address = offset;
+	umem->iova = offset;
+	umem->writable = ib_access_writable(access);
+	umem->is_dmabuf = 1;
+
+	if (unlikely(!ib_umem_num_pages(umem))) {
+		ret = -EINVAL;
+		goto out_free_umem;
+	}
+
+	dmabuf = dma_buf_get(fd);
+	if (IS_ERR(dmabuf)) {
+		ret = PTR_ERR(dmabuf);
+		goto out_free_umem;
+	}
+
+	umem_dmabuf->attach = dma_buf_dynamic_attach(
+					dmabuf,
+					device->dma_device,
+					ops,
+					umem_dmabuf);
+	if (IS_ERR(umem_dmabuf->attach)) {
+		ret = PTR_ERR(umem_dmabuf->attach);
+		goto out_release_dmabuf;
+	}
+
+	return umem;
+
+out_release_dmabuf:
+	dma_buf_put(dmabuf);
+
+out_free_umem:
+	kfree(umem_dmabuf);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_get);
+
+void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf)
+{
+	struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf;
+
+	dma_resv_lock(dmabuf->resv, NULL);
+	ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+	dma_resv_unlock(dmabuf->resv);
+
+	dma_buf_detach(dmabuf, umem_dmabuf->attach);
+	dma_buf_put(dmabuf);
+	kfree(umem_dmabuf);
+}
diff --git a/drivers/infiniband/core/umem_dmabuf.h b/drivers/infiniband/core/umem_dmabuf.h
new file mode 100644
index 0000000..13acf55
--- /dev/null
+++ b/drivers/infiniband/core/umem_dmabuf.h
@@ -0,0 +1,11 @@ 
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2020 Intel Corporation. All rights reserved.
+ */
+
+#ifndef UMEM_DMABUF_H
+#define UMEM_DMABUF_H
+
+void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf);
+
+#endif /* UMEM_DMABUF_H */
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 7059750..e6e79bd 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -1,6 +1,7 @@ 
 /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
 /*
  * Copyright (c) 2007 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2020 Intel Corporation.  All rights reserved.
  */
 
 #ifndef IB_UMEM_H
@@ -13,6 +14,7 @@ 
 
 struct ib_ucontext;
 struct ib_umem_odp;
+struct dma_buf_attach_ops;
 
 struct ib_umem {
 	struct ib_device       *ibdev;
@@ -22,12 +24,29 @@  struct ib_umem {
 	unsigned long		address;
 	u32 writable : 1;
 	u32 is_odp : 1;
+	u32 is_dmabuf : 1;
 	struct work_struct	work;
 	struct sg_table sg_head;
 	int             nmap;
 	unsigned int    sg_nents;
 };
 
+struct ib_umem_dmabuf {
+	struct ib_umem umem;
+	struct dma_buf_attachment *attach;
+	struct sg_table *sgt;
+	struct scatterlist *first_sg;
+	struct scatterlist *last_sg;
+	unsigned long first_sg_offset;
+	unsigned long last_sg_trim;
+	void *private;
+};
+
+static inline struct ib_umem_dmabuf *to_ib_umem_dmabuf(struct ib_umem *umem)
+{
+	return container_of(umem, struct ib_umem_dmabuf, umem);
+}
+
 /* Returns the offset of the umem start relative to the first page. */
 static inline int ib_umem_offset(struct ib_umem *umem)
 {
@@ -79,6 +98,12 @@  int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
 unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 				     unsigned long pgsz_bitmap,
 				     unsigned long virt);
+struct ib_umem *ib_umem_dmabuf_get(struct ib_device *device,
+				   unsigned long offset, size_t size,
+				   int fd, int access,
+				   const struct dma_buf_attach_ops *ops);
+int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf);
+void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf);
 
 #else /* CONFIG_INFINIBAND_USER_MEM */
 
@@ -101,7 +126,19 @@  static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 {
 	return 0;
 }
+static inline struct ib_umem *ib_umem_dmabuf_get(struct ib_device *device,
+						 unsigned long offset,
+						 size_t size, int fd,
+						 int access,
+						 struct dma_buf_attach_ops *ops)
+{
+	return ERR_PTR(-EINVAL);
+}
+static inline int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
+{
+	return -EINVAL;
+}
+static inline void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) { }
 
 #endif /* CONFIG_INFINIBAND_USER_MEM */
-
 #endif /* IB_UMEM_H */