
[v8,1/5] RDMA/umem: Support importing dma-buf as user memory region

Message ID 1604616489-69267-2-git-send-email-jianxin.xiong@intel.com (mailing list archive)
State New, archived
Series RDMA: Add dma-buf support

Commit Message

Xiong, Jianxin Nov. 5, 2020, 10:48 p.m. UTC
Dma-buf is a standard cross-driver buffer sharing mechanism that can be
used to support peer-to-peer access from RDMA devices.

Device memory exported via dma-buf is associated with a file descriptor.
This is passed to the user space as a property associated with the
buffer allocation. When the buffer is registered as a memory region,
the file descriptor is passed to the RDMA driver along with other
parameters.

Implement the common code for importing a dma-buf object and mapping
dma-buf pages.
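
For illustration, a driver-side sketch of how these helpers are intended to be
used; the my_* names and the minimal move_notify body are assumptions for this
example, not part of the patch:

static void my_dmabuf_move_notify(struct dma_buf_attachment *attach)
{
	struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;

	/* called by the exporter with the reservation lock held; a real
	 * driver must also invalidate its HW translation before unmapping */
	ib_umem_dmabuf_unmap_pages(umem_dmabuf);
}

static const struct dma_buf_attach_ops my_dmabuf_attach_ops = {
	.allow_peer2peer = true,
	.move_notify = my_dmabuf_move_notify,
};

	/* in the driver's MR registration path */
	umem = ib_umem_dmabuf_get(ibdev, offset, length, fd, access,
				  &my_dmabuf_attach_ops);
	if (IS_ERR(umem))
		return PTR_ERR(umem);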

Signed-off-by: Jianxin Xiong <jianxin.xiong@intel.com>
Reviewed-by: Sean Hefty <sean.hefty@intel.com>
Acked-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Acked-by: Christian Koenig <christian.koenig@amd.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/infiniband/core/Makefile      |   2 +-
 drivers/infiniband/core/umem.c        |   4 +
 drivers/infiniband/core/umem_dmabuf.c | 193 ++++++++++++++++++++++++++++++++++
 drivers/infiniband/core/umem_dmabuf.h |  11 ++
 include/rdma/ib_umem.h                |  39 ++++++-
 5 files changed, 247 insertions(+), 2 deletions(-)
 create mode 100644 drivers/infiniband/core/umem_dmabuf.c
 create mode 100644 drivers/infiniband/core/umem_dmabuf.h

Comments

Jason Gunthorpe Nov. 6, 2020, 12:08 a.m. UTC | #1
On Thu, Nov 05, 2020 at 02:48:05PM -0800, Jianxin Xiong wrote:
> +	/* modify the sgl in-place to match umem address and length */
> +
> +	start = ALIGN_DOWN(umem_dmabuf->umem.address, PAGE_SIZE);
> +	end = ALIGN(umem_dmabuf->umem.address + umem_dmabuf->umem.length,
> +		    PAGE_SIZE);
> +	cur = 0;
> +	nmap = 0;
> +	for_each_sgtable_dma_sg(sgt, sg, i) {
> +		if (cur >= end)
> +			break;
> +		if (cur + sg_dma_len(sg) <= start) {
> +			cur += sg_dma_len(sg);
> +			continue;
> +		}

This seems like a strange way to compute intersections

  if (cur <= start && start < cur + sg_dma_len(sg))

> +		if (cur <= start) {
> +			unsigned long offset = start - cur;
> +
> +			umem_dmabuf->first_sg = sg;
> +			umem_dmabuf->first_sg_offset = offset;
> +			sg_dma_address(sg) += offset;
> +			sg_dma_len(sg) -= offset;
> +			if (&sg_dma_len(sg) != &sg->length)
> +				sg->length -= offset;

We don't need to adjust sg->length, only dma_len, so no reason for
this surprising if.

> +			cur += offset;
> +		}
> +		if (cur + sg_dma_len(sg) >= end) {

Same logic here

> +			unsigned long trim = cur + sg_dma_len(sg) - end;
> +
> +			umem_dmabuf->last_sg = sg;
> +			umem_dmabuf->last_sg_trim = trim;
> +			sg_dma_len(sg) -= trim;
> +			if (&sg_dma_len(sg) != &sg->length)
> +				sg->length -= trim;

break, things are done here

> +		}
> +		cur += sg_dma_len(sg);
> +		nmap++;
> +	}
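
Put together, the loop Jason is describing might read roughly as follows; an
untested sketch that uses explicit intersection tests, drops the sg->length
adjustment, and breaks once the tail is trimmed:

	cur = 0;
	nmap = 0;
	for_each_sgtable_dma_sg(sgt, sg, i) {
		/* entries entirely below the aligned start */
		if (cur + sg_dma_len(sg) <= start) {
			cur += sg_dma_len(sg);
			continue;
		}
		/* first entry intersecting start: trim the front */
		if (cur <= start && start < cur + sg_dma_len(sg)) {
			unsigned long offset = start - cur;

			umem_dmabuf->first_sg = sg;
			umem_dmabuf->first_sg_offset = offset;
			sg_dma_address(sg) += offset;
			sg_dma_len(sg) -= offset;
			cur += offset;
		}
		/* last entry intersecting end: trim the tail and stop */
		if (cur < end && end <= cur + sg_dma_len(sg)) {
			unsigned long trim = cur + sg_dma_len(sg) - end;

			umem_dmabuf->last_sg = sg;
			umem_dmabuf->last_sg_trim = trim;
			sg_dma_len(sg) -= trim;
			cur += sg_dma_len(sg);
			nmap++;
			break;
		}
		cur += sg_dma_len(sg);
		nmap++;
	}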

> +	
> +	umem_dmabuf->umem.sg_head.sgl = umem_dmabuf->first_sg;
> +	umem_dmabuf->umem.sg_head.nents = nmap;
> +	umem_dmabuf->umem.nmap = nmap;
> +	umem_dmabuf->sgt = sgt;
> +
> +	page_size = ib_umem_find_best_pgsz(&umem_dmabuf->umem, PAGE_SIZE,
> +					   umem_dmabuf->umem.iova);
> +
> +	if (WARN_ON(cur != end || page_size != PAGE_SIZE)) {

Looks like nothing prevents this WARN_ON from triggering

The user could specify a length that is beyond
the dma buf, can the dma buf length be checked during get?

Also page_size can be 0 because iova is not OK. iova should be checked
for alignment during get as well:

  iova & (PAGE_SIZE-1) == umem->addr & (PAGE_SIZE-1)
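
Expressed as code, that get-time check might look like the following (the
helper name is made up for illustration):

	/* iova must have the same offset within a page as the umem address */
	static inline bool ib_umem_dmabuf_iova_aligned(struct ib_umem *umem)
	{
		return (umem->iova & (PAGE_SIZE - 1)) ==
		       (umem->address & (PAGE_SIZE - 1));
	}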

But yes, this is the right idea

Jason
Xiong, Jianxin Nov. 6, 2020, 4:34 p.m. UTC | #2
> -----Original Message-----
> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Thursday, November 05, 2020 4:09 PM
> To: Xiong, Jianxin <jianxin.xiong@intel.com>
> Cc: linux-rdma@vger.kernel.org; dri-devel@lists.freedesktop.org; Doug Ledford <dledford@redhat.com>; Leon Romanovsky
> <leon@kernel.org>; Sumit Semwal <sumit.semwal@linaro.org>; Christian Koenig <christian.koenig@amd.com>; Vetter, Daniel
> <daniel.vetter@intel.com>
> Subject: Re: [PATCH v8 1/5] RDMA/umem: Support importing dma-buf as user memory region
> 
> On Thu, Nov 05, 2020 at 02:48:05PM -0800, Jianxin Xiong wrote:
> > +	/* modify the sgl in-place to match umem address and length */
> > +
> > +	start = ALIGN_DOWN(umem_dmabuf->umem.address, PAGE_SIZE);
> > +	end = ALIGN(umem_dmabuf->umem.address + umem_dmabuf->umem.length,
> > +		    PAGE_SIZE);
> > +	cur = 0;
> > +	nmap = 0;
> > +	for_each_sgtable_dma_sg(sgt, sg, i) {
> > +		if (cur >= end)
> > +			break;
> > +		if (cur + sg_dma_len(sg) <= start) {
> > +			cur += sg_dma_len(sg);
> > +			continue;
> > +		}
> 
> This seems like a strange way to compute intersections

I can rework that.

> 
>   if (cur <= start && start < cur + sg_dma_len(sg))
> 
> > +		if (cur <= start) {
> > +			unsigned long offset = start - cur;
> > +
> > +			umem_dmabuf->first_sg = sg;
> > +			umem_dmabuf->first_sg_offset = offset;
> > +			sg_dma_address(sg) += offset;
> > +			sg_dma_len(sg) -= offset;
> > +			if (&sg_dma_len(sg) != &sg->length)
> > +				sg->length -= offset;
> 
> We don't need to adjust sg->length, only dma_len, so no reason for this surprising if.
> 
> > +			cur += offset;
> > +		}
> > +		if (cur + sg_dma_len(sg) >= end) {
> 
> Same logic here
> 
> > +			unsigned long trim = cur + sg_dma_len(sg) - end;
> > +
> > +			umem_dmabuf->last_sg = sg;
> > +			umem_dmabuf->last_sg_trim = trim;
> > +			sg_dma_len(sg) -= trim;
> > +			if (&sg_dma_len(sg) != &sg->length)
> > +				sg->length -= trim;
> 
> break, things are done here
> 
> > +		}
> > +		cur += sg_dma_len(sg);
> > +		nmap++;
> > +	}
> 
> > +
> > +	umem_dmabuf->umem.sg_head.sgl = umem_dmabuf->first_sg;
> > +	umem_dmabuf->umem.sg_head.nents = nmap;
> > +	umem_dmabuf->umem.nmap = nmap;
> > +	umem_dmabuf->sgt = sgt;
> > +
> > +	page_size = ib_umem_find_best_pgsz(&umem_dmabuf->umem, PAGE_SIZE,
> > +					   umem_dmabuf->umem.iova);
> > +
> > +	if (WARN_ON(cur != end || page_size != PAGE_SIZE)) {
> 
> Looks like nothing prevents this WARN_ON from triggering
> 
> The user could specify a length that is beyond the dma buf, can the dma buf length be checked during get?

In order to check the length, the buffer needs to be mapped. That can be done.

> 
> Also page_size can be 0 because iova is not OK. iova should be checked for alignment during get as well:
> 
>   iova & (PAGE_SIZE-1) == umem->addr & (PAGE_SIZE-1)
> 

If ib_umem_dmabuf_map_pages is called during get this error is automatically caught.

> But yes, this is the right idea
> 
> Jason
Jason Gunthorpe Nov. 6, 2020, 4:39 p.m. UTC | #3
On Fri, Nov 06, 2020 at 04:34:07PM +0000, Xiong, Jianxin wrote:

> > The user could specify a length that is beyond the dma buf, can
> > the dma buf length be checked during get?
> 
> In order to check the length, the buffer needs to be mapped. That can be done.

Do DMA bufs even have definite, immutable lengths? Going to be a
problem if they can shrink

> > Also page_size can be 0 because iova is not OK. iova should be
> > checked for alignment during get as well:
> > 
> >   iova & (PAGE_SIZE-1) == umem->addr & (PAGE_SIZE-1)
> 
> If ib_umem_dmabuf_map_pages is called during get this error is automatically caught.

The ib_umem_find_best_pgsz() checks this equation, yes.

So you'd map the sgl before allocating the mkey? This then checks the
length and iova?

Jason
Xiong, Jianxin Nov. 6, 2020, 5:01 p.m. UTC | #4
> -----Original Message-----
> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Friday, November 06, 2020 8:40 AM
> To: Xiong, Jianxin <jianxin.xiong@intel.com>
> Cc: linux-rdma@vger.kernel.org; dri-devel@lists.freedesktop.org; Doug Ledford <dledford@redhat.com>; Leon Romanovsky
> <leon@kernel.org>; Sumit Semwal <sumit.semwal@linaro.org>; Christian Koenig <christian.koenig@amd.com>; Vetter, Daniel
> <daniel.vetter@intel.com>
> Subject: Re: [PATCH v8 1/5] RDMA/umem: Support importing dma-buf as user memory region
> 
> On Fri, Nov 06, 2020 at 04:34:07PM +0000, Xiong, Jianxin wrote:
> 
> > > The user could specify a length that is beyond the dma buf, can the
> > > dma buf length be checked during get?
> >
> > In order to check the length, the buffer needs to be mapped. That can be done.
> 
> Do DMA bufs even have definite, immutable lengths? Going to be a problem if they can shrink

Good question. The buffer itself has a fixed size. If for whatever reason the mapping
is not full, it must be temporary. If that does happen, ib_umem_dmabuf_map_pages
will undo the mapping and return an error. It will be retried later via the page-fault handler.

> 
> > > Also page_size can be 0 because iova is not OK. iova should be
> > > checked for alignment during get as well:
> > >
> > >   iova & (PAGE_SIZE-1) == umem->addr & (PAGE_SIZE-1)
> >
> > If ib_umem_dmabuf_map_pages is called during get this error is automatically caught.
> 
> The ib_umem_find_best_pgsz() checks this equation, yes.
> 
> So you'd map the sgl before allocating the mkey? This then checks the length and iova?

Yes

> 
> Jason
Daniel Vetter Nov. 10, 2020, 2:14 p.m. UTC | #5
On Fri, Nov 06, 2020 at 12:39:53PM -0400, Jason Gunthorpe wrote:
> On Fri, Nov 06, 2020 at 04:34:07PM +0000, Xiong, Jianxin wrote:
> 
> > > The user could specify a length that is beyond the dma buf, can
> > > the dma buf length be checked during get?
> > 
> > In order to check the length, the buffer needs to be mapped. That can be done.
> 
> Do DMA bufs even have definite, immutable lengths? Going to be a
> problem if they can shrink

Yup. Unfortunately that's not documented in the structures themselves;
there are just some random comments sprinkled all over that dma-buf size is
invariant, e.g. see the @mmap kerneldoc in dma_buf_ops:

https://dri.freedesktop.org/docs/drm/driver-api/dma-buf.html?highlight=dma_buf_ops#c.dma_buf_ops

"Because dma-buf buffers have invariant size over their lifetime, ..."

Jianxin, can you pls do a kerneldoc patch which updates the comment for
dma_buf.size and dma_buf_export_info.size?
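
Roughly the sort of wording such an update might add for @size in struct
dma_buf and struct dma_buf_export_info (illustrative only, not the actual
follow-up patch):

	/**
	 * @size: size of the buffer in bytes; invariant over the lifetime
	 *	of the buffer, i.e. a dma-buf can never be resized after
	 *	export, and importers may rely on that.
	 */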

Thanks, Daniel

> 
> > > Also page_size can be 0 because iova is not OK. iova should be
> > > checked for alignment during get as well:
> > > 
> > >   iova & (PAGE_SIZE-1) == umem->addr & (PAGE_SIZE-1)
> > 
> > If ib_umem_dmabuf_map_pages is called during get this error is automatically caught.
> 
> The ib_umem_find_best_pgsz() checks this equation, yes.
> 
> So you'd map the sgl before allocating the mkey? This then checks the
> length and iova?
> 
> Jason
Jason Gunthorpe Nov. 10, 2020, 2:27 p.m. UTC | #6
On Tue, Nov 10, 2020 at 03:14:45PM +0100, Daniel Vetter wrote:
> On Fri, Nov 06, 2020 at 12:39:53PM -0400, Jason Gunthorpe wrote:
> > On Fri, Nov 06, 2020 at 04:34:07PM +0000, Xiong, Jianxin wrote:
> > 
> > > > The user could specify a length that is beyond the dma buf, can
> > > > the dma buf length be checked during get?
> > > 
> > > In order to check the length, the buffer needs to be mapped. That can be done.
> > 
> > Do DMA bufs even have definite, immutable lengths? Going to be a
> > problem if they can shrink
> 
> Yup. Unfortunately that's not documented in the structures themselves;
> there are just some random comments sprinkled all over that dma-buf size is
> invariant, e.g. see the @mmap kerneldoc in dma_buf_ops:
> 
> https://dri.freedesktop.org/docs/drm/driver-api/dma-buf.html?highlight=dma_buf_ops#c.dma_buf_ops
> 
> "Because dma-buf buffers have invariant size over their lifetime, ..."
> 
> Jianxin, can you pls do a kerneldoc patch which updates the comment for
> dma_buf.size and dma_buf_export_info.size?

So we can check the size without doing an attachment?

That means the flow should be put back to how it was a series or two
ago where the SGL is only attached during page fault with the entire
programming sequence under the resv lock
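
That earlier flow roughly amounts to doing the mapping on the fault path,
entirely under the reservation lock; a sketch, with the driver-side hook name
being hypothetical:

	/* hypothetical dma-buf MR page-fault handler */
	dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
	err = ib_umem_dmabuf_map_pages(umem_dmabuf);
	if (!err)
		err = driver_update_mr_translation(mr);	/* hypothetical */
	dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);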

Jason
Daniel Vetter Nov. 10, 2020, 2:44 p.m. UTC | #7
On Tue, Nov 10, 2020 at 10:27:57AM -0400, Jason Gunthorpe wrote:
> On Tue, Nov 10, 2020 at 03:14:45PM +0100, Daniel Vetter wrote:
> > On Fri, Nov 06, 2020 at 12:39:53PM -0400, Jason Gunthorpe wrote:
> > > On Fri, Nov 06, 2020 at 04:34:07PM +0000, Xiong, Jianxin wrote:
> > > 
> > > > > The user could specify a length that is beyond the dma buf, can
> > > > > the dma buf length be checked during get?
> > > > 
> > > > In order to check the length, the buffer needs to be mapped. That can be done.
> > > 
> > > Do DMA bufs even have definite, immutable lengths? Going to be a
> > > problem if they can shrink
> > 
> > Yup. Unfortunately that's not documented in the structures themselves;
> > there are just some random comments sprinkled all over that dma-buf size is
> > invariant, e.g. see the @mmap kerneldoc in dma_buf_ops:
> > 
> > https://dri.freedesktop.org/docs/drm/driver-api/dma-buf.html?highlight=dma_buf_ops#c.dma_buf_ops
> > 
> > "Because dma-buf buffers have invariant size over their lifetime, ..."
> > 
> > Jianxin, can you pls do a kerneldoc patch which updates the comment for
> > dma_buf.size and dma_buf_export_info.size?
> 
> So we can check the size without doing an attachment?

Yeah size should be invariant over the lifetime of the dma_buf (it's also
needed for dma_buf_vmap kernel cpu access and dma_buf_mmap userspace mmap
forwarding). No lock or attachment needed. But like I said, would be good
to have the kerneldoc patch to make this clear.
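
Given that, the get-time length check reduces to comparing against
dmabuf->size right after dma_buf_get(); a sketch against the v8 code below,
not the actual respin:

	/* 'end' is offset + size, already computed with check_add_overflow() */
	if (dmabuf->size < end) {
		ret = -EINVAL;
		goto out_release_dmabuf;
	}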

The history here is that the X shm extension suffered badly from
resizeable storage object exploits of the X server; this is why both
dma-buf (and also drm_gem_buffer_object before that generalization) and
memfd are size-sealed.
-Daniel

> That means the flow should be put back to how it was a series or two
> ago where the SGL is only attached during page fault with the entire
> programming sequence under the resv lock
> 
> Jason
Xiong, Jianxin Nov. 10, 2020, 5:23 p.m. UTC | #8
> -----Original Message-----
> From: Daniel Vetter <daniel@ffwll.ch>
> Sent: Tuesday, November 10, 2020 6:44 AM
> To: Jason Gunthorpe <jgg@ziepe.ca>
> Cc: Daniel Vetter <daniel@ffwll.ch>; Xiong, Jianxin <jianxin.xiong@intel.com>; Leon Romanovsky <leon@kernel.org>; linux-
> rdma@vger.kernel.org; dri-devel@lists.freedesktop.org; Doug Ledford <dledford@redhat.com>; Vetter, Daniel <daniel.vetter@intel.com>;
> Christian Koenig <christian.koenig@amd.com>
> Subject: Re: [PATCH v8 1/5] RDMA/umem: Support importing dma-buf as user memory region
> 
> On Tue, Nov 10, 2020 at 10:27:57AM -0400, Jason Gunthorpe wrote:
> > On Tue, Nov 10, 2020 at 03:14:45PM +0100, Daniel Vetter wrote:
> > > On Fri, Nov 06, 2020 at 12:39:53PM -0400, Jason Gunthorpe wrote:
> > > > On Fri, Nov 06, 2020 at 04:34:07PM +0000, Xiong, Jianxin wrote:
> > > >
> > > > > > The user could specify a length that is beyond the dma buf,
> > > > > > can the dma buf length be checked during get?
> > > > >
> > > > > In order to check the length, the buffer needs to be mapped. That can be done.
> > > >
> > > > Do DMA bufs even have definite, immutable lengths? Going to be a
> > > > problem if they can shrink
> > >
> > > Yup. Unfortunately that's not documented in the structures themselves;
> > > there are just some random comments sprinkled all over that dma-buf
> > > size is invariant, e.g. see the @mmap kerneldoc in dma_buf_ops:
> > >
> > > https://dri.freedesktop.org/docs/drm/driver-api/dma-buf.html?highlig
> > > ht=dma_buf_ops#c.dma_buf_ops
> > >
> > > "Because dma-buf buffers have invariant size over their lifetime, ..."
> > >
> > > Jianxin, can you pls do a kerneldoc patch which updates the comment
> > > for dma_buf.size and dma_buf_export_info.size?

Sure. 

> >
> > So we can check the size without doing an attachment?
> 
> Yeah size should be invariant over the lifetime of the dma_buf (it's also needed for dma_buf_vmap kernel cpu access and dma_buf_mmap
> userspace mmap forwarding). No lock or attachment needed. But like I said, would be good to have the kerneldoc patch to make this clear.
> 
> The history here is that the X shm extension suffered badly from resizeable storage object exploits of the X server; this is why both dma-buf
> (and also drm_gem_buffer_object before that generalization) and memfd are size-sealed.
> -Daniel
> 
> > That means the flow should be put back to how it was a series or two
> > ago where the SGL is only attached during page fault with the entire
> > programming sequence under the resv lock

Will do.

> >
> > Jason
> 
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

Patch

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index ccf2670..8ab4eea 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -40,5 +40,5 @@  ib_uverbs-y :=			uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
 				uverbs_std_types_srq.o \
 				uverbs_std_types_wq.o \
 				uverbs_std_types_qp.o
-ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
+ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o
 ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index f1fc7e3..4bc455f 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -2,6 +2,7 @@ 
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2020 Intel Corporation. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -43,6 +44,7 @@ 
 #include <rdma/ib_umem_odp.h>
 
 #include "uverbs.h"
+#include "umem_dmabuf.h"
 
 static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
 {
@@ -278,6 +280,8 @@  void ib_umem_release(struct ib_umem *umem)
 {
 	if (!umem)
 		return;
+	if (umem->is_dmabuf)
+		return ib_umem_dmabuf_release(to_ib_umem_dmabuf(umem));
 	if (umem->is_odp)
 		return ib_umem_odp_release(to_ib_umem_odp(umem));
 
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
new file mode 100644
index 0000000..ee82b7b
--- /dev/null
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -0,0 +1,193 @@ 
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright (c) 2020 Intel Corporation. All rights reserved.
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
+#include <linux/dma-mapping.h>
+
+#include "uverbs.h"
+#include "umem_dmabuf.h"
+
+int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
+{
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	struct dma_fence *fence;
+	unsigned long start, end, cur;
+	unsigned int nmap;
+	unsigned int page_size;
+	int i;
+
+	dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+	sgt = dma_buf_map_attachment(umem_dmabuf->attach,
+				     DMA_BIDIRECTIONAL);
+
+	if (IS_ERR(sgt))
+		return PTR_ERR(sgt);
+
+	/* modify the sgl in-place to match umem address and length */
+
+	start = ALIGN_DOWN(umem_dmabuf->umem.address, PAGE_SIZE);
+	end = ALIGN(umem_dmabuf->umem.address + umem_dmabuf->umem.length,
+		    PAGE_SIZE);
+	cur = 0;
+	nmap = 0;
+	for_each_sgtable_dma_sg(sgt, sg, i) {
+		if (cur >= end)
+			break;
+		if (cur + sg_dma_len(sg) <= start) {
+			cur += sg_dma_len(sg);
+			continue;
+		}
+		if (cur <= start) {
+			unsigned long offset = start - cur;
+
+			umem_dmabuf->first_sg = sg;
+			umem_dmabuf->first_sg_offset = offset;
+			sg_dma_address(sg) += offset;
+			sg_dma_len(sg) -= offset;
+			if (&sg_dma_len(sg) != &sg->length)
+				sg->length -= offset;
+			cur += offset;
+		}
+		if (cur + sg_dma_len(sg) >= end) {
+			unsigned long trim = cur + sg_dma_len(sg) - end;
+
+			umem_dmabuf->last_sg = sg;
+			umem_dmabuf->last_sg_trim = trim;
+			sg_dma_len(sg) -= trim;
+			if (&sg_dma_len(sg) != &sg->length)
+				sg->length -= trim;
+		}
+		cur += sg_dma_len(sg);
+		nmap++;
+	}
+	
+	umem_dmabuf->umem.sg_head.sgl = umem_dmabuf->first_sg;
+	umem_dmabuf->umem.sg_head.nents = nmap;
+	umem_dmabuf->umem.nmap = nmap;
+	umem_dmabuf->sgt = sgt;
+
+	page_size = ib_umem_find_best_pgsz(&umem_dmabuf->umem, PAGE_SIZE,
+					   umem_dmabuf->umem.iova);
+
+	if (WARN_ON(cur != end || page_size != PAGE_SIZE)) {
+		ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+		return -EINVAL;
+	}
+
+	/*
+	 * Although the sg list is valid now, the content of the pages
+	 * may not be up-to-date. Wait for the exporter to finish
+	 * the migration.
+	 */
+	fence = dma_resv_get_excl(umem_dmabuf->attach->dmabuf->resv);
+	if (fence)
+		dma_fence_wait(fence, false);
+
+	return 0;
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_map_pages);
+
+void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf)
+{
+	dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+	if (!umem_dmabuf->sgt)
+		return;
+
+	/* restore the original sgl */
+	sg_dma_address(umem_dmabuf->first_sg) -= umem_dmabuf->first_sg_offset;
+	sg_dma_len(umem_dmabuf->first_sg) += umem_dmabuf->first_sg_offset;
+	sg_dma_len(umem_dmabuf->last_sg) += umem_dmabuf->last_sg_trim;
+	if (&sg_dma_len(umem_dmabuf->first_sg) !=
+	    &umem_dmabuf->first_sg->length) {
+		umem_dmabuf->first_sg->length += umem_dmabuf->first_sg_offset;
+		umem_dmabuf->last_sg->length += umem_dmabuf->last_sg_trim;
+	}
+
+	dma_buf_unmap_attachment(umem_dmabuf->attach, umem_dmabuf->sgt,
+				 DMA_BIDIRECTIONAL);
+	umem_dmabuf->sgt = NULL;
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_unmap_pages);
+
+struct ib_umem *ib_umem_dmabuf_get(struct ib_device *device,
+				   unsigned long offset, size_t size,
+				   int fd, int access,
+				   const struct dma_buf_attach_ops *ops)
+{
+	struct dma_buf *dmabuf;
+	struct ib_umem_dmabuf *umem_dmabuf;
+	struct ib_umem *umem;
+	unsigned long end;
+	long ret;
+
+	if (check_add_overflow(offset, (unsigned long)size, &end))
+		return ERR_PTR(-EINVAL);
+
+	if (unlikely(PAGE_ALIGN(end) < PAGE_SIZE))
+		return ERR_PTR(-EINVAL);
+
+	if (unlikely(!ops || !ops->move_notify))
+		return ERR_PTR(-EINVAL);
+
+	umem_dmabuf = kzalloc(sizeof(*umem_dmabuf), GFP_KERNEL);
+	if (!umem_dmabuf)
+		return ERR_PTR(-ENOMEM);
+
+	umem = &umem_dmabuf->umem;
+	umem->ibdev = device;
+	umem->length = size;
+	umem->address = offset;
+	umem->iova = offset;
+	umem->writable = ib_access_writable(access);
+	umem->is_dmabuf = 1;
+
+	if (unlikely(!ib_umem_num_pages(umem))) {
+		ret = -EINVAL;
+		goto out_free_umem;
+	}
+
+	dmabuf = dma_buf_get(fd);
+	if (IS_ERR(dmabuf)) {
+		ret = PTR_ERR(dmabuf);
+		goto out_free_umem;
+	}
+
+	umem_dmabuf->attach = dma_buf_dynamic_attach(
+					dmabuf,
+					device->dma_device,
+					ops,
+					umem_dmabuf);
+	if (IS_ERR(umem_dmabuf->attach)) {
+		ret = PTR_ERR(umem_dmabuf->attach);
+		goto out_release_dmabuf;
+	}
+
+	return umem;
+
+out_release_dmabuf:
+	dma_buf_put(dmabuf);
+
+out_free_umem:
+	kfree(umem_dmabuf);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_get);
+
+void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf)
+{
+	struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf;
+
+	dma_resv_lock(dmabuf->resv, NULL);
+	ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+	dma_resv_unlock(dmabuf->resv);
+
+	dma_buf_detach(dmabuf, umem_dmabuf->attach);
+	dma_buf_put(dmabuf);
+	kfree(umem_dmabuf);
+}
diff --git a/drivers/infiniband/core/umem_dmabuf.h b/drivers/infiniband/core/umem_dmabuf.h
new file mode 100644
index 0000000..13acf55
--- /dev/null
+++ b/drivers/infiniband/core/umem_dmabuf.h
@@ -0,0 +1,11 @@ 
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2020 Intel Corporation. All rights reserved.
+ */
+
+#ifndef UMEM_DMABUF_H
+#define UMEM_DMABUF_H
+
+void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf);
+
+#endif /* UMEM_DMABUF_H */
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 7059750..e6e79bd 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -1,6 +1,7 @@ 
 /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
 /*
  * Copyright (c) 2007 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2020 Intel Corporation.  All rights reserved.
  */
 
 #ifndef IB_UMEM_H
@@ -13,6 +14,7 @@ 
 
 struct ib_ucontext;
 struct ib_umem_odp;
+struct dma_buf_attach_ops;
 
 struct ib_umem {
 	struct ib_device       *ibdev;
@@ -22,12 +24,29 @@  struct ib_umem {
 	unsigned long		address;
 	u32 writable : 1;
 	u32 is_odp : 1;
+	u32 is_dmabuf : 1;
 	struct work_struct	work;
 	struct sg_table sg_head;
 	int             nmap;
 	unsigned int    sg_nents;
 };
 
+struct ib_umem_dmabuf {
+	struct ib_umem umem;
+	struct dma_buf_attachment *attach;
+	struct sg_table *sgt;
+	struct scatterlist *first_sg;
+	struct scatterlist *last_sg;
+	unsigned long first_sg_offset;
+	unsigned long last_sg_trim;
+	void *private;
+};
+
+static inline struct ib_umem_dmabuf *to_ib_umem_dmabuf(struct ib_umem *umem)
+{
+	return container_of(umem, struct ib_umem_dmabuf, umem);
+}
+
 /* Returns the offset of the umem start relative to the first page. */
 static inline int ib_umem_offset(struct ib_umem *umem)
 {
@@ -79,6 +98,12 @@  int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
 unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 				     unsigned long pgsz_bitmap,
 				     unsigned long virt);
+struct ib_umem *ib_umem_dmabuf_get(struct ib_device *device,
+				   unsigned long offset, size_t size,
+				   int fd, int access,
+				   const struct dma_buf_attach_ops *ops);
+int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf);
+void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf);
 
 #else /* CONFIG_INFINIBAND_USER_MEM */
 
@@ -101,7 +126,19 @@  static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 {
 	return 0;
 }
+static inline struct ib_umem *ib_umem_dmabuf_get(struct ib_device *device,
+						 unsigned long offset,
+						 size_t size, int fd,
+						 int access,
+						 struct dma_buf_attach_ops *ops)
+{
+	return ERR_PTR(-EINVAL);
+}
+static inline int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
+{
+	return -EINVAL;
+}
+static inline void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) { }
 
 #endif /* CONFIG_INFINIBAND_USER_MEM */
-
 #endif /* IB_UMEM_H */