
[v2,06/17] RDMA/umem: Split ib_umem_num_pages() into ib_umem_num_dma_blocks()

Message ID 6-v2-270386b7e60b+28f4-umem_1_jgg@nvidia.com (mailing list archive)
State Accepted
Delegated to: Jason Gunthorpe
Series RDMA: Improve use of umem in DMA drivers

Commit Message

Jason Gunthorpe Sept. 4, 2020, 10:41 p.m. UTC
ib_num_pages() should only be used by things working with the SGL in CPU
pages directly.

Drivers building DMA lists should use the new ib_umem_num_dma_blocks(),
which returns the number of blocks rdma_umem_for_each_dma_block() will
return.

Making this general for DMA drivers requires a different implementation.
Computing the DMA block count from umem->address only works if the
requested page size is < PAGE_SIZE and/or the IOVA == umem->address.

Instead, the number of DMA blocks should be computed in the IOVA address
space, not from umem->address. Thus the IOVA has to be stored inside the
umem so it can be used for these calculations.

For now set it to umem->address by default and fix it up if
ib_umem_find_best_pgsz() was called. This allows drivers to be converted
to ib_umem_num_dma_blocks() safely.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/umem.c               |  7 ++++++-
 drivers/infiniband/hw/cxgb4/mem.c            |  2 +-
 drivers/infiniband/hw/mlx5/mem.c             |  4 ++--
 drivers/infiniband/hw/mthca/mthca_provider.c |  2 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c |  2 +-
 include/rdma/ib_umem.h                       | 15 ++++++++++++---
 6 files changed, 23 insertions(+), 9 deletions(-)
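
To make the arithmetic concrete, here is an editorial illustration (it is
not part of the patch or the thread): a standalone userspace demo of why
the block count has to be computed in IOVA space once the block size
exceeds PAGE_SIZE. The addresses, the num_blocks() helper and the local
ALIGN()/ALIGN_DOWN() definitions are invented for the demo.

/* Editorial demo: a 2 MiB region mapped at an unaligned process VA but a
 * 2 MiB-aligned IOVA.  Counting in VA space sizes the page table for two
 * 2 MiB blocks, while the HW only needs one block at the IOVA.
 */
#include <stdint.h>
#include <stdio.h>

#define ALIGN_DOWN(x, a)  ((x) & ~((uint64_t)(a) - 1))
#define ALIGN(x, a)       ALIGN_DOWN((x) + (a) - 1, a)

static uint64_t num_blocks(uint64_t base, uint64_t length, uint64_t pgsz)
{
	return (ALIGN(base + length, pgsz) - ALIGN_DOWN(base, pgsz)) / pgsz;
}

int main(void)
{
	uint64_t length = 0x200000;	/* 2 MiB region */
	uint64_t pgsz   = 0x200000;	/* 2 MiB DMA block size */
	uint64_t va     = 0x1ff000;	/* umem->address, not 2 MiB aligned */
	uint64_t iova   = 0x200000;	/* HW IOVA, 2 MiB aligned */

	printf("blocks from umem->address: %llu\n",	/* prints 2 */
	       (unsigned long long)num_blocks(va, length, pgsz));
	printf("blocks from umem->iova:    %llu\n",	/* prints 1 */
	       (unsigned long long)num_blocks(iova, length, pgsz));
	return 0;
}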

Comments

Gal Pressman Sept. 7, 2020, 12:16 p.m. UTC | #1
On 05/09/2020 1:41, Jason Gunthorpe wrote:
> ib_num_pages() should only be used by things working with the SGL in CPU

Nit: ib_umem_num_pages().
Jason Gunthorpe Sept. 11, 2020, 1:21 p.m. UTC | #2
On Fri, Sep 04, 2020 at 07:41:47PM -0300, Jason Gunthorpe wrote:
> @@ -33,11 +34,17 @@ static inline int ib_umem_offset(struct ib_umem *umem)
>  	return umem->address & ~PAGE_MASK;
>  }
>  
> +static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem,
> +					    unsigned long pgsz)
> +{
> +	return (ALIGN(umem->iova + umem->length, pgsz) -
> +		ALIGN_DOWN(umem->iova, pgsz)) /
> +	       pgsz;
> +}

0-day says this triggers a __udivdi3 link error on 32-bit builds because
iova is 64 bit, so I'll change this to:

 static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem,
                                            unsigned long pgsz)
 {
-       return (ALIGN(umem->iova + umem->length, pgsz) -
-               ALIGN_DOWN(umem->iova, pgsz)) /
+       return (size_t)((ALIGN(umem->iova + umem->length, pgsz) -
+                        ALIGN_DOWN(umem->iova, pgsz))) /
               pgsz;
 }
 

Jason
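
As an aside (editorial, not part of the thread): the two helpers are meant
to be used together. ib_umem_num_dma_blocks() sizes the driver's page list,
and rdma_umem_for_each_dma_block() then fills it with exactly that many DMA
addresses. A minimal sketch of a converted registration path follows;
example_build_page_list(), the pbl[] array and the error handling are
hypothetical stand-ins for driver-specific machinery.

/* Editorial sketch of a driver MR registration path after conversion. */
#include <linux/err.h>
#include <linux/sizes.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

static int example_build_page_list(struct ib_device *ibdev, u64 start,
				   u64 length, u64 virt, int access,
				   u64 *pbl, size_t pbl_entries)
{
	struct ib_block_iter biter;
	struct ib_umem *umem;
	unsigned long pgsz;
	size_t i = 0;

	umem = ib_umem_get(ibdev, start, length, access);
	if (IS_ERR(umem))
		return PTR_ERR(umem);

	/* Also fixes up umem->iova, so the block math is done in IOVA space */
	pgsz = ib_umem_find_best_pgsz(umem, SZ_4K | SZ_2M | SZ_1G, virt);
	if (!pgsz)
		goto err;

	/* Sizes the table; equals the number of loop iterations below */
	if (ib_umem_num_dma_blocks(umem, pgsz) > pbl_entries)
		goto err;

	rdma_umem_for_each_dma_block(umem, &biter, pgsz)
		pbl[i++] = rdma_block_iter_dma_address(&biter);

	/* A real driver would stash umem in its MR struct and release it
	 * at deregistration time.
	 */
	return 0;

err:
	ib_umem_release(umem);
	return -EINVAL;
}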

Patch

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index fb7630e7aac3a7..b57dbb14de8378 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -161,7 +161,7 @@  unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 	if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0))))
 		return 0;
 
-	va = virt;
+	umem->iova = va = virt;
 	/* The best result is the smallest page size that results in the minimum
 	 * number of required pages. Compute the largest page size that could
 	 * work based on VA address bits that don't change.
@@ -240,6 +240,11 @@  struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 	umem->ibdev      = device;
 	umem->length     = size;
 	umem->address    = addr;
+	/*
+	 * Drivers should call ib_umem_find_best_pgsz() to set the iova
+	 * correctly.
+	 */
+	umem->iova = addr;
 	umem->writable   = ib_access_writable(access);
 	umem->owning_mm = mm = current->mm;
 	mmgrab(mm);
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 82afdb1987eff6..22c8f5745047db 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -548,7 +548,7 @@  struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	shift = PAGE_SHIFT;
 
-	n = ib_umem_num_pages(mhp->umem);
+	n = ib_umem_num_dma_blocks(mhp->umem, 1 << shift);
 	err = alloc_pbl(mhp, n);
 	if (err)
 		goto err_umem_release;
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index c19ec9fd8a63c3..13de3d2edd34e3 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -169,8 +169,8 @@  void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 			  int page_shift, __be64 *pas, int access_flags)
 {
 	return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
-				      ib_umem_num_pages(umem), pas,
-				      access_flags);
+				      ib_umem_num_dma_blocks(umem, PAGE_SIZE),
+				      pas, access_flags);
 }
 int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
 {
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 317e67ad915fe8..b785fb9a2634ff 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -877,7 +877,7 @@  static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		goto err;
 	}
 
-	n = ib_umem_num_pages(mr->umem);
+	n = ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE);
 
 	mr->mtt = mthca_alloc_mtt(dev, n);
 	if (IS_ERR(mr->mtt)) {
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
index 91f0957e611543..e80848bfb3bdbf 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
@@ -133,7 +133,7 @@  struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		return ERR_CAST(umem);
 	}
 
-	npages = ib_umem_num_pages(umem);
+	npages = ib_umem_num_dma_blocks(umem, PAGE_SIZE);
 	if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
 		dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n",
 			 npages);
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index b880512ba95f16..0f1ab3d8f77dea 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -17,6 +17,7 @@  struct ib_umem_odp;
 struct ib_umem {
 	struct ib_device       *ibdev;
 	struct mm_struct       *owning_mm;
+	u64 iova;
 	size_t			length;
 	unsigned long		address;
 	u32 writable : 1;
@@ -33,11 +34,17 @@  static inline int ib_umem_offset(struct ib_umem *umem)
 	return umem->address & ~PAGE_MASK;
 }
 
+static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem,
+					    unsigned long pgsz)
+{
+	return (ALIGN(umem->iova + umem->length, pgsz) -
+		ALIGN_DOWN(umem->iova, pgsz)) /
+	       pgsz;
+}
+
 static inline size_t ib_umem_num_pages(struct ib_umem *umem)
 {
-	return (ALIGN(umem->address + umem->length, PAGE_SIZE) -
-		ALIGN_DOWN(umem->address, PAGE_SIZE)) >>
-	       PAGE_SHIFT;
+	return ib_umem_num_dma_blocks(umem, PAGE_SIZE);
 }
 
 static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
@@ -55,6 +62,8 @@  static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
  * pgsz must be <= PAGE_SIZE or computed by ib_umem_find_best_pgsz(). The
  * returned DMA blocks will be aligned to pgsz and span the range:
  * ALIGN_DOWN(umem->address, pgsz) to ALIGN(umem->address + umem->length, pgsz)
+ *
+ * Performs exactly ib_umem_num_dma_blocks() iterations.
  */
 #define rdma_umem_for_each_dma_block(umem, biter, pgsz)                        \
 	for (__rdma_umem_block_iter_start(biter, umem, pgsz);                  \