diff mbox series

[3/6] RDMA/odp: Lift umem_mutex out of ib_umem_odp_unmap_dma_pages()

Message ID 20191001153821.23621-4-jgg@ziepe.ca (mailing list archive)
State Mainlined
Commit 9dc775e7f5508f848661bbfb2e15683affb85f24
Delegated to: Jason Gunthorpe
Headers show
Series Bug fixes for odp | expand

Commit Message

Jason Gunthorpe Oct. 1, 2019, 3:38 p.m. UTC
From: Jason Gunthorpe <jgg@mellanox.com>

This fixes a race of the form:
    CPU0                               CPU1
mlx5_ib_invalidate_range()     mlx5_ib_invalidate_range()
				 // This one actually makes npages == 0
				 ib_umem_odp_unmap_dma_pages()
				 if (npages == 0 && !dying)
  // This one does nothing
  ib_umem_odp_unmap_dma_pages()
  if (npages == 0 && !dying)
     dying = 1;
                                    dying = 1;
				    schedule_work(&umem_odp->work);
     // Double schedule of the same work
     schedule_work(&umem_odp->work);  // BOOM

npages and dying must be read and written under the umem_mutex lock.

Since whenever ib_umem_odp_unmap_dma_pages() is called mlx5 must also call
mlx5_ib_update_xlt, and both need to be done in the same locking region,
hoist the lock out of unmap.

This avoids an expensive double critical section in
mlx5_ib_invalidate_range().

Fixes: 81713d3788d2 ("IB/mlx5: Add implicit MR support")
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/umem_odp.c |  6 ++++--
 drivers/infiniband/hw/mlx5/odp.c   | 12 ++++++++----
 2 files changed, 12 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index f67a30fda1ed9a..163ff7ba92b7f1 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -451,8 +451,10 @@  void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
 	 * that the hardware will not attempt to access the MR any more.
 	 */
 	if (!umem_odp->is_implicit_odp) {
+		mutex_lock(&umem_odp->umem_mutex);
 		ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
 					    ib_umem_end(umem_odp));
+		mutex_unlock(&umem_odp->umem_mutex);
 		kvfree(umem_odp->dma_list);
 		kvfree(umem_odp->page_list);
 	}
@@ -719,6 +721,8 @@  void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
 	u64 addr;
 	struct ib_device *dev = umem_odp->umem.ibdev;
 
+	lockdep_assert_held(&umem_odp->umem_mutex);
+
 	virt = max_t(u64, virt, ib_umem_start(umem_odp));
 	bound = min_t(u64, bound, ib_umem_end(umem_odp));
 	/* Note that during the run of this function, the
@@ -726,7 +730,6 @@  void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
 	 * faults from completion. We might be racing with other
 	 * invalidations, so we must make sure we free each page only
 	 * once. */
-	mutex_lock(&umem_odp->umem_mutex);
 	for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
 		idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
 		if (umem_odp->page_list[idx]) {
@@ -757,7 +760,6 @@  void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
 			umem_odp->npages--;
 		}
 	}
-	mutex_unlock(&umem_odp->umem_mutex);
 }
 EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
 
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 3401c06b7e54f5..1930d78c3091cb 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -308,7 +308,6 @@  void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 				   idx - blk_start_idx + 1, 0,
 				   MLX5_IB_UPD_XLT_ZAP |
 				   MLX5_IB_UPD_XLT_ATOMIC);
-	mutex_unlock(&umem_odp->umem_mutex);
 	/*
 	 * We are now sure that the device will not access the
 	 * memory. We can safely unmap it, and mark it as dirty if
@@ -319,10 +318,11 @@  void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 
 	if (unlikely(!umem_odp->npages && mr->parent &&
 		     !umem_odp->dying)) {
-		WRITE_ONCE(umem_odp->dying, 1);
+		umem_odp->dying = 1;
 		atomic_inc(&mr->parent->num_leaf_free);
 		schedule_work(&umem_odp->work);
 	}
+	mutex_unlock(&umem_odp->umem_mutex);
 }
 
 void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
@@ -585,15 +585,19 @@  void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
 		if (mr->parent != imr)
 			continue;
 
+		mutex_lock(&umem_odp->umem_mutex);
 		ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
 					    ib_umem_end(umem_odp));
 
-		if (umem_odp->dying)
+		if (umem_odp->dying) {
+			mutex_unlock(&umem_odp->umem_mutex);
 			continue;
+		}
 
-		WRITE_ONCE(umem_odp->dying, 1);
+		umem_odp->dying = 1;
 		atomic_inc(&imr->num_leaf_free);
 		schedule_work(&umem_odp->work);
+		mutex_unlock(&umem_odp->umem_mutex);
 	}
 	up_read(&per_mm->umem_rwsem);