
[rdma-next,10/12] RDMA/mlx5: Use mlx5_umr_post_send_wait() to update MR pas

Message ID ed8f2ee6c64804072155d727149abf7105f92536.1649747695.git.leonro@nvidia.com (mailing list archive)
State Accepted
Commit b3d47ebd490823514a2d637caee0870b6f192b07
Series Refactor UMR post send logic

Commit Message

Leon Romanovsky April 12, 2022, 7:24 a.m. UTC
From: Aharon Landau <aharonl@nvidia.com>

Move mlx5_ib_update_mr_pas logic to umr.c, and use mlx5_umr_post_send_wait()
instead of mlx5_ib_post_send_wait().

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h |   1 -
 drivers/infiniband/hw/mlx5/mr.c      |  76 +------------
 drivers/infiniband/hw/mlx5/odp.c     |   2 +-
 drivers/infiniband/hw/mlx5/umr.c     | 159 +++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/umr.h     |   1 +
 5 files changed, 164 insertions(+), 75 deletions(-)

Comments

Jason Gunthorpe April 25, 2022, 5:43 p.m. UTC | #1
On Tue, Apr 12, 2022 at 10:24:05AM +0300, Leon Romanovsky wrote:

> +/*
> + * Send the DMA list to the HW for a normal MR using UMR.
> + * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
> + * flag may be used.
> + */
> +int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
> +{

I would prefer to see zap split into its own function; it shouldn't
call rdma_for_each_block at all - the only reason it was structured
this way in the first place was that everything had to be squeezed
into a WR struct.

Then all the places calling

mlx5r_umr_update_xlt(.. MLX5_IB_UPD_XLT_ZAP) 

should call this new function instead as well.

Zapping an ODP mkey or zapping a normal mkey should just be the same
logic.

Also, looking at it, it would be even nicer if the duplication
between mlx5r_umr_update_xlt() and mlx5r_umr_update_mr_pas() could be
removed, and perhaps organized in a way that makes it logical to
eliminate the mlx5_odp_populate_xlt() "callback".

Which would give four entry points:

'umr fill xlt from sgl' (mlx5r_umr_update_mr_pas)
'umr fill xlt from dma_list' (populate_mtt)
'umr fill klm from xarray' (populate_klm)
'umr fill xlt/klm with zero' (zap)

I'd imagine a general construction to consolidate more stuff into
mlx5r_umr_create_xlt(), probably having it return a struct, including
setting up the initial wqe so the above are slimmer.

Maybe in a followup series though

Jason
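
For illustration, a minimal sketch of the dedicated zap entry point
suggested above, built only from the helpers this patch introduces. The
function name mlx5r_umr_zap_mkey() and the single-post simplification
(no chunking for very large regions, normal/dmabuf MRs only) are
assumptions of the sketch, not part of this patch or any follow-up:

	/*
	 * Sketch only (not part of this patch): a dedicated zap entry
	 * point as suggested in the review, reusing the UMR helpers
	 * added in umr.c.  Chunking for XLTs larger than one buffer is
	 * deliberately omitted here.
	 */
	static int mlx5r_umr_zap_mkey(struct mlx5_ib_mr *mr)
	{
		struct mlx5_ib_dev *dev = mr_to_mdev(mr);
		struct device *ddev = &dev->mdev->pdev->dev;
		struct mlx5r_umr_wqe wqe = {};
		struct mlx5_mtt *mtt;
		struct ib_sge sg;
		int err;

		/* One XLT buffer covering the whole mapping. */
		mtt = mlx5r_umr_create_xlt(dev, &sg,
					   ib_umem_num_dma_blocks(mr->umem,
								  1 << mr->page_shift),
					   sizeof(*mtt), MLX5_IB_UPD_XLT_ZAP);
		if (!mtt)
			return -ENOMEM;

		/* A zap is just an all-zero translation table - no ptag walk. */
		memset(mtt, 0, sg.length);

		mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg,
						  MLX5_IB_UPD_XLT_ZAP, &sg);
		mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
						  mr->page_shift);
		mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
		mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg,
					   MLX5_IB_UPD_XLT_ZAP);

		dma_sync_single_for_device(ddev, sg.addr, sg.length,
					   DMA_TO_DEVICE);
		err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);

		mlx5r_umr_unmap_free_xlt(dev, mtt, &sg);
		return err;
	}

Because the whole table is zero-filled up front, the rdma_for_each_block()
walk disappears entirely, and ODP and normal mkeys could share this path
once the ODP entry count is accounted for.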

Patch

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d77a27503488..ea4d5519df8c 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1289,7 +1289,6 @@  int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
 int mlx5_ib_dealloc_mw(struct ib_mw *mw);
 int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 		       int page_shift, int flags);
-int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
 struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 					     int access_flags);
 void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index e7cc32b46851..df79fd5be5f2 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1082,76 +1082,6 @@  int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 	return err;
 }
 
-/*
- * Send the DMA list to the HW for a normal MR using UMR.
- * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
- * flag may be used.
- */
-int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
-{
-	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
-	struct device *ddev = &dev->mdev->pdev->dev;
-	struct ib_block_iter biter;
-	struct mlx5_mtt *cur_mtt;
-	struct mlx5_umr_wr wr;
-	size_t orig_sg_length;
-	struct mlx5_mtt *mtt;
-	size_t final_size;
-	struct ib_sge sg;
-	int err = 0;
-
-	if (WARN_ON(mr->umem->is_odp))
-		return -EINVAL;
-
-	mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg,
-				    ib_umem_num_dma_blocks(mr->umem,
-							   1 << mr->page_shift),
-				    sizeof(*mtt), flags);
-	if (!mtt)
-		return -ENOMEM;
-	orig_sg_length = sg.length;
-
-	cur_mtt = mtt;
-	rdma_for_each_block (mr->umem->sgt_append.sgt.sgl, &biter,
-			     mr->umem->sgt_append.sgt.nents,
-			     BIT(mr->page_shift)) {
-		if (cur_mtt == (void *)mtt + sg.length) {
-			dma_sync_single_for_device(ddev, sg.addr, sg.length,
-						   DMA_TO_DEVICE);
-			err = mlx5_ib_post_send_wait(dev, &wr);
-			if (err)
-				goto err;
-			dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
-						DMA_TO_DEVICE);
-			wr.offset += sg.length;
-			cur_mtt = mtt;
-		}
-
-		cur_mtt->ptag =
-			cpu_to_be64(rdma_block_iter_dma_address(&biter) |
-				    MLX5_IB_MTT_PRESENT);
-
-		if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
-			cur_mtt->ptag = 0;
-
-		cur_mtt++;
-	}
-
-	final_size = (void *)cur_mtt - (void *)mtt;
-	sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
-	memset(cur_mtt, 0, sg.length - final_size);
-	wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
-	wr.xlt_size = sg.length;
-
-	dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
-	err = mlx5_ib_post_send_wait(dev, &wr);
-
-err:
-	sg.length = orig_sg_length;
-	mlx5r_umr_unmap_free_xlt(dev, mtt, &sg);
-	return err;
-}
-
 /*
  * If ibmr is NULL it will be allocated by reg_create.
  * Else, the given ibmr will be used.
@@ -1368,7 +1298,7 @@  static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
 		 * configured properly but left disabled. It is safe to go ahead
 		 * and configure it again via UMR while enabling it.
 		 */
-		err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
+		err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
 		if (err) {
 			mlx5_ib_dereg_mr(&mr->ibmr, NULL);
 			return ERR_PTR(err);
@@ -1467,7 +1397,7 @@  static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
 	if (!umem_dmabuf->sgt)
 		return;
 
-	mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
+	mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
 	ib_umem_dmabuf_unmap_pages(umem_dmabuf);
 }
 
@@ -1602,7 +1532,7 @@  static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
 	mr->ibmr.length = new_umem->length;
 	mr->page_shift = order_base_2(page_size);
 	mr->umem = new_umem;
-	err = mlx5_ib_update_mr_pas(mr, upd_flags);
+	err = mlx5r_umr_update_mr_pas(mr, upd_flags);
 	if (err) {
 		/*
 		 * The MR is revoked at this point so there is no issue to free
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 99dc06f589d4..c00e70e74d94 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -715,7 +715,7 @@  static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
 		ib_umem_dmabuf_unmap_pages(umem_dmabuf);
 		err = -EINVAL;
 	} else {
-		err = mlx5_ib_update_mr_pas(mr, xlt_flags);
+		err = mlx5r_umr_update_mr_pas(mr, xlt_flags);
 	}
 	dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
 
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index ba61e6280cb5..b56e3569c677 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -499,3 +499,162 @@  void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
 
 	return xlt;
 }
+
+static void
+mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
+				  unsigned int flags, struct ib_sge *sg)
+{
+	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
+		/* fail if free */
+		ctrl_seg->flags = MLX5_UMR_CHECK_FREE;
+	else
+		/* fail if not free */
+		ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE;
+	ctrl_seg->xlt_octowords =
+		cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
+}
+
+static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev,
+					      struct mlx5_mkey_seg *mkey_seg,
+					      struct mlx5_ib_mr *mr,
+					      unsigned int page_shift)
+{
+	mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags);
+	MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);
+	MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova);
+	MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length);
+	MLX5_SET(mkc, mkey_seg, log_page_size, page_shift);
+	MLX5_SET(mkc, mkey_seg, qpn, 0xffffff);
+	MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key));
+}
+
+static void
+mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg,
+				  struct ib_sge *sg)
+{
+	data_seg->byte_count = cpu_to_be32(sg->length);
+	data_seg->lkey = cpu_to_be32(sg->lkey);
+	data_seg->addr = cpu_to_be64(sg->addr);
+}
+
+static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
+				    u64 offset)
+{
+	u64 octo_offset = mlx5r_umr_get_xlt_octo(offset);
+
+	ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff);
+	ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16);
+	ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
+}
+
+static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
+				       struct mlx5r_umr_wqe *wqe,
+				       struct mlx5_ib_mr *mr, struct ib_sge *sg,
+				       unsigned int flags)
+{
+	bool update_pd_access, update_translation;
+
+	if (flags & MLX5_IB_UPD_XLT_ENABLE)
+		wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask();
+
+	update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE ||
+			   flags & MLX5_IB_UPD_XLT_PD ||
+			   flags & MLX5_IB_UPD_XLT_ACCESS;
+
+	if (update_pd_access) {
+		wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev);
+		wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
+	}
+
+	update_translation =
+		flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;
+
+	if (update_translation) {
+		wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask();
+		if (!mr->ibmr.length)
+			MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
+	}
+
+	wqe->ctrl_seg.xlt_octowords =
+		cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
+	wqe->data_seg.byte_count = cpu_to_be32(sg->length);
+}
+
+/*
+ * Send the DMA list to the HW for a normal MR using UMR.
+ * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
+ * flag may be used.
+ */
+int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
+{
+	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+	struct device *ddev = &dev->mdev->pdev->dev;
+	struct mlx5r_umr_wqe wqe = {};
+	struct ib_block_iter biter;
+	struct mlx5_mtt *cur_mtt;
+	size_t orig_sg_length;
+	struct mlx5_mtt *mtt;
+	size_t final_size;
+	struct ib_sge sg;
+	u64 offset = 0;
+	int err = 0;
+
+	if (WARN_ON(mr->umem->is_odp))
+		return -EINVAL;
+
+	mtt = mlx5r_umr_create_xlt(
+		dev, &sg, ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
+		sizeof(*mtt), flags);
+	if (!mtt)
+		return -ENOMEM;
+
+	orig_sg_length = sg.length;
+
+	mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
+	mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
+					  mr->page_shift);
+	mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
+
+	cur_mtt = mtt;
+	rdma_for_each_block(mr->umem->sgt_append.sgt.sgl, &biter,
+			    mr->umem->sgt_append.sgt.nents,
+			    BIT(mr->page_shift)) {
+		if (cur_mtt == (void *)mtt + sg.length) {
+			dma_sync_single_for_device(ddev, sg.addr, sg.length,
+						   DMA_TO_DEVICE);
+
+			err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe,
+						       true);
+			if (err)
+				goto err;
+			dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
+						DMA_TO_DEVICE);
+			offset += sg.length;
+			mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);
+
+			cur_mtt = mtt;
+		}
+
+		cur_mtt->ptag =
+			cpu_to_be64(rdma_block_iter_dma_address(&biter) |
+				    MLX5_IB_MTT_PRESENT);
+
+		if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
+			cur_mtt->ptag = 0;
+
+		cur_mtt++;
+	}
+
+	final_size = (void *)cur_mtt - (void *)mtt;
+	sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
+	memset(cur_mtt, 0, sg.length - final_size);
+	mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
+
+	dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
+	err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
+
+err:
+	sg.length = orig_sg_length;
+	mlx5r_umr_unmap_free_xlt(dev, mtt, &sg);
+	return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h
index 2485379fec32..3bb251f07f4e 100644
--- a/drivers/infiniband/hw/mlx5/umr.h
+++ b/drivers/infiniband/hw/mlx5/umr.h
@@ -99,5 +99,6 @@  void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
 void mlx5r_umr_free_xlt(void *xlt, size_t length);
 void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
 			      struct ib_sge *sg);
+int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
 
 #endif /* _MLX5_IB_UMR_H */