
[rdma-core,4/4] mlx5: Introduce mlx5dv_wr_mr_list post send builder

Message ID 1553525151-14005-5-git-send-email-yishaih@mellanox.com (mailing list archive)
State Not Applicable
Series mlx5: Add UMR builders over the DV API

Commit Message

Yishai Hadas March 25, 2019, 2:45 p.m. UTC
Introduce the mlx5dv_wr_mr_list() post send builder, to be used for issuing
a WR that registers a memory layout based on a list of ibv_sge.
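
A minimal usage sketch (error handling omitted; qp, mkey, the buffers and
their MRs are assumptions here, e.g. an mkey created with
mlx5dv_create_mkey() from earlier in this series, and a QP created with
MLX5DV_QP_EX_WITH_MR_LIST set in mlx5dv_qp_init_attr.send_ops_flags):

	struct ibv_qp_ex *qpx = ibv_qp_to_qp_ex(qp);
	struct mlx5dv_qp_ex *mqpx = mlx5dv_qp_ex_from_ibv_qp_ex(qpx);
	/* Zero-based layout: entry N follows entry N-1 in the mkey space */
	struct ibv_sge sgl[2] = {
		{ .addr = (uintptr_t)buf_a, .length = len_a, .lkey = mr_a->lkey },
		{ .addr = (uintptr_t)buf_b, .length = len_b, .lkey = mr_b->lkey },
	};
	int ret;

	ibv_wr_start(qpx);
	/* The current implementation requires IBV_SEND_INLINE */
	qpx->wr_flags = IBV_SEND_INLINE | IBV_SEND_SIGNALED;
	mlx5dv_wr_mr_list(mqpx, mkey, IBV_ACCESS_LOCAL_WRITE, 2, sgl);
	ret = ibv_wr_complete(qpx);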

Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
---
 providers/mlx5/man/mlx5dv_create_qp.3.md |  3 +
 providers/mlx5/man/mlx5dv_wr_post.3.md   | 20 +++++++
 providers/mlx5/mlx5dv.h                  | 15 +++++
 providers/mlx5/qp.c                      | 97 +++++++++++++++++++++++++++-----
 providers/mlx5/verbs.c                   |  3 +-
 5 files changed, 122 insertions(+), 16 deletions(-)
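
As the mlx5dv_wr_post.3 man page change below notes, more than 4 SGE
entries require a send WQE sized, via max_inline_data, to hold the inline
entry list. A sketch of such a QP creation under assumed names (pd, cq and
ctx exist; capacity values are illustrative; each list entry consumes one
16-byte mlx5_wqe_data_seg; the send_ops_flags comp_mask bit comes from
earlier in this series):

	struct ibv_qp_init_attr_ex attr = {
		.qp_type = IBV_QPT_RC,
		.pd = pd, .send_cq = cq, .recv_cq = cq,
		.cap = { .max_send_wr = 16, .max_recv_wr = 16,
			 .max_send_sge = 1, .max_recv_sge = 1,
			 /* room for an 8-entry list */
			 .max_inline_data = 8 * 16 },
		.comp_mask = IBV_QP_INIT_ATTR_PD |
			     IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
		.send_ops_flags = IBV_QP_EX_WITH_SEND,
	};
	struct mlx5dv_qp_init_attr mlx5_attr = {
		.comp_mask = MLX5DV_QP_INIT_ATTR_MASK_SEND_OPS_FLAGS,
		.send_ops_flags = MLX5DV_QP_EX_WITH_MR_LIST,
	};
	struct ibv_qp *qp = mlx5dv_create_qp(ctx, &attr, &mlx5_attr);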

Patch

diff --git a/providers/mlx5/man/mlx5dv_create_qp.3.md b/providers/mlx5/man/mlx5dv_create_qp.3.md
index 74a2193..856c69a 100644
--- a/providers/mlx5/man/mlx5dv_create_qp.3.md
+++ b/providers/mlx5/man/mlx5dv_create_qp.3.md
@@ -104,6 +104,9 @@  struct mlx5dv_dc_init_attr {
 	MLX5DV_QP_EX_WITH_MR_INTERLEAVED:
 		Enables the mlx5dv_wr_mr_interleaved() work request on this QP.
 
+	MLX5DV_QP_EX_WITH_MR_LIST:
+		Enables the mlx5dv_wr_mr_list() work request on this QP.
+
 # NOTES
 
 **mlx5dv_qp_ex_from_ibv_qp_ex()** is used to get *struct mlx5dv_qp_ex* for
diff --git a/providers/mlx5/man/mlx5dv_wr_post.3.md b/providers/mlx5/man/mlx5dv_wr_post.3.md
index 42e680c..0f7ff4e 100644
--- a/providers/mlx5/man/mlx5dv_wr_post.3.md
+++ b/providers/mlx5/man/mlx5dv_wr_post.3.md
@@ -36,6 +36,12 @@  static inline void mlx5dv_wr_mr_interleaved(struct mlx5dv_qp_ex *mqp,
 					    uint32_t repeat_count,
 					    uint16_t num_interleaved,
 					    struct mlx5dv_mr_interleaved *data);
+
+static inline void mlx5dv_wr_mr_list(struct mlx5dv_qp_ex *mqp,
+				      struct mlx5dv_mkey *mkey,
+				      uint32_t access_flags, /* use enum ibv_access_flags */
+				      uint16_t num_sges,
+				      struct ibv_sge *sge);
 ```
 
 # DESCRIPTION
@@ -80,6 +86,20 @@  man for ibv_wr_post and mlx5dv_qp with its available builders and setters.
     If IBV_SEND_SIGNALED is set in *ibv_qp_ex->wr_flags*, the reported WC opcode will be MLX5DV_WC_UMR.
     Unregistering the *mkey*, to enable another pattern registration, should be done via ibv_post_send() with the IBV_WR_LOCAL_INV opcode.
 
+:   *mlx5dv_wr_mr_list()*
+
+    registers a memory layout based on a list of ibv_sge.
+    The layout of the memory pointed to by the *mkey* after its registration will be based on the list of *sge* entries, whose number is given by *num_sges*.
+    After a successful registration, RDMA operations can use this *mkey*; the hardware will scatter the data according to the registered layout.
+    The *mkey* should be used in zero-based mode; the *addr* field in its *ibv_sge* is an offset into the total data.
+
+    The current implementation requires the IBV_SEND_INLINE option to be set in the *ibv_qp_ex->wr_flags* field.
+    To allow more than 4 *num_sges* entries, the QP should be created with a WQE size large enough to fit them.
+    This is done via the *max_inline_data* attribute of *struct ibv_qp_cap* upon QP creation.
+
+    If IBV_SEND_SIGNALED is set in *ibv_qp_ex->wr_flags*, the reported WC opcode will be MLX5DV_WC_UMR.
+    Unregistering the *mkey*, to enable another pattern registration, should be done via ibv_post_send() with the IBV_WR_LOCAL_INV opcode.
+
 ## QP Specific setters
 
 *DCI* QPs
diff --git a/providers/mlx5/mlx5dv.h b/providers/mlx5/mlx5dv.h
index c5aae57..8b88026 100644
--- a/providers/mlx5/mlx5dv.h
+++ b/providers/mlx5/mlx5dv.h
@@ -204,6 +204,7 @@  struct mlx5dv_dc_init_attr {
 
 enum mlx5dv_qp_create_send_ops_flags {
 	MLX5DV_QP_EX_WITH_MR_INTERLEAVED	= 1 << 0,
+	MLX5DV_QP_EX_WITH_MR_LIST		= 1 << 1,
 };
 
 struct mlx5dv_qp_init_attr {
@@ -242,6 +243,11 @@  struct mlx5dv_qp_ex {
 				  uint32_t repeat_count,
 				  uint16_t num_interleaved,
 				  struct mlx5dv_mr_interleaved *data);
+	void (*wr_mr_list)(struct mlx5dv_qp_ex *mqp,
+			   struct mlx5dv_mkey *mkey,
+			   uint32_t access_flags, /* use enum ibv_access_flags */
+			   uint16_t num_sges,
+			   struct ibv_sge *sge);
 };
 
 struct mlx5dv_qp_ex *mlx5dv_qp_ex_from_ibv_qp_ex(struct ibv_qp_ex *qp);
@@ -265,6 +271,15 @@  static inline void mlx5dv_wr_mr_interleaved(struct mlx5dv_qp_ex *mqp,
 			       num_interleaved, data);
 }
 
+static inline void mlx5dv_wr_mr_list(struct mlx5dv_qp_ex *mqp,
+				      struct mlx5dv_mkey *mkey,
+				      uint32_t access_flags,
+				      uint16_t num_sges,
+				      struct ibv_sge *sge)
+{
+	mqp->wr_mr_list(mqp, mkey, access_flags, num_sges, sge);
+}
+
 enum mlx5dv_flow_action_esp_mask {
 	MLX5DV_FLOW_ACTION_ESP_MASK_FLAGS	= 1 << 0,
 };
diff --git a/providers/mlx5/qp.c b/providers/mlx5/qp.c
index ecfe844..7707c2f 100644
--- a/providers/mlx5/qp.c
+++ b/providers/mlx5/qp.c
@@ -2059,6 +2059,40 @@  static uint8_t get_umr_mr_flags(uint32_t acc)
 		MLX5_WQE_MKEY_CONTEXT_ACCESS_FLAGS_LOCAL_WRITE  : 0));
 }
 
+static int umr_sg_list_create(struct mlx5_qp *qp,
+			      uint16_t num_sges,
+			      struct ibv_sge *sge,
+			      void *seg,
+			      void *qend, int *size, int *xlat_size,
+			      uint64_t *reglen)
+{
+	struct mlx5_wqe_data_seg *dseg;
+	int byte_count = 0;
+	int i;
+	size_t tmp;
+
+	dseg = seg;
+
+	for (i = 0; i < num_sges; i++, dseg++) {
+		if (unlikely(dseg == qend))
+			dseg = mlx5_get_send_wqe(qp, 0);
+
+		dseg->addr = htobe64(sge[i].addr);
+		dseg->lkey = htobe32(sge[i].lkey);
+		dseg->byte_count = htobe32(sge[i].length);
+		byte_count += sge[i].length;
+	}
+
+	tmp = align(num_sges, 4) - num_sges;
+	memset(dseg, 0, tmp * sizeof(*dseg));
+
+	*size = align(num_sges * sizeof(*dseg), 64);
+	*reglen = byte_count;
+	*xlat_size = num_sges * sizeof(*dseg);
+
+	return 0;
+}
+
 /* The strided block format is as the following:
  * | repeat_block | entry_block | entry_block |...| entry_block |
  * While the repeat entry contains details on the list of the block_entries.
@@ -2109,12 +2143,13 @@  static void umr_strided_seg_create(struct mlx5_qp *qp,
 	*xlat_size = (num_interleaved + 1) * sizeof(*eb);
 }
 
-static void mlx5_send_wr_mr_interleaved(struct mlx5dv_qp_ex *dv_qp,
-					struct mlx5dv_mkey *dv_mkey,
-					uint32_t access_flags,
-					uint32_t repeat_count,
-					uint16_t num_interleaved,
-					struct mlx5dv_mr_interleaved *data)
+static void mlx5_send_wr_mr(struct mlx5dv_qp_ex *dv_qp,
+			    struct mlx5dv_mkey *dv_mkey,
+			    uint32_t access_flags,
+			    uint32_t repeat_count,
+			    uint16_t num_entries,
+			    struct mlx5dv_mr_interleaved *data,
+			    struct ibv_sge *sge)
 {
 	struct mlx5_qp *mqp = mqp_from_mlx5dv_qp_ex(dv_qp);
 	struct ibv_qp_ex *ibqp = &mqp->verbs_qp.qp_ex;
@@ -2134,12 +2169,17 @@  static void mlx5_send_wr_mr_interleaved(struct mlx5dv_qp_ex *dv_qp,
 		return;
 	}
 
-	max_entries = min_t(size_t,
-			    (mqp->max_inline_data + sizeof(struct mlx5_wqe_inl_data_seg)) /
-					sizeof(struct mlx5_wqe_umr_repeat_ent_seg) - 1,
-			     mkey->num_desc);
-
-	if (unlikely(num_interleaved > max_entries)) {
+	max_entries = data ?
+		min_t(size_t,
+		      (mqp->max_inline_data + sizeof(struct mlx5_wqe_inl_data_seg)) /
+				sizeof(struct mlx5_wqe_umr_repeat_ent_seg) - 1,
+		       mkey->num_desc) :
+		min_t(size_t,
+		      (mqp->max_inline_data + sizeof(struct mlx5_wqe_inl_data_seg)) /
+				sizeof(struct mlx5_wqe_data_seg),
+		       mkey->num_desc);
+
+	if (unlikely(num_entries > max_entries)) {
 		mqp->err = ENOMEM;
 		return;
 	}
@@ -2184,8 +2224,13 @@  static void mlx5_send_wr_mr_interleaved(struct mlx5dv_qp_ex *dv_qp,
 	if (unlikely(seg == qend))
 		seg = mlx5_get_send_wqe(mqp, 0);
 
-	umr_strided_seg_create(mqp, repeat_count, num_interleaved, data,
-			       seg, qend, &size, &xlat_size, &reglen);
+	if (data)
+		umr_strided_seg_create(mqp, repeat_count, num_entries, data,
+				       seg, qend, &size, &xlat_size, &reglen);
+	else
+		umr_sg_list_create(mqp, num_entries, sge, seg,
+				   qend, &size, &xlat_size, &reglen);
+
 	mk->len = htobe64(reglen);
 	umr_ctrl_seg->klm_octowords = htobe16(align(xlat_size, 64) / 16);
 	mqp->cur_size += size / 16;
@@ -2197,6 +2242,26 @@  static void mlx5_send_wr_mr_interleaved(struct mlx5dv_qp_ex *dv_qp,
 	_common_wqe_finilize(mqp);
 }
 
+static void mlx5_send_wr_mr_interleaved(struct mlx5dv_qp_ex *dv_qp,
+					struct mlx5dv_mkey *mkey,
+					uint32_t access_flags,
+					uint32_t repeat_count,
+					uint16_t num_interleaved,
+					struct mlx5dv_mr_interleaved *data)
+{
+	mlx5_send_wr_mr(dv_qp, mkey, access_flags, repeat_count,
+			num_interleaved, data, NULL);
+}
+
+static inline void mlx5_send_wr_mr_list(struct mlx5dv_qp_ex *dv_qp,
+					struct mlx5dv_mkey *mkey,
+					uint32_t access_flags,
+					uint16_t num_sges,
+					struct ibv_sge *sge)
+{
+	mlx5_send_wr_mr(dv_qp, mkey, access_flags, 0, num_sges, NULL, sge);
+}
+
 static void mlx5_send_wr_set_dc_addr(struct mlx5dv_qp_ex *dv_qp,
 				     struct ibv_ah *ah,
 				     uint32_t remote_dctn,
@@ -2343,11 +2408,13 @@  int mlx5_qp_fill_wr_pfns(struct mlx5_qp *mqp,
 
 		if (mlx5_ops) {
 			if (!check_comp_mask(mlx5_ops,
-					     MLX5DV_QP_EX_WITH_MR_INTERLEAVED))
+					     MLX5DV_QP_EX_WITH_MR_INTERLEAVED |
+					     MLX5DV_QP_EX_WITH_MR_LIST))
 				return EOPNOTSUPP;
 
 			dv_qp = &mqp->dv_qp;
 			dv_qp->wr_mr_interleaved = mlx5_send_wr_mr_interleaved;
+			dv_qp->wr_mr_list = mlx5_send_wr_mr_list;
 		}
 
 		break;
diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c
index 136c0d2..831ea46 100644
--- a/providers/mlx5/verbs.c
+++ b/providers/mlx5/verbs.c
@@ -1153,7 +1153,8 @@  static int _sq_overhead(struct mlx5_qp *qp,
 			      sizeof(struct mlx5_wqe_atomic_seg);
 
 	if (ops & (IBV_QP_EX_WITH_BIND_MW | IBV_QP_EX_WITH_LOCAL_INV) ||
-	    (mlx5_ops & MLX5DV_QP_EX_WITH_MR_INTERLEAVED))
+	    (mlx5_ops & (MLX5DV_QP_EX_WITH_MR_INTERLEAVED |
+			 MLX5DV_QP_EX_WITH_MR_LIST)))
 		mw_size = sizeof(struct mlx5_wqe_ctrl_seg) +
 			  sizeof(struct mlx5_wqe_umr_ctrl_seg) +
 			  sizeof(struct mlx5_wqe_mkey_context_seg) +