@@ -104,6 +104,9 @@ struct mlx5dv_dc_init_attr {
MLX5DV_QP_EX_WITH_MR_INTERLEAVED:
Enables the mlx5dv_wr_mr_interleaved() work request on this QP.
+ MLX5DV_QP_EX_WITH_MR_LIST:
+ Enables the mlx5dv_wr_mr_list() work request on this QP.
+
# NOTES
**mlx5dv_qp_ex_from_ibv_qp_ex()** is used to get *struct mlx5dv_qp_ex* for
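As a hedged illustration of the flag documented in the hunk above (not part of the patch): a QP intended to post mlx5dv_wr_mr_list() could be created roughly as follows. The helper name create_mr_list_qp, the ctx/pd/cq arguments and the sizing values are placeholders chosen for the sketch.

```c
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>

/* Illustrative sketch only: ctx, pd and cq are assumed to have been
 * created by the caller; error handling is omitted. */
static struct ibv_qp *create_mr_list_qp(struct ibv_context *ctx,
                                        struct ibv_pd *pd, struct ibv_cq *cq)
{
        struct ibv_qp_init_attr_ex attr_ex = {
                .qp_type = IBV_QPT_RC,
                .send_cq = cq,
                .recv_cq = cq,
                .pd = pd,
                .cap = {
                        .max_send_wr = 16, .max_recv_wr = 16,
                        .max_send_sge = 1, .max_recv_sge = 1,
                        /* WQE room for the inline translation list,
                         * needed when a single mr_list WR carries
                         * more than 4 SGEs */
                        .max_inline_data = 128,
                },
                .comp_mask = IBV_QP_INIT_ATTR_PD |
                             IBV_QP_INIT_ATTR_SEND_OPS_FLAGS,
                .send_ops_flags = IBV_QP_EX_WITH_RDMA_WRITE,
        };
        struct mlx5dv_qp_init_attr dv_attr = {
                .comp_mask = MLX5DV_QP_INIT_ATTR_MASK_SEND_OPS_FLAGS,
                .send_ops_flags = MLX5DV_QP_EX_WITH_MR_LIST,
        };

        return mlx5dv_create_qp(ctx, &attr_ex, &dv_attr);
}
```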
@@ -36,6 +36,12 @@ static inline void mlx5dv_wr_mr_interleaved(struct mlx5dv_qp_ex *mqp,
uint32_t repeat_count,
uint16_t num_interleaved,
struct mlx5dv_mr_interleaved *data);
+
+static inline void mlx5dv_wr_mr_list(struct mlx5dv_qp_ex *mqp,
+                                     struct mlx5dv_mkey *mkey,
+                                     uint32_t access_flags, /* use enum ibv_access_flags */
+                                     uint16_t num_sges,
+                                     struct ibv_sge *sge);
```
# DESCRIPTION
@@ -80,6 +86,20 @@ man for ibv_wr_post and mlx5dv_qp with its available builders and setters.
In case *ibv_qp_ex->wr_flags* turns on IBV_SEND_SIGNALED, the reported WC opcode will be MLX5DV_WC_UMR.
Unregister the *mkey* to enable another pattern registration should be done via ibv_post_send with IBV_WR_LOCAL_INV opcode.
+*mlx5dv_wr_mr_list()*
+
+:   registers a memory layout based on a list of *ibv_sge* entries.
+    The layout of the memory pointed to by the *mkey* after its registration will follow the *sge* list, whose length is given by *num_sges*.
+    Once the registration has completed, RDMA operations can use this *mkey*; the hardware will scatter the data according to the registered layout.
+    The *mkey* should be used in a zero-based mode: the *addr* field of an *ibv_sge* that references it is an offset into the total registered data.
+
+    The current implementation requires the IBV_SEND_INLINE option to be set in the *ibv_qp_ex->wr_flags* field.
+    To support more than 4 *num_sges* entries, the QP should be created with a WQE size large enough to hold them.
+    This is controlled by the *max_inline_data* attribute of *struct ibv_qp_cap* upon QP creation.
+
+    In case *ibv_qp_ex->wr_flags* turns on IBV_SEND_SIGNALED, the reported WC opcode will be MLX5DV_WC_UMR.
+    Unregistering the *mkey*, to allow registering another layout, should be done via ibv_post_send with the IBV_WR_LOCAL_INV opcode.
+
## QP Specific setters
*DCI* QPs
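To make the flow described in the *mlx5dv_wr_mr_list()* entry above concrete, here is a hedged usage sketch (not part of the patch). It assumes *qpx* was created with MLX5DV_QP_EX_WITH_MR_LIST in the mlx5dv send_ops_flags, *mkey* was created with mlx5dv_create_mkey() and max_entries >= 2, and mr_a/mr_b are ordinary MRs covering buf_a/buf_b; the helper name and values are placeholders.

```c
#include <stdint.h>
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>

/* Illustrative sketch only; error handling is omitted. */
static int register_two_buffers(struct ibv_qp_ex *qpx,
                                struct mlx5dv_mkey *mkey,
                                struct ibv_mr *mr_a, void *buf_a,
                                struct ibv_mr *mr_b, void *buf_b)
{
        struct mlx5dv_qp_ex *mqp = mlx5dv_qp_ex_from_ibv_qp_ex(qpx);
        struct ibv_sge sgl[2] = {
                { .addr = (uintptr_t)buf_a, .length = 4096, .lkey = mr_a->lkey },
                { .addr = (uintptr_t)buf_b, .length = 4096, .lkey = mr_b->lkey },
        };

        ibv_wr_start(qpx);
        qpx->wr_id = 1;
        /* IBV_SEND_INLINE is mandatory for this builder in the current
         * implementation; IBV_SEND_SIGNALED yields an MLX5DV_WC_UMR WC. */
        qpx->wr_flags = IBV_SEND_INLINE | IBV_SEND_SIGNALED;
        mlx5dv_wr_mr_list(mqp, mkey, IBV_ACCESS_LOCAL_WRITE, 2, sgl);

        return ibv_wr_complete(qpx);
}
```

After this work request completes, the *mkey* can be used zero-based, e.g. an RDMA write at offset 0 of length 8192 would scatter 4096 bytes into buf_a and 4096 bytes into buf_b.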
@@ -204,6 +204,7 @@ struct mlx5dv_dc_init_attr {
enum mlx5dv_qp_create_send_ops_flags {
MLX5DV_QP_EX_WITH_MR_INTERLEAVED = 1 << 0,
+ MLX5DV_QP_EX_WITH_MR_LIST = 1 << 1,
};
struct mlx5dv_qp_init_attr {
@@ -242,6 +243,11 @@ struct mlx5dv_qp_ex {
uint32_t repeat_count,
uint16_t num_interleaved,
struct mlx5dv_mr_interleaved *data);
+        void (*wr_mr_list)(struct mlx5dv_qp_ex *mqp,
+                           struct mlx5dv_mkey *mkey,
+                           uint32_t access_flags, /* use enum ibv_access_flags */
+                           uint16_t num_sges,
+                           struct ibv_sge *sge);
};
struct mlx5dv_qp_ex *mlx5dv_qp_ex_from_ibv_qp_ex(struct ibv_qp_ex *qp);
@@ -265,6 +271,15 @@ static inline void mlx5dv_wr_mr_interleaved(struct mlx5dv_qp_ex *mqp,
num_interleaved, data);
}
+static inline void mlx5dv_wr_mr_list(struct mlx5dv_qp_ex *mqp,
+                                     struct mlx5dv_mkey *mkey,
+                                     uint32_t access_flags,
+                                     uint16_t num_sges,
+                                     struct ibv_sge *sge)
+{
+        mqp->wr_mr_list(mqp, mkey, access_flags, num_sges, sge);
+}
+
enum mlx5dv_flow_action_esp_mask {
MLX5DV_FLOW_ACTION_ESP_MASK_FLAGS = 1 << 0,
};
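The builder above takes a *struct mlx5dv_mkey*, so the application also needs an indirect mkey to register against. A minimal hedged sketch of creating one (not part of the patch; the helper name, *pd* and the max_entries value are placeholders):

```c
#include <infiniband/mlx5dv.h>

/* Illustrative sketch only: "pd" is an existing ibv_pd. The mkey must be
 * created as an indirect mkey with enough entries for the largest SGE
 * list that will be registered through it. */
static struct mlx5dv_mkey *create_list_mkey(struct ibv_pd *pd)
{
        struct mlx5dv_mkey_init_attr attr = {
                .pd = pd,
                .create_flags = MLX5DV_MKEY_INIT_ATTR_FLAGS_INDIRECT,
                .max_entries = 8, /* upper bound on num_sges */
        };

        return mlx5dv_create_mkey(&attr);
}
```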
@@ -2059,6 +2059,40 @@ static uint8_t get_umr_mr_flags(uint32_t acc)
MLX5_WQE_MKEY_CONTEXT_ACCESS_FLAGS_LOCAL_WRITE : 0));
}
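+/*
+ * Build the UMR translation table from a plain SGE list: one 16-byte
+ * mlx5_wqe_data_seg per SGE (handling send-queue wrap-around), with the
+ * tail zeroed up to a multiple of four segments (64 bytes). reglen
+ * returns the total length covered by the list; xlat_size is the exact
+ * table size, while *size is rounded up to 64 bytes.
+ */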
+static int umr_sg_list_create(struct mlx5_qp *qp,
+                              uint16_t num_sges,
+                              struct ibv_sge *sge,
+                              void *seg,
+                              void *qend, int *size, int *xlat_size,
+                              uint64_t *reglen)
+{
+        struct mlx5_wqe_data_seg *dseg;
+        int byte_count = 0;
+        int i;
+        size_t tmp;
+
+        dseg = seg;
+
+        for (i = 0; i < num_sges; i++, dseg++) {
+                if (unlikely(dseg == qend))
+                        dseg = mlx5_get_send_wqe(qp, 0);
+
+                dseg->addr = htobe64(sge[i].addr);
+                dseg->lkey = htobe32(sge[i].lkey);
+                dseg->byte_count = htobe32(sge[i].length);
+                byte_count += sge[i].length;
+        }
+
+        tmp = align(num_sges, 4) - num_sges;
+        memset(dseg, 0, tmp * sizeof(*dseg));
+
+        *size = align(num_sges * sizeof(*dseg), 64);
+        *reglen = byte_count;
+        *xlat_size = num_sges * sizeof(*dseg);
+
+        return 0;
+}
+
/* The strided block format is as the following:
* | repeat_block | entry_block | entry_block |...| entry_block |
* While the repeat entry contains details on the list of the block_entries.
@@ -2109,12 +2143,13 @@ static void umr_strided_seg_create(struct mlx5_qp *qp,
*xlat_size = (num_interleaved + 1) * sizeof(*eb);
}
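+/*
+ * Common UMR builder: when 'data' is set the interleaved (strided)
+ * layout is built, otherwise 'sge' describes a plain list layout. The
+ * interleaved path reserves one extra inline entry for the repeat
+ * block header, which is why the two max_entries bounds below differ.
+ */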
-static void mlx5_send_wr_mr_interleaved(struct mlx5dv_qp_ex *dv_qp,
-                                        struct mlx5dv_mkey *dv_mkey,
-                                        uint32_t access_flags,
-                                        uint32_t repeat_count,
-                                        uint16_t num_interleaved,
-                                        struct mlx5dv_mr_interleaved *data)
+static void mlx5_send_wr_mr(struct mlx5dv_qp_ex *dv_qp,
+                            struct mlx5dv_mkey *dv_mkey,
+                            uint32_t access_flags,
+                            uint32_t repeat_count,
+                            uint16_t num_entries,
+                            struct mlx5dv_mr_interleaved *data,
+                            struct ibv_sge *sge)
{
struct mlx5_qp *mqp = mqp_from_mlx5dv_qp_ex(dv_qp);
struct ibv_qp_ex *ibqp = &mqp->verbs_qp.qp_ex;
@@ -2134,12 +2169,17 @@ static void mlx5_send_wr_mr_interleaved(struct mlx5dv_qp_ex *dv_qp,
return;
}
-        max_entries = min_t(size_t,
-                            (mqp->max_inline_data + sizeof(struct mlx5_wqe_inl_data_seg)) /
-                            sizeof(struct mlx5_wqe_umr_repeat_ent_seg) - 1,
-                            mkey->num_desc);
-
-        if (unlikely(num_interleaved > max_entries)) {
+        max_entries = data ?
+                min_t(size_t,
+                      (mqp->max_inline_data + sizeof(struct mlx5_wqe_inl_data_seg)) /
+                      sizeof(struct mlx5_wqe_umr_repeat_ent_seg) - 1,
+                      mkey->num_desc) :
+                min_t(size_t,
+                      (mqp->max_inline_data + sizeof(struct mlx5_wqe_inl_data_seg)) /
+                      sizeof(struct mlx5_wqe_data_seg),
+                      mkey->num_desc);
+
+        if (unlikely(num_entries > max_entries)) {
mqp->err = ENOMEM;
return;
}
@@ -2184,8 +2224,13 @@ static void mlx5_send_wr_mr_interleaved(struct mlx5dv_qp_ex *dv_qp,
if (unlikely(seg == qend))
seg = mlx5_get_send_wqe(mqp, 0);
-        umr_strided_seg_create(mqp, repeat_count, num_interleaved, data,
-                               seg, qend, &size, &xlat_size, &reglen);
+        if (data)
+                umr_strided_seg_create(mqp, repeat_count, num_entries, data,
+                                       seg, qend, &size, &xlat_size, &reglen);
+        else
+                umr_sg_list_create(mqp, num_entries, sge, seg,
+                                   qend, &size, &xlat_size, &reglen);
+
mk->len = htobe64(reglen);
umr_ctrl_seg->klm_octowords = htobe16(align(xlat_size, 64) / 16);
mqp->cur_size += size / 16;
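To make the sizing concrete (illustrative arithmetic, not part of the patch): for a list of 6 SGEs, umr_sg_list_create() writes 6 × 16 = 96 bytes of data segments plus two zeroed padding segments, and reports xlat_size = 96 and size = align(96, 64) = 128; klm_octowords therefore becomes align(96, 64) / 16 = 8 octowords and cur_size grows by 128 / 16 = 8.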
@@ -2197,6 +2242,26 @@ static void mlx5_send_wr_mr_interleaved(struct mlx5dv_qp_ex *dv_qp,
_common_wqe_finilize(mqp);
}
+static void mlx5_send_wr_mr_interleaved(struct mlx5dv_qp_ex *dv_qp,
+                                        struct mlx5dv_mkey *mkey,
+                                        uint32_t access_flags,
+                                        uint32_t repeat_count,
+                                        uint16_t num_interleaved,
+                                        struct mlx5dv_mr_interleaved *data)
+{
+        mlx5_send_wr_mr(dv_qp, mkey, access_flags, repeat_count,
+                        num_interleaved, data, NULL);
+}
+
+static inline void mlx5_send_wr_mr_list(struct mlx5dv_qp_ex *dv_qp,
+                                        struct mlx5dv_mkey *mkey,
+                                        uint32_t access_flags,
+                                        uint16_t num_sges,
+                                        struct ibv_sge *sge)
+{
+        mlx5_send_wr_mr(dv_qp, mkey, access_flags, 0, num_sges, NULL, sge);
+}
+
static void mlx5_send_wr_set_dc_addr(struct mlx5dv_qp_ex *dv_qp,
struct ibv_ah *ah,
uint32_t remote_dctn,
@@ -2343,11 +2408,13 @@ int mlx5_qp_fill_wr_pfns(struct mlx5_qp *mqp,
if (mlx5_ops) {
if (!check_comp_mask(mlx5_ops,
- MLX5DV_QP_EX_WITH_MR_INTERLEAVED))
+ MLX5DV_QP_EX_WITH_MR_INTERLEAVED |
+ MLX5DV_QP_EX_WITH_MR_LIST))
return EOPNOTSUPP;
dv_qp = &mqp->dv_qp;
dv_qp->wr_mr_interleaved = mlx5_send_wr_mr_interleaved;
+ dv_qp->wr_mr_list = mlx5_send_wr_mr_list;
}
break;
@@ -1153,7 +1153,8 @@ static int _sq_overhead(struct mlx5_qp *qp,
sizeof(struct mlx5_wqe_atomic_seg);
if (ops & (IBV_QP_EX_WITH_BIND_MW | IBV_QP_EX_WITH_LOCAL_INV) ||
- (mlx5_ops & MLX5DV_QP_EX_WITH_MR_INTERLEAVED))
+ (mlx5_ops & (MLX5DV_QP_EX_WITH_MR_INTERLEAVED |
+ MLX5DV_QP_EX_WITH_MR_LIST)))
mw_size = sizeof(struct mlx5_wqe_ctrl_seg) +
sizeof(struct mlx5_wqe_umr_ctrl_seg) +
sizeof(struct mlx5_wqe_mkey_context_seg) +