[rdma-core,3/4] mlx5: Introduce mlx5dv_wr_mr_interleaved post send builder

Message ID 1553525151-14005-4-git-send-email-yishaih@mellanox.com (mailing list archive)
State Not Applicable
Series mlx5: Add UMR builders over the DV API

Commit Message

Yishai Hadas March 25, 2019, 2:45 p.m. UTC
Introduce the mlx5dv_wr_mr_interleaved() post-send builder, to be used for
issuing a WR that registers an interleaved memory layout.
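
For reviewers, a minimal usage sketch (not part of the patch) of how the new
builder is driven over the extended post-send API. The helper names, buffer
sizes, inline size and access flags below are illustrative assumptions; error
handling, resource setup and the creation of the indirect mkey (which needs
num_interleaved + 1 entries) are omitted.

#include <stdint.h>
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>

/* Create an RC QP that is allowed to post mlx5dv_wr_mr_interleaved().
 * The caller is assumed to have filled 'attr' with qp_type = IBV_QPT_RC,
 * the PD, the CQs and the standard capabilities.
 */
static struct ibv_qp *create_umr_qp(struct ibv_context *ctx,
				    struct ibv_qp_init_attr_ex *attr)
{
	struct mlx5dv_qp_init_attr dv_attr = {
		.comp_mask = MLX5DV_QP_INIT_ATTR_MASK_SEND_OPS_FLAGS,
		.send_ops_flags = MLX5DV_QP_EX_WITH_MR_INTERLEAVED,
	};

	attr->comp_mask |= IBV_QP_INIT_ATTR_SEND_OPS_FLAGS;
	attr->send_ops_flags |= IBV_QP_EX_WITH_RDMA_WRITE;
	/* The UMR WR is posted inline; enlarge the WQE if more than 3
	 * interleaved entries are needed (illustrative value).
	 */
	attr->cap.max_inline_data = 128;

	return mlx5dv_create_qp(ctx, attr, &dv_attr);
}

/* Register a two-entry interleaved pattern (e.g. 512 bytes of data
 * followed by an 8 byte signature), repeated 'repeat_count' times,
 * through an indirect mkey created with at least 3 entries (one extra
 * is consumed by the strided header).
 */
static int post_interleaved_reg(struct ibv_qp *qp, struct mlx5dv_mkey *mkey,
				struct ibv_mr *data_mr, struct ibv_mr *sig_mr,
				uint32_t repeat_count)
{
	struct ibv_qp_ex *qpx = ibv_qp_to_qp_ex(qp);
	struct mlx5dv_qp_ex *mqpx = mlx5dv_qp_ex_from_ibv_qp_ex(qpx);
	struct mlx5dv_mr_interleaved data[2] = {
		{ .addr = (uintptr_t)data_mr->addr, .bytes_count = 512,
		  .bytes_skip = 0, .lkey = data_mr->lkey },
		{ .addr = (uintptr_t)sig_mr->addr, .bytes_count = 8,
		  .bytes_skip = 0, .lkey = sig_mr->lkey },
	};

	ibv_wr_start(qpx);
	/* IBV_SEND_INLINE is mandatory; IBV_SEND_SIGNALED makes the
	 * completion report the MLX5DV_WC_UMR opcode.
	 */
	qpx->wr_flags = IBV_SEND_INLINE | IBV_SEND_SIGNALED;
	mlx5dv_wr_mr_interleaved(mqpx, mkey,
				 IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE,
				 repeat_count, 2, data);
	return ibv_wr_complete(qpx);
}

Once the UMR completion arrives, the mkey can be used in zero-based mode by
RDMA operations until it is invalidated with IBV_WR_LOCAL_INV.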

Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
---
 providers/mlx5/man/mlx5dv_create_qp.3.md |   9 ++
 providers/mlx5/man/mlx5dv_wr_post.3.md   |  35 ++++++
 providers/mlx5/mlx5dv.h                  |  50 +++++++++
 providers/mlx5/qp.c                      | 178 ++++++++++++++++++++++++++++++-
 providers/mlx5/verbs.c                   |  37 +++++--
 5 files changed, 293 insertions(+), 16 deletions(-)

Patch

diff --git a/providers/mlx5/man/mlx5dv_create_qp.3.md b/providers/mlx5/man/mlx5dv_create_qp.3.md
index 7a93e84..74a2193 100644
--- a/providers/mlx5/man/mlx5dv_create_qp.3.md
+++ b/providers/mlx5/man/mlx5dv_create_qp.3.md
@@ -38,6 +38,7 @@  struct mlx5dv_qp_init_attr {
 	uint64_t comp_mask;
 	uint32_t create_flags;
 	struct mlx5dv_dc_init_attr  dc_init_attr;
+	uint64_t send_ops_flags;
 };
 ```
 
@@ -47,6 +48,8 @@  struct mlx5dv_qp_init_attr {
 		valid values in *create_flags*
 	MLX5DV_QP_INIT_ATTR_MASK_DC:
 		valid values in *dc_init_attr*
+	MLX5DV_QP_INIT_ATTR_MASK_SEND_OPS_FLAGS:
+		valid values in *send_ops_flags*
 
 *create_flags*
 :	A bitwise OR of the various values described below.
@@ -95,6 +98,12 @@  struct mlx5dv_dc_init_attr {
 :	used to create a DCT QP.
 
 
+*send_ops_flags*
+:	A bitwise OR of the various values described below.
+
+	MLX5DV_QP_EX_WITH_MR_INTERLEAVED:
+		Enables the mlx5dv_wr_mr_interleaved() work request on this QP.
+
 # NOTES
 
 **mlx5dv_qp_ex_from_ibv_qp_ex()** is used to get *struct mlx5dv_qp_ex* for
diff --git a/providers/mlx5/man/mlx5dv_wr_post.3.md b/providers/mlx5/man/mlx5dv_wr_post.3.md
index 2c17627..42e680c 100644
--- a/providers/mlx5/man/mlx5dv_wr_post.3.md
+++ b/providers/mlx5/man/mlx5dv_wr_post.3.md
@@ -22,6 +22,20 @@  static inline void mlx5dv_wr_set_dc_addr(struct mlx5dv_qp_ex *mqp,
                                          struct ibv_ah *ah,
                                          uint32_t remote_dctn,
                                          uint64_t remote_dc_key);
+
+struct mlx5dv_mr_interleaved {
+	uint64_t        addr;
+	uint32_t        bytes_count;
+	uint32_t        bytes_skip;
+	uint32_t        lkey;
+};
+
+static inline void mlx5dv_wr_mr_interleaved(struct mlx5dv_qp_ex *mqp,
+					    struct mlx5dv_mkey *mkey,
+					    uint32_t access_flags, /* use enum ibv_access_flags */
+					    uint32_t repeat_count,
+					    uint16_t num_interleaved,
+					    struct mlx5dv_mr_interleaved *data);
 ```
 
 # DESCRIPTION
@@ -45,6 +59,27 @@  features on the posted WR.
 A work request creation requires to use the ibv_qp_ex as described in the
 man for ibv_wr_post and mlx5dv_qp with its available builders and setters.
 
+## QP Specific builders
+*RC* QPs
+:   *mlx5dv_wr_mr_interleaved()*
+
+    registers an interleaved memory layout using an indirect mkey and a set of interleaved data entries.
+    After registration, the layout of the memory pointed to by the mkey will follow the *data* representation for the *num_interleaved* entries.
+    This single layout representation is repeated *repeat_count* times.
+
+    Each *data* entry, as described by struct mlx5dv_mr_interleaved, holds real data of *bytes_count* bytes followed by a padding of *bytes_skip* bytes.
+    After a successful registration, RDMA operations can use this *mkey*; the hardware will scatter the data according to the pattern.
+    The *mkey* should be used in a zero-based mode, i.e. the *addr* field in its *ibv_sge* is an offset into the total data.
+
+    The current implementation requires IBV_SEND_INLINE to be set in the *ibv_qp_ex->wr_flags* field.
+    To use more than 3 *num_interleaved* entries, the QP should be created with a WQE size large enough to fit them.
+    This is done via the *max_inline_data* attribute of *struct ibv_qp_cap* upon QP creation.
+
+    As one entry is consumed by the strided header, the *mkey* should be created with one more entry than the required *num_interleaved*.
+
+    If IBV_SEND_SIGNALED is set in *ibv_qp_ex->wr_flags*, the reported WC opcode will be MLX5DV_WC_UMR.
+    Unregistering the *mkey*, to allow registration of another pattern, should be done via ibv_post_send() with the IBV_WR_LOCAL_INV opcode.
+
 ## QP Specific setters
 
 *DCI* QPs
diff --git a/providers/mlx5/mlx5dv.h b/providers/mlx5/mlx5dv.h
index ce033dc..c5aae57 100644
--- a/providers/mlx5/mlx5dv.h
+++ b/providers/mlx5/mlx5dv.h
@@ -189,6 +189,7 @@  int mlx5dv_destroy_mkey(struct mlx5dv_mkey *mkey);
 enum mlx5dv_qp_init_attr_mask {
 	MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS	= 1 << 0,
 	MLX5DV_QP_INIT_ATTR_MASK_DC			= 1 << 1,
+	MLX5DV_QP_INIT_ATTR_MASK_SEND_OPS_FLAGS		= 1 << 2,
 };
 
 enum mlx5dv_dc_type {
@@ -201,16 +202,32 @@  struct mlx5dv_dc_init_attr {
 	uint64_t dct_access_key;
 };
 
+enum mlx5dv_qp_create_send_ops_flags {
+	MLX5DV_QP_EX_WITH_MR_INTERLEAVED	= 1 << 0,
+};
+
 struct mlx5dv_qp_init_attr {
 	uint64_t comp_mask;	/* Use enum mlx5dv_qp_init_attr_mask */
 	uint32_t create_flags;	/* Use enum mlx5dv_qp_create_flags */
 	struct mlx5dv_dc_init_attr  dc_init_attr;
+	uint64_t send_ops_flags; /* Use enum mlx5dv_qp_create_send_ops_flags */
 };
 
 struct ibv_qp *mlx5dv_create_qp(struct ibv_context *context,
 				struct ibv_qp_init_attr_ex *qp_attr,
 				struct mlx5dv_qp_init_attr *mlx5_qp_attr);
 
+struct mlx5dv_mr_interleaved {
+	uint64_t        addr;
+	uint32_t        bytes_count;
+	uint32_t        bytes_skip;
+	uint32_t        lkey;
+};
+
+enum mlx5dv_wc_opcode {
+	MLX5DV_WC_UMR = IBV_WC_DRIVER1,
+};
+
 struct mlx5dv_qp_ex {
 	uint64_t comp_mask;
 	/*
@@ -219,6 +236,12 @@  struct mlx5dv_qp_ex {
 	 */
 	void (*wr_set_dc_addr)(struct mlx5dv_qp_ex *mqp, struct ibv_ah *ah,
 			       uint32_t remote_dctn, uint64_t remote_dc_key);
+	void (*wr_mr_interleaved)(struct mlx5dv_qp_ex *mqp,
+				  struct mlx5dv_mkey *mkey,
+				  uint32_t access_flags, /* use enum ibv_access_flags */
+				  uint32_t repeat_count,
+				  uint16_t num_interleaved,
+				  struct mlx5dv_mr_interleaved *data);
 };
 
 struct mlx5dv_qp_ex *mlx5dv_qp_ex_from_ibv_qp_ex(struct ibv_qp_ex *qp);
@@ -231,6 +254,17 @@  static inline void mlx5dv_wr_set_dc_addr(struct mlx5dv_qp_ex *mqp,
 	mqp->wr_set_dc_addr(mqp, ah, remote_dctn, remote_dc_key);
 }
 
+static inline void mlx5dv_wr_mr_interleaved(struct mlx5dv_qp_ex *mqp,
+					    struct mlx5dv_mkey *mkey,
+					    uint32_t access_flags,
+					    uint32_t repeat_count,
+					    uint16_t num_interleaved,
+					    struct mlx5dv_mr_interleaved *data)
+{
+	mqp->wr_mr_interleaved(mqp, mkey, access_flags, repeat_count,
+			       num_interleaved, data);
+}
+
 enum mlx5dv_flow_action_esp_mask {
 	MLX5DV_FLOW_ACTION_ESP_MASK_FLAGS	= 1 << 0,
 };
@@ -843,6 +877,22 @@  union mlx5_wqe_umr_inline_seg {
 	struct mlx5_wqe_umr_klm_seg	klm;
 };
 
+struct mlx5_wqe_umr_repeat_ent_seg {
+	__be16		stride;
+	__be16		byte_count;
+	__be32		memkey;
+	__be64		va;
+};
+
+struct mlx5_wqe_umr_repeat_block_seg {
+	__be32		byte_count;
+	__be32		op;
+	__be32		repeat_count;
+	__be16		reserved;
+	__be16		num_ent;
+	struct mlx5_wqe_umr_repeat_ent_seg entries[0];
+};
+
 enum {
 	MLX5_WQE_MKEY_CONTEXT_FREE = 1 << 6
 };
diff --git a/providers/mlx5/qp.c b/providers/mlx5/qp.c
index b2f749c..ecfe844 100644
--- a/providers/mlx5/qp.c
+++ b/providers/mlx5/qp.c
@@ -57,6 +57,7 @@  static const uint32_t mlx5_ib_opcode[] = {
 	[IBV_WR_BIND_MW]		= MLX5_OPCODE_UMR,
 	[IBV_WR_LOCAL_INV]		= MLX5_OPCODE_UMR,
 	[IBV_WR_TSO]			= MLX5_OPCODE_TSO,
+	[IBV_WR_DRIVER1]		= MLX5_OPCODE_UMR,
 };
 
 static void *get_recv_wqe(struct mlx5_qp *qp, int n)
@@ -1245,6 +1246,8 @@  static inline void _common_wqe_init(struct ibv_qp_ex *ibqp,
 		mqp->sq.wr_data[idx] = IBV_WC_BIND_MW;
 	else if (ib_op == IBV_WR_LOCAL_INV)
 		mqp->sq.wr_data[idx] = IBV_WC_LOCAL_INV;
+	else if (ib_op == IBV_WR_DRIVER1)
+		mqp->sq.wr_data[idx] = IBV_WC_DRIVER1;
 
 	ctrl = mlx5_get_send_wqe(mqp, idx);
 	*(uint32_t *)((void *)ctrl + 8) = 0;
@@ -2044,6 +2047,156 @@  mlx5_send_wr_set_xrc_srqn(struct ibv_qp_ex *ibqp, uint32_t remote_srqn)
 		mqp->cur_setters_cnt++;
 }
 
+static uint8_t get_umr_mr_flags(uint32_t acc)
+{
+	return ((acc & IBV_ACCESS_REMOTE_ATOMIC ?
+		MLX5_WQE_MKEY_CONTEXT_ACCESS_FLAGS_ATOMIC : 0) |
+		(acc & IBV_ACCESS_REMOTE_WRITE ?
+		MLX5_WQE_MKEY_CONTEXT_ACCESS_FLAGS_REMOTE_WRITE : 0) |
+		(acc & IBV_ACCESS_REMOTE_READ ?
+		MLX5_WQE_MKEY_CONTEXT_ACCESS_FLAGS_REMOTE_READ  : 0) |
+		(acc & IBV_ACCESS_LOCAL_WRITE ?
+		MLX5_WQE_MKEY_CONTEXT_ACCESS_FLAGS_LOCAL_WRITE  : 0));
+}
+
+/* The strided block format is as the following:
+ * | repeat_block | entry_block | entry_block |...| entry_block |
+ * While the repeat entry contains details on the list of the block_entries.
+ */
+static void umr_strided_seg_create(struct mlx5_qp *qp,
+				   uint32_t repeat_count,
+				   uint16_t num_interleaved,
+				   struct mlx5dv_mr_interleaved *data,
+				   void *seg,
+				   void *qend, int *wqe_size, int *xlat_size,
+				   uint64_t *reglen)
+{
+	struct mlx5_wqe_umr_repeat_block_seg *rb = seg;
+	struct mlx5_wqe_umr_repeat_ent_seg *eb;
+	int byte_count = 0;
+	int tmp;
+	int i;
+
+	rb->op = htobe32(0x400);
+	rb->reserved = 0;
+	rb->num_ent = htobe16(num_interleaved);
+	rb->repeat_count = htobe32(repeat_count);
+	eb = rb->entries;
+
+	/*
+	 * ------------------------------------------------------------
+	 * | repeat_block | entry_block | entry_block |...| entry_block
+	 * ------------------------------------------------------------
+	 */
+	for (i = 0; i < num_interleaved; i++, eb++) {
+		if (unlikely(eb == qend))
+			eb = mlx5_get_send_wqe(qp, 0);
+
+		byte_count += data[i].bytes_count;
+		eb->va = htobe64(data[i].addr);
+		eb->byte_count = htobe16(data[i].bytes_count);
+		eb->stride = htobe16(data[i].bytes_count + data[i].bytes_skip);
+		eb->memkey = htobe32(data[i].lkey);
+	}
+
+	rb->byte_count = htobe32(byte_count);
+	*reglen = byte_count * repeat_count;
+
+	tmp = align(num_interleaved + 1, 4) - num_interleaved - 1;
+	memset(eb, 0, tmp * sizeof(*eb));
+
+	*wqe_size = align(sizeof(*rb) + sizeof(*eb) * num_interleaved, 64);
+	*xlat_size = (num_interleaved + 1) * sizeof(*eb);
+}
+
+static void mlx5_send_wr_mr_interleaved(struct mlx5dv_qp_ex *dv_qp,
+					struct mlx5dv_mkey *dv_mkey,
+					uint32_t access_flags,
+					uint32_t repeat_count,
+					uint16_t num_interleaved,
+					struct mlx5dv_mr_interleaved *data)
+{
+	struct mlx5_qp *mqp = mqp_from_mlx5dv_qp_ex(dv_qp);
+	struct ibv_qp_ex *ibqp = &mqp->verbs_qp.qp_ex;
+	struct mlx5_wqe_umr_ctrl_seg *umr_ctrl_seg;
+	struct mlx5_wqe_mkey_context_seg *mk;
+	struct mlx5_mkey *mkey = container_of(dv_mkey, struct mlx5_mkey,
+					      dv_mkey);
+	int xlat_size;
+	int size;
+	uint64_t reglen = 0;
+	void *qend = mqp->sq.qend;
+	void *seg;
+	uint16_t max_entries;
+
+	if (unlikely(!(ibqp->wr_flags & IBV_SEND_INLINE))) {
+		mqp->err = EOPNOTSUPP;
+		return;
+	}
+
+	max_entries = min_t(size_t,
+			    (mqp->max_inline_data + sizeof(struct mlx5_wqe_inl_data_seg)) /
+					sizeof(struct mlx5_wqe_umr_repeat_ent_seg) - 1,
+			     mkey->num_desc);
+
+	if (unlikely(num_interleaved > max_entries)) {
+		mqp->err = ENOMEM;
+		return;
+	}
+
+	if (unlikely(!check_comp_mask(access_flags,
+				      IBV_ACCESS_LOCAL_WRITE |
+				      IBV_ACCESS_REMOTE_WRITE |
+				      IBV_ACCESS_REMOTE_READ |
+				      IBV_ACCESS_REMOTE_ATOMIC))) {
+		mqp->err = EINVAL;
+		return;
+	}
+
+	_common_wqe_init(ibqp, IBV_WR_DRIVER1);
+	mqp->cur_size = sizeof(struct mlx5_wqe_ctrl_seg) / 16;
+	mqp->cur_ctrl->imm = htobe32(dv_mkey->lkey);
+	seg = umr_ctrl_seg = (void *)mqp->cur_ctrl + sizeof(struct mlx5_wqe_ctrl_seg);
+
+	memset(umr_ctrl_seg, 0, sizeof(*umr_ctrl_seg));
+	umr_ctrl_seg->flags = MLX5_WQE_UMR_CTRL_FLAG_INLINE;
+	umr_ctrl_seg->mkey_mask = htobe64(MLX5_WQE_UMR_CTRL_MKEY_MASK_LEN	|
+		MLX5_WQE_UMR_CTRL_MKEY_MASK_ACCESS_LOCAL_WRITE	|
+		MLX5_WQE_UMR_CTRL_MKEY_MASK_ACCESS_REMOTE_READ	|
+		MLX5_WQE_UMR_CTRL_MKEY_MASK_ACCESS_REMOTE_WRITE	|
+		MLX5_WQE_UMR_CTRL_MKEY_MASK_ACCESS_ATOMIC	|
+		MLX5_WQE_UMR_CTRL_MKEY_MASK_FREE);
+
+	seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+	mqp->cur_size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+
+	if (unlikely(seg == qend))
+		seg = mlx5_get_send_wqe(mqp, 0);
+
+	mk = seg;
+	memset(mk, 0, sizeof(*mk));
+	mk->access_flags = get_umr_mr_flags(access_flags);
+	mk->qpn_mkey = htobe32(0xffffff00 | (dv_mkey->lkey & 0xff));
+
+	seg += sizeof(*mk);
+	mqp->cur_size += (sizeof(*mk) / 16);
+
+	if (unlikely(seg == qend))
+		seg = mlx5_get_send_wqe(mqp, 0);
+
+	umr_strided_seg_create(mqp, repeat_count, num_interleaved, data,
+			       seg, qend, &size, &xlat_size, &reglen);
+	mk->len = htobe64(reglen);
+	umr_ctrl_seg->klm_octowords = htobe16(align(xlat_size, 64) / 16);
+	mqp->cur_size += size / 16;
+
+	mqp->fm_cache = MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
+	mqp->nreq++;
+	mqp->inl_wqe = 1;
+
+	_common_wqe_finilize(mqp);
+}
+
 static void mlx5_send_wr_set_dc_addr(struct mlx5dv_qp_ex *dv_qp,
 				     struct ibv_ah *ah,
 				     uint32_t remote_dctn,
@@ -2164,6 +2317,7 @@  int mlx5_qp_fill_wr_pfns(struct mlx5_qp *mqp,
 	struct ibv_qp_ex *ibqp = &mqp->verbs_qp.qp_ex;
 	uint64_t ops = attr->send_ops_flags;
 	struct mlx5dv_qp_ex *dv_qp;
+	uint64_t mlx5_ops = 0;
 
 	ibqp->wr_start = mlx5_send_wr_start;
 	ibqp->wr_complete = mlx5_send_wr_complete;
@@ -2174,6 +2328,10 @@  int mlx5_qp_fill_wr_pfns(struct mlx5_qp *mqp,
 	     ops & IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD))
 		return EOPNOTSUPP;
 
+	if (mlx5_attr &&
+	    mlx5_attr->comp_mask & MLX5DV_QP_INIT_ATTR_MASK_SEND_OPS_FLAGS)
+		mlx5_ops = mlx5_attr->send_ops_flags;
+
 	/* Set all supported micro-functions regardless user request */
 	switch (attr->qp_type) {
 	case IBV_QPT_RC:
@@ -2182,10 +2340,20 @@  int mlx5_qp_fill_wr_pfns(struct mlx5_qp *mqp,
 
 		fill_wr_builders_rc_xrc_dc(ibqp);
 		fill_wr_setters_rc_uc(ibqp);
+
+		if (mlx5_ops) {
+			if (!check_comp_mask(mlx5_ops,
+					     MLX5DV_QP_EX_WITH_MR_INTERLEAVED))
+				return EOPNOTSUPP;
+
+			dv_qp = &mqp->dv_qp;
+			dv_qp->wr_mr_interleaved = mlx5_send_wr_mr_interleaved;
+		}
+
 		break;
 
 	case IBV_QPT_UC:
-		if (ops & ~MLX5_SUPPORTED_SEND_OPS_FLAGS_UC)
+		if (ops & ~MLX5_SUPPORTED_SEND_OPS_FLAGS_UC || mlx5_ops)
 			return EOPNOTSUPP;
 
 		fill_wr_builders_uc(ibqp);
@@ -2193,7 +2361,7 @@  int mlx5_qp_fill_wr_pfns(struct mlx5_qp *mqp,
 		break;
 
 	case IBV_QPT_XRC_SEND:
-		if (ops & ~MLX5_SUPPORTED_SEND_OPS_FLAGS_XRC)
+		if (ops & ~MLX5_SUPPORTED_SEND_OPS_FLAGS_XRC || mlx5_ops)
 			return EOPNOTSUPP;
 
 		fill_wr_builders_rc_xrc_dc(ibqp);
@@ -2202,7 +2370,7 @@  int mlx5_qp_fill_wr_pfns(struct mlx5_qp *mqp,
 		break;
 
 	case IBV_QPT_UD:
-		if (ops & ~MLX5_SUPPORTED_SEND_OPS_FLAGS_UD)
+		if (ops & ~MLX5_SUPPORTED_SEND_OPS_FLAGS_UD || mlx5_ops)
 			return EOPNOTSUPP;
 
 		if (mqp->flags & MLX5_QP_FLAGS_USE_UNDERLAY)
@@ -2214,7 +2382,7 @@  int mlx5_qp_fill_wr_pfns(struct mlx5_qp *mqp,
 		break;
 
 	case IBV_QPT_RAW_PACKET:
-		if (ops & ~MLX5_SUPPORTED_SEND_OPS_FLAGS_RAW_PACKET)
+		if (ops & ~MLX5_SUPPORTED_SEND_OPS_FLAGS_RAW_PACKET || mlx5_ops)
 			return EOPNOTSUPP;
 
 		fill_wr_builders_eth(ibqp);
@@ -2228,7 +2396,7 @@  int mlx5_qp_fill_wr_pfns(struct mlx5_qp *mqp,
 		      mlx5_attr->dc_init_attr.dc_type == MLX5DV_DCTYPE_DCI))
 			return EOPNOTSUPP;
 
-		if (ops & ~MLX5_SUPPORTED_SEND_OPS_FLAGS_DCI)
+		if (ops & ~MLX5_SUPPORTED_SEND_OPS_FLAGS_DCI || mlx5_ops)
 			return EOPNOTSUPP;
 
 		fill_wr_builders_rc_xrc_dc(ibqp);
diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c
index 839f43c..136c0d2 100644
--- a/providers/mlx5/verbs.c
+++ b/providers/mlx5/verbs.c
@@ -1131,7 +1131,8 @@  int mlx5_destroy_srq(struct ibv_srq *srq)
 
 static int _sq_overhead(struct mlx5_qp *qp,
 			enum ibv_qp_type qp_type,
-			uint64_t ops)
+			uint64_t ops,
+			uint64_t mlx5_ops)
 {
 	size_t size = sizeof(struct mlx5_wqe_ctrl_seg);
 	size_t rdma_size = 0;
@@ -1151,7 +1152,8 @@  static int _sq_overhead(struct mlx5_qp *qp,
 			      sizeof(struct mlx5_wqe_raddr_seg) +
 			      sizeof(struct mlx5_wqe_atomic_seg);
 
-	if (ops & (IBV_QP_EX_WITH_BIND_MW | IBV_QP_EX_WITH_LOCAL_INV))
+	if (ops & (IBV_QP_EX_WITH_BIND_MW | IBV_QP_EX_WITH_LOCAL_INV) ||
+	    (mlx5_ops & MLX5DV_QP_EX_WITH_MR_INTERLEAVED))
 		mw_size = sizeof(struct mlx5_wqe_ctrl_seg) +
 			  sizeof(struct mlx5_wqe_umr_ctrl_seg) +
 			  sizeof(struct mlx5_wqe_mkey_context_seg) +
@@ -1195,9 +1197,11 @@  static int _sq_overhead(struct mlx5_qp *qp,
 	return size;
 }
 
-static int sq_overhead(struct mlx5_qp *qp, struct ibv_qp_init_attr_ex *attr)
+static int sq_overhead(struct mlx5_qp *qp, struct ibv_qp_init_attr_ex *attr,
+		       struct mlx5dv_qp_init_attr *mlx5_qp_attr)
 {
 	uint64_t ops;
+	uint64_t mlx5_ops = 0;
 
 	if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) {
 		ops = attr->send_ops_flags;
@@ -1236,11 +1240,17 @@  static int sq_overhead(struct mlx5_qp *qp, struct ibv_qp_init_attr_ex *attr)
 		}
 	}
 
-	return _sq_overhead(qp, attr->qp_type, ops);
+
+	if (mlx5_qp_attr &&
+	    mlx5_qp_attr->comp_mask & MLX5DV_QP_INIT_ATTR_MASK_SEND_OPS_FLAGS)
+		mlx5_ops = mlx5_qp_attr->send_ops_flags;
+
+	return _sq_overhead(qp, attr->qp_type, ops, mlx5_ops);
 }
 
 static int mlx5_calc_send_wqe(struct mlx5_context *ctx,
 			      struct ibv_qp_init_attr_ex *attr,
+			      struct mlx5dv_qp_init_attr *mlx5_qp_attr,
 			      struct mlx5_qp *qp)
 {
 	int size;
@@ -1248,7 +1258,7 @@  static int mlx5_calc_send_wqe(struct mlx5_context *ctx,
 	int max_gather;
 	int tot_size;
 
-	size = sq_overhead(qp, attr);
+	size = sq_overhead(qp, attr, mlx5_qp_attr);
 	if (size < 0)
 		return size;
 
@@ -1301,6 +1311,7 @@  static int mlx5_calc_rcv_wqe(struct mlx5_context *ctx,
 
 static int mlx5_calc_sq_size(struct mlx5_context *ctx,
 			     struct ibv_qp_init_attr_ex *attr,
+			     struct mlx5dv_qp_init_attr *mlx5_qp_attr,
 			     struct mlx5_qp *qp)
 {
 	int wqe_size;
@@ -1310,7 +1321,7 @@  static int mlx5_calc_sq_size(struct mlx5_context *ctx,
 	if (!attr->cap.max_send_wr)
 		return 0;
 
-	wqe_size = mlx5_calc_send_wqe(ctx, attr, qp);
+	wqe_size = mlx5_calc_send_wqe(ctx, attr, mlx5_qp_attr, qp);
 	if (wqe_size < 0) {
 		mlx5_dbg(fp, MLX5_DBG_QP, "\n");
 		return wqe_size;
@@ -1321,7 +1332,7 @@  static int mlx5_calc_sq_size(struct mlx5_context *ctx,
 		return -EINVAL;
 	}
 
-	qp->max_inline_data = wqe_size - sq_overhead(qp, attr) -
+	qp->max_inline_data = wqe_size - sq_overhead(qp, attr, mlx5_qp_attr) -
 		sizeof(struct mlx5_wqe_inl_data_seg);
 	attr->cap.max_inline_data = qp->max_inline_data;
 
@@ -1441,12 +1452,13 @@  static int mlx5_calc_rq_size(struct mlx5_context *ctx,
 
 static int mlx5_calc_wq_size(struct mlx5_context *ctx,
 			     struct ibv_qp_init_attr_ex *attr,
+			     struct mlx5dv_qp_init_attr *mlx5_qp_attr,
 			     struct mlx5_qp *qp)
 {
 	int ret;
 	int result;
 
-	ret = mlx5_calc_sq_size(ctx, attr, qp);
+	ret = mlx5_calc_sq_size(ctx, attr, mlx5_qp_attr, qp);
 	if (ret < 0)
 		return ret;
 
@@ -1677,7 +1689,8 @@  enum {
 
 enum {
 	MLX5_DV_CREATE_QP_SUP_COMP_MASK = MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS |
-					  MLX5DV_QP_INIT_ATTR_MASK_DC
+					  MLX5DV_QP_INIT_ATTR_MASK_DC |
+					  MLX5DV_QP_INIT_ATTR_MASK_SEND_OPS_FLAGS
 };
 
 enum {
@@ -1912,7 +1925,9 @@  static struct ibv_qp *create_qp(struct ibv_context *context,
 	if (ctx->atomic_cap == IBV_ATOMIC_HCA)
 		qp->atomics_enabled = 1;
 
-	if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) {
+	if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS ||
+	    (mlx5_qp_attr &&
+	     mlx5_qp_attr->comp_mask & MLX5DV_QP_INIT_ATTR_MASK_SEND_OPS_FLAGS)) {
 		/*
 		 * Scatter2cqe, which is a data-path optimization, is disabled
 		 * since driver DC data-path doesn't support it.
@@ -1939,7 +1954,7 @@  static struct ibv_qp *create_qp(struct ibv_context *context,
 	if (!scatter_to_cqe_configured && use_scatter_to_cqe())
 		cmd.flags |= MLX5_QP_FLAG_SCATTER_CQE;
 
-	ret = mlx5_calc_wq_size(ctx, attr, qp);
+	ret = mlx5_calc_wq_size(ctx, attr, mlx5_qp_attr, qp);
 	if (ret < 0) {
 		errno = -ret;
 		goto err;