@@ -63,11 +63,30 @@ enum {
};
enum {
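+	/* app_op values reported in tag-matching (TM) CQEs */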
+ MLX5_CQE_APP_OP_TM_CONSUMED = 0x1,
+ MLX5_CQE_APP_OP_TM_EXPECTED = 0x2,
+ MLX5_CQE_APP_OP_TM_UNEXPECTED = 0x3,
+ MLX5_CQE_APP_OP_TM_NO_TAG = 0x4,
MLX5_CQE_APP_OP_TM_APPEND = 0x5,
MLX5_CQE_APP_OP_TM_REMOVE = 0x6,
MLX5_CQE_APP_OP_TM_NOOP = 0x7,
+ MLX5_CQE_APP_OP_TM_CONSUMED_SW_RDNV = 0x9,
+ MLX5_CQE_APP_OP_TM_CONSUMED_MSG = 0xA,
+ MLX5_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV = 0xB,
+ MLX5_CQE_APP_OP_TM_MSG_COMPLETION_CANCELED = 0xC,
};
+
+/* For larger messages and rendezvous transfers, matching and data-transfer
+ * completion are distinct events, so two completion events are generated
+ * for the same recv_wr_id.
+ */
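+/* E.g. (a sketch): an expected receive matched first reports a CQE with
+ * app_op = TM_CONSUMED (IBV_WC_TM_MATCH, tag kept), then a second CQE with
+ * app_op = TM_EXPECTED (IBV_WC_TM_DATA_VALID) releases the tag entry.
+ */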
+static inline bool mlx5_cqe_app_op_tm_is_complete(int op)
+{
+ return op != MLX5_CQE_APP_OP_TM_CONSUMED &&
+ op != MLX5_CQE_APP_OP_TM_CONSUMED_SW_RDNV;
+}
+
enum {
MLX5_CQ_LAZY_FLAGS =
MLX5_CQ_FLAGS_RX_CSUM_VALID |
@@ -80,6 +99,10 @@ int mlx5_stall_cq_poll_max = 100000;
int mlx5_stall_cq_inc_step = 100;
int mlx5_stall_cq_dec_step = 10;
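+/* Max number of unexpected-message completions that may be outstanding
+ * before IBV_WC_TM_SYNC_REQ is raised to ask the application to sync.
+ */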
+enum {
+ MLX5_TM_MAX_SYNC_DIFF = 0x3fff
+};
+
static inline uint8_t get_cqe_l3_hdr_type(struct mlx5_cqe64 *cqe)
{
return (cqe->l4_hdr_type_etc >> 2) & 0x3;
@@ -532,10 +555,44 @@ static int handle_tag_matching(struct mlx5_cq *cq,
struct mlx5_srq *srq)
{
FILE *fp = to_mctx(srq->vsrq.srq.context)->dbg_fp;
+ struct mlx5_tag_entry *tag;
struct mlx5_srq_op *op;
+ uint16_t wqe_ctr;
cq->ibv_cq.status = IBV_WC_SUCCESS;
switch (cqe64->app_op) {
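+	/* HW did not complete the rendezvous data transfer; SW must finish it */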
+ case MLX5_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV:
+ case MLX5_CQE_APP_OP_TM_CONSUMED_SW_RDNV:
+ case MLX5_CQE_APP_OP_TM_MSG_COMPLETION_CANCELED:
+ cq->ibv_cq.status = IBV_WC_TM_RNDV_INCOMPLETE;
+ SWITCH_FALLTHROUGH;
+
+ case MLX5_CQE_APP_OP_TM_CONSUMED_MSG:
+ case MLX5_CQE_APP_OP_TM_CONSUMED:
+ case MLX5_CQE_APP_OP_TM_EXPECTED:
+ mlx5_spin_lock(&srq->lock);
+ tag = &srq->tm_list[be16toh(cqe64->app_info)];
+ if (!tag->expect_cqe) {
+ mlx5_dbg(fp, MLX5_DBG_CQ, "got idx %d which wasn't added\n",
+ be16toh(cqe64->app_info));
+ cq->ibv_cq.status = IBV_WC_GENERAL_ERR;
+ mlx5_spin_unlock(&srq->lock);
+ return CQ_OK;
+ }
+ cq->ibv_cq.wr_id = tag->wr_id;
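+	/* Release the tag entry only on the final completion; a match-only
+	 * CQE keeps it for the follow-up data completion.
+	 */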
+ if (mlx5_cqe_app_op_tm_is_complete(cqe64->app_op))
+ mlx5_tm_release_tag(srq, tag);
+	/* 32-byte inline scatter is not supported for TM */
+ if (cqe64->op_own & MLX5_INLINE_SCATTER_64) {
+ if (be32toh(cqe64->byte_cnt) > tag->size)
+ cq->ibv_cq.status = IBV_WC_LOC_LEN_ERR;
+ else
+ memcpy(tag->ptr, cqe64 - 1,
+ be32toh(cqe64->byte_cnt));
+ }
+ mlx5_spin_unlock(&srq->lock);
+ break;
+
case MLX5_CQE_APP_OP_TM_REMOVE:
if (!(be32toh(cqe64->tm_cqe.success) & MLX5_TMC_SUCCESS))
cq->ibv_cq.status = IBV_WC_TM_ERR;
@@ -575,6 +632,24 @@ static int handle_tag_matching(struct mlx5_cq *cq,
mlx5_spin_unlock(&srq->lock);
break;
+
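+	/* Count unexpected messages and request a TM sync when the counters
+	 * drift too far apart.
+	 */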
+ case MLX5_CQE_APP_OP_TM_UNEXPECTED:
+ srq->unexp_in++;
+ if (srq->unexp_in - srq->unexp_out > MLX5_TM_MAX_SYNC_DIFF)
+ cq->flags |= MLX5_CQ_FLAGS_TM_SYNC_REQ;
+ SWITCH_FALLTHROUGH;
+
+ case MLX5_CQE_APP_OP_TM_NO_TAG:
+ wqe_ctr = be16toh(cqe64->wqe_counter);
+ cq->ibv_cq.wr_id = srq->wrid[wqe_ctr];
+ mlx5_free_srq_wqe(srq, wqe_ctr);
+ if (cqe64->op_own & MLX5_INLINE_SCATTER_32)
+ return mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe64,
+ be32toh(cqe64->byte_cnt));
+ else if (cqe64->op_own & MLX5_INLINE_SCATTER_64)
+ return mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe64 - 1,
+ be32toh(cqe64->byte_cnt));
+ break;
#ifdef MLX5_DEBUG
default:
mlx5_dbg(fp, MLX5_DBG_CQ, "unexpected TM opcode in cqe\n");
@@ -688,13 +763,23 @@ static inline int mlx5_parse_cqe(struct mlx5_cq *cq,
if (unlikely(err))
return CQ_POLL_ERR;
- if (lazy)
- cq->ibv_cq.status = handle_responder_lazy(cq, cqe64,
- *cur_rsc,
- is_srq ? *cur_srq : NULL);
- else
+ if (lazy) {
+ if (likely(cqe64->app != MLX5_CQE_APP_TAG_MATCHING)) {
+ cq->ibv_cq.status = handle_responder_lazy(cq, cqe64, *cur_rsc,
+ is_srq ? *cur_srq : NULL);
+ } else {
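+	/* TM CQEs can arrive only on a tag-matching SRQ */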
+ if (unlikely(!is_srq))
+ return CQ_POLL_ERR;
+
+ err = handle_tag_matching(cq, cqe64, *cur_srq);
+ if (unlikely(err))
+ return CQ_POLL_ERR;
+ }
+ } else {
wc->status = handle_responder(wc, cqe64, *cur_rsc,
is_srq ? *cur_srq : NULL);
+ }
break;
case MLX5_CQE_NO_PACKET:
@@ -1151,6 +1236,18 @@ static inline enum ibv_wc_opcode mlx5_cq_read_wc_opcode(struct ibv_cq_ex *ibcq)
case MLX5_CQE_RESP_SEND:
case MLX5_CQE_RESP_SEND_IMM:
case MLX5_CQE_RESP_SEND_INV:
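+	/* TM receive completions reuse the RESP_SEND opcodes; the actual
+	 * opcode is refined by app_op.
+	 */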
+ if (unlikely(cq->cqe64->app == MLX5_CQE_APP_TAG_MATCHING)) {
+ switch (cq->cqe64->app_op) {
+ case MLX5_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV:
+ case MLX5_CQE_APP_OP_TM_CONSUMED_MSG:
+ case MLX5_CQE_APP_OP_TM_CONSUMED_SW_RDNV:
+ case MLX5_CQE_APP_OP_TM_EXPECTED:
+ case MLX5_CQE_APP_OP_TM_UNEXPECTED:
+ return IBV_WC_TM_RECV;
+ case MLX5_CQE_APP_OP_TM_NO_TAG:
+ return IBV_WC_TM_NO_TAG;
+ }
+ }
return IBV_WC_RECV;
case MLX5_CQE_NO_PACKET:
switch (cq->cqe64->app_op) {
@@ -1160,6 +1257,8 @@ static inline enum ibv_wc_opcode mlx5_cq_read_wc_opcode(struct ibv_cq_ex *ibcq)
return IBV_WC_TM_ADD;
case MLX5_CQE_APP_OP_TM_NOOP:
return IBV_WC_TM_SYNC;
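+	/* First, match-only completion; delivered without packet data */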
+ case MLX5_CQE_APP_OP_TM_CONSUMED:
+ return IBV_WC_TM_RECV;
}
break;
case MLX5_CQE_REQ:
@@ -1222,6 +1321,24 @@ static inline int mlx5_cq_read_wc_flags(struct ibv_cq_ex *ibcq)
if (cq->flags & MLX5_CQ_FLAGS_TM_SYNC_REQ)
wc_flags |= IBV_WC_TM_SYNC_REQ;
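+	/* TM CQEs: report the matching and data-arrival phases separately */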
+ if (unlikely(cq->cqe64->app == MLX5_CQE_APP_TAG_MATCHING)) {
+ switch (cq->cqe64->app_op) {
+ case MLX5_CQE_APP_OP_TM_CONSUMED_MSG_SW_RDNV:
+ case MLX5_CQE_APP_OP_TM_CONSUMED_MSG:
+ case MLX5_CQE_APP_OP_TM_MSG_COMPLETION_CANCELED:
+ /* Full completion */
+ wc_flags |= (IBV_WC_TM_MATCH | IBV_WC_TM_DATA_VALID);
+ break;
+ case MLX5_CQE_APP_OP_TM_CONSUMED_SW_RDNV:
+ case MLX5_CQE_APP_OP_TM_CONSUMED: /* First completion */
+ wc_flags |= IBV_WC_TM_MATCH;
+ break;
+ case MLX5_CQE_APP_OP_TM_EXPECTED: /* Second completion */
+ wc_flags |= IBV_WC_TM_DATA_VALID;
+ break;
+ }
+ }
+
wc_flags |= ((be32toh(cq->cqe64->flags_rqpn) >> 28) & 3) ? IBV_WC_GRH : 0;
return wc_flags;
}
@@ -1305,6 +1422,15 @@ static inline uint32_t mlx5_cq_read_flow_tag(struct ibv_cq_ex *ibcq)
return be32toh(cq->cqe64->sop_drop_qpn) & MLX5_FLOW_TAG_MASK;
}
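+/* Return the matched tag and application context from the TM header (TMH)
+ * that HW scatters into the CQE on a match.
+ */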
+static inline void mlx5_cq_read_wc_tm_info(struct ibv_cq_ex *ibcq,
+ struct ibv_wc_tm_info *tm_info)
+{
+ struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
+
+ tm_info->tag = be64toh(cq->cqe64->tmh.tag);
+ tm_info->priv = be32toh(cq->cqe64->tmh.app_ctx);
+}
+
#define BIT(i) (1UL << (i))
#define SINGLE_THREADED BIT(0)
@@ -1381,6 +1507,8 @@ void mlx5_cq_fill_pfns(struct mlx5_cq *cq, const struct ibv_cq_init_attr_ex *cq_
cq->ibv_cq.read_cvlan = mlx5_cq_read_wc_cvlan;
if (cq_attr->wc_flags & IBV_WC_EX_WITH_FLOW_TAG)
cq->ibv_cq.read_flow_tag = mlx5_cq_read_flow_tag;
+ if (cq_attr->wc_flags & IBV_WC_EX_WITH_TM_INFO)
+ cq->ibv_cq.read_tm_info = mlx5_cq_read_wc_tm_info;
}
int mlx5_arm_cq(struct ibv_cq *ibvcq, int solicited)
@@ -43,6 +43,7 @@
#endif /* defined(__SSE3__) */
#include <infiniband/verbs.h>
+#include <infiniband/tm_types.h>
/* Always inline the functions */
#ifdef __GNUC__
@@ -311,6 +312,8 @@ struct mlx5_cqe64 {
__be16 vlan_info;
};
struct mlx5_tm_cqe tm_cqe;
+ /* TMH is scattered to CQE upon match */
+ struct ibv_tmh tmh;
};
__be32 srqn_uidx;
__be32 imm_inval_pkey;
@@ -330,7 +330,8 @@ enum {
CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
IBV_WC_EX_WITH_COMPLETION_TIMESTAMP |
IBV_WC_EX_WITH_CVLAN |
- IBV_WC_EX_WITH_FLOW_TAG
+ IBV_WC_EX_WITH_FLOW_TAG |
+ IBV_WC_EX_WITH_TM_INFO
};
enum {