diff mbox

[rdma-core,09/10] verbs: Tag matching receive interface

Message ID 1508764681-4531-10-git-send-email-yishaih@mellanox.com (mailing list archive)
State Accepted
Headers show

Commit Message

Yishai Hadas Oct. 23, 2017, 1:18 p.m. UTC
From: Artemy Kovalyov <artemyko@mellanox.com>

It includes:
- Extending enum ibv_wc_status and enum ibv_wc_flags to hold tag
  matching information.
- Adding a new read function, ibv_wc_read_tm_info(), to read
  additional TM-related information from the CQE.

A detailed description of the TM receiver mechanism was added to
Documentation/tag_matching.md.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
---
 Documentation/tag_matching.md     | 41 +++++++++++++++++++++++++++++++++++++++
 libibverbs/enum_strs.c            |  3 ++-
 libibverbs/man/ibv_create_cq_ex.3 | 11 +++++++++++
 libibverbs/verbs.h                | 22 ++++++++++++++++++++-
 4 files changed, 75 insertions(+), 2 deletions(-)
diff mbox

Patch

diff --git a/Documentation/tag_matching.md b/Documentation/tag_matching.md
index 23d6532..81d4dd8 100644
--- a/Documentation/tag_matching.md
+++ b/Documentation/tag_matching.md
@@ -298,3 +298,44 @@  Tag-manipulation operations generate the following completion opcodes:
 These completions are complemented by the **IBV_WC_TM_SYNC_REQ** flag, which
 indicates whether further HW synchronization is needed.
 
+TM receive completions generate the following completion opcodes:
+* **IBV_WC_RECV** - standard SRQ completion; used for unexpected messages
+* **IBV_WC_TM_NO_TAG** - completion of a message sent with the
+  **IBV_TM_NO_TAG** opcode.
+* **IBV_WC_TM_RECV** - completion of a tag-matching operation
+
+The **IBV_WC_TM_RECV** completion is complemented by the following completion
+flags:
+-	**IBV_WC_TM_MATCH** - a match was performed
+-	**IBV_WC_TM_DATA_VALID** - all data of the matched message has been
+	delivered to memory
+
+In single-packet eager messages, both flags are set. When larger messages or
+rendezvous transfers are involved, matching and data transfer completion are
+distinct events that generate 2 completion events for the same **recv_wr_id**.
+While data transfer completions may be arbitrarily delayed depending on
+message size, matching completion is reported immediately and is always
+serialized with respect to other matches and the completion of unexpected
+messages.
+
+In addition, **IBV_WC_TM_RECV** completions provide further information about
+the matched message. This information is obtained using extended CQ processing
+via the following extractor function:
+
+```h
+static inline void ibv_wc_read_tm_info(struct ibv_cq_ex *cq,
+                                       struct ibv_wc_tm_info *tm_info);
+```
+```h
+struct ibv_wc_tm_info {
+	  uint64_t		  tag;	   /* tag from TMH */
+	  uint32_t		  priv;    /* opaque user data from TMH */
+};
+```
+
+Finally, when a posted tagged buffer is insufficient to hold the data of a
+rendezvous request, the HW completes the buffer with an
+**IBV_WC_TM_RNDV_INCOMPLETE** status. In this case, the TMH and RVH headers are
+scattered into the tagged buffer (tag-matching has still been completed!), and
+message handling is resumed by SW.
+
diff --git a/libibverbs/enum_strs.c b/libibverbs/enum_strs.c
index 93f2c56..c6f6e0f 100644
--- a/libibverbs/enum_strs.c
+++ b/libibverbs/enum_strs.c
@@ -123,9 +123,10 @@  const char *ibv_wc_status_str(enum ibv_wc_status status)
 		[IBV_WC_RESP_TIMEOUT_ERR]	= "response timeout error",
 		[IBV_WC_GENERAL_ERR]		= "general error",
 		[IBV_WC_TM_ERR]			= "TM error",
+		[IBV_WC_TM_RNDV_INCOMPLETE]     = "TM software rendezvous",
 	};
 
-	if (status < IBV_WC_SUCCESS || status > IBV_WC_TM_ERR)
+	if (status < IBV_WC_SUCCESS || status > IBV_WC_TM_RNDV_INCOMPLETE)
 		return "unknown";
 
 	return wc_status_str[status];
diff --git a/libibverbs/man/ibv_create_cq_ex.3 b/libibverbs/man/ibv_create_cq_ex.3
index e943e0e..23f867c 100644
--- a/libibverbs/man/ibv_create_cq_ex.3
+++ b/libibverbs/man/ibv_create_cq_ex.3
@@ -140,6 +140,17 @@  Below members and functions are used in order to poll the current completion. Th
 .BI "uint32_t ibv_wc_read_flow_tag(struct ibv_cq_ex " "*cq"); \c
  Get flow tag from the current completion.
 
+.BI "void ibv_wc_read_tm_info(struct ibv_cq_ex " *cq ","
+.BI "struct ibv_wc_tm_info " *tm_info "); \c
+ Get tag matching info from the current completion.
+.nf
+struct ibv_wc_tm_info {
+.in +8
+uint64_t tag;  /* tag from TMH */
+uint32_t priv; /* opaque user data from TMH */
+.in -8
+};
+
 .SH "RETURN VALUE"
 .B ibv_create_cq_ex()
 returns a pointer to the CQ, or NULL if the request fails.
diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index 66f8c95..5bceb7b 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -429,6 +429,7 @@  enum ibv_wc_status {
 	IBV_WC_RESP_TIMEOUT_ERR,
 	IBV_WC_GENERAL_ERR,
 	IBV_WC_TM_ERR,
+	IBV_WC_TM_RNDV_INCOMPLETE,
 };
 const char *ibv_wc_status_str(enum ibv_wc_status status);
 
@@ -451,6 +452,8 @@  enum ibv_wc_opcode {
 	IBV_WC_TM_ADD,
 	IBV_WC_TM_DEL,
 	IBV_WC_TM_SYNC,
+	IBV_WC_TM_RECV,
+	IBV_WC_TM_NO_TAG,
 };
 
 enum {
@@ -468,6 +471,7 @@  enum ibv_create_cq_wc_flags {
 	IBV_WC_EX_WITH_COMPLETION_TIMESTAMP	= 1 << 7,
 	IBV_WC_EX_WITH_CVLAN		= 1 << 8,
 	IBV_WC_EX_WITH_FLOW_TAG		= 1 << 9,
+	IBV_WC_EX_WITH_TM_INFO		= 1 << 10,
 };
 
 enum {
@@ -484,7 +488,8 @@  enum {
 	IBV_CREATE_CQ_SUP_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
 				IBV_WC_EX_WITH_COMPLETION_TIMESTAMP |
 				IBV_WC_EX_WITH_CVLAN |
-				IBV_WC_EX_WITH_FLOW_TAG
+				IBV_WC_EX_WITH_FLOW_TAG |
+				IBV_WC_EX_WITH_TM_INFO
 };
 
 enum ibv_wc_flags {
@@ -493,6 +498,8 @@  enum ibv_wc_flags {
 	IBV_WC_IP_CSUM_OK	= 1 << IBV_WC_IP_CSUM_OK_SHIFT,
 	IBV_WC_WITH_INV		= 1 << 3,
 	IBV_WC_TM_SYNC_REQ	= 1 << 4,
+	IBV_WC_TM_MATCH		= 1 << 5,
+	IBV_WC_TM_DATA_VALID	= 1 << 6,
 };
 
 struct ibv_wc {
@@ -1148,6 +1155,11 @@  struct ibv_poll_cq_attr {
 	uint32_t comp_mask;
 };
 
+struct ibv_wc_tm_info {
+	uint64_t		tag;	 /* tag from TMH */
+	uint32_t		priv;	 /* opaque user data from TMH */
+};
+
 struct ibv_cq_ex {
 	struct ibv_context     *context;
 	struct ibv_comp_channel *channel;
@@ -1180,6 +1192,8 @@  struct ibv_cq_ex {
 	uint64_t (*read_completion_ts)(struct ibv_cq_ex *current);
 	uint16_t (*read_cvlan)(struct ibv_cq_ex *current);
 	uint32_t (*read_flow_tag)(struct ibv_cq_ex *current);
+	void (*read_tm_info)(struct ibv_cq_ex *current,
+			     struct ibv_wc_tm_info *tm_info);
 };
 
 static inline struct ibv_cq *ibv_cq_ex_to_cq(struct ibv_cq_ex *cq)
@@ -1277,6 +1291,12 @@  static inline uint32_t ibv_wc_read_flow_tag(struct ibv_cq_ex *cq)
 	return cq->read_flow_tag(cq);
 }
 
+static inline void ibv_wc_read_tm_info(struct ibv_cq_ex *cq,
+				       struct ibv_wc_tm_info *tm_info)
+{
+	cq->read_tm_info(cq, tm_info);
+}
+
 static inline int ibv_post_wq_recv(struct ibv_wq *wq,
 				   struct ibv_recv_wr *recv_wr,
 				   struct ibv_recv_wr **bad_recv_wr)