
[3/4] Providers/rxe: Implement ibv_create_cq_ex verb

Message ID: 20201106230122.17411-4-rpearson@hpe.com
State: Not Applicable
Series: Provider/rxe: Implement extended verbs APIs

Commit Message

Bob Pearson Nov. 6, 2020, 11:01 p.m. UTC
Together with the matching commit for the rxe kernel driver, implement the
ibv_create_cq_ex verb.
Also implement the operations in the ibv_cq_ex struct.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 kernel-headers/rdma/rdma_user_rxe.h |  30 ++++
 providers/rxe/rxe-abi.h             |   4 +-
 providers/rxe/rxe.c                 | 267 +++++++++++++++++++++++++++-
 providers/rxe/rxe.h                 |  12 +-
 providers/rxe/rxe_queue.h           |  59 +++++-
 5 files changed, 357 insertions(+), 15 deletions(-)
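
A usage sketch for the verb this series implements (illustrative only, not
part of the patch; it assumes an already-opened rxe context and requests only
flags the provider advertises):

#include <errno.h>
#include <infiniband/verbs.h>

/* Request an extended CQ that reports byte length plus raw and wallclock
 * timestamps per completion.  Any wc_flags bit outside the provider's
 * supported set makes the call fail with errno == EOPNOTSUPP, as in
 * rxe_create_cq_ex() in the patch below.
 */
static struct ibv_cq_ex *create_ts_cq(struct ibv_context *ctx)
{
	struct ibv_cq_init_attr_ex attr = {
		.cqe		= 256,
		.cq_context	= NULL,
		.channel	= NULL,
		.comp_vector	= 0,
		.wc_flags	= IBV_WC_EX_WITH_BYTE_LEN |
				  IBV_WC_EX_WITH_COMPLETION_TIMESTAMP |
				  IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK,
	};

	return ibv_create_cq_ex(ctx, &attr);
}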

Comments

Jason Gunthorpe Nov. 12, 2020, 2:03 p.m. UTC | #1
On Fri, Nov 06, 2020 at 05:01:21PM -0600, Bob Pearson wrote:
> @@ -171,6 +197,10 @@ struct rxe_alloc_context_resp {
>  	__aligned_u64		driver_cap;
>  };
>  
> +struct rxe_create_cq_cmd {
> +	__aligned_u64 is_ex;
> +};

would be clearer to call this 'wc_format' or something more specific;

u8 wc_format
u8 reserved[7]

Is fine
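
Read literally, that suggestion would turn the command struct into something
like the following (a sketch of the reviewer's proposal, not what was posted;
the comment values are only illustrative):

struct rxe_create_cq_cmd {
	__u8	wc_format;	/* e.g. 0 = ib_uverbs_wc layout, 1 = rxe_uverbs_wc layout */
	__u8	reserved[7];	/* pad to keep the command 8-byte aligned */
};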

> @@ -210,15 +352,129 @@ static struct ibv_cq *rxe_create_cq(struct ibv_context *context, int cqe,
>  	cq->queue = mmap(NULL, resp.mi.size, PROT_READ | PROT_WRITE, MAP_SHARED,
>  			 context->cmd_fd, resp.mi.offset);
>  	if ((void *)cq->queue == MAP_FAILED) {
> -		ibv_cmd_destroy_cq(&cq->ibv_cq);
> +		ibv_cmd_destroy_cq(&cq->vcq.cq);
> +		free(cq);
> +		return NULL;
> +	}
> +
> +	cq->wc_size = 1ULL << cq->queue->log2_elem_size;
> +
> +	if (cq->wc_size < sizeof(struct ib_uverbs_wc)) {
> +		fprintf(stderr, "cq wc size too small %ld need %ld\n",
> +			cq->wc_size, sizeof(struct ib_uverbs_wc));

No prints like this in libraries

Seems reasonable otherwise

Jason
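
For the fprintf() comment, a minimal way to follow the review while keeping
the size check is to fail quietly and report the error through errno (a
sketch only, assuming rxe_destroy_cq() already unmaps the queue and destroys
the kernel CQ; the choice of EINVAL is an assumption, not from the patch):

	cq->wc_size = 1ULL << cq->queue->log2_elem_size;

	if (cq->wc_size < sizeof(struct ib_uverbs_wc)) {
		/* no stderr output from the library; let the caller see errno */
		rxe_destroy_cq(&cq->vcq.cq);
		errno = EINVAL;
		return NULL;
	}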

Patch

diff --git a/kernel-headers/rdma/rdma_user_rxe.h b/kernel-headers/rdma/rdma_user_rxe.h
index a31465e2..e8bde1b6 100644
--- a/kernel-headers/rdma/rdma_user_rxe.h
+++ b/kernel-headers/rdma/rdma_user_rxe.h
@@ -158,6 +158,32 @@  struct rxe_recv_wqe {
 	struct rxe_dma_info	dma;
 };
 
+struct rxe_uverbs_wc {
+	/* keep these the same as ib_uverbs_wc */
+	__aligned_u64		wr_id;
+	__u32			status;
+	__u32			opcode;
+	__u32			vendor_err;
+	__u32			byte_len;
+	union {
+		__be32		imm_data;
+		__u32		invalidate_rkey;
+	} ex;
+	__u32			qp_num;
+	__u32			src_qp;
+	__u32			wc_flags;
+	__u16			pkey_index;
+	__u16			slid;
+	__u8			sl;
+	__u8			dlid_path_bits;
+	__u8			port_num;
+	__u8			reserved;
+
+	/* any extras go here */
+	__aligned_u64		timestamp;
+	__aligned_u64		realtime;
+};
+
 enum rxe_capabilities {
 	RXE_CAP_NONE		= 0,
 	RXE_CAP_CMD_EX		= 1ULL << 0,
@@ -171,6 +197,10 @@  struct rxe_alloc_context_resp {
 	__aligned_u64		driver_cap;
 };
 
+struct rxe_create_cq_cmd {
+	__aligned_u64 is_ex;
+};
+
 struct rxe_create_cq_resp {
 	struct mminfo mi;
 };
diff --git a/providers/rxe/rxe-abi.h b/providers/rxe/rxe-abi.h
index 0b0b4b38..08bdb546 100644
--- a/providers/rxe/rxe-abi.h
+++ b/providers/rxe/rxe-abi.h
@@ -42,7 +42,9 @@ 
 DECLARE_DRV_CMD(urxe_alloc_context, IB_USER_VERBS_CMD_GET_CONTEXT,
 		rxe_alloc_context_cmd, rxe_alloc_context_resp);
 DECLARE_DRV_CMD(urxe_create_cq, IB_USER_VERBS_CMD_CREATE_CQ,
-		empty, rxe_create_cq_resp);
+		rxe_create_cq_cmd, rxe_create_cq_resp);
+DECLARE_DRV_CMD(urxe_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
+		rxe_create_cq_cmd, rxe_create_cq_resp);
 DECLARE_DRV_CMD(urxe_create_qp, IB_USER_VERBS_CMD_CREATE_QP,
 		empty, rxe_create_qp_resp);
 DECLARE_DRV_CMD(urxe_create_srq, IB_USER_VERBS_CMD_CREATE_SRQ,
diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
index b1fa2f42..57f0c500 100644
--- a/providers/rxe/rxe.c
+++ b/providers/rxe/rxe.c
@@ -187,20 +186,163 @@  static int rxe_dereg_mr(struct verbs_mr *vmr)
 	return 0;
 }
 
+static int cq_start_poll(struct ibv_cq_ex *current,
+			 struct ibv_poll_cq_attr *attr)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	pthread_spin_lock(&cq->lock);
+
+	atomic_thread_fence(memory_order_acquire);
+	cq->cur_index = load_consumer_index(cq->queue);
+
+	if (check_cq_queue_empty(cq)) {
+		pthread_spin_unlock(&cq->lock);
+		errno = ENOENT;
+		return errno;
+	}
+
+	cq->wc = addr_from_index(cq->queue, cq->cur_index);
+	cq->vcq.cq_ex.status = cq->wc->status;
+	cq->vcq.cq_ex.wr_id = cq->wc->wr_id;
+
+	return 0;
+}
+
+static int cq_next_poll(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	advance_cq_cur_index(cq);
+
+	if (check_cq_queue_empty(cq)) {
+		store_consumer_index(cq->queue, cq->cur_index);
+		pthread_spin_unlock(&cq->lock);
+		errno = ENOENT;
+		return errno;
+	}
+
+	cq->wc = addr_from_index(cq->queue, cq->cur_index);
+	cq->vcq.cq_ex.status = cq->wc->status;
+	cq->vcq.cq_ex.wr_id = cq->wc->wr_id;
+
+	return 0;
+}
+
+static void cq_end_poll(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	advance_cq_cur_index(cq);
+	store_consumer_index(cq->queue, cq->cur_index);
+	pthread_spin_unlock(&cq->lock);
+
+	return;
+}
+
+static enum ibv_wc_opcode cq_read_opcode(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->opcode;
+}
+
+static uint32_t cq_read_vendor_err(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->vendor_err;
+}
+
+static uint32_t cq_read_byte_len(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->byte_len;
+}
+
+static __be32 cq_read_imm_data(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->ex.imm_data;
+}
+
+static uint32_t cq_read_qp_num(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->qp_num;
+}
+
+static uint32_t cq_read_src_qp(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->src_qp;
+}
+
+static unsigned int cq_read_wc_flags(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->wc_flags;
+}
+
+static uint32_t cq_read_slid(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->slid;
+}
+
+static uint8_t cq_read_sl(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->sl;
+}
+
+static uint8_t cq_read_dlid_path_bits(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->dlid_path_bits;
+}
+
+static uint64_t cq_read_completion_ts(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->timestamp;
+}
+
+static uint64_t cq_read_completion_wallclock_ns(struct ibv_cq_ex *current)
+{
+	struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+	return cq->wc->realtime;
+}
+
+static int rxe_destroy_cq(struct ibv_cq *ibcq);
+
 static struct ibv_cq *rxe_create_cq(struct ibv_context *context, int cqe,
 				    struct ibv_comp_channel *channel,
 				    int comp_vector)
 {
 	struct rxe_cq *cq;
-	struct urxe_create_cq_resp resp;
+	struct urxe_create_cq cmd = {};
+	struct urxe_create_cq_resp resp = {};
 	int ret;
 
 	cq = malloc(sizeof(*cq));
 	if (!cq)
 		return NULL;
 
+	cmd.is_ex = 0;
+
 	ret = ibv_cmd_create_cq(context, cqe, channel, comp_vector,
-				&cq->ibv_cq, NULL, 0,
+				&cq->vcq.cq, &cmd.ibv_cmd, sizeof(cmd),
 				&resp.ibv_resp, sizeof(resp));
 	if (ret) {
 		free(cq);
@@ -210,15 +352,129 @@  static struct ibv_cq *rxe_create_cq(struct ibv_context *context, int cqe,
 	cq->queue = mmap(NULL, resp.mi.size, PROT_READ | PROT_WRITE, MAP_SHARED,
 			 context->cmd_fd, resp.mi.offset);
 	if ((void *)cq->queue == MAP_FAILED) {
-		ibv_cmd_destroy_cq(&cq->ibv_cq);
+		ibv_cmd_destroy_cq(&cq->vcq.cq);
+		free(cq);
+		return NULL;
+	}
+
+	cq->wc_size = 1ULL << cq->queue->log2_elem_size;
+
+	if (cq->wc_size < sizeof(struct ib_uverbs_wc)) {
+		fprintf(stderr, "cq wc size too small %ld need %ld\n",
+			cq->wc_size, sizeof(struct ib_uverbs_wc));
+		rxe_destroy_cq(&cq->vcq.cq);
+		return NULL;
+	}
+
+	cq->mmap_info = resp.mi;
+	pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
+
+	return &cq->vcq.cq;
+}
+
+enum rxe_sup_wc_flags {
+	RXE_SUP_WC_FLAGS = IBV_WC_EX_WITH_BYTE_LEN
+			 | IBV_WC_EX_WITH_IMM
+			 | IBV_WC_EX_WITH_QP_NUM
+			 | IBV_WC_EX_WITH_SRC_QP
+			 | IBV_WC_EX_WITH_SLID
+			 | IBV_WC_EX_WITH_SL
+			 | IBV_WC_EX_WITH_DLID_PATH_BITS
+			 | IBV_WC_EX_WITH_COMPLETION_TIMESTAMP
+			 | IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK,
+};
+
+static struct ibv_cq_ex *rxe_create_cq_ex(struct ibv_context *context,
+					  struct ibv_cq_init_attr_ex *attr)
+{
+	int ret;
+	struct rxe_cq *cq;
+	struct urxe_create_cq_ex cmd = {};
+	struct urxe_create_cq_ex_resp resp = {};
+
+	if (attr->wc_flags & ~RXE_SUP_WC_FLAGS) {
+		errno = EOPNOTSUPP;
+		return NULL;
+	}
+
+	cq = calloc(1, sizeof(*cq));
+	if (!cq)
+		return NULL;
+
+	cmd.is_ex = 1;
+
+	ret = ibv_cmd_create_cq_ex(context, attr, &cq->vcq,
+				   &cmd.ibv_cmd, sizeof(cmd),
+				   &resp.ibv_resp, sizeof(resp));
+	if (ret) {
+		free(cq);
+		return NULL;
+	}
+
+	cq->queue = mmap(NULL, resp.mi.size, PROT_READ | PROT_WRITE, MAP_SHARED,
+			 context->cmd_fd, resp.mi.offset);
+	if ((void *)cq->queue == MAP_FAILED) {
+		ibv_cmd_destroy_cq(&cq->vcq.cq);
 		free(cq);
 		return NULL;
 	}
 
+	cq->wc_size = 1ULL << cq->queue->log2_elem_size;
+
+	if (cq->wc_size < sizeof(struct rxe_uverbs_wc)) {
+		fprintf(stderr, "cq wc size too small %ld need %ld\n",
+			cq->wc_size, sizeof(struct rxe_uverbs_wc));
+		rxe_destroy_cq(&cq->vcq.cq);
+		return NULL;
+	}
+
 	cq->mmap_info = resp.mi;
 	pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
 
-	return &cq->ibv_cq;
+	cq->vcq.cq_ex.start_poll	= cq_start_poll;
+	cq->vcq.cq_ex.next_poll		= cq_next_poll;
+	cq->vcq.cq_ex.end_poll		= cq_end_poll;
+	cq->vcq.cq_ex.read_opcode	= cq_read_opcode;
+	cq->vcq.cq_ex.read_vendor_err	= cq_read_vendor_err;
+	cq->vcq.cq_ex.read_wc_flags	= cq_read_wc_flags;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
+		cq->vcq.cq_ex.read_byte_len
+			= cq_read_byte_len;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_IMM)
+		cq->vcq.cq_ex.read_imm_data
+			= cq_read_imm_data;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_QP_NUM)
+		cq->vcq.cq_ex.read_qp_num
+			= cq_read_qp_num;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_SRC_QP)
+		cq->vcq.cq_ex.read_src_qp
+			= cq_read_src_qp;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_SLID)
+		cq->vcq.cq_ex.read_slid
+			= cq_read_slid;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_SL)
+		cq->vcq.cq_ex.read_sl
+			= cq_read_sl;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
+		cq->vcq.cq_ex.read_dlid_path_bits
+			= cq_read_dlid_path_bits;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP)
+		cq->vcq.cq_ex.read_completion_ts
+			= cq_read_completion_ts;
+
+	if (attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK)
+		cq->vcq.cq_ex.read_completion_wallclock_ns
+			= cq_read_completion_wallclock_ns;
+
+	return &cq->vcq.cq_ex;
 }
 
 static int rxe_resize_cq(struct ibv_cq *ibcq, int cqe)
@@ -890,6 +1146,7 @@  static const struct verbs_context_ops rxe_ctx_ops = {
 
 static const struct verbs_context_ops rxe_ctx_ops_cmd_ex = {
 	.query_device_ex = rxe_query_device_ex,
+	.create_cq_ex = rxe_create_cq_ex,
 };
 
 static struct verbs_context *rxe_alloc_context(struct ibv_device *ibdev,
diff --git a/providers/rxe/rxe.h b/providers/rxe/rxe.h
index f9cae315..e89a781f 100644
--- a/providers/rxe/rxe.h
+++ b/providers/rxe/rxe.h
@@ -62,11 +62,17 @@  struct rxe_context {
 	uint64_t		capabilities;
 };
 
+/* common between cq and cq_ex */
 struct rxe_cq {
-	struct ibv_cq		ibv_cq;
+	struct verbs_cq		vcq;
 	struct mminfo		mmap_info;
-	struct rxe_queue		*queue;
+	struct rxe_queue	*queue;
 	pthread_spinlock_t	lock;
+
+	/* new API support */
+	struct rxe_uverbs_wc	*wc;
+	size_t			wc_size;
+	uint32_t		cur_index;
 };
 
 struct rxe_ah {
@@ -113,7 +119,7 @@  static inline struct rxe_device *to_rdev(struct ibv_device *ibdev)
 
 static inline struct rxe_cq *to_rcq(struct ibv_cq *ibcq)
 {
-	return to_rxxx(cq, cq);
+	return container_of(ibcq, struct rxe_cq, vcq.cq);
 }
 
 static inline struct rxe_qp *to_rqp(struct ibv_qp *ibqp)
diff --git a/providers/rxe/rxe_queue.h b/providers/rxe/rxe_queue.h
index 5c57b3e3..1c3c3d5c 100644
--- a/providers/rxe/rxe_queue.h
+++ b/providers/rxe/rxe_queue.h
@@ -40,6 +40,8 @@ 
 #include <stdint.h>
 #include <stdatomic.h>
 
+#include "rxe.h"
+
 /* MUST MATCH kernel struct rxe_pqc in rxe_queue.h */
 struct rxe_queue {
 	uint32_t		log2_elem_size;
@@ -57,27 +59,27 @@  static inline int next_index(struct rxe_queue *q, int index)
 	return (index + 1) & q->index_mask;
 }
 
+/* Must hold consumer_index lock */
 static inline int queue_empty(struct rxe_queue *q)
 {
-	/* Must hold consumer_index lock */
 	return ((atomic_load(&q->producer_index) -
 		 atomic_load_explicit(&q->consumer_index,
 				      memory_order_relaxed)) &
 		q->index_mask) == 0;
 }
 
+/* Must hold producer_index lock */
 static inline int queue_full(struct rxe_queue *q)
 {
-	/* Must hold producer_index lock */
 	return ((atomic_load_explicit(&q->producer_index,
 				      memory_order_relaxed) +
 		 1 - atomic_load(&q->consumer_index)) &
 		q->index_mask) == 0;
 }
 
+/* Must hold producer_index lock */
 static inline void advance_producer(struct rxe_queue *q)
 {
-	/* Must hold producer_index lock */
 	atomic_thread_fence(memory_order_release);
 	atomic_store(
 	    &q->producer_index,
@@ -86,9 +88,9 @@  static inline void advance_producer(struct rxe_queue *q)
 		q->index_mask);
 }
 
+/* Must hold consumer_index lock */
 static inline void advance_consumer(struct rxe_queue *q)
 {
-	/* Must hold consumer_index lock */
 	atomic_store(
 	    &q->consumer_index,
 	    (atomic_load_explicit(&q->consumer_index, memory_order_relaxed) +
@@ -96,18 +98,48 @@  static inline void advance_consumer(struct rxe_queue *q)
 		q->index_mask);
 }
 
+/* Must hold producer_index lock */
+static inline uint32_t load_producer_index(struct rxe_queue *q)
+{
+	return atomic_load_explicit(&q->producer_index,
+				    memory_order_relaxed);
+}
+
+/* Must hold producer_index lock */
+static inline void store_producer_index(struct rxe_queue *q, uint32_t index)
+{
+	/* flush writes to work queue before moving index */
+	atomic_thread_fence(memory_order_release);
+	atomic_store(&q->producer_index, index);
+}
+
+/* Must hold consumer_index lock */
+static inline uint32_t load_consumer_index(struct rxe_queue *q)
+{
+	return atomic_load_explicit(&q->consumer_index,
+				    memory_order_relaxed);
+}
+
+/* Must hold consumer_index lock */
+static inline void store_consumer_index(struct rxe_queue *q, uint32_t index)
+{
+	/* flush writes to work queue before moving index */
+	atomic_thread_fence(memory_order_release);
+	atomic_store(&q->consumer_index, index);
+}
+
+/* Must hold producer_index lock */
 static inline void *producer_addr(struct rxe_queue *q)
 {
-	/* Must hold producer_index lock */
 	return q->data + ((atomic_load_explicit(&q->producer_index,
 						memory_order_relaxed) &
 			   q->index_mask)
 			  << q->log2_elem_size);
 }
 
+/* Must hold consumer_index lock */
 static inline void *consumer_addr(struct rxe_queue *q)
 {
-	/* Must hold consumer_index lock */
 	return q->data + ((atomic_load_explicit(&q->consumer_index,
 						memory_order_relaxed) &
 			   q->index_mask)
@@ -125,4 +157,19 @@  static inline unsigned int index_from_addr(const struct rxe_queue *q, const void
 	return (((uint8_t *)addr - q->data) >> q->log2_elem_size) & q->index_mask;
 }
 
+static inline void advance_cq_cur_index(struct rxe_cq *cq)
+{
+	struct rxe_queue *q = cq->queue;
+
+	cq->cur_index = (cq->cur_index + 1) & q->index_mask;
+}
+
+static inline int check_cq_queue_empty(struct rxe_cq *cq)
+{
+	struct rxe_queue *q = cq->queue;
+	uint32_t producer_index = atomic_load(&q->producer_index);
+
+	return (cq->cur_index == producer_index);
+}
+
 #endif /* H_RXE_PCQ */
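
For reference (not part of the patch), the cq_start_poll()/cq_next_poll()/
cq_end_poll() and cq_read_*() callbacks installed in rxe_create_cq_ex() are
reached through the generic libibverbs wrappers roughly as follows; this is
the documented ibv_start_poll() pattern and assumes the CQ was created with
IBV_WC_EX_WITH_COMPLETION_TIMESTAMP set:

#include <errno.h>
#include <stdio.h>
#include <infiniband/verbs.h>

/* Drain one batch of completions from an extended CQ.  start_poll/next_poll
 * return ENOENT when the queue is empty; end_poll is called only after a
 * successful start_poll, per the ibv_start_poll() contract.
 */
static int drain_cq(struct ibv_cq_ex *cq)
{
	struct ibv_poll_cq_attr pattr = {};
	int ret = ibv_start_poll(cq, &pattr);

	if (ret)
		return ret == ENOENT ? 0 : ret;

	do {
		if (cq->status == IBV_WC_SUCCESS)
			printf("wr_id %llu opcode %d ts %llu\n",
			       (unsigned long long)cq->wr_id,
			       ibv_wc_read_opcode(cq),
			       (unsigned long long)ibv_wc_read_completion_ts(cq));
		ret = ibv_next_poll(cq);
	} while (!ret);

	ibv_end_poll(cq);
	return ret == ENOENT ? 0 : ret;
}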