@@ -158,6 +158,32 @@ struct rxe_recv_wqe {
struct rxe_dma_info dma;
};
+struct rxe_uverbs_wc {
+ /* keep these the same as ib_uverbs_wc */
+ __aligned_u64 wr_id;
+ __u32 status;
+ __u32 opcode;
+ __u32 vendor_err;
+ __u32 byte_len;
+ union {
+ __be32 imm_data;
+ __u32 invalidate_rkey;
+ } ex;
+ __u32 qp_num;
+ __u32 src_qp;
+ __u32 wc_flags;
+ __u16 pkey_index;
+ __u16 slid;
+ __u8 sl;
+ __u8 dlid_path_bits;
+ __u8 port_num;
+ __u8 reserved;
+
+ /* any extras go here */
+ __aligned_u64 timestamp;
+ __aligned_u64 realtime;
+};
+
enum rxe_capabilities {
RXE_CAP_NONE = 0,
RXE_CAP_CMD_EX = 1ULL << 0,
@@ -171,6 +197,10 @@ struct rxe_alloc_context_resp {
__aligned_u64 driver_cap;
};
+struct rxe_create_cq_cmd {
+ __aligned_u64 is_ex;
+};
+
struct rxe_create_cq_resp {
struct mminfo mi;
};
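
Not part of this patch, for reference only: the comment at the top of rxe_uverbs_wc says its leading fields must stay identical to ib_uverbs_wc, with the timestamp fields appended after them. A minimal sketch of how that layout invariant could be checked at compile time follows; the CHECK_WC_OFFSET macro is made up here, and the sketch assumes both UAPI headers are visible in one translation unit.

#include <assert.h>
#include <stddef.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/rdma_user_rxe.h>

/* fail the build if the common prefix of the two structs ever drifts */
#define CHECK_WC_OFFSET(field)						\
	static_assert(offsetof(struct rxe_uverbs_wc, field) ==		\
		      offsetof(struct ib_uverbs_wc, field),		\
		      "rxe_uverbs_wc." #field " must match ib_uverbs_wc")

CHECK_WC_OFFSET(wr_id);
CHECK_WC_OFFSET(status);
CHECK_WC_OFFSET(byte_len);
CHECK_WC_OFFSET(qp_num);
CHECK_WC_OFFSET(slid);
CHECK_WC_OFFSET(port_num);
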
diff --git a/providers/rxe/rxe-abi.h b/providers/rxe/rxe-abi.h
--- a/providers/rxe/rxe-abi.h
+++ b/providers/rxe/rxe-abi.h
@@ -42,7 +42,9 @@
DECLARE_DRV_CMD(urxe_alloc_context, IB_USER_VERBS_CMD_GET_CONTEXT,
rxe_alloc_context_cmd, rxe_alloc_context_resp);
DECLARE_DRV_CMD(urxe_create_cq, IB_USER_VERBS_CMD_CREATE_CQ,
- empty, rxe_create_cq_resp);
+ rxe_create_cq_cmd, rxe_create_cq_resp);
+DECLARE_DRV_CMD(urxe_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
+ rxe_create_cq_cmd, rxe_create_cq_resp);
DECLARE_DRV_CMD(urxe_create_qp, IB_USER_VERBS_CMD_CREATE_QP,
empty, rxe_create_qp_resp);
DECLARE_DRV_CMD(urxe_create_srq, IB_USER_VERBS_CMD_CREATE_SRQ,
diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
--- a/providers/rxe/rxe.c
+++ b/providers/rxe/rxe.c
@@ -187,20 +186,163 @@ static int rxe_dereg_mr(struct verbs_mr *vmr)
return 0;
}
+static int cq_start_poll(struct ibv_cq_ex *current,
+ struct ibv_poll_cq_attr *attr)
+{
+ struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+ pthread_spin_lock(&cq->lock);
+
+ atomic_thread_fence(memory_order_acquire);
+ cq->cur_index = load_consumer_index(cq->queue);
+
+ if (check_cq_queue_empty(cq)) {
+ pthread_spin_unlock(&cq->lock);
+ errno = ENOENT;
+ return errno;
+ }
+
+ cq->wc = addr_from_index(cq->queue, cq->cur_index);
+ cq->vcq.cq_ex.status = cq->wc->status;
+ cq->vcq.cq_ex.wr_id = cq->wc->wr_id;
+
+ return 0;
+}
+
+static int cq_next_poll(struct ibv_cq_ex *current)
+{
+ struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+ advance_cq_cur_index(cq);
+
+ if (check_cq_queue_empty(cq)) {
+ store_consumer_index(cq->queue, cq->cur_index);
+ pthread_spin_unlock(&cq->lock);
+ errno = ENOENT;
+ return errno;
+ }
+
+ cq->wc = addr_from_index(cq->queue, cq->cur_index);
+ cq->vcq.cq_ex.status = cq->wc->status;
+ cq->vcq.cq_ex.wr_id = cq->wc->wr_id;
+
+ return 0;
+}
+
+static void cq_end_poll(struct ibv_cq_ex *current)
+{
+ struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+ advance_cq_cur_index(cq);
+ store_consumer_index(cq->queue, cq->cur_index);
+ pthread_spin_unlock(&cq->lock);
+
+ return;
+}
+
+static enum ibv_wc_opcode cq_read_opcode(struct ibv_cq_ex *current)
+{
+ struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+ return cq->wc->opcode;
+}
+
+static uint32_t cq_read_vendor_err(struct ibv_cq_ex *current)
+{
+ struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+ return cq->wc->vendor_err;
+}
+
+static uint32_t cq_read_byte_len(struct ibv_cq_ex *current)
+{
+ struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+ return cq->wc->byte_len;
+}
+
+static __be32 cq_read_imm_data(struct ibv_cq_ex *current)
+{
+ struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+ return cq->wc->ex.imm_data;
+}
+
+static uint32_t cq_read_qp_num(struct ibv_cq_ex *current)
+{
+ struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+ return cq->wc->qp_num;
+}
+
+static uint32_t cq_read_src_qp(struct ibv_cq_ex *current)
+{
+ struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+ return cq->wc->src_qp;
+}
+
+static unsigned int cq_read_wc_flags(struct ibv_cq_ex *current)
+{
+ struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+ return cq->wc->wc_flags;
+}
+
+static uint32_t cq_read_slid(struct ibv_cq_ex *current)
+{
+ struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+ return cq->wc->slid;
+}
+
+static uint8_t cq_read_sl(struct ibv_cq_ex *current)
+{
+ struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+ return cq->wc->sl;
+}
+
+static uint8_t cq_read_dlid_path_bits(struct ibv_cq_ex *current)
+{
+ struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+ return cq->wc->dlid_path_bits;
+}
+
+static uint64_t cq_read_completion_ts(struct ibv_cq_ex *current)
+{
+ struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+ return cq->wc->timestamp;
+}
+
+static uint64_t cq_read_completion_wallclock_ns(struct ibv_cq_ex *current)
+{
+ struct rxe_cq *cq = container_of(current, struct rxe_cq, vcq.cq_ex);
+
+ return cq->wc->realtime;
+}
+
+static int rxe_destroy_cq(struct ibv_cq *ibcq);
+
static struct ibv_cq *rxe_create_cq(struct ibv_context *context, int cqe,
struct ibv_comp_channel *channel,
int comp_vector)
{
struct rxe_cq *cq;
- struct urxe_create_cq_resp resp;
+ struct urxe_create_cq cmd = {};
+ struct urxe_create_cq_resp resp = {};
int ret;
cq = malloc(sizeof(*cq));
if (!cq)
return NULL;
+ cmd.is_ex = 0;
+
ret = ibv_cmd_create_cq(context, cqe, channel, comp_vector,
- &cq->ibv_cq, NULL, 0,
+ &cq->vcq.cq, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp));
if (ret) {
free(cq);
@@ -210,15 +352,129 @@ static struct ibv_cq *rxe_create_cq(struct ibv_context *context, int cqe,
cq->queue = mmap(NULL, resp.mi.size, PROT_READ | PROT_WRITE, MAP_SHARED,
context->cmd_fd, resp.mi.offset);
if ((void *)cq->queue == MAP_FAILED) {
- ibv_cmd_destroy_cq(&cq->ibv_cq);
+ ibv_cmd_destroy_cq(&cq->vcq.cq);
+ free(cq);
+ return NULL;
+ }
+
+ cq->wc_size = 1ULL << cq->queue->log2_elem_size;
+
+ if (cq->wc_size < sizeof(struct ib_uverbs_wc)) {
+ fprintf(stderr, "cq wc size too small %zu need %zu\n",
+ cq->wc_size, sizeof(struct ib_uverbs_wc));
+ rxe_destroy_cq(&cq->vcq.cq);
+ return NULL;
+ }
+
+ cq->mmap_info = resp.mi;
+ pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
+
+ return &cq->vcq.cq;
+}
+
+enum rxe_sup_wc_flags {
+ RXE_SUP_WC_FLAGS = IBV_WC_EX_WITH_BYTE_LEN
+ | IBV_WC_EX_WITH_IMM
+ | IBV_WC_EX_WITH_QP_NUM
+ | IBV_WC_EX_WITH_SRC_QP
+ | IBV_WC_EX_WITH_SLID
+ | IBV_WC_EX_WITH_SL
+ | IBV_WC_EX_WITH_DLID_PATH_BITS
+ | IBV_WC_EX_WITH_COMPLETION_TIMESTAMP
+ | IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK,
+};
+
+static struct ibv_cq_ex *rxe_create_cq_ex(struct ibv_context *context,
+ struct ibv_cq_init_attr_ex *attr)
+{
+ int ret;
+ struct rxe_cq *cq;
+ struct urxe_create_cq_ex cmd = {};
+ struct urxe_create_cq_ex_resp resp = {};
+
+ if (attr->wc_flags & ~RXE_SUP_WC_FLAGS) {
+ errno = EOPNOTSUPP;
+ return NULL;
+ }
+
+ cq = calloc(1, sizeof(*cq));
+ if (!cq)
+ return NULL;
+
+ cmd.is_ex = 1;
+
+ ret = ibv_cmd_create_cq_ex(context, attr, &cq->vcq,
+ &cmd.ibv_cmd, sizeof(cmd),
+ &resp.ibv_resp, sizeof(resp));
+ if (ret) {
+ free(cq);
+ return NULL;
+ }
+
+ cq->queue = mmap(NULL, resp.mi.size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ context->cmd_fd, resp.mi.offset);
+ if ((void *)cq->queue == MAP_FAILED) {
+ ibv_cmd_destroy_cq(&cq->vcq.cq);
free(cq);
return NULL;
}
+ cq->wc_size = 1ULL << cq->queue->log2_elem_size;
+
+ if (cq->wc_size < sizeof(struct rxe_uverbs_wc)) {
+ fprintf(stderr, "cq wc size too small %zu need %zu\n",
+ cq->wc_size, sizeof(struct rxe_uverbs_wc));
+ rxe_destroy_cq(&cq->vcq.cq);
+ return NULL;
+ }
+
cq->mmap_info = resp.mi;
pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
- return &cq->ibv_cq;
+ cq->vcq.cq_ex.start_poll = cq_start_poll;
+ cq->vcq.cq_ex.next_poll = cq_next_poll;
+ cq->vcq.cq_ex.end_poll = cq_end_poll;
+ cq->vcq.cq_ex.read_opcode = cq_read_opcode;
+ cq->vcq.cq_ex.read_vendor_err = cq_read_vendor_err;
+ cq->vcq.cq_ex.read_wc_flags = cq_read_wc_flags;
+
+ if (attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
+ cq->vcq.cq_ex.read_byte_len
+ = cq_read_byte_len;
+
+ if (attr->wc_flags & IBV_WC_EX_WITH_IMM)
+ cq->vcq.cq_ex.read_imm_data
+ = cq_read_imm_data;
+
+ if (attr->wc_flags & IBV_WC_EX_WITH_QP_NUM)
+ cq->vcq.cq_ex.read_qp_num
+ = cq_read_qp_num;
+
+ if (attr->wc_flags & IBV_WC_EX_WITH_SRC_QP)
+ cq->vcq.cq_ex.read_src_qp
+ = cq_read_src_qp;
+
+ if (attr->wc_flags & IBV_WC_EX_WITH_SLID)
+ cq->vcq.cq_ex.read_slid
+ = cq_read_slid;
+
+ if (attr->wc_flags & IBV_WC_EX_WITH_SL)
+ cq->vcq.cq_ex.read_sl
+ = cq_read_sl;
+
+ if (attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
+ cq->vcq.cq_ex.read_dlid_path_bits
+ = cq_read_dlid_path_bits;
+
+ if (attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP)
+ cq->vcq.cq_ex.read_completion_ts
+ = cq_read_completion_ts;
+
+ if (attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK)
+ cq->vcq.cq_ex.read_completion_wallclock_ns
+ = cq_read_completion_wallclock_ns;
+
+ return &cq->vcq.cq_ex;
}
static int rxe_resize_cq(struct ibv_cq *ibcq, int cqe)
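
For reference (not part of the patch), this is roughly what the new path above looks like from the application side: rxe_create_cq_ex() only installs a cq_read_*() callback when the matching bit is set in wc_flags, so a caller has to request at create time every field it intends to read later. The helper name and the particular flag choice below are illustrative.

#include <infiniband/verbs.h>

static struct ibv_cq_ex *create_ts_cq(struct ibv_context *ctx)
{
	struct ibv_cq_init_attr_ex attr = {
		.cqe = 256,
		.comp_vector = 0,
		/* must be a subset of RXE_SUP_WC_FLAGS above */
		.wc_flags = IBV_WC_EX_WITH_BYTE_LEN |
			    IBV_WC_EX_WITH_COMPLETION_TIMESTAMP,
	};

	/* returns NULL and sets errno (EOPNOTSUPP for unsupported flags) */
	return ibv_create_cq_ex(ctx, &attr);
}
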
@@ -890,6 +1146,7 @@ static const struct verbs_context_ops rxe_ctx_ops = {
static const struct verbs_context_ops rxe_ctx_ops_cmd_ex = {
.query_device_ex = rxe_query_device_ex,
+ .create_cq_ex = rxe_create_cq_ex,
};
static struct verbs_context *rxe_alloc_context(struct ibv_device *ibdev,
diff --git a/providers/rxe/rxe.h b/providers/rxe/rxe.h
--- a/providers/rxe/rxe.h
+++ b/providers/rxe/rxe.h
@@ -62,11 +62,17 @@ struct rxe_context {
uint64_t capabilities;
};
+/* common between cq and cq_ex */
struct rxe_cq {
- struct ibv_cq ibv_cq;
+ struct verbs_cq vcq;
struct mminfo mmap_info;
- struct rxe_queue *queue;
+ struct rxe_queue *queue;
pthread_spinlock_t lock;
+
+ /* new API support */
+ struct rxe_uverbs_wc *wc;
+ size_t wc_size;
+ uint32_t cur_index;
};
struct rxe_ah {
@@ -113,7 +119,7 @@ static inline struct rxe_device *to_rdev(struct ibv_device *ibdev)
static inline struct rxe_cq *to_rcq(struct ibv_cq *ibcq)
{
- return to_rxxx(cq, cq);
+ return container_of(ibcq, struct rxe_cq, vcq.cq);
}
static inline struct rxe_qp *to_rqp(struct ibv_qp *ibqp)
diff --git a/providers/rxe/rxe_queue.h b/providers/rxe/rxe_queue.h
--- a/providers/rxe/rxe_queue.h
+++ b/providers/rxe/rxe_queue.h
@@ -40,6 +40,8 @@
#include <stdint.h>
#include <stdatomic.h>
+#include "rxe.h"
+
/* MUST MATCH kernel struct rxe_pqc in rxe_queue.h */
struct rxe_queue {
uint32_t log2_elem_size;
@@ -57,27 +59,27 @@ static inline int next_index(struct rxe_queue *q, int index)
return (index + 1) & q->index_mask;
}
+/* Must hold consumer_index lock */
static inline int queue_empty(struct rxe_queue *q)
{
- /* Must hold consumer_index lock */
return ((atomic_load(&q->producer_index) -
atomic_load_explicit(&q->consumer_index,
memory_order_relaxed)) &
q->index_mask) == 0;
}
+/* Must hold producer_index lock */
static inline int queue_full(struct rxe_queue *q)
{
- /* Must hold producer_index lock */
return ((atomic_load_explicit(&q->producer_index,
memory_order_relaxed) +
1 - atomic_load(&q->consumer_index)) &
q->index_mask) == 0;
}
+/* Must hold producer_index lock */
static inline void advance_producer(struct rxe_queue *q)
{
- /* Must hold producer_index lock */
atomic_thread_fence(memory_order_release);
atomic_store(
&q->producer_index,
@@ -86,9 +88,9 @@ static inline void advance_producer(struct rxe_queue *q)
q->index_mask);
}
+/* Must hold consumer_index lock */
static inline void advance_consumer(struct rxe_queue *q)
{
- /* Must hold consumer_index lock */
atomic_store(
&q->consumer_index,
(atomic_load_explicit(&q->consumer_index, memory_order_relaxed) +
@@ -96,18 +98,48 @@ static inline void advance_consumer(struct rxe_queue *q)
q->index_mask);
}
+/* Must hold producer_index lock */
+static inline uint32_t load_producer_index(struct rxe_queue *q)
+{
+ return atomic_load_explicit(&q->producer_index,
+ memory_order_relaxed);
+}
+
+/* Must hold producer_index lock */
+static inline void store_producer_index(struct rxe_queue *q, uint32_t index)
+{
+ /* flush writes to work queue before moving index */
+ atomic_thread_fence(memory_order_release);
+ atomic_store(&q->producer_index, index);
+}
+
+/* Must hold consumer_index lock */
+static inline uint32_t load_consumer_index(struct rxe_queue *q)
+{
+ return atomic_load_explicit(&q->consumer_index,
+ memory_order_relaxed);
+}
+
+/* Must hold consumer_index lock */
+static inline void store_consumer_index(struct rxe_queue *q, uint32_t index)
+{
+ /* ensure reads from the work queue complete before moving index */
+ atomic_thread_fence(memory_order_release);
+ atomic_store(&q->consumer_index, index);
+}
+
+/* Must hold producer_index lock */
static inline void *producer_addr(struct rxe_queue *q)
{
- /* Must hold producer_index lock */
return q->data + ((atomic_load_explicit(&q->producer_index,
memory_order_relaxed) &
q->index_mask)
<< q->log2_elem_size);
}
+/* Must hold consumer_index lock */
static inline void *consumer_addr(struct rxe_queue *q)
{
- /* Must hold consumer_index lock */
return q->data + ((atomic_load_explicit(&q->consumer_index,
memory_order_relaxed) &
q->index_mask)
@@ -125,4 +157,19 @@ static inline unsigned int index_from_addr(const struct rxe_queue *q, const void
return (((uint8_t *)addr - q->data) >> q->log2_elem_size) & q->index_mask;
}
+static inline void advance_cq_cur_index(struct rxe_cq *cq)
+{
+ struct rxe_queue *q = cq->queue;
+
+ cq->cur_index = (cq->cur_index + 1) & q->index_mask;
+}
+
+static inline int check_cq_queue_empty(struct rxe_cq *cq)
+{
+ struct rxe_queue *q = cq->queue;
+ uint32_t producer_index = atomic_load(&q->producer_index);
+
+ return (cq->cur_index == producer_index);
+}
+
#endif /* H_RXE_PCQ */
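
Not part of the patch: a sketch of the batched-consumer pattern the new load/store index helpers enable, essentially what cq_start_poll()/cq_next_poll()/cq_end_poll() do once the steps are spread across the extended-CQ calls. drain_cq_locked() and its callback parameter are hypothetical, and the caller is assumed to already hold cq->lock.

static int drain_cq_locked(struct rxe_cq *cq,
			   void (*handle)(struct rxe_uverbs_wc *wc))
{
	struct rxe_queue *q = cq->queue;
	uint32_t index = load_consumer_index(q);
	int n = 0;

	/* the load of producer_index pairs with the release fence in
	 * store_producer_index(), so handle() sees the producer's writes
	 */
	while (index != atomic_load(&q->producer_index)) {
		handle(addr_from_index(q, index));
		index = (index + 1) & q->index_mask;
		n++;
	}

	/* publish the new consumer index once for the whole batch */
	store_consumer_index(q, index);

	return n;
}
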
Together with the matching commit for the rxe driver, implement the
ibv_create_cq_ex verb. Also implement the operations in the ibv_cq_ex
struct.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 kernel-headers/rdma/rdma_user_rxe.h |  30 ++++
 providers/rxe/rxe-abi.h             |   4 +-
 providers/rxe/rxe.c                 | 267 +++++++++++++++++++++++++++-
 providers/rxe/rxe.h                 |  12 +-
 providers/rxe/rxe_queue.h           |  59 +++++-
 5 files changed, 357 insertions(+), 15 deletions(-)
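
Not part of the series, for reference only: the generic polling flow documented for the extended CQ API (ibv_start_poll()/ibv_next_poll()/ibv_end_poll()), which is what ends up exercising the cq_start_poll()/cq_next_poll()/cq_end_poll() and cq_read_*() callbacks added above. It assumes the CQ was created with IBV_WC_EX_WITH_BYTE_LEN and IBV_WC_EX_WITH_COMPLETION_TIMESTAMP requested, as in the earlier create sketch.

#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <infiniband/verbs.h>

/* returns the number of completions consumed, or a negative errno */
static int poll_cq_ex(struct ibv_cq_ex *cq)
{
	struct ibv_poll_cq_attr attr = {};
	int n = 0;
	int ret;

	ret = ibv_start_poll(cq, &attr);
	if (ret == ENOENT)
		return 0;		/* CQ is empty */
	if (ret)
		return -ret;

	do {
		if (cq->status == IBV_WC_SUCCESS)
			printf("wr_id %" PRIu64 " len %u ts %" PRIu64 "\n",
			       cq->wr_id, ibv_wc_read_byte_len(cq),
			       ibv_wc_read_completion_ts(cq));
		else
			fprintf(stderr, "wr_id %" PRIu64 " status %d\n",
				cq->wr_id, cq->status);
		n++;
		ret = ibv_next_poll(cq);
	} while (!ret);

	ibv_end_poll(cq);

	return n;
}
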