@@ -415,6 +415,8 @@ struct mlx5_srq {
int wqe_shift;
int head;
int tail;
+ int waitq_head;
+ int waitq_tail;
__be32 *db;
uint16_t counter;
int wq_sig;
@@ -807,7 +809,8 @@ int mlx5_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr,
int mlx5_query_srq(struct ibv_srq *srq,
struct ibv_srq_attr *attr);
int mlx5_destroy_srq(struct ibv_srq *srq);
-int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq);
+int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq,
+ uint32_t nwr);
void mlx5_free_srq_wqe(struct mlx5_srq *srq, int ind);
int mlx5_post_srq_recv(struct ibv_srq *ibsrq,
struct ibv_recv_wr *wr,
@@ -1017,4 +1020,14 @@ static inline uint8_t calc_sig(void *wqe, int size)
return ~res;
}
+static inline int align_queue_size(long long req)
+{
+ return mlx5_round_up_power_of_two(req);
+}
+
+static inline bool srq_has_waitq(struct mlx5_srq *srq)
+{
+ return srq->waitq_head >= 0;
+}
+
#endif /* MLX5_H */
@@ -145,13 +145,29 @@ int mlx5_post_srq_recv(struct ibv_srq *ibsrq,
return err;
}
-int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq)
+/* Build a linked list on an array of SRQ WQEs.
+ * Since WQEs are always added to the tail and taken from the head
+ * it doesn't matter where the last WQE points to.
+ */
+static void set_srq_buf_ll(struct mlx5_srq *srq, int start, int end)
{
struct mlx5_wqe_srq_next_seg *next;
+ int i;
+
+ for (i = start; i < end; ++i) {
+ next = get_wqe(srq, i);
+ next->next_wqe_index = htobe16(i + 1);
+ }
+}
+
+int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq,
+ uint32_t max_wr)
+{
int size;
int buf_size;
- int i;
struct mlx5_context *ctx;
+ uint32_t orig_max_wr = max_wr;
+ bool have_wq = true;
ctx = to_mctx(context);
@@ -160,9 +176,18 @@ int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq)
return -1;
}
- srq->wrid = malloc(srq->max * sizeof *srq->wrid);
- if (!srq->wrid)
- return -1;
+	/* First try to allocate more WQEs than requested so that the
+	 * extra ones can serve as the wait queue.
+	 */
+ max_wr = orig_max_wr * 2 + 1;
+
+ if (max_wr > ctx->max_srq_recv_wr) {
+		/* The device limit is too small to provide a wait
+		 * queue; continue without one.
+		 */
+ max_wr = orig_max_wr + 1;
+ have_wq = false;
+ }
size = sizeof(struct mlx5_wqe_srq_next_seg) +
srq->max_gs * sizeof(struct mlx5_wqe_data_seg);
@@ -179,14 +204,28 @@ int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq)
srq->wqe_shift = mlx5_ilog2(size);
+ srq->max = align_queue_size(max_wr);
buf_size = srq->max * size;
if (mlx5_alloc_buf(&srq->buf, buf_size,
- to_mdev(context->device)->page_size)) {
- free(srq->wrid);
+ to_mdev(context->device)->page_size))
return -1;
+
+ srq->head = 0;
+ srq->tail = align_queue_size(orig_max_wr + 1) - 1;
+ if (have_wq) {
+ srq->waitq_head = srq->tail + 1;
+ srq->waitq_tail = srq->max - 1;
+ } else {
+ srq->waitq_head = -1;
+ srq->waitq_tail = -1;
}
+ srq->wrid = malloc(srq->max * sizeof(*srq->wrid));
+ if (!srq->wrid) {
+ mlx5_free_buf(&srq->buf);
+ return -1;
+ }
memset(srq->buf.buf, 0, buf_size);
/*
@@ -194,13 +233,9 @@ int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq)
* linked into the list of free WQEs.
*/
- for (i = 0; i < srq->max; ++i) {
- next = get_wqe(srq, i);
- next->next_wqe_index = htobe16((i + 1) & (srq->max - 1));
- }
-
- srq->head = 0;
- srq->tail = srq->max - 1;
+ set_srq_buf_ll(srq, srq->head, srq->tail);
+ if (have_wq)
+ set_srq_buf_ll(srq, srq->waitq_head, srq->waitq_tail);
return 0;
}
@@ -553,11 +553,6 @@ int mlx5_round_up_power_of_two(long long sz)
return (int)ret;
}
-static int align_queue_size(long long req)
-{
- return mlx5_round_up_power_of_two(req);
-}
-
static int get_cqe_size(struct mlx5dv_cq_init_attr *mlx5cq_attr)
{
char *env;
@@ -1016,11 +1011,10 @@ struct ibv_srq *mlx5_create_srq(struct ibv_pd *pd,
goto err;
}
- srq->max = align_queue_size(attr->attr.max_wr + 1);
srq->max_gs = attr->attr.max_sge;
srq->counter = 0;
- if (mlx5_alloc_srq_buf(pd->context, srq)) {
+ if (mlx5_alloc_srq_buf(pd->context, srq, attr->attr.max_wr)) {
fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
goto err;
}
@@ -1041,11 +1035,22 @@ struct ibv_srq *mlx5_create_srq(struct ibv_pd *pd,
attr->attr.max_sge = srq->max_gs;
pthread_mutex_lock(&ctx->srq_table_mutex);
+
+	/* Override max_wr to let the kernel know about the extra WQEs
+	 * reserved for the wait queue.
+	 */
+ attr->attr.max_wr = srq->max - 1;
+
ret = ibv_cmd_create_srq(pd, ibsrq, attr, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp));
if (ret)
goto err_db;
+	/* Override the kernel's response, which counts the wait queue,
+	 * with the number of WQEs actually available to the application.
+	 */
+ attr->attr.max_wr = srq->tail;
+
ret = mlx5_store_srq(ctx, resp.srqn, srq);
if (ret)
goto err_destroy;
@@ -2707,11 +2712,10 @@ struct ibv_srq *mlx5_create_srq_ex(struct ibv_context *context,
goto err;
}
- msrq->max = align_queue_size(attr->attr.max_wr + 1);
msrq->max_gs = attr->attr.max_sge;
msrq->counter = 0;
- if (mlx5_alloc_srq_buf(context, msrq)) {
+ if (mlx5_alloc_srq_buf(context, msrq, attr->attr.max_wr)) {
fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
goto err;
}
@@ -2743,9 +2747,20 @@ struct ibv_srq *mlx5_create_srq_ex(struct ibv_context *context,
pthread_mutex_lock(&ctx->srq_table_mutex);
}
+	/* Override max_wr to let the kernel know about the extra WQEs
+	 * reserved for the wait queue.
+	 */
+ attr->attr.max_wr = msrq->max - 1;
+
err = ibv_cmd_create_srq_ex(context, &msrq->vsrq, sizeof(msrq->vsrq),
attr, &cmd.ibv_cmd, sizeof(cmd),
&resp.ibv_resp, sizeof(resp));
+
+	/* Override the kernel's response, which counts the wait queue,
+	 * with the number of WQEs actually available to the application.
+	 */
+ attr->attr.max_wr = msrq->tail;
+
if (err)
goto err_free_uidx;