diff mbox series

[rdma-core,3/6] mlx5: Introduce a wait queue for SRQ WQEs

Message ID 1550674658-13295-4-git-send-email-yishaih@mellanox.com (mailing list archive)
State Not Applicable
Headers show
Series verbs: Enhanced ODP functionality | expand

Commit Message

Yishai Hadas Feb. 20, 2019, 2:57 p.m. UTC
From: Moni Shoua <monis@mellanox.com>

When allocating the WQE buffer, try to allocate more space than required.
The extra space will serve as a place for WQEs that were recently
switched from HW ownership to SW ownership to cool down before being
posted again. This is useful with WQEs with ODP buffers that were
consumed by HW but weren't handled yet by the page-fault handler in
kernel.

The wait queue is managed as a FIFO: a WQE leaves the wait queue only
after N-1 other WQEs have entered it, where N is the size of the wait queue.

WQEs in the wait queue are considered to be in SW ownership, but they are
not counted as candidates for posting. In other words, while a WQE sits in
the wait queue it is never in HW ownership.

Putting a WQE into the wait queue makes it unavailable for posting, so
whenever that happens another WQE must be taken out of the wait queue to
replace it in the free list.

Having a wait queue is not mandatory. If the extra resources that are
required for the wait queue are beyond the limits of the system then the
SRQ will operate without a wait queue.

Signed-off-by: Moni Shoua <monis@mellanox.com>
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
---
 providers/mlx5/mlx5.h  | 15 +++++++++++-
 providers/mlx5/srq.c   | 63 +++++++++++++++++++++++++++++++++++++++-----------
 providers/mlx5/verbs.c | 33 ++++++++++++++++++--------
 3 files changed, 87 insertions(+), 24 deletions(-)
diff mbox series

Patch

diff --git a/providers/mlx5/mlx5.h b/providers/mlx5/mlx5.h
index 75d599a..f315f63 100644
--- a/providers/mlx5/mlx5.h
+++ b/providers/mlx5/mlx5.h
@@ -415,6 +415,8 @@  struct mlx5_srq {
 	int				wqe_shift;
 	int				head;
 	int				tail;
+	int				waitq_head;
+	int				waitq_tail;
 	__be32			       *db;
 	uint16_t			counter;
 	int				wq_sig;
@@ -807,7 +809,8 @@  int mlx5_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr,
 int mlx5_query_srq(struct ibv_srq *srq,
 			   struct ibv_srq_attr *attr);
 int mlx5_destroy_srq(struct ibv_srq *srq);
-int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq);
+int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq,
+		       uint32_t nwr);
 void mlx5_free_srq_wqe(struct mlx5_srq *srq, int ind);
 int mlx5_post_srq_recv(struct ibv_srq *ibsrq,
 		       struct ibv_recv_wr *wr,
@@ -1017,4 +1020,14 @@  static inline uint8_t calc_sig(void *wqe, int size)
 	return ~res;
 }
 
+static inline int align_queue_size(long long req)
+{
+	return mlx5_round_up_power_of_two(req);
+}
+
+static inline bool srq_has_waitq(struct mlx5_srq *srq)
+{
+	return srq->waitq_head >= 0;
+}
+
 #endif /* MLX5_H */
diff --git a/providers/mlx5/srq.c b/providers/mlx5/srq.c
index 94528bb..a2d37d0 100644
--- a/providers/mlx5/srq.c
+++ b/providers/mlx5/srq.c
@@ -145,13 +145,29 @@  int mlx5_post_srq_recv(struct ibv_srq *ibsrq,
 	return err;
 }
 
-int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq)
+/* Build a linked list on an array of SRQ WQEs.
+ * Since WQEs are always added to the tail and taken from the head
+ * it doesn't matter where the last WQE points to.
+ */
+static void set_srq_buf_ll(struct mlx5_srq *srq, int start, int end)
 {
 	struct mlx5_wqe_srq_next_seg *next;
+	int i;
+
+	for (i = start; i < end; ++i) {
+		next = get_wqe(srq, i);
+		next->next_wqe_index = htobe16(i + 1);
+	}
+}
+
+int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq,
+		       uint32_t max_wr)
+{
 	int size;
 	int buf_size;
-	int i;
 	struct mlx5_context	   *ctx;
+	uint32_t orig_max_wr = max_wr;
+	bool have_wq = true;
 
 	ctx = to_mctx(context);
 
@@ -160,9 +176,18 @@  int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq)
 		return -1;
 	}
 
-	srq->wrid = malloc(srq->max * sizeof *srq->wrid);
-	if (!srq->wrid)
-		return -1;
+	/* At first, try to allocate more WQEs than requested so the extra will
+	 * be used for the wait queue.
+	 */
+	max_wr = orig_max_wr * 2 + 1;
+
+	if (max_wr > ctx->max_srq_recv_wr) {
+		/* Device limits are smaller than required
+		 * to provide a wait queue, continue without.
+		 */
+		max_wr = orig_max_wr + 1;
+		have_wq = false;
+	}
 
 	size = sizeof(struct mlx5_wqe_srq_next_seg) +
 		srq->max_gs * sizeof(struct mlx5_wqe_data_seg);
@@ -179,14 +204,28 @@  int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq)
 
 	srq->wqe_shift = mlx5_ilog2(size);
 
+	srq->max = align_queue_size(max_wr);
 	buf_size = srq->max * size;
 
 	if (mlx5_alloc_buf(&srq->buf, buf_size,
-			   to_mdev(context->device)->page_size)) {
-		free(srq->wrid);
+			   to_mdev(context->device)->page_size))
 		return -1;
+
+	srq->head = 0;
+	srq->tail = align_queue_size(orig_max_wr + 1) - 1;
+	if (have_wq)  {
+		srq->waitq_head = srq->tail + 1;
+		srq->waitq_tail = srq->max - 1;
+	} else {
+		srq->waitq_head = -1;
+		srq->waitq_tail = -1;
 	}
 
+	srq->wrid = malloc(srq->max * sizeof(*srq->wrid));
+	if (!srq->wrid) {
+		mlx5_free_buf(&srq->buf);
+		return -1;
+	}
 	memset(srq->buf.buf, 0, buf_size);
 
 	/*
@@ -194,13 +233,9 @@  int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq)
 	 * linked into the list of free WQEs.
 	 */
 
-	for (i = 0; i < srq->max; ++i) {
-		next = get_wqe(srq, i);
-		next->next_wqe_index = htobe16((i + 1) & (srq->max - 1));
-	}
-
-	srq->head = 0;
-	srq->tail = srq->max - 1;
+	set_srq_buf_ll(srq, srq->head, srq->tail);
+	if (have_wq)
+		set_srq_buf_ll(srq, srq->waitq_head, srq->waitq_tail);
 
 	return 0;
 }
diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c
index 7e1c125..2bccdf8 100644
--- a/providers/mlx5/verbs.c
+++ b/providers/mlx5/verbs.c
@@ -553,11 +553,6 @@  int mlx5_round_up_power_of_two(long long sz)
 	return (int)ret;
 }
 
-static int align_queue_size(long long req)
-{
-	return mlx5_round_up_power_of_two(req);
-}
-
 static int get_cqe_size(struct mlx5dv_cq_init_attr *mlx5cq_attr)
 {
 	char *env;
@@ -1016,11 +1011,10 @@  struct ibv_srq *mlx5_create_srq(struct ibv_pd *pd,
 		goto err;
 	}
 
-	srq->max     = align_queue_size(attr->attr.max_wr + 1);
 	srq->max_gs  = attr->attr.max_sge;
 	srq->counter = 0;
 
-	if (mlx5_alloc_srq_buf(pd->context, srq)) {
+	if (mlx5_alloc_srq_buf(pd->context, srq, attr->attr.max_wr)) {
 		fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
 		goto err;
 	}
@@ -1041,11 +1035,22 @@  struct ibv_srq *mlx5_create_srq(struct ibv_pd *pd,
 
 	attr->attr.max_sge = srq->max_gs;
 	pthread_mutex_lock(&ctx->srq_table_mutex);
+
+	/* Override max_wr to let kernel know about extra WQEs for the
+	 * wait queue.
+	 */
+	attr->attr.max_wr = srq->max - 1;
+
 	ret = ibv_cmd_create_srq(pd, ibsrq, attr, &cmd.ibv_cmd, sizeof(cmd),
 				 &resp.ibv_resp, sizeof(resp));
 	if (ret)
 		goto err_db;
 
+	/* Override kernel response that includes the wait queue with the real
+	 * number of WQEs that are applicable for the application.
+	 */
+	attr->attr.max_wr = srq->tail;
+
 	ret = mlx5_store_srq(ctx, resp.srqn, srq);
 	if (ret)
 		goto err_destroy;
@@ -2707,11 +2712,10 @@  struct ibv_srq *mlx5_create_srq_ex(struct ibv_context *context,
 		goto err;
 	}
 
-	msrq->max     = align_queue_size(attr->attr.max_wr + 1);
 	msrq->max_gs  = attr->attr.max_sge;
 	msrq->counter = 0;
 
-	if (mlx5_alloc_srq_buf(context, msrq)) {
+	if (mlx5_alloc_srq_buf(context, msrq, attr->attr.max_wr)) {
 		fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
 		goto err;
 	}
@@ -2743,9 +2747,20 @@  struct ibv_srq *mlx5_create_srq_ex(struct ibv_context *context,
 		pthread_mutex_lock(&ctx->srq_table_mutex);
 	}
 
+	/* Override max_wr to let kernel know about extra WQEs for the
+	 * wait queue.
+	 */
+	attr->attr.max_wr = msrq->max - 1;
+
 	err = ibv_cmd_create_srq_ex(context, &msrq->vsrq, sizeof(msrq->vsrq),
 				    attr, &cmd.ibv_cmd, sizeof(cmd),
 				    &resp.ibv_resp, sizeof(resp));
+
+	/* Override kernel response that includes the wait queue with the real
+	 * number of WQEs that are applicable for the application.
+	 */
+	attr->attr.max_wr = msrq->tail;
+
 	if (err)
 		goto err_free_uidx;