From patchwork Sun Feb  7 03:12:50 2021
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Weihang Li <liweihang@huawei.com>
X-Patchwork-Id: 12072455
Return-Path: <linux-rdma-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level: 
X-Spam-Status: No, score=-16.7 required=3.0 tests=BAYES_00,
	HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH,
	MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,UPPERCASE_50_75,USER_AGENT_GIT
	autolearn=ham autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
	by smtp.lore.kernel.org (Postfix) with ESMTP id ADFC9C433E6
	for <linux-rdma@archiver.kernel.org>; Sun,  7 Feb 2021 03:16:26 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
	by mail.kernel.org (Postfix) with ESMTP id 7AFCB64DDB
	for <linux-rdma@archiver.kernel.org>; Sun,  7 Feb 2021 03:16:26 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
        id S229690AbhBGDQL (ORCPT <rfc822;linux-rdma@archiver.kernel.org>);
        Sat, 6 Feb 2021 22:16:11 -0500
Received: from szxga07-in.huawei.com ([45.249.212.35]:12859 "EHLO
        szxga07-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
        with ESMTP id S229788AbhBGDP6 (ORCPT
        <rfc822;linux-rdma@vger.kernel.org>); Sat, 6 Feb 2021 22:15:58 -0500
Received: from DGGEMS402-HUB.china.huawei.com (unknown [172.30.72.60])
        by szxga07-in.huawei.com (SkyGuard) with ESMTP id 4DYDl44jDFz7hLS;
        Sun,  7 Feb 2021 11:13:52 +0800 (CST)
Received: from localhost.localdomain (10.67.165.24) by
 DGGEMS402-HUB.china.huawei.com (10.3.19.202) with Microsoft SMTP Server id
 14.3.498.0; Sun, 7 Feb 2021 11:15:08 +0800
From: Weihang Li <liweihang@huawei.com>
To: <jgg@nvidia.com>, <leon@kernel.org>
CC: <dledford@redhat.com>, <linux-rdma@vger.kernel.org>,
        <linuxarm@openeuler.org>
Subject: [PATCH RFC rdma-core 1/5] Update kernel headers
Date: Sun, 7 Feb 2021 11:12:50 +0800
Message-ID: <1612667574-56673-2-git-send-email-liweihang@huawei.com>
X-Mailer: git-send-email 2.8.1
In-Reply-To: <1612667574-56673-1-git-send-email-liweihang@huawei.com>
References: <1612667574-56673-1-git-send-email-liweihang@huawei.com>
MIME-Version: 1.0
X-Originating-IP: [10.67.165.24]
X-CFilter-Loop: Reflected
Precedence: bulk
List-ID: <linux-rdma.vger.kernel.org>
X-Mailing-List: linux-rdma@vger.kernel.org

To commit ?? ("RDMA/hns: Add method to query WQE buffer's address").

Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 kernel-headers/rdma/hns-abi.h | 64 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h
index 90b739d..b832f26 100644
--- a/kernel-headers/rdma/hns-abi.h
+++ b/kernel-headers/rdma/hns-abi.h
@@ -77,19 +77,83 @@ enum hns_roce_qp_cap_flags {
 	HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0,
 	HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1,
 	HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2,
+	HNS_ROCE_QP_CAP_DCA = 1 << 4,
 };
 
 struct hns_roce_ib_create_qp_resp {
 	__aligned_u64 cap_flags;
 };
 
+enum {
+	HNS_ROCE_CAP_FLAG_DCA_MODE = 1 << 15,
+};
+
 struct hns_roce_ib_alloc_ucontext_resp {
 	__u32	qp_tab_size;
 	__u32	cqe_size;
+	__aligned_u64 cap_flags;
 };
 
 struct hns_roce_ib_alloc_pd_resp {
 	__u32 pdn;
 };
 
+#define UVERBS_ID_NS_MASK 0xF000
+#define UVERBS_ID_NS_SHIFT 12
+
+enum hns_ib_objects {
+	HNS_IB_OBJECT_DCA_MEM = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum hns_ib_dca_mem_methods {
+	HNS_IB_METHOD_DCA_MEM_REG = (1U << UVERBS_ID_NS_SHIFT),
+	HNS_IB_METHOD_DCA_MEM_DEREG,
+	HNS_IB_METHOD_DCA_MEM_SHRINK,
+	HNS_IB_METHOD_DCA_MEM_ATTACH,
+	HNS_IB_METHOD_DCA_MEM_DETACH,
+	HNS_IB_METHOD_DCA_MEM_QUERY,
+};
+
+enum hns_ib_dca_mem_reg_attrs {
+	HNS_IB_ATTR_DCA_MEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	HNS_IB_ATTR_DCA_MEM_REG_LEN,
+	HNS_IB_ATTR_DCA_MEM_REG_ADDR,
+	HNS_IB_ATTR_DCA_MEM_REG_KEY,
+};
+
+enum hns_ib_dca_mem_dereg_attrs {
+	HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum hns_ib_dca_mem_shrink_attrs {
+	HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE,
+	HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY,
+	HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS,
+};
+
+#define HNS_IB_ATTACH_FLAGS_NEW_BUFFER 1U
+
+enum hns_ib_dca_mem_attach_attrs {
+	HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET,
+	HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET,
+	HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET,
+	HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS,
+	HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES,
+};
+
+enum hns_ib_dca_mem_detach_attrs {
+	HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX,
+};
+
+enum hns_ib_dca_mem_query_attrs {
+	HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX,
+	HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY,
+	HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET,
+	HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT,
+};
+
 #endif /* HNS_ABI_USER_H */

From patchwork Sun Feb  7 03:12:51 2021
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Weihang Li <liweihang@huawei.com>
X-Patchwork-Id: 12072457
Return-Path: <linux-rdma-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level: 
X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00,
	HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH,
	MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham
	autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
	by smtp.lore.kernel.org (Postfix) with ESMTP id CA028C433E9
	for <linux-rdma@archiver.kernel.org>; Sun,  7 Feb 2021 03:16:26 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
	by mail.kernel.org (Postfix) with ESMTP id A3E2864DC3
	for <linux-rdma@archiver.kernel.org>; Sun,  7 Feb 2021 03:16:26 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
        id S229691AbhBGDQN (ORCPT <rfc822;linux-rdma@archiver.kernel.org>);
        Sat, 6 Feb 2021 22:16:13 -0500
Received: from szxga04-in.huawei.com ([45.249.212.190]:11688 "EHLO
        szxga04-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
        with ESMTP id S229849AbhBGDP6 (ORCPT
        <rfc822;linux-rdma@vger.kernel.org>); Sat, 6 Feb 2021 22:15:58 -0500
Received: from DGGEMS402-HUB.china.huawei.com (unknown [172.30.72.59])
        by szxga04-in.huawei.com (SkyGuard) with ESMTP id 4DYDkg0DyLzlGSl;
        Sun,  7 Feb 2021 11:13:31 +0800 (CST)
Received: from localhost.localdomain (10.67.165.24) by
 DGGEMS402-HUB.china.huawei.com (10.3.19.202) with Microsoft SMTP Server id
 14.3.498.0; Sun, 7 Feb 2021 11:15:09 +0800
From: Weihang Li <liweihang@huawei.com>
To: <jgg@nvidia.com>, <leon@kernel.org>
CC: <dledford@redhat.com>, <linux-rdma@vger.kernel.org>,
        <linuxarm@openeuler.org>
Subject: [PATCH RFC rdma-core 2/5] libhns: Introduce DCA for RC QP
Date: Sun, 7 Feb 2021 11:12:51 +0800
Message-ID: <1612667574-56673-3-git-send-email-liweihang@huawei.com>
X-Mailer: git-send-email 2.8.1
In-Reply-To: <1612667574-56673-1-git-send-email-liweihang@huawei.com>
References: <1612667574-56673-1-git-send-email-liweihang@huawei.com>
MIME-Version: 1.0
X-Originating-IP: [10.67.165.24]
X-CFilter-Loop: Reflected
Precedence: bulk
List-ID: <linux-rdma.vger.kernel.org>
X-Mailing-List: linux-rdma@vger.kernel.org

From: Xi Wang <wangxi11@huawei.com>

The HIP09 introduces the DCA(Dynamic context attachment) feature which
supports many RC QPs to share the WQE buffer in a memory pool, this will
reduce the memory consumption when there are too many QPs inactive.

This patch wraps two functions for adding buffers to memory pool and
removing buffers from memory pool by calling ib cmd implemented in hns
kernel driver.

If a QP enables DCA feature, the WQE's buffer will be attached to the
memroy pool when the users start to post WRs and be detached when all CQEs
has been polled.

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 providers/hns/hns_roce_u.c     |  41 +++++++++++++
 providers/hns/hns_roce_u.h     |  18 ++++++
 providers/hns/hns_roce_u_buf.c | 134 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 193 insertions(+)

diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index e63ef32..28b1130 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -89,6 +89,40 @@ static const struct verbs_context_ops hns_common_ops = {
 	.destroy_ah = hns_roce_u_destroy_ah,
 };
 
+static int init_dca_context(struct hns_roce_context *ctx, int page_size)
+{
+	struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+	int ret;
+
+	if (!(ctx->cap_flags & HNS_ROCE_CAP_FLAG_DCA_MODE))
+		return 0;
+
+	list_head_init(&dca_ctx->mem_list);
+	ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE);
+	if (ret)
+		return ret;
+
+	dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
+	dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
+	dca_ctx->mem_cnt = 0;
+
+	return 0;
+}
+
+static void uninit_dca_context(struct hns_roce_context *ctx)
+{
+	struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+
+	if (!(ctx->cap_flags & HNS_ROCE_CAP_FLAG_DCA_MODE))
+		return;
+
+	pthread_spin_lock(&dca_ctx->lock);
+	hns_roce_cleanup_dca_mem(ctx);
+	pthread_spin_unlock(&dca_ctx->lock);
+
+	pthread_spin_destroy(&dca_ctx->lock);
+}
+
 static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
 						    int cmd_fd,
 						    void *private_data)
@@ -110,6 +144,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
 				&resp.ibv_resp, sizeof(resp)))
 		goto err_free;
 
+	context->cap_flags = resp.cap_flags;
+
 	context->num_qps = resp.qp_tab_size;
 	context->qp_table_shift = ffs(context->num_qps) - 1 -
 				  HNS_ROCE_QP_TABLE_BITS;
@@ -162,6 +198,9 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
 	verbs_set_ops(&context->ibv_ctx, &hns_common_ops);
 	verbs_set_ops(&context->ibv_ctx, &hr_dev->u_hw->hw_ops);
 
+	if (init_dca_context(context, hr_dev->page_size))
+		goto db_free;
+
 	return &context->ibv_ctx;
 
 db_free:
@@ -183,6 +222,8 @@ static void hns_roce_free_context(struct ibv_context *ibctx)
 	if (hr_dev->hw_version == HNS_ROCE_HW_VER1)
 		munmap(context->cq_tptr_base, HNS_ROCE_CQ_DB_BUF_SIZE);
 
+	uninit_dca_context(context);
+
 	verbs_uninit_context(&context->ibv_ctx);
 	free(context);
 }
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 67b4433..619b060 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -142,8 +142,21 @@ struct hns_roce_db_page {
 	bitmap			*bitmap;
 };
 
+#define HNS_DCA_MAX_MEM_SIZE ~0UL
+#define HNS_DCA_DEFAULT_UNIT_PAGES 16
+
+struct hns_roce_dca_ctx {
+	struct list_head mem_list;
+	pthread_spinlock_t lock;
+	uint64_t max_size;
+	uint64_t curr_size;
+	int mem_cnt;
+	unsigned int unit_size;
+};
+
 struct hns_roce_context {
 	struct verbs_context		ibv_ctx;
+	uint32_t			cap_flags;
 	void				*uar;
 	pthread_spinlock_t		uar_lock;
 
@@ -167,6 +180,8 @@ struct hns_roce_context {
 	unsigned int			max_sge;
 	int				max_cqe;
 	unsigned int			cqe_size;
+
+	struct hns_roce_dca_ctx		dca_ctx;
 };
 
 struct hns_roce_pd {
@@ -395,6 +410,9 @@ void hns_roce_free_buf(struct hns_roce_buf *buf);
 
 void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
 
+void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
+int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size);
+
 void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
 
 extern const struct hns_roce_u_hw hns_roce_u_hw_v1;
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index 471dd9c..424f916 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -60,3 +60,137 @@ void hns_roce_free_buf(struct hns_roce_buf *buf)
 
 	munmap(buf->buf, buf->length);
 }
+
+struct hns_roce_dca_mem {
+	uint32_t handle;
+	struct list_node entry;
+	struct hns_roce_buf buf;
+	struct hns_roce_context *ctx;
+};
+
+static void free_dca_mem(struct hns_roce_context *ctx,
+			 struct hns_roce_dca_mem *mem)
+{
+	hns_roce_free_buf(&mem->buf);
+	free(mem);
+}
+
+static struct hns_roce_dca_mem *alloc_dca_mem(uint32_t size)
+{
+	struct hns_roce_dca_mem *mem = NULL;
+	int ret;
+
+	mem = malloc(sizeof(struct hns_roce_dca_mem));
+	if (!mem) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	ret = hns_roce_alloc_buf(&mem->buf, size, HNS_HW_PAGE_SIZE);
+	if (ret) {
+		errno = ENOMEM;
+		free(mem);
+		return NULL;
+	}
+
+	return mem;
+}
+
+static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem)
+{
+	return (uintptr_t)dca_mem;
+}
+
+static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset)
+{
+	return dca_mem->buf.buf + offset;
+}
+
+static int register_dca_mem(struct hns_roce_context *ctx, uint64_t key,
+			    void *addr, uint32_t size, uint32_t *handle)
+{
+	struct ib_uverbs_attr *attr;
+	int ret;
+
+	DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+			       HNS_IB_METHOD_DCA_MEM_REG, 4);
+	fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_REG_LEN, size);
+	fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_ADDR, (intptr_t)addr);
+	fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_KEY, key);
+	attr = fill_attr_out_obj(cmd, HNS_IB_ATTR_DCA_MEM_REG_HANDLE);
+
+	ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+	if (!ret)
+		*handle = read_attr_obj(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, attr);
+
+	return ret;
+}
+
+static void deregister_dca_mem(struct hns_roce_context *ctx, uint32_t handle)
+{
+	DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+			       HNS_IB_METHOD_DCA_MEM_DEREG, 1);
+	fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, handle);
+	execute_ioctl(&ctx->ibv_ctx.context, cmd);
+}
+
+void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx)
+{
+	struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+	struct hns_roce_dca_mem *mem;
+	struct hns_roce_dca_mem *tmp;
+
+	list_for_each_safe(&dca_ctx->mem_list, mem, tmp, entry)
+		deregister_dca_mem(ctx, mem->handle);
+}
+
+static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
+				uint32_t alloc_size)
+{
+	bool enable;
+
+	pthread_spin_lock(&ctx->lock);
+
+	if (ctx->unit_size == 0) /* Pool size can't be increased */
+		enable = false;
+	else if (ctx->max_size == HNS_DCA_MAX_MEM_SIZE) /* Pool size no limit */
+		enable = true;
+	else /* Pool size doesn't exceed max size */
+		enable = (ctx->curr_size + alloc_size) < ctx->max_size;
+
+	pthread_spin_unlock(&ctx->lock);
+
+	return enable;
+}
+
+int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
+{
+	struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+	struct hns_roce_dca_mem *mem;
+	int ret;
+
+	if (!add_dca_mem_enabled(&ctx->dca_ctx, size))
+		return -ENOMEM;
+
+	/* Step 1: Alloc DCA mem address */
+	mem = alloc_dca_mem(DIV_ROUND_UP(size, dca_ctx->unit_size));
+	if (!mem)
+		return -ENOMEM;
+
+	/* Step 2: Register DCA mem uobject to pin user address */
+	ret = register_dca_mem(ctx, dca_mem_to_key(mem), dca_mem_addr(mem, 0),
+			       mem->buf.length, &mem->handle);
+	if (ret) {
+		free_dca_mem(ctx, mem);
+		return ret;
+	}
+
+	/* Step 3: Add DCA mem node to pool */
+	pthread_spin_lock(&dca_ctx->lock);
+	list_add_tail(&dca_ctx->mem_list, &mem->entry);
+	dca_ctx->mem_cnt++;
+	dca_ctx->curr_size += mem->buf.length;
+	pthread_spin_unlock(&dca_ctx->lock);
+
+	return 0;
+}

From patchwork Sun Feb  7 03:12:52 2021
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Weihang Li <liweihang@huawei.com>
X-Patchwork-Id: 12072453
Return-Path: <linux-rdma-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level: 
X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00,
	HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH,
	MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham
	autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
	by smtp.lore.kernel.org (Postfix) with ESMTP id 935E2C433DB
	for <linux-rdma@archiver.kernel.org>; Sun,  7 Feb 2021 03:16:26 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
	by mail.kernel.org (Postfix) with ESMTP id 64A5F64DC3
	for <linux-rdma@archiver.kernel.org>; Sun,  7 Feb 2021 03:16:26 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
        id S229609AbhBGDQJ (ORCPT <rfc822;linux-rdma@archiver.kernel.org>);
        Sat, 6 Feb 2021 22:16:09 -0500
Received: from szxga04-in.huawei.com ([45.249.212.190]:11689 "EHLO
        szxga04-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
        with ESMTP id S229972AbhBGDP5 (ORCPT
        <rfc822;linux-rdma@vger.kernel.org>); Sat, 6 Feb 2021 22:15:57 -0500
Received: from DGGEMS402-HUB.china.huawei.com (unknown [172.30.72.59])
        by szxga04-in.huawei.com (SkyGuard) with ESMTP id 4DYDkg0t9xzlH1C;
        Sun,  7 Feb 2021 11:13:31 +0800 (CST)
Received: from localhost.localdomain (10.67.165.24) by
 DGGEMS402-HUB.china.huawei.com (10.3.19.202) with Microsoft SMTP Server id
 14.3.498.0; Sun, 7 Feb 2021 11:15:09 +0800
From: Weihang Li <liweihang@huawei.com>
To: <jgg@nvidia.com>, <leon@kernel.org>
CC: <dledford@redhat.com>, <linux-rdma@vger.kernel.org>,
        <linuxarm@openeuler.org>
Subject: [PATCH RFC rdma-core 3/5] libhns: Add support for shrinking DCA
 memory pool
Date: Sun, 7 Feb 2021 11:12:52 +0800
Message-ID: <1612667574-56673-4-git-send-email-liweihang@huawei.com>
X-Mailer: git-send-email 2.8.1
In-Reply-To: <1612667574-56673-1-git-send-email-liweihang@huawei.com>
References: <1612667574-56673-1-git-send-email-liweihang@huawei.com>
MIME-Version: 1.0
X-Originating-IP: [10.67.165.24]
X-CFilter-Loop: Reflected
Precedence: bulk
List-ID: <linux-rdma.vger.kernel.org>
X-Mailing-List: linux-rdma@vger.kernel.org

From: Xi Wang <wangxi11@huawei.com>

The QP's WQE buffer may be detached after QP is modified or CQE is polled,
and the state of DCA mem object may be changed as clean for no QP is using
it. So shrink the clean DCA mem from the memory pool and destroy the DCA
mem's buffer to reduce the memory consumption.

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 providers/hns/hns_roce_u.h       |  2 +
 providers/hns/hns_roce_u_buf.c   | 96 ++++++++++++++++++++++++++++++++++++++++
 providers/hns/hns_roce_u_hw_v2.c |  7 +++
 3 files changed, 105 insertions(+)

diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 619b060..7dc4a1e 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -149,6 +149,7 @@ struct hns_roce_dca_ctx {
 	struct list_head mem_list;
 	pthread_spinlock_t lock;
 	uint64_t max_size;
+	uint64_t min_size;
 	uint64_t curr_size;
 	int mem_cnt;
 	unsigned int unit_size;
@@ -410,6 +411,7 @@ void hns_roce_free_buf(struct hns_roce_buf *buf);
 
 void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
 
+void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx);
 void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
 int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size);
 
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index 424f916..8f3d34a 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -101,6 +101,20 @@ static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem)
 	return (uintptr_t)dca_mem;
 }
 
+static struct hns_roce_dca_mem *key_to_dca_mem(struct hns_roce_dca_ctx *ctx,
+					       uint64_t key)
+{
+	struct hns_roce_dca_mem *mem;
+	struct hns_roce_dca_mem *tmp;
+
+	list_for_each_safe(&ctx->mem_list, mem, tmp, entry) {
+		if (dca_mem_to_key(mem) == key)
+			return mem;
+	}
+
+	return NULL;
+}
+
 static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset)
 {
 	return dca_mem->buf.buf + offset;
@@ -144,6 +158,25 @@ void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx)
 		deregister_dca_mem(ctx, mem->handle);
 }
 
+struct hns_dca_mem_shrink_resp {
+	uint32_t free_mems;
+	uint64_t free_key;
+};
+
+static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+			  uint64_t size, struct hns_dca_mem_shrink_resp *resp)
+{
+	DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+			       HNS_IB_METHOD_DCA_MEM_SHRINK, 4);
+	fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE, handle);
+	fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, size);
+	fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY,
+		      &resp->free_key, sizeof(resp->free_key));
+	fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS,
+		      &resp->free_mems, sizeof(resp->free_mems));
+
+	return execute_ioctl(&ctx->ibv_ctx.context, cmd);
+}
 static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
 				uint32_t alloc_size)
 {
@@ -194,3 +227,66 @@ int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
 
 	return 0;
 }
+
+static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx)
+{
+	bool enable;
+
+	pthread_spin_lock(&ctx->lock);
+	enable = ctx->mem_cnt > 0 && ctx->min_size < ctx->max_size;
+	pthread_spin_unlock(&ctx->lock);
+
+	return enable;
+}
+
+void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx)
+{
+	struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+	struct hns_dca_mem_shrink_resp resp = {};
+	struct hns_roce_dca_mem *mem;
+	int dca_mem_cnt;
+	uint32_t handle;
+	int ret;
+
+	pthread_spin_lock(&dca_ctx->lock);
+	dca_mem_cnt = ctx->dca_ctx.mem_cnt;
+	pthread_spin_unlock(&dca_ctx->lock);
+	while (dca_mem_cnt > 0 && shrink_dca_mem_enabled(dca_ctx)) {
+		resp.free_mems = 0;
+		/* Step 1: Use any DCA mem uobject to shrink pool */
+		pthread_spin_lock(&dca_ctx->lock);
+		mem = list_tail(&dca_ctx->mem_list,
+				struct hns_roce_dca_mem, entry);
+		handle = mem ? mem->handle : 0;
+		pthread_spin_unlock(&dca_ctx->lock);
+		if (!mem)
+			break;
+
+		ret = shrink_dca_mem(ctx, handle, dca_ctx->min_size, &resp);
+		if (ret || likely(resp.free_mems < 1))
+			break;
+
+		/* Step 2: Remove shrunk DCA mem node from pool */
+		pthread_spin_lock(&dca_ctx->lock);
+		mem = key_to_dca_mem(dca_ctx, resp.free_key);
+		if (mem) {
+			list_del(&mem->entry);
+			dca_ctx->mem_cnt--;
+			dca_ctx->curr_size -= mem->buf.length;
+		}
+
+		handle = mem ? mem->handle : 0;
+		pthread_spin_unlock(&dca_ctx->lock);
+		if (!mem)
+			break;
+
+		/* Step 3: Destroy DCA mem uobject */
+		deregister_dca_mem(ctx, handle);
+		free_dca_mem(ctx, mem);
+		/* No any free memory after deregister 1 DCA mem */
+		if (resp.free_mems <= 1)
+			break;
+
+		dca_mem_cnt--;
+	}
+}
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index c8d273f..93f3546 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -651,6 +651,10 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
 
 	pthread_spin_unlock(&cq->lock);
 
+	/* Try to shrink the DCA mem */
+	if (ctx->dca_ctx.mem_cnt > 0)
+		hns_roce_shrink_dca_mem(ctx);
+
 	return err == V2_CQ_POLL_ERR ? err : npolled;
 }
 
@@ -1478,6 +1482,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
 
 	free(qp);
 
+	if (ctx->dca_ctx.mem_cnt > 0)
+		hns_roce_shrink_dca_mem(ctx);
+
 	return ret;
 }
 

From patchwork Sun Feb  7 03:12:53 2021
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Weihang Li <liweihang@huawei.com>
X-Patchwork-Id: 12072461
Return-Path: <linux-rdma-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level: 
X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00,
	HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH,
	MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham
	autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
	by smtp.lore.kernel.org (Postfix) with ESMTP id E6DB2C43381
	for <linux-rdma@archiver.kernel.org>; Sun,  7 Feb 2021 03:16:26 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
	by mail.kernel.org (Postfix) with ESMTP id BAAF964E76
	for <linux-rdma@archiver.kernel.org>; Sun,  7 Feb 2021 03:16:26 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
        id S229706AbhBGDQQ (ORCPT <rfc822;linux-rdma@archiver.kernel.org>);
        Sat, 6 Feb 2021 22:16:16 -0500
Received: from szxga07-in.huawei.com ([45.249.212.35]:12862 "EHLO
        szxga07-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
        with ESMTP id S230048AbhBGDP6 (ORCPT
        <rfc822;linux-rdma@vger.kernel.org>); Sat, 6 Feb 2021 22:15:58 -0500
Received: from DGGEMS402-HUB.china.huawei.com (unknown [172.30.72.60])
        by szxga07-in.huawei.com (SkyGuard) with ESMTP id 4DYDl447rvz7hK1;
        Sun,  7 Feb 2021 11:13:52 +0800 (CST)
Received: from localhost.localdomain (10.67.165.24) by
 DGGEMS402-HUB.china.huawei.com (10.3.19.202) with Microsoft SMTP Server id
 14.3.498.0; Sun, 7 Feb 2021 11:15:09 +0800
From: Weihang Li <liweihang@huawei.com>
To: <jgg@nvidia.com>, <leon@kernel.org>
CC: <dledford@redhat.com>, <linux-rdma@vger.kernel.org>,
        <linuxarm@openeuler.org>
Subject: [PATCH RFC rdma-core 4/5] libhns: Add support for attaching QP's WQE
 buffer
Date: Sun, 7 Feb 2021 11:12:53 +0800
Message-ID: <1612667574-56673-5-git-send-email-liweihang@huawei.com>
X-Mailer: git-send-email 2.8.1
In-Reply-To: <1612667574-56673-1-git-send-email-liweihang@huawei.com>
References: <1612667574-56673-1-git-send-email-liweihang@huawei.com>
MIME-Version: 1.0
X-Originating-IP: [10.67.165.24]
X-CFilter-Loop: Reflected
Precedence: bulk
List-ID: <linux-rdma.vger.kernel.org>
X-Mailing-List: linux-rdma@vger.kernel.org

From: Xi Wang <wangxi11@huawei.com>

If a uQP works in DCA mode, the WQE's buffer will be split as many blocks
and be stored into a list. The blocks are allocated from the DCA's memory
pool before posting WRs and are dropped when the QP's CI is equal to PI
after polling CQ.

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 providers/hns/hns_roce_u.h       |  24 +++++-
 providers/hns/hns_roce_u_buf.c   | 156 ++++++++++++++++++++++++++++++++++++++-
 providers/hns/hns_roce_u_hw_v2.c | 131 +++++++++++++++++++++++++++++---
 providers/hns/hns_roce_u_hw_v2.h |   7 ++
 providers/hns/hns_roce_u_verbs.c |  32 ++++++--
 5 files changed, 332 insertions(+), 18 deletions(-)

diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 7dc4a1e..ae72709 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -119,6 +119,12 @@ struct hns_roce_device {
 	int				hw_version;
 };
 
+struct hns_roce_buf_list {
+	void **bufs;
+	unsigned int max_cnt;
+	unsigned int shift;
+};
+
 struct hns_roce_buf {
 	void				*buf;
 	unsigned int			length;
@@ -268,9 +274,20 @@ struct hns_roce_rinl_buf {
 	unsigned int			wqe_cnt;
 };
 
+struct hns_roce_dca_attach_attr {
+	uint32_t sq_offset;
+	uint32_t sge_offset;
+	uint32_t rq_offset;
+};
+
+struct hns_roce_dca_detach_attr {
+	uint32_t sq_index;
+};
+
 struct hns_roce_qp {
 	struct verbs_qp			verbs_qp;
 	struct hns_roce_buf		buf;
+	struct hns_roce_buf_list	page_list;
 	int				max_inline_data;
 	int				buf_size;
 	unsigned int			sq_signal_bits;
@@ -316,6 +333,7 @@ struct hns_roce_u_hw {
  * minimum page size.
  */
 #define hr_hw_page_align(x) align(x, HNS_HW_PAGE_SIZE)
+#define hr_hw_page_count(x) (hr_hw_page_align(x) / HNS_HW_PAGE_SIZE)
 
 static inline unsigned int to_hr_hem_entries_size(int count, int buf_shift)
 {
@@ -411,9 +429,13 @@ void hns_roce_free_buf(struct hns_roce_buf *buf);
 
 void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
 
+int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+			    struct hns_roce_dca_attach_attr *attr,
+			    uint32_t size, struct hns_roce_buf_list *buf_list);
+void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+			     struct hns_roce_dca_detach_attr *attr);
 void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx);
 void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size);
 
 void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
 
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index 8f3d34a..0bcb39b 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -177,6 +177,66 @@ static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
 
 	return execute_ioctl(&ctx->ibv_ctx.context, cmd);
 }
+
+struct hns_dca_mem_query_resp {
+	uint64_t key;
+	uint32_t offset;
+	uint32_t page_count;
+};
+
+static int query_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+			 uint32_t index, struct hns_dca_mem_query_resp *resp)
+{
+	DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+			       HNS_IB_METHOD_DCA_MEM_QUERY, 5);
+	fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE, handle);
+	fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, index);
+	fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY,
+		      &resp->key, sizeof(resp->key));
+	fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET,
+		      &resp->offset, sizeof(resp->offset));
+	fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT,
+		      &resp->page_count, sizeof(resp->page_count));
+	return execute_ioctl(&ctx->ibv_ctx.context, cmd);
+}
+
+static int detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+			  struct hns_roce_dca_detach_attr *attr)
+{
+	DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+			       HNS_IB_METHOD_DCA_MEM_DETACH, 4);
+	fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE, handle);
+	fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX,
+			    attr->sq_index);
+	return execute_ioctl(&ctx->ibv_ctx.context, cmd);
+}
+
+struct hns_dca_mem_attach_resp {
+#define HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER BIT(0)
+	uint32_t alloc_flags;
+	uint32_t alloc_pages;
+};
+
+static int attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+			  struct hns_roce_dca_attach_attr *attr,
+			  struct hns_dca_mem_attach_resp *resp)
+{
+	DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+			       HNS_IB_METHOD_DCA_MEM_ATTACH, 6);
+	fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE, handle);
+	fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET,
+			    attr->sq_offset);
+	fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET,
+			    attr->sge_offset);
+	fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET,
+			    attr->rq_offset);
+	fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS,
+		      &resp->alloc_flags, sizeof(resp->alloc_flags));
+	fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES,
+		      &resp->alloc_pages, sizeof(resp->alloc_pages));
+	return execute_ioctl(&ctx->ibv_ctx.context, cmd);
+}
+
 static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
 				uint32_t alloc_size)
 {
@@ -196,7 +256,7 @@ static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
 	return enable;
 }
 
-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
+static int add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
 {
 	struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
 	struct hns_roce_dca_mem *mem;
@@ -290,3 +350,97 @@ void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx)
 		dca_mem_cnt--;
 	}
 }
+
+static void config_page_list(void *addr, struct hns_roce_buf_list *page_list,
+			     uint32_t page_index, int page_count)
+{
+	void **bufs = &page_list->bufs[page_index];
+	int page_size = 1 << page_list->shift;
+	int i;
+
+	for (i = 0; i < page_count; i++) {
+		bufs[i] = addr;
+		addr += page_size;
+	}
+}
+
+static int setup_dca_buf_list(struct hns_roce_context *ctx, uint32_t handle,
+			      struct hns_roce_buf_list *buf_list,
+			      uint32_t page_count)
+{
+	struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+	struct hns_dca_mem_query_resp resp = {};
+	struct hns_roce_dca_mem *mem;
+	uint32_t idx = 0;
+	int ret;
+
+	while (idx < page_count && idx < buf_list->max_cnt) {
+		resp.page_count = 0;
+		ret = query_dca_mem(ctx, handle, idx, &resp);
+		if (ret)
+			return -ENOMEM;
+
+		if (resp.page_count < 1)
+			break;
+
+		pthread_spin_lock(&dca_ctx->lock);
+		mem = key_to_dca_mem(dca_ctx, resp.key);
+		if (mem && resp.offset < mem->buf.length) {
+			config_page_list(dca_mem_addr(mem, resp.offset),
+					 buf_list, idx, resp.page_count);
+		} else {
+			pthread_spin_unlock(&dca_ctx->lock);
+			break;
+		}
+
+		pthread_spin_unlock(&dca_ctx->lock);
+
+		idx += resp.page_count;
+	}
+
+	return (idx >= page_count) ? 0 : ENOMEM;
+}
+
+#define DCA_EXPAND_MEM_TRY_TIMES 3
+int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+			    struct hns_roce_dca_attach_attr *attr,
+			    uint32_t size, struct hns_roce_buf_list *buf_list)
+{
+	uint32_t buf_pages = size >> buf_list->shift;
+	struct hns_dca_mem_attach_resp resp = {};
+	bool is_new_buf = true;
+	int try_times = 0;
+	int ret;
+
+	do {
+		resp.alloc_pages = 0;
+		ret = attach_dca_mem(ctx, handle, attr, &resp);
+		if (ret)
+			break;
+
+		if (resp.alloc_pages >= buf_pages) {
+			is_new_buf = !!(resp.alloc_flags &
+				     HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER);
+			break;
+		}
+
+		ret = add_dca_mem(ctx, size);
+		if (ret)
+			break;
+	} while (try_times++ < DCA_EXPAND_MEM_TRY_TIMES);
+
+	if (ret || resp.alloc_pages < buf_pages)
+		return -ENOMEM;
+
+	/* DCA config not changed */
+	if (!is_new_buf && buf_list->bufs[0])
+		return 0;
+
+	return setup_dca_buf_list(ctx, handle, buf_list, buf_pages);
+}
+
+void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+			     struct hns_roce_dca_detach_attr *attr)
+{
+	detach_dca_mem(ctx, handle, attr);
+}
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 93f3546..fb065d1 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -230,19 +230,28 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq)
 	return get_sw_cqe_v2(cq, cq->cons_index);
 }
 
+static inline void *get_wqe(struct hns_roce_qp *qp, unsigned int offset)
+{
+	if (qp->page_list.bufs)
+		return qp->page_list.bufs[offset >> qp->page_list.shift] +
+			(offset & ((1 << qp->page_list.shift) - 1));
+	else
+		return qp->buf.buf + offset;
+}
+
 static void *get_recv_wqe_v2(struct hns_roce_qp *qp, unsigned int n)
 {
-	return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift);
+	return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
 }
 
 static void *get_send_wqe(struct hns_roce_qp *qp, unsigned int n)
 {
-	return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift);
+	return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
 }
 
 static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n)
 {
-	return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift);
+	return get_wqe(qp, qp->ex_sge.offset + (n << qp->ex_sge.sge_shift));
 }
 
 static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
@@ -507,6 +516,62 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
 	return V2_CQ_OK;
 }
 
+static inline bool check_qp_dca_enable(struct hns_roce_qp *qp)
+{
+	return !!qp->page_list.bufs && (qp->flags & HNS_ROCE_QP_CAP_DCA);
+}
+
+static int dca_attach_qp_buf(struct hns_roce_context *ctx,
+			     struct hns_roce_qp *qp)
+{
+	struct hns_roce_dca_attach_attr attr = {};
+	uint32_t idx;
+
+	pthread_spin_lock(&qp->sq.lock);
+	pthread_spin_lock(&qp->rq.lock);
+
+	if (qp->sq.wqe_cnt > 0) {
+		idx = qp->sq.head & (qp->sq.wqe_cnt - 1);
+		attr.sq_offset = idx << qp->sq.wqe_shift;
+	}
+
+	if (qp->ex_sge.sge_cnt > 0) {
+		idx = qp->next_sge & (qp->ex_sge.sge_cnt - 1);
+		attr.sge_offset = idx << qp->ex_sge.sge_shift;
+	}
+
+	if (qp->rq.wqe_cnt > 0) {
+		idx = qp->rq.head & (qp->rq.wqe_cnt - 1);
+		attr.rq_offset = idx << qp->rq.wqe_shift;
+	}
+
+	pthread_spin_unlock(&qp->rq.lock);
+	pthread_spin_unlock(&qp->sq.lock);
+
+	return hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr,
+				       qp->buf_size, &qp->page_list);
+}
+
+static void dca_detach_qp_buf(struct hns_roce_context *ctx,
+			      struct hns_roce_qp *qp)
+{
+	struct hns_roce_dca_detach_attr attr;
+	bool is_empty;
+
+	pthread_spin_lock(&qp->sq.lock);
+	pthread_spin_lock(&qp->rq.lock);
+
+	is_empty = qp->sq.head == qp->sq.tail && qp->rq.head == qp->rq.tail;
+	if (is_empty && qp->sq.wqe_cnt > 0)
+		attr.sq_index = qp->sq.head & (qp->sq.wqe_cnt - 1);
+
+	pthread_spin_unlock(&qp->rq.lock);
+	pthread_spin_unlock(&qp->sq.lock);
+
+	if (is_empty)
+		hns_roce_detach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr);
+}
+
 static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
 				struct hns_roce_qp **cur_qp, struct ibv_wc *wc)
 {
@@ -636,6 +701,9 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
 
 	for (npolled = 0; npolled < ne; ++npolled) {
 		err = hns_roce_v2_poll_one(cq, &qp, wc + npolled);
+		if (qp && check_qp_dca_enable(qp))
+			dca_detach_qp_buf(ctx, qp);
+
 		if (err != V2_CQ_OK)
 			break;
 	}
@@ -1003,6 +1071,16 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
 	return 0;
 }
 
+static inline void fill_rc_dca_fields(uint32_t qp_num,
+				      struct hns_roce_rc_sq_wqe *wqe)
+{
+	roce_set_field(wqe->byte_4, RC_SQ_WQE_BYTE_4_SQPN_L_M,
+		       RC_SQ_WQE_BYTE_4_SQPN_L_S, qp_num);
+	roce_set_field(wqe->byte_4, RC_SQ_WQE_BYTE_4_SQPN_H_M,
+		       RC_SQ_WQE_BYTE_4_SQPN_H_S,
+		       qp_num >> RC_SQ_WQE_BYTE_4_SQPN_L_W);
+}
+
 static void set_bind_mw_seg(struct hns_roce_rc_sq_wqe *wqe,
 			    const struct ibv_send_wr *wr)
 {
@@ -1120,6 +1198,9 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
 		return ret;
 
 wqe_valid:
+	if (check_qp_dca_enable(qp))
+		fill_rc_dca_fields(qp->verbs_qp.qp.qp_num, rc_sq_wqe);
+
 	/*
 	 * The pipeline can sequentially post all valid WQEs into WQ buffer,
 	 * including new WQEs waiting for the doorbell to update the PI again.
@@ -1135,6 +1216,21 @@ wqe_valid:
 	return 0;
 }
 
+static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
+{
+	struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
+
+	/* check that state is OK to post receive */
+	if (ibvqp->state == IBV_QPS_RESET || ibvqp->state == IBV_QPS_INIT ||
+	    ibvqp->state == IBV_QPS_RTR)
+		return -EINVAL;
+
+	if (check_qp_dca_enable(qp))
+		return dca_attach_qp_buf(ctx, qp);
+
+	return 0;
+}
+
 int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 			    struct ibv_send_wr **bad_wr)
 {
@@ -1143,14 +1239,14 @@ int hns_roce_u_v2_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 	struct hns_roce_sge_info sge_info = {};
 	unsigned int wqe_idx, nreq;
 	struct ibv_qp_attr attr;
-	int ret = 0;
 	void *wqe;
+	int ret;
 
 	/* check that state is OK to post send */
-	if (ibvqp->state == IBV_QPS_RESET || ibvqp->state == IBV_QPS_INIT ||
-	    ibvqp->state == IBV_QPS_RTR) {
+	ret = check_qp_send(qp, ctx);
+	if (unlikely(ret)) {
 		*bad_wr = wr;
-		return EINVAL;
+		return ret;
 	}
 
 	pthread_spin_lock(&qp->sq.lock);
@@ -1216,6 +1312,20 @@ out:
 	return ret;
 }
 
+static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
+{
+	struct ibv_qp *ibvqp = &qp->verbs_qp.qp;
+
+	/* check that state is OK to post receive */
+	if (ibvqp->state == IBV_QPS_RESET)
+		return -EINVAL;
+
+	if (check_qp_dca_enable(qp))
+		return dca_attach_qp_buf(ctx, qp);
+
+	return 0;
+}
+
 static void fill_rq_wqe(struct hns_roce_qp *qp, struct ibv_recv_wr *wr,
 			unsigned int wqe_idx)
 {
@@ -1253,12 +1363,13 @@ static int hns_roce_u_v2_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
 	struct hns_roce_qp *qp = to_hr_qp(ibvqp);
 	unsigned int wqe_idx, nreq;
 	struct ibv_qp_attr attr;
-	int ret = 0;
+	int ret;
 
 	/* check that state is OK to post receive */
-	if (ibvqp->state == IBV_QPS_RESET) {
+	ret = check_qp_recv(qp, ctx);
+	if (ret) {
 		*bad_wr = wr;
-		return EINVAL;
+		return ret;
 	}
 
 	pthread_spin_lock(&qp->rq.lock);
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index f5e6402..00716c6 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -250,6 +250,13 @@ struct hns_roce_rc_sq_wqe {
 
 #define RC_SQ_WQE_BYTE_4_RDMA_WRITE_S 22
 
+#define RC_SQ_WQE_BYTE_4_SQPN_L_W 2
+#define RC_SQ_WQE_BYTE_4_SQPN_L_S 5
+#define RC_SQ_WQE_BYTE_4_SQPN_L_M GENMASK(5, 2)
+
+#define RC_SQ_WQE_BYTE_4_SQPN_H_S 13
+#define RC_SQ_WQE_BYTE_4_SQPN_H_M GENMASK(18, 13)
+
 #define RC_SQ_WQE_BYTE_16_XRC_SRQN_S 0
 #define RC_SQ_WQE_BYTE_16_XRC_SRQN_M \
 	(((1UL << 24) - 1) << RC_SQ_WQE_BYTE_16_XRC_SRQN_S)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index f3d6fc1..6ec8b12 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -688,6 +688,14 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
 	return 0;
 }
 
+static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type)
+{
+	if (pool_en && (qp_type == IBV_QPT_RC || qp_type == IBV_QPT_XRC_SEND))
+		return true;
+
+	return false;
+}
+
 static void qp_free_wqe(struct hns_roce_qp *qp)
 {
 	qp_free_recv_inl_buf(qp);
@@ -699,8 +707,8 @@ static void qp_free_wqe(struct hns_roce_qp *qp)
 	hns_roce_free_buf(&qp->buf);
 }
 
-static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
-			struct hns_roce_context *ctx)
+static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
+			struct hns_roce_qp *qp, struct hns_roce_context *ctx)
 {
 	struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
 
@@ -718,12 +726,24 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
 	}
 
 	if (qp->rq_rinl_buf.wqe_cnt) {
-		if (qp_alloc_recv_inl_buf(cap, qp))
+		if (qp_alloc_recv_inl_buf(&attr->cap, qp))
 			goto err_alloc;
 	}
 
-	if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, HNS_HW_PAGE_SIZE))
-		goto err_alloc;
+	if (check_qp_support_dca(ctx->dca_ctx.max_size != 0, attr->qp_type)) {
+		/* when DCA is enabled, use a buffer list to store page addr */
+		qp->buf.buf = NULL;
+		qp->page_list.max_cnt = hr_hw_page_count(qp->buf_size);
+		qp->page_list.shift = HNS_HW_PAGE_SHIFT;
+		qp->page_list.bufs = calloc(qp->page_list.max_cnt,
+					    sizeof(void *));
+		if (!qp->page_list.bufs)
+			goto err_alloc;
+	} else {
+		if (hns_roce_alloc_buf(&qp->buf, qp->buf_size,
+				       HNS_HW_PAGE_SIZE))
+			goto err_alloc;
+	}
 
 	return 0;
 
@@ -928,7 +948,7 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
 	    pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
 		return -ENOMEM;
 
-	ret = qp_alloc_wqe(&attr->cap, qp, ctx);
+	ret = qp_alloc_wqe(attr, qp, ctx);
 	if (ret)
 		return ret;
 

From patchwork Sun Feb  7 03:12:54 2021
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Patchwork-Submitter: Weihang Li <liweihang@huawei.com>
X-Patchwork-Id: 12072459
Return-Path: <linux-rdma-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level: 
X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00,
	HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH,
	MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham
	autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
	by smtp.lore.kernel.org (Postfix) with ESMTP id 1E558C4332B
	for <linux-rdma@archiver.kernel.org>; Sun,  7 Feb 2021 03:16:27 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
	by mail.kernel.org (Postfix) with ESMTP id D92A064DC3
	for <linux-rdma@archiver.kernel.org>; Sun,  7 Feb 2021 03:16:26 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
        id S229707AbhBGDQS (ORCPT <rfc822;linux-rdma@archiver.kernel.org>);
        Sat, 6 Feb 2021 22:16:18 -0500
Received: from szxga07-in.huawei.com ([45.249.212.35]:12861 "EHLO
        szxga07-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
        with ESMTP id S229741AbhBGDP7 (ORCPT
        <rfc822;linux-rdma@vger.kernel.org>); Sat, 6 Feb 2021 22:15:59 -0500
Received: from DGGEMS402-HUB.china.huawei.com (unknown [172.30.72.60])
        by szxga07-in.huawei.com (SkyGuard) with ESMTP id 4DYDl44y3Kz7hLs;
        Sun,  7 Feb 2021 11:13:52 +0800 (CST)
Received: from localhost.localdomain (10.67.165.24) by
 DGGEMS402-HUB.china.huawei.com (10.3.19.202) with Microsoft SMTP Server id
 14.3.498.0; Sun, 7 Feb 2021 11:15:10 +0800
From: Weihang Li <liweihang@huawei.com>
To: <jgg@nvidia.com>, <leon@kernel.org>
CC: <dledford@redhat.com>, <linux-rdma@vger.kernel.org>,
        <linuxarm@openeuler.org>
Subject: [PATCH RFC rdma-core 5/5] libhns: Add support for configuring DCA
Date: Sun, 7 Feb 2021 11:12:54 +0800
Message-ID: <1612667574-56673-6-git-send-email-liweihang@huawei.com>
X-Mailer: git-send-email 2.8.1
In-Reply-To: <1612667574-56673-1-git-send-email-liweihang@huawei.com>
References: <1612667574-56673-1-git-send-email-liweihang@huawei.com>
MIME-Version: 1.0
X-Originating-IP: [10.67.165.24]
X-CFilter-Loop: Reflected
Precedence: bulk
List-ID: <linux-rdma.vger.kernel.org>
X-Mailing-List: linux-rdma@vger.kernel.org

From: Xi Wang <wangxi11@huawei.com>

Add a group of environment variables for configuring DCA memory pool which
is as follows:
 HNS_DCA_MAX_SIZE - the max size when expand the memory pool.
 HNS_DCA_MIN_SIZE - the reverved size when shrink the memory pool.
 HNS_DCA_UNIT_SIZE - the increase unit size when expand the memory pool.

Also append 'IBV_QP_CREATE_DYNAMIC_CONTEXT_ATTACH' for creating a DCA QP.

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 libibverbs/cmd_qp.c              |  3 ++-
 libibverbs/verbs.h               |  1 +
 providers/hns/hns_roce_u.c       | 52 ++++++++++++++++++++++++++++++++++++++--
 providers/hns/hns_roce_u_verbs.c | 18 ++++++++++----
 4 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/libibverbs/cmd_qp.c b/libibverbs/cmd_qp.c
index 056f397..f9899ad 100644
--- a/libibverbs/cmd_qp.c
+++ b/libibverbs/cmd_qp.c
@@ -38,7 +38,8 @@ enum {
 					IBV_QP_CREATE_SCATTER_FCS |
 					IBV_QP_CREATE_CVLAN_STRIPPING |
 					IBV_QP_CREATE_SOURCE_QPN |
-					IBV_QP_CREATE_PCI_WRITE_END_PADDING
+					IBV_QP_CREATE_PCI_WRITE_END_PADDING |
+					IBV_QP_CREATE_DYNAMIC_CONTEXT_ATTACH
 };
 
 
diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index 656b0f9..db37dce 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -918,6 +918,7 @@ enum ibv_qp_create_flags {
 	IBV_QP_CREATE_CVLAN_STRIPPING		= 1 << 9,
 	IBV_QP_CREATE_SOURCE_QPN		= 1 << 10,
 	IBV_QP_CREATE_PCI_WRITE_END_PADDING	= 1 << 11,
+	IBV_QP_CREATE_DYNAMIC_CONTEXT_ATTACH	= 1 << 13,
 };
 
 enum ibv_qp_create_send_ops_flags {
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 28b1130..3ba99ed 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -92,6 +92,10 @@ static const struct verbs_context_ops hns_common_ops = {
 static int init_dca_context(struct hns_roce_context *ctx, int page_size)
 {
 	struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+	int unit_size = 0;
+	long max_size = 0;
+	long min_size;
+	char *env;
 	int ret;
 
 	if (!(ctx->cap_flags & HNS_ROCE_CAP_FLAG_DCA_MODE))
@@ -102,8 +106,52 @@ static int init_dca_context(struct hns_roce_context *ctx, int page_size)
 	if (ret)
 		return ret;
 
-	dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
-	dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE;
+	env = getenv("HNS_DCA_UNIT_SIZE");
+	if (env) {
+		unit_size = atoi(env);
+		/* Disable DCA only for this process */
+		if (unit_size == 0)
+			return 0;
+	}
+
+	if (unit_size < 1)
+		unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES;
+
+	unit_size = align(unit_size, page_size);
+
+	/*
+	 * not set OR 0: Unlimited memory pool increase.
+	 * others: Maximum memory pool size to be increased.
+	 */
+	env = getenv("HNS_DCA_MAX_SIZE");
+	if (env)
+		max_size = atol(env);
+
+	if (max_size == 0)
+		max_size = HNS_DCA_MAX_MEM_SIZE;
+	else
+		max_size = DIV_ROUND_UP(max_size, unit_size);
+
+	/*
+	 * not set: The memory pool cannot be reduced.
+	 * others: The size of free memory in the pool cannot exceed this value.
+	 * 0: Always reduce the free memory in the pool.
+	 */
+	env = getenv("HNS_DCA_MIN_SIZE");
+	if (env) {
+		min_size = atol(env);
+		if (min_size > 0)
+			min_size = DIV_ROUND_UP(min_size, unit_size);
+		else
+			min_size = 0;
+	} else {
+		min_size = HNS_DCA_MAX_MEM_SIZE;
+	}
+
+	dca_ctx->unit_size = unit_size;
+	dca_ctx->max_size = max_size;
+	dca_ctx->min_size = min_size;
+
 	dca_ctx->mem_cnt = 0;
 
 	return 0;
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 6ec8b12..0697328 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -542,7 +542,7 @@ int hns_roce_u_destroy_srq(struct ibv_srq *srq)
 }
 
 enum {
-	CREATE_QP_SUP_CREATE_FLAGS = 0,
+	CREATE_QP_SUP_CREATE_FLAGS = IBV_QP_CREATE_DYNAMIC_CONTEXT_ATTACH,
 };
 
 enum {
@@ -688,10 +688,11 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
 	return 0;
 }
 
-static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type)
+static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type,
+					uint32_t create_flags)
 {
 	if (pool_en && (qp_type == IBV_QPT_RC || qp_type == IBV_QPT_XRC_SEND))
-		return true;
+		return !!(create_flags & IBV_QP_CREATE_DYNAMIC_CONTEXT_ATTACH);
 
 	return false;
 }
@@ -730,7 +731,8 @@ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
 			goto err_alloc;
 	}
 
-	if (check_qp_support_dca(ctx->dca_ctx.max_size != 0, attr->qp_type)) {
+	if (check_qp_support_dca(ctx->dca_ctx.max_size != 0,
+				 attr->qp_type, attr->create_flags)) {
 		/* when DCA is enabled, use a buffer list to store page addr */
 		qp->buf.buf = NULL;
 		qp->page_list.max_cnt = hr_hw_page_count(qp->buf_size);
@@ -901,6 +903,14 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr,
 	struct hns_roce_create_qp_ex cmd_ex = {};
 	int ret;
 
+	/*
+	 * When handling the command in kernel space, the user QP enable
+	 * the DCA mode by checking whether the cmd.buf_addr is NULL but
+	 * not the attr->create_flags has the DCA enable bit, so clear
+	 * this bit before command is ready to run.
+	 */
+	attr->create_flags &= ~IBV_QP_CREATE_DYNAMIC_CONTEXT_ATTACH;
+
 	cmd_ex.sdb_addr = (uintptr_t)qp->sdb;
 	cmd_ex.db_addr = (uintptr_t)qp->rdb;
 	cmd_ex.buf_addr = (uintptr_t)qp->buf.buf;