diff mbox series

[v4,for-next,01/12] RDMA/hns: Introduce DCA for RC QP

Message ID 1627525163-1683-2-git-send-email-liangwenpeng@huawei.com (mailing list archive)
State Changes Requested
Delegated to: Jason Gunthorpe
Headers show
Series RDMA/hns: Add support for Dynamic Context Attachment | expand

Commit Message

Wenpeng Liang July 29, 2021, 2:19 a.m. UTC
From: Xi Wang <wangxi11@huawei.com>

The hip09 introduces the DCA(Dynamic context attachment) feature which
supports many RC QPs to share the WQE buffer in a memory pool, this will
reduce the memory consumption when there are too many QPs are inactive.

If a QP enables DCA feature, the WQE's buffer will not be allocated when
creating. But when the users start to post WRs, the hns driver will
allocate a buffer from the memory pool and then fill WQEs which tagged with
this QP's number.

The hns ROCEE will stop accessing the WQE buffer when the user polled all
of the CQEs for a DCA QP, then the driver will recycle this WQE's buffer
to the memory pool.

This patch adds a group of methods to support the user space register
buffers to a memory pool which belongs to the user context. The hns kernel
driver will update the pages state in this pool when the user calling the
post/poll methods and the user driver can get the QP's WQE buffer address
by the key and offset which queried from kernel.

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
 drivers/infiniband/hw/hns/Makefile          |   2 +-
 drivers/infiniband/hw/hns/hns_roce_dca.c    | 343 ++++++++++++++++++++++++++++
 drivers/infiniband/hw/hns/hns_roce_dca.h    |  22 ++
 drivers/infiniband/hw/hns/hns_roce_device.h |   9 +
 drivers/infiniband/hw/hns/hns_roce_main.c   |  27 ++-
 include/uapi/rdma/hns-abi.h                 |  27 +++
 6 files changed, 427 insertions(+), 3 deletions(-)
 create mode 100644 drivers/infiniband/hw/hns/hns_roce_dca.c
 create mode 100644 drivers/infiniband/hw/hns/hns_roce_dca.h

Comments

Jason Gunthorpe Aug. 19, 2021, 11:54 p.m. UTC | #1
On Thu, Jul 29, 2021 at 10:19:12AM +0800, Wenpeng Liang wrote:

> +static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_REG)(
> +	struct uverbs_attr_bundle *attrs)
> +{
> +	struct hns_roce_ucontext *uctx = uverbs_attr_to_hr_uctx(attrs);
> +	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device);
> +	struct ib_uobject *uobj =
> +		uverbs_attr_get_uobject(attrs, HNS_IB_ATTR_DCA_MEM_REG_HANDLE);
> +	struct dca_mem_attr init_attr = {};
> +	struct dca_mem *mem;
> +	int ret;
> +
> +	if (uverbs_copy_from(&init_attr.addr, attrs,
> +			     HNS_IB_ATTR_DCA_MEM_REG_ADDR) ||
> +	    uverbs_copy_from(&init_attr.size, attrs,
> +			     HNS_IB_ATTR_DCA_MEM_REG_LEN) ||
> +	    uverbs_copy_from(&init_attr.key, attrs,
> +			     HNS_IB_ATTR_DCA_MEM_REG_KEY))
> +		return -EFAULT;

This should return the code from uverbs_copy_from() not
-EFAULT.

> +DECLARE_UVERBS_NAMED_METHOD(
> +	HNS_IB_METHOD_DCA_MEM_REG,
> +	UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, HNS_IB_OBJECT_DCA_MEM,
> +			UVERBS_ACCESS_NEW, UA_MANDATORY),
> +	UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_REG_LEN, UVERBS_ATTR_TYPE(u32),
> +			   UA_MANDATORY),
> +	UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_REG_ADDR, UVERBS_ATTR_TYPE(u64),
> +			   UA_MANDATORY),
> +	UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_REG_KEY, UVERBS_ATTR_TYPE(u64),
> +			   UA_MANDATORY));

I think these ptr_in's are supposed to be const_in these days? The
code you were referencing for this pre-dates const_in and hasn't been
converted.

The distinction is that const_in is only for small < 64 bit types.

Please check all the cases for both of these things
  
> +extern const struct uapi_definition hns_roce_dca_uapi_defs[];
> +static const struct uapi_definition hns_roce_uapi_defs[] = {
> +	UAPI_DEF_CHAIN(hns_roce_dca_uapi_defs),
> +	{}
> +};

The 2nd array isn't necessary, is it?

Jason
Wenpeng Liang Aug. 21, 2021, 9:37 a.m. UTC | #2
On 2021/8/20 7:54, Jason Gunthorpe wrote:
> On Thu, Jul 29, 2021 at 10:19:12AM +0800, Wenpeng Liang wrote:
> 
>> +static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_REG)(
>> +	struct uverbs_attr_bundle *attrs)
>> +{
>> +	struct hns_roce_ucontext *uctx = uverbs_attr_to_hr_uctx(attrs);
>> +	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device);
>> +	struct ib_uobject *uobj =
>> +		uverbs_attr_get_uobject(attrs, HNS_IB_ATTR_DCA_MEM_REG_HANDLE);
>> +	struct dca_mem_attr init_attr = {};
>> +	struct dca_mem *mem;
>> +	int ret;
>> +
>> +	if (uverbs_copy_from(&init_attr.addr, attrs,
>> +			     HNS_IB_ATTR_DCA_MEM_REG_ADDR) ||
>> +	    uverbs_copy_from(&init_attr.size, attrs,
>> +			     HNS_IB_ATTR_DCA_MEM_REG_LEN) ||
>> +	    uverbs_copy_from(&init_attr.key, attrs,
>> +			     HNS_IB_ATTR_DCA_MEM_REG_KEY))
>> +		return -EFAULT;
> 
> This should return the code from uverbs_copy_from() not
> -EFAULT.
> 

I will fix it, including the other uverbs_copy_from of this patchset.

>> +DECLARE_UVERBS_NAMED_METHOD(
>> +	HNS_IB_METHOD_DCA_MEM_REG,
>> +	UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, HNS_IB_OBJECT_DCA_MEM,
>> +			UVERBS_ACCESS_NEW, UA_MANDATORY),
>> +	UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_REG_LEN, UVERBS_ATTR_TYPE(u32),
>> +			   UA_MANDATORY),
>> +	UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_REG_ADDR, UVERBS_ATTR_TYPE(u64),
>> +			   UA_MANDATORY),
>> +	UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_REG_KEY, UVERBS_ATTR_TYPE(u64),
>> +			   UA_MANDATORY));
> 
> I think these ptr_in's are supposed to be const_in these days? The
> code you were referencing for this pre-dates const_in and hasn't been
> converted.
> 
> The distinction is that const_in is only for small < 64 bit types.
> 
> Please check all the cases for both of these things
> 

thanks,

>> +extern const struct uapi_definition hns_roce_dca_uapi_defs[];
>> +static const struct uapi_definition hns_roce_uapi_defs[] = {
>> +	UAPI_DEF_CHAIN(hns_roce_dca_uapi_defs),
>> +	{}
>> +};
> 
> The 2nd array isn't necessary, is it?
> 
> Jason
> .
> 

I will delete it.

Thanks,
Wenpeng.
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index e105945..9962b23 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -6,7 +6,7 @@ 
 ccflags-y :=  -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
 
 hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
-	hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
+	hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o hns_roce_dca.o \
 	hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o
 
 ifdef CONFIG_INFINIBAND_HNS_HIP06
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c
new file mode 100644
index 0000000..6a3ff12
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_dca.c
@@ -0,0 +1,343 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2021 HiSilicon Limited. All rights reserved.
+ */
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/uverbs_std_types.h>
+#include "hns_roce_device.h"
+#include "hns_roce_dca.h"
+
+#define UVERBS_MODULE_NAME hns_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+/* DCA memory */
+struct dca_mem {
+#define DCA_MEM_FLAGS_ALLOCED BIT(0)
+#define DCA_MEM_FLAGS_REGISTERED BIT(1)
+	u32 flags;
+	struct list_head list; /* link to mem list in dca context */
+	spinlock_t lock; /* protect the @flags and @list */
+	int page_count; /* page count in this mem obj */
+	u64 key; /* register by caller */
+	u32 size; /* bytes in this mem object */
+	struct hns_dca_page_state *states; /* record each page's state */
+	void *pages; /* memory handle for getting dma address */
+};
+
+struct dca_mem_attr {
+	u64 key;
+	u64 addr;
+	u32 size;
+};
+
+static void *alloc_dca_pages(struct hns_roce_dev *hr_dev, struct dca_mem *mem,
+			     struct dca_mem_attr *attr)
+{
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct ib_umem *umem;
+
+	umem = ib_umem_get(ibdev, attr->addr, attr->size, 0);
+	if (IS_ERR(umem)) {
+		ibdev_err(ibdev, "failed to get uDCA pages, ret = %ld.\n",
+			  PTR_ERR(umem));
+		return NULL;
+	}
+
+	mem->page_count = ib_umem_num_dma_blocks(umem, HNS_HW_PAGE_SIZE);
+
+	return umem;
+}
+
+static void init_dca_umem_states(struct hns_dca_page_state *states, int count,
+				 struct ib_umem *umem)
+{
+	dma_addr_t pre_addr, cur_addr;
+	struct ib_block_iter biter;
+	int i = 0;
+
+	pre_addr = 0;
+	rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap,
+			    HNS_HW_PAGE_SIZE) {
+		if (i >= count)
+			return;
+
+		/* In a continuous address, the first pages's head is 1 */
+		cur_addr = rdma_block_iter_dma_address(&biter);
+		if ((i == 0) || (cur_addr - pre_addr != HNS_HW_PAGE_SIZE))
+			states[i].head = 1;
+
+		pre_addr = cur_addr;
+		i++;
+	}
+}
+
+static struct hns_dca_page_state *alloc_dca_states(void *pages, int count)
+{
+	struct hns_dca_page_state *states;
+
+	states = kcalloc(count, sizeof(*states), GFP_KERNEL);
+	if (!states)
+		return NULL;
+
+	init_dca_umem_states(states, count, pages);
+
+	return states;
+}
+
+/* user DCA is managed by ucontext */
+#define to_hr_dca_ctx(uctx) (&(uctx)->dca_ctx)
+
+static void unregister_dca_mem(struct hns_roce_ucontext *uctx,
+			       struct dca_mem *mem)
+{
+	struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(uctx);
+	unsigned long flags;
+	void *states, *pages;
+
+	spin_lock_irqsave(&ctx->pool_lock, flags);
+
+	spin_lock(&mem->lock);
+	mem->flags &= ~DCA_MEM_FLAGS_REGISTERED;
+	mem->page_count = 0;
+	pages = mem->pages;
+	mem->pages = NULL;
+	states = mem->states;
+	mem->states = NULL;
+	spin_unlock(&mem->lock);
+
+	ctx->free_mems--;
+	ctx->free_size -= mem->size;
+
+	ctx->total_size -= mem->size;
+	spin_unlock_irqrestore(&ctx->pool_lock, flags);
+
+	kfree(states);
+	ib_umem_release(pages);
+}
+
+static int register_dca_mem(struct hns_roce_dev *hr_dev,
+			    struct hns_roce_ucontext *uctx,
+			    struct dca_mem *mem, struct dca_mem_attr *attr)
+{
+	struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(uctx);
+	void *states, *pages;
+	unsigned long flags;
+
+	pages = alloc_dca_pages(hr_dev, mem, attr);
+	if (!pages)
+		return -ENOMEM;
+
+	states = alloc_dca_states(pages, mem->page_count);
+	if (!states) {
+		ib_umem_release(pages);
+		return -ENOMEM;
+	}
+
+	spin_lock_irqsave(&ctx->pool_lock, flags);
+
+	spin_lock(&mem->lock);
+	mem->pages = pages;
+	mem->states = states;
+	mem->key = attr->key;
+	mem->size = attr->size;
+	mem->flags |= DCA_MEM_FLAGS_REGISTERED;
+	spin_unlock(&mem->lock);
+
+	ctx->free_mems++;
+	ctx->free_size += attr->size;
+	ctx->total_size += attr->size;
+	spin_unlock_irqrestore(&ctx->pool_lock, flags);
+
+	return 0;
+}
+
+static void init_dca_context(struct hns_roce_dca_ctx *ctx)
+{
+	INIT_LIST_HEAD(&ctx->pool);
+	spin_lock_init(&ctx->pool_lock);
+	ctx->total_size = 0;
+}
+
+static void cleanup_dca_context(struct hns_roce_dev *hr_dev,
+				struct hns_roce_dca_ctx *ctx)
+{
+	struct dca_mem *mem, *tmp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->pool_lock, flags);
+	list_for_each_entry_safe(mem, tmp, &ctx->pool, list) {
+		list_del(&mem->list);
+		mem->flags = 0;
+		spin_unlock_irqrestore(&ctx->pool_lock, flags);
+
+		kfree(mem->states);
+		ib_umem_release(mem->pages);
+		kfree(mem);
+
+		spin_lock_irqsave(&ctx->pool_lock, flags);
+	}
+	ctx->total_size = 0;
+	spin_unlock_irqrestore(&ctx->pool_lock, flags);
+}
+
+void hns_roce_register_udca(struct hns_roce_dev *hr_dev,
+			    struct hns_roce_ucontext *uctx)
+{
+	init_dca_context(&uctx->dca_ctx);
+}
+
+void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev,
+			      struct hns_roce_ucontext *uctx)
+{
+	cleanup_dca_context(hr_dev, &uctx->dca_ctx);
+}
+
+static struct dca_mem *alloc_dca_mem(struct hns_roce_dca_ctx *ctx)
+{
+	struct dca_mem *mem, *tmp, *found = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->pool_lock, flags);
+	list_for_each_entry_safe(mem, tmp, &ctx->pool, list) {
+		spin_lock(&mem->lock);
+		if (!mem->flags) {
+			found = mem;
+			mem->flags |= DCA_MEM_FLAGS_ALLOCED;
+			spin_unlock(&mem->lock);
+			goto done;
+		}
+		spin_unlock(&mem->lock);
+	}
+
+done:
+	spin_unlock_irqrestore(&ctx->pool_lock, flags);
+
+	if (found)
+		return found;
+
+	mem = kzalloc(sizeof(*mem), GFP_NOWAIT);
+	if (!mem)
+		return NULL;
+
+	spin_lock_init(&mem->lock);
+	INIT_LIST_HEAD(&mem->list);
+
+	mem->flags |= DCA_MEM_FLAGS_ALLOCED;
+
+	spin_lock_irqsave(&ctx->pool_lock, flags);
+	list_add(&mem->list, &ctx->pool);
+	spin_unlock_irqrestore(&ctx->pool_lock, flags);
+	return mem;
+}
+
+static void free_dca_mem(struct dca_mem *mem)
+{
+	/* We cannot hold the whole pool's lock during the DCA is working
+	 * until cleanup the context in cleanup_dca_context(), so we just
+	 * set the DCA mem state as free when destroying DCA mem object.
+	 */
+	spin_lock(&mem->lock);
+	mem->flags = 0;
+	spin_unlock(&mem->lock);
+}
+
+static struct hns_roce_ucontext *
+uverbs_attr_to_hr_uctx(struct uverbs_attr_bundle *attrs)
+{
+	return rdma_udata_to_drv_context(&attrs->driver_udata,
+					 struct hns_roce_ucontext, ibucontext);
+}
+
+static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_REG)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct hns_roce_ucontext *uctx = uverbs_attr_to_hr_uctx(attrs);
+	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device);
+	struct ib_uobject *uobj =
+		uverbs_attr_get_uobject(attrs, HNS_IB_ATTR_DCA_MEM_REG_HANDLE);
+	struct dca_mem_attr init_attr = {};
+	struct dca_mem *mem;
+	int ret;
+
+	if (uverbs_copy_from(&init_attr.addr, attrs,
+			     HNS_IB_ATTR_DCA_MEM_REG_ADDR) ||
+	    uverbs_copy_from(&init_attr.size, attrs,
+			     HNS_IB_ATTR_DCA_MEM_REG_LEN) ||
+	    uverbs_copy_from(&init_attr.key, attrs,
+			     HNS_IB_ATTR_DCA_MEM_REG_KEY))
+		return -EFAULT;
+
+	mem = alloc_dca_mem(to_hr_dca_ctx(uctx));
+	if (!mem)
+		return -ENOMEM;
+
+	ret = register_dca_mem(hr_dev, uctx, mem, &init_attr);
+	if (ret) {
+		free_dca_mem(mem);
+		return ret;
+	}
+
+	uobj->object = mem;
+
+	return 0;
+}
+
+static int dca_cleanup(struct ib_uobject *uobject, enum rdma_remove_reason why,
+		       struct uverbs_attr_bundle *attrs)
+{
+	struct hns_roce_ucontext *uctx = uverbs_attr_to_hr_uctx(attrs);
+	struct dca_mem *mem;
+
+	/* One DCA MEM maybe shared by many QPs, so the DCA mem uobject must
+	 * be destroyed before all QP uobjects, and we will destroy the DCA
+	 * uobjects when cleanup DCA context by calling hns_roce_cleanup_dca().
+	 */
+	if (why == RDMA_REMOVE_CLOSE || why == RDMA_REMOVE_DRIVER_REMOVE)
+		return 0;
+
+	mem = uobject->object;
+	unregister_dca_mem(uctx, mem);
+	free_dca_mem(mem);
+
+	return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+	HNS_IB_METHOD_DCA_MEM_REG,
+	UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, HNS_IB_OBJECT_DCA_MEM,
+			UVERBS_ACCESS_NEW, UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_REG_LEN, UVERBS_ATTR_TYPE(u32),
+			   UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_REG_ADDR, UVERBS_ATTR_TYPE(u64),
+			   UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_REG_KEY, UVERBS_ATTR_TYPE(u64),
+			   UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+	HNS_IB_METHOD_DCA_MEM_DEREG,
+	UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, HNS_IB_OBJECT_DCA_MEM,
+			UVERBS_ACCESS_DESTROY, UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(HNS_IB_OBJECT_DCA_MEM,
+			    UVERBS_TYPE_ALLOC_IDR(dca_cleanup),
+			    &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_REG),
+			    &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_DEREG));
+
+static bool dca_is_supported(struct ib_device *device)
+{
+	struct hns_roce_dev *dev = to_hr_dev(device);
+
+	return dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE;
+}
+
+const struct uapi_definition hns_roce_dca_uapi_defs[] = {
+	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+		HNS_IB_OBJECT_DCA_MEM,
+		UAPI_DEF_IS_OBJ_SUPPORTED(dca_is_supported)),
+	{}
+};
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.h b/drivers/infiniband/hw/hns/hns_roce_dca.h
new file mode 100644
index 0000000..aad198c
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_dca.h
@@ -0,0 +1,22 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2021 HiSilicon Limited. All rights reserved.
+ */
+
+#ifndef __HNS_ROCE_DCA_H
+#define __HNS_ROCE_DCA_H
+
+/* DCA page state (32 bit) */
+struct hns_dca_page_state {
+	u32 buf_id : 29; /* If zero, means page can be used by any buffer. */
+	u32 lock : 1; /* @buf_id locked this page to prepare access. */
+	u32 active : 1; /* @buf_id is accessing this page. */
+	u32 head : 1; /* This page is the head in a continuous address range. */
+};
+
+void hns_roce_register_udca(struct hns_roce_dev *hr_dev,
+			    struct hns_roce_ucontext *uctx);
+void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev,
+			      struct hns_roce_ucontext *uctx);
+
+#endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 991f652..d18bc22 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -225,11 +225,20 @@  struct hns_roce_uar {
 	unsigned long	logic_idx;
 };
 
+struct hns_roce_dca_ctx {
+	struct list_head pool; /* all DCA mems link to @pool */
+	spinlock_t pool_lock; /* protect @pool */
+	unsigned int free_mems; /* free mem num in pool */
+	size_t free_size; /* free mem size in pool */
+	size_t total_size; /* total size in pool */
+};
+
 struct hns_roce_ucontext {
 	struct ib_ucontext	ibucontext;
 	struct hns_roce_uar	uar;
 	struct list_head	page_list;
 	struct mutex		page_mutex;
+	struct hns_roce_dca_ctx	dca_ctx;
 };
 
 struct hns_roce_pd {
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 078a971..3df95d4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -37,10 +37,12 @@ 
 #include <rdma/ib_addr.h>
 #include <rdma/ib_smi.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_ioctl.h>
 #include <rdma/ib_cache.h>
 #include "hns_roce_common.h"
 #include "hns_roce_device.h"
 #include "hns_roce_hem.h"
+#include "hns_roce_dca.h"
 
 static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port, u8 *addr)
 {
@@ -294,16 +296,17 @@  static int hns_roce_modify_device(struct ib_device *ib_dev, int mask,
 static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 				   struct ib_udata *udata)
 {
-	int ret;
 	struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
-	struct hns_roce_ib_alloc_ucontext_resp resp = {};
 	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
+	struct hns_roce_ib_alloc_ucontext_resp resp = {};
+	int ret;
 
 	if (!hr_dev->active)
 		return -EAGAIN;
 
 	resp.qp_tab_size = hr_dev->caps.num_qps;
 	resp.srq_tab_size = hr_dev->caps.num_srqs;
+	resp.cap_flags = hr_dev->caps.flags;
 
 	ret = hns_roce_uar_alloc(hr_dev, &context->uar);
 	if (ret)
@@ -315,6 +318,9 @@  static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 		mutex_init(&context->page_mutex);
 	}
 
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE)
+		hns_roce_register_udca(hr_dev, context);
+
 	resp.cqe_size = hr_dev->caps.cqe_sz;
 
 	ret = ib_copy_to_udata(udata, &resp,
@@ -325,6 +331,9 @@  static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 	return 0;
 
 error_fail_copy_to_udata:
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE)
+		hns_roce_unregister_udca(hr_dev, context);
+
 	hns_roce_uar_free(hr_dev, &context->uar);
 
 error_fail_uar_alloc:
@@ -334,8 +343,12 @@  static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
 {
 	struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device);
 
 	hns_roce_uar_free(to_hr_dev(ibcontext->device), &context->uar);
+
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE)
+		hns_roce_unregister_udca(hr_dev, context);
 }
 
 static int hns_roce_mmap(struct ib_ucontext *context,
@@ -417,6 +430,12 @@  static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
 	ib_unregister_device(&hr_dev->ib_dev);
 }
 
+extern const struct uapi_definition hns_roce_dca_uapi_defs[];
+static const struct uapi_definition hns_roce_uapi_defs[] = {
+	UAPI_DEF_CHAIN(hns_roce_dca_uapi_defs),
+	{}
+};
+
 static const struct ib_device_ops hns_roce_dev_ops = {
 	.owner = THIS_MODULE,
 	.driver_id = RDMA_DRIVER_HNS,
@@ -526,6 +545,10 @@  static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
 
 	ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
 	ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
+
+	if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
+		ib_dev->driver_def = hns_roce_uapi_defs;
+
 	for (i = 0; i < hr_dev->caps.num_ports; i++) {
 		if (!hr_dev->iboe.netdevs[i])
 			continue;
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 42b1776..c17ec91 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -83,15 +83,42 @@  struct hns_roce_ib_create_qp_resp {
 	__aligned_u64 cap_flags;
 };
 
+enum {
+	HNS_ROCE_CAP_FLAG_DCA_MODE = 1 << 15,
+};
+
 struct hns_roce_ib_alloc_ucontext_resp {
 	__u32	qp_tab_size;
 	__u32	cqe_size;
 	__u32	srq_tab_size;
 	__u32	reserved;
+	__aligned_u64 cap_flags;
 };
 
 struct hns_roce_ib_alloc_pd_resp {
 	__u32 pdn;
 };
 
+#define UVERBS_ID_NS_MASK 0xF000
+#define UVERBS_ID_NS_SHIFT 12
+
+enum hns_ib_objects {
+	HNS_IB_OBJECT_DCA_MEM = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum hns_ib_dca_mem_methods {
+	HNS_IB_METHOD_DCA_MEM_REG = (1U << UVERBS_ID_NS_SHIFT),
+	HNS_IB_METHOD_DCA_MEM_DEREG,
+};
+
+enum hns_ib_dca_mem_reg_attrs {
+	HNS_IB_ATTR_DCA_MEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	HNS_IB_ATTR_DCA_MEM_REG_LEN,
+	HNS_IB_ATTR_DCA_MEM_REG_ADDR,
+	HNS_IB_ATTR_DCA_MEM_REG_KEY,
+};
+
+enum hns_ib_dca_mem_dereg_attrs {
+	HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
 #endif /* HNS_ABI_USER_H */