
[v4,for-next,1/1] RDMA/hns: Support direct wqe of userspace

Message ID 20211122033801.30807-2-liangwenpeng@huawei.com (mailing list archive)
State Superseded
Delegated to: Jason Gunthorpe
Series RDMA/hns: Support direct WQE of userspace

Commit Message

Wenpeng Liang Nov. 22, 2021, 3:38 a.m. UTC
From: Yixing Liu <liuyixing1@huawei.com>

Add direct wqe enable switch and address mapping.

Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  8 +--
 drivers/infiniband/hw/hns/hns_roce_main.c   | 38 ++++++++++++---
 drivers/infiniband/hw/hns/hns_roce_pd.c     |  3 ++
 drivers/infiniband/hw/hns/hns_roce_qp.c     | 54 ++++++++++++++++++++-
 include/uapi/rdma/hns-abi.h                 |  2 +
 5 files changed, 94 insertions(+), 11 deletions(-)

Comments

Leon Romanovsky Nov. 22, 2021, 8:58 a.m. UTC | #1
On Mon, Nov 22, 2021 at 11:38:01AM +0800, Wenpeng Liang wrote:
> From: Yixing Liu <liuyixing1@huawei.com>
> 
> Add direct wqe enable switch and address mapping.
> 
> Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
> Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_device.h |  8 +--
>  drivers/infiniband/hw/hns/hns_roce_main.c   | 38 ++++++++++++---
>  drivers/infiniband/hw/hns/hns_roce_pd.c     |  3 ++
>  drivers/infiniband/hw/hns/hns_roce_qp.c     | 54 ++++++++++++++++++++-
>  include/uapi/rdma/hns-abi.h                 |  2 +
>  5 files changed, 94 insertions(+), 11 deletions(-)

<...>

>  	entry = to_hns_mmap(rdma_entry);
>  	pfn = entry->address >> PAGE_SHIFT;
> -	prot = vma->vm_page_prot;
>  
> -	if (entry->mmap_type != HNS_ROCE_MMAP_TYPE_TPTR)
> -		prot = pgprot_noncached(prot);
> +	switch (entry->mmap_type) {
> +	case HNS_ROCE_MMAP_TYPE_DB:
> +		prot = pgprot_noncached(vma->vm_page_prot);
> +		break;
> +	case HNS_ROCE_MMAP_TYPE_TPTR:
> +		prot = vma->vm_page_prot;
> +		break;
> +	case HNS_ROCE_MMAP_TYPE_DWQE:
> +		prot = pgprot_device(vma->vm_page_prot);

Everything fine, except this pgprot_device(). You probably need to check
WC internally in your driver and use either pgprot_writecombine() or
pgprot_noncached() explicitly.
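
For illustration, a minimal sketch of the alternative described above, assuming
a driver-internal write-combine capability flag; the field name
dwqe_wc_supported is made up for this sketch and is not part of the patch:

static pgprot_t hns_roce_dwqe_pgprot(struct hns_roce_dev *hr_dev,
				     pgprot_t prot)
{
	/* Hypothetical: pick the attribute explicitly instead of relying
	 * on pgprot_device(), based on a driver-internal WC capability.
	 */
	if (hr_dev->dwqe_wc_supported)
		return pgprot_writecombine(prot);

	return pgprot_noncached(prot);
}

The HNS_ROCE_MMAP_TYPE_DWQE case would then call this helper instead of
pgprot_device().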

Thanks
Wenpeng Liang Nov. 22, 2021, 9:28 a.m. UTC | #2
On 2021/11/22 16:58, Leon Romanovsky wrote:
>>  	entry = to_hns_mmap(rdma_entry);
>>  	pfn = entry->address >> PAGE_SHIFT;
>> -	prot = vma->vm_page_prot;
>>  
>> -	if (entry->mmap_type != HNS_ROCE_MMAP_TYPE_TPTR)
>> -		prot = pgprot_noncached(prot);
>> +	switch (entry->mmap_type) {
>> +	case HNS_ROCE_MMAP_TYPE_DB:
>> +		prot = pgprot_noncached(vma->vm_page_prot);
>> +		break;
>> +	case HNS_ROCE_MMAP_TYPE_TPTR:
>> +		prot = vma->vm_page_prot;
>> +		break;
>> +	case HNS_ROCE_MMAP_TYPE_DWQE:
>> +		prot = pgprot_device(vma->vm_page_prot);
> Everything fine, except this pgprot_device(). You probably need to check
> WC internally in your driver and use either pgprot_writecombine() or
> pgprot_noncached() explicitly.
> 
> Thanks
> .
> 

This issue was also discussed in the v2 version: using this prot for
direct WQE on HIP09 achieves better performance than NC.

v2 link: https://patchwork.kernel.org/project/linux-rdma/patch/1622705834-19353-3-git-send-email-liweihang@huawei.com/

Thanks
Wenpeng
Leon Romanovsky Nov. 22, 2021, 11:10 a.m. UTC | #3
On Mon, Nov 22, 2021 at 05:28:31PM +0800, Wenpeng Liang wrote:
> 
> 
> On 2021/11/22 16:58, Leon Romanovsky wrote:
> >>  	entry = to_hns_mmap(rdma_entry);
> >>  	pfn = entry->address >> PAGE_SHIFT;
> >> -	prot = vma->vm_page_prot;
> >>  
> >> -	if (entry->mmap_type != HNS_ROCE_MMAP_TYPE_TPTR)
> >> -		prot = pgprot_noncached(prot);
> >> +	switch (entry->mmap_type) {
> >> +	case HNS_ROCE_MMAP_TYPE_DB:
> >> +		prot = pgprot_noncached(vma->vm_page_prot);
> >> +		break;
> >> +	case HNS_ROCE_MMAP_TYPE_TPTR:
> >> +		prot = vma->vm_page_prot;
> >> +		break;
> >> +	case HNS_ROCE_MMAP_TYPE_DWQE:
> >> +		prot = pgprot_device(vma->vm_page_prot);
> > Everything fine, except this pgprot_device(). You probably need to check
> > WC internally in your driver and use either pgprot_writecombine() or
> > pgprot_noncached() explicitly.
> > 
> > Thanks
> > .
> > 
> 
> This issue was also discussed in the v2 version: using this prot for
> direct WQE on HIP09 achieves better performance than NC.
> 
> v2 link: https://patchwork.kernel.org/project/linux-rdma/patch/1622705834-19353-3-git-send-email-liweihang@huawei.com/

But isn't it specific to ARM model that behaves such? Will it be the case
when you move to upgrade your ARM core?

Thanks

> 
> Thanks
> Wenpeng
Wenpeng Liang Nov. 22, 2021, 12:36 p.m. UTC | #4
On 2021/11/22 19:10, Leon Romanovsky wrote:
> On Mon, Nov 22, 2021 at 05:28:31PM +0800, Wenpeng Liang wrote:
>>
>>
>> On 2021/11/22 16:58, Leon Romanovsky wrote:
>>>>  	entry = to_hns_mmap(rdma_entry);
>>>>  	pfn = entry->address >> PAGE_SHIFT;
>>>> -	prot = vma->vm_page_prot;
>>>>  
>>>> -	if (entry->mmap_type != HNS_ROCE_MMAP_TYPE_TPTR)
>>>> -		prot = pgprot_noncached(prot);
>>>> +	switch (entry->mmap_type) {
>>>> +	case HNS_ROCE_MMAP_TYPE_DB:
>>>> +		prot = pgprot_noncached(vma->vm_page_prot);
>>>> +		break;
>>>> +	case HNS_ROCE_MMAP_TYPE_TPTR:
>>>> +		prot = vma->vm_page_prot;
>>>> +		break;
>>>> +	case HNS_ROCE_MMAP_TYPE_DWQE:
>>>> +		prot = pgprot_device(vma->vm_page_prot);
>>> Everything fine, except this pgprot_device(). You probably need to check
>>> WC internally in your driver and use either pgprot_writecombine() or
>>> pgprot_noncached() explicitly.
>>>
>>> Thanks
>>> .
>>>
>>
>> This issue was also discussed in the v2 version: using this prot for
>> direct WQE on HIP09 achieves better performance than NC.
>>
>> v2 link: https://patchwork.kernel.org/project/linux-rdma/patch/1622705834-19353-3-git-send-email-liweihang@huawei.com/
> 
> But isn't it specific to ARM model that behaves such? Will it be the case
> when you move to upgrade your ARM core?
> 
> Thanks
> 

Although the hns roce engine is a PCIe device, it is integrated into the SoC,
and using pgprot_device() gives a larger performance improvement than noncached.
If the ARM core is upgraded, we will revisit this issue.

Thanks
Wenpeng

>>
>> Thanks
>> Wenpeng
> .
>
Jason Gunthorpe Nov. 25, 2021, 5:50 p.m. UTC | #5
On Mon, Nov 22, 2021 at 10:58:09AM +0200, Leon Romanovsky wrote:
> On Mon, Nov 22, 2021 at 11:38:01AM +0800, Wenpeng Liang wrote:
> > From: Yixing Liu <liuyixing1@huawei.com>
> > 
> > Add direct wqe enable switch and address mapping.
> > 
> > Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
> > Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
> >  drivers/infiniband/hw/hns/hns_roce_device.h |  8 +--
> >  drivers/infiniband/hw/hns/hns_roce_main.c   | 38 ++++++++++++---
> >  drivers/infiniband/hw/hns/hns_roce_pd.c     |  3 ++
> >  drivers/infiniband/hw/hns/hns_roce_qp.c     | 54 ++++++++++++++++++++-
> >  include/uapi/rdma/hns-abi.h                 |  2 +
> >  5 files changed, 94 insertions(+), 11 deletions(-)
> 
> <...>
> 
> >  	entry = to_hns_mmap(rdma_entry);
> >  	pfn = entry->address >> PAGE_SHIFT;
> > -	prot = vma->vm_page_prot;
> >  
> > -	if (entry->mmap_type != HNS_ROCE_MMAP_TYPE_TPTR)
> > -		prot = pgprot_noncached(prot);
> > +	switch (entry->mmap_type) {
> > +	case HNS_ROCE_MMAP_TYPE_DB:
> > +		prot = pgprot_noncached(vma->vm_page_prot);
> > +		break;
> > +	case HNS_ROCE_MMAP_TYPE_TPTR:
> > +		prot = vma->vm_page_prot;
> > +		break;
> > +	case HNS_ROCE_MMAP_TYPE_DWQE:
> > +		prot = pgprot_device(vma->vm_page_prot);
> 
> Everything fine, except this pgprot_device(). You probably need to check
> WC internally in your driver and use either pgprot_writecombine() or
> pgprot_noncached() explicitly.

pgprot_device is only used in two places in the kernel
pci_mmap_resource_range() for setting up the sysfs resourceXX mmap

And in pci_remap_iospace() as part of emulating PIO on mmio
architectures

So, a PCI device should always be using pgprot_device() in its mmap
function

The question is why is pgprot_noncached() being used at all? The only
difference on ARM is that noncached is non-Early Write Acknowledgement
and device is not.

At the very least this should be explained in a comment why nE vs E is
required in all these cases.

Jason
Wenpeng Liang Nov. 26, 2021, 8:25 a.m. UTC | #6
On 2021/11/26 1:50, Jason Gunthorpe wrote:
> On Mon, Nov 22, 2021 at 10:58:09AM +0200, Leon Romanovsky wrote:
>> On Mon, Nov 22, 2021 at 11:38:01AM +0800, Wenpeng Liang wrote:
>>> From: Yixing Liu <liuyixing1@huawei.com>
>>>
>>> Add direct wqe enable switch and address mapping.
>>>
>>> Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
>>> Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
>>>  drivers/infiniband/hw/hns/hns_roce_device.h |  8 +--
>>>  drivers/infiniband/hw/hns/hns_roce_main.c   | 38 ++++++++++++---
>>>  drivers/infiniband/hw/hns/hns_roce_pd.c     |  3 ++
>>>  drivers/infiniband/hw/hns/hns_roce_qp.c     | 54 ++++++++++++++++++++-
>>>  include/uapi/rdma/hns-abi.h                 |  2 +
>>>  5 files changed, 94 insertions(+), 11 deletions(-)
>>
>> <...>
>>
>>>  	entry = to_hns_mmap(rdma_entry);
>>>  	pfn = entry->address >> PAGE_SHIFT;
>>> -	prot = vma->vm_page_prot;
>>>  
>>> -	if (entry->mmap_type != HNS_ROCE_MMAP_TYPE_TPTR)
>>> -		prot = pgprot_noncached(prot);
>>> +	switch (entry->mmap_type) {
>>> +	case HNS_ROCE_MMAP_TYPE_DB:
>>> +		prot = pgprot_noncached(vma->vm_page_prot);
>>> +		break;
>>> +	case HNS_ROCE_MMAP_TYPE_TPTR:
>>> +		prot = vma->vm_page_prot;
>>> +		break;
>>> +	case HNS_ROCE_MMAP_TYPE_DWQE:
>>> +		prot = pgprot_device(vma->vm_page_prot);
>>
>> Everything fine, except this pgprot_device(). You probably need to check
>> WC internally in your driver and use either pgprot_writecombine() or
>> pgprot_noncached() explicitly.
> 
> pgprot_device is only used in two places in the kernel
> pci_mmap_resource_range() for setting up the sysfs resourceXX mmap
> 
> And in pci_remap_iospace() as part of emulating PIO on mmio
> architectures
> 
> So, a PCI device should always be using pgprot_device() in its mmap
> function
> 
> The question is why is pgprot_noncached() being used at all? The only
> difference on ARM is that noncached is non-Early Write Acknowledgement
> and device is not.
> 
> At the very least this should be explained in a comment why nE vs E is
> required in all these cases.
> 
> Jason
> .
> 

HIP09 is a SoC device, and our CPU optimizes ST4 instructions only for the
device memory attribute. Therefore, we use the device attribute to get this
optimization.

The device attribute allows early write acknowledgement, so it is faster than
noncached. To make early ack safe, even if the data arrives incomplete, our
device still rings the doorbell based on the content of the first 8 bytes to
complete the data transmission.
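
As a rough illustration of that flow (a sketch only, not the actual rdma-core
hns provider code), a userspace provider with the direct WQE page mapped could
push one 64-byte WQE with plain 64-bit stores; a real provider would use an
ST4/STP-based copy helper to benefit from the optimization described above:

#include <stdint.h>

/* Hypothetical helper: write one 64-byte WQE through the mapped direct
 * WQE page. The hardware rings the doorbell based on the first 8 bytes,
 * so the transfer can complete even if the rest of the write is torn.
 */
static void write_dwqe(volatile uint64_t *dwqe, const uint64_t *wqe)
{
	int i;

	for (i = 0; i < 8; i++)
		dwqe[i] = wqe[i];
}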

Thanks
Wenpeng
Jason Gunthorpe Nov. 26, 2021, 12:16 p.m. UTC | #7
On Fri, Nov 26, 2021 at 04:25:27PM +0800, Wenpeng Liang wrote:
> On 2021/11/26 1:50, Jason Gunthorpe wrote:
> > On Mon, Nov 22, 2021 at 10:58:09AM +0200, Leon Romanovsky wrote:
> >> On Mon, Nov 22, 2021 at 11:38:01AM +0800, Wenpeng Liang wrote:
> >>> From: Yixing Liu <liuyixing1@huawei.com>
> >>>
> >>> Add direct wqe enable switch and address mapping.
> >>>
> >>> Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
> >>> Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
> >>>  drivers/infiniband/hw/hns/hns_roce_device.h |  8 +--
> >>>  drivers/infiniband/hw/hns/hns_roce_main.c   | 38 ++++++++++++---
> >>>  drivers/infiniband/hw/hns/hns_roce_pd.c     |  3 ++
> >>>  drivers/infiniband/hw/hns/hns_roce_qp.c     | 54 ++++++++++++++++++++-
> >>>  include/uapi/rdma/hns-abi.h                 |  2 +
> >>>  5 files changed, 94 insertions(+), 11 deletions(-)
> >>
> >> <...>
> >>
> >>>  	entry = to_hns_mmap(rdma_entry);
> >>>  	pfn = entry->address >> PAGE_SHIFT;
> >>> -	prot = vma->vm_page_prot;
> >>>  
> >>> -	if (entry->mmap_type != HNS_ROCE_MMAP_TYPE_TPTR)
> >>> -		prot = pgprot_noncached(prot);
> >>> +	switch (entry->mmap_type) {
> >>> +	case HNS_ROCE_MMAP_TYPE_DB:
> >>> +		prot = pgprot_noncached(vma->vm_page_prot);
> >>> +		break;
> >>> +	case HNS_ROCE_MMAP_TYPE_TPTR:
> >>> +		prot = vma->vm_page_prot;
> >>> +		break;
> >>> +	case HNS_ROCE_MMAP_TYPE_DWQE:
> >>> +		prot = pgprot_device(vma->vm_page_prot);
> >>
> >> Everything fine, except this pgprot_device(). You probably need to check
> >> WC internally in your driver and use either pgprot_writecombine() or
> >> pgprot_noncached() explicitly.
> > 
> > pgprot_device is only used in two places in the kernel
> > pci_mmap_resource_range() for setting up the sysfs resourceXX mmap
> > 
> > And in pci_remap_iospace() as part of emulating PIO on mmio
> > architectures
> > 
> > So, a PCI device should always be using pgprot_device() in its mmap
> > function
> > 
> > The question is why is pgprot_noncached() being used at all? The only
> > difference on ARM is that noncached is non-Early Write Acknowledgement
> > and device is not.
> > 
> > At the very least this should be explained in a comment why nE vs E is
> > required in all these cases.
> > 
> > Jason
> > .
> > 
> 
> HIP09 is a SoC device, and our CPU optimizes ST4 instructions only for the
> device memory attribute. Therefore, we use the device attribute to get this
> optimization.
> 
> The device attribute allows early write acknowledgement, so it is faster than
> noncached. To make early ack safe, even if the data arrives incomplete, our
> device still rings the doorbell based on the content of the first 8 bytes to
> complete the data transmission.

That doesn't really explain why the doorbell needs to be mapped noncached

Jason
Wenpeng Liang Nov. 27, 2021, 9:04 a.m. UTC | #8
On 2021/11/26 20:16, Jason Gunthorpe wrote:
> On Fri, Nov 26, 2021 at 04:25:27PM +0800, Wenpeng Liang wrote:
>> On 2021/11/26 1:50, Jason Gunthorpe wrote:
>>> On Mon, Nov 22, 2021 at 10:58:09AM +0200, Leon Romanovsky wrote:
>>>> On Mon, Nov 22, 2021 at 11:38:01AM +0800, Wenpeng Liang wrote:
>>>>> From: Yixing Liu <liuyixing1@huawei.com>
>>>>>
>>>>> Add direct wqe enable switch and address mapping.
>>>>>
>>>>> Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
>>>>> Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
>>>>>  drivers/infiniband/hw/hns/hns_roce_device.h |  8 +--
>>>>>  drivers/infiniband/hw/hns/hns_roce_main.c   | 38 ++++++++++++---
>>>>>  drivers/infiniband/hw/hns/hns_roce_pd.c     |  3 ++
>>>>>  drivers/infiniband/hw/hns/hns_roce_qp.c     | 54 ++++++++++++++++++++-
>>>>>  include/uapi/rdma/hns-abi.h                 |  2 +
>>>>>  5 files changed, 94 insertions(+), 11 deletions(-)
>>>>
>>>> <...>
>>>>
>>>>>  	entry = to_hns_mmap(rdma_entry);
>>>>>  	pfn = entry->address >> PAGE_SHIFT;
>>>>> -	prot = vma->vm_page_prot;
>>>>>  
>>>>> -	if (entry->mmap_type != HNS_ROCE_MMAP_TYPE_TPTR)
>>>>> -		prot = pgprot_noncached(prot);
>>>>> +	switch (entry->mmap_type) {
>>>>> +	case HNS_ROCE_MMAP_TYPE_DB:
>>>>> +		prot = pgprot_noncached(vma->vm_page_prot);
>>>>> +		break;
>>>>> +	case HNS_ROCE_MMAP_TYPE_TPTR:
>>>>> +		prot = vma->vm_page_prot;
>>>>> +		break;
>>>>> +	case HNS_ROCE_MMAP_TYPE_DWQE:
>>>>> +		prot = pgprot_device(vma->vm_page_prot);
>>>>
>>>> Everything fine, except this pgprot_device(). You probably need to check
>>>> WC internally in your driver and use either pgprot_writecombine() or
>>>> pgprot_noncached() explicitly.
>>>
>>> pgprot_device is only used in two places in the kernel
>>> pci_mmap_resource_range() for setting up the sysfs resourceXX mmap
>>>
>>> And in pci_remap_iospace() as part of emulating PIO on mmio
>>> architectures
>>>
>>> So, a PCI device should always be using pgprot_device() in its mmap
>>> function
>>>
>>> The question is why is pgprot_noncached() being used at all? The only
>>> difference on ARM is that noncached is non-Early Write Acknowledgement
>>> and device is not.
>>>
>>> At the very least this should be explained in a comment why nE vs E is
>>> required in all these cases.
>>>
>>> Jason
>>> .
>>>
>>
>> HIP09 is a SoC device, and our CPU optimizes ST4 instructions only for the
>> device memory attribute. Therefore, we use the device attribute to get this
>> optimization.
>>
>> The device attribute allows early write acknowledgement, so it is faster than
>> noncached. To make early ack safe, even if the data arrives incomplete, our
>> device still rings the doorbell based on the content of the first 8 bytes to
>> complete the data transmission.
> 
> That doesn't really explain why the doorbell needs to be mapped noncached
> 

I might have misunderstood what you meant before.

For the HNS_ROCE_MMAP_TYPE_DB type, our device does not support Early Write
Acknowledgement. Therefore, HNS_ROCE_MMAP_TYPE_DB uses the noncached attribute.
I will add a comment in v5.
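
For illustration only, a sketch of how such a comment could read in
hns_roce_mmap(); the wording here is assumed, not taken from v5:

	switch (entry->mmap_type) {
	case HNS_ROCE_MMAP_TYPE_DB:
		/* HW doorbell: the device does not support Early Write
		 * Acknowledgement for this BAR, so map it noncached
		 * (Device-nGnRnE on ARM64).
		 */
		prot = pgprot_noncached(vma->vm_page_prot);
		break;
	case HNS_ROCE_MMAP_TYPE_TPTR:
		prot = vma->vm_page_prot;
		break;
	case HNS_ROCE_MMAP_TYPE_DWQE:
		/* Direct WQE: Early Write Acknowledgement is safe because
		 * HW completes the doorbell from the first 8 bytes, so the
		 * Device-nGnRE attribute from pgprot_device() is used for
		 * better ST4 performance.
		 */
		prot = pgprot_device(vma->vm_page_prot);
		break;
	default:
		return -EINVAL;
	}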

Thanks
Wenpeng

Patch

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index e35164ae7376..bc7112a205a7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -182,6 +182,7 @@  enum {
 	HNS_ROCE_CAP_FLAG_FRMR                  = BIT(8),
 	HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL		= BIT(9),
 	HNS_ROCE_CAP_FLAG_ATOMIC		= BIT(10),
+	HNS_ROCE_CAP_FLAG_DIRECT_WQE		= BIT(12),
 	HNS_ROCE_CAP_FLAG_SDI_MODE		= BIT(14),
 	HNS_ROCE_CAP_FLAG_STASH			= BIT(17),
 };
@@ -228,6 +229,7 @@  struct hns_roce_uar {
 enum hns_roce_mmap_type {
 	HNS_ROCE_MMAP_TYPE_DB = 1,
 	HNS_ROCE_MMAP_TYPE_TPTR,
+	HNS_ROCE_MMAP_TYPE_DWQE,
 };
 
 struct hns_user_mmap_entry {
@@ -627,10 +629,6 @@  struct hns_roce_work {
 	u32 queue_num;
 };
 
-enum {
-	HNS_ROCE_QP_CAP_DIRECT_WQE = BIT(5),
-};
-
 struct hns_roce_qp {
 	struct ib_qp		ibqp;
 	struct hns_roce_wq	rq;
@@ -675,6 +673,7 @@  struct hns_roce_qp {
 	struct list_head	node; /* all qps are on a list */
 	struct list_head	rq_node; /* all recv qps are on a list */
 	struct list_head	sq_node; /* all send qps are on a list */
+	struct hns_user_mmap_entry *dwqe_mmap_entry;
 };
 
 struct hns_roce_ib_iboe {
@@ -1010,6 +1009,7 @@  struct hns_roce_dev {
 	u32 func_num;
 	u32 is_vf;
 	u32 cong_algo_tmpl_id;
+	u64 dwqe_page;
 };
 
 static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 8233bec053ee..32e4e0c95122 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -310,9 +310,25 @@  hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
 	entry->address = address;
 	entry->mmap_type = mmap_type;
 
-	ret = rdma_user_mmap_entry_insert_exact(
-		ucontext, &entry->rdma_entry, length,
-		mmap_type == HNS_ROCE_MMAP_TYPE_DB ? 0 : 1);
+	switch (mmap_type) {
+	case HNS_ROCE_MMAP_TYPE_DB:
+		ret = rdma_user_mmap_entry_insert_exact(
+				ucontext, &entry->rdma_entry, length, 0);
+		break;
+	case HNS_ROCE_MMAP_TYPE_TPTR:
+		ret = rdma_user_mmap_entry_insert_exact(
+				ucontext, &entry->rdma_entry, length, 1);
+		break;
+	case HNS_ROCE_MMAP_TYPE_DWQE:
+		ret = rdma_user_mmap_entry_insert_range(
+				ucontext, &entry->rdma_entry, length, 2,
+				U32_MAX);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
 	if (ret) {
 		kfree(entry);
 		return NULL;
@@ -439,10 +455,20 @@  static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
 
 	entry = to_hns_mmap(rdma_entry);
 	pfn = entry->address >> PAGE_SHIFT;
-	prot = vma->vm_page_prot;
 
-	if (entry->mmap_type != HNS_ROCE_MMAP_TYPE_TPTR)
-		prot = pgprot_noncached(prot);
+	switch (entry->mmap_type) {
+	case HNS_ROCE_MMAP_TYPE_DB:
+		prot = pgprot_noncached(vma->vm_page_prot);
+		break;
+	case HNS_ROCE_MMAP_TYPE_TPTR:
+		prot = vma->vm_page_prot;
+		break;
+	case HNS_ROCE_MMAP_TYPE_DWQE:
+		prot = pgprot_device(vma->vm_page_prot);
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	ret = rdma_user_mmap_io(uctx, vma, pfn, rdma_entry->npages * PAGE_SIZE,
 				prot, rdma_entry);
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index 81ffad77ae42..03c349f7ebbe 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -115,6 +115,9 @@  int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
 	} else {
 		uar->pfn = ((pci_resource_start(hr_dev->pci_dev, 2))
 			   >> PAGE_SHIFT);
+		if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE)
+			hr_dev->dwqe_page =
+				pci_resource_start(hr_dev->pci_dev, 4);
 	}
 
 	return 0;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 4fcab1611548..c84e1c23722c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -379,6 +379,11 @@  static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
 	return ret;
 }
 
+static void qp_user_mmap_entry_remove(struct hns_roce_qp *hr_qp)
+{
+	rdma_user_mmap_entry_remove(&hr_qp->dwqe_mmap_entry->rdma_entry);
+}
+
 void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
 {
 	struct xarray *xa = &hr_dev->qp_table_xa;
@@ -780,7 +785,11 @@  static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		goto err_inline;
 	}
 
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE)
+		hr_qp->en_flags |= HNS_ROCE_QP_CAP_DIRECT_WQE;
+
 	return 0;
+
 err_inline:
 	free_rq_inline_buf(hr_qp);
 
@@ -822,6 +831,35 @@  static inline bool kernel_qp_has_rdb(struct hns_roce_dev *hr_dev,
 		hns_roce_qp_has_rq(init_attr));
 }
 
+static int qp_mmap_entry(struct hns_roce_qp *hr_qp,
+			 struct hns_roce_dev *hr_dev,
+			 struct ib_udata *udata,
+			 struct hns_roce_ib_create_qp_resp *resp)
+{
+	struct hns_roce_ucontext *uctx =
+		rdma_udata_to_drv_context(udata,
+			struct hns_roce_ucontext, ibucontext);
+	struct rdma_user_mmap_entry *rdma_entry;
+	u64 address;
+
+	address = hr_dev->dwqe_page + hr_qp->qpn * HNS_ROCE_DWQE_SIZE;
+
+	hr_qp->dwqe_mmap_entry =
+		hns_roce_user_mmap_entry_insert(&uctx->ibucontext, address,
+						HNS_ROCE_DWQE_SIZE,
+						HNS_ROCE_MMAP_TYPE_DWQE);
+
+	if (!hr_qp->dwqe_mmap_entry) {
+		ibdev_err(&hr_dev->ib_dev, "failed to get dwqe mmap entry.\n");
+		return -ENOMEM;
+	}
+
+	rdma_entry = &hr_qp->dwqe_mmap_entry->rdma_entry;
+	resp->dwqe_mmap_key = rdma_user_mmap_get_offset(rdma_entry);
+
+	return 0;
+}
+
 static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
 			    struct hns_roce_qp *hr_qp,
 			    struct ib_qp_init_attr *init_attr,
@@ -909,10 +947,16 @@  static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB;
 
 	if (udata) {
+		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE) {
+			ret = qp_mmap_entry(hr_qp, hr_dev, udata, resp);
+			if (ret)
+				return ret;
+		}
+
 		ret = alloc_user_qp_db(hr_dev, hr_qp, init_attr, udata, ucmd,
 				       resp);
 		if (ret)
-			return ret;
+			goto err_remove_qp;
 	} else {
 		ret = alloc_kernel_qp_db(hr_dev, hr_qp, init_attr);
 		if (ret)
@@ -920,6 +964,12 @@  static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 	}
 
 	return 0;
+
+err_remove_qp:
+	if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
+		qp_user_mmap_entry_remove(hr_qp);
+
+	return ret;
 }
 
 static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
@@ -933,6 +983,8 @@  static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 			hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
 		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
 			hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
+		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
+			qp_user_mmap_entry_remove(hr_qp);
 	} else {
 		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
 			hns_roce_free_db(hr_dev, &hr_qp->rdb);
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 42b177655560..f6fde06db4b4 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -77,10 +77,12 @@  enum hns_roce_qp_cap_flags {
 	HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0,
 	HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1,
 	HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2,
+	HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5,
 };
 
 struct hns_roce_ib_create_qp_resp {
 	__aligned_u64 cap_flags;
+	__aligned_u64 dwqe_mmap_key;
 };
 
 struct hns_roce_ib_alloc_ucontext_resp {
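
For context, a minimal userspace sketch of how the new dwqe_mmap_key would be
consumed (an assumed example, not the actual rdma-core hns provider): after the
create QP command returns, the provider checks HNS_ROCE_QP_CAP_DIRECT_WQE in
cap_flags and maps the per-QP direct WQE page through the uverbs command fd,
using dwqe_mmap_key as the mmap offset. dwqe_size stands for whatever length
the kernel registered for the mmap entry.

#include <stddef.h>
#include <sys/mman.h>
#include <rdma/hns-abi.h>

/* Hypothetical helper: map the per-QP direct WQE page exposed by the
 * kernel, or return NULL if the QP does not support direct WQE.
 */
static void *map_dwqe(int cmd_fd,
		      const struct hns_roce_ib_create_qp_resp *resp,
		      size_t dwqe_size)
{
	if (!(resp->cap_flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
		return NULL;

	return mmap(NULL, dwqe_size, PROT_WRITE, MAP_SHARED, cmd_fd,
		    resp->dwqe_mmap_key);
}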