diff mbox series

[for-next] RDMA/hns: Add support for extended atomic in userspace

Message ID 1579052546-11746-1-git-send-email-liweihang@huawei.com (mailing list archive)
State Mainlined
Commit 7db82697b8bf05ae56d02bf8da998bcd1122531d
Delegated to: Jason Gunthorpe
Headers show
Series [for-next] RDMA/hns: Add support for extended atomic in userspace | expand

Commit Message

Weihang Li Jan. 15, 2020, 1:42 a.m. UTC
From: Jiaran Zhang <zhangjiaran@huawei.com>

To support extended atomic operations including cmp & swap and fetch & add
of 8 bytes, 16 bytes, 32 bytes, 64 bytes in userspace, some field in qpc
should be configured.

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 16 +++++++++++++++-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h |  3 ++-
 2 files changed, 17 insertions(+), 2 deletions(-)

Comments

Jason Gunthorpe Jan. 15, 2020, 8:56 p.m. UTC | #1
On Wed, Jan 15, 2020 at 09:42:26AM +0800, Weihang Li wrote:
> From: Jiaran Zhang <zhangjiaran@huawei.com>
> 
> To support extended atomic operations including cmp & swap and fetch & add
> of 8 bytes, 16 bytes, 32 bytes, 64 bytes in userspace, some field in qpc
> should be configured.
> 
> Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
> Signed-off-by: Weihang Li <liweihang@huawei.com>
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 16 +++++++++++++++-
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h |  3 ++-
>  2 files changed, 17 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index f1e0ba6..7edf3d8 100644
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -1692,7 +1692,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
>  	caps->max_srq_desc_sz	= HNS_ROCE_V2_MAX_SRQ_DESC_SZ;
>  	caps->qpc_entry_sz	= HNS_ROCE_V2_QPC_ENTRY_SZ;
>  	caps->irrl_entry_sz	= HNS_ROCE_V2_IRRL_ENTRY_SZ;
> -	caps->trrl_entry_sz	= HNS_ROCE_V2_TRRL_ENTRY_SZ;
> +	caps->trrl_entry_sz	= HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ;
>  	caps->cqc_entry_sz	= HNS_ROCE_V2_CQC_ENTRY_SZ;
>  	caps->srqc_entry_sz	= HNS_ROCE_V2_SRQC_ENTRY_SZ;
>  	caps->mtpt_entry_sz	= HNS_ROCE_V2_MTPT_ENTRY_SZ;
> @@ -3286,6 +3286,9 @@ static void set_access_flags(struct hns_roce_qp *hr_qp,
>  	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
>  		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
>  	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
> +	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S,
> +		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
> +	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, 0);
>  }
>  
>  static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
> @@ -3653,6 +3656,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
>  			     IB_ACCESS_REMOTE_ATOMIC));
>  		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
>  			     0);
> +		roce_set_bit(context->byte_76_srqn_op_en,
> +			     V2_QPC_BYTE_76_EXT_ATE_S,
> +			     !!(attr->qp_access_flags &
> +				IB_ACCESS_REMOTE_ATOMIC));
> +		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
> +			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
>  	} else {
>  		roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
>  			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ));
> @@ -3668,6 +3677,11 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
>  			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
>  		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
>  			     0);
> +		roce_set_bit(context->byte_76_srqn_op_en,
> +			     V2_QPC_BYTE_76_EXT_ATE_S,
> +			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
> +		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
> +			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
>  	}

What happens to your userspace if it runs on an old kernel and tries
to use extended atomic?

Jason
Weihang Li Jan. 16, 2020, 4:05 a.m. UTC | #2
On 2020/1/16 4:56, Jason Gunthorpe wrote:
> On Wed, Jan 15, 2020 at 09:42:26AM +0800, Weihang Li wrote:
>> From: Jiaran Zhang <zhangjiaran@huawei.com>
>>
>> To support extended atomic operations including cmp & swap and fetch & add
>> of 8 bytes, 16 bytes, 32 bytes, 64 bytes in userspace, some field in qpc
>> should be configured.
>>
>> Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
>> Signed-off-by: Weihang Li <liweihang@huawei.com>
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 16 +++++++++++++++-
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h |  3 ++-
>>  2 files changed, 17 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> index f1e0ba6..7edf3d8 100644
>> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> @@ -1692,7 +1692,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
>>  	caps->max_srq_desc_sz	= HNS_ROCE_V2_MAX_SRQ_DESC_SZ;
>>  	caps->qpc_entry_sz	= HNS_ROCE_V2_QPC_ENTRY_SZ;
>>  	caps->irrl_entry_sz	= HNS_ROCE_V2_IRRL_ENTRY_SZ;
>> -	caps->trrl_entry_sz	= HNS_ROCE_V2_TRRL_ENTRY_SZ;
>> +	caps->trrl_entry_sz	= HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ;
>>  	caps->cqc_entry_sz	= HNS_ROCE_V2_CQC_ENTRY_SZ;
>>  	caps->srqc_entry_sz	= HNS_ROCE_V2_SRQC_ENTRY_SZ;
>>  	caps->mtpt_entry_sz	= HNS_ROCE_V2_MTPT_ENTRY_SZ;
>> @@ -3286,6 +3286,9 @@ static void set_access_flags(struct hns_roce_qp *hr_qp,
>>  	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
>>  		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
>>  	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
>> +	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S,
>> +		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
>> +	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, 0);
>>  }
>>  
>>  static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
>> @@ -3653,6 +3656,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
>>  			     IB_ACCESS_REMOTE_ATOMIC));
>>  		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
>>  			     0);
>> +		roce_set_bit(context->byte_76_srqn_op_en,
>> +			     V2_QPC_BYTE_76_EXT_ATE_S,
>> +			     !!(attr->qp_access_flags &
>> +				IB_ACCESS_REMOTE_ATOMIC));
>> +		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
>> +			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
>>  	} else {
>>  		roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
>>  			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ));
>> @@ -3668,6 +3677,11 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
>>  			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
>>  		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
>>  			     0);
>> +		roce_set_bit(context->byte_76_srqn_op_en,
>> +			     V2_QPC_BYTE_76_EXT_ATE_S,
>> +			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
>> +		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
>> +			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
>>  	}
> 
> What happens to your userspace if it runs on an old kernel and tries
> to use extended atomic?
> 
> Jason
>

Hi Jason,

If the hns userspace runs with old kernel, the hardware will report a asynchronous
event for the extended atomic operation and modify the qp to error state because
the enable bit in this qp's context hasn't been set.

The driver will print like this:

[ 1252.240921] hns3 0000:7d:00.0: Invalid request local work queue 0x9 error.
[ 1252.247772] hns3 0000:7d:00.0: no hr_qp can be found!

Thanks
Weihang

> .
>
Jason Gunthorpe Jan. 16, 2020, 7:51 p.m. UTC | #3
On Thu, Jan 16, 2020 at 12:05:13PM +0800, Weihang Li wrote:
> 
> 
> On 2020/1/16 4:56, Jason Gunthorpe wrote:
> > On Wed, Jan 15, 2020 at 09:42:26AM +0800, Weihang Li wrote:
> >> From: Jiaran Zhang <zhangjiaran@huawei.com>
> >>
> >> To support extended atomic operations including cmp & swap and fetch & add
> >> of 8 bytes, 16 bytes, 32 bytes, 64 bytes in userspace, some field in qpc
> >> should be configured.
> >>
> >> Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
> >> Signed-off-by: Weihang Li <liweihang@huawei.com>
> >>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 16 +++++++++++++++-
> >>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h |  3 ++-
> >>  2 files changed, 17 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> >> index f1e0ba6..7edf3d8 100644
> >> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> >> @@ -1692,7 +1692,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
> >>  	caps->max_srq_desc_sz	= HNS_ROCE_V2_MAX_SRQ_DESC_SZ;
> >>  	caps->qpc_entry_sz	= HNS_ROCE_V2_QPC_ENTRY_SZ;
> >>  	caps->irrl_entry_sz	= HNS_ROCE_V2_IRRL_ENTRY_SZ;
> >> -	caps->trrl_entry_sz	= HNS_ROCE_V2_TRRL_ENTRY_SZ;
> >> +	caps->trrl_entry_sz	= HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ;
> >>  	caps->cqc_entry_sz	= HNS_ROCE_V2_CQC_ENTRY_SZ;
> >>  	caps->srqc_entry_sz	= HNS_ROCE_V2_SRQC_ENTRY_SZ;
> >>  	caps->mtpt_entry_sz	= HNS_ROCE_V2_MTPT_ENTRY_SZ;
> >> @@ -3286,6 +3286,9 @@ static void set_access_flags(struct hns_roce_qp *hr_qp,
> >>  	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
> >>  		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
> >>  	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
> >> +	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S,
> >> +		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
> >> +	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, 0);
> >>  }
> >>  
> >>  static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
> >> @@ -3653,6 +3656,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
> >>  			     IB_ACCESS_REMOTE_ATOMIC));
> >>  		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
> >>  			     0);
> >> +		roce_set_bit(context->byte_76_srqn_op_en,
> >> +			     V2_QPC_BYTE_76_EXT_ATE_S,
> >> +			     !!(attr->qp_access_flags &
> >> +				IB_ACCESS_REMOTE_ATOMIC));
> >> +		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
> >> +			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
> >>  	} else {
> >>  		roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
> >>  			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ));
> >> @@ -3668,6 +3677,11 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
> >>  			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
> >>  		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
> >>  			     0);
> >> +		roce_set_bit(context->byte_76_srqn_op_en,
> >> +			     V2_QPC_BYTE_76_EXT_ATE_S,
> >> +			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
> >> +		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
> >> +			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
> >>  	}
> > 
> > What happens to your userspace if it runs on an old kernel and tries
> > to use extended atomic?
> > 
> > Jason
> >
> 
> Hi Jason,
> 
> If the hns userspace runs with old kernel, the hardware will report a asynchronous
> event for the extended atomic operation and modify the qp to error state because
> the enable bit in this qp's context hasn't been set.
> 
> The driver will print like this:
> 
> [ 1252.240921] hns3 0000:7d:00.0: Invalid request local work queue 0x9 error.
> [ 1252.247772] hns3 0000:7d:00.0: no hr_qp can be found!

Ideally the provider will not set
IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP and related without kernel
support..

I've applied this patch, but I feel like you may need a followup to
fix the capability reporting?

Jason
Weihang Li Jan. 22, 2020, 8:54 a.m. UTC | #4
On 2020/1/17 3:51, Jason Gunthorpe wrote:
>>> What happens to your userspace if it runs on an old kernel and tries
>>> to use extended atomic?
>>>
>>> Jason
>>>
>> Hi Jason,
>>
>> If the hns userspace runs with old kernel, the hardware will report a asynchronous
>> event for the extended atomic operation and modify the qp to error state because
>> the enable bit in this qp's context hasn't been set.
>>
>> The driver will print like this:
>>
>> [ 1252.240921] hns3 0000:7d:00.0: Invalid request local work queue 0x9 error.
>> [ 1252.247772] hns3 0000:7d:00.0: no hr_qp can be found!
> Ideally the provider will not set
> IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP and related without kernel
> support..
> 
> I've applied this patch, but I feel like you may need a followup to
> fix the capability reporting?
> 
> Jason

Hi Jason,

Thank for your suggestions.

But I'm confuse about the relationship between "PCI ATOMIC" in this macro
and atomic operations in RDMA.

I found the related series on patchwork:
https://patchwork.kernel.org/cover/10782873/

And I found the description about atomic operations in PCIe specification
v4.0:

"An Atomic Operation (AtomicOp) is a single PCI Express transaction that
targets a location in Memory Space, reads the location’s value, potentially
writes a new value back to the location, and returns the original value. This
"read-modify-write" sequence to the location is performed atomically."

It seems that the atomic for PCIe and RDMA is different concepts, and the macro
IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP is for PCIe atomic.

Could you please give me more suggestions about them?

Thanks
Weihang
Tom Talpey Jan. 22, 2020, 2:08 p.m. UTC | #5
On 1/22/2020 3:54 AM, Weihang Li wrote:
> 
> 
> On 2020/1/17 3:51, Jason Gunthorpe wrote:
>>>> What happens to your userspace if it runs on an old kernel and tries
>>>> to use extended atomic?
>>>>
>>>> Jason
>>>>
>>> Hi Jason,
>>>
>>> If the hns userspace runs with old kernel, the hardware will report a asynchronous
>>> event for the extended atomic operation and modify the qp to error state because
>>> the enable bit in this qp's context hasn't been set.
>>>
>>> The driver will print like this:
>>>
>>> [ 1252.240921] hns3 0000:7d:00.0: Invalid request local work queue 0x9 error.
>>> [ 1252.247772] hns3 0000:7d:00.0: no hr_qp can be found!
>> Ideally the provider will not set
>> IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP and related without kernel
>> support..
>>
>> I've applied this patch, but I feel like you may need a followup to
>> fix the capability reporting?
>>
>> Jason
> 
> Hi Jason,
> 
> Thank for your suggestions.
> 
> But I'm confuse about the relationship between "PCI ATOMIC" in this macro
> and atomic operations in RDMA.

PCI Atomics are optonal and are a much more recent facility.

RDMA Atomics do not require PCI Atomics, because they have
different semantics with respect to memory atomicity. Read
carefully and you'll see that they operate atomically only
within the adapter, and are not atomic all the way to the
underlying memory. It's a long and somewhat historical story.

Now that PCIe Atomics are becoming more widely supported by
processor complexes, there is the possibility these may be
more tightly embraced by RDMA implementations. There is
discussion in IBTA and IETF around this currently, in fact,
for the new RDMA Atomic Write operation.

Be aware that PCI Atomics are relatively expensive operations.
The existing ones perform a read-modify-write cycle on both
PCI and memory busses. This overhead is not to be taken lightly.

Tom.

> I found the related series on patchwork:
> https://patchwork.kernel.org/cover/10782873/
> 
> And I found the description about atomic operations in PCIe specification
> v4.0:
> 
> "An Atomic Operation (AtomicOp) is a single PCI Express transaction that
> targets a location in Memory Space, reads the location’s value, potentially
> writes a new value back to the location, and returns the original value. This
> "read-modify-write" sequence to the location is performed atomically."
> 
> It seems that the atomic for PCIe and RDMA is different concepts, and the macro
> IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP is for PCIe atomic.
> 
> Could you please give me more suggestions about them?
> 
> Thanks
> Weihang
> 
> 
>
Jason Gunthorpe Jan. 23, 2020, 10:54 p.m. UTC | #6
On Wed, Jan 22, 2020 at 04:54:55PM +0800, Weihang Li wrote:
> 
> 
> On 2020/1/17 3:51, Jason Gunthorpe wrote:
> >>> What happens to your userspace if it runs on an old kernel and tries
> >>> to use extended atomic?
> >>>
> >>> Jason
> >>>
> >> Hi Jason,
> >>
> >> If the hns userspace runs with old kernel, the hardware will report a asynchronous
> >> event for the extended atomic operation and modify the qp to error state because
> >> the enable bit in this qp's context hasn't been set.
> >>
> >> The driver will print like this:
> >>
> >> [ 1252.240921] hns3 0000:7d:00.0: Invalid request local work queue 0x9 error.
> >> [ 1252.247772] hns3 0000:7d:00.0: no hr_qp can be found!
> > Ideally the provider will not set
> > IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP and related without kernel
> > support..
> > 
> > I've applied this patch, but I feel like you may need a followup to
> > fix the capability reporting?
> > 
> > Jason
> 
> Hi Jason,
> 
> Thank for your suggestions.
> 
> But I'm confuse about the relationship between "PCI ATOMIC" in this macro
> and atomic operations in RDMA.
> 
> I found the related series on patchwork:
> https://patchwork.kernel.org/cover/10782873/

I may have got the wrong capability bit here, I'm not sure where the
capability bits for extended atomics are actually

Jason
Weihang Li Jan. 26, 2020, 3:38 a.m. UTC | #7
On 2020/1/22 22:08, Tom Talpey wrote:
> On 1/22/2020 3:54 AM, Weihang Li wrote:
>>
>>
>> On 2020/1/17 3:51, Jason Gunthorpe wrote:
>>>>> What happens to your userspace if it runs on an old kernel and tries
>>>>> to use extended atomic?
>>>>>
>>>>> Jason
>>>>>
>>>> Hi Jason,
>>>>
>>>> If the hns userspace runs with old kernel, the hardware will report a asynchronous
>>>> event for the extended atomic operation and modify the qp to error state because
>>>> the enable bit in this qp's context hasn't been set.
>>>>
>>>> The driver will print like this:
>>>>
>>>> [ 1252.240921] hns3 0000:7d:00.0: Invalid request local work queue 0x9 error.
>>>> [ 1252.247772] hns3 0000:7d:00.0: no hr_qp can be found!
>>> Ideally the provider will not set
>>> IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP and related without kernel
>>> support..
>>>
>>> I've applied this patch, but I feel like you may need a followup to
>>> fix the capability reporting?
>>>
>>> Jason
>>
>> Hi Jason,
>>
>> Thank for your suggestions.
>>
>> But I'm confuse about the relationship between "PCI ATOMIC" in this macro
>> and atomic operations in RDMA.
> 
> PCI Atomics are optonal and are a much more recent facility.
> 
> RDMA Atomics do not require PCI Atomics, because they have
> different semantics with respect to memory atomicity. Read
> carefully and you'll see that they operate atomically only
> within the adapter, and are not atomic all the way to the
> underlying memory. It's a long and somewhat historical story.
> 
> Now that PCIe Atomics are becoming more widely supported by
> processor complexes, there is the possibility these may be
> more tightly embraced by RDMA implementations. There is
> discussion in IBTA and IETF around this currently, in fact,
> for the new RDMA Atomic Write operation.
> 
> Be aware that PCI Atomics are relatively expensive operations.
> The existing ones perform a read-modify-write cycle on both
> PCI and memory busses. This overhead is not to be taken lightly.
> 
> Tom.
> 

Hi Tom,

Thanks for your patience and detailed explanation :)
I know more about their relationship now.

Weihang


>> I found the related series on patchwork:
>> https://patchwork.kernel.org/cover/10782873/
>>
>> And I found the description about atomic operations in PCIe specification
>> v4.0:
>>
>> "An Atomic Operation (AtomicOp) is a single PCI Express transaction that
>> targets a location in Memory Space, reads the location’s value, potentially
>> writes a new value back to the location, and returns the original value. This
>> "read-modify-write" sequence to the location is performed atomically."
>>
>> It seems that the atomic for PCIe and RDMA is different concepts, and the macro
>> IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP is for PCIe atomic.
>>
>> Could you please give me more suggestions about them?
>>
>> Thanks
>> Weihang
>>
>>
>>
> 
> .
Weihang Li Jan. 26, 2020, 3:42 a.m. UTC | #8
On 2020/1/24 6:54, Jason Gunthorpe wrote:
> On Wed, Jan 22, 2020 at 04:54:55PM +0800, Weihang Li wrote:
>>
>>
>> On 2020/1/17 3:51, Jason Gunthorpe wrote:
>>>>> What happens to your userspace if it runs on an old kernel and tries
>>>>> to use extended atomic?
>>>>>
>>>>> Jason
>>>>>
>>>> Hi Jason,
>>>>
>>>> If the hns userspace runs with old kernel, the hardware will report a asynchronous
>>>> event for the extended atomic operation and modify the qp to error state because
>>>> the enable bit in this qp's context hasn't been set.
>>>>
>>>> The driver will print like this:
>>>>
>>>> [ 1252.240921] hns3 0000:7d:00.0: Invalid request local work queue 0x9 error.
>>>> [ 1252.247772] hns3 0000:7d:00.0: no hr_qp can be found!
>>> Ideally the provider will not set
>>> IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP and related without kernel
>>> support..
>>>
>>> I've applied this patch, but I feel like you may need a followup to
>>> fix the capability reporting?
>>>
>>> Jason
>>
>> Hi Jason,
>>
>> Thank for your suggestions.
>>
>> But I'm confuse about the relationship between "PCI ATOMIC" in this macro
>> and atomic operations in RDMA.
>>
>> I found the related series on patchwork:
>> https://patchwork.kernel.org/cover/10782873/
> 
> I may have got the wrong capability bit here, I'm not sure where the
> capability bits for extended atomics are actually
> 
> Jason

Hi Jason,

Thank you anyway. There seems no capability bit for extended atomic
currently. We will try to add one.

Weihang

>
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index f1e0ba6..7edf3d8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1692,7 +1692,7 @@  static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 	caps->max_srq_desc_sz	= HNS_ROCE_V2_MAX_SRQ_DESC_SZ;
 	caps->qpc_entry_sz	= HNS_ROCE_V2_QPC_ENTRY_SZ;
 	caps->irrl_entry_sz	= HNS_ROCE_V2_IRRL_ENTRY_SZ;
-	caps->trrl_entry_sz	= HNS_ROCE_V2_TRRL_ENTRY_SZ;
+	caps->trrl_entry_sz	= HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ;
 	caps->cqc_entry_sz	= HNS_ROCE_V2_CQC_ENTRY_SZ;
 	caps->srqc_entry_sz	= HNS_ROCE_V2_SRQC_ENTRY_SZ;
 	caps->mtpt_entry_sz	= HNS_ROCE_V2_MTPT_ENTRY_SZ;
@@ -3286,6 +3286,9 @@  static void set_access_flags(struct hns_roce_qp *hr_qp,
 	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
 		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
 	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
+	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S,
+		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
+	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, 0);
 }
 
 static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
@@ -3653,6 +3656,12 @@  static void modify_qp_init_to_init(struct ib_qp *ibqp,
 			     IB_ACCESS_REMOTE_ATOMIC));
 		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
 			     0);
+		roce_set_bit(context->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S,
+			     !!(attr->qp_access_flags &
+				IB_ACCESS_REMOTE_ATOMIC));
+		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
 	} else {
 		roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
 			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ));
@@ -3668,6 +3677,11 @@  static void modify_qp_init_to_init(struct ib_qp *ibqp,
 			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
 		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
 			     0);
+		roce_set_bit(context->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S,
+			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
+		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
 	}
 
 	roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 76a14db..c9be484 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -81,6 +81,7 @@ 
 #define HNS_ROCE_V2_QPC_ENTRY_SZ		256
 #define HNS_ROCE_V2_IRRL_ENTRY_SZ		64
 #define HNS_ROCE_V2_TRRL_ENTRY_SZ		48
+#define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ	100
 #define HNS_ROCE_V2_CQC_ENTRY_SZ		64
 #define HNS_ROCE_V2_SRQC_ENTRY_SZ		64
 #define HNS_ROCE_V2_MTPT_ENTRY_SZ		64
@@ -643,7 +644,7 @@  struct hns_roce_v2_qp_context {
 #define	V2_QPC_BYTE_76_ATE_S 27
 
 #define	V2_QPC_BYTE_76_RQIE_S 28
-
+#define	V2_QPC_BYTE_76_EXT_ATE_S 29
 #define	V2_QPC_BYTE_76_RQ_VLAN_EN_S 30
 #define	V2_QPC_BYTE_80_RX_CQN_S 0
 #define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0)