Message ID | 1579052546-11746-1-git-send-email-liweihang@huawei.com (mailing list archive) |
---|---|
State | Mainlined |
Commit | 7db82697b8bf05ae56d02bf8da998bcd1122531d |
Delegated to: | Jason Gunthorpe |
Headers | show |
Series | [for-next] RDMA/hns: Add support for extended atomic in userspace | expand |
On Wed, Jan 15, 2020 at 09:42:26AM +0800, Weihang Li wrote: > From: Jiaran Zhang <zhangjiaran@huawei.com> > > To support extended atomic operations including cmp & swap and fetch & add > of 8 bytes, 16 bytes, 32 bytes, 64 bytes in userspace, some field in qpc > should be configured. > > Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com> > Signed-off-by: Weihang Li <liweihang@huawei.com> > drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 16 +++++++++++++++- > drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 3 ++- > 2 files changed, 17 insertions(+), 2 deletions(-) > > diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > index f1e0ba6..7edf3d8 100644 > +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > @@ -1692,7 +1692,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) > caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ; > caps->qpc_entry_sz = HNS_ROCE_V2_QPC_ENTRY_SZ; > caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ; > - caps->trrl_entry_sz = HNS_ROCE_V2_TRRL_ENTRY_SZ; > + caps->trrl_entry_sz = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ; > caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; > caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ; > caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ; > @@ -3286,6 +3286,9 @@ static void set_access_flags(struct hns_roce_qp *hr_qp, > roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, > !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); > roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0); > + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, > + !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); > + roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, 0); > } > > static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, > @@ -3653,6 +3656,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, > IB_ACCESS_REMOTE_ATOMIC)); > roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, > 0); > + roce_set_bit(context->byte_76_srqn_op_en, > + V2_QPC_BYTE_76_EXT_ATE_S, > + !!(attr->qp_access_flags & > + IB_ACCESS_REMOTE_ATOMIC)); > + roce_set_bit(qpc_mask->byte_76_srqn_op_en, > + V2_QPC_BYTE_76_EXT_ATE_S, 0); > } else { > roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, > !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ)); > @@ -3668,6 +3677,11 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, > !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); > roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, > 0); > + roce_set_bit(context->byte_76_srqn_op_en, > + V2_QPC_BYTE_76_EXT_ATE_S, > + !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); > + roce_set_bit(qpc_mask->byte_76_srqn_op_en, > + V2_QPC_BYTE_76_EXT_ATE_S, 0); > } What happens to your userspace if it runs on an old kernel and tries to use extended atomic? Jason
On 2020/1/16 4:56, Jason Gunthorpe wrote: > On Wed, Jan 15, 2020 at 09:42:26AM +0800, Weihang Li wrote: >> From: Jiaran Zhang <zhangjiaran@huawei.com> >> >> To support extended atomic operations including cmp & swap and fetch & add >> of 8 bytes, 16 bytes, 32 bytes, 64 bytes in userspace, some field in qpc >> should be configured. >> >> Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com> >> Signed-off-by: Weihang Li <liweihang@huawei.com> >> drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 16 +++++++++++++++- >> drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 3 ++- >> 2 files changed, 17 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c >> index f1e0ba6..7edf3d8 100644 >> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c >> @@ -1692,7 +1692,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) >> caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ; >> caps->qpc_entry_sz = HNS_ROCE_V2_QPC_ENTRY_SZ; >> caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ; >> - caps->trrl_entry_sz = HNS_ROCE_V2_TRRL_ENTRY_SZ; >> + caps->trrl_entry_sz = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ; >> caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; >> caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ; >> caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ; >> @@ -3286,6 +3286,9 @@ static void set_access_flags(struct hns_roce_qp *hr_qp, >> roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, >> !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); >> roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0); >> + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, >> + !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); >> + roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, 0); >> } >> >> static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, >> @@ -3653,6 +3656,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, >> IB_ACCESS_REMOTE_ATOMIC)); >> roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, >> 0); >> + roce_set_bit(context->byte_76_srqn_op_en, >> + V2_QPC_BYTE_76_EXT_ATE_S, >> + !!(attr->qp_access_flags & >> + IB_ACCESS_REMOTE_ATOMIC)); >> + roce_set_bit(qpc_mask->byte_76_srqn_op_en, >> + V2_QPC_BYTE_76_EXT_ATE_S, 0); >> } else { >> roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, >> !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ)); >> @@ -3668,6 +3677,11 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, >> !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); >> roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, >> 0); >> + roce_set_bit(context->byte_76_srqn_op_en, >> + V2_QPC_BYTE_76_EXT_ATE_S, >> + !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); >> + roce_set_bit(qpc_mask->byte_76_srqn_op_en, >> + V2_QPC_BYTE_76_EXT_ATE_S, 0); >> } > > What happens to your userspace if it runs on an old kernel and tries > to use extended atomic? > > Jason > Hi Jason, If the hns userspace runs with old kernel, the hardware will report a asynchronous event for the extended atomic operation and modify the qp to error state because the enable bit in this qp's context hasn't been set. The driver will print like this: [ 1252.240921] hns3 0000:7d:00.0: Invalid request local work queue 0x9 error. [ 1252.247772] hns3 0000:7d:00.0: no hr_qp can be found! Thanks Weihang > . >
On Thu, Jan 16, 2020 at 12:05:13PM +0800, Weihang Li wrote: > > > On 2020/1/16 4:56, Jason Gunthorpe wrote: > > On Wed, Jan 15, 2020 at 09:42:26AM +0800, Weihang Li wrote: > >> From: Jiaran Zhang <zhangjiaran@huawei.com> > >> > >> To support extended atomic operations including cmp & swap and fetch & add > >> of 8 bytes, 16 bytes, 32 bytes, 64 bytes in userspace, some field in qpc > >> should be configured. > >> > >> Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com> > >> Signed-off-by: Weihang Li <liweihang@huawei.com> > >> drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 16 +++++++++++++++- > >> drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 3 ++- > >> 2 files changed, 17 insertions(+), 2 deletions(-) > >> > >> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > >> index f1e0ba6..7edf3d8 100644 > >> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > >> @@ -1692,7 +1692,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) > >> caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ; > >> caps->qpc_entry_sz = HNS_ROCE_V2_QPC_ENTRY_SZ; > >> caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ; > >> - caps->trrl_entry_sz = HNS_ROCE_V2_TRRL_ENTRY_SZ; > >> + caps->trrl_entry_sz = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ; > >> caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; > >> caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ; > >> caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ; > >> @@ -3286,6 +3286,9 @@ static void set_access_flags(struct hns_roce_qp *hr_qp, > >> roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, > >> !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); > >> roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0); > >> + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, > >> + !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); > >> + roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, 0); > >> } > >> > >> static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, > >> @@ -3653,6 +3656,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, > >> IB_ACCESS_REMOTE_ATOMIC)); > >> roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, > >> 0); > >> + roce_set_bit(context->byte_76_srqn_op_en, > >> + V2_QPC_BYTE_76_EXT_ATE_S, > >> + !!(attr->qp_access_flags & > >> + IB_ACCESS_REMOTE_ATOMIC)); > >> + roce_set_bit(qpc_mask->byte_76_srqn_op_en, > >> + V2_QPC_BYTE_76_EXT_ATE_S, 0); > >> } else { > >> roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, > >> !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ)); > >> @@ -3668,6 +3677,11 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, > >> !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); > >> roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, > >> 0); > >> + roce_set_bit(context->byte_76_srqn_op_en, > >> + V2_QPC_BYTE_76_EXT_ATE_S, > >> + !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); > >> + roce_set_bit(qpc_mask->byte_76_srqn_op_en, > >> + V2_QPC_BYTE_76_EXT_ATE_S, 0); > >> } > > > > What happens to your userspace if it runs on an old kernel and tries > > to use extended atomic? > > > > Jason > > > > Hi Jason, > > If the hns userspace runs with old kernel, the hardware will report a asynchronous > event for the extended atomic operation and modify the qp to error state because > the enable bit in this qp's context hasn't been set. > > The driver will print like this: > > [ 1252.240921] hns3 0000:7d:00.0: Invalid request local work queue 0x9 error. > [ 1252.247772] hns3 0000:7d:00.0: no hr_qp can be found! Ideally the provider will not set IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP and related without kernel support.. I've applied this patch, but I feel like you may need a followup to fix the capability reporting? Jason
On 2020/1/17 3:51, Jason Gunthorpe wrote: >>> What happens to your userspace if it runs on an old kernel and tries >>> to use extended atomic? >>> >>> Jason >>> >> Hi Jason, >> >> If the hns userspace runs with old kernel, the hardware will report a asynchronous >> event for the extended atomic operation and modify the qp to error state because >> the enable bit in this qp's context hasn't been set. >> >> The driver will print like this: >> >> [ 1252.240921] hns3 0000:7d:00.0: Invalid request local work queue 0x9 error. >> [ 1252.247772] hns3 0000:7d:00.0: no hr_qp can be found! > Ideally the provider will not set > IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP and related without kernel > support.. > > I've applied this patch, but I feel like you may need a followup to > fix the capability reporting? > > Jason Hi Jason, Thank for your suggestions. But I'm confuse about the relationship between "PCI ATOMIC" in this macro and atomic operations in RDMA. I found the related series on patchwork: https://patchwork.kernel.org/cover/10782873/ And I found the description about atomic operations in PCIe specification v4.0: "An Atomic Operation (AtomicOp) is a single PCI Express transaction that targets a location in Memory Space, reads the location’s value, potentially writes a new value back to the location, and returns the original value. This "read-modify-write" sequence to the location is performed atomically." It seems that the atomic for PCIe and RDMA is different concepts, and the macro IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP is for PCIe atomic. Could you please give me more suggestions about them? Thanks Weihang
On 1/22/2020 3:54 AM, Weihang Li wrote: > > > On 2020/1/17 3:51, Jason Gunthorpe wrote: >>>> What happens to your userspace if it runs on an old kernel and tries >>>> to use extended atomic? >>>> >>>> Jason >>>> >>> Hi Jason, >>> >>> If the hns userspace runs with old kernel, the hardware will report a asynchronous >>> event for the extended atomic operation and modify the qp to error state because >>> the enable bit in this qp's context hasn't been set. >>> >>> The driver will print like this: >>> >>> [ 1252.240921] hns3 0000:7d:00.0: Invalid request local work queue 0x9 error. >>> [ 1252.247772] hns3 0000:7d:00.0: no hr_qp can be found! >> Ideally the provider will not set >> IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP and related without kernel >> support.. >> >> I've applied this patch, but I feel like you may need a followup to >> fix the capability reporting? >> >> Jason > > Hi Jason, > > Thank for your suggestions. > > But I'm confuse about the relationship between "PCI ATOMIC" in this macro > and atomic operations in RDMA. PCI Atomics are optonal and are a much more recent facility. RDMA Atomics do not require PCI Atomics, because they have different semantics with respect to memory atomicity. Read carefully and you'll see that they operate atomically only within the adapter, and are not atomic all the way to the underlying memory. It's a long and somewhat historical story. Now that PCIe Atomics are becoming more widely supported by processor complexes, there is the possibility these may be more tightly embraced by RDMA implementations. There is discussion in IBTA and IETF around this currently, in fact, for the new RDMA Atomic Write operation. Be aware that PCI Atomics are relatively expensive operations. The existing ones perform a read-modify-write cycle on both PCI and memory busses. This overhead is not to be taken lightly. Tom. > I found the related series on patchwork: > https://patchwork.kernel.org/cover/10782873/ > > And I found the description about atomic operations in PCIe specification > v4.0: > > "An Atomic Operation (AtomicOp) is a single PCI Express transaction that > targets a location in Memory Space, reads the location’s value, potentially > writes a new value back to the location, and returns the original value. This > "read-modify-write" sequence to the location is performed atomically." > > It seems that the atomic for PCIe and RDMA is different concepts, and the macro > IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP is for PCIe atomic. > > Could you please give me more suggestions about them? > > Thanks > Weihang > > >
On Wed, Jan 22, 2020 at 04:54:55PM +0800, Weihang Li wrote: > > > On 2020/1/17 3:51, Jason Gunthorpe wrote: > >>> What happens to your userspace if it runs on an old kernel and tries > >>> to use extended atomic? > >>> > >>> Jason > >>> > >> Hi Jason, > >> > >> If the hns userspace runs with old kernel, the hardware will report a asynchronous > >> event for the extended atomic operation and modify the qp to error state because > >> the enable bit in this qp's context hasn't been set. > >> > >> The driver will print like this: > >> > >> [ 1252.240921] hns3 0000:7d:00.0: Invalid request local work queue 0x9 error. > >> [ 1252.247772] hns3 0000:7d:00.0: no hr_qp can be found! > > Ideally the provider will not set > > IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP and related without kernel > > support.. > > > > I've applied this patch, but I feel like you may need a followup to > > fix the capability reporting? > > > > Jason > > Hi Jason, > > Thank for your suggestions. > > But I'm confuse about the relationship between "PCI ATOMIC" in this macro > and atomic operations in RDMA. > > I found the related series on patchwork: > https://patchwork.kernel.org/cover/10782873/ I may have got the wrong capability bit here, I'm not sure where the capability bits for extended atomics are actually Jason
On 2020/1/22 22:08, Tom Talpey wrote: > On 1/22/2020 3:54 AM, Weihang Li wrote: >> >> >> On 2020/1/17 3:51, Jason Gunthorpe wrote: >>>>> What happens to your userspace if it runs on an old kernel and tries >>>>> to use extended atomic? >>>>> >>>>> Jason >>>>> >>>> Hi Jason, >>>> >>>> If the hns userspace runs with old kernel, the hardware will report a asynchronous >>>> event for the extended atomic operation and modify the qp to error state because >>>> the enable bit in this qp's context hasn't been set. >>>> >>>> The driver will print like this: >>>> >>>> [ 1252.240921] hns3 0000:7d:00.0: Invalid request local work queue 0x9 error. >>>> [ 1252.247772] hns3 0000:7d:00.0: no hr_qp can be found! >>> Ideally the provider will not set >>> IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP and related without kernel >>> support.. >>> >>> I've applied this patch, but I feel like you may need a followup to >>> fix the capability reporting? >>> >>> Jason >> >> Hi Jason, >> >> Thank for your suggestions. >> >> But I'm confuse about the relationship between "PCI ATOMIC" in this macro >> and atomic operations in RDMA. > > PCI Atomics are optonal and are a much more recent facility. > > RDMA Atomics do not require PCI Atomics, because they have > different semantics with respect to memory atomicity. Read > carefully and you'll see that they operate atomically only > within the adapter, and are not atomic all the way to the > underlying memory. It's a long and somewhat historical story. > > Now that PCIe Atomics are becoming more widely supported by > processor complexes, there is the possibility these may be > more tightly embraced by RDMA implementations. There is > discussion in IBTA and IETF around this currently, in fact, > for the new RDMA Atomic Write operation. > > Be aware that PCI Atomics are relatively expensive operations. > The existing ones perform a read-modify-write cycle on both > PCI and memory busses. This overhead is not to be taken lightly. > > Tom. > Hi Tom, Thanks for your patience and detailed explanation :) I know more about their relationship now. Weihang >> I found the related series on patchwork: >> https://patchwork.kernel.org/cover/10782873/ >> >> And I found the description about atomic operations in PCIe specification >> v4.0: >> >> "An Atomic Operation (AtomicOp) is a single PCI Express transaction that >> targets a location in Memory Space, reads the location’s value, potentially >> writes a new value back to the location, and returns the original value. This >> "read-modify-write" sequence to the location is performed atomically." >> >> It seems that the atomic for PCIe and RDMA is different concepts, and the macro >> IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP is for PCIe atomic. >> >> Could you please give me more suggestions about them? >> >> Thanks >> Weihang >> >> >> > > .
On 2020/1/24 6:54, Jason Gunthorpe wrote: > On Wed, Jan 22, 2020 at 04:54:55PM +0800, Weihang Li wrote: >> >> >> On 2020/1/17 3:51, Jason Gunthorpe wrote: >>>>> What happens to your userspace if it runs on an old kernel and tries >>>>> to use extended atomic? >>>>> >>>>> Jason >>>>> >>>> Hi Jason, >>>> >>>> If the hns userspace runs with old kernel, the hardware will report a asynchronous >>>> event for the extended atomic operation and modify the qp to error state because >>>> the enable bit in this qp's context hasn't been set. >>>> >>>> The driver will print like this: >>>> >>>> [ 1252.240921] hns3 0000:7d:00.0: Invalid request local work queue 0x9 error. >>>> [ 1252.247772] hns3 0000:7d:00.0: no hr_qp can be found! >>> Ideally the provider will not set >>> IBV_PCI_ATOMIC_OPERATION_4_BYTE_SIZE_SUP and related without kernel >>> support.. >>> >>> I've applied this patch, but I feel like you may need a followup to >>> fix the capability reporting? >>> >>> Jason >> >> Hi Jason, >> >> Thank for your suggestions. >> >> But I'm confuse about the relationship between "PCI ATOMIC" in this macro >> and atomic operations in RDMA. >> >> I found the related series on patchwork: >> https://patchwork.kernel.org/cover/10782873/ > > I may have got the wrong capability bit here, I'm not sure where the > capability bits for extended atomics are actually > > Jason Hi Jason, Thank you anyway. There seems no capability bit for extended atomic currently. We will try to add one. Weihang >
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f1e0ba6..7edf3d8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1692,7 +1692,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ; caps->qpc_entry_sz = HNS_ROCE_V2_QPC_ENTRY_SZ; caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ; - caps->trrl_entry_sz = HNS_ROCE_V2_TRRL_ENTRY_SZ; + caps->trrl_entry_sz = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ; caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ; caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ; @@ -3286,6 +3286,9 @@ static void set_access_flags(struct hns_roce_qp *hr_qp, roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0); + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, + !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, 0); } static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, @@ -3653,6 +3656,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, IB_ACCESS_REMOTE_ATOMIC)); roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0); + roce_set_bit(context->byte_76_srqn_op_en, + V2_QPC_BYTE_76_EXT_ATE_S, + !!(attr->qp_access_flags & + IB_ACCESS_REMOTE_ATOMIC)); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, + V2_QPC_BYTE_76_EXT_ATE_S, 0); } else { roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ)); @@ -3668,6 +3677,11 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0); + roce_set_bit(context->byte_76_srqn_op_en, + V2_QPC_BYTE_76_EXT_ATE_S, + !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, + V2_QPC_BYTE_76_EXT_ATE_S, 0); } roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 76a14db..c9be484 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -81,6 +81,7 @@ #define HNS_ROCE_V2_QPC_ENTRY_SZ 256 #define HNS_ROCE_V2_IRRL_ENTRY_SZ 64 #define HNS_ROCE_V2_TRRL_ENTRY_SZ 48 +#define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ 100 #define HNS_ROCE_V2_CQC_ENTRY_SZ 64 #define HNS_ROCE_V2_SRQC_ENTRY_SZ 64 #define HNS_ROCE_V2_MTPT_ENTRY_SZ 64 @@ -643,7 +644,7 @@ struct hns_roce_v2_qp_context { #define V2_QPC_BYTE_76_ATE_S 27 #define V2_QPC_BYTE_76_RQIE_S 28 - +#define V2_QPC_BYTE_76_EXT_ATE_S 29 #define V2_QPC_BYTE_76_RQ_VLAN_EN_S 30 #define V2_QPC_BYTE_80_RX_CQN_S 0 #define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0)