Message ID | 1605867440-2413-2-git-send-email-liweihang@huawei.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | RDMA/hns: Add supports for stash | expand |
On Fri, Nov 20, 2020 at 06:17:19PM +0800, Weihang Li wrote: > From: Lang Cheng <chenglang@huawei.com> > > Stash is a mechanism that uses the core information carried by the ARM AXI > bus to access the L3 cache. It can be used to improve the performance by > increasing the hit ratio of L3 cache. CQs need to enable stash by default. > > Signed-off-by: Lang Cheng <chenglang@huawei.com> > Signed-off-by: Weihang Li <liweihang@huawei.com> > drivers/infiniband/hw/hns/hns_roce_common.h | 10 ++++++++ > drivers/infiniband/hw/hns/hns_roce_device.h | 1 + > drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 +++ > drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 39 +++++++++++++++++------------ > 4 files changed, 37 insertions(+), 16 deletions(-) > > diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h > index f5669ff..41a2252 100644 > +++ b/drivers/infiniband/hw/hns/hns_roce_common.h > @@ -53,6 +53,16 @@ > #define roce_set_bit(origin, shift, val) \ > roce_set_field((origin), (1ul << (shift)), (shift), (val)) > > +#define FIELD_LOC(field_h, field_l) field_h, field_l > + > +#define _hr_reg_enable(arr, field_h, field_l) \ > + (arr)[(field_l) / 32] |= \ > + (BIT((field_l) % 32)) + \ > + BUILD_BUG_ON_ZERO((field_h) != (field_l)) + \ > + BUILD_BUG_ON_ZERO((field_l) / 32 >= ARRAY_SIZE(arr)) > + > +#define hr_reg_enable(arr, field) _hr_reg_enable(arr, field) > + > #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 > #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 > > diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h > index 1d99022..ab7df8e 100644 > +++ b/drivers/infiniband/hw/hns/hns_roce_device.h > @@ -223,6 +223,7 @@ enum { > HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9), > HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), > HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14), > + HNS_ROCE_CAP_FLAG_STASH = BIT(17), > }; > > #define HNS_ROCE_DB_TYPE_COUNT 2 > diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > index 4b82912..da7f909 100644 > +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > @@ -3177,6 +3177,9 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, > V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size == > HNS_ROCE_V3_CQE_SIZE ? 1 : 0); > > + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) > + hr_reg_enable(cq_context->raw, CQC_STASH); > + > cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); > > roce_set_field(cq_context->byte_16_hop_addr, > diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h > index 1409d05..50a5187 100644 > +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h > @@ -267,22 +267,27 @@ enum hns_roce_sgid_type { > }; > > struct hns_roce_v2_cq_context { > - __le32 byte_4_pg_ceqn; > - __le32 byte_8_cqn; > - __le32 cqe_cur_blk_addr; > - __le32 byte_16_hop_addr; > - __le32 cqe_nxt_blk_addr; > - __le32 byte_24_pgsz_addr; > - __le32 byte_28_cq_pi; > - __le32 byte_32_cq_ci; > - __le32 cqe_ba; > - __le32 byte_40_cqe_ba; > - __le32 byte_44_db_record; > - __le32 db_record_addr; > - __le32 byte_52_cqe_cnt; > - __le32 byte_56_cqe_period_maxcnt; > - __le32 cqe_report_timer; > - __le32 byte_64_se_cqe_idx; > + union { > + struct { > + __le32 byte_4_pg_ceqn; > + __le32 byte_8_cqn; > + __le32 cqe_cur_blk_addr; > + __le32 byte_16_hop_addr; > + __le32 cqe_nxt_blk_addr; > + __le32 byte_24_pgsz_addr; > + __le32 byte_28_cq_pi; > + __le32 byte_32_cq_ci; > + __le32 cqe_ba; > + __le32 byte_40_cqe_ba; > + __le32 byte_44_db_record; > + __le32 db_record_addr; > + __le32 byte_52_cqe_cnt; > + __le32 byte_56_cqe_period_maxcnt; > + __le32 cqe_report_timer; > + __le32 byte_64_se_cqe_idx; > + }; > + __le32 raw[16]; > + }; It has missed the point of how the FIELD_LOC worked in the iba macros, you want to specify the type FIELD_LOC(struct hns_roce_v2_cq_context, 63, 63) And not introduce a raw array in a union, just validate the type and cast it to a __le32 * And again, if you are going to be building macros like this then setting fields to all ones must be the corner case. Write it more clearly: hr_reg_set(cq_context, CQC_STASH, FIELD_ALL_ONES(CQC_STASH)); Now you can replace some of the other macros with the safer/simpler scheme. Jason
On 2020/11/21 4:13, Jason Gunthorpe wrote: > On Fri, Nov 20, 2020 at 06:17:19PM +0800, Weihang Li wrote: >> From: Lang Cheng <chenglang@huawei.com> >> >> Stash is a mechanism that uses the core information carried by the ARM AXI >> bus to access the L3 cache. It can be used to improve the performance by >> increasing the hit ratio of L3 cache. CQs need to enable stash by default. >> >> Signed-off-by: Lang Cheng <chenglang@huawei.com> >> Signed-off-by: Weihang Li <liweihang@huawei.com> >> drivers/infiniband/hw/hns/hns_roce_common.h | 10 ++++++++ >> drivers/infiniband/hw/hns/hns_roce_device.h | 1 + >> drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 +++ >> drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 39 +++++++++++++++++------------ >> 4 files changed, 37 insertions(+), 16 deletions(-) >> >> diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h >> index f5669ff..41a2252 100644 >> +++ b/drivers/infiniband/hw/hns/hns_roce_common.h >> @@ -53,6 +53,16 @@ >> #define roce_set_bit(origin, shift, val) \ >> roce_set_field((origin), (1ul << (shift)), (shift), (val)) >> >> +#define FIELD_LOC(field_h, field_l) field_h, field_l >> + >> +#define _hr_reg_enable(arr, field_h, field_l) \ >> + (arr)[(field_l) / 32] |= \ >> + (BIT((field_l) % 32)) + \ >> + BUILD_BUG_ON_ZERO((field_h) != (field_l)) + \ >> + BUILD_BUG_ON_ZERO((field_l) / 32 >= ARRAY_SIZE(arr)) >> + >> +#define hr_reg_enable(arr, field) _hr_reg_enable(arr, field) >> + >> #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 >> #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 >> >> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h >> index 1d99022..ab7df8e 100644 >> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h >> @@ -223,6 +223,7 @@ enum { >> HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9), >> HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), >> HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14), >> + HNS_ROCE_CAP_FLAG_STASH = BIT(17), >> }; >> >> #define HNS_ROCE_DB_TYPE_COUNT 2 >> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c >> index 4b82912..da7f909 100644 >> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c >> @@ -3177,6 +3177,9 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, >> V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size == >> HNS_ROCE_V3_CQE_SIZE ? 1 : 0); >> >> + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) >> + hr_reg_enable(cq_context->raw, CQC_STASH); >> + >> cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); >> >> roce_set_field(cq_context->byte_16_hop_addr, >> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h >> index 1409d05..50a5187 100644 >> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h >> @@ -267,22 +267,27 @@ enum hns_roce_sgid_type { >> }; >> >> struct hns_roce_v2_cq_context { >> - __le32 byte_4_pg_ceqn; >> - __le32 byte_8_cqn; >> - __le32 cqe_cur_blk_addr; >> - __le32 byte_16_hop_addr; >> - __le32 cqe_nxt_blk_addr; >> - __le32 byte_24_pgsz_addr; >> - __le32 byte_28_cq_pi; >> - __le32 byte_32_cq_ci; >> - __le32 cqe_ba; >> - __le32 byte_40_cqe_ba; >> - __le32 byte_44_db_record; >> - __le32 db_record_addr; >> - __le32 byte_52_cqe_cnt; >> - __le32 byte_56_cqe_period_maxcnt; >> - __le32 cqe_report_timer; >> - __le32 byte_64_se_cqe_idx; >> + union { >> + struct { >> + __le32 byte_4_pg_ceqn; >> + __le32 byte_8_cqn; >> + __le32 cqe_cur_blk_addr; >> + __le32 byte_16_hop_addr; >> + __le32 cqe_nxt_blk_addr; >> + __le32 byte_24_pgsz_addr; >> + __le32 byte_28_cq_pi; >> + __le32 byte_32_cq_ci; >> + __le32 cqe_ba; >> + __le32 byte_40_cqe_ba; >> + __le32 byte_44_db_record; >> + __le32 db_record_addr; >> + __le32 byte_52_cqe_cnt; >> + __le32 byte_56_cqe_period_maxcnt; >> + __le32 cqe_report_timer; >> + __le32 byte_64_se_cqe_idx; >> + }; >> + __le32 raw[16]; >> + }; > > It has missed the point of how the FIELD_LOC worked in the iba macros, > you want to specify the type > > FIELD_LOC(struct hns_roce_v2_cq_context, 63, 63) > > And not introduce a raw array in a union, just validate the type and > cast it to a __le32 * > Thank you, we will modify it as you suggest. > And again, if you are going to be building macros like this then > setting fields to all ones must be the corner case. > > Write it more clearly: > > hr_reg_set(cq_context, CQC_STASH, FIELD_ALL_ONES(CQC_STASH)); > OK, we will use hr_reg_enable(ptr, field) to set a single bit to one. And use hr_reg_write(ptr, field, val) to fill a field with a value, which can be also used like hr_reg_write(ptr, field, FIELD_ALL_ONES(field)). Weihang > Now you can replace some of the other macros with the safer/simpler > scheme. > > Jason >
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index f5669ff..41a2252 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -53,6 +53,16 @@ #define roce_set_bit(origin, shift, val) \ roce_set_field((origin), (1ul << (shift)), (shift), (val)) +#define FIELD_LOC(field_h, field_l) field_h, field_l + +#define _hr_reg_enable(arr, field_h, field_l) \ + (arr)[(field_l) / 32] |= \ + (BIT((field_l) % 32)) + \ + BUILD_BUG_ON_ZERO((field_h) != (field_l)) + \ + BUILD_BUG_ON_ZERO((field_l) / 32 >= ARRAY_SIZE(arr)) + +#define hr_reg_enable(arr, field) _hr_reg_enable(arr, field) + #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1d99022..ab7df8e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -223,6 +223,7 @@ enum { HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9), HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14), + HNS_ROCE_CAP_FLAG_STASH = BIT(17), }; #define HNS_ROCE_DB_TYPE_COUNT 2 diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4b82912..da7f909 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3177,6 +3177,9 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size == HNS_ROCE_V3_CQE_SIZE ? 1 : 0); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) + hr_reg_enable(cq_context->raw, CQC_STASH); + cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); roce_set_field(cq_context->byte_16_hop_addr, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 1409d05..50a5187 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -267,22 +267,27 @@ enum hns_roce_sgid_type { }; struct hns_roce_v2_cq_context { - __le32 byte_4_pg_ceqn; - __le32 byte_8_cqn; - __le32 cqe_cur_blk_addr; - __le32 byte_16_hop_addr; - __le32 cqe_nxt_blk_addr; - __le32 byte_24_pgsz_addr; - __le32 byte_28_cq_pi; - __le32 byte_32_cq_ci; - __le32 cqe_ba; - __le32 byte_40_cqe_ba; - __le32 byte_44_db_record; - __le32 db_record_addr; - __le32 byte_52_cqe_cnt; - __le32 byte_56_cqe_period_maxcnt; - __le32 cqe_report_timer; - __le32 byte_64_se_cqe_idx; + union { + struct { + __le32 byte_4_pg_ceqn; + __le32 byte_8_cqn; + __le32 cqe_cur_blk_addr; + __le32 byte_16_hop_addr; + __le32 cqe_nxt_blk_addr; + __le32 byte_24_pgsz_addr; + __le32 byte_28_cq_pi; + __le32 byte_32_cq_ci; + __le32 cqe_ba; + __le32 byte_40_cqe_ba; + __le32 byte_44_db_record; + __le32 db_record_addr; + __le32 byte_52_cqe_cnt; + __le32 byte_56_cqe_period_maxcnt; + __le32 cqe_report_timer; + __le32 byte_64_se_cqe_idx; + }; + __le32 raw[16]; + }; }; #define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0 #define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0 @@ -360,6 +365,8 @@ struct hns_roce_v2_cq_context { #define V2_CQC_BYTE_64_SE_CQE_IDX_S 0 #define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0) +#define CQC_STASH FIELD_LOC(63, 63) + struct hns_roce_srq_context { __le32 byte_4_srqn_srqst; __le32 byte_8_limit_wl;