@@ -202,6 +202,7 @@ struct hns_roce_ucontext {
struct list_head page_list;
struct mutex page_mutex;
struct hns_user_mmap_entry *db_mmap_entry;
+ u32 config;
};
struct hns_roce_pd {
@@ -334,6 +335,7 @@ struct hns_roce_wq {
u32 head;
u32 tail;
void __iomem *db_reg;
+ u32 ext_sge_cnt;
};
struct hns_roce_sge {
@@ -635,6 +637,7 @@ struct hns_roce_qp {
struct list_head rq_node; /* all recv qps are on a list */
struct list_head sq_node; /* all send qps are on a list */
struct hns_user_mmap_entry *dwqe_mmap_entry;
+ u32 config;
};
struct hns_roce_ib_iboe {
@@ -187,14 +187,6 @@ static void set_atomic_seg(const struct ib_send_wr *wr,
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
}
-static unsigned int get_std_sge_num(struct hns_roce_qp *qp)
-{
- if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD)
- return 0;
-
- return HNS_ROCE_SGE_IN_WQE;
-}
-
static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
const struct ib_send_wr *wr,
unsigned int *sge_idx, u32 msg_len)
@@ -202,14 +194,12 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev;
unsigned int left_len_in_pg;
unsigned int idx = *sge_idx;
- unsigned int std_sge_num;
unsigned int i = 0;
unsigned int len;
void *addr;
void *dseg;
- std_sge_num = get_std_sge_num(qp);
- if (msg_len > (qp->sq.max_gs - std_sge_num) * HNS_ROCE_SGE_SIZE) {
+ if (msg_len > qp->sq.ext_sge_cnt * HNS_ROCE_SGE_SIZE) {
ibdev_err(ibdev,
"no enough extended sge space for inline data.\n");
return -EINVAL;
@@ -354,10 +354,11 @@ static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx)
static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
- int ret;
struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
- struct hns_roce_ib_alloc_ucontext_resp resp = {};
struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
+ struct hns_roce_ib_alloc_ucontext_resp resp = {};
+ struct hns_roce_ib_alloc_ucontext ucmd = {};
+ int ret;
if (!hr_dev->active)
return -EAGAIN;
@@ -365,6 +366,19 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
resp.qp_tab_size = hr_dev->caps.num_qps;
resp.srq_tab_size = hr_dev->caps.num_srqs;
+ ret = ib_copy_from_udata(&ucmd, udata,
+ min(udata->inlen, sizeof(ucmd)));
+ if (ret)
+ return ret;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ context->config = ucmd.config & HNS_ROCE_EXSGE_FLAGS;
+
+ if (context->config & HNS_ROCE_EXSGE_FLAGS) {
+ resp.config |= HNS_ROCE_RSP_EXSGE_FLAGS;
+ resp.max_inline_data = hr_dev->caps.max_sq_inline;
+ }
+
ret = hns_roce_uar_alloc(hr_dev, &context->uar);
if (ret)
goto error_fail_uar_alloc;
@@ -476,38 +476,109 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
return 0;
}
-static u32 get_wqe_ext_sge_cnt(struct hns_roce_qp *qp)
+static u32 get_max_inline_data(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap)
{
- /* GSI/UD QP only has extended sge */
- if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD)
- return qp->sq.max_gs;
-
- if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
- return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE;
+ if (cap->max_inline_data) {
+ cap->max_inline_data = roundup_pow_of_two(cap->max_inline_data);
+ return min(cap->max_inline_data,
+ hr_dev->caps.max_sq_inline);
+ }
return 0;
}
+static void update_inline_data(struct hns_roce_qp *hr_qp,
+ struct ib_qp_cap *cap)
+{
+ u32 sge_num = hr_qp->sq.ext_sge_cnt;
+
+ if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
+ if (!(hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD))
+ sge_num = max((u32)HNS_ROCE_SGE_IN_WQE, sge_num);
+
+ cap->max_inline_data = max(cap->max_inline_data,
+ sge_num * HNS_ROCE_SGE_SIZE);
+ }
+
+ hr_qp->max_inline_data = cap->max_inline_data;
+}
+
+static u32 get_sge_num_from_max_send_sge(bool is_ud_or_gsi,
+ u32 max_send_sge)
+{
+ unsigned int std_sge_num;
+ unsigned int min_sge;
+
+ std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
+ min_sge = is_ud_or_gsi ? 1 : 0;
+ return max_send_sge > std_sge_num ? (max_send_sge - std_sge_num) :
+ min_sge;
+}
+
+static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi,
+ u32 max_inline_data)
+{
+ unsigned int inline_sge;
+
+ inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE;
+
+ /*
+ * if max_inline_data less than
+ * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE,
+ * In addition to ud's mode, no need to extend sge.
+ */
+ if (!is_ud_or_gsi && inline_sge <= HNS_ROCE_SGE_IN_WQE)
+ inline_sge = 0;
+
+ return inline_sge;
+}
+
static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap)
{
+ bool is_ud_or_gsi = (hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD);
+ unsigned int std_sge_num;
+ u32 inline_ext_sge = 0;
+ u32 ext_wqe_sge_cnt;
u32 total_sge_cnt;
- u32 wqe_sge_cnt;
+
+ cap->max_inline_data = get_max_inline_data(hr_dev, cap);
hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT;
+ std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
+ ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud_or_gsi,
+ cap->max_send_sge);
- hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
+ if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
+ inline_ext_sge = max(ext_wqe_sge_cnt,
+ get_sge_num_from_max_inl_data(is_ud_or_gsi,
+ cap->max_inline_data));
+ hr_qp->sq.ext_sge_cnt = inline_ext_sge ?
+ roundup_pow_of_two(inline_ext_sge) : 0;
- wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp);
+ hr_qp->sq.max_gs = max(1U, (hr_qp->sq.ext_sge_cnt + std_sge_num));
+ hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
+
+ ext_wqe_sge_cnt = hr_qp->sq.ext_sge_cnt;
+ } else {
+ hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
+ hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
+ hr_qp->sq.ext_sge_cnt = hr_qp->sq.max_gs;
+ }
/* If the number of extended sge is not zero, they MUST use the
* space of HNS_HW_PAGE_SIZE at least.
*/
- if (wqe_sge_cnt) {
- total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * wqe_sge_cnt);
+ if (ext_wqe_sge_cnt) {
+ total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * ext_wqe_sge_cnt);
hr_qp->sge.sge_cnt = max(total_sge_cnt,
(u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
}
+
+ update_inline_data(hr_qp, cap);
}
static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
@@ -556,6 +627,7 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
hr_qp->sq.wqe_cnt = cnt;
+ cap->max_send_sge = hr_qp->sq.max_gs;
return 0;
}
@@ -986,13 +1058,9 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
struct hns_roce_ib_create_qp *ucmd)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_ucontext *uctx;
int ret;
- if (init_attr->cap.max_inline_data > hr_dev->caps.max_sq_inline)
- init_attr->cap.max_inline_data = hr_dev->caps.max_sq_inline;
-
- hr_qp->max_inline_data = init_attr->cap.max_inline_data;
-
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
else
@@ -1015,12 +1083,17 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
return ret;
}
+ uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext,
+ ibucontext);
+ hr_qp->config = uctx->config;
ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
if (ret)
ibdev_err(ibdev,
"failed to set user SQ size, ret = %d.\n",
ret);
} else {
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
if (ret)
ibdev_err(ibdev,
@@ -85,11 +85,26 @@ struct hns_roce_ib_create_qp_resp {
__aligned_u64 dwqe_mmap_key;
};
+enum {
+ HNS_ROCE_EXSGE_FLAGS = 1 << 0,
+};
+
+enum {
+ HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0,
+};
+
struct hns_roce_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 cqe_size;
__u32 srq_tab_size;
__u32 reserved;
+ __u32 config;
+ __u32 max_inline_data;
+};
+
+struct hns_roce_ib_alloc_ucontext {
+ __u32 config;
+ __u32 reserved;
};
struct hns_roce_ib_alloc_pd_resp {