@@ -90,6 +90,7 @@
#define HNS_ROCE_MAX_PORTS 6
#define HNS_ROCE_GID_SIZE 16
#define HNS_ROCE_SGE_SIZE 16
+#define HNS_ROCE_DWQE_SIZE 65536
#define HNS_ROCE_HOP_NUM_0 0xff
@@ -643,6 +644,10 @@ struct hns_roce_work {
u32 queue_num;
};
+enum {
+ HNS_ROCE_QP_CAP_DIRECT_WQE = BIT(5),
+};
+
struct hns_roce_qp {
struct ib_qp ibqp;
struct hns_roce_wq rq;
@@ -984,6 +989,7 @@ struct hns_roce_dev {
struct mutex pgdir_mutex;
int irq[HNS_ROCE_MAX_IRQ_NUM];
u8 __iomem *reg_base;
+ void __iomem *mem_base;
struct hns_roce_caps caps;
struct xarray qp_table_xa;
@@ -503,6 +503,8 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
if (ret)
return ret;
+ qp->sl = to_hr_ah(ud_wr(wr)->ah)->av.sl;
+
set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge);
/*
@@ -635,6 +637,8 @@ static inline void update_sq_db(struct hns_roce_dev *hr_dev,
V2_DB_BYTE_4_TAG_S, qp->doorbell_qpn);
roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M,
V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB);
+ /* indicates data on new BAR, 0 : SQ doorbell, 1 : DWQE */
+ roce_set_bit(sq_db.byte_4, V2_DB_FLAG_S, 0);
roce_set_field(sq_db.parameter, V2_DB_PARAMETER_IDX_M,
V2_DB_PARAMETER_IDX_S, qp->sq.head);
roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M,
@@ -644,6 +648,38 @@ static inline void update_sq_db(struct hns_roce_dev *hr_dev,
}
}
+static void hns_roce_write512(struct hns_roce_dev *hr_dev, u64 *val,
+ u64 __iomem *dest)
+{
+#define HNS_ROCE_WRITE_TIMES 8
+ struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ int i;
+
+ if (!hr_dev->dis_db && !ops->get_hw_reset_stat(handle))
+ for (i = 0; i < HNS_ROCE_WRITE_TIMES; i++)
+ writeq_relaxed(*(val + i), dest + i);
+}
+
+static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
+ void *wqe)
+{
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
+
+ /* All kinds of DirectWQE have the same header field layout */
+ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FLAG_S, 1);
+ roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M,
+ V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S, qp->sl);
+ roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M,
+ V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S, qp->sl >> 2);
+ roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M,
+ V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head);
+
+ hns_roce_write512(hr_dev, wqe, hr_dev->mem_base +
+ HNS_ROCE_DWQE_SIZE * qp->ibqp.qp_num);
+}
+
static int hns_roce_v2_post_send(struct ib_qp *ibqp,
const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr)
@@ -710,7 +746,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
qp->next_sge = sge_idx;
/* Memory barrier */
wmb();
- update_sq_db(hr_dev, qp);
+
+ if (nreq == 1 && qp->sq.head == qp->sq.tail + 1 &&
+ (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
+ write_dwqe(hr_dev, qp, wqe);
+ else
+ update_sq_db(hr_dev, qp);
}
spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -6273,6 +6314,7 @@ static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
/* Get info from NIC driver. */
hr_dev->reg_base = handle->rinfo.roce_io_base;
+ hr_dev->mem_base = handle->rinfo.roce_mem_base;
hr_dev->caps.num_ports = 1;
hr_dev->iboe.netdevs[0] = handle->rinfo.netdev;
hr_dev->iboe.phy_port[0] = 0;
@@ -1098,6 +1098,8 @@ struct hns_roce_v2_mpt_entry {
#define V2_DB_BYTE_4_CMD_S 24
#define V2_DB_BYTE_4_CMD_M GENMASK(27, 24)
+#define V2_DB_FLAG_S 31
+
#define V2_DB_PARAMETER_IDX_S 0
#define V2_DB_PARAMETER_IDX_M GENMASK(15, 0)
@@ -1194,6 +1196,15 @@ struct hns_roce_v2_rc_send_wqe {
#define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0
#define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
+#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S 5
+#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5)
+
+#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S 13
+#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13)
+
+#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S 15
+#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15)
+
#define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7
#define V2_RC_SEND_WQE_BYTE_4_CQE_S 8
@@ -1216,6 +1227,8 @@ struct hns_roce_v2_rc_send_wqe {
#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23
+#define V2_RC_SEND_WQE_BYTE_4_FLAG_S 31
+
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)