@@ -340,6 +340,10 @@ struct diag_pkt {
#define HFI1_PSM_IOC_BASE_SEQ 0x0
+/* Number of BTH.PSN bits used for sequence number in expected rcvs */
+#define HFI1_KDETH_BTH_SEQ_SHIFT 11
+#define HFI1_KDETH_BTH_SEQ_MASK (BIT(HFI1_KDETH_BTH_SEQ_SHIFT) - 1)
+
static inline __u64 rhf_to_cpu(const __le32 *rbuf)
{
return __le64_to_cpu(*((__le64 *)rbuf));
@@ -192,6 +192,14 @@ struct exp_tid_set {
};
typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
+
+struct tid_queue {
+ struct list_head queue_head;
+ /* queue head for QP TID resource waiters */
+ u32 enqueue; /* count of tid enqueues */
+ u32 dequeue; /* count of tid dequeues */
+};
+
struct hfi1_ctxtdata {
/* rcvhdrq base, needs mmap before useful */
void *rcvhdrq;
@@ -285,6 +293,13 @@ struct hfi1_ctxtdata {
/* PSM Specific fields */
/* lock protecting all Expected TID data */
struct mutex exp_mutex;
+ /* lock protecting all Expected TID data of kernel contexts */
+ spinlock_t exp_lock;
+
+ /* Queue for QP's waiting for HW TID flows */
+ struct tid_queue flow_queue;
+ /* Queue for QP's waiting for HW receive array entries */
+ struct tid_queue rarr_queue;
/* when waiting for rcv or pioavail */
wait_queue_head_t wait;
/* uuid from PSM */
@@ -317,6 +332,9 @@ struct hfi1_ctxtdata {
*/
u8 subctxt_cnt;
+ /* Bit mask to track free TID RDMA HW flows */
+ unsigned long flow_mask;
+ struct tid_flow_state flows[RXE_NUM_TID_FLOWS];
};
/**
@@ -50,6 +50,36 @@
#include "verbs.h"
#include "tid_rdma.h"
+#define MAX_EXPECTED_PAGES (MAX_EXPECTED_BUFFER / PAGE_SIZE)
+
+#define RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK BIT_ULL(32)
+#define RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK BIT_ULL(33)
+#define RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK BIT_ULL(34)
+#define RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK BIT_ULL(35)
+#define RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK BIT_ULL(37)
+#define RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK BIT_ULL(38)
+
+/*
+ * J_KEY for kernel contexts when TID RDMA is used.
+ * See generate_jkey() in hfi.h for more information.
+ */
+#define TID_RDMA_JKEY 32
+#define HFI1_KERNEL_MIN_JKEY HFI1_ADMIN_JKEY_RANGE
+#define HFI1_KERNEL_MAX_JKEY (2 * HFI1_ADMIN_JKEY_RANGE - 1)
+
+/* Maximum number of segments in flight per QP request. */
+#define TID_RDMA_MAX_READ_SEGS_PER_REQ 6
+#define TID_RDMA_MAX_WRITE_SEGS_PER_REQ 4
+#define TID_RDMA_MAX_READ_SEGS 6
+
+#define TID_RDMA_DESTQP_FLOW_SHIFT 11
+#define TID_RDMA_DESTQP_FLOW_MASK 0x1f
+
+#define TID_FLOW_SW_PSN BIT(0)
+
+/* Maximum number of packets within a flow generation. */
+#define MAX_TID_FLOW_PSN BIT(HFI1_KDETH_BTH_SEQ_SHIFT)
+
void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet)
{
}
@@ -49,6 +49,213 @@
#ifndef HFI1_TID_RDMA_H
#define HFI1_TID_RDMA_H
+#include <linux/circ_buf.h>
+#include "common.h"
+
+/* Add a convenience helper */
+#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
+#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
+#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
+
+#define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
+#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
+#define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT)
+
+/*
+ * Bit definitions for priv->s_flags.
+ * These bit flags overload the bit flags defined for the QP's s_flags.
+ * Due to the fact that these bit fields are used only for the QP priv
+ * s_flags, there are no collisions.
+ *
+ * HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
+ * HFI1_R_TID_WAIT_INTERLCK - QP is waiting for responder interlock
+ */
+#define HFI1_S_TID_BUSY_SET BIT(0)
+/* BIT(1) reserved for RVT_S_BUSY. */
+#define HFI1_R_TID_RSC_TIMER BIT(2)
+/* BIT(3) reserved for RVT_S_RESP_PENDING. */
+/* BIT(4) reserved for RVT_S_ACK_PENDING. */
+#define HFI1_S_TID_WAIT_INTERLCK BIT(5)
+#define HFI1_R_TID_WAIT_INTERLCK BIT(6)
+/* BIT(7) - BIT(15) reserved for RVT_S_WAIT_*. */
+/* BIT(16) reserved for RVT_S_SEND_ONE */
+#define HFI1_S_TID_RETRY_TIMER BIT(17)
+/* BIT(18) reserved for RVT_S_ECN. */
+#define HFI1_R_TID_SW_PSN BIT(19)
+/* BIT(26) reserved for HFI1_S_WAIT_HALT */
+/* BIT(27) reserved for HFI1_S_WAIT_TID_RESP */
+/* BIT(28) reserved for HFI1_S_WAIT_TID_SPACE */
+
+/*
+ * Unlike regular IB RDMA VERBS, which do not require an entry
+ * in the s_ack_queue, TID RDMA WRITE requests do because they
+ * generate responses.
+ * Therefore, the s_ack_queue needs to be extended by a certain
+ * amount. The key point is that the queue needs to be extended
+ * without letting the "user" know so they user doesn't end up
+ * using these extra entries.
+ */
+#define HFI1_TID_RDMA_WRITE_CNT 8
+
+struct tid_rdma_params {
+ struct rcu_head rcu_head;
+ u32 qp;
+ u32 max_len;
+ u16 jkey;
+ u8 max_read;
+ u8 max_write;
+ u8 timeout;
+ u8 urg;
+ u8 version;
+};
+
+struct tid_rdma_qp_params {
+ u8 n_read;
+ u8 n_write;
+ struct work_struct trigger_work;
+ struct tid_rdma_params local;
+ struct tid_rdma_params __rcu *remote;
+};
+
+/* Track state for each hardware flow */
+struct tid_flow_state {
+ u32 generation;
+ u32 psn;
+ u32 r_next_psn; /* next PSN to be received (in TID space) */
+ u8 index;
+ u8 last_index;
+ u8 flags;
+};
+
+enum tid_rdma_req_state {
+ TID_REQUEST_INACTIVE = 0,
+ TID_REQUEST_INIT,
+ TID_REQUEST_INIT_RESEND,
+ TID_REQUEST_ACTIVE,
+ TID_REQUEST_RESEND,
+ TID_REQUEST_RESEND_ACTIVE,
+ TID_REQUEST_QUEUED,
+ TID_REQUEST_SYNC,
+ TID_REQUEST_RNR_NAK,
+ TID_REQUEST_COMPLETE,
+};
+
+struct tid_rdma_request {
+ struct rvt_qp *qp;
+ struct hfi1_ctxtdata *rcd;
+ union {
+ struct rvt_swqe *swqe;
+ struct rvt_ack_entry *ack;
+ } e;
+
+ struct tid_rdma_flow *flows; /* array of tid flows */
+ struct rvt_sge_state ss; /* SGE state for TID RDMA requests */
+ u16 n_max_flows; /* size of the flow circular buffer */
+ u16 n_flows; /* size of the flow buffer window */
+ u16 setup_head; /* flow index we are setting up */
+ u16 clear_tail; /* flow index we are clearing */
+ u16 flow_idx; /* flow index most recently set up */
+ u16 kdeth_seq; /* the sequence (10bits) of the KDETH PSN */
+ u16 acked_tail;
+
+ u32 lkey;
+ u32 rkey;
+ u32 seg_len;
+ u32 total_len;
+ u32 r_ack_psn; /* next expected ack PSN */
+ u32 r_flow_psn; /* IB PSN of next segment start */
+ u32 r_last_acked; /* IB PSN of last ACK'ed packet */
+ u32 s_next_psn; /* IB PSN of next segment start for read */
+
+ u32 total_segs; /* segments required to complete a request */
+ u32 cur_seg; /* index of current segment */
+ u32 comp_seg; /* index of last completed segment */
+ u32 ack_seg; /* index of last ack'ed segment */
+ u32 alloc_seg; /* index of next segment to be allocated */
+ u32 isge; /* index of "current" sge */
+ u32 ack_pending; /* num acks pending for this request */
+
+ enum tid_rdma_req_state state;
+};
+
+/*
+ * When header suppression is used, PSNs associated with a "flow" are
+ * relevant (and not the PSNs maintained by verbs). Track per-flow
+ * PSNs here for a TID RDMA segment.
+ *
+ */
+struct flow_state {
+ u32 flags;
+ u32 resp_ib_psn; /* The IB PSN of the response for this flow */
+ u32 generation; /* generation of flow */
+ u32 spsn; /* starting PSN in TID space */
+ u32 lpsn; /* last PSN in TID space */
+ u32 r_next_psn; /* next PSN to be received (in TID space) */
+
+ /* For tid rdma read */
+ u32 ib_spsn; /* starting PSN in Verbs space */
+ u32 ib_lpsn; /* last PSn in Verbs space */
+};
+
+struct tid_rdma_pageset {
+ dma_addr_t addr : 48; /* Only needed for the first page */
+ u8 idx: 8;
+ u8 count : 8;
+};
+
+/* Overall info for a TID RDMA segment */
+struct tid_rdma_flow {
+ /*
+ * While a TID RDMA segment is being transferred, it uses a QP number
+ * from the "KDETH section of QP numbers" (which is different from the
+ * QP number that originated the request). Bits 11-15 of these QP
+ * numbers identify the "TID flow" for the segment.
+ */
+ struct flow_state flow_state;
+ struct tid_rdma_request *req;
+ struct trdma_flow_state *fstate;
+ u32 tid_qpn;
+ u32 tid_offset;
+ u32 length;
+ u32 sent;
+ u8 tnode_cnt;
+ u8 tidcnt;
+ u8 tid_idx;
+ u8 idx;
+ u8 npagesets;
+ u8 npkts;
+ u8 pkt;
+ u8 resync_npkts;
+};
+
+/**
+ * kern_tid_node - used for managing TID's in TID groups
+ *
+ * @grp_idx: rcd relative index to tid_group
+ * @map: grp->map captured prior to programming this TID group in HW
+ * @cnt: Only @cnt of available group entries are actually programmed
+ */
+struct kern_tid_node {
+ u16 grp_idx;
+ u8 map;
+ u8 cnt;
+};
+
+/* Allocation info for a TID RDMA segment */
+struct trdma_flow_state {
+ struct tid_rdma_pageset pagesets[TID_RDMA_MAX_PAGES];
+ struct kern_tid_node tnode[TID_RDMA_MAX_PAGES];
+ u32 tid_entry[TID_RDMA_MAX_PAGES];
+};
+
+void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd);
+void tid_rdma_flush_wait(struct rvt_qp *qp);
+
+void hfi1_compute_tid_rdma_flow_wt(void);
+void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
+int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit);
+void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req);
+
void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);
void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet);
@@ -1,7 +1,7 @@
#ifndef _HFI1_USER_EXP_RCV_H
#define _HFI1_USER_EXP_RCV_H
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -48,7 +48,7 @@
*/
#include "hfi.h"
-
+#include "mmu_rb.h"
#include "exp_rcv.h"
struct tid_pageset {
@@ -158,8 +158,13 @@ struct hfi1_qp_priv {
struct sdma_engine *s_sde; /* current sde */
struct send_context *s_sendcontext; /* current sendcontext */
struct hfi1_ctxtdata *rcd; /* QP's receive context */
+ struct page **pages; /* for TID page scan */
+ u32 tid_enqueue; /* saved when tid waited */
u8 s_sc; /* SC[0..4] for next packet */
struct iowait s_iowait;
+ struct list_head tid_wait; /* for queueing tid space */
+ struct tid_flow_state flow_state;
+ struct tid_rdma_qp_params tid_rdma;
struct rvt_qp *owner;
u8 hdr_type; /* 9B or 16B */
};
@@ -310,6 +315,21 @@ static inline u32 delta_psn(u32 a, u32 b)
return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT;
}
+/*
+ * Look through all the active flows for a TID RDMA request and find
+ * the one (if it exists) that contains the specified PSN.
+ */
+static inline u32 __full_flow_psn(struct flow_state *state, u32 psn)
+{
+ return mask_psn((state->generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
+ (psn & HFI1_KDETH_BTH_SEQ_MASK));
+}
+
+static inline u32 full_flow_psn(struct tid_rdma_flow *flow, u32 psn)
+{
+ return __full_flow_psn(&flow->flow_state, psn);
+}
+
struct verbs_txreq;
void hfi1_put_txreq(struct verbs_txreq *tx);