new file mode 100644
@@ -0,0 +1,135 @@
+/*
+ * Broadcom NetXtreme-E User Space RoCE driver
+ *
+ * Copyright (c) 2015-2016, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Basic linked-list operations and data structures
+ */
+
+#ifndef __BNXT_RE_LIST_H__
+#define __BNXT_RE_LIST_H__
+
+/*
+ * Intrusive doubly-linked list node; embedded in structs (e.g. bnxt_re_qp's
+ * snode/rnode) that need to sit on a flush list.
+ */
+struct bnxt_re_list_node {
+	uint8_t valid;	/* non-zero while the node is linked on a list */
+	struct bnxt_re_list_node *next, *prev;
+};
+
+/*
+ * List anchor: a sentinel node plus a mutex for serializing mutations via
+ * list_lock()/list_unlock(). NOTE(review): callers in this patch serialize
+ * through cntx->fqlock instead of this mutex — confirm which lock governs.
+ */
+struct bnxt_re_list_head {
+	struct bnxt_re_list_node node;	/* self-referencing sentinel when empty */
+	pthread_mutex_t lock;
+};
+
+/*
+ * Static initializer for a bnxt_re_list_head: the sentinel points at itself
+ * (empty list) and the embedded mutex gets its default attributes.
+ */
+#define DBLY_LIST_HEAD_INIT(name) {{true, &name.node, &name.node},\
+				   PTHREAD_MUTEX_INITIALIZER}
+
+/* Define and initialize a list head in one shot. */
+#define DBLY_LIST_HEAD(name) \
+	struct bnxt_re_list_head name = DBLY_LIST_HEAD_INIT(name) \
+
+/* Reset @ptr (a struct bnxt_re_list_node *) to the unlinked state. */
+#define INIT_DBLY_LIST_NODE(ptr) do { \
+	(ptr)->next = (ptr); (ptr)->prev = (ptr); (ptr)->valid = false; \
+} while (0)
+
+/*
+ * @ptr is a pointer to a struct bnxt_re_list_head (callers pass &head).
+ * The previous expansion, INIT_DBLY_LIST_NODE(ptr.node), compiled only by
+ * unhygienic textual pasting ("&head" "." "node" parses as &(head.node));
+ * &(ptr)->node expresses the same address with proper parenthesization.
+ */
+#define INIT_DBLY_LIST_HEAD(ptr) INIT_DBLY_LIST_NODE(&(ptr)->node)
+
+/* Splice @new between two adjacent nodes @prev and @next. */
+static inline void __list_add_node(struct bnxt_re_list_node *new,
+				   struct bnxt_re_list_node *prev,
+				   struct bnxt_re_list_node *next)
+{
+	new->prev = prev;
+	new->next = next;
+	prev->next = new;
+	next->prev = new;
+}
+
+/*
+ * Append @new at the tail of @head and mark it linked (valid = true).
+ * NOTE(review): callers in this patch hold cntx->fqlock around this —
+ * confirm external locking is the intended discipline.
+ */
+static inline void list_add_node_tail(struct bnxt_re_list_node *new,
+				      struct bnxt_re_list_head *head)
+{
+	__list_add_node(new, head->node.prev, &head->node);
+	new->valid = true;
+}
+
+/* Unlink whatever node currently sits between @prev and @next. */
+static inline void __list_del_node(struct bnxt_re_list_node *prev,
+				   struct bnxt_re_list_node *next)
+{
+	prev->next = next;
+	next->prev = prev;
+}
+
+/* Detach @entry from its list and scrub it back to the unlinked state. */
+static inline void list_del_node(struct bnxt_re_list_node *entry)
+{
+	__list_del_node(entry->prev, entry->next);
+	entry->valid = false;
+	entry->prev = 0;
+	entry->next = 0;
+}
+
+/* Non-zero when @head holds no entries (sentinel points at itself). */
+static inline uint8_t list_empty(struct bnxt_re_list_head *head)
+{
+	struct bnxt_re_list_node *sentinel = &head->node;
+
+	return (sentinel->next == sentinel) && (sentinel->prev == sentinel);
+}
+
+/* Serialize list mutation through the head's embedded mutex. */
+#define list_lock(head) pthread_mutex_lock(&((head)->lock))
+#define list_unlock(head) pthread_mutex_unlock(&((head)->lock))
+
+/*
+ * container_of: recover the enclosing @type from a pointer @ptr to its
+ * embedded @member node (hand-rolled offsetof via a null-pointer cast).
+ */
+#define list_node(ptr, type, member) \
+	((type *)((char *)(ptr) - (unsigned long)(&((type *)0)->member)))
+
+/* A node is "valid" while linked (set by add, cleared by del/init). */
+#define list_node_valid(node) ((node)->valid)
+
+/*
+ * list_for_each_node_safe - iterate over a list safe against removal of the
+ * current entry
+ * @pos: the &struct bnxt_re_list_node * to use as a loop cursor.
+ * @n: another &struct bnxt_re_list_node * to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_node_safe(pos, n, head) \
+	for (pos = (head)->node.next, n = pos->next; pos != &((head)->node); \
+		pos = n, n = pos->next)
+
+/*
+ * Queue @node on @head unless it is already on a list; the valid flag makes
+ * repeated adds (one per polled error CQE) idempotent.
+ */
+static inline void bnxt_re_list_add_node(struct bnxt_re_list_node *node,
+					 struct bnxt_re_list_head *head)
+{
+	if (!list_node_valid(node))
+		list_add_node_tail(node, head);
+}
+
+/*
+ * Remove @node from its list if currently linked; no-op otherwise.
+ * @head is unused (the node links suffice) but kept for API symmetry
+ * with bnxt_re_list_add_node().
+ */
+static inline void bnxt_re_list_del_node(struct bnxt_re_list_node *node,
+					 struct bnxt_re_list_head *head)
+{
+	if (list_node_valid(node))
+		list_del_node(node);
+}
+
+#endif /* __BNXT_RE_LIST_H__ */
@@ -133,6 +133,7 @@ static int bnxt_re_init_context(struct verbs_device *vdev,
dev->pg_size = resp.pg_size;
dev->cqe_size = resp.cqe_size;
dev->max_cq_depth = resp.max_cqd;
+ pthread_spin_init(&cntx->fqlock, PTHREAD_PROCESS_PRIVATE);
ibvctx->ops = bnxt_re_cntx_ops;
return 0;
@@ -141,7 +142,11 @@ static int bnxt_re_init_context(struct verbs_device *vdev,
static void bnxt_re_uninit_context(struct verbs_device *vdev,
struct ibv_context *ibvctx)
{
+ struct bnxt_re_context *cntx;
+
+ cntx = to_bnxt_re_context(ibvctx);
/* Unmap if anything device specific was mapped in init_context. */
+ pthread_spin_destroy(&cntx->fqlock);
}
static struct verbs_device *bnxt_re_driver_init(const char *uverbs_sys_path,
@@ -49,6 +49,7 @@
#include "abi.h"
#include "memory.h"
+#include "list.h"
#define DEV "bnxtre : "
@@ -68,6 +69,8 @@ struct bnxt_re_cq {
uint32_t cqid;
struct bnxt_re_queue cqq;
struct bnxt_re_dpi *udpi;
+ struct bnxt_re_list_head sfhead;
+ struct bnxt_re_list_head rfhead;
uint32_t cqe_size;
uint8_t phase;
};
@@ -103,6 +106,8 @@ struct bnxt_re_qp {
struct bnxt_re_cq *rcq;
struct bnxt_re_dpi *udpi;
struct bnxt_re_qpcap cap;
+ struct bnxt_re_list_node snode;
+ struct bnxt_re_list_node rnode;
uint32_t qpid;
uint32_t tbl_indx;
uint32_t sq_psn;
@@ -132,6 +137,7 @@ struct bnxt_re_context {
uint32_t max_qp;
uint32_t max_srq;
struct bnxt_re_dpi udpi;
+ pthread_spinlock_t fqlock;
};
/* DB ring functions used internally*/
@@ -89,6 +89,11 @@ static inline uint32_t bnxt_re_is_que_full(struct bnxt_re_queue *que)
return (((que->tail + 1) & (que->depth - 1)) == que->head);
}
+/* True when producer and consumer indices coincide (no pending entries). */
+static inline uint32_t bnxt_re_is_que_empty(struct bnxt_re_queue *que)
+{
+	return que->tail == que->head;
+}
+
static inline uint32_t bnxt_re_incr(uint32_t val, uint32_t max)
{
return (++val & (max - 1));
@@ -206,6 +206,9 @@ struct ibv_cq *bnxt_re_create_cq(struct ibv_context *ibvctx, int ncqe,
cq->cqq.tail = resp.tail;
cq->udpi = &cntx->udpi;
+ INIT_DBLY_LIST_HEAD(&cq->sfhead);
+ INIT_DBLY_LIST_HEAD(&cq->rfhead);
+
return &cq->ibvcq;
cmdfail:
bnxt_re_free_aligned(&cq->cqq);
@@ -234,6 +237,46 @@ int bnxt_re_destroy_cq(struct ibv_cq *ibvcq)
return 0;
}
+/*
+ * Report one error completion for the SQ entry at the head, move the QP to
+ * the error state, and queue it on the send CQ's flush list so the rest of
+ * the SQ is flushed later by bnxt_re_poll_flush_lists().
+ *
+ * Always sets *cnt = 1 and returns false (CQE fully consumed; no pending
+ * second half). @scqe is currently unused; status comes from @hdr.
+ */
+static uint8_t bnxt_re_poll_err_scqe(struct bnxt_re_qp *qp,
+				     struct ibv_wc *ibvwc,
+				     struct bnxt_re_bcqe *hdr,
+				     struct bnxt_re_req_cqe *scqe, int *cnt)
+{
+	struct bnxt_re_queue *sq = qp->sqq;
+	struct bnxt_re_context *cntx;
+	struct bnxt_re_wrid *swrid;
+	struct bnxt_re_psns *spsn;
+	struct bnxt_re_cq *scq;
+	uint32_t head = sq->head;
+	uint8_t status;
+
+	scq = to_bnxt_re_cq(qp->ibvqp.send_cq);
+	cntx = to_bnxt_re_context(scq->ibvcq.context);
+	swrid = &qp->swrid[head];
+	spsn = swrid->psns;
+
+	*cnt = 1;
+	status = (hdr->flg_st_typ_ph >> BNXT_RE_BCQE_STATUS_SHIFT) &
+		  BNXT_RE_BCQE_STATUS_MASK;
+	ibvwc->status = bnxt_re_to_ibv_wc_status(status, true);
+	ibvwc->wc_flags = 0;
+	ibvwc->wr_id = swrid->wrid;
+	ibvwc->qp_num = qp->qpid;
+	/* opcode of the failed WR is recovered from its stashed psns entry */
+	ibvwc->opcode = (spsn->opc_spsn >> BNXT_RE_PSNS_OPCD_SHIFT) &
+			BNXT_RE_PSNS_OPCD_MASK;
+	ibvwc->byte_len = 0;
+
+	bnxt_re_incr_head(qp->sqq);
+
+	if (qp->qpst != IBV_QPS_ERR)
+		qp->qpst = IBV_QPS_ERR;
+	/* fqlock serializes flush-list access against bnxt_re_poll_cq() */
+	pthread_spin_lock(&cntx->fqlock);
+	bnxt_re_list_add_node(&qp->snode, &scq->sfhead);
+	pthread_spin_unlock(&cntx->fqlock);
+
+	return false;
+}
+
static uint8_t bnxt_re_poll_success_scqe(struct bnxt_re_qp *qp,
struct ibv_wc *ibvwc,
struct bnxt_re_bcqe *hdr,
@@ -287,21 +330,53 @@ static uint8_t bnxt_re_poll_scqe(struct bnxt_re_qp *qp, struct ibv_wc *ibvwc,
status = (hdr->flg_st_typ_ph >> BNXT_RE_BCQE_STATUS_SHIFT) &
BNXT_RE_BCQE_STATUS_MASK;
- if (status == BNXT_RE_REQ_ST_OK) {
+ if (status == BNXT_RE_REQ_ST_OK)
pcqe = bnxt_re_poll_success_scqe(qp, ibvwc, hdr, scqe, cnt);
- } else {
- /* TODO: Handle error completion properly. */
- fprintf(stderr, "%s(): swc with error, vendor status = %d\n",
- __func__, status);
- *cnt = 1;
- ibvwc->status = bnxt_re_to_ibv_wc_status(status, true);
- ibvwc->wr_id = qp->swrid[qp->sqq->head].wrid;
- bnxt_re_incr_head(qp->sqq);
- }
+ else
+ pcqe = bnxt_re_poll_err_scqe(qp, ibvwc, hdr, scqe, cnt);
return pcqe;
}
+/*
+ * Report an error completion for the RQ entry at the head, move the QP to
+ * the error state, and queue it on the recv CQ's flush list so the rest of
+ * the RQ is flushed later by bnxt_re_poll_flush_lists().
+ *
+ * Returns the number of work completions generated (0 or 1). H/W flush
+ * errors are skipped (return 0) since flush completions are produced from
+ * the flush lists instead. @rcqe is currently unused; status is in @hdr.
+ */
+static int bnxt_re_poll_err_rcqe(struct bnxt_re_qp *qp,
+				 struct ibv_wc *ibvwc,
+				 struct bnxt_re_bcqe *hdr,
+				 struct bnxt_re_rc_cqe *rcqe)
+{
+	struct bnxt_re_wrid *rwrid;
+	struct bnxt_re_cq *rcq;
+	struct bnxt_re_context *cntx;
+	uint8_t status;
+
+	rcq = to_bnxt_re_cq(qp->ibvqp.recv_cq);
+	cntx = to_bnxt_re_context(rcq->ibvcq.context);
+
+	status = (hdr->flg_st_typ_ph >> BNXT_RE_BCQE_STATUS_SHIFT) &
+		  BNXT_RE_BCQE_STATUS_MASK;
+	/* skip h/w flush errors */
+	if (status == BNXT_RE_RSP_ST_HW_FLUSH)
+		return 0;
+	ibvwc->status = bnxt_re_to_ibv_wc_status(status, false);
+	/* TODO: Add SRQ Processing here */
+	if (qp->rqq) {
+		/* Touch the RQ only when it exists: the previous code read
+		 * qp->rqq->head before this check, dereferencing NULL for
+		 * QPs without an RQ (SRQ case).
+		 */
+		rwrid = &qp->rwrid[qp->rqq->head];
+		ibvwc->wr_id = rwrid->wrid;
+		ibvwc->qp_num = qp->qpid;
+		ibvwc->opcode = IBV_WC_RECV;
+		ibvwc->byte_len = 0;
+		bnxt_re_incr_head(qp->rqq);
+		if (qp->qpst != IBV_QPS_ERR)
+			qp->qpst = IBV_QPS_ERR;
+		/* fqlock serializes flush-list access with bnxt_re_poll_cq */
+		pthread_spin_lock(&cntx->fqlock);
+		bnxt_re_list_add_node(&qp->rnode, &rcq->rfhead);
+		pthread_spin_unlock(&cntx->fqlock);
+	}
+
+	return 1;
+}
+
static void bnxt_re_poll_success_rcqe(struct bnxt_re_qp *qp,
struct ibv_wc *ibvwc,
struct bnxt_re_bcqe *hdr,
@@ -349,18 +424,37 @@ static uint8_t bnxt_re_poll_rcqe(struct bnxt_re_qp *qp, struct ibv_wc *ibvwc,
status = (hdr->flg_st_typ_ph >> BNXT_RE_BCQE_STATUS_SHIFT) &
BNXT_RE_BCQE_STATUS_MASK;
- if (status == BNXT_RE_RSP_ST_OK) {
+ *cnt = 1;
+ if (status == BNXT_RE_RSP_ST_OK)
bnxt_re_poll_success_rcqe(qp, ibvwc, hdr, rcqe);
- *cnt = 1;
- } else {
- /* TODO: Process error completions properly.*/
- *cnt = 1;
- ibvwc->status = bnxt_re_to_ibv_wc_status(status, false);
- if (qp->rqq) {
- ibvwc->wr_id = qp->rwrid[qp->rqq->head].wrid;
- bnxt_re_incr_head(qp->rqq);
- }
- }
+ else
+ *cnt = bnxt_re_poll_err_rcqe(qp, ibvwc, hdr, rcqe);
+
+ return pcqe;
+}
+
+/*
+ * Handle a terminal CQE: mark the QP as being in error and park it on both
+ * the send and recv CQ flush lists. No immediate work completion is
+ * reported (*cnt = 0); flush completions are drained afterwards by
+ * bnxt_re_poll_flush_lists(). @ibvwc and @cqe are currently unused.
+ *
+ * Always returns false (CQE fully consumed).
+ */
+static uint8_t bnxt_re_poll_term_cqe(struct bnxt_re_qp *qp,
+				     struct ibv_wc *ibvwc, void *cqe, int *cnt)
+{
+	struct bnxt_re_context *cntx;
+	struct bnxt_re_cq *scq, *rcq;
+	uint8_t pcqe = false;
+
+	scq = to_bnxt_re_cq(qp->ibvqp.send_cq);
+	rcq = to_bnxt_re_cq(qp->ibvqp.recv_cq);
+	cntx = to_bnxt_re_context(scq->ibvcq.context);
+	/* For now just add the QP to flush list without
+	 * considering the index reported in the CQE.
+	 * Continue reporting flush completions until the
+	 * SQ and RQ are empty.
+	 */
+	*cnt = 0;
+	if (qp->qpst != IBV_QPS_ERR)
+		qp->qpst = IBV_QPS_ERR;
+	pthread_spin_lock(&cntx->fqlock);
+	bnxt_re_list_add_node(&qp->rnode, &rcq->rfhead);
+	bnxt_re_list_add_node(&qp->snode, &scq->sfhead);
+	pthread_spin_unlock(&cntx->fqlock);
	return pcqe;
}
@@ -413,6 +507,12 @@ static int bnxt_re_poll_one(struct bnxt_re_cq *cq, int nwc, struct ibv_wc *wc)
case BNXT_RE_WC_TYPE_RECV_RAW:
break;
case BNXT_RE_WC_TYPE_TERM:
+ scqe = cqe;
+ qp_handle = (uint64_t *)&scqe->qp_handle;
+ qp = (struct bnxt_re_qp *)scqe->qp_handle;
+ if (!qp)
+ break;
+ pcqe = bnxt_re_poll_term_cqe(qp, wc, cqe, &cnt);
break;
case BNXT_RE_WC_TYPE_COFF:
break;
@@ -445,22 +545,107 @@ bail:
return dqed;
}
+/*
+ * Drain up to @nwc entries from @que, synthesizing IBV_WC_WR_FLUSH_ERR
+ * completions for QP @qpid from the stashed wrid array @wridp.
+ *
+ * Returns the number of completions written to @ibvwc. For SQ entries the
+ * opcode is recovered from wrid->psns; entries without psns report
+ * IBV_WC_RECV (or, as written, the opcode of a preceding psns-bearing
+ * entry, since @opcode persists across iterations — presumably RQ entries
+ * never carry psns; verify against the posting path).
+ */
+static int bnxt_re_poll_flush_wcs(struct bnxt_re_queue *que,
+				  struct bnxt_re_wrid *wridp,
+				  struct ibv_wc *ibvwc, uint32_t qpid,
+				  int nwc)
+{
+	struct bnxt_re_wrid *wrid;
+	struct bnxt_re_psns *psns;
+	uint32_t cnt = 0, head;
+	uint8_t opcode = IBV_WC_RECV;
+
+	while (nwc) {
+		if (bnxt_re_is_que_empty(que))
+			break;
+		head = que->head;
+		wrid = &wridp[head];
+		if (wrid->psns) {
+			psns = wrid->psns;
+			opcode = (psns->opc_spsn >> BNXT_RE_PSNS_OPCD_SHIFT) &
+				  BNXT_RE_PSNS_OPCD_MASK;
+		}
+
+		ibvwc->status = IBV_WC_WR_FLUSH_ERR;
+		ibvwc->opcode = opcode;
+		ibvwc->wr_id = wrid->wrid;
+		ibvwc->qp_num = qpid;
+		ibvwc->byte_len = 0;
+		ibvwc->wc_flags = 0;
+
+		bnxt_re_incr_head(que);
+		nwc--;
+		cnt++;
+		ibvwc++;
+	}
+
+	return cnt;
+}
+
+/*
+ * Generate up to @nwc flush completions into @ibvwc for every QP parked on
+ * @cq's send/recv flush lists. Caller holds cntx->fqlock.
+ *
+ * Returns the total number of completions written. QPs with already-empty
+ * queues are skipped (their list nodes are removed in bnxt_re_cleanup_cq).
+ */
+static int bnxt_re_poll_flush_lists(struct bnxt_re_cq *cq, uint32_t nwc,
+				    struct ibv_wc *ibvwc)
+{
+	struct bnxt_re_list_node *cur, *tmp;
+	struct bnxt_re_qp *qp;
+	struct bnxt_re_queue *que;
+	int dqed = 0;
+
+	/* Check if flush Qs are empty */
+	if (list_empty(&cq->sfhead) && list_empty(&cq->rfhead))
+		return 0;
+
+	if (!list_empty(&cq->sfhead)) {
+		list_for_each_node_safe(cur, tmp, &cq->sfhead) {
+			qp = list_node(cur, struct bnxt_re_qp, snode);
+			que = qp->sqq;
+			if (bnxt_re_is_que_empty(que))
+				continue;
+			/* Accumulate and advance the output array; the old
+			 * code assigned dqed and reused ibvwc/nwc each pass,
+			 * clobbering completions from earlier QPs and
+			 * potentially writing past nwc entries.
+			 */
+			dqed += bnxt_re_poll_flush_wcs(que, qp->swrid,
+						       ibvwc + dqed, qp->qpid,
+						       nwc - dqed);
+			if ((uint32_t)dqed == nwc)
+				return dqed;
+		}
+	}
+
+	if (!list_empty(&cq->rfhead)) {
+		list_for_each_node_safe(cur, tmp, &cq->rfhead) {
+			qp = list_node(cur, struct bnxt_re_qp, rnode);
+			que = qp->rqq;
+			if (!que || bnxt_re_is_que_empty(que))
+				continue;
+			dqed += bnxt_re_poll_flush_wcs(que, qp->rwrid,
+						       ibvwc + dqed, qp->qpid,
+						       nwc - dqed);
+			if ((uint32_t)dqed == nwc)
+				break;
+		}
+	}
+
+	return dqed;
+}
+
int bnxt_re_poll_cq(struct ibv_cq *ibvcq, int nwc, struct ibv_wc *wc)
{
struct bnxt_re_cq *cq = to_bnxt_re_cq(ibvcq);
- int dqed;
+ struct bnxt_re_context *cntx = to_bnxt_re_context(ibvcq->context);
+ int dqed, left = 0;
pthread_spin_lock(&cq->cqq.qlock);
dqed = bnxt_re_poll_one(cq, nwc, wc);
pthread_spin_unlock(&cq->cqq.qlock);
-
- /* TODO: Flush Management*/
+ /* Check if anything is there to flush. */
+ pthread_spin_lock(&cntx->fqlock);
+ left = nwc - dqed;
+ if (left)
+ dqed += bnxt_re_poll_flush_lists(cq, left, (wc + dqed));
+ pthread_spin_unlock(&cntx->fqlock);
return dqed;
}
static void bnxt_re_cleanup_cq(struct bnxt_re_qp *qp, struct bnxt_re_cq *cq)
{
+ struct bnxt_re_context *cntx;
struct bnxt_re_queue *que = &cq->cqq;
struct bnxt_re_bcqe *hdr;
struct bnxt_re_req_cqe *scqe;
@@ -468,6 +653,8 @@ static void bnxt_re_cleanup_cq(struct bnxt_re_qp *qp, struct bnxt_re_cq *cq)
void *cqe;
int indx, type;
+ cntx = to_bnxt_re_context(cq->ibvcq.context);
+
pthread_spin_lock(&que->qlock);
for (indx = 0; indx < que->depth; indx++) {
cqe = que->va + indx * bnxt_re_get_cqe_sz();
@@ -490,6 +677,11 @@ static void bnxt_re_cleanup_cq(struct bnxt_re_qp *qp, struct bnxt_re_cq *cq)
}
pthread_spin_unlock(&que->qlock);
+
+ pthread_spin_lock(&cntx->fqlock);
+ bnxt_re_list_del_node(&qp->snode, &cq->sfhead);
+ bnxt_re_list_del_node(&qp->rnode, &cq->rfhead);
+ pthread_spin_unlock(&cntx->fqlock);
}
void bnxt_re_cq_event(struct ibv_cq *ibvcq)
@@ -679,6 +871,8 @@ struct ibv_qp *bnxt_re_create_qp(struct ibv_pd *ibvpd,
cap->max_rsge = attr->cap.max_recv_sge;
cap->max_inline = attr->cap.max_inline_data;
cap->sqsig = attr->sq_sig_all;
+ INIT_DBLY_LIST_NODE(&qp->snode);
+ INIT_DBLY_LIST_NODE(&qp->rnode);
return &qp->ibvqp;
failcmd: