@@ -2041,7 +2041,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
u32 hdrsize = packet->hlen;
u32 psn = ib_bth_get_psn(packet->ohdr);
u32 pad = packet->pad;
- struct ib_wc wc;
+ struct rvt_wc wc;
u32 pmtu = qp->pmtu;
int diff;
struct ib_reth *reth;
@@ -60,7 +60,7 @@
static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
{
int i, j, ret;
- struct ib_wc wc;
+ struct rvt_wc wc;
struct rvt_lkey_table *rkt;
struct rvt_pd *pd;
struct rvt_sge_state *ss;
@@ -323,7 +323,7 @@ static void ruc_loopback(struct rvt_qp *sqp)
struct rvt_swqe *wqe;
struct rvt_sge *sge;
unsigned long flags;
- struct ib_wc wc;
+ struct rvt_wc wc;
u64 sdata;
atomic64_t *maddr;
enum ib_wc_status send_status;
@@ -312,7 +312,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
u32 hdrsize = packet->hlen;
u32 psn;
u32 pad = packet->pad;
- struct ib_wc wc;
+ struct rvt_wc wc;
u32 pmtu = qp->pmtu;
struct ib_reth *reth;
int ret;
@@ -79,7 +79,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
unsigned long flags;
struct rvt_sge_state ssge;
struct rvt_sge *sge;
- struct ib_wc wc;
+ struct rvt_wc wc;
u32 length;
enum ib_qp_type sqptype, dqptype;
@@ -847,7 +847,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
{
struct ib_other_headers *ohdr = packet->ohdr;
u32 hdrsize = packet->hlen;
- struct ib_wc wc;
+ struct rvt_wc wc;
u32 qkey;
u32 src_qp;
u16 pkey;
@@ -1744,7 +1744,7 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
u32 hdrsize;
u32 psn;
u32 pad;
- struct ib_wc wc;
+ struct rvt_wc wc;
u32 pmtu = qp->pmtu;
int diff;
struct ib_reth *reth;
@@ -44,7 +44,7 @@
static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
{
int i, j, ret;
- struct ib_wc wc;
+ struct rvt_wc wc;
struct rvt_lkey_table *rkt;
struct rvt_pd *pd;
struct rvt_sge_state *ss;
@@ -341,7 +341,7 @@ static void qib_ruc_loopback(struct rvt_qp *sqp)
struct rvt_swqe *wqe;
struct rvt_sge *sge;
unsigned long flags;
- struct ib_wc wc;
+ struct rvt_wc wc;
u64 sdata;
atomic64_t *maddr;
enum ib_wc_status send_status;
@@ -242,7 +242,7 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
u32 hdrsize;
u32 psn;
u32 pad;
- struct ib_wc wc;
+ struct rvt_wc wc;
u32 pmtu = qp->pmtu;
struct ib_reth *reth;
int ret;
@@ -58,7 +58,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
unsigned long flags;
struct rvt_sge_state ssge;
struct rvt_sge *sge;
- struct ib_wc wc;
+ struct rvt_wc wc;
u32 length;
enum ib_qp_type sqptype, dqptype;
@@ -434,7 +434,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int opcode;
u32 hdrsize;
u32 pad;
- struct ib_wc wc;
+ struct rvt_wc wc;
u32 qkey;
u32 src_qp;
u16 dlid;
@@ -60,7 +60,7 @@
*
* This may be called with qp->s_lock held.
*/
-void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
+void rvt_cq_enter(struct rvt_cq *cq, struct rvt_wc *entry, bool solicited)
{
struct rvt_cq_wc *wc;
unsigned long flags;
@@ -95,7 +95,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
return;
}
trace_rvt_cq_enter(cq, entry, head);
- if (cq->ip) {
+ if (!cq->kqueue) {
wc->uqueue[head].wr_id = entry->wr_id;
wc->uqueue[head].status = entry->status;
wc->uqueue[head].opcode = entry->opcode;
@@ -113,7 +113,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
/* Make sure entry is written before the head index. */
smp_wmb();
} else {
- wc->kqueue[head] = *entry;
+ cq->kqueue[head] = *entry;
}
wc->head = next;
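A minimal producer-side sketch (not part of the patch) of how a driver queues a completion after this change; the function name and field values are illustrative, and it assumes the usual rdmavt driver context:

	/* Illustrative only: signal a receive completion with the new rvt_wc type. */
	static void example_recv_complete(struct rvt_qp *qp, u64 wr_id, u32 byte_len)
	{
		struct rvt_wc wc;

		memset(&wc, 0, sizeof(wc));
		wc.wr_id = wr_id;
		wc.status = IB_WC_SUCCESS;
		wc.opcode = IB_WC_RECV;
		wc.byte_len = byte_len;
		wc.qp = &qp->ibqp;

		/*
		 * rvt_cq_enter() copies the fields into the mmap'ed uqueue for a
		 * user CQ, or stores the whole rvt_wc in cq->kqueue for a kernel
		 * CQ; the final argument flags the entry as solicited.
		 */
		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, true);
	}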
@@ -201,33 +201,27 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
if (!cq)
return ERR_PTR(-ENOMEM);
- /*
- * Allocate the completion queue entries and head/tail pointers.
- * This is allocated separately so that it can be resized and
- * also mapped into user space.
- * We need to use vmalloc() in order to support mmap and large
- * numbers of entries.
- */
- sz = sizeof(*wc);
- if (udata && udata->outlen >= sizeof(__u64))
- sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
- else
- sz += sizeof(struct ib_wc) * (entries + 1);
- wc = udata ?
- vmalloc_user(sz) :
- vzalloc_node(sz, rdi->dparms.node);
- if (!wc) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_cq;
- }
-
- /*
- * Return the address of the WC as the offset to mmap.
- * See rvt_mmap() for details.
- */
if (udata && udata->outlen >= sizeof(__u64)) {
int err;
+ /*
+ * Allocate the user completion queue entries and head/tail
+ * pointers. This is allocated separately so that it can be
+ * resized and also mapped into user space.
+ * We need to use vmalloc() in order to support mmap and large
+ * numbers of entries.
+ */
+ sz = sizeof(*wc) + sizeof(struct ib_uverbs_wc) * (entries + 1);
+ wc = vmalloc_user(sz);
+ if (!wc) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_cq;
+ }
+
+ /*
+ * Return the address of the WC as the offset to mmap.
+ * See rvt_mmap() for details.
+ */
cq->ip = rvt_create_mmap_info(rdi, sz, context, wc);
if (!cq->ip) {
ret = ERR_PTR(-ENOMEM);
@@ -240,6 +234,24 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
ret = ERR_PTR(err);
goto bail_ip;
}
+ } else {
+ /*
+ * Allocate head and tail pointers for kernel completion
+ * queue.
+ */
+ wc = vzalloc_node(sizeof(*wc), rdi->dparms.node);
+ if (!wc) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_cq;
+ }
+
+ /* Allocate the kernel completion queue entries */
+ sz = sizeof(struct rvt_wc) * (entries + 1);
+ cq->kqueue = vzalloc_node(sz, rdi->dparms.node);
+ if (!cq->kqueue) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_wc;
+ }
}
spin_lock_irq(&rdi->n_cqs_lock);
@@ -275,6 +287,7 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
goto done;
bail_ip:
+ vfree(cq->kqueue);
kfree(cq->ip);
bail_wc:
vfree(wc);
@@ -305,6 +318,7 @@ int rvt_destroy_cq(struct ib_cq *ibcq)
kref_put(&cq->ip->ref, rvt_release_mmap_info);
else
vfree(cq->queue);
+ vfree(cq->kqueue);
kfree(cq);
return 0;
@@ -352,11 +366,13 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
- struct rvt_cq_wc *old_wc;
- struct rvt_cq_wc *wc;
+ struct rvt_cq_wc *old_wc = NULL;
+ struct rvt_cq_wc *wc = NULL;
+ struct rvt_wc *old_kqueue = NULL;
u32 head, tail, n;
int ret;
u32 sz;
+ struct rvt_wc *kqueue = NULL;
struct rvt_dev_info *rdi = cq->rdi;
if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
@@ -365,16 +381,17 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
- sz = sizeof(*wc);
- if (udata && udata->outlen >= sizeof(__u64))
- sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
- else
- sz += sizeof(struct ib_wc) * (cqe + 1);
- wc = udata ?
- vmalloc_user(sz) :
- vzalloc_node(sz, rdi->dparms.node);
- if (!wc)
- return -ENOMEM;
+ if (!cq->kqueue) {
+ sz = sizeof(*wc) + sizeof(struct ib_uverbs_wc) * (cqe + 1);
+ wc = vmalloc_user(sz);
+ if (!wc)
+ return -ENOMEM;
+ } else {
+ sz = sizeof(struct rvt_wc) * (cqe + 1);
+ kqueue = vzalloc_node(sz, rdi->dparms.node);
+ if (!kqueue)
+ return -ENOMEM;
+ }
/* Check that we can write the offset to mmap. */
if (udata && udata->outlen >= sizeof(__u64)) {
@@ -390,11 +407,10 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
* Make sure head and tail are sane since they
* might be user writable.
*/
- old_wc = cq->queue;
- head = old_wc->head;
+ head = cq->queue->head;
if (head > (u32)cq->ibcq.cqe)
head = (u32)cq->ibcq.cqe;
- tail = old_wc->tail;
+ tail = cq->queue->tail;
if (tail > (u32)cq->ibcq.cqe)
tail = (u32)cq->ibcq.cqe;
if (head < tail)
@@ -406,21 +422,31 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
goto bail_unlock;
}
for (n = 0; tail != head; n++) {
- if (cq->ip)
- wc->uqueue[n] = old_wc->uqueue[tail];
+ if (!cq->kqueue)
+ wc->uqueue[n] = cq->queue->uqueue[tail];
else
- wc->kqueue[n] = old_wc->kqueue[tail];
+ kqueue[n] = cq->kqueue[tail];
if (tail == (u32)cq->ibcq.cqe)
tail = 0;
else
tail++;
}
cq->ibcq.cqe = cqe;
- wc->head = n;
- wc->tail = 0;
- cq->queue = wc;
+
+ /* A new work completion is only allocated for the user mode case */
+ if (wc) {
+ wc->head = n;
+ wc->tail = 0;
+
+ old_wc = cq->queue;
+ cq->queue = wc;
+ } else { /* kernel work completions were allocated */
+ cq->queue->head = n;
+ cq->queue->tail = 0;
+
+ old_kqueue = cq->kqueue;
+ cq->kqueue = kqueue;
+ }
spin_unlock_irq(&cq->lock);
+ vfree(old_kqueue);
vfree(old_wc);
if (cq->ip) {
@@ -454,6 +480,24 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
return ret;
}
+static void copy_rvt_wc_to_ib_wc(struct ib_wc *ibwc, struct rvt_wc *rvtwc)
+{
+ ibwc->wr_id = rvtwc->wr_id;
+ ibwc->status = rvtwc->status;
+ ibwc->opcode = rvtwc->opcode;
+ ibwc->vendor_err = rvtwc->vendor_err;
+ ibwc->byte_len = rvtwc->byte_len;
+ ibwc->qp = rvtwc->qp;
+ ibwc->ex.invalidate_rkey = rvtwc->ex.invalidate_rkey;
+ ibwc->src_qp = rvtwc->src_qp;
+ ibwc->wc_flags = rvtwc->wc_flags;
+ ibwc->slid = rvtwc->slid;
+ ibwc->pkey_index = rvtwc->pkey_index;
+ ibwc->sl = rvtwc->sl;
+ ibwc->dlid_path_bits = rvtwc->dlid_path_bits;
+ ibwc->port_num = rvtwc->port_num;
+}
+
/**
* rvt_poll_cq - poll for work completion entries
* @ibcq: the completion queue to poll
@@ -474,7 +518,7 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
u32 tail;
/* The kernel can only poll a kernel completion queue */
- if (cq->ip)
+ if (!cq->kqueue)
return -EINVAL;
spin_lock_irqsave(&cq->lock, flags);
@@ -487,8 +531,8 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
if (tail == wc->head)
break;
/* The kernel doesn't need a RMB since it has the lock. */
- trace_rvt_cq_poll(cq, &wc->kqueue[tail], npolled);
- *entry = wc->kqueue[tail];
+ trace_rvt_cq_poll(cq, &cq->kqueue[tail], npolled);
+ copy_rvt_wc_to_ib_wc(entry, &cq->kqueue[tail]);
if (tail >= cq->ibcq.cqe)
tail = 0;
else
@@ -1049,7 +1049,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
*/
int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
{
- struct ib_wc wc;
+ struct rvt_wc wc;
int ret = 0;
struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
@@ -1571,7 +1571,7 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
return -ENOMEM;
}
if (unlikely(qp_err_flush)) {
- struct ib_wc wc;
+ struct rvt_wc wc;
memset(&wc, 0, sizeof(wc));
wc.qp = &qp->ibqp;
@@ -76,7 +76,7 @@
DECLARE_EVENT_CLASS(
rvt_cq_entry_template,
- TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
+ TP_PROTO(struct rvt_cq *cq, struct rvt_wc *wc, u32 idx),
TP_ARGS(cq, wc, idx),
TP_STRUCT__entry(
RDI_DEV_ENTRY(cq->rdi)
@@ -110,12 +110,12 @@
DEFINE_EVENT(
rvt_cq_entry_template, rvt_cq_enter,
- TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
+ TP_PROTO(struct rvt_cq *cq, struct rvt_wc *wc, u32 idx),
TP_ARGS(cq, wc, idx));
DEFINE_EVENT(
rvt_cq_entry_template, rvt_cq_poll,
- TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
+ TP_PROTO(struct rvt_cq *cq, struct rvt_wc *wc, u32 idx),
TP_ARGS(cq, wc, idx));
#endif /* __RVT_TRACE_CQ_H */
@@ -61,6 +61,30 @@
#define RVT_CQ_NONE (IB_CQ_NEXT_COMP + 1)
/*
+ * If any fields within struct rvt_wc change, the function
+ * copy_rvt_wc_to_ib_wc() should be updated.
+ */
+struct rvt_wc {
+ u64 wr_id;
+ enum ib_wc_status status;
+ enum ib_wc_opcode opcode;
+ u32 vendor_err;
+ u32 byte_len;
+ struct ib_qp *qp;
+ union {
+ __be32 imm_data;
+ u32 invalidate_rkey;
+ } ex;
+ u32 src_qp;
+ int wc_flags;
+ u32 slid;
+ u16 pkey_index;
+ u8 sl;
+ u8 dlid_path_bits;
+ u8 port_num; /* valid only for DR SMPs on switches */
+} ____cacheline_aligned_in_smp;
+
+/*
* This structure is used to contain the head pointer, tail pointer,
* and completion queue entries as a single memory allocation so
* it can be mmap'ed into user space.
@@ -68,11 +92,8 @@
struct rvt_cq_wc {
u32 head; /* index of next entry to fill */
u32 tail; /* index of next ib_poll_cq() entry */
- union {
- /* these are actually size ibcq.cqe + 1 */
- struct ib_uverbs_wc uqueue[0];
- struct ib_wc kqueue[0];
- };
+ /* this is actually size ibcq.cqe + 1 */
+ struct ib_uverbs_wc uqueue[0];
};
/*
@@ -87,6 +108,8 @@ struct rvt_cq {
struct rvt_dev_info *rdi;
struct rvt_cq_wc *queue;
struct rvt_mmap_info *ip;
+ /* this is actually size ibcq.cqe + 1 */
+ struct rvt_wc *kqueue;
};
static inline struct rvt_cq *ibcq_to_rvtcq(struct ib_cq *ibcq)
@@ -94,6 +117,6 @@ struct rvt_cq {
return container_of(ibcq, struct rvt_cq, ibcq);
}
-void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited);
+void rvt_cq_enter(struct rvt_cq *cq, struct rvt_wc *entry, bool solicited);
#endif /* DEF_RDMAVT_INCCQH */
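Kernel consumers are unaffected at the verbs level: rvt_poll_cq() still fills caller-supplied struct ib_wc entries, converting each cacheline-aligned rvt_wc in cq->kqueue through copy_rvt_wc_to_ib_wc(). A minimal consumer sketch (illustrative, assuming a kernel CQ, i.e. one created without udata):

	static void example_drain_cq(struct ib_cq *ibcq)
	{
		struct ib_wc wc[8];
		int n, i;

		/* ib_poll_cq() dispatches to rvt_poll_cq() for rdmavt devices. */
		while ((n = ib_poll_cq(ibcq, ARRAY_SIZE(wc), wc)) > 0) {
			for (i = 0; i < n; i++)
				pr_debug("cqe: wr_id %llu status %d\n",
					 (unsigned long long)wc[i].wr_id,
					 wc[i].status);
		}
	}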
@@ -585,7 +585,7 @@ static inline void rvt_qp_swqe_complete(
if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED) ||
status != IB_WC_SUCCESS) {
- struct ib_wc wc;
+ struct rvt_wc wc;
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr.wr_id;