Message ID | 20190526114156.6827-11-bmt@zurich.ibm.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | SIW: Software iWarp RDMA (siw) driver | expand |
On Sun, May 26, 2019 at 01:41:54PM +0200, Bernard Metzler wrote: > Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> > --- > drivers/infiniband/sw/siw/siw_cq.c | 109 +++++++++++++++++++++++++++++ > 1 file changed, 109 insertions(+) > create mode 100644 drivers/infiniband/sw/siw/siw_cq.c > > diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c > new file mode 100644 > index 000000000000..e776e1b9273c > --- /dev/null > +++ b/drivers/infiniband/sw/siw/siw_cq.c > @@ -0,0 +1,109 @@ > +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause > + > +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ > +/* Copyright (c) 2008-2019, IBM Corporation */ > + > +#include <linux/errno.h> > +#include <linux/types.h> > +#include <linux/list.h> > + > +#include <rdma/iw_cm.h> > +#include <rdma/ib_verbs.h> > +#include <rdma/ib_smi.h> > +#include <rdma/ib_user_verbs.h> > + > +#include "siw.h" > +#include "siw_cm.h" > +#include "siw_debug.h" > + > +static int map_wc_opcode[SIW_NUM_OPCODES] = { > + [SIW_OP_WRITE] = IB_WC_RDMA_WRITE, > + [SIW_OP_SEND] = IB_WC_SEND, > + [SIW_OP_SEND_WITH_IMM] = IB_WC_SEND, > + [SIW_OP_READ] = IB_WC_RDMA_READ, > + [SIW_OP_READ_LOCAL_INV] = IB_WC_RDMA_READ, > + [SIW_OP_COMP_AND_SWAP] = IB_WC_COMP_SWAP, > + [SIW_OP_FETCH_AND_ADD] = IB_WC_FETCH_ADD, > + [SIW_OP_INVAL_STAG] = IB_WC_LOCAL_INV, > + [SIW_OP_REG_MR] = IB_WC_REG_MR, > + [SIW_OP_RECEIVE] = IB_WC_RECV, > + [SIW_OP_READ_RESPONSE] = -1 /* not used */ > +}; > + > +static struct { > + enum siw_opcode siw; > + enum ib_wc_status ib; > +} map_cqe_status[SIW_NUM_WC_STATUS] = { > + { SIW_WC_SUCCESS, IB_WC_SUCCESS }, > + { SIW_WC_LOC_LEN_ERR, IB_WC_LOC_LEN_ERR }, > + { SIW_WC_LOC_PROT_ERR, IB_WC_LOC_PROT_ERR }, > + { SIW_WC_LOC_QP_OP_ERR, IB_WC_LOC_QP_OP_ERR }, > + { SIW_WC_WR_FLUSH_ERR, IB_WC_WR_FLUSH_ERR }, > + { SIW_WC_BAD_RESP_ERR, IB_WC_BAD_RESP_ERR }, > + { SIW_WC_LOC_ACCESS_ERR, IB_WC_LOC_ACCESS_ERR }, > + { SIW_WC_REM_ACCESS_ERR, IB_WC_REM_ACCESS_ERR }, > + { 
SIW_WC_REM_INV_REQ_ERR, IB_WC_REM_INV_REQ_ERR }, > + { SIW_WC_GENERAL_ERR, IB_WC_GENERAL_ERR } > +}; > + > +/* > + * Reap one CQE from the CQ. Only used by kernel clients > + * during CQ normal operation. Might be called during CQ > + * flush for user mapped CQE array as well. > + */ > +int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) > +{ > + struct siw_cqe *cqe; > + unsigned long flags; > + > + spin_lock_irqsave(&cq->lock, flags); > + > + cqe = &cq->queue[cq->cq_get % cq->num_cqe]; Safer to ensure that cq_get is always in range, instead of performing modulo for accesses. > + if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) { > + memset(wc, 0, sizeof(*wc)); > + wc->wr_id = cqe->id; > + wc->status = map_cqe_status[cqe->status].ib; > + wc->opcode = map_wc_opcode[cqe->opcode]; > + wc->byte_len = cqe->bytes; > + > + /* > + * During CQ flush, also user land CQE's may get > + * reaped here, which do not hold a QP reference > + * and do not qualify for memory extension verbs. > + */ > + if (likely(cq->kernel_verbs)) { Let's try to find a way without "kernel_verbs" variable. > + if (cqe->flags & SIW_WQE_REM_INVAL) { > + wc->ex.invalidate_rkey = cqe->inval_stag; > + wc->wc_flags = IB_WC_WITH_INVALIDATE; > + } > + wc->qp = cqe->base_qp; > + siw_dbg_cq(cq, "idx %u, type %d, flags %2x, id 0x%p\n", > + cq->cq_get % cq->num_cqe, cqe->opcode, > + cqe->flags, (void *)cqe->id); > + } > + WRITE_ONCE(cqe->flags, 0); > + cq->cq_get++; > + > + spin_unlock_irqrestore(&cq->lock, flags); > + > + return 1; > + } > + spin_unlock_irqrestore(&cq->lock, flags); > + > + return 0; > +} > + > +/* > + * siw_cq_flush() > + * > + * Flush all CQ elements. > + */ > +void siw_cq_flush(struct siw_cq *cq) > +{ > + struct ib_wc wc; > + > + siw_dbg_cq(cq, "enter\n"); No "enter" prints, please. > + > + while (siw_reap_cqe(cq, &wc)) > + ; > +} > -- > 2.17.2 >
-----"Leon Romanovsky" <leon@kernel.org> wrote: ----- >To: "Bernard Metzler" <bmt@zurich.ibm.com> >From: "Leon Romanovsky" <leon@kernel.org> >Date: 06/10/2019 09:31AM >Cc: linux-rdma@vger.kernel.org >Subject: [EXTERNAL] Re: [PATCH for-next v1 10/12] SIW completion >queue methods > >On Sun, May 26, 2019 at 01:41:54PM +0200, Bernard Metzler wrote: >> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> >> --- >> drivers/infiniband/sw/siw/siw_cq.c | 109 >+++++++++++++++++++++++++++++ >> 1 file changed, 109 insertions(+) >> create mode 100644 drivers/infiniband/sw/siw/siw_cq.c >> >> diff --git a/drivers/infiniband/sw/siw/siw_cq.c >b/drivers/infiniband/sw/siw/siw_cq.c >> new file mode 100644 >> index 000000000000..e776e1b9273c >> --- /dev/null >> +++ b/drivers/infiniband/sw/siw/siw_cq.c >> @@ -0,0 +1,109 @@ >> +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause >> + >> +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ >> +/* Copyright (c) 2008-2019, IBM Corporation */ >> + >> +#include <linux/errno.h> >> +#include <linux/types.h> >> +#include <linux/list.h> >> + >> +#include <rdma/iw_cm.h> >> +#include <rdma/ib_verbs.h> >> +#include <rdma/ib_smi.h> >> +#include <rdma/ib_user_verbs.h> >> + >> +#include "siw.h" >> +#include "siw_cm.h" >> +#include "siw_debug.h" >> + >> +static int map_wc_opcode[SIW_NUM_OPCODES] = { >> + [SIW_OP_WRITE] = IB_WC_RDMA_WRITE, >> + [SIW_OP_SEND] = IB_WC_SEND, >> + [SIW_OP_SEND_WITH_IMM] = IB_WC_SEND, >> + [SIW_OP_READ] = IB_WC_RDMA_READ, >> + [SIW_OP_READ_LOCAL_INV] = IB_WC_RDMA_READ, >> + [SIW_OP_COMP_AND_SWAP] = IB_WC_COMP_SWAP, >> + [SIW_OP_FETCH_AND_ADD] = IB_WC_FETCH_ADD, >> + [SIW_OP_INVAL_STAG] = IB_WC_LOCAL_INV, >> + [SIW_OP_REG_MR] = IB_WC_REG_MR, >> + [SIW_OP_RECEIVE] = IB_WC_RECV, >> + [SIW_OP_READ_RESPONSE] = -1 /* not used */ >> +}; >> + >> +static struct { >> + enum siw_opcode siw; >> + enum ib_wc_status ib; >> +} map_cqe_status[SIW_NUM_WC_STATUS] = { >> + { SIW_WC_SUCCESS, IB_WC_SUCCESS }, >> + { SIW_WC_LOC_LEN_ERR, 
IB_WC_LOC_LEN_ERR }, >> + { SIW_WC_LOC_PROT_ERR, IB_WC_LOC_PROT_ERR }, >> + { SIW_WC_LOC_QP_OP_ERR, IB_WC_LOC_QP_OP_ERR }, >> + { SIW_WC_WR_FLUSH_ERR, IB_WC_WR_FLUSH_ERR }, >> + { SIW_WC_BAD_RESP_ERR, IB_WC_BAD_RESP_ERR }, >> + { SIW_WC_LOC_ACCESS_ERR, IB_WC_LOC_ACCESS_ERR }, >> + { SIW_WC_REM_ACCESS_ERR, IB_WC_REM_ACCESS_ERR }, >> + { SIW_WC_REM_INV_REQ_ERR, IB_WC_REM_INV_REQ_ERR }, >> + { SIW_WC_GENERAL_ERR, IB_WC_GENERAL_ERR } >> +}; >> + >> +/* >> + * Reap one CQE from the CQ. Only used by kernel clients >> + * during CQ normal operation. Might be called during CQ >> + * flush for user mapped CQE array as well. >> + */ >> +int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) >> +{ >> + struct siw_cqe *cqe; >> + unsigned long flags; >> + >> + spin_lock_irqsave(&cq->lock, flags); >> + >> + cqe = &cq->queue[cq->cq_get % cq->num_cqe]; > >Safer to ensure that cq_get is always in range, instead of performing >modulo for accesses. cq_get is a free running uint32_t, num_cqe is rounded up to the next power of two. So I never have to check the range. I should make a comment to make it readable. > >> + if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) { >> + memset(wc, 0, sizeof(*wc)); >> + wc->wr_id = cqe->id; >> + wc->status = map_cqe_status[cqe->status].ib; >> + wc->opcode = map_wc_opcode[cqe->opcode]; >> + wc->byte_len = cqe->bytes; >> + >> + /* >> + * During CQ flush, also user land CQE's may get >> + * reaped here, which do not hold a QP reference >> + * and do not qualify for memory extension verbs. >> + */ >> + if (likely(cq->kernel_verbs)) { > >Let's try to find a way without "kernel_verbs" variable. 
> >> + if (cqe->flags & SIW_WQE_REM_INVAL) { >> + wc->ex.invalidate_rkey = cqe->inval_stag; >> + wc->wc_flags = IB_WC_WITH_INVALIDATE; >> + } >> + wc->qp = cqe->base_qp; >> + siw_dbg_cq(cq, "idx %u, type %d, flags %2x, id 0x%p\n", >> + cq->cq_get % cq->num_cqe, cqe->opcode, >> + cqe->flags, (void *)cqe->id); >> + } >> + WRITE_ONCE(cqe->flags, 0); >> + cq->cq_get++; >> + >> + spin_unlock_irqrestore(&cq->lock, flags); >> + >> + return 1; >> + } >> + spin_unlock_irqrestore(&cq->lock, flags); >> + >> + return 0; >> +} >> + >> +/* >> + * siw_cq_flush() >> + * >> + * Flush all CQ elements. >> + */ >> +void siw_cq_flush(struct siw_cq *cq) >> +{ >> + struct ib_wc wc; >> + >> + siw_dbg_cq(cq, "enter\n"); > >No "enter" prints, please. All gone ;) > >> + >> + while (siw_reap_cqe(cq, &wc)) >> + ; >> +} >> -- >> 2.17.2 >> > >
diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c new file mode 100644 index 000000000000..e776e1b9273c --- /dev/null +++ b/drivers/infiniband/sw/siw/siw_cq.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/list.h> + +#include <rdma/iw_cm.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> + +#include "siw.h" +#include "siw_cm.h" +#include "siw_debug.h" + +static int map_wc_opcode[SIW_NUM_OPCODES] = { + [SIW_OP_WRITE] = IB_WC_RDMA_WRITE, + [SIW_OP_SEND] = IB_WC_SEND, + [SIW_OP_SEND_WITH_IMM] = IB_WC_SEND, + [SIW_OP_READ] = IB_WC_RDMA_READ, + [SIW_OP_READ_LOCAL_INV] = IB_WC_RDMA_READ, + [SIW_OP_COMP_AND_SWAP] = IB_WC_COMP_SWAP, + [SIW_OP_FETCH_AND_ADD] = IB_WC_FETCH_ADD, + [SIW_OP_INVAL_STAG] = IB_WC_LOCAL_INV, + [SIW_OP_REG_MR] = IB_WC_REG_MR, + [SIW_OP_RECEIVE] = IB_WC_RECV, + [SIW_OP_READ_RESPONSE] = -1 /* not used */ +}; + +static struct { + enum siw_opcode siw; + enum ib_wc_status ib; +} map_cqe_status[SIW_NUM_WC_STATUS] = { + { SIW_WC_SUCCESS, IB_WC_SUCCESS }, + { SIW_WC_LOC_LEN_ERR, IB_WC_LOC_LEN_ERR }, + { SIW_WC_LOC_PROT_ERR, IB_WC_LOC_PROT_ERR }, + { SIW_WC_LOC_QP_OP_ERR, IB_WC_LOC_QP_OP_ERR }, + { SIW_WC_WR_FLUSH_ERR, IB_WC_WR_FLUSH_ERR }, + { SIW_WC_BAD_RESP_ERR, IB_WC_BAD_RESP_ERR }, + { SIW_WC_LOC_ACCESS_ERR, IB_WC_LOC_ACCESS_ERR }, + { SIW_WC_REM_ACCESS_ERR, IB_WC_REM_ACCESS_ERR }, + { SIW_WC_REM_INV_REQ_ERR, IB_WC_REM_INV_REQ_ERR }, + { SIW_WC_GENERAL_ERR, IB_WC_GENERAL_ERR } +}; + +/* + * Reap one CQE from the CQ. Only used by kernel clients + * during CQ normal operation. Might be called during CQ + * flush for user mapped CQE array as well. 
+ */ +int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) +{ + struct siw_cqe *cqe; + unsigned long flags; + + spin_lock_irqsave(&cq->lock, flags); + + cqe = &cq->queue[cq->cq_get % cq->num_cqe]; + if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) { + memset(wc, 0, sizeof(*wc)); + wc->wr_id = cqe->id; + wc->status = map_cqe_status[cqe->status].ib; + wc->opcode = map_wc_opcode[cqe->opcode]; + wc->byte_len = cqe->bytes; + + /* + * During CQ flush, also user land CQE's may get + * reaped here, which do not hold a QP reference + * and do not qualify for memory extension verbs. + */ + if (likely(cq->kernel_verbs)) { + if (cqe->flags & SIW_WQE_REM_INVAL) { + wc->ex.invalidate_rkey = cqe->inval_stag; + wc->wc_flags = IB_WC_WITH_INVALIDATE; + } + wc->qp = cqe->base_qp; + siw_dbg_cq(cq, "idx %u, type %d, flags %2x, id 0x%p\n", + cq->cq_get % cq->num_cqe, cqe->opcode, + cqe->flags, (void *)cqe->id); + } + WRITE_ONCE(cqe->flags, 0); + cq->cq_get++; + + spin_unlock_irqrestore(&cq->lock, flags); + + return 1; + } + spin_unlock_irqrestore(&cq->lock, flags); + + return 0; +} + +/* + * siw_cq_flush() + * + * Flush all CQ elements. + */ +void siw_cq_flush(struct siw_cq *cq) +{ + struct ib_wc wc; + + siw_dbg_cq(cq, "enter\n"); + + while (siw_reap_cqe(cq, &wc)) + ; +}
Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> --- drivers/infiniband/sw/siw/siw_cq.c | 109 +++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 drivers/infiniband/sw/siw/siw_cq.c