Message ID | 20190219100903.15408-12-bmt@zurich.ibm.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Jason Gunthorpe |
Headers | show |
Series | SIW: Request for Comments | expand |
On 19-Feb-19 12:09, Bernard Metzler wrote: > Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> > --- > drivers/infiniband/sw/siw/siw_cq.c | 149 +++++++++++++++++++++++++++++ > 1 file changed, 149 insertions(+) > create mode 100644 drivers/infiniband/sw/siw/siw_cq.c > > diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c > new file mode 100644 > index 000000000000..28c436f43597 > --- /dev/null > +++ b/drivers/infiniband/sw/siw/siw_cq.c > @@ -0,0 +1,149 @@ > +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause > +/* > + * Software iWARP device driver > + * > + * Authors: Bernard Metzler <bmt@zurich.ibm.com> > + * > + * Copyright (c) 2008-2018, IBM Corporation > + * > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above copyright notice, > + * this list of conditions and the following disclaimer. > + * > + * - Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * - Neither the name of IBM nor the names of its contributors may be > + * used to endorse or promote products derived from this software without > + * specific prior written permission. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > + */ > + > +#include <linux/errno.h> > +#include <linux/types.h> > +#include <linux/list.h> > + > +#include <rdma/iw_cm.h> > +#include <rdma/ib_verbs.h> > +#include <rdma/ib_smi.h> > +#include <rdma/ib_user_verbs.h> > + > +#include "siw.h" > +#include "siw_obj.h" > +#include "siw_cm.h" > + > +static int map_wc_opcode[SIW_NUM_OPCODES] = { > + [SIW_OP_WRITE] = IB_WC_RDMA_WRITE, > + [SIW_OP_SEND] = IB_WC_SEND, > + [SIW_OP_SEND_WITH_IMM] = IB_WC_SEND, > + [SIW_OP_READ] = IB_WC_RDMA_READ, > + [SIW_OP_READ_LOCAL_INV] = IB_WC_RDMA_READ, > + [SIW_OP_COMP_AND_SWAP] = IB_WC_COMP_SWAP, > + [SIW_OP_FETCH_AND_ADD] = IB_WC_FETCH_ADD, > + [SIW_OP_INVAL_STAG] = IB_WC_LOCAL_INV, > + [SIW_OP_REG_MR] = IB_WC_REG_MR, > + [SIW_OP_RECEIVE] = IB_WC_RECV, > + [SIW_OP_READ_RESPONSE] = -1 /* not used */ > +}; > + > +static struct { > + enum siw_opcode siw; > + enum ib_wc_status ib; > +} map_cqe_status[SIW_NUM_WC_STATUS] = { > + {SIW_WC_SUCCESS, IB_WC_SUCCESS}, > + {SIW_WC_LOC_LEN_ERR, IB_WC_LOC_LEN_ERR}, > + {SIW_WC_LOC_PROT_ERR, IB_WC_LOC_PROT_ERR}, > + {SIW_WC_LOC_QP_OP_ERR, IB_WC_LOC_QP_OP_ERR}, > + {SIW_WC_WR_FLUSH_ERR, IB_WC_WR_FLUSH_ERR}, > + {SIW_WC_BAD_RESP_ERR, IB_WC_BAD_RESP_ERR}, > + {SIW_WC_LOC_ACCESS_ERR, IB_WC_LOC_ACCESS_ERR}, > + {SIW_WC_REM_ACCESS_ERR, IB_WC_REM_ACCESS_ERR}, > + {SIW_WC_REM_INV_REQ_ERR, IB_WC_REM_INV_REQ_ERR}, > + {SIW_WC_GENERAL_ERR, IB_WC_GENERAL_ERR} > +}; > + > +/* > + * Reap one CQE from the CQ. Only used by kernel clients > + * during CQ normal operation. Might be called during CQ > + * flush for user mapped CQE array as well. > + */ > +int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) > +{ > + struct siw_cqe *cqe; > + unsigned long flags; > + > + spin_lock_irqsave(&cq->lock, flags); > + > + cqe = &cq->queue[cq->cq_get % cq->num_cqe]; > + if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) { > + memset(wc, 0, sizeof(*wc)); > + wc->wr_id = cqe->id; > + wc->status = map_cqe_status[cqe->status].ib; > + wc->opcode = map_wc_opcode[cqe->opcode]; > + wc->byte_len = cqe->bytes; Don't you need to fill wc->wc_flags as well? > + > + /* > + * During CQ flush, also user land CQE's may > + * get reaped here, which do not hold a QP > + * reference. > + */ > + if (likely(cq->kernel_verbs)) { > + wc->qp = &((struct siw_qp *)cqe->qp)->base_qp; > + siw_dbg_obj(cq, "[QP %d]: reap wqe type %d, idx %d\n", > + QP_ID(cqe->qp), cqe->opcode, > + cq->cq_get % cq->num_cqe); > + siw_qp_put(cqe->qp); > + } > + WRITE_ONCE(cqe->flags, 0); > + cq->cq_get++; > + > + spin_unlock_irqrestore(&cq->lock, flags); > + > + return 1; > + } > + spin_unlock_irqrestore(&cq->lock, flags); > + > + return 0; > +} > + > +void siw_completion_task(unsigned long data) > +{ > + struct ib_cq *base_cq = (struct ib_cq *)data; > + > + (*base_cq->comp_handler)(base_cq, base_cq->cq_context); > +} > + > +/* > + * siw_cq_flush() > + * > + * Flush all CQ elements. > + */ > +void siw_cq_flush(struct siw_cq *cq) > +{ > + struct ib_wc wc; > + int got; > + > + siw_dbg(cq->hdr.sdev, "[CQ %d]: enter\n", OBJ_ID(cq)); > + > + do { > + got = siw_reap_cqe(cq, &wc); > + } while (got > 0); > +} > Reviewed-by: Gal Pressman <galpress@amazon.com>
-----"Gal Pressman" <galpress@amazon.com> wrote: ----- >To: "Bernard Metzler" <bmt@zurich.ibm.com>, ><linux-rdma@vger.kernel.org> >From: "Gal Pressman" <galpress@amazon.com> >Date: 02/28/2019 03:15PM >Subject: Re: [PATCH v5 11/13] SIW completion queue methods > >On 19-Feb-19 12:09, Bernard Metzler wrote: >> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> >> --- >> drivers/infiniband/sw/siw/siw_cq.c | 149 >+++++++++++++++++++++++++++++ >> 1 file changed, 149 insertions(+) >> create mode 100644 drivers/infiniband/sw/siw/siw_cq.c >> >> diff --git a/drivers/infiniband/sw/siw/siw_cq.c >b/drivers/infiniband/sw/siw/siw_cq.c >> new file mode 100644 >> index 000000000000..28c436f43597 >> --- /dev/null >> +++ b/drivers/infiniband/sw/siw/siw_cq.c >> @@ -0,0 +1,149 @@ >> +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause >> +/* >> + * Software iWARP device driver >> + * >> + * Authors: Bernard Metzler <bmt@zurich.ibm.com> >> + * >> + * Copyright (c) 2008-2018, IBM Corporation >> + * >> + * This software is available to you under a choice of one of two >> + * licenses. You may choose to be licensed under the terms of the >GNU >> + * General Public License (GPL) Version 2, available from the file >> + * COPYING in the main directory of this source tree, or the >> + * BSD license below: >> + * >> + * Redistribution and use in source and binary forms, with or >> + * without modification, are permitted provided that the >following >> + * conditions are met: >> + * >> + * - Redistributions of source code must retain the above >copyright notice, >> + * this list of conditions and the following disclaimer. >> + * >> + * - Redistributions in binary form must reproduce the above >copyright >> + * notice, this list of conditions and the following >disclaimer in the >> + * documentation and/or other materials provided with the >distribution. >> + * >> + * - Neither the name of IBM nor the names of its contributors >may be >> + * used to endorse or promote products derived from this >software without >> + * specific prior written permission. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES >OF >> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND >> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT >HOLDERS >> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN >AN >> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR >IN >> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN >THE >> + * SOFTWARE. >> + */ >> + >> +#include <linux/errno.h> >> +#include <linux/types.h> >> +#include <linux/list.h> >> + >> +#include <rdma/iw_cm.h> >> +#include <rdma/ib_verbs.h> >> +#include <rdma/ib_smi.h> >> +#include <rdma/ib_user_verbs.h> >> + >> +#include "siw.h" >> +#include "siw_obj.h" >> +#include "siw_cm.h" >> + >> +static int map_wc_opcode[SIW_NUM_OPCODES] = { >> + [SIW_OP_WRITE] = IB_WC_RDMA_WRITE, >> + [SIW_OP_SEND] = IB_WC_SEND, >> + [SIW_OP_SEND_WITH_IMM] = IB_WC_SEND, >> + [SIW_OP_READ] = IB_WC_RDMA_READ, >> + [SIW_OP_READ_LOCAL_INV] = IB_WC_RDMA_READ, >> + [SIW_OP_COMP_AND_SWAP] = IB_WC_COMP_SWAP, >> + [SIW_OP_FETCH_AND_ADD] = IB_WC_FETCH_ADD, >> + [SIW_OP_INVAL_STAG] = IB_WC_LOCAL_INV, >> + [SIW_OP_REG_MR] = IB_WC_REG_MR, >> + [SIW_OP_RECEIVE] = IB_WC_RECV, >> + [SIW_OP_READ_RESPONSE] = -1 /* not used */ >> +}; >> + >> +static struct { >> + enum siw_opcode siw; >> + enum ib_wc_status ib; >> +} map_cqe_status[SIW_NUM_WC_STATUS] = { >> + {SIW_WC_SUCCESS, IB_WC_SUCCESS}, >> + {SIW_WC_LOC_LEN_ERR, IB_WC_LOC_LEN_ERR}, >> + {SIW_WC_LOC_PROT_ERR, IB_WC_LOC_PROT_ERR}, >> + {SIW_WC_LOC_QP_OP_ERR, IB_WC_LOC_QP_OP_ERR}, >> + {SIW_WC_WR_FLUSH_ERR, IB_WC_WR_FLUSH_ERR}, >> + {SIW_WC_BAD_RESP_ERR, IB_WC_BAD_RESP_ERR}, >> + {SIW_WC_LOC_ACCESS_ERR, IB_WC_LOC_ACCESS_ERR}, >> + {SIW_WC_REM_ACCESS_ERR, IB_WC_REM_ACCESS_ERR}, >> + {SIW_WC_REM_INV_REQ_ERR, IB_WC_REM_INV_REQ_ERR}, >> + {SIW_WC_GENERAL_ERR, IB_WC_GENERAL_ERR} >> +}; >> + >> +/* >> + * Reap one CQE from the CQ. Only used by kernel clients >> + * during CQ normal operation. Might be called during CQ >> + * flush for user mapped CQE array as well. >> + */ >> +int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) >> +{ >> + struct siw_cqe *cqe; >> + unsigned long flags; >> + >> + spin_lock_irqsave(&cq->lock, flags); >> + >> + cqe = &cq->queue[cq->cq_get % cq->num_cqe]; >> + if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) { >> + memset(wc, 0, sizeof(*wc)); >> + wc->wr_id = cqe->id; >> + wc->status = map_cqe_status[cqe->status].ib; >> + wc->opcode = map_wc_opcode[cqe->opcode]; >> + wc->byte_len = cqe->bytes; > >Don't you need to fill wc->wc_flags as well? good point. all zero here, since no immediate data are implemented in the version I am pushing. We have a prototype implementing RFC 7306 (atomics and immediate data), but let's do such an update later, if we got basic siw accepted. Other completion flags are not relevant as well. > >> + >> + /* >> + * During CQ flush, also user land CQE's may >> + * get reaped here, which do not hold a QP >> + * reference. >> + */ >> + if (likely(cq->kernel_verbs)) { >> + wc->qp = &((struct siw_qp *)cqe->qp)->base_qp; >> + siw_dbg_obj(cq, "[QP %d]: reap wqe type %d, idx %d\n", >> + QP_ID(cqe->qp), cqe->opcode, >> + cq->cq_get % cq->num_cqe); >> + siw_qp_put(cqe->qp); >> + } >> + WRITE_ONCE(cqe->flags, 0); >> + cq->cq_get++; >> + >> + spin_unlock_irqrestore(&cq->lock, flags); >> + >> + return 1; >> + } >> + spin_unlock_irqrestore(&cq->lock, flags); >> + >> + return 0; >> +} >> + >> +void siw_completion_task(unsigned long data) >> +{ >> + struct ib_cq *base_cq = (struct ib_cq *)data; >> + >> + (*base_cq->comp_handler)(base_cq, base_cq->cq_context); >> +} >> + >> +/* >> + * siw_cq_flush() >> + * >> + * Flush all CQ elements. >> + */ >> +void siw_cq_flush(struct siw_cq *cq) >> +{ >> + struct ib_wc wc; >> + int got; >> + >> + siw_dbg(cq->hdr.sdev, "[CQ %d]: enter\n", OBJ_ID(cq)); >> + >> + do { >> + got = siw_reap_cqe(cq, &wc); >> + } while (got > 0); >> +} >> > >Reviewed-by: Gal Pressman <galpress@amazon.com> > >
> On Feb 28, 2019, at 9:36 AM, Bernard Metzler <BMT@zurich.ibm.com> wrote: > > -----"Gal Pressman" <galpress@amazon.com> wrote: ----- > >> To: "Bernard Metzler" <bmt@zurich.ibm.com>, >> <linux-rdma@vger.kernel.org> >> From: "Gal Pressman" <galpress@amazon.com> >> Date: 02/28/2019 03:15PM >> Subject: Re: [PATCH v5 11/13] SIW completion queue methods >> >> On 19-Feb-19 12:09, Bernard Metzler wrote: >>> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> >>> --- >>> drivers/infiniband/sw/siw/siw_cq.c | 149 >> +++++++++++++++++++++++++++++ >>> 1 file changed, 149 insertions(+) >>> create mode 100644 drivers/infiniband/sw/siw/siw_cq.c >>> >>> diff --git a/drivers/infiniband/sw/siw/siw_cq.c >> b/drivers/infiniband/sw/siw/siw_cq.c >>> new file mode 100644 >>> index 000000000000..28c436f43597 >>> --- /dev/null >>> +++ b/drivers/infiniband/sw/siw/siw_cq.c >>> @@ -0,0 +1,149 @@ >>> +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause >>> +/* >>> + * Software iWARP device driver >>> + * >>> + * Authors: Bernard Metzler <bmt@zurich.ibm.com> >>> + * >>> + * Copyright (c) 2008-2018, IBM Corporation >>> + * >>> + * This software is available to you under a choice of one of two >>> + * licenses. You may choose to be licensed under the terms of the >> GNU >>> + * General Public License (GPL) Version 2, available from the file >>> + * COPYING in the main directory of this source tree, or the >>> + * BSD license below: >>> + * >>> + * Redistribution and use in source and binary forms, with or >>> + * without modification, are permitted provided that the >> following >>> + * conditions are met: >>> + * >>> + * - Redistributions of source code must retain the above >> copyright notice, >>> + * this list of conditions and the following disclaimer. >>> + * >>> + * - Redistributions in binary form must reproduce the above >> copyright >>> + * notice, this list of conditions and the following >> disclaimer in the >>> + * documentation and/or other materials provided with the >> distribution. >>> + * >>> + * - Neither the name of IBM nor the names of its contributors >> may be >>> + * used to endorse or promote products derived from this >> software without >>> + * specific prior written permission. >>> + * >>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >>> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES >> OF >>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND >>> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT >> HOLDERS >>> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN >> AN >>> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR >> IN >>> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN >> THE >>> + * SOFTWARE. >>> + */ >>> + >>> +#include <linux/errno.h> >>> +#include <linux/types.h> >>> +#include <linux/list.h> >>> + >>> +#include <rdma/iw_cm.h> >>> +#include <rdma/ib_verbs.h> >>> +#include <rdma/ib_smi.h> >>> +#include <rdma/ib_user_verbs.h> >>> + >>> +#include "siw.h" >>> +#include "siw_obj.h" >>> +#include "siw_cm.h" >>> + >>> +static int map_wc_opcode[SIW_NUM_OPCODES] = { >>> + [SIW_OP_WRITE] = IB_WC_RDMA_WRITE, >>> + [SIW_OP_SEND] = IB_WC_SEND, >>> + [SIW_OP_SEND_WITH_IMM] = IB_WC_SEND, >>> + [SIW_OP_READ] = IB_WC_RDMA_READ, >>> + [SIW_OP_READ_LOCAL_INV] = IB_WC_RDMA_READ, >>> + [SIW_OP_COMP_AND_SWAP] = IB_WC_COMP_SWAP, >>> + [SIW_OP_FETCH_AND_ADD] = IB_WC_FETCH_ADD, >>> + [SIW_OP_INVAL_STAG] = IB_WC_LOCAL_INV, >>> + [SIW_OP_REG_MR] = IB_WC_REG_MR, >>> + [SIW_OP_RECEIVE] = IB_WC_RECV, >>> + [SIW_OP_READ_RESPONSE] = -1 /* not used */ >>> +}; >>> + >>> +static struct { >>> + enum siw_opcode siw; >>> + enum ib_wc_status ib; >>> +} map_cqe_status[SIW_NUM_WC_STATUS] = { >>> + {SIW_WC_SUCCESS, IB_WC_SUCCESS}, >>> + {SIW_WC_LOC_LEN_ERR, IB_WC_LOC_LEN_ERR}, >>> + {SIW_WC_LOC_PROT_ERR, IB_WC_LOC_PROT_ERR}, >>> + {SIW_WC_LOC_QP_OP_ERR, IB_WC_LOC_QP_OP_ERR}, >>> + {SIW_WC_WR_FLUSH_ERR, IB_WC_WR_FLUSH_ERR}, >>> + {SIW_WC_BAD_RESP_ERR, IB_WC_BAD_RESP_ERR}, >>> + {SIW_WC_LOC_ACCESS_ERR, IB_WC_LOC_ACCESS_ERR}, >>> + {SIW_WC_REM_ACCESS_ERR, IB_WC_REM_ACCESS_ERR}, >>> + {SIW_WC_REM_INV_REQ_ERR, IB_WC_REM_INV_REQ_ERR}, >>> + {SIW_WC_GENERAL_ERR, IB_WC_GENERAL_ERR} >>> +}; >>> + >>> +/* >>> + * Reap one CQE from the CQ. Only used by kernel clients >>> + * during CQ normal operation. Might be called during CQ >>> + * flush for user mapped CQE array as well. >>> + */ >>> +int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) >>> +{ >>> + struct siw_cqe *cqe; >>> + unsigned long flags; >>> + >>> + spin_lock_irqsave(&cq->lock, flags); >>> + >>> + cqe = &cq->queue[cq->cq_get % cq->num_cqe]; >>> + if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) { >>> + memset(wc, 0, sizeof(*wc)); >>> + wc->wr_id = cqe->id; >>> + wc->status = map_cqe_status[cqe->status].ib; >>> + wc->opcode = map_wc_opcode[cqe->opcode]; >>> + wc->byte_len = cqe->bytes; >> >> Don't you need to fill wc->wc_flags as well? > > good point. > all zero here, since no immediate data are implemented > in the version I am pushing. We have a prototype implementing > RFC 7306 (atomics and immediate data), but let's do such > an update later, if we got basic siw accepted. > > Other completion flags are not relevant as well. Hum. Does that mean remote invalidation is not supported? >>> + >>> + /* >>> + * During CQ flush, also user land CQE's may >>> + * get reaped here, which do not hold a QP >>> + * reference. >>> + */ >>> + if (likely(cq->kernel_verbs)) { >>> + wc->qp = &((struct siw_qp *)cqe->qp)->base_qp; >>> + siw_dbg_obj(cq, "[QP %d]: reap wqe type %d, idx %d\n", >>> + QP_ID(cqe->qp), cqe->opcode, >>> + cq->cq_get % cq->num_cqe); >>> + siw_qp_put(cqe->qp); >>> + } >>> + WRITE_ONCE(cqe->flags, 0); >>> + cq->cq_get++; >>> + >>> + spin_unlock_irqrestore(&cq->lock, flags); >>> + >>> + return 1; >>> + } >>> + spin_unlock_irqrestore(&cq->lock, flags); >>> + >>> + return 0; >>> +} >>> + >>> +void siw_completion_task(unsigned long data) >>> +{ >>> + struct ib_cq *base_cq = (struct ib_cq *)data; >>> + >>> + (*base_cq->comp_handler)(base_cq, base_cq->cq_context); >>> +} >>> + >>> +/* >>> + * siw_cq_flush() >>> + * >>> + * Flush all CQ elements. >>> + */ >>> +void siw_cq_flush(struct siw_cq *cq) >>> +{ >>> + struct ib_wc wc; >>> + int got; >>> + >>> + siw_dbg(cq->hdr.sdev, "[CQ %d]: enter\n", OBJ_ID(cq)); >>> + >>> + do { >>> + got = siw_reap_cqe(cq, &wc); >>> + } while (got > 0); >>> +} >>> >> >> Reviewed-by: Gal Pressman <galpress@amazon.com> -- Chuck Lever
-----"Chuck Lever" <chuck.lever@oracle.com> wrote: ----- >To: "Bernard Metzler" <BMT@zurich.ibm.com> >From: "Chuck Lever" <chuck.lever@oracle.com> >Date: 02/28/2019 03:57PM >Cc: "Gal Pressman" <galpress@amazon.com>, linux-rdma@vger.kernel.org >Subject: Re: [PATCH v5 11/13] SIW completion queue methods > > >> On Feb 28, 2019, at 9:36 AM, Bernard Metzler <BMT@zurich.ibm.com> >wrote: >> >> -----"Gal Pressman" <galpress@amazon.com> wrote: ----- >> >>> To: "Bernard Metzler" <bmt@zurich.ibm.com>, >>> <linux-rdma@vger.kernel.org> >>> From: "Gal Pressman" <galpress@amazon.com> >>> Date: 02/28/2019 03:15PM >>> Subject: Re: [PATCH v5 11/13] SIW completion queue methods >>> >>> On 19-Feb-19 12:09, Bernard Metzler wrote: >>>> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> >>>> --- >>>> drivers/infiniband/sw/siw/siw_cq.c | 149 >>> +++++++++++++++++++++++++++++ >>>> 1 file changed, 149 insertions(+) >>>> create mode 100644 drivers/infiniband/sw/siw/siw_cq.c >>>> >>>> diff --git a/drivers/infiniband/sw/siw/siw_cq.c >>> b/drivers/infiniband/sw/siw/siw_cq.c >>>> new file mode 100644 >>>> index 000000000000..28c436f43597 >>>> --- /dev/null >>>> +++ b/drivers/infiniband/sw/siw/siw_cq.c >>>> @@ -0,0 +1,149 @@ >>>> +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause >>>> +/* >>>> + * Software iWARP device driver >>>> + * >>>> + * Authors: Bernard Metzler <bmt@zurich.ibm.com> >>>> + * >>>> + * Copyright (c) 2008-2018, IBM Corporation >>>> + * >>>> + * This software is available to you under a choice of one of >two >>>> + * licenses. You may choose to be licensed under the terms of >the >>> GNU >>>> + * General Public License (GPL) Version 2, available from the >file >>>> + * COPYING in the main directory of this source tree, or the >>>> + * BSD license below: >>>> + * >>>> + * Redistribution and use in source and binary forms, with or >>>> + * without modification, are permitted provided that the >>> following >>>> + * conditions are met: >>>> + * >>>> + * - Redistributions of source code must retain the above >>> copyright notice, >>>> + * this list of conditions and the following disclaimer. >>>> + * >>>> + * - Redistributions in binary form must reproduce the above >>> copyright >>>> + * notice, this list of conditions and the following >>> disclaimer in the >>>> + * documentation and/or other materials provided with the >>> distribution. >>>> + * >>>> + * - Neither the name of IBM nor the names of its contributors >>> may be >>>> + * used to endorse or promote products derived from this >>> software without >>>> + * specific prior written permission. >>>> + * >>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY >KIND, >>>> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE >WARRANTIES >>> OF >>>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND >>>> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT >>> HOLDERS >>>> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER >IN >>> AN >>>> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF >OR >>> IN >>>> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN >>> THE >>>> + * SOFTWARE. >>>> + */ >>>> + >>>> +#include <linux/errno.h> >>>> +#include <linux/types.h> >>>> +#include <linux/list.h> >>>> + >>>> +#include <rdma/iw_cm.h> >>>> +#include <rdma/ib_verbs.h> >>>> +#include <rdma/ib_smi.h> >>>> +#include <rdma/ib_user_verbs.h> >>>> + >>>> +#include "siw.h" >>>> +#include "siw_obj.h" >>>> +#include "siw_cm.h" >>>> + >>>> +static int map_wc_opcode[SIW_NUM_OPCODES] = { >>>> + [SIW_OP_WRITE] = IB_WC_RDMA_WRITE, >>>> + [SIW_OP_SEND] = IB_WC_SEND, >>>> + [SIW_OP_SEND_WITH_IMM] = IB_WC_SEND, >>>> + [SIW_OP_READ] = IB_WC_RDMA_READ, >>>> + [SIW_OP_READ_LOCAL_INV] = IB_WC_RDMA_READ, >>>> + [SIW_OP_COMP_AND_SWAP] = IB_WC_COMP_SWAP, >>>> + [SIW_OP_FETCH_AND_ADD] = IB_WC_FETCH_ADD, >>>> + [SIW_OP_INVAL_STAG] = IB_WC_LOCAL_INV, >>>> + [SIW_OP_REG_MR] = IB_WC_REG_MR, >>>> + [SIW_OP_RECEIVE] = IB_WC_RECV, >>>> + [SIW_OP_READ_RESPONSE] = -1 /* not used */ >>>> +}; >>>> + >>>> +static struct { >>>> + enum siw_opcode siw; >>>> + enum ib_wc_status ib; >>>> +} map_cqe_status[SIW_NUM_WC_STATUS] = { >>>> + {SIW_WC_SUCCESS, IB_WC_SUCCESS}, >>>> + {SIW_WC_LOC_LEN_ERR, IB_WC_LOC_LEN_ERR}, >>>> + {SIW_WC_LOC_PROT_ERR, IB_WC_LOC_PROT_ERR}, >>>> + {SIW_WC_LOC_QP_OP_ERR, IB_WC_LOC_QP_OP_ERR}, >>>> + {SIW_WC_WR_FLUSH_ERR, IB_WC_WR_FLUSH_ERR}, >>>> + {SIW_WC_BAD_RESP_ERR, IB_WC_BAD_RESP_ERR}, >>>> + {SIW_WC_LOC_ACCESS_ERR, IB_WC_LOC_ACCESS_ERR}, >>>> + {SIW_WC_REM_ACCESS_ERR, IB_WC_REM_ACCESS_ERR}, >>>> + {SIW_WC_REM_INV_REQ_ERR, IB_WC_REM_INV_REQ_ERR}, >>>> + {SIW_WC_GENERAL_ERR, IB_WC_GENERAL_ERR} >>>> +}; >>>> + >>>> +/* >>>> + * Reap one CQE from the CQ. Only used by kernel clients >>>> + * during CQ normal operation. Might be called during CQ >>>> + * flush for user mapped CQE array as well. >>>> + */ >>>> +int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) >>>> +{ >>>> + struct siw_cqe *cqe; >>>> + unsigned long flags; >>>> + >>>> + spin_lock_irqsave(&cq->lock, flags); >>>> + >>>> + cqe = &cq->queue[cq->cq_get % cq->num_cqe]; >>>> + if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) { >>>> + memset(wc, 0, sizeof(*wc)); >>>> + wc->wr_id = cqe->id; >>>> + wc->status = map_cqe_status[cqe->status].ib; >>>> + wc->opcode = map_wc_opcode[cqe->opcode]; >>>> + wc->byte_len = cqe->bytes; >>> >>> Don't you need to fill wc->wc_flags as well? >> >> good point. >> all zero here, since no immediate data are implemented >> in the version I am pushing. We have a prototype implementing >> RFC 7306 (atomics and immediate data), but let's do such >> an update later, if we got basic siw accepted. >> >> Other completion flags are not relevant as well. > >Hum. Does that mean remote invalidation is not supported? OO! It is... So I need IB_WC_WITH_INVALIDATE support. NVMeF stuff seem to be forgiving... > > >>>> + >>>> + /* >>>> + * During CQ flush, also user land CQE's may >>>> + * get reaped here, which do not hold a QP >>>> + * reference. >>>> + */ >>>> + if (likely(cq->kernel_verbs)) { >>>> + wc->qp = &((struct siw_qp *)cqe->qp)->base_qp; >>>> + siw_dbg_obj(cq, "[QP %d]: reap wqe type %d, idx %d\n", >>>> + QP_ID(cqe->qp), cqe->opcode, >>>> + cq->cq_get % cq->num_cqe); >>>> + siw_qp_put(cqe->qp); >>>> + } >>>> + WRITE_ONCE(cqe->flags, 0); >>>> + cq->cq_get++; >>>> + >>>> + spin_unlock_irqrestore(&cq->lock, flags); >>>> + >>>> + return 1; >>>> + } >>>> + spin_unlock_irqrestore(&cq->lock, flags); >>>> + >>>> + return 0; >>>> +} >>>> + >>>> +void siw_completion_task(unsigned long data) >>>> +{ >>>> + struct ib_cq *base_cq = (struct ib_cq *)data; >>>> + >>>> + (*base_cq->comp_handler)(base_cq, base_cq->cq_context); >>>> +} >>>> + >>>> +/* >>>> + * siw_cq_flush() >>>> + * >>>> + * Flush all CQ elements. >>>> + */ >>>> +void siw_cq_flush(struct siw_cq *cq) >>>> +{ >>>> + struct ib_wc wc; >>>> + int got; >>>> + >>>> + siw_dbg(cq->hdr.sdev, "[CQ %d]: enter\n", OBJ_ID(cq)); >>>> + >>>> + do { >>>> + got = siw_reap_cqe(cq, &wc); >>>> + } while (got > 0); >>>> +} >>>> >>> >>> Reviewed-by: Gal Pressman <galpress@amazon.com> > >-- >Chuck Lever > > > >
diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c new file mode 100644 index 000000000000..28c436f43597 --- /dev/null +++ b/drivers/infiniband/sw/siw/siw_cq.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause +/* + * Software iWARP device driver + * + * Authors: Bernard Metzler <bmt@zurich.ibm.com> + * + * Copyright (c) 2008-2018, IBM Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * - Neither the name of IBM nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/list.h> + +#include <rdma/iw_cm.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> + +#include "siw.h" +#include "siw_obj.h" +#include "siw_cm.h" + +static int map_wc_opcode[SIW_NUM_OPCODES] = { + [SIW_OP_WRITE] = IB_WC_RDMA_WRITE, + [SIW_OP_SEND] = IB_WC_SEND, + [SIW_OP_SEND_WITH_IMM] = IB_WC_SEND, + [SIW_OP_READ] = IB_WC_RDMA_READ, + [SIW_OP_READ_LOCAL_INV] = IB_WC_RDMA_READ, + [SIW_OP_COMP_AND_SWAP] = IB_WC_COMP_SWAP, + [SIW_OP_FETCH_AND_ADD] = IB_WC_FETCH_ADD, + [SIW_OP_INVAL_STAG] = IB_WC_LOCAL_INV, + [SIW_OP_REG_MR] = IB_WC_REG_MR, + [SIW_OP_RECEIVE] = IB_WC_RECV, + [SIW_OP_READ_RESPONSE] = -1 /* not used */ +}; + +static struct { + enum siw_opcode siw; + enum ib_wc_status ib; +} map_cqe_status[SIW_NUM_WC_STATUS] = { + {SIW_WC_SUCCESS, IB_WC_SUCCESS}, + {SIW_WC_LOC_LEN_ERR, IB_WC_LOC_LEN_ERR}, + {SIW_WC_LOC_PROT_ERR, IB_WC_LOC_PROT_ERR}, + {SIW_WC_LOC_QP_OP_ERR, IB_WC_LOC_QP_OP_ERR}, + {SIW_WC_WR_FLUSH_ERR, IB_WC_WR_FLUSH_ERR}, + {SIW_WC_BAD_RESP_ERR, IB_WC_BAD_RESP_ERR}, + {SIW_WC_LOC_ACCESS_ERR, IB_WC_LOC_ACCESS_ERR}, + {SIW_WC_REM_ACCESS_ERR, IB_WC_REM_ACCESS_ERR}, + {SIW_WC_REM_INV_REQ_ERR, IB_WC_REM_INV_REQ_ERR}, + {SIW_WC_GENERAL_ERR, IB_WC_GENERAL_ERR} +}; + +/* + * Reap one CQE from the CQ. Only used by kernel clients + * during CQ normal operation. Might be called during CQ + * flush for user mapped CQE array as well. + */ +int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) +{ + struct siw_cqe *cqe; + unsigned long flags; + + spin_lock_irqsave(&cq->lock, flags); + + cqe = &cq->queue[cq->cq_get % cq->num_cqe]; + if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) { + memset(wc, 0, sizeof(*wc)); + wc->wr_id = cqe->id; + wc->status = map_cqe_status[cqe->status].ib; + wc->opcode = map_wc_opcode[cqe->opcode]; + wc->byte_len = cqe->bytes; + + /* + * During CQ flush, also user land CQE's may + * get reaped here, which do not hold a QP + * reference. + */ + if (likely(cq->kernel_verbs)) { + wc->qp = &((struct siw_qp *)cqe->qp)->base_qp; + siw_dbg_obj(cq, "[QP %d]: reap wqe type %d, idx %d\n", + QP_ID(cqe->qp), cqe->opcode, + cq->cq_get % cq->num_cqe); + siw_qp_put(cqe->qp); + } + WRITE_ONCE(cqe->flags, 0); + cq->cq_get++; + + spin_unlock_irqrestore(&cq->lock, flags); + + return 1; + } + spin_unlock_irqrestore(&cq->lock, flags); + + return 0; +} + +void siw_completion_task(unsigned long data) +{ + struct ib_cq *base_cq = (struct ib_cq *)data; + + (*base_cq->comp_handler)(base_cq, base_cq->cq_context); +} + +/* + * siw_cq_flush() + * + * Flush all CQ elements. + */ +void siw_cq_flush(struct siw_cq *cq) +{ + struct ib_wc wc; + int got; + + siw_dbg(cq->hdr.sdev, "[CQ %d]: enter\n", OBJ_ID(cq)); + + do { + got = siw_reap_cqe(cq, &wc); + } while (got > 0); +}
Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> --- drivers/infiniband/sw/siw/siw_cq.c | 149 +++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 drivers/infiniband/sw/siw/siw_cq.c