diff mbox series

[v5,11/13] SIW completion queue methods

Message ID 20190219100903.15408-12-bmt@zurich.ibm.com (mailing list archive)
State Superseded
Delegated to: Jason Gunthorpe
Headers show
Series SIW: Request for Comments | expand

Commit Message

Bernard Metzler Feb. 19, 2019, 10:09 a.m. UTC
Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
---
 drivers/infiniband/sw/siw/siw_cq.c | 149 +++++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)
 create mode 100644 drivers/infiniband/sw/siw/siw_cq.c

Comments

Gal Pressman Feb. 28, 2019, 2:15 p.m. UTC | #1
On 19-Feb-19 12:09, Bernard Metzler wrote:
> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
> ---
>  drivers/infiniband/sw/siw/siw_cq.c | 149 +++++++++++++++++++++++++++++
>  1 file changed, 149 insertions(+)
>  create mode 100644 drivers/infiniband/sw/siw/siw_cq.c
> 
> diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c
> new file mode 100644
> index 000000000000..28c436f43597
> --- /dev/null
> +++ b/drivers/infiniband/sw/siw/siw_cq.c
> @@ -0,0 +1,149 @@
> +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
> +/*
> + * Software iWARP device driver
> + *
> + * Authors: Bernard Metzler <bmt@zurich.ibm.com>
> + *
> + * Copyright (c) 2008-2018, IBM Corporation
> + *
> + * This software is available to you under a choice of one of two
> + * licenses. You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * BSD license below:
> + *
> + *   Redistribution and use in source and binary forms, with or
> + *   without modification, are permitted provided that the following
> + *   conditions are met:
> + *
> + *   - Redistributions of source code must retain the above copyright notice,
> + *     this list of conditions and the following disclaimer.
> + *
> + *   - Redistributions in binary form must reproduce the above copyright
> + *     notice, this list of conditions and the following disclaimer in the
> + *     documentation and/or other materials provided with the distribution.
> + *
> + *   - Neither the name of IBM nor the names of its contributors may be
> + *     used to endorse or promote products derived from this software without
> + *     specific prior written permission.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */
> +
> +#include <linux/errno.h>
> +#include <linux/types.h>
> +#include <linux/list.h>
> +
> +#include <rdma/iw_cm.h>
> +#include <rdma/ib_verbs.h>
> +#include <rdma/ib_smi.h>
> +#include <rdma/ib_user_verbs.h>
> +
> +#include "siw.h"
> +#include "siw_obj.h"
> +#include "siw_cm.h"
> +
> +static int map_wc_opcode[SIW_NUM_OPCODES] = {
> +	[SIW_OP_WRITE]		= IB_WC_RDMA_WRITE,
> +	[SIW_OP_SEND]		= IB_WC_SEND,
> +	[SIW_OP_SEND_WITH_IMM]	= IB_WC_SEND,
> +	[SIW_OP_READ]		= IB_WC_RDMA_READ,
> +	[SIW_OP_READ_LOCAL_INV]	= IB_WC_RDMA_READ,
> +	[SIW_OP_COMP_AND_SWAP]	= IB_WC_COMP_SWAP,
> +	[SIW_OP_FETCH_AND_ADD]	= IB_WC_FETCH_ADD,
> +	[SIW_OP_INVAL_STAG]	= IB_WC_LOCAL_INV,
> +	[SIW_OP_REG_MR]		= IB_WC_REG_MR,
> +	[SIW_OP_RECEIVE]	= IB_WC_RECV,
> +	[SIW_OP_READ_RESPONSE]	= -1 /* not used */
> +};
> +
> +static struct {
> +	enum siw_opcode   siw;
> +	enum ib_wc_status ib;
> +} map_cqe_status[SIW_NUM_WC_STATUS] = {
> +	{SIW_WC_SUCCESS,		IB_WC_SUCCESS},
> +	{SIW_WC_LOC_LEN_ERR,		IB_WC_LOC_LEN_ERR},
> +	{SIW_WC_LOC_PROT_ERR,		IB_WC_LOC_PROT_ERR},
> +	{SIW_WC_LOC_QP_OP_ERR,		IB_WC_LOC_QP_OP_ERR},
> +	{SIW_WC_WR_FLUSH_ERR,		IB_WC_WR_FLUSH_ERR},
> +	{SIW_WC_BAD_RESP_ERR,		IB_WC_BAD_RESP_ERR},
> +	{SIW_WC_LOC_ACCESS_ERR,		IB_WC_LOC_ACCESS_ERR},
> +	{SIW_WC_REM_ACCESS_ERR,		IB_WC_REM_ACCESS_ERR},
> +	{SIW_WC_REM_INV_REQ_ERR,	IB_WC_REM_INV_REQ_ERR},
> +	{SIW_WC_GENERAL_ERR,		IB_WC_GENERAL_ERR}
> +};
> +
> +/*
> + * Reap one CQE from the CQ. Only used by kernel clients
> + * during CQ normal operation. Might be called during CQ
> + * flush for user mapped CQE array as well.
> + */
> +int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
> +{
> +	struct siw_cqe *cqe;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&cq->lock, flags);
> +
> +	cqe = &cq->queue[cq->cq_get % cq->num_cqe];
> +	if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) {
> +		memset(wc, 0, sizeof(*wc));
> +		wc->wr_id = cqe->id;
> +		wc->status = map_cqe_status[cqe->status].ib;
> +		wc->opcode = map_wc_opcode[cqe->opcode];
> +		wc->byte_len = cqe->bytes;

Don't you need to fill wc->wc_flags as well?

> +
> +		/*
> +		 * During CQ flush, also user land CQE's may
> +		 * get reaped here, which do not hold a QP
> +		 * reference.
> +		 */
> +		if (likely(cq->kernel_verbs)) {
> +			wc->qp = &((struct siw_qp *)cqe->qp)->base_qp;
> +			siw_dbg_obj(cq, "[QP %d]: reap wqe type %d, idx %d\n",
> +				    QP_ID(cqe->qp), cqe->opcode,
> +				    cq->cq_get % cq->num_cqe);
> +			siw_qp_put(cqe->qp);
> +		}
> +		WRITE_ONCE(cqe->flags, 0);
> +		cq->cq_get++;
> +
> +		spin_unlock_irqrestore(&cq->lock, flags);
> +
> +		return 1;
> +	}
> +	spin_unlock_irqrestore(&cq->lock, flags);
> +
> +	return 0;
> +}
> +
> +void siw_completion_task(unsigned long data)
> +{
> +	struct ib_cq *base_cq = (struct ib_cq *)data;
> +
> +	(*base_cq->comp_handler)(base_cq, base_cq->cq_context);
> +}
> +
> +/*
> + * siw_cq_flush()
> + *
> + * Flush all CQ elements.
> + */
> +void siw_cq_flush(struct siw_cq *cq)
> +{
> +	struct ib_wc wc;
> +	int got;
> +
> +	siw_dbg(cq->hdr.sdev, "[CQ %d]: enter\n", OBJ_ID(cq));
> +
> +	do {
> +		got = siw_reap_cqe(cq, &wc);
> +	} while (got > 0);
> +}
> 

Reviewed-by: Gal Pressman <galpress@amazon.com>
Bernard Metzler Feb. 28, 2019, 2:36 p.m. UTC | #2
-----"Gal Pressman" <galpress@amazon.com> wrote: -----

>To: "Bernard Metzler" <bmt@zurich.ibm.com>,
><linux-rdma@vger.kernel.org>
>From: "Gal Pressman" <galpress@amazon.com>
>Date: 02/28/2019 03:15PM
>Subject: Re: [PATCH v5 11/13] SIW completion queue methods
>
>On 19-Feb-19 12:09, Bernard Metzler wrote:
>> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
>> ---
>>  drivers/infiniband/sw/siw/siw_cq.c | 149
>+++++++++++++++++++++++++++++
>>  1 file changed, 149 insertions(+)
>>  create mode 100644 drivers/infiniband/sw/siw/siw_cq.c
>> 
>> diff --git a/drivers/infiniband/sw/siw/siw_cq.c
>b/drivers/infiniband/sw/siw/siw_cq.c
>> new file mode 100644
>> index 000000000000..28c436f43597
>> --- /dev/null
>> +++ b/drivers/infiniband/sw/siw/siw_cq.c
>> @@ -0,0 +1,149 @@
>> +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
>> +/*
>> + * Software iWARP device driver
>> + *
>> + * Authors: Bernard Metzler <bmt@zurich.ibm.com>
>> + *
>> + * Copyright (c) 2008-2018, IBM Corporation
>> + *
>> + * This software is available to you under a choice of one of two
>> + * licenses. You may choose to be licensed under the terms of the
>GNU
>> + * General Public License (GPL) Version 2, available from the file
>> + * COPYING in the main directory of this source tree, or the
>> + * BSD license below:
>> + *
>> + *   Redistribution and use in source and binary forms, with or
>> + *   without modification, are permitted provided that the
>following
>> + *   conditions are met:
>> + *
>> + *   - Redistributions of source code must retain the above
>copyright notice,
>> + *     this list of conditions and the following disclaimer.
>> + *
>> + *   - Redistributions in binary form must reproduce the above
>copyright
>> + *     notice, this list of conditions and the following
>disclaimer in the
>> + *     documentation and/or other materials provided with the
>distribution.
>> + *
>> + *   - Neither the name of IBM nor the names of its contributors
>may be
>> + *     used to endorse or promote products derived from this
>software without
>> + *     specific prior written permission.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
>OF
>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
>> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
>HOLDERS
>> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
>AN
>> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
>IN
>> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>THE
>> + * SOFTWARE.
>> + */
>> +
>> +#include <linux/errno.h>
>> +#include <linux/types.h>
>> +#include <linux/list.h>
>> +
>> +#include <rdma/iw_cm.h>
>> +#include <rdma/ib_verbs.h>
>> +#include <rdma/ib_smi.h>
>> +#include <rdma/ib_user_verbs.h>
>> +
>> +#include "siw.h"
>> +#include "siw_obj.h"
>> +#include "siw_cm.h"
>> +
>> +static int map_wc_opcode[SIW_NUM_OPCODES] = {
>> +	[SIW_OP_WRITE]		= IB_WC_RDMA_WRITE,
>> +	[SIW_OP_SEND]		= IB_WC_SEND,
>> +	[SIW_OP_SEND_WITH_IMM]	= IB_WC_SEND,
>> +	[SIW_OP_READ]		= IB_WC_RDMA_READ,
>> +	[SIW_OP_READ_LOCAL_INV]	= IB_WC_RDMA_READ,
>> +	[SIW_OP_COMP_AND_SWAP]	= IB_WC_COMP_SWAP,
>> +	[SIW_OP_FETCH_AND_ADD]	= IB_WC_FETCH_ADD,
>> +	[SIW_OP_INVAL_STAG]	= IB_WC_LOCAL_INV,
>> +	[SIW_OP_REG_MR]		= IB_WC_REG_MR,
>> +	[SIW_OP_RECEIVE]	= IB_WC_RECV,
>> +	[SIW_OP_READ_RESPONSE]	= -1 /* not used */
>> +};
>> +
>> +static struct {
>> +	enum siw_opcode   siw;
>> +	enum ib_wc_status ib;
>> +} map_cqe_status[SIW_NUM_WC_STATUS] = {
>> +	{SIW_WC_SUCCESS,		IB_WC_SUCCESS},
>> +	{SIW_WC_LOC_LEN_ERR,		IB_WC_LOC_LEN_ERR},
>> +	{SIW_WC_LOC_PROT_ERR,		IB_WC_LOC_PROT_ERR},
>> +	{SIW_WC_LOC_QP_OP_ERR,		IB_WC_LOC_QP_OP_ERR},
>> +	{SIW_WC_WR_FLUSH_ERR,		IB_WC_WR_FLUSH_ERR},
>> +	{SIW_WC_BAD_RESP_ERR,		IB_WC_BAD_RESP_ERR},
>> +	{SIW_WC_LOC_ACCESS_ERR,		IB_WC_LOC_ACCESS_ERR},
>> +	{SIW_WC_REM_ACCESS_ERR,		IB_WC_REM_ACCESS_ERR},
>> +	{SIW_WC_REM_INV_REQ_ERR,	IB_WC_REM_INV_REQ_ERR},
>> +	{SIW_WC_GENERAL_ERR,		IB_WC_GENERAL_ERR}
>> +};
>> +
>> +/*
>> + * Reap one CQE from the CQ. Only used by kernel clients
>> + * during CQ normal operation. Might be called during CQ
>> + * flush for user mapped CQE array as well.
>> + */
>> +int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
>> +{
>> +	struct siw_cqe *cqe;
>> +	unsigned long flags;
>> +
>> +	spin_lock_irqsave(&cq->lock, flags);
>> +
>> +	cqe = &cq->queue[cq->cq_get % cq->num_cqe];
>> +	if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) {
>> +		memset(wc, 0, sizeof(*wc));
>> +		wc->wr_id = cqe->id;
>> +		wc->status = map_cqe_status[cqe->status].ib;
>> +		wc->opcode = map_wc_opcode[cqe->opcode];
>> +		wc->byte_len = cqe->bytes;
>
>Don't you need to fill wc->wc_flags as well?

good point.
all zero here, since no immediate data are implemented
in the version I am pushing. We have a prototype implementing
RFC 7306 (atomics and immediate data), but let's do such
an update later, if we got basic siw accepted.

Other completion flags are not relevant as well.

>
>> +
>> +		/*
>> +		 * During CQ flush, also user land CQE's may
>> +		 * get reaped here, which do not hold a QP
>> +		 * reference.
>> +		 */
>> +		if (likely(cq->kernel_verbs)) {
>> +			wc->qp = &((struct siw_qp *)cqe->qp)->base_qp;
>> +			siw_dbg_obj(cq, "[QP %d]: reap wqe type %d, idx %d\n",
>> +				    QP_ID(cqe->qp), cqe->opcode,
>> +				    cq->cq_get % cq->num_cqe);
>> +			siw_qp_put(cqe->qp);
>> +		}
>> +		WRITE_ONCE(cqe->flags, 0);
>> +		cq->cq_get++;
>> +
>> +		spin_unlock_irqrestore(&cq->lock, flags);
>> +
>> +		return 1;
>> +	}
>> +	spin_unlock_irqrestore(&cq->lock, flags);
>> +
>> +	return 0;
>> +}
>> +
>> +void siw_completion_task(unsigned long data)
>> +{
>> +	struct ib_cq *base_cq = (struct ib_cq *)data;
>> +
>> +	(*base_cq->comp_handler)(base_cq, base_cq->cq_context);
>> +}
>> +
>> +/*
>> + * siw_cq_flush()
>> + *
>> + * Flush all CQ elements.
>> + */
>> +void siw_cq_flush(struct siw_cq *cq)
>> +{
>> +	struct ib_wc wc;
>> +	int got;
>> +
>> +	siw_dbg(cq->hdr.sdev, "[CQ %d]: enter\n", OBJ_ID(cq));
>> +
>> +	do {
>> +		got = siw_reap_cqe(cq, &wc);
>> +	} while (got > 0);
>> +}
>> 
>
>Reviewed-by: Gal Pressman <galpress@amazon.com>
>
>
Chuck Lever III Feb. 28, 2019, 2:50 p.m. UTC | #3
> On Feb 28, 2019, at 9:36 AM, Bernard Metzler <BMT@zurich.ibm.com> wrote:
> 
> -----"Gal Pressman" <galpress@amazon.com> wrote: -----
> 
>> To: "Bernard Metzler" <bmt@zurich.ibm.com>,
>> <linux-rdma@vger.kernel.org>
>> From: "Gal Pressman" <galpress@amazon.com>
>> Date: 02/28/2019 03:15PM
>> Subject: Re: [PATCH v5 11/13] SIW completion queue methods
>> 
>> On 19-Feb-19 12:09, Bernard Metzler wrote:
>>> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
>>> ---
>>> drivers/infiniband/sw/siw/siw_cq.c | 149
>> +++++++++++++++++++++++++++++
>>> 1 file changed, 149 insertions(+)
>>> create mode 100644 drivers/infiniband/sw/siw/siw_cq.c
>>> 
>>> diff --git a/drivers/infiniband/sw/siw/siw_cq.c
>> b/drivers/infiniband/sw/siw/siw_cq.c
>>> new file mode 100644
>>> index 000000000000..28c436f43597
>>> --- /dev/null
>>> +++ b/drivers/infiniband/sw/siw/siw_cq.c
>>> @@ -0,0 +1,149 @@
>>> +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
>>> +/*
>>> + * Software iWARP device driver
>>> + *
>>> + * Authors: Bernard Metzler <bmt@zurich.ibm.com>
>>> + *
>>> + * Copyright (c) 2008-2018, IBM Corporation
>>> + *
>>> + * This software is available to you under a choice of one of two
>>> + * licenses. You may choose to be licensed under the terms of the
>> GNU
>>> + * General Public License (GPL) Version 2, available from the file
>>> + * COPYING in the main directory of this source tree, or the
>>> + * BSD license below:
>>> + *
>>> + *   Redistribution and use in source and binary forms, with or
>>> + *   without modification, are permitted provided that the
>> following
>>> + *   conditions are met:
>>> + *
>>> + *   - Redistributions of source code must retain the above
>> copyright notice,
>>> + *     this list of conditions and the following disclaimer.
>>> + *
>>> + *   - Redistributions in binary form must reproduce the above
>> copyright
>>> + *     notice, this list of conditions and the following
>> disclaimer in the
>>> + *     documentation and/or other materials provided with the
>> distribution.
>>> + *
>>> + *   - Neither the name of IBM nor the names of its contributors
>> may be
>>> + *     used to endorse or promote products derived from this
>> software without
>>> + *     specific prior written permission.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>>> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
>> OF
>>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
>>> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
>> HOLDERS
>>> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
>> AN
>>> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
>> IN
>>> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>> THE
>>> + * SOFTWARE.
>>> + */
>>> +
>>> +#include <linux/errno.h>
>>> +#include <linux/types.h>
>>> +#include <linux/list.h>
>>> +
>>> +#include <rdma/iw_cm.h>
>>> +#include <rdma/ib_verbs.h>
>>> +#include <rdma/ib_smi.h>
>>> +#include <rdma/ib_user_verbs.h>
>>> +
>>> +#include "siw.h"
>>> +#include "siw_obj.h"
>>> +#include "siw_cm.h"
>>> +
>>> +static int map_wc_opcode[SIW_NUM_OPCODES] = {
>>> +	[SIW_OP_WRITE]		= IB_WC_RDMA_WRITE,
>>> +	[SIW_OP_SEND]		= IB_WC_SEND,
>>> +	[SIW_OP_SEND_WITH_IMM]	= IB_WC_SEND,
>>> +	[SIW_OP_READ]		= IB_WC_RDMA_READ,
>>> +	[SIW_OP_READ_LOCAL_INV]	= IB_WC_RDMA_READ,
>>> +	[SIW_OP_COMP_AND_SWAP]	= IB_WC_COMP_SWAP,
>>> +	[SIW_OP_FETCH_AND_ADD]	= IB_WC_FETCH_ADD,
>>> +	[SIW_OP_INVAL_STAG]	= IB_WC_LOCAL_INV,
>>> +	[SIW_OP_REG_MR]		= IB_WC_REG_MR,
>>> +	[SIW_OP_RECEIVE]	= IB_WC_RECV,
>>> +	[SIW_OP_READ_RESPONSE]	= -1 /* not used */
>>> +};
>>> +
>>> +static struct {
>>> +	enum siw_opcode   siw;
>>> +	enum ib_wc_status ib;
>>> +} map_cqe_status[SIW_NUM_WC_STATUS] = {
>>> +	{SIW_WC_SUCCESS,		IB_WC_SUCCESS},
>>> +	{SIW_WC_LOC_LEN_ERR,		IB_WC_LOC_LEN_ERR},
>>> +	{SIW_WC_LOC_PROT_ERR,		IB_WC_LOC_PROT_ERR},
>>> +	{SIW_WC_LOC_QP_OP_ERR,		IB_WC_LOC_QP_OP_ERR},
>>> +	{SIW_WC_WR_FLUSH_ERR,		IB_WC_WR_FLUSH_ERR},
>>> +	{SIW_WC_BAD_RESP_ERR,		IB_WC_BAD_RESP_ERR},
>>> +	{SIW_WC_LOC_ACCESS_ERR,		IB_WC_LOC_ACCESS_ERR},
>>> +	{SIW_WC_REM_ACCESS_ERR,		IB_WC_REM_ACCESS_ERR},
>>> +	{SIW_WC_REM_INV_REQ_ERR,	IB_WC_REM_INV_REQ_ERR},
>>> +	{SIW_WC_GENERAL_ERR,		IB_WC_GENERAL_ERR}
>>> +};
>>> +
>>> +/*
>>> + * Reap one CQE from the CQ. Only used by kernel clients
>>> + * during CQ normal operation. Might be called during CQ
>>> + * flush for user mapped CQE array as well.
>>> + */
>>> +int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
>>> +{
>>> +	struct siw_cqe *cqe;
>>> +	unsigned long flags;
>>> +
>>> +	spin_lock_irqsave(&cq->lock, flags);
>>> +
>>> +	cqe = &cq->queue[cq->cq_get % cq->num_cqe];
>>> +	if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) {
>>> +		memset(wc, 0, sizeof(*wc));
>>> +		wc->wr_id = cqe->id;
>>> +		wc->status = map_cqe_status[cqe->status].ib;
>>> +		wc->opcode = map_wc_opcode[cqe->opcode];
>>> +		wc->byte_len = cqe->bytes;
>> 
>> Don't you need to fill wc->wc_flags as well?
> 
> good point.
> all zero here, since no immediate data are implemented
> in the version I am pushing. We have a prototype implementing
> RFC 7306 (atomics and immediate data), but let's do such
> an update later, if we got basic siw accepted.
> 
> Other completion flags are not relevant as well.

Hum. Does that mean remote invalidation is not supported?


>>> +
>>> +		/*
>>> +		 * During CQ flush, also user land CQE's may
>>> +		 * get reaped here, which do not hold a QP
>>> +		 * reference.
>>> +		 */
>>> +		if (likely(cq->kernel_verbs)) {
>>> +			wc->qp = &((struct siw_qp *)cqe->qp)->base_qp;
>>> +			siw_dbg_obj(cq, "[QP %d]: reap wqe type %d, idx %d\n",
>>> +				    QP_ID(cqe->qp), cqe->opcode,
>>> +				    cq->cq_get % cq->num_cqe);
>>> +			siw_qp_put(cqe->qp);
>>> +		}
>>> +		WRITE_ONCE(cqe->flags, 0);
>>> +		cq->cq_get++;
>>> +
>>> +		spin_unlock_irqrestore(&cq->lock, flags);
>>> +
>>> +		return 1;
>>> +	}
>>> +	spin_unlock_irqrestore(&cq->lock, flags);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +void siw_completion_task(unsigned long data)
>>> +{
>>> +	struct ib_cq *base_cq = (struct ib_cq *)data;
>>> +
>>> +	(*base_cq->comp_handler)(base_cq, base_cq->cq_context);
>>> +}
>>> +
>>> +/*
>>> + * siw_cq_flush()
>>> + *
>>> + * Flush all CQ elements.
>>> + */
>>> +void siw_cq_flush(struct siw_cq *cq)
>>> +{
>>> +	struct ib_wc wc;
>>> +	int got;
>>> +
>>> +	siw_dbg(cq->hdr.sdev, "[CQ %d]: enter\n", OBJ_ID(cq));
>>> +
>>> +	do {
>>> +		got = siw_reap_cqe(cq, &wc);
>>> +	} while (got > 0);
>>> +}
>>> 
>> 
>> Reviewed-by: Gal Pressman <galpress@amazon.com>

--
Chuck Lever
Bernard Metzler Feb. 28, 2019, 3:41 p.m. UTC | #4
-----"Chuck Lever" <chuck.lever@oracle.com> wrote: -----

>To: "Bernard Metzler" <BMT@zurich.ibm.com>
>From: "Chuck Lever" <chuck.lever@oracle.com>
>Date: 02/28/2019 03:57PM
>Cc: "Gal Pressman" <galpress@amazon.com>, linux-rdma@vger.kernel.org
>Subject: Re: [PATCH v5 11/13] SIW completion queue methods
>
>
>> On Feb 28, 2019, at 9:36 AM, Bernard Metzler <BMT@zurich.ibm.com>
>wrote:
>> 
>> -----"Gal Pressman" <galpress@amazon.com> wrote: -----
>> 
>>> To: "Bernard Metzler" <bmt@zurich.ibm.com>,
>>> <linux-rdma@vger.kernel.org>
>>> From: "Gal Pressman" <galpress@amazon.com>
>>> Date: 02/28/2019 03:15PM
>>> Subject: Re: [PATCH v5 11/13] SIW completion queue methods
>>> 
>>> On 19-Feb-19 12:09, Bernard Metzler wrote:
>>>> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
>>>> ---
>>>> drivers/infiniband/sw/siw/siw_cq.c | 149
>>> +++++++++++++++++++++++++++++
>>>> 1 file changed, 149 insertions(+)
>>>> create mode 100644 drivers/infiniband/sw/siw/siw_cq.c
>>>> 
>>>> diff --git a/drivers/infiniband/sw/siw/siw_cq.c
>>> b/drivers/infiniband/sw/siw/siw_cq.c
>>>> new file mode 100644
>>>> index 000000000000..28c436f43597
>>>> --- /dev/null
>>>> +++ b/drivers/infiniband/sw/siw/siw_cq.c
>>>> @@ -0,0 +1,149 @@
>>>> +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
>>>> +/*
>>>> + * Software iWARP device driver
>>>> + *
>>>> + * Authors: Bernard Metzler <bmt@zurich.ibm.com>
>>>> + *
>>>> + * Copyright (c) 2008-2018, IBM Corporation
>>>> + *
>>>> + * This software is available to you under a choice of one of
>two
>>>> + * licenses. You may choose to be licensed under the terms of
>the
>>> GNU
>>>> + * General Public License (GPL) Version 2, available from the
>file
>>>> + * COPYING in the main directory of this source tree, or the
>>>> + * BSD license below:
>>>> + *
>>>> + *   Redistribution and use in source and binary forms, with or
>>>> + *   without modification, are permitted provided that the
>>> following
>>>> + *   conditions are met:
>>>> + *
>>>> + *   - Redistributions of source code must retain the above
>>> copyright notice,
>>>> + *     this list of conditions and the following disclaimer.
>>>> + *
>>>> + *   - Redistributions in binary form must reproduce the above
>>> copyright
>>>> + *     notice, this list of conditions and the following
>>> disclaimer in the
>>>> + *     documentation and/or other materials provided with the
>>> distribution.
>>>> + *
>>>> + *   - Neither the name of IBM nor the names of its contributors
>>> may be
>>>> + *     used to endorse or promote products derived from this
>>> software without
>>>> + *     specific prior written permission.
>>>> + *
>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
>KIND,
>>>> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
>WARRANTIES
>>> OF
>>>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
>>>> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
>>> HOLDERS
>>>> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
>IN
>>> AN
>>>> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
>OR
>>> IN
>>>> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>>> THE
>>>> + * SOFTWARE.
>>>> + */
>>>> +
>>>> +#include <linux/errno.h>
>>>> +#include <linux/types.h>
>>>> +#include <linux/list.h>
>>>> +
>>>> +#include <rdma/iw_cm.h>
>>>> +#include <rdma/ib_verbs.h>
>>>> +#include <rdma/ib_smi.h>
>>>> +#include <rdma/ib_user_verbs.h>
>>>> +
>>>> +#include "siw.h"
>>>> +#include "siw_obj.h"
>>>> +#include "siw_cm.h"
>>>> +
>>>> +static int map_wc_opcode[SIW_NUM_OPCODES] = {
>>>> +	[SIW_OP_WRITE]		= IB_WC_RDMA_WRITE,
>>>> +	[SIW_OP_SEND]		= IB_WC_SEND,
>>>> +	[SIW_OP_SEND_WITH_IMM]	= IB_WC_SEND,
>>>> +	[SIW_OP_READ]		= IB_WC_RDMA_READ,
>>>> +	[SIW_OP_READ_LOCAL_INV]	= IB_WC_RDMA_READ,
>>>> +	[SIW_OP_COMP_AND_SWAP]	= IB_WC_COMP_SWAP,
>>>> +	[SIW_OP_FETCH_AND_ADD]	= IB_WC_FETCH_ADD,
>>>> +	[SIW_OP_INVAL_STAG]	= IB_WC_LOCAL_INV,
>>>> +	[SIW_OP_REG_MR]		= IB_WC_REG_MR,
>>>> +	[SIW_OP_RECEIVE]	= IB_WC_RECV,
>>>> +	[SIW_OP_READ_RESPONSE]	= -1 /* not used */
>>>> +};
>>>> +
>>>> +static struct {
>>>> +	enum siw_opcode   siw;
>>>> +	enum ib_wc_status ib;
>>>> +} map_cqe_status[SIW_NUM_WC_STATUS] = {
>>>> +	{SIW_WC_SUCCESS,		IB_WC_SUCCESS},
>>>> +	{SIW_WC_LOC_LEN_ERR,		IB_WC_LOC_LEN_ERR},
>>>> +	{SIW_WC_LOC_PROT_ERR,		IB_WC_LOC_PROT_ERR},
>>>> +	{SIW_WC_LOC_QP_OP_ERR,		IB_WC_LOC_QP_OP_ERR},
>>>> +	{SIW_WC_WR_FLUSH_ERR,		IB_WC_WR_FLUSH_ERR},
>>>> +	{SIW_WC_BAD_RESP_ERR,		IB_WC_BAD_RESP_ERR},
>>>> +	{SIW_WC_LOC_ACCESS_ERR,		IB_WC_LOC_ACCESS_ERR},
>>>> +	{SIW_WC_REM_ACCESS_ERR,		IB_WC_REM_ACCESS_ERR},
>>>> +	{SIW_WC_REM_INV_REQ_ERR,	IB_WC_REM_INV_REQ_ERR},
>>>> +	{SIW_WC_GENERAL_ERR,		IB_WC_GENERAL_ERR}
>>>> +};
>>>> +
>>>> +/*
>>>> + * Reap one CQE from the CQ. Only used by kernel clients
>>>> + * during CQ normal operation. Might be called during CQ
>>>> + * flush for user mapped CQE array as well.
>>>> + */
>>>> +int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
>>>> +{
>>>> +	struct siw_cqe *cqe;
>>>> +	unsigned long flags;
>>>> +
>>>> +	spin_lock_irqsave(&cq->lock, flags);
>>>> +
>>>> +	cqe = &cq->queue[cq->cq_get % cq->num_cqe];
>>>> +	if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) {
>>>> +		memset(wc, 0, sizeof(*wc));
>>>> +		wc->wr_id = cqe->id;
>>>> +		wc->status = map_cqe_status[cqe->status].ib;
>>>> +		wc->opcode = map_wc_opcode[cqe->opcode];
>>>> +		wc->byte_len = cqe->bytes;
>>> 
>>> Don't you need to fill wc->wc_flags as well?
>> 
>> good point.
>> all zero here, since no immediate data are implemented
>> in the version I am pushing. We have a prototype implementing
>> RFC 7306 (atomics and immediate data), but let's do such
>> an update later, if we got basic siw accepted.
>> 
>> Other completion flags are not relevant as well.
>
>Hum. Does that mean remote invalidation is not supported?

OO! It is... So I need IB_WC_WITH_INVALIDATE support.

NVMeF stuff seem to be forgiving...

>
>
>>>> +
>>>> +		/*
>>>> +		 * During CQ flush, also user land CQE's may
>>>> +		 * get reaped here, which do not hold a QP
>>>> +		 * reference.
>>>> +		 */
>>>> +		if (likely(cq->kernel_verbs)) {
>>>> +			wc->qp = &((struct siw_qp *)cqe->qp)->base_qp;
>>>> +			siw_dbg_obj(cq, "[QP %d]: reap wqe type %d, idx %d\n",
>>>> +				    QP_ID(cqe->qp), cqe->opcode,
>>>> +				    cq->cq_get % cq->num_cqe);
>>>> +			siw_qp_put(cqe->qp);
>>>> +		}
>>>> +		WRITE_ONCE(cqe->flags, 0);
>>>> +		cq->cq_get++;
>>>> +
>>>> +		spin_unlock_irqrestore(&cq->lock, flags);
>>>> +
>>>> +		return 1;
>>>> +	}
>>>> +	spin_unlock_irqrestore(&cq->lock, flags);
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +void siw_completion_task(unsigned long data)
>>>> +{
>>>> +	struct ib_cq *base_cq = (struct ib_cq *)data;
>>>> +
>>>> +	(*base_cq->comp_handler)(base_cq, base_cq->cq_context);
>>>> +}
>>>> +
>>>> +/*
>>>> + * siw_cq_flush()
>>>> + *
>>>> + * Flush all CQ elements.
>>>> + */
>>>> +void siw_cq_flush(struct siw_cq *cq)
>>>> +{
>>>> +	struct ib_wc wc;
>>>> +	int got;
>>>> +
>>>> +	siw_dbg(cq->hdr.sdev, "[CQ %d]: enter\n", OBJ_ID(cq));
>>>> +
>>>> +	do {
>>>> +		got = siw_reap_cqe(cq, &wc);
>>>> +	} while (got > 0);
>>>> +}
>>>> 
>>> 
>>> Reviewed-by: Gal Pressman <galpress@amazon.com>
>
>--
>Chuck Lever
>
>
>
>
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c
new file mode 100644
index 000000000000..28c436f43597
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_cq.c
@@ -0,0 +1,149 @@ 
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+/*
+ * Software iWARP device driver
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2018, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/list.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "siw.h"
+#include "siw_obj.h"
+#include "siw_cm.h"
+
+static int map_wc_opcode[SIW_NUM_OPCODES] = {
+	[SIW_OP_WRITE]		= IB_WC_RDMA_WRITE,
+	[SIW_OP_SEND]		= IB_WC_SEND,
+	[SIW_OP_SEND_WITH_IMM]	= IB_WC_SEND,
+	[SIW_OP_READ]		= IB_WC_RDMA_READ,
+	[SIW_OP_READ_LOCAL_INV]	= IB_WC_RDMA_READ,
+	[SIW_OP_COMP_AND_SWAP]	= IB_WC_COMP_SWAP,
+	[SIW_OP_FETCH_AND_ADD]	= IB_WC_FETCH_ADD,
+	[SIW_OP_INVAL_STAG]	= IB_WC_LOCAL_INV,
+	[SIW_OP_REG_MR]		= IB_WC_REG_MR,
+	[SIW_OP_RECEIVE]	= IB_WC_RECV,
+	[SIW_OP_READ_RESPONSE]	= -1 /* not used */
+};
+
+static struct {
+	enum siw_opcode   siw;
+	enum ib_wc_status ib;
+} map_cqe_status[SIW_NUM_WC_STATUS] = {
+	{SIW_WC_SUCCESS,		IB_WC_SUCCESS},
+	{SIW_WC_LOC_LEN_ERR,		IB_WC_LOC_LEN_ERR},
+	{SIW_WC_LOC_PROT_ERR,		IB_WC_LOC_PROT_ERR},
+	{SIW_WC_LOC_QP_OP_ERR,		IB_WC_LOC_QP_OP_ERR},
+	{SIW_WC_WR_FLUSH_ERR,		IB_WC_WR_FLUSH_ERR},
+	{SIW_WC_BAD_RESP_ERR,		IB_WC_BAD_RESP_ERR},
+	{SIW_WC_LOC_ACCESS_ERR,		IB_WC_LOC_ACCESS_ERR},
+	{SIW_WC_REM_ACCESS_ERR,		IB_WC_REM_ACCESS_ERR},
+	{SIW_WC_REM_INV_REQ_ERR,	IB_WC_REM_INV_REQ_ERR},
+	{SIW_WC_GENERAL_ERR,		IB_WC_GENERAL_ERR}
+};
+
+/*
+ * Reap one CQE from the CQ. Only used by kernel clients
+ * during CQ normal operation. Might be called during CQ
+ * flush for user mapped CQE array as well.
+ */
+int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
+{
+	struct siw_cqe *cqe;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cq->lock, flags);
+
+	cqe = &cq->queue[cq->cq_get % cq->num_cqe];
+	if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) {
+		memset(wc, 0, sizeof(*wc));
+		wc->wr_id = cqe->id;
+		wc->status = map_cqe_status[cqe->status].ib;
+		wc->opcode = map_wc_opcode[cqe->opcode];
+		wc->byte_len = cqe->bytes;
+
+		/*
+		 * During CQ flush, also user land CQE's may
+		 * get reaped here, which do not hold a QP
+		 * reference.
+		 */
+		if (likely(cq->kernel_verbs)) {
+			wc->qp = &((struct siw_qp *)cqe->qp)->base_qp;
+			siw_dbg_obj(cq, "[QP %d]: reap wqe type %d, idx %d\n",
+				    QP_ID(cqe->qp), cqe->opcode,
+				    cq->cq_get % cq->num_cqe);
+			siw_qp_put(cqe->qp);
+		}
+		WRITE_ONCE(cqe->flags, 0);
+		cq->cq_get++;
+
+		spin_unlock_irqrestore(&cq->lock, flags);
+
+		return 1;
+	}
+	spin_unlock_irqrestore(&cq->lock, flags);
+
+	return 0;
+}
+
+void siw_completion_task(unsigned long data)
+{
+	struct ib_cq *base_cq = (struct ib_cq *)data;
+
+	(*base_cq->comp_handler)(base_cq, base_cq->cq_context);
+}
+
+/*
+ * siw_cq_flush()
+ *
+ * Flush all CQ elements.
+ */
+void siw_cq_flush(struct siw_cq *cq)
+{
+	struct ib_wc wc;
+	int got;
+
+	siw_dbg(cq->hdr.sdev, "[CQ %d]: enter\n", OBJ_ID(cq));
+
+	do {
+		got = siw_reap_cqe(cq, &wc);
+	} while (got > 0);
+}