diff mbox

[v2,04/13] SoftiWarp object management

Message ID 20171006122853.16310-5-bmt@zurich.ibm.com (mailing list archive)
State Changes Requested
Headers show

Commit Message

Bernard Metzler Oct. 6, 2017, 12:28 p.m. UTC
Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
---
 drivers/infiniband/sw/siw/siw_obj.c | 428 ++++++++++++++++++++++++++++++++++++
 drivers/infiniband/sw/siw/siw_obj.h | 113 ++++++++++
 2 files changed, 541 insertions(+)
 create mode 100644 drivers/infiniband/sw/siw/siw_obj.c
 create mode 100644 drivers/infiniband/sw/siw/siw_obj.h

Comments

Leon Romanovsky Oct. 8, 2017, 12:28 p.m. UTC | #1
On Fri, Oct 06, 2017 at 08:28:44AM -0400, Bernard Metzler wrote:
> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
> ---
>  drivers/infiniband/sw/siw/siw_obj.c | 428 ++++++++++++++++++++++++++++++++++++
>  drivers/infiniband/sw/siw/siw_obj.h | 113 ++++++++++
>  2 files changed, 541 insertions(+)
>  create mode 100644 drivers/infiniband/sw/siw/siw_obj.c
>  create mode 100644 drivers/infiniband/sw/siw/siw_obj.h
>
> diff --git a/drivers/infiniband/sw/siw/siw_obj.c b/drivers/infiniband/sw/siw/siw_obj.c
> new file mode 100644
> index 000000000000..a6d28773e09d
> --- /dev/null
> +++ b/drivers/infiniband/sw/siw/siw_obj.c
> @@ -0,0 +1,428 @@
> +/*
> + * Software iWARP device driver for Linux

No need to add "Linux" for the Linux Driver code in the Linux Kernel.

> + *
> + * Authors: Bernard Metzler <bmt@zurich.ibm.com>
> + *
> + * Copyright (c) 2008-2017, IBM Corporation
> + *
> + * This software is available to you under a choice of one of two
> + * licenses. You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * BSD license below:
> + *
> + *   Redistribution and use in source and binary forms, with or
> + *   without modification, are permitted provided that the following
> + *   conditions are met:
> + *
> + *   - Redistributions of source code must retain the above copyright notice,
> + *     this list of conditions and the following disclaimer.
> + *
> + *   - Redistributions in binary form must reproduce the above copyright
> + *     notice, this list of conditions and the following disclaimer in the
> + *     documentation and/or other materials provided with the distribution.
> + *
> + *   - Neither the name of IBM nor the names of its contributors may be
> + *     used to endorse or promote products derived from this software without
> + *     specific prior written permission.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */
> +
> +#include <linux/spinlock.h>
> +#include <linux/kref.h>
> +#include <linux/vmalloc.h>
> +
> +#include "siw.h"
> +#include "siw_obj.h"
> +#include "siw_cm.h"
> +
> +
> +void siw_objhdr_init(struct siw_objhdr *hdr)
> +{
> +	kref_init(&hdr->ref);
> +}
> +
> +void siw_idr_init(struct siw_dev *sdev)
> +{
> +	spin_lock_init(&sdev->idr_lock);
> +
> +	idr_init(&sdev->qp_idr);
> +	idr_init(&sdev->cq_idr);
> +	idr_init(&sdev->pd_idr);
> +	idr_init(&sdev->mem_idr);
> +}
> +
> +void siw_idr_release(struct siw_dev *sdev)
> +{
> +	idr_destroy(&sdev->qp_idr);
> +	idr_destroy(&sdev->cq_idr);
> +	idr_destroy(&sdev->pd_idr);
> +	idr_destroy(&sdev->mem_idr);
> +}

Why do you need the idr_* calls, and why isn't the IB/core idr_* management
enough? I didn't review the various *_obj functions.

> +
> +static inline int siw_add_obj(spinlock_t *lock, struct idr *idr,
> +			      struct siw_objhdr *obj)

Please don't add inline functions in C files.

> +{
> +	unsigned long flags;
> +	int id, pre_id;
> +
> +	do {
> +		get_random_bytes(&pre_id, sizeof(pre_id));
> +		pre_id &= 0xffffff;
> +	} while (pre_id == 0);
> +again:
> +	spin_lock_irqsave(lock, flags);
> +	id = idr_alloc(idr, obj, pre_id, 0xffffff - 1, GFP_KERNEL);
> +	spin_unlock_irqrestore(lock, flags);
> +
> +	if (id > 0) {
> +		siw_objhdr_init(obj);
> +		obj->id = id;
> +		dprint(DBG_OBJ, "(OBJ%d): IDR New Object\n", id);

Please don't reinvent the pr_debug infrastructure. There is no need for
custom dprint(..) logic.

> +	} else if (id == -ENOSPC && pre_id != 1) {
> +		pre_id = 1;
> +		goto again;
> +	} else {
> +		BUG_ON(id == 0);

No BUG_ON in new code.

> +		dprint(DBG_OBJ|DBG_ON, "(OBJ??): IDR New Object failed!\n");
> +	}
> +	return id > 0 ? 0 : id;
> +}
> +
Doug Ledford Oct. 13, 2017, 1:01 a.m. UTC | #2
On 10/8/2017 8:28 AM, Leon Romanovsky wrote:
>> +	} else if (id == -ENOSPC && pre_id != 1) {
>> +		pre_id = 1;
>> +		goto again;
>> +	} else {
>> +		BUG_ON(id == 0);
> No BUG_ON in new code.
> 
>> +		dprint(DBG_OBJ|DBG_ON, "(OBJ??): IDR New Object failed!\n");
>> +	}
>> +	return id > 0 ? 0 : id;
>> +}

Not to mention that this segment displays why BUG_ON() is so bad.  You
have issued a BUG_ON() and then on the very next line you have a
dprint() as if there is anything else that happens after a BUG_ON().
There isn't.  By definition, with a BUG_ON(), you have killed the entire
machine.  You will never get to the dprint() line, it is completely
unreachable code.  Unless you run across something the likes of a
hardware device that is completely out of control, that is DMAing to
random address, and that you can't stop, BUG_ON is not an appropriate
solution.  You should only BUG_ON if continuing to run will render the
machine randomly corrupted and jeopardize writing that corruption to
disk and wiping the persistent system data out.  Short of that, you need
to find a way to gracefully handle the situation and continue.  It may
mean shutting your device down forcefully so that the rest of the
machine may live, but that's OK.  Shutting the entire machine down is not.
Bernard Metzler Oct. 14, 2017, 12:34 a.m. UTC | #3
-----Doug Ledford <dledford@redhat.com> wrote: -----

>To: Leon Romanovsky <leon@kernel.org>, Bernard Metzler
><bmt@zurich.ibm.com>
>From: Doug Ledford <dledford@redhat.com>
>Date: 10/13/2017 03:01AM
>Cc: linux-rdma@vger.kernel.org
>Subject: Re: [PATCH v2 04/13] SoftiWarp object management
>
>On 10/8/2017 8:28 AM, Leon Romanovsky wrote:
>>> +	} else if (id == -ENOSPC && pre_id != 1) {
>>> +		pre_id = 1;
>>> +		goto again;
>>> +	} else {
>>> +		BUG_ON(id == 0);
>> No BUG_ON in new code.
>> 
>>> +		dprint(DBG_OBJ|DBG_ON, "(OBJ??): IDR New Object failed!\n");
>>> +	}
>>> +	return id > 0 ? 0 : id;
>>> +}
>
>Not to mention that this segment displays why BUG_ON() is so bad.
>You
>have issued a BUG_ON() and then on the very next line you have a
>dprint() as if there is anything else that happens after a BUG_ON().
>There isn't.  By definition, with a BUG_ON(), you have killed the
>entire
>machine.  You will never get to the dprint() line, it is completely
>unreachable code.  Unless you run across something the likes of a
>hardware device that is completely out of control, that is DMAing to
>random address, and that you can't stop, BUG_ON is not an appropriate
>solution.  You should only BUG_ON if continuing to run will render
>the
>machine randomly corrupted and jeopardize writing that corruption to
>disk and wiping the persistent system data out.  Short of that, you
>need
>to find a way to gracefully handle the situation and continue.  It
>may
>mean shutting your device down forcefully so that the rest of the
>machine may live, but that's OK.  Shutting the entire machine down is
>not.
>
>-- 
>Doug Ledford <dledford@redhat.com>
>    GPG Key ID: B826A3330E572FDD
>    Key fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57
>2FDD
>
>
[attachment "signature.asc" removed by Bernard Metzler/Zurich/IBM]

Hi Doug,

Thanks for finding that. It's a bug in my code - apart from the use of the
BUG() statement itself. I am aware that BUG() eventually halts the machine.

Thank you!
Bernard.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bernard Metzler Oct. 14, 2017, 1:07 a.m. UTC | #4
>To: Bernard Metzler <bmt@zurich.ibm.com>
>From: Leon Romanovsky 
>Sent by: linux-rdma-owner@vger.kernel.org
>Date: 10/08/2017 02:28PM
>Cc: linux-rdma@vger.kernel.org
>Subject: Re: [PATCH v2 04/13] SoftiWarp object management
>
>On Fri, Oct 06, 2017 at 08:28:44AM -0400, Bernard Metzler wrote:
>> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
>> ---
>>  drivers/infiniband/sw/siw/siw_obj.c | 428
>++++++++++++++++++++++++++++++++++++
>>  drivers/infiniband/sw/siw/siw_obj.h | 113 ++++++++++
>>  2 files changed, 541 insertions(+)
>>  create mode 100644 drivers/infiniband/sw/siw/siw_obj.c
>>  create mode 100644 drivers/infiniband/sw/siw/siw_obj.h
>>
>> diff --git a/drivers/infiniband/sw/siw/siw_obj.c
>b/drivers/infiniband/sw/siw/siw_obj.c
>> new file mode 100644
>> index 000000000000..a6d28773e09d
>> --- /dev/null
>> +++ b/drivers/infiniband/sw/siw/siw_obj.c
>> @@ -0,0 +1,428 @@
>> +/*
>> + * Software iWARP device driver for Linux
>
>No need to add "Linux" for the Linux Driver code in the Linux Kernel.

Good point. I will remove that.
>
>> + *
>> + * Authors: Bernard Metzler <bmt@zurich.ibm.com>
>> + *
>> + * Copyright (c) 2008-2017, IBM Corporation
>> + *
>> + * This software is available to you under a choice of one of two
>> + * licenses. You may choose to be licensed under the terms of the
>GNU
>> + * General Public License (GPL) Version 2, available from the file
>> + * COPYING in the main directory of this source tree, or the
>> + * BSD license below:
>> + *
>> + *   Redistribution and use in source and binary forms, with or
>> + *   without modification, are permitted provided that the
>following
>> + *   conditions are met:
>> + *
>> + *   - Redistributions of source code must retain the above
>copyright notice,
>> + *     this list of conditions and the following disclaimer.
>> + *
>> + *   - Redistributions in binary form must reproduce the above
>copyright
>> + *     notice, this list of conditions and the following
>disclaimer in the
>> + *     documentation and/or other materials provided with the
>distribution.
>> + *
>> + *   - Neither the name of IBM nor the names of its contributors
>may be
>> + *     used to endorse or promote products derived from this
>software without
>> + *     specific prior written permission.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
>OF
>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
>> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
>HOLDERS
>> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
>AN
>> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
>IN
>> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>THE
>> + * SOFTWARE.
>> + */
>> +
>> +#include <linux/spinlock.h>
>> +#include <linux/kref.h>
>> +#include <linux/vmalloc.h>
>> +
>> +#include "siw.h"
>> +#include "siw_obj.h"
>> +#include "siw_cm.h"
>> +
>> +
>> +void siw_objhdr_init(struct siw_objhdr *hdr)
>> +{
>> +	kref_init(&hdr->ref);
>> +}
>> +
>> +void siw_idr_init(struct siw_dev *sdev)
>> +{
>> +	spin_lock_init(&sdev->idr_lock);
>> +
>> +	idr_init(&sdev->qp_idr);
>> +	idr_init(&sdev->cq_idr);
>> +	idr_init(&sdev->pd_idr);
>> +	idr_init(&sdev->mem_idr);
>> +}
>> +
>> +void siw_idr_release(struct siw_dev *sdev)
>> +{
>> +	idr_destroy(&sdev->qp_idr);
>> +	idr_destroy(&sdev->cq_idr);
>> +	idr_destroy(&sdev->pd_idr);
>> +	idr_destroy(&sdev->mem_idr);
>> +}
>
>Why do you need need idr_* calls and why can't IB/core idr_*
>management
>be enough? I didn't review the various *_obj functions.

For memory management, I need that. Memory can be referenced
by in-progress operations like READ responses. Memory objects
must be protected until those operations finish. I think this
statement is also true for other resources, like QPs etc.
Let me think through it again and come back with a more
detailed answer.

>
>> +
>> +static inline int siw_add_obj(spinlock_t *lock, struct idr *idr,
>> +			      struct siw_objhdr *obj)
>
>Please don't add inline functions in C files.
>

OK, will fix that.
>> +{
>> +	unsigned long flags;
>> +	int id, pre_id;
>> +
>> +	do {
>> +		get_random_bytes(&pre_id, sizeof(pre_id));
>> +		pre_id &= 0xffffff;
>> +	} while (pre_id == 0);
>> +again:
>> +	spin_lock_irqsave(lock, flags);
>> +	id = idr_alloc(idr, obj, pre_id, 0xffffff - 1, GFP_KERNEL);
>> +	spin_unlock_irqrestore(lock, flags);
>> +
>> +	if (id > 0) {
>> +		siw_objhdr_init(obj);
>> +		obj->id = id;
>> +		dprint(DBG_OBJ, "(OBJ%d): IDR New Object\n", id);
>
>Please don't reinvent pr_debug infrastructure. There is no need in
>custom dprint(..) logic.

This whole dprint logic was actually introduced to debug RDMA applications.
It proved to be very useful if one wants to understand why an RDMA
operation fails (e.g. a memory access protection error because of
a wrong key, offset, address, etc.). I see I have to remove it.

>
>> +	} else if (id == -ENOSPC && pre_id != 1) {
>> +		pre_id = 1;
>> +		goto again;
>> +	} else {
>> +		BUG_ON(id == 0);
>
>No BUG_ON in new code.
>
OK

>> +		dprint(DBG_OBJ|DBG_ON, "(OBJ??): IDR New Object failed!\n");
>> +	}
>> +	return id > 0 ? 0 : id;
>> +}
>> +
>

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/sw/siw/siw_obj.c b/drivers/infiniband/sw/siw/siw_obj.c
new file mode 100644
index 000000000000..a6d28773e09d
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_obj.c
@@ -0,0 +1,428 @@ 
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2017, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/kref.h>
+#include <linux/vmalloc.h>
+
+#include "siw.h"
+#include "siw_obj.h"
+#include "siw_cm.h"
+
+
+/*
+ * siw_objhdr_init()
+ *
+ * Initializes the reference counter embedded in a generic siw object
+ * header via kref_init().
+ */
+void siw_objhdr_init(struct siw_objhdr *hdr)
+{
+	struct kref *refcnt = &hdr->ref;
+
+	kref_init(refcnt);
+}
+
+/*
+ * siw_idr_init()
+ *
+ * Prepares the per-device id lookup state: the spinlock used when
+ * changing the tables and the four IDR tables (QP, CQ, PD, memory).
+ */
+void siw_idr_init(struct siw_dev *sdev)
+{
+	struct idr *tables[] = {
+		&sdev->qp_idr,
+		&sdev->cq_idr,
+		&sdev->pd_idr,
+		&sdev->mem_idr,
+	};
+	unsigned int i;
+
+	spin_lock_init(&sdev->idr_lock);
+
+	for (i = 0; i < ARRAY_SIZE(tables); i++)
+		idr_init(tables[i]);
+}
+
+/*
+ * siw_idr_release()
+ *
+ * Calls idr_destroy() on the four per-device IDR tables
+ * (QP, CQ, PD, memory), in that order.
+ */
+void siw_idr_release(struct siw_dev *sdev)
+{
+	struct idr *tables[] = {
+		&sdev->qp_idr,
+		&sdev->cq_idr,
+		&sdev->pd_idr,
+		&sdev->mem_idr,
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(tables); i++)
+		idr_destroy(tables[i]);
+}
+
+/*
+ * siw_add_obj()
+ *
+ * Registers @obj in @idr under a non-zero id and initializes its
+ * object header. The first attempt starts the id search at a random
+ * 24 bit value; if no id is free above that value, the search is
+ * repeated once starting from 1.
+ *
+ * @lock: spinlock protecting updates of @idr
+ * @idr:  table to insert the object into
+ * @obj:  object header to register
+ *
+ * Returns 0 on success or the negative errno reported by idr_alloc().
+ * Uses a GFP_KERNEL preload, so it must be called from a context
+ * that is allowed to sleep.
+ */
+static int siw_add_obj(spinlock_t *lock, struct idr *idr,
+		       struct siw_objhdr *obj)
+{
+	unsigned long flags;
+	int id, pre_id;
+
+	do {
+		get_random_bytes(&pre_id, sizeof(pre_id));
+		pre_id &= 0xffffff;
+	} while (pre_id == 0);
+
+	/*
+	 * Set up the reference count before the object becomes visible
+	 * to lookups through the IDR.
+	 */
+	siw_objhdr_init(obj);
+again:
+	/*
+	 * The allocation inside the spinlock (interrupts disabled) must
+	 * not sleep: preload with GFP_KERNEL outside of the lock and
+	 * allocate with GFP_NOWAIT inside.
+	 */
+	idr_preload(GFP_KERNEL);
+	spin_lock_irqsave(lock, flags);
+	id = idr_alloc(idr, obj, pre_id, 0xffffff - 1, GFP_NOWAIT);
+	spin_unlock_irqrestore(lock, flags);
+	idr_preload_end();
+
+	if (id == -ENOSPC && pre_id != 1) {
+		/* nothing free above the random start: retry from 1 */
+		pre_id = 1;
+		goto again;
+	}
+	if (id < 0) {
+		dprint(DBG_OBJ|DBG_ON, "(OBJ??): IDR New Object failed!\n");
+		return id;
+	}
+	/* the search starts at pre_id >= 1, so a valid id is never 0 */
+	obj->id = id;
+	dprint(DBG_OBJ, "(OBJ%d): IDR New Object\n", id);
+
+	return 0;
+}
+
+/*
+ * siw_get_obj()
+ *
+ * Looks up the object registered under @id in @idr and takes a
+ * reference on it.
+ *
+ * Returns the object header, or NULL if nothing is registered under
+ * @id or if the object's reference count already dropped to zero.
+ */
+static struct siw_objhdr *siw_get_obj(struct idr *idr, int id)
+{
+	struct siw_objhdr *obj;
+
+	/* lockless idr_find() must run inside an RCU read-side section */
+	rcu_read_lock();
+	obj = idr_find(idr, id);
+	/*
+	 * An object can still be found in the IDR while its release
+	 * function runs (siw_free_qp() removes the id only after the
+	 * last reference is gone). Do not take a reference on an object
+	 * that is already being torn down.
+	 */
+	if (obj && !kref_get_unless_zero(&obj->ref))
+		obj = NULL;
+	rcu_read_unlock();
+
+	return obj;
+}
+
+/*
+ * siw_cq_id2obj()
+ *
+ * Translates a CQ id into the CQ object. The reference obtained by
+ * siw_get_obj() is passed on to the caller. Returns NULL if the id
+ * is not registered.
+ */
+struct siw_cq *siw_cq_id2obj(struct siw_dev *sdev, int id)
+{
+	struct siw_objhdr *hdr;
+
+	hdr = siw_get_obj(&sdev->cq_idr, id);
+
+	return hdr ? container_of(hdr, struct siw_cq, hdr) : NULL;
+}
+
+/*
+ * siw_qp_id2obj()
+ *
+ * Translates a QP id into the QP object. The reference obtained by
+ * siw_get_obj() is passed on to the caller. Returns NULL if the id
+ * is not registered.
+ */
+struct siw_qp *siw_qp_id2obj(struct siw_dev *sdev, int id)
+{
+	struct siw_objhdr *hdr;
+
+	hdr = siw_get_obj(&sdev->qp_idr, id);
+
+	return hdr ? container_of(hdr, struct siw_qp, hdr) : NULL;
+}
+
+/*
+ * siw_mem_id2obj()
+ *
+ * Resolves memory from the STag index given by id. On success the
+ * memory object is returned with the reference taken by
+ * siw_get_obj(); NULL is returned if nothing is registered under id.
+ * Might be called from:
+ * o process context before sending out of sgl, or
+ * o in softirq when resolving target memory
+ */
+struct siw_mem *siw_mem_id2obj(struct siw_dev *sdev, int id)
+{
+	struct siw_objhdr *obj;
+
+	rcu_read_lock();
+	obj = siw_get_obj(&sdev->mem_idr, id);
+	rcu_read_unlock();
+
+	if (!obj) {
+		dprint(DBG_MM|DBG_OBJ|DBG_ON, "(MEM%d): not found!\n", id);
+		return NULL;
+	}
+	/* hdr.ref is a struct kref, so read it with kref_read() */
+	dprint(DBG_MM|DBG_OBJ, "(MEM%d): New refcount: %d\n",
+	       obj->id, kref_read(&obj->ref));
+
+	return container_of(obj, struct siw_mem, hdr);
+}
+
+/*
+ * siw_qp_add()
+ *
+ * Registers a QP in the device's QP table and, on success, records
+ * the owning device in the object header. Returns the result of
+ * siw_add_obj().
+ */
+int siw_qp_add(struct siw_dev *sdev, struct siw_qp *qp)
+{
+	int rv;
+
+	rv = siw_add_obj(&sdev->idr_lock, &sdev->qp_idr, &qp->hdr);
+	if (rv)
+		return rv;
+
+	dprint(DBG_OBJ, "(QP%d): New Object\n", QP_ID(qp));
+	qp->hdr.sdev = sdev;
+
+	return 0;
+}
+
+/*
+ * siw_cq_add()
+ *
+ * Registers a CQ in the device's CQ table and, on success, records
+ * the owning device in the object header. Returns the result of
+ * siw_add_obj().
+ */
+int siw_cq_add(struct siw_dev *sdev, struct siw_cq *cq)
+{
+	int rv;
+
+	rv = siw_add_obj(&sdev->idr_lock, &sdev->cq_idr, &cq->hdr);
+	if (rv)
+		return rv;
+
+	dprint(DBG_OBJ, "(CQ%d): New Object\n", cq->hdr.id);
+	cq->hdr.sdev = sdev;
+
+	return 0;
+}
+
+/*
+ * siw_pd_add()
+ *
+ * Registers a PD in the device's PD table and, on success, records
+ * the owning device in the object header. Returns the result of
+ * siw_add_obj().
+ */
+int siw_pd_add(struct siw_dev *sdev, struct siw_pd *pd)
+{
+	int rv;
+
+	rv = siw_add_obj(&sdev->idr_lock, &sdev->pd_idr, &pd->hdr);
+	if (rv)
+		return rv;
+
+	dprint(DBG_OBJ, "(PD%d): New Object\n", pd->hdr.id);
+	pd->hdr.sdev = sdev;
+
+	return 0;
+}
+
+/*
+ * siw_mem_add()
+ *
+ * Registers memory object @m in the memory table of @sdev.
+ *
+ * Stag lookup is based on its index part only (24 bits).
+ * The code avoids special Stag of zero and tries to randomize
+ * STag values between 1 and SIW_STAG_MAX: the id search first starts
+ * at a random 24 bit value and, if no id is free above it, is
+ * repeated once starting from 1.
+ *
+ * Returns 0 on success, -ENOSPC if no id is available, or another
+ * negative errno reported by idr_alloc() (e.g. -ENOMEM). Uses a
+ * GFP_KERNEL preload, so the caller must be allowed to sleep.
+ */
+int siw_mem_add(struct siw_dev *sdev, struct siw_mem *m)
+{
+	unsigned long flags;
+	int id, pre_id;
+
+	do {
+		get_random_bytes(&pre_id, sizeof(pre_id));
+		pre_id &= 0xffffff;
+	} while (pre_id == 0);
+
+	/*
+	 * Set up reference count and device pointer before the object
+	 * becomes visible to lookups through the IDR.
+	 */
+	siw_objhdr_init(&m->hdr);
+	m->hdr.sdev = sdev;
+again:
+	/*
+	 * No sleeping allocation while holding the spinlock with
+	 * interrupts disabled: preload outside, GFP_NOWAIT inside.
+	 */
+	idr_preload(GFP_KERNEL);
+	spin_lock_irqsave(&sdev->idr_lock, flags);
+	id = idr_alloc(&sdev->mem_idr, m, pre_id, SIW_STAG_MAX, GFP_NOWAIT);
+	spin_unlock_irqrestore(&sdev->idr_lock, flags);
+	idr_preload_end();
+
+	if (id < 0 && id != -ENOSPC) {
+		/* e.g. -ENOMEM: must not be mistaken for a valid id */
+		dprint(DBG_OBJ|DBG_MM|DBG_ON,
+			"(IDR): New Object failed: %d\n", id);
+		return id;
+	}
+	if (id == -ENOSPC || id > SIW_STAG_MAX) {
+		if (pre_id == 1) {
+			dprint(DBG_OBJ|DBG_MM|DBG_ON,
+				"(IDR): New Object failed: %d\n", pre_id);
+			return -ENOSPC;
+		}
+		pre_id = 1;
+		goto again;
+	}
+	m->hdr.id = id;
+	dprint(DBG_OBJ|DBG_MM, "(IDR%d): New Object\n", id);
+
+	return 0;
+}
+
+/*
+ * siw_remove_obj()
+ *
+ * Removes the id stored in @hdr from @idr while holding @lock with
+ * interrupts disabled. The object itself is not touched.
+ */
+void siw_remove_obj(spinlock_t *lock, struct idr *idr,
+		      struct siw_objhdr *hdr)
+{
+	unsigned long irq_state;
+
+	dprint(DBG_OBJ, "(OBJ%d): IDR Remove Object\n", hdr->id);
+
+	spin_lock_irqsave(lock, irq_state);
+	idr_remove(idr, hdr->id);
+	spin_unlock_irqrestore(lock, irq_state);
+}
+
+
+/********** routines to put objs back and free if no ref left *****/
+
+/*
+ * siw_free_cq()
+ *
+ * kref release function for CQs, passed to kref_put() by
+ * siw_cq_put(): drops the device's CQ count and frees the queue
+ * memory and the CQ object.
+ */
+static void siw_free_cq(struct kref *ref)
+{
+	struct siw_objhdr *objhdr = container_of(ref, struct siw_objhdr, ref);
+	struct siw_cq *cq = container_of(objhdr, struct siw_cq, hdr);
+
+	dprint(DBG_OBJ, "(CQ%d): Free Object\n", cq->hdr.id);
+
+	atomic_dec(&cq->hdr.sdev->num_cq);
+
+	/* vfree() ignores a NULL pointer */
+	vfree(cq->queue);
+	kfree(cq);
+}
+
+/*
+ * siw_free_qp()
+ *
+ * kref release function for QPs, passed to kref_put() by
+ * siw_qp_put(): drops the reference on an attached connection
+ * endpoint, removes the QP from the id table and the device's QP
+ * list, frees all queue memory and finally the QP itself.
+ */
+static void siw_free_qp(struct kref *ref)
+{
+	struct siw_objhdr *objhdr = container_of(ref, struct siw_objhdr, ref);
+	struct siw_qp *qp = container_of(objhdr, struct siw_qp, hdr);
+	struct siw_dev *sdev = qp->hdr.sdev;
+	unsigned long irq_state;
+
+	dprint(DBG_OBJ|DBG_CM, "(QP%d): Free Object\n", QP_ID(qp));
+
+	if (qp->cep)
+		siw_cep_put(qp->cep);
+
+	siw_remove_obj(&sdev->idr_lock, &sdev->qp_idr, &qp->hdr);
+
+	spin_lock_irqsave(&sdev->idr_lock, irq_state);
+	list_del(&qp->devq);
+	spin_unlock_irqrestore(&sdev->idr_lock, irq_state);
+
+	/* vfree() ignores NULL pointers, so unused queues need no check */
+	vfree(qp->sendq);
+	vfree(qp->recvq);
+	vfree(qp->irq);
+	vfree(qp->orq);
+
+	atomic_dec(&sdev->num_qp);
+	kfree(qp);
+}
+
+/*
+ * siw_free_pd()
+ *
+ * kref release function for PDs, passed to kref_put() by
+ * siw_pd_put(): drops the device's PD count and frees the PD.
+ */
+static void siw_free_pd(struct kref *ref)
+{
+	struct siw_objhdr *objhdr = container_of(ref, struct siw_objhdr, ref);
+	struct siw_pd *pd = container_of(objhdr, struct siw_pd, hdr);
+
+	dprint(DBG_OBJ, "(PD%d): Free Object\n", pd->hdr.id);
+
+	atomic_dec(&pd->hdr.sdev->num_pd);
+	kfree(pd);
+}
+
+/*
+ * siw_free_mem()
+ *
+ * kref release function for memory objects, passed to kref_put() by
+ * siw_mem_put(). Drops the device's memory object count and frees
+ * the enclosing MW or MR via kfree_rcu(). For an MR with an attached
+ * memory object, the PBL or the umem is released first.
+ */
+static void siw_free_mem(struct kref *ref)
+{
+	struct siw_objhdr *objhdr = container_of(ref, struct siw_objhdr, ref);
+	struct siw_mem *m = container_of(objhdr, struct siw_mem, hdr);
+	struct siw_mr *mr;
+
+	dprint(DBG_MM|DBG_OBJ, "(MEM%d): Free\n", OBJ_ID(m));
+
+	atomic_dec(&m->hdr.sdev->num_mem);
+
+	if (SIW_MEM_IS_MW(m)) {
+		struct siw_mw *mw = container_of(m, struct siw_mw, mem);
+
+		kfree_rcu(mw, rcu);
+		return;
+	}
+	mr = container_of(m, struct siw_mr, mem);
+
+	dprint(DBG_MM|DBG_OBJ, "(MEM%d): Release obj %p, (PBL %d)\n",
+		OBJ_ID(m), mr->mem_obj, mr->mem.is_pbl ? 1 : 0);
+
+	if (mr->mem_obj) {
+		if (mr->mem.is_pbl)
+			siw_pbl_free(mr->pbl);
+		else
+			siw_umem_release(mr->umem);
+	}
+	kfree_rcu(mr, rcu);
+}
+
+
+/*
+ * siw_cq_put()
+ *
+ * Drops one reference on the CQ. siw_free_cq() runs when the last
+ * reference is gone.
+ */
+void siw_cq_put(struct siw_cq *cq)
+{
+	/* hdr.ref is a struct kref, so read it with kref_read() */
+	dprint(DBG_OBJ, "(CQ%d): Old refcount: %d\n",
+		OBJ_ID(cq), kref_read(&cq->hdr.ref));
+	kref_put(&cq->hdr.ref, siw_free_cq);
+}
+
+/*
+ * siw_qp_put()
+ *
+ * Drops one reference on the QP. siw_free_qp() runs when the last
+ * reference is gone.
+ */
+void siw_qp_put(struct siw_qp *qp)
+{
+	/* hdr.ref is a struct kref, so read it with kref_read() */
+	dprint(DBG_OBJ, "(QP%d): Old refcount: %d\n",
+		QP_ID(qp), kref_read(&qp->hdr.ref));
+	kref_put(&qp->hdr.ref, siw_free_qp);
+}
+
+/*
+ * siw_pd_put()
+ *
+ * Drops one reference on the PD. siw_free_pd() runs when the last
+ * reference is gone.
+ */
+void siw_pd_put(struct siw_pd *pd)
+{
+	/* hdr.ref is a struct kref, so read it with kref_read() */
+	dprint(DBG_OBJ, "(PD%d): Old refcount: %d\n",
+		OBJ_ID(pd), kref_read(&pd->hdr.ref));
+	kref_put(&pd->hdr.ref, siw_free_pd);
+}
+
+/*
+ * siw_mem_put()
+ *
+ * Drops one reference on the memory object. siw_free_mem() runs when
+ * the last reference is gone.
+ */
+void siw_mem_put(struct siw_mem *m)
+{
+	/* hdr.ref is a struct kref, so read it with kref_read() */
+	dprint(DBG_MM|DBG_OBJ, "(MEM%d): Old refcount: %d\n",
+		OBJ_ID(m), kref_read(&m->hdr.ref));
+	kref_put(&m->hdr.ref, siw_free_mem);
+}
+
+
+/***** routines for WQE handling ***/
+
+/*
+ * siw_unref_mem_sgl()
+ *
+ * Walks up to num_sge entries of a resolved-memory array, drops the
+ * reference of each entry and clears it. The walk stops early at the
+ * first entry that holds no object.
+ */
+static inline void siw_unref_mem_sgl(union siw_mem_resolved *mem, int num_sge)
+{
+	while (num_sge-- && mem->obj != NULL) {
+		siw_mem_put(mem->obj);
+		mem->obj = NULL;
+		mem++;
+	}
+}
+
+/*
+ * siw_wqe_put_mem()
+ *
+ * Releases the memory references a work queue element holds. How
+ * many entries of wqe->mem are walked depends on the operation:
+ * the SGE count of the send or receive queue element, or exactly one
+ * entry for a READ response. Send type operations carrying inline
+ * data hold no references.
+ */
+void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
+{
+	int num_sge = 0;
+
+	switch (op) {
+
+	case SIW_OP_SEND:
+	case SIW_OP_WRITE:
+	case SIW_OP_SEND_WITH_IMM:
+	case SIW_OP_SEND_REMOTE_INV:
+	case SIW_OP_READ:
+	case SIW_OP_READ_LOCAL_INV:
+		if (!(wqe->sqe.flags & SIW_WQE_INLINE))
+			num_sge = wqe->sqe.num_sge;
+		break;
+
+	case SIW_OP_RECEIVE:
+		num_sge = wqe->rqe.num_sge;
+		break;
+
+	case SIW_OP_READ_RESPONSE:
+		num_sge = 1;
+		break;
+
+	default:
+		/*
+		 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
+		 * do not hold memory references
+		 */
+		break;
+	}
+	if (num_sge)
+		siw_unref_mem_sgl(wqe->mem, num_sge);
+}
+
+/*
+ * siw_invalidate_stag()
+ *
+ * Marks the memory object addressed by @stag as invalid. The lookup
+ * uses the STag index (@stag shifted right by 8 bits).
+ *
+ * Returns 0 on success, or -EINVAL if the STag is unknown or the
+ * memory belongs to a PD other than @pd.
+ */
+int siw_invalidate_stag(struct siw_pd *pd, u32 stag)
+{
+	u32 stag_idx = stag >> 8;
+	struct siw_mem *mem;
+	int rv;
+
+	mem = siw_mem_id2obj(pd->hdr.sdev, stag_idx);
+	if (unlikely(!mem)) {
+		dprint(DBG_ON, ": STag %u unknown\n", stag_idx);
+		return -EINVAL;
+	}
+	if (likely(siw_mem2mr(mem)->pd == pd)) {
+		/*
+		 * Per RDMA verbs definition, an STag may already be in
+		 * invalid state if invalidation is requested. So no state
+		 * check here.
+		 */
+		mem->stag_valid = 0;
+		rv = 0;
+
+		dprint(DBG_MM, ": STag %u now invalid\n", stag_idx);
+	} else {
+		dprint(DBG_ON, ": PD mismatch for STag %u\n", stag_idx);
+		rv = -EINVAL;
+	}
+	/* drop the reference taken by the lookup */
+	siw_mem_put(mem);
+
+	return rv;
+}
diff --git a/drivers/infiniband/sw/siw/siw_obj.h b/drivers/infiniband/sw/siw/siw_obj.h
new file mode 100644
index 000000000000..ca8b9fa39f0c
--- /dev/null
+++ b/drivers/infiniband/sw/siw/siw_obj.h
@@ -0,0 +1,113 @@ 
+/*
+ * Software iWARP device driver for Linux
+ *
+ * Authors: Bernard Metzler <bmt@zurich.ibm.com>
+ *
+ * Copyright (c) 2008-2017, IBM Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *   Redistribution and use in source and binary forms, with or
+ *   without modification, are permitted provided that the following
+ *   conditions are met:
+ *
+ *   - Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of IBM nor the names of its contributors may be
+ *     used to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _SIW_OBJ_H
+#define _SIW_OBJ_H
+
+#include <linux/idr.h>
+#include <linux/rwsem.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "siw_debug.h"
+
+
+/* Maps an embedded ib_device back to the siw device containing it. */
+static inline struct siw_dev *siw_dev_ofa2siw(struct ib_device *ofa_dev)
+{
+	struct siw_dev *sdev = container_of(ofa_dev, struct siw_dev, ofa_dev);
+
+	return sdev;
+}
+
+/* Maps an embedded ib_mr back to the siw memory region containing it. */
+static inline struct siw_mr *siw_mr_ofa2siw(struct ib_mr *ofa_mr)
+{
+	struct siw_mr *mr = container_of(ofa_mr, struct siw_mr, ofa_mr);
+
+	return mr;
+}
+
+/* Takes an additional reference on a CQ the caller already holds. */
+static inline void siw_cq_get(struct siw_cq *cq)
+{
+	kref_get(&cq->hdr.ref);
+	/* hdr.ref is a struct kref, so read it with kref_read() */
+	dprint(DBG_OBJ, "(CQ%d): New refcount: %d\n",
+		OBJ_ID(cq), kref_read(&cq->hdr.ref));
+}
+/* Takes an additional reference on a QP the caller already holds. */
+static inline void siw_qp_get(struct siw_qp *qp)
+{
+	kref_get(&qp->hdr.ref);
+	/* hdr.ref is a struct kref, so read it with kref_read() */
+	dprint(DBG_OBJ, "(QP%d): New refcount: %d\n",
+		OBJ_ID(qp), kref_read(&qp->hdr.ref));
+}
+/* Takes an additional reference on a PD the caller already holds. */
+static inline void siw_pd_get(struct siw_pd *pd)
+{
+	kref_get(&pd->hdr.ref);
+	/* hdr.ref is a struct kref, so read it with kref_read() */
+	dprint(DBG_OBJ, "(PD%d): New refcount: %d\n",
+		OBJ_ID(pd), kref_read(&pd->hdr.ref));
+}
+/* Takes an additional reference on a memory object the caller already holds. */
+static inline void siw_mem_get(struct siw_mem *mem)
+{
+	kref_get(&mem->hdr.ref);
+	/* hdr.ref is a struct kref, so read it with kref_read() */
+	dprint(DBG_OBJ|DBG_MM, "(MEM%d): New refcount: %d\n",
+		OBJ_ID(mem), kref_read(&mem->hdr.ref));
+}
+
+extern void siw_remove_obj(spinlock_t *lock, struct idr *idr,
+				struct siw_objhdr *hdr);
+
+extern void siw_objhdr_init(struct siw_objhdr *hdr);
+extern void siw_idr_init(struct siw_dev *dev);
+extern void siw_idr_release(struct siw_dev *dev);
+
+extern struct siw_cq *siw_cq_id2obj(struct siw_dev *dev, int id);
+extern struct siw_qp *siw_qp_id2obj(struct siw_dev *dev, int id);
+extern struct siw_mem *siw_mem_id2obj(struct siw_dev *dev, int id);
+
+extern int siw_qp_add(struct siw_dev *dev, struct siw_qp *qp);
+extern int siw_cq_add(struct siw_dev *dev, struct siw_cq *cq);
+extern int siw_pd_add(struct siw_dev *dev, struct siw_pd *pd);
+extern int siw_mem_add(struct siw_dev *dev, struct siw_mem *mem);
+
+extern struct siw_wqe *siw_freeq_wqe_get(struct siw_qp *qp);
+
+extern void siw_cq_put(struct siw_cq *cq);
+extern void siw_qp_put(struct siw_qp *qp);
+extern void siw_pd_put(struct siw_pd *pd);
+extern void siw_mem_put(struct siw_mem *mem);
+extern void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode opcode);
+
+extern int siw_invalidate_stag(struct siw_pd *pd, u32 stag);
+#endif