
[for-next,v11,11/13] RDMA/rxe: Add wait_for_completion to pool objects

Message ID: 20220304000808.225811-12-rpearsonhpe@gmail.com (mailing list archive)
State: Superseded
Series: Fix race conditions in rxe_pool

Commit Message

Bob Pearson March 4, 2022, 12:08 a.m. UTC
Reference counting for object deletion can cause an object to
wait for something else to happen before it gets deleted.
The destroy verbs can then return to rdma-core with the object still
holding references. Adding a wait_for_completion in this path
prevents this.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_mr.c    |  2 +-
 drivers/infiniband/sw/rxe/rxe_mw.c    |  2 +-
 drivers/infiniband/sw/rxe/rxe_pool.c  | 29 +++++++++++++++++++++++++++
 drivers/infiniband/sw/rxe/rxe_pool.h  |  5 +++++
 drivers/infiniband/sw/rxe/rxe_verbs.c | 22 ++++++++++----------
 5 files changed, 47 insertions(+), 13 deletions(-)
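
The pattern the patch introduces can be summed up in a short, self-contained
sketch (illustrative only; my_obj, my_obj_init(), my_obj_release() and
my_obj_destroy() are made-up names, not part of the patch): the last
kref_put() signals a completion, and the destroy path blocks on that
completion so the verb cannot return while other threads still hold
references.

#include <linux/kref.h>
#include <linux/completion.h>
#include <linux/container_of.h>

struct my_obj {
	struct kref		ref_cnt;
	struct completion	complete;
};

static void my_obj_init(struct my_obj *obj)
{
	kref_init(&obj->ref_cnt);	/* reference count starts at 1 */
	init_completion(&obj->complete);
}

static void my_obj_release(struct kref *kref)
{
	struct my_obj *obj = container_of(kref, struct my_obj, ref_cnt);

	/* last reference dropped; wake the waiter in my_obj_destroy() */
	complete(&obj->complete);
}

static void my_obj_destroy(struct my_obj *obj)
{
	/* drop the reference taken in my_obj_init() */
	kref_put(&obj->ref_cnt, my_obj_release);

	/* block until every other holder has also called kref_put() */
	wait_for_completion(&obj->complete);

	/* only now is it safe to free obj or hand it back to the caller */
}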

Comments

Jason Gunthorpe March 16, 2022, 12:17 a.m. UTC | #1
On Thu, Mar 03, 2022 at 06:08:07PM -0600, Bob Pearson wrote:
> Reference counting for object deletion can cause an object to
> wait for something else to happen before it gets deleted.
> The destroy verbs can then return to rdma-core with the object still
> holding references. Adding a wait_for_completion in this path
> prevents this.

Maybe call this rxe_pool_destroy() or something instead of wait

wait doesn't really convey to the reader that the memory is free after
it returns

Jason
Bob Pearson March 16, 2022, 3:57 a.m. UTC | #2
On 3/15/22 19:17, Jason Gunthorpe wrote:
> On Thu, Mar 03, 2022 at 06:08:07PM -0600, Bob Pearson wrote:
>> Reference counting for object deletion can cause an object to
>> wait for something else to happen before it gets deleted.
>> The destroy verbs can then return to rdma-core with the object still
>> holding references. Adding a wait_for_completion in this path
>> prevents this.
> 
> Maybe call this rxe_pool_destroy() or something instead of wait
> 
> wait doesn't really convey to the reader that the memory is free after
> it returns
> 
> Jason

which is correct, because except for MRs, which are freed in completion code,
all the other objects are just passed back to rdma-core, which will free them
in its own good time.

Bob
Jason Gunthorpe March 16, 2022, 1:43 p.m. UTC | #3
On Tue, Mar 15, 2022 at 10:57:20PM -0500, Bob Pearson wrote:
> On 3/15/22 19:17, Jason Gunthorpe wrote:
> > On Thu, Mar 03, 2022 at 06:08:07PM -0600, Bob Pearson wrote:
> >> Reference counting for object deletion can cause an object to
> >> wait for something else to happen before it gets deleted.
> >> The destroy verbs can then return to rdma-core with the object still
> >> holding references. Adding a wait_for_completion in this path
> >> prevents this.
> > 
> > Maybe call this rxe_pool_destroy() or something instead of wait
> > 
> > wait doesn't really convey to the reader that the memory is free after
> > it returns
> > 
> > Jason
> 
> which is correct because except for MRs which are freed in completion code
> all the other objects are just passed back to rdma-core which will free them
> in its own good time.

If you do as I suggested and put the xa_erase in the rxe_wait, then it
is much closer to a destroy.

Perhaps 'unadd' or 'isolate'?

Jason
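
For reference, a rough sketch of the shape Jason is suggesting (hypothetical,
not part of this patch; the posted version still erases the index from
rxe_elem_release(), and locking is simplified here). rxe_elem_release() would
then do nothing but signal elem->complete:

int __rxe_destroy(struct rxe_pool_elem *elem)
{
	struct rxe_pool *pool = elem->pool;

	/* erase the index first so lookups can no longer find the object */
	xa_erase(&pool->xa, elem->index);

	/* drop the reference taken at allocation */
	__rxe_put(elem);

	/* block until all remaining holders have dropped their references */
	wait_for_completion(&elem->complete);

	/* ... cleanup and free, as rxe_elem_release() does in the patch ... */

	return 0;
}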

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index e4ad2cc12584..83e370ae9df6 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -693,7 +693,7 @@  int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 
 	mr->state = RXE_MR_STATE_INVALID;
 	rxe_put(mr_pd(mr));
-	rxe_put(mr);
+	rxe_wait(mr);
 
 	return 0;
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index 4edbed1410e9..157b3f968522 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -64,8 +64,8 @@  int rxe_dealloc_mw(struct ib_mw *ibmw)
 	rxe_do_dealloc_mw(mw);
 	spin_unlock_bh(&mw->lock);
 
-	rxe_put(mw);
 	rxe_put(pd);
+	rxe_wait(mw);
 
 	return 0;
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index c0b70687a83e..4fb6c7dd32ad 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -6,6 +6,8 @@ 
 
 #include "rxe.h"
 
+#define RXE_POOL_TIMEOUT	(200)
+#define RXE_POOL_MAX_TIMEOUTS	(3)
 #define RXE_POOL_ALIGN		(16)
 
 static const struct rxe_type_info {
@@ -158,6 +160,7 @@  void *rxe_alloc(struct rxe_pool *pool)
 	elem->pool = pool;
 	elem->obj = obj;
 	kref_init(&elem->ref_cnt);
+	init_completion(&elem->complete);
 
 	err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit,
 			      &pool->next, GFP_KERNEL);
@@ -186,6 +189,7 @@  int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem)
 	elem->pool = pool;
 	elem->obj = (u8 *)elem - pool->elem_offset;
 	kref_init(&elem->ref_cnt);
+	init_completion(&elem->complete);
 
 	err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit,
 			      &pool->next, GFP_KERNEL);
@@ -228,6 +232,29 @@  static void rxe_elem_release(struct kref *kref)
 	__xa_erase(&pool->xa, elem->index);
 	xa_unlock_irqrestore(xa, flags);
 
+	complete(&elem->complete);
+}
+
+int __rxe_wait(struct rxe_pool_elem *elem)
+{
+	struct rxe_pool *pool = elem->pool;
+	static int timeout = RXE_POOL_TIMEOUT;
+	int ret, err = 0;
+
+	__rxe_put(elem);
+
+	if (timeout) {
+		ret = wait_for_completion_timeout(&elem->complete, timeout);
+		if (!ret) {
+			pr_warn("Timed out waiting for %s#%d to complete\n",
+				pool->name, elem->index);
+			if (++pool->timeouts >= RXE_POOL_MAX_TIMEOUTS)
+				timeout = 0;
+
+			err = -EINVAL;
+		}
+	}
+
 	if (pool->cleanup)
 		pool->cleanup(elem);
 
@@ -235,6 +262,8 @@  static void rxe_elem_release(struct kref *kref)
 		kfree(elem->obj);
 
 	atomic_dec(&pool->num_elem);
+
+	return err;
 }
 
 int __rxe_get(struct rxe_pool_elem *elem)
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index c48d8f6f95f3..1863fa165450 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -28,6 +28,7 @@  struct rxe_pool_elem {
 	void			*obj;
 	struct kref		ref_cnt;
 	struct list_head	list;
+	struct completion	complete;
 	u32			index;
 };
 
@@ -37,6 +38,7 @@  struct rxe_pool {
 	void			(*cleanup)(struct rxe_pool_elem *elem);
 	enum rxe_pool_flags	flags;
 	enum rxe_elem_type	type;
+	unsigned int		timeouts;
 
 	unsigned int		max_elem;
 	atomic_t		num_elem;
@@ -77,6 +79,9 @@  int __rxe_put(struct rxe_pool_elem *elem);
 
 #define rxe_put(obj) __rxe_put(&(obj)->elem)
 
+int __rxe_wait(struct rxe_pool_elem *elem);
+#define rxe_wait(obj) __rxe_wait(&(obj)->elem)
+
 #define rxe_read(obj) kref_read(&(obj)->elem.ref_cnt)
 
 int __rxe_show(struct rxe_pool_elem *elem);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index e010e8860492..0529ad8e819b 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -115,7 +115,7 @@  static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
 {
 	struct rxe_ucontext *uc = to_ruc(ibuc);
 
-	rxe_put(uc);
+	rxe_wait(uc);
 }
 
 static int rxe_port_immutable(struct ib_device *dev, u32 port_num,
@@ -149,7 +149,7 @@  static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 {
 	struct rxe_pd *pd = to_rpd(ibpd);
 
-	rxe_put(pd);
+	rxe_wait(pd);
 	return 0;
 }
 
@@ -188,7 +188,7 @@  static int rxe_create_ah(struct ib_ah *ibah,
 		err = copy_to_user(&uresp->ah_num, &ah->ah_num,
 					 sizeof(uresp->ah_num));
 		if (err) {
-			rxe_put(ah);
+			rxe_wait(ah);
 			return -EFAULT;
 		}
 	} else if (ah->is_user) {
@@ -231,7 +231,7 @@  static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
 	struct rxe_ah *ah = to_rah(ibah);
 
 	rxe_hide(ah);
-	rxe_put(ah);
+	rxe_wait(ah);
 	return 0;
 }
 
@@ -318,7 +318,7 @@  static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
 
 err2:
 	rxe_put(pd);
-	rxe_put(srq);
+	rxe_wait(srq);
 err1:
 	return err;
 }
@@ -377,7 +377,7 @@  static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 		rxe_queue_cleanup(srq->rq.queue);
 
 	rxe_put(srq->pd);
-	rxe_put(srq);
+	rxe_wait(srq);
 	return 0;
 }
 
@@ -448,7 +448,7 @@  static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
 	return 0;
 
 qp_init:
-	rxe_put(qp);
+	rxe_wait(qp);
 	return err;
 }
 
@@ -503,7 +503,7 @@  static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 		return ret;
 
 	rxe_qp_destroy(qp);
-	rxe_put(qp);
+	rxe_wait(qp);
 	return 0;
 }
 
@@ -816,7 +816,7 @@  static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 
 	rxe_cq_disable(cq);
 
-	rxe_put(cq);
+	rxe_wait(cq);
 	return 0;
 }
 
@@ -946,7 +946,7 @@  static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
 
 err3:
 	rxe_put(pd);
-	rxe_put(mr);
+	rxe_wait(mr);
 err2:
 	return ERR_PTR(err);
 }
@@ -980,7 +980,7 @@  static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
 
 err2:
 	rxe_put(pd);
-	rxe_put(mr);
+	rxe_wait(mr);
 err1:
 	return ERR_PTR(err);
 }