@@ -692,6 +692,7 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
mr->state = RXE_MR_STATE_INVALID;
rxe_drop_ref(mr_pd(mr));
rxe_drop_ref(mr);
+ rxe_wait(mr);
return 0;
}
@@ -60,8 +60,9 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
rxe_do_dealloc_mw(mw);
spin_unlock_bh(&mw->lock);
- rxe_drop_ref(mw);
rxe_drop_ref(pd);
+ rxe_drop_ref(mw);
+ rxe_wait(mw);
return 0;
}
@@ -7,6 +7,7 @@
#include "rxe.h"
+#define RXE_POOL_TIMEOUT (200)
#define RXE_POOL_ALIGN (16)
static const struct rxe_type_info {
@@ -154,6 +155,7 @@ void *rxe_alloc(struct rxe_pool *pool)
elem->pool = pool;
elem->obj = obj;
kref_init(&elem->ref_cnt);
+ init_completion(&elem->complete);
err = xa_alloc_cyclic_bh(&pool->xa, &elem->index, elem, pool->limit,
&pool->next, GFP_KERNEL);
@@ -185,6 +187,7 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem)
elem->pool = pool;
elem->obj = (u8 *)elem - pool->elem_offset;
kref_init(&elem->ref_cnt);
+ init_completion(&elem->complete);
err = xa_alloc_cyclic_bh(&pool->xa, &elem->index, elem, pool->limit,
&pool->next, GFP_KERNEL);
@@ -212,31 +215,22 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
return obj;
}
-static void rxe_obj_free_rcu(struct rcu_head *rcu)
-{
- struct rxe_pool_elem *elem = container_of(rcu, typeof(*elem), rcu);
-
- kfree(elem->obj);
-}
-
static void __rxe_elem_release_rcu(struct kref *kref)
__releases(&pool->xa.xa_lock)
{
- struct rxe_pool_elem *elem = container_of(kref,
- struct rxe_pool_elem, ref_cnt);
+ struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt);
struct rxe_pool *pool = elem->pool;
__xa_erase(&pool->xa, elem->index);
- spin_unlock(&pool->xa.xa_lock);
+ spin_unlock_bh(&pool->xa.xa_lock);
if (pool->cleanup)
pool->cleanup(elem);
atomic_dec(&pool->num_elem);
- if (pool->flags & RXE_POOL_ALLOC)
- call_rcu(&elem->rcu, rxe_obj_free_rcu);
+ complete(&elem->complete);
}
int __rxe_add_ref(struct rxe_pool_elem *elem)
@@ -244,8 +238,67 @@ int __rxe_add_ref(struct rxe_pool_elem *elem)
return kref_get_unless_zero(&elem->ref_cnt);
}
+static bool refcount_dec_and_lock_bh(refcount_t *r, spinlock_t *lock)
+ __acquires(lock) __releases(lock)
+{
+ if (refcount_dec_not_one(r))
+ return false;
+
+ spin_lock_bh(lock);
+ if (!refcount_dec_and_test(r)) {
+ spin_unlock_bh(lock);
+ return false;
+ }
+
+ return true;
+}
+
+static int kref_put_lock_bh(struct kref *kref,
+ void (*release)(struct kref *kref),
+ spinlock_t *lock)
+{
+ if (refcount_dec_and_lock_bh(&kref->refcount, lock)) {
+ release(kref);
+ return 1;
+ }
+ return 0;
+}
+
int __rxe_drop_ref(struct rxe_pool_elem *elem)
{
- return kref_put_lock(&elem->ref_cnt, __rxe_elem_release_rcu,
+ return kref_put_lock_bh(&elem->ref_cnt, __rxe_elem_release_rcu,
&elem->pool->xa.xa_lock);
}
+
+static void rxe_obj_free_rcu(struct rcu_head *rcu)
+{
+ struct rxe_pool_elem *elem = container_of(rcu, typeof(*elem), rcu);
+
+ kfree(elem->obj);
+}
+
+int __rxe_wait(struct rxe_pool_elem *elem)
+{
+ struct rxe_pool *pool = elem->pool;
+ static int timeout = RXE_POOL_TIMEOUT;
+ static int timeout_failures;
+ int ret = 0;
+
+ if (timeout) {
+ ret = wait_for_completion_timeout(&elem->complete, timeout);
+ if (!ret) {
+ if (timeout_failures++ == 5) {
+ timeout = 0;
+ pr_warn("Exceeded max completion timeouts. Disabling wait_for_completion\n");
+ } else {
+ pr_warn_ratelimited("Timed out waiting for %s#%d to complete\n",
+ pool->name + 4, elem->index);
+ }
+ }
+ }
+
+ if (pool->flags & RXE_POOL_ALLOC)
+ call_rcu(&elem->rcu, rxe_obj_free_rcu);
+
+ return ret;
+}
@@ -29,6 +29,7 @@ struct rxe_pool_elem {
struct kref ref_cnt;
struct list_head list;
struct rcu_head rcu;
+ struct completion complete;
u32 index;
};
@@ -67,4 +68,7 @@ int __rxe_add_ref(struct rxe_pool_elem *elem);
int __rxe_drop_ref(struct rxe_pool_elem *elem);
#define rxe_drop_ref(obj) __rxe_drop_ref(&(obj)->elem)
+int __rxe_wait(struct rxe_pool_elem *elem);
+#define rxe_wait(obj) __rxe_wait(&(obj)->elem)
+
#endif /* RXE_POOL_H */
@@ -116,6 +116,7 @@ static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
struct rxe_ucontext *uc = to_ruc(ibuc);
rxe_drop_ref(uc);
+ rxe_wait(uc);
}
static int rxe_port_immutable(struct ib_device *dev, u32 port_num,
@@ -150,6 +151,7 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
struct rxe_pd *pd = to_rpd(ibpd);
rxe_drop_ref(pd);
+ rxe_wait(pd);
return 0;
}
@@ -189,6 +191,7 @@ static int rxe_create_ah(struct ib_ah *ibah,
sizeof(uresp->ah_num));
if (err) {
rxe_drop_ref(ah);
+ rxe_wait(ah);
return -EFAULT;
}
} else if (ah->is_user) {
@@ -229,6 +232,7 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
struct rxe_ah *ah = to_rah(ibah);
rxe_drop_ref(ah);
+ rxe_wait(ah);
return 0;
}
@@ -315,6 +319,7 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
err2:
rxe_drop_ref(pd);
rxe_drop_ref(srq);
+ rxe_wait(srq);
err1:
return err;
}
@@ -373,6 +378,7 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
rxe_drop_ref(srq->pd);
rxe_drop_ref(srq);
+ rxe_wait(srq);
return 0;
}
@@ -442,6 +448,7 @@ static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
qp_init:
rxe_drop_ref(qp);
+ rxe_wait(qp);
return err;
}
@@ -496,6 +503,7 @@ static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
rxe_qp_destroy(qp);
rxe_drop_ref(qp);
+ rxe_wait(qp);
return 0;
}
@@ -807,6 +815,7 @@ static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
rxe_cq_disable(cq);
rxe_drop_ref(cq);
+ rxe_wait(cq);
return 0;
}
@@ -932,6 +941,7 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
err3:
rxe_drop_ref(pd);
rxe_drop_ref(mr);
+ rxe_wait(mr);
err2:
return ERR_PTR(err);
}
@@ -964,6 +974,7 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
err2:
rxe_drop_ref(pd);
rxe_drop_ref(mr);
+ rxe_wait(mr);
err1:
return ERR_PTR(err);
}
Reference counting for object deletion can require an object to wait for
something else to happen before it is finally deleted, which means the
destroy verbs can return to rdma-core while the object still holds
references. Adding a wait_for_completion to this path prevents that.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_mr.c    |  1 +
 drivers/infiniband/sw/rxe/rxe_mw.c    |  3 +-
 drivers/infiniband/sw/rxe/rxe_pool.c  | 79 ++++++++++++++++++++++-----
 drivers/infiniband/sw/rxe/rxe_pool.h  |  4 ++
 drivers/infiniband/sw/rxe/rxe_verbs.c | 11 ++++
 5 files changed, 84 insertions(+), 14 deletions(-)
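
For readers not familiar with the pattern, the "drop your own reference, then
block until the last reference is gone" idea used above can be sketched in
plain userspace C, with C11 atomics standing in for the kref and a pthread
mutex/condvar standing in for the kernel completion. The obj_new()/obj_get()/
obj_put()/obj_destroy() names are illustrative only (not rxe or kernel API),
and the sketch leaves out the RCU deferral and the timeout handling that the
real __rxe_wait() adds on top.

/* Userspace sketch of the drop-ref-then-wait pattern. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct obj {
	atomic_int refcnt;
	pthread_mutex_t lock;
	pthread_cond_t done;	/* signalled when the last ref is dropped */
	int released;
};

static struct obj *obj_new(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	atomic_init(&o->refcnt, 1);	/* creator holds the initial ref */
	pthread_mutex_init(&o->lock, NULL);
	pthread_cond_init(&o->done, NULL);
	return o;
}

static void obj_get(struct obj *o)
{
	atomic_fetch_add(&o->refcnt, 1);
}

static void obj_put(struct obj *o)
{
	/* fetch_sub returns the old value; 1 means this was the last ref */
	if (atomic_fetch_sub(&o->refcnt, 1) == 1) {
		pthread_mutex_lock(&o->lock);
		o->released = 1;
		pthread_cond_signal(&o->done);
		pthread_mutex_unlock(&o->lock);
	}
}

/* Destroy path: drop our ref, then wait until the last ref is gone,
 * mirroring rxe_drop_ref() followed by rxe_wait().
 */
static void obj_destroy(struct obj *o)
{
	obj_put(o);
	pthread_mutex_lock(&o->lock);
	while (!o->released)
		pthread_cond_wait(&o->done, &o->lock);
	pthread_mutex_unlock(&o->lock);
	free(o);	/* no other holder can still see the object */
}

static void *worker(void *arg)
{
	struct obj *o = arg;

	usleep(100 * 1000);	/* pretend to use the object for a while */
	obj_put(o);		/* dropping this ref unblocks obj_destroy() */
	return NULL;
}

int main(void)
{
	struct obj *o = obj_new();
	pthread_t t;

	obj_get(o);			/* ref held by the worker */
	pthread_create(&t, NULL, worker, o);
	obj_destroy(o);			/* blocks until the worker drops its ref */
	pthread_join(t, NULL);
	printf("object fully released before destroy returned\n");
	return 0;
}

Built with cc -pthread, obj_destroy() here only returns once the worker has
dropped its reference, which is the same guarantee the destroy verbs need
before returning control to rdma-core.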