diff mbox series

[RFC,v9,07/26] RDMA/rxe: Use kzmalloc/kfree for mca

Message ID 20220127213755.31697-8-rpearsonhpe@gmail.com (mailing list archive)
State RFC
Headers show
Series [RFC,v9,01/26] RDMA/rxe: Move rxe_mcast_add/delete to rxe_mcast.c | expand

Commit Message

Bob Pearson Jan. 27, 2022, 9:37 p.m. UTC
Remove rxe_mca (was rxe_mc_elem) from rxe pools and use kzmalloc
and kfree to allocate and free. Use the sequence

    <lookup qp>
    new_mca = kzalloc(sizeof(*new_mca), GFP_KERNEL);
    <spin lock>
    <lookup qp again> /* in case of a race */
    <init new_mca>
    <spin unlock>

instead of GFP_ATOMIC inside of the spinlock. Add an extra reference
to mcg when a new one is created and drop when the last qp is detached.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe.c       |  8 -----
 drivers/infiniband/sw/rxe/rxe_mcast.c | 51 ++++++++++++++++-----------
 drivers/infiniband/sw/rxe/rxe_pool.c  |  5 ---
 drivers/infiniband/sw/rxe/rxe_pool.h  |  1 -
 drivers/infiniband/sw/rxe/rxe_verbs.h |  2 --
 5 files changed, 30 insertions(+), 37 deletions(-)

Comments

Jason Gunthorpe Jan. 28, 2022, 6 p.m. UTC | #1
On Thu, Jan 27, 2022 at 03:37:36PM -0600, Bob Pearson wrote:
> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
> index 9336295c4ee2..39f38ee665f2 100644
> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
> @@ -36,6 +36,7 @@ static struct rxe_mcg *create_grp(struct rxe_dev *rxe,
>  	grp = rxe_alloc_locked(&rxe->mc_grp_pool);
>  	if (!grp)
>  		return ERR_PTR(-ENOMEM);
> +	rxe_add_ref(grp);

I have no idea what this ref is for, the grp already has a ref of 1.

You should put the ref incrs near the place that makes a copy of the
pointer. Every pointer should have a ref.

When rxe_alloc_locked() returns the ref is 1 and this ref logically
belongs to the caller

When the caller does rxe_add_key_locked() then the ref is moved into
the rbtree and is now owned by the rbtree

When the caller does rxe_drop_key_locked() then the ref is moved out
of the rbtree and is now owned again by the caller.

After this patch this is leaking the memory on the error unwind:

	err = rxe_mcast_add(rxe, mgid);
	if (unlikely(err)) {
		rxe_drop_key_locked(grp);
		rxe_drop_ref(grp);
		return ERR_PTR(err);
	}

>  	INIT_LIST_HEAD(&grp->qp_list);
>  	spin_lock_init(&grp->mcg_lock);
> @@ -85,12 +86,28 @@ static int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
>  			   struct rxe_mcg *grp)
>  {
>  	int err;
> -	struct rxe_mca *elem;
> +	struct rxe_mca *mca, *new_mca;
>  
> -	/* check to see of the qp is already a member of the group */
> +	/* check to see if the qp is already a member of the group */
>  	spin_lock_bh(&grp->mcg_lock);
> -	list_for_each_entry(elem, &grp->qp_list, qp_list) {
> -		if (elem->qp == qp) {
> +	list_for_each_entry(mca, &grp->qp_list, qp_list) {
> +		if (mca->qp == qp) {
> +			spin_unlock_bh(&grp->mcg_lock);
> +			return 0;
> +		}
> +	}
> +	spin_unlock_bh(&grp->mcg_lock);

This would all be much simpler and faster to change it so the qp has
the list head that stores the list of groups it is joined to.

This code never seems to need to go from a group back to the list of
qps.

Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index fab291245366..c55736e441e7 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -29,7 +29,6 @@  void rxe_dealloc(struct ib_device *ib_dev)
 	rxe_pool_cleanup(&rxe->mr_pool);
 	rxe_pool_cleanup(&rxe->mw_pool);
 	rxe_pool_cleanup(&rxe->mc_grp_pool);
-	rxe_pool_cleanup(&rxe->mc_elem_pool);
 
 	if (rxe->tfm)
 		crypto_free_shash(rxe->tfm);
@@ -163,15 +162,8 @@  static int rxe_init_pools(struct rxe_dev *rxe)
 	if (err)
 		goto err9;
 
-	err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM,
-			    rxe->attr.max_total_mcast_qp_attach);
-	if (err)
-		goto err10;
-
 	return 0;
 
-err10:
-	rxe_pool_cleanup(&rxe->mc_grp_pool);
 err9:
 	rxe_pool_cleanup(&rxe->mw_pool);
 err8:
diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index 9336295c4ee2..39f38ee665f2 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -36,6 +36,7 @@  static struct rxe_mcg *create_grp(struct rxe_dev *rxe,
 	grp = rxe_alloc_locked(&rxe->mc_grp_pool);
 	if (!grp)
 		return ERR_PTR(-ENOMEM);
+	rxe_add_ref(grp);
 
 	INIT_LIST_HEAD(&grp->qp_list);
 	spin_lock_init(&grp->mcg_lock);
@@ -85,12 +86,28 @@  static int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
 			   struct rxe_mcg *grp)
 {
 	int err;
-	struct rxe_mca *elem;
+	struct rxe_mca *mca, *new_mca;
 
-	/* check to see of the qp is already a member of the group */
+	/* check to see if the qp is already a member of the group */
 	spin_lock_bh(&grp->mcg_lock);
-	list_for_each_entry(elem, &grp->qp_list, qp_list) {
-		if (elem->qp == qp) {
+	list_for_each_entry(mca, &grp->qp_list, qp_list) {
+		if (mca->qp == qp) {
+			spin_unlock_bh(&grp->mcg_lock);
+			return 0;
+		}
+	}
+	spin_unlock_bh(&grp->mcg_lock);
+
+	/* speculative alloc new mca without using GFP_ATOMIC */
+	new_mca = kzalloc(sizeof(*mca), GFP_KERNEL);
+	if (!new_mca)
+		return -ENOMEM;
+
+	spin_lock_bh(&grp->mcg_lock);
+	/* re-check to see if someone else just attached qp */
+	list_for_each_entry(mca, &grp->qp_list, qp_list) {
+		if (mca->qp == qp) {
+			kfree(new_mca);
 			err = 0;
 			goto out;
 		}
@@ -101,20 +118,11 @@  static int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
 		goto out;
 	}
 
-	elem = rxe_alloc_locked(&rxe->mc_elem_pool);
-	if (!elem) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	/* each qp holds a ref on the grp */
-	rxe_add_ref(grp);
-
 	grp->num_qp++;
-	elem->qp = qp;
+	new_mca->qp = qp;
 	atomic_inc(&qp->mcg_num);
 
-	list_add(&elem->qp_list, &grp->qp_list);
+	list_add(&new_mca->qp_list, &grp->qp_list);
 
 	err = 0;
 out:
@@ -126,7 +134,7 @@  static int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
 				   union ib_gid *mgid)
 {
 	struct rxe_mcg *grp;
-	struct rxe_mca *elem, *tmp;
+	struct rxe_mca *mca, *tmp;
 
 	grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
 	if (!grp)
@@ -134,16 +142,17 @@  static int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
 
 	spin_lock_bh(&grp->mcg_lock);
 
-	list_for_each_entry_safe(elem, tmp, &grp->qp_list, qp_list) {
-		if (elem->qp == qp) {
-			list_del(&elem->qp_list);
+	list_for_each_entry_safe(mca, tmp, &grp->qp_list, qp_list) {
+		if (mca->qp == qp) {
+			list_del(&mca->qp_list);
 			grp->num_qp--;
+			if (grp->num_qp <= 0)
+				rxe_drop_ref(grp);
 			atomic_dec(&qp->mcg_num);
 
 			spin_unlock_bh(&grp->mcg_lock);
-			rxe_drop_ref(elem);
-			rxe_drop_ref(grp);	/* ref held by QP */
 			rxe_drop_ref(grp);	/* ref from get_key */
+			kfree(mca);
 			return 0;
 		}
 	}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 63c594173565..a6756aa93e2b 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -90,11 +90,6 @@  static const struct rxe_type_info {
 		.key_offset	= offsetof(struct rxe_mcg, mgid),
 		.key_size	= sizeof(union ib_gid),
 	},
-	[RXE_TYPE_MC_ELEM] = {
-		.name		= "rxe-mc_elem",
-		.size		= sizeof(struct rxe_mca),
-		.elem_offset	= offsetof(struct rxe_mca, elem),
-	},
 };
 
 static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 214279310f4d..511f81554fd1 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -23,7 +23,6 @@  enum rxe_elem_type {
 	RXE_TYPE_MR,
 	RXE_TYPE_MW,
 	RXE_TYPE_MC_GRP,
-	RXE_TYPE_MC_ELEM,
 	RXE_NUM_TYPES,		/* keep me last */
 };
 
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 55f8ed2bc621..02745d51c163 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -363,7 +363,6 @@  struct rxe_mcg {
 };
 
 struct rxe_mca {
-	struct rxe_pool_elem	elem;
 	struct list_head	qp_list;
 	struct rxe_qp		*qp;
 };
@@ -397,7 +396,6 @@  struct rxe_dev {
 	struct rxe_pool		mr_pool;
 	struct rxe_pool		mw_pool;
 	struct rxe_pool		mc_grp_pool;
-	struct rxe_pool		mc_elem_pool;
 
 	spinlock_t		pending_lock; /* guard pending_mmaps */
 	struct list_head	pending_mmaps;