diff mbox series

[for-next,v3,01/10] RDMA/rxe: Make rxe_alloc() take pool lock

Message ID 20211022191824.18307-2-rpearsonhpe@gmail.com (mailing list archive)
State Superseded
Delegated to: Jason Gunthorpe
Headers show
Series Correct race conditions in rdma_rxe | expand

Commit Message

Bob Pearson Oct. 22, 2021, 7:18 p.m. UTC
In rxe_pool.c there are two separate pool APIs for creating a new object
rxe_alloc() and rxe_alloc_locked(). Currently they are identical except
for GFP_KERNEL vs GFP_ATOMIC. Make rxe_alloc() take the pool lock which
is in line with the other APIs in the library and was the original intent.
Keep kzalloc() outside of the lock to avoid using GFP_ATOMIC.
Make similar change to rxe_add_to_pool. The code checking the pool limit
is potentially racy without a lock. The lock around finishing the pool
element provides a memory barrier before 'publishing' the object.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
v3
  Kept rxe_alloc and rxe_alloc_locked separate to allow use of GFP_KERNEL
  in rxe_alloc

 drivers/infiniband/sw/rxe/rxe_pool.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

Comments

Jason Gunthorpe Oct. 25, 2021, 5:55 p.m. UTC | #1
On Fri, Oct 22, 2021 at 02:18:16PM -0500, Bob Pearson wrote:
> In rxe_pool.c there are two separate pool APIs for creating a new object
> rxe_alloc() and rxe_alloc_locked(). Currently they are identical except
> for GFP_KERNEL vs GFP_ATOMIC. Make rxe_alloc() take the pool lock which
> is in line with the other APIs in the library and was the original intent.
> Keep kzalloc() outside of the lock to avoid using GFP_ATOMIC.
> Make similar change to rxe_add_to_pool. The code checking the pool limit
> is potentially racy without a lock. The lock around finishing the pool
> element provides a memory barrier before 'publishing' the object.
> 
> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> v3
>   Kept rxe_alloc and rxe_alloc_locked separate to allow use of GFP_KERNEL
>   in rxe_alloc
> 
>  drivers/infiniband/sw/rxe/rxe_pool.c | 18 +++++++++++++++---
>  1 file changed, 15 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
> index 7b4cb46edfd9..8138e459b6c1 100644
> +++ b/drivers/infiniband/sw/rxe/rxe_pool.c
> @@ -356,39 +356,51 @@ void *rxe_alloc(struct rxe_pool *pool)
>  {
>  	struct rxe_type_info *info = &rxe_type_info[pool->type];
>  	struct rxe_pool_entry *elem;
> +	unsigned long flags;
>  	u8 *obj;
>  
> +	write_lock_irqsave(&pool->pool_lock, flags);
>  	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
>  		goto out_cnt;
> +	write_unlock_irqrestore(&pool->pool_lock, flags);

Atomic's don't need locks this isn't racy as is today


>  	obj = kzalloc(info->size, GFP_KERNEL);
> -	if (!obj)
> -		goto out_cnt;
> +	if (!obj) {
> +		atomic_dec(&pool->num_elem);
> +		return NULL;
> +	}
>  
> +	write_lock_irqsave(&pool->pool_lock, flags);
>  	elem = (struct rxe_pool_entry *)(obj + info->elem_offset);
> -
>  	elem->pool = pool;
>  	kref_init(&elem->ref_cnt);
> +	write_unlock_irqrestore(&pool->pool_lock, flags);

And none of this needs a lock either

The 'release' operation should be provided by the code that writes a
non-thread-local pointer, not by the code that initializes it a
pointer that never leaves the stack.

Jason
Bob Pearson Oct. 25, 2021, 7:09 p.m. UTC | #2
Here (not related to the mcast issue) I am guilty of hiding my plans.
A little later I was going to move creating the metadata (RB tree and
index table) into the lock and then later after converting to xarrays
replace the lock with the xa_lock which you have to have anyway
because it's part of xa_alloc().
As you point out it is the index/key tree change that is visible to
everyone. The races here are in trying to get the index or change the
RB tree. Perhaps I should merge this change with the later one where I
get rid of add/drop_index(). Or, rearrange things and convert to
xarrays and then worry about races.
Jason Gunthorpe Oct. 25, 2021, 7:41 p.m. UTC | #3
On Mon, Oct 25, 2021 at 02:09:36PM -0500, Robert Pearson wrote:
> Here (not related to the mcast issue) I am guilty of hiding my plans.
> A little later I was going to move creating the metadata (RB tree and
> index table) into the lock and then later after converting to xarrays
> replace the lock with the xa_lock which you have to have anyway
> because it's part of xa_alloc().
> As you point out it is the index/key tree change that is visible to
> everyone. The races here are in trying to get the index or change the
> RB tree. Perhaps I should merge this change with the later one where I
> get rid of add/drop_index(). Or, rearrange things and convert to
> xarrays and then worry about races.

Right, it should be later

Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 7b4cb46edfd9..8138e459b6c1 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -356,39 +356,51 @@  void *rxe_alloc(struct rxe_pool *pool)
 {
 	struct rxe_type_info *info = &rxe_type_info[pool->type];
 	struct rxe_pool_entry *elem;
+	unsigned long flags;
 	u8 *obj;
 
+	write_lock_irqsave(&pool->pool_lock, flags);
 	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
 		goto out_cnt;
+	write_unlock_irqrestore(&pool->pool_lock, flags);
 
 	obj = kzalloc(info->size, GFP_KERNEL);
-	if (!obj)
-		goto out_cnt;
+	if (!obj) {
+		atomic_dec(&pool->num_elem);
+		return NULL;
+	}
 
+	write_lock_irqsave(&pool->pool_lock, flags);
 	elem = (struct rxe_pool_entry *)(obj + info->elem_offset);
-
 	elem->pool = pool;
 	kref_init(&elem->ref_cnt);
+	write_unlock_irqrestore(&pool->pool_lock, flags);
 
 	return obj;
 
 out_cnt:
 	atomic_dec(&pool->num_elem);
+	write_unlock_irqrestore(&pool->pool_lock, flags);
 	return NULL;
 }
 
 int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
 {
+	unsigned long flags;
+
+	write_lock_irqsave(&pool->pool_lock, flags);
 	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
 		goto out_cnt;
 
 	elem->pool = pool;
 	kref_init(&elem->ref_cnt);
+	write_unlock_irqrestore(&pool->pool_lock, flags);
 
 	return 0;
 
 out_cnt:
 	atomic_dec(&pool->num_elem);
+	write_unlock_irqrestore(&pool->pool_lock, flags);
 	return -EINVAL;
 }