diff mbox series

[for-next] RDMA/rxe: Fix "Replace red-black trees by xarrays"

Message ID 20220410223939.3769-1-rpearsonhpe@gmail.com (mailing list archive)
State Not Applicable
Headers show
Series [for-next] RDMA/rxe: Fix "Replace red-black trees by xarrays" | expand

Commit Message

Bob Pearson April 10, 2022, 10:39 p.m. UTC
The referenced commit causes lockdep warnings by using the
default spin_lock in xa_alloc_cyclic and xa_erase which
include calls to xa_lock()/xa_unlock() while at the same time
explicitly calling xa_lock_irqsave() in rxe_pool_get_index().

The latter is required to handle some object lookups correctly. The
immediate fix is to explicitly use xa_lock_irqsave() everywhere.

This commit replaces xa_alloc_cyclic() by __xa_alloc_cyclic() and
xa_erase() by __xa_erase() and explicitly lock these calls with
xa_lock_irqsave().

This commit will be reverted later when the read side operations
in rxe_pool.c will be converted to rcu_read_locks which will not
require locking the write side operations with irqsave locks.

This commit fixes the "WARNING: Inconsistent lock state" bug in
blktests. The recent revert patch from Bart fixes the other
bug in blktests with very long delays.

Fixes: 3225717f6dfa ("RDMA/rxe: Replace red-black trees by carrays")
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_pool.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

Comments

Bart Van Assche April 11, 2022, 3:06 a.m. UTC | #1
On 4/10/22 15:39, Bob Pearson wrote:
> Fixes: 3225717f6dfa ("RDMA/rxe: Replace red-black trees by carrays")
                                                              ^^^^^^^
                                                              xarrays?

> @@ -138,8 +140,10 @@ void *rxe_alloc(struct rxe_pool *pool)
>   	elem->obj = obj;
>   	kref_init(&elem->ref_cnt);
>   
> -	err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
> +	xa_lock_irqsave(xa, flags);
> +	err = __xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
>   			      &pool->next, GFP_KERNEL);
> +	xa_unlock_irqrestore(xa, flags);

Please take a look at the xas_unlock_type() and xas_lock_type() calls in 
__xas_nomem(). I think that the above change will trigger a GFP_KERNEL 
allocation with interrupts disabled. My understanding is that GFP_KERNEL 
allocations may sleep and hence that the above code may cause 
__xas_nomem() to sleep with interrupts disabled. I don't think that is 
allowed.

Thanks,

Bart.
Bob Pearson April 11, 2022, 3:13 a.m. UTC | #2
On 4/10/22 22:06, Bart Van Assche wrote:
> On 4/10/22 15:39, Bob Pearson wrote:
>> Fixes: 3225717f6dfa ("RDMA/rxe: Replace red-black trees by carrays")
>                                                              ^^^^^^^
>                                                              xarrays?
> 
>> @@ -138,8 +140,10 @@ void *rxe_alloc(struct rxe_pool *pool)
>>       elem->obj = obj;
>>       kref_init(&elem->ref_cnt);
>>   -    err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
>> +    xa_lock_irqsave(xa, flags);
>> +    err = __xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
>>                     &pool->next, GFP_KERNEL);
>> +    xa_unlock_irqrestore(xa, flags);
> 
> Please take a look at the xas_unlock_type() and xas_lock_type() calls in __xas_nomem(). I think that the above change will trigger a GFP_KERNEL allocation with interrupts disabled. My understanding is that GFP_KERNEL allocations may sleep and hence that the above code may cause __xas_nomem() to sleep with interrupts disabled. I don't think that is allowed.
> 
> Thanks,
> 
> Bart.

You're right. I missed that. Zhu wants to write the patch so hopefully he's on top of that.
For now we could use GFP_ATOMIC.

Bob
Zhu Yanjun April 11, 2022, 3:15 a.m. UTC | #3
On Mon, Apr 11, 2022 at 11:06 AM Bart Van Assche <bvanassche@acm.org> wrote:
>
> On 4/10/22 15:39, Bob Pearson wrote:
> > Fixes: 3225717f6dfa ("RDMA/rxe: Replace red-black trees by carrays")
>                                                               ^^^^^^^
>                                                               xarrays?
>
> > @@ -138,8 +140,10 @@ void *rxe_alloc(struct rxe_pool *pool)
> >       elem->obj = obj;
> >       kref_init(&elem->ref_cnt);
> >
> > -     err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
> > +     xa_lock_irqsave(xa, flags);
> > +     err = __xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
> >                             &pool->next, GFP_KERNEL);
> > +     xa_unlock_irqrestore(xa, flags);
>
> Please take a look at the xas_unlock_type() and xas_lock_type() calls in
> __xas_nomem(). I think that the above change will trigger a GFP_KERNEL
> allocation with interrupts disabled. My understanding is that GFP_KERNEL
> allocations may sleep and hence that the above code may cause
> __xas_nomem() to sleep with interrupts disabled. I don't think that is
> allowed.

Thanks. I will send V2.

Zhu Yanjun

>
> Thanks,
>
> Bart.
Jason Gunthorpe April 11, 2022, 11:38 a.m. UTC | #4
On Sun, Apr 10, 2022 at 10:13:16PM -0500, Bob Pearson wrote:
> On 4/10/22 22:06, Bart Van Assche wrote:
> > On 4/10/22 15:39, Bob Pearson wrote:
> >> Fixes: 3225717f6dfa ("RDMA/rxe: Replace red-black trees by carrays")
> >                                                              ^^^^^^^
> >                                                              xarrays?
> > 
> >> @@ -138,8 +140,10 @@ void *rxe_alloc(struct rxe_pool *pool)
> >>       elem->obj = obj;
> >>       kref_init(&elem->ref_cnt);
> >>   -    err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
> >> +    xa_lock_irqsave(xa, flags);
> >> +    err = __xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
> >>                     &pool->next, GFP_KERNEL);
> >> +    xa_unlock_irqrestore(xa, flags);
> > 
> > Please take a look at the xas_unlock_type() and xas_lock_type() calls in __xas_nomem(). I think that the above change will trigger a GFP_KERNEL allocation with interrupts disabled. My understanding is that GFP_KERNEL allocations may sleep and hence that the above code may cause __xas_nomem() to sleep with interrupts disabled. I don't think that is allowed.
> > 
> > Thanks,
> > 
> > Bart.
> 
> You're right. I missed that. Zhu wants to write the patch so hopefully he's on top of that.
> For now we could use GFP_ATOMIC.

Yes, you cannot use irq_save varients here. You have to know your
calling context is non-atomic already and use the irq wrapper.

Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 87066d04ed18..440f96af213b 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -118,7 +118,9 @@  void rxe_pool_cleanup(struct rxe_pool *pool)
 
 void *rxe_alloc(struct rxe_pool *pool)
 {
+	struct xarray *xa = &pool->xa;
 	struct rxe_pool_elem *elem;
+	unsigned long flags;
 	void *obj;
 	int err;
 
@@ -138,8 +140,10 @@  void *rxe_alloc(struct rxe_pool *pool)
 	elem->obj = obj;
 	kref_init(&elem->ref_cnt);
 
-	err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
+	xa_lock_irqsave(xa, flags);
+	err = __xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
 			      &pool->next, GFP_KERNEL);
+	xa_unlock_irqrestore(xa, flags);
 	if (err)
 		goto err_free;
 
@@ -154,6 +158,8 @@  void *rxe_alloc(struct rxe_pool *pool)
 
 int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem)
 {
+	struct xarray *xa = &pool->xa;
+	unsigned long flags;
 	int err;
 
 	if (WARN_ON(pool->flags & RXE_POOL_ALLOC))
@@ -166,8 +172,10 @@  int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem)
 	elem->obj = (u8 *)elem - pool->elem_offset;
 	kref_init(&elem->ref_cnt);
 
-	err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
+	xa_lock_irqsave(xa, flags);
+	err = __xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
 			      &pool->next, GFP_KERNEL);
+	xa_unlock_irqrestore(xa, flags);
 	if (err)
 		goto err_cnt;
 
@@ -200,8 +208,12 @@  static void rxe_elem_release(struct kref *kref)
 {
 	struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt);
 	struct rxe_pool *pool = elem->pool;
+	struct xarray *xa = &pool->xa;
+	unsigned long flags;
 
-	xa_erase(&pool->xa, elem->index);
+	xa_lock_irqsave(xa, flags);
+	__xa_erase(&pool->xa, elem->index);
+	xa_unlock_irqrestore(xa, flags);
 
 	if (pool->cleanup)
 		pool->cleanup(elem);