diff mbox series

[for-next] RDMA/rxe: Fix "RDMA/rxe: Cleanup rxe_mcast.c"

Message ID 20220413052937.92713-1-rpearsonhpe@gmail.com (mailing list archive)
State Superseded
Headers show
Series [for-next] RDMA/rxe: Fix "RDMA/rxe: Cleanup rxe_mcast.c" | expand

Commit Message

Bob Pearson April 13, 2022, 5:29 a.m. UTC
rxe_mcast.c currently uses _irqsave spinlocks for rxe->mcg_lock
while rxe_recv.c uses _bh spinlocks for the same lock. Adding
tracing shows that the code in rxe_mcast.c all executes in
in_task() context, while the code in rxe_recv.c that takes the
lock executes in softirq context. This mismatch causes a lockdep
warning in code that performs multicast I/O, including
blktests check srp.
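
For illustration, a minimal sketch of the mismatched usage described
above (the call sites are paraphrased, not the actual rxe_mcast.c and
rxe_recv.c code):

	/* rxe_mcast.c style, runs in task context (before this patch) */
	unsigned long flags;

	spin_lock_irqsave(&rxe->mcg_lock, flags);
	/* ... look up or modify multicast group state ... */
	spin_unlock_irqrestore(&rxe->mcg_lock, flags);

	/* rxe_recv.c style, runs in softirq context */
	spin_lock_bh(&rxe->mcg_lock);
	/* ... look up the group for an incoming multicast packet ... */
	spin_unlock_bh(&rxe->mcg_lock);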

Change the locking of rxe->mcg_lock in rxe_mcast.c to use
spin_(un)lock_bh().

With this change the following locks in rdma_rxe, which are now
all taken with the _bh variants, show no lockdep warnings.

	atomic_ops_lock
	mw->lock
	port->port_lock
	qp->state_lock
	rxe->mcg_lock
	rxe->mmap_offset_lock
	rxe->pending_lock
	task->state_lock

The only remaining lock is pool->xa.xa_lock, which must either use
some combination of the _bh and _irq variants depending on the
object type (AH or not), or a plain spin_lock if the read-side
operations are converted to use rcu_read_lock().
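
A rough sketch of that rcu_read_lock() alternative for the read side
(hypothetical pool lookup, not the actual rxe_pool.c code; it assumes
pool elements are freed only after an RCU grace period, e.g. via
kfree_rcu(), and that ref_cnt is a struct kref in the element):

	/* read side: no pool->xa.xa_lock needed at all */
	rcu_read_lock();
	elem = xa_load(&pool->xa, index);
	if (elem && !kref_get_unless_zero(&elem->ref_cnt))
		elem = NULL;
	rcu_read_unlock();

	/* write side can then use a plain (non-_bh, non-_irq) lock */
	xa_lock(&pool->xa);
	__xa_erase(&pool->xa, index);
	xa_unlock(&pool->xa);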

Fixes: 6090a0c4c7c6 ("RDMA/rxe: Cleanup rxe_mcast.c")
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_mcast.c | 36 +++++++++++----------------
 1 file changed, 15 insertions(+), 21 deletions(-)

Comments

Zhu Yanjun April 13, 2022, 5:58 a.m. UTC | #1
On Wed, Apr 13, 2022 at 1:31 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>
> rxe_mcast.c currently uses _irqsave spinlocks for rxe->mcg_lock
> while rxe_recv.c uses _bh spinlocks for the same lock. Adding
> tracing shows that the code in rxe_mcast.c is all executed in_task()
> context while the code in rxe_recv.c that refers to the lock
> executes in softirq context. This causes a lockdep warning in code
> that executes multicast I/O including blktests check srp.
>
> Change the locking of rxe->mcg_lock in rxe_mcast.c to use
> spin_(un)lock_bh().

RXE is not stable right now. We should focus on the existing problems in RXE.

Zhu Yanjun

Bob Pearson April 13, 2022, 6:03 a.m. UTC | #2
On 4/13/22 00:58, Zhu Yanjun wrote:
> On Wed, Apr 13, 2022 at 1:31 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>
>> rxe_mcast.c currently uses _irqsave spinlocks for rxe->mcg_lock
>> while rxe_recv.c uses _bh spinlocks for the same lock. Adding
>> tracing shows that the code in rxe_mcast.c is all executed in_task()
>> context while the code in rxe_recv.c that refers to the lock
>> executes in softirq context. This causes a lockdep warning in code
>> that executes multicast I/O including blktests check srp.
>>
>> Change the locking of rxe->mcg_lock in rxe_mcast.c to use
>> spin_(un)lock_bh().
> 
> Now RXE is not stable. We should focus on the problem of RXE.
> 
> Zhu Yanjun

This is a real bug and it triggers lockdep warnings when I run blktests srp.
The blktests test suite apparently uses multicast. It is obviously wrong:
you can't use both the _bh and _irqsave variants on the same lock and pass
lockdep checking.

What tests are not running correctly for you?

Bob
Zhu Yanjun April 13, 2022, 6:11 a.m. UTC | #3
On Wed, Apr 13, 2022 at 2:03 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>
> On 4/13/22 00:58, Zhu Yanjun wrote:
> > On Wed, Apr 13, 2022 at 1:31 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
> >>
> >> rxe_mcast.c currently uses _irqsave spinlocks for rxe->mcg_lock
> >> while rxe_recv.c uses _bh spinlocks for the same lock. Adding
> >> tracing shows that the code in rxe_mcast.c is all executed in_task()
> >> context while the code in rxe_recv.c that refers to the lock
> >> executes in softirq context. This causes a lockdep warning in code
> >> that executes multicast I/O including blktests check srp.
> >>
> >> Change the locking of rxe->mcg_lock in rxe_mcast.c to use
> >> spin_(un)lock_bh().
> >
> > Now RXE is not stable. We should focus on the problem of RXE.
> >
> > Zhu Yanjun
>
> This a real bug and triggers lockdep warnings when I run blktests srp.
> The blktests test suite apparently uses multicast. It is obviously wrong
> you can't use both _bh and _irqsave locks and pass lockdep checking.
>
> What tests are not running correctly for you?

Has the crash related to the MR been resolved?

Zhu Yanjun

Bob Pearson April 13, 2022, 6:18 a.m. UTC | #4
On 4/13/22 01:11, Zhu Yanjun wrote:
> On Wed, Apr 13, 2022 at 2:03 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>
>> On 4/13/22 00:58, Zhu Yanjun wrote:
>>> On Wed, Apr 13, 2022 at 1:31 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>>>
>>>> rxe_mcast.c currently uses _irqsave spinlocks for rxe->mcg_lock
>>>> while rxe_recv.c uses _bh spinlocks for the same lock. Adding
>>>> tracing shows that the code in rxe_mcast.c is all executed in_task()
>>>> context while the code in rxe_recv.c that refers to the lock
>>>> executes in softirq context. This causes a lockdep warning in code
>>>> that executes multicast I/O including blktests check srp.
>>>>
>>>> Change the locking of rxe->mcg_lock in rxe_mcast.c to use
>>>> spin_(un)lock_bh().
>>>
>>> Now RXE is not stable. We should focus on the problem of RXE.
>>>
>>> Zhu Yanjun
>>
>> This a real bug and triggers lockdep warnings when I run blktests srp.
>> The blktests test suite apparently uses multicast. It is obviously wrong
>> you can't use both _bh and _irqsave locks and pass lockdep checking.
>>
>> What tests are not running correctly for you?
> 
> The crash related with mr is resolved?

I am working on getting everything to pass with the rcu_read_lock() patch.
Currently I have blktests, perftests and pyverbs tests all passing.
I have rping running but it hangs. I have WARN_ONs for mr == 0 but I
am not seeing them fire, so there is absolutely no trace output from rping.
It just hangs after a few minutes with nothing in dmesg. I have lockdep
turned on and, as just mentioned, WARN_ONs for mr == 0. My suspicion is
that this is related to a race during shutdown, but I haven't traced it to
the root cause yet. I don't trust the responder resources code at all.

I am done for today here though.

Bob
Zhu Yanjun April 13, 2022, 6:30 a.m. UTC | #5
On Wed, Apr 13, 2022 at 2:18 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>
> On 4/13/22 01:11, Zhu Yanjun wrote:
> > On Wed, Apr 13, 2022 at 2:03 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
> >>
> >> On 4/13/22 00:58, Zhu Yanjun wrote:
> >>> On Wed, Apr 13, 2022 at 1:31 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
> >>>>
> >>>> rxe_mcast.c currently uses _irqsave spinlocks for rxe->mcg_lock
> >>>> while rxe_recv.c uses _bh spinlocks for the same lock. Adding
> >>>> tracing shows that the code in rxe_mcast.c is all executed in_task()
> >>>> context while the code in rxe_recv.c that refers to the lock
> >>>> executes in softirq context. This causes a lockdep warning in code
> >>>> that executes multicast I/O including blktests check srp.
> >>>>
> >>>> Change the locking of rxe->mcg_lock in rxe_mcast.c to use
> >>>> spin_(un)lock_bh().
> >>>
> >>> Now RXE is not stable. We should focus on the problem of RXE.
> >>>
> >>> Zhu Yanjun
> >>
> >> This a real bug and triggers lockdep warnings when I run blktests srp.
> >> The blktests test suite apparently uses multicast. It is obviously wrong
> >> you can't use both _bh and _irqsave locks and pass lockdep checking.
> >>
> >> What tests are not running correctly for you?
> >
> > The crash related with mr is resolved?
>
> I am pursuing getting everything to pass with the rcu_read_lock() patch.
> Currently I have blktests, perftests and pyverbs tests all passing.
> I have rping running but it hangs. I have WARN_ON's for mr = 0 but I
> am not seeing them show up so there absolutely no trace output from rping.
> It just hangs after a few minutes with no dmesg. I have lockdep turned on and
> as just mentioned WARN_ON's for mr = 0. My suspicion is that this is
> related to a race during shutdown but I haven't traced it to the root cause

Please focus on this MR crash. I have run tests with the related
xarray commit reverted, and the MR crash disappeared.

It seems that this rping MR crash is related to the xarray.

I am still working on it.

Zhu Yanjun

Jason Gunthorpe May 3, 2022, 11:38 a.m. UTC | #6
On Wed, Apr 13, 2022 at 12:29:38AM -0500, Bob Pearson wrote:
> rxe_mcast.c currently uses _irqsave spinlocks for rxe->mcg_lock
> while rxe_recv.c uses _bh spinlocks for the same lock. Adding
> tracing shows that the code in rxe_mcast.c is all executed in_task()
> context while the code in rxe_recv.c that refers to the lock
> executes in softirq context. This causes a lockdep warning in code
> that executes multicast I/O including blktests check srp.

What is the lockdep warning? This patch looks OK, but irqsave should
be a universal lock - so why does lockdep complain? Is there nesting?

Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index ae8f11cb704a..7f50566b8d89 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -143,11 +143,10 @@  static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
 struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
 {
 	struct rxe_mcg *mcg;
-	unsigned long flags;
 
-	spin_lock_irqsave(&rxe->mcg_lock, flags);
+	spin_lock_bh(&rxe->mcg_lock);
 	mcg = __rxe_lookup_mcg(rxe, mgid);
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+	spin_unlock_bh(&rxe->mcg_lock);
 
 	return mcg;
 }
@@ -198,7 +197,6 @@  static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
 static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
 {
 	struct rxe_mcg *mcg, *tmp;
-	unsigned long flags;
 	int err;
 
 	if (rxe->attr.max_mcast_grp == 0)
@@ -214,7 +212,7 @@  static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
 	if (!mcg)
 		return ERR_PTR(-ENOMEM);
 
-	spin_lock_irqsave(&rxe->mcg_lock, flags);
+	spin_lock_bh(&rxe->mcg_lock);
 	/* re-check to see if someone else just added it */
 	tmp = __rxe_lookup_mcg(rxe, mgid);
 	if (tmp) {
@@ -232,12 +230,12 @@  static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
 	if (err)
 		goto err_dec;
 out:
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+	spin_unlock_bh(&rxe->mcg_lock);
 	return mcg;
 
 err_dec:
 	atomic_dec(&rxe->mcg_num);
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+	spin_unlock_bh(&rxe->mcg_lock);
 	kfree(mcg);
 	return ERR_PTR(err);
 }
@@ -280,11 +278,9 @@  static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
  */
 static void rxe_destroy_mcg(struct rxe_mcg *mcg)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&mcg->rxe->mcg_lock, flags);
+	spin_lock_bh(&mcg->rxe->mcg_lock);
 	__rxe_destroy_mcg(mcg);
-	spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags);
+	spin_unlock_bh(&mcg->rxe->mcg_lock);
 }
 
 /**
@@ -339,25 +335,24 @@  static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
 {
 	struct rxe_dev *rxe = mcg->rxe;
 	struct rxe_mca *mca, *tmp;
-	unsigned long flags;
 	int err;
 
 	/* check to see if the qp is already a member of the group */
-	spin_lock_irqsave(&rxe->mcg_lock, flags);
+	spin_lock_bh(&rxe->mcg_lock);
 	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
 		if (mca->qp == qp) {
-			spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+			spin_unlock_bh(&rxe->mcg_lock);
 			return 0;
 		}
 	}
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+	spin_unlock_bh(&rxe->mcg_lock);
 
 	/* speculative alloc new mca without using GFP_ATOMIC */
 	mca = kzalloc(sizeof(*mca), GFP_KERNEL);
 	if (!mca)
 		return -ENOMEM;
 
-	spin_lock_irqsave(&rxe->mcg_lock, flags);
+	spin_lock_bh(&rxe->mcg_lock);
 	/* re-check to see if someone else just attached qp */
 	list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
 		if (tmp->qp == qp) {
@@ -371,7 +366,7 @@  static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
 	if (err)
 		kfree(mca);
 out:
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+	spin_unlock_bh(&rxe->mcg_lock);
 	return err;
 }
 
@@ -405,9 +400,8 @@  static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
 {
 	struct rxe_dev *rxe = mcg->rxe;
 	struct rxe_mca *mca, *tmp;
-	unsigned long flags;
 
-	spin_lock_irqsave(&rxe->mcg_lock, flags);
+	spin_lock_bh(&rxe->mcg_lock);
 	list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
 		if (mca->qp == qp) {
 			__rxe_cleanup_mca(mca, mcg);
@@ -421,13 +415,13 @@  static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
 			if (atomic_read(&mcg->qp_num) <= 0)
 				__rxe_destroy_mcg(mcg);
 
-			spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+			spin_unlock_bh(&rxe->mcg_lock);
 			return 0;
 		}
 	}
 
 	/* we didn't find the qp on the list */
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+	spin_unlock_bh(&rxe->mcg_lock);
 	return -EINVAL;
 }