Message ID | 20220413052937.92713-1-rpearsonhpe@gmail.com (mailing list archive)
---|---
State | Superseded
Series | [for-next] RDMA/rxe: Fix "RDMA/rxe: Cleanup rxe_mcast.c"
On Wed, Apr 13, 2022 at 1:31 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>
> rxe_mcast.c currently uses _irqsave spinlocks for rxe->mcg_lock
> while rxe_recv.c uses _bh spinlocks for the same lock. Adding
> tracing shows that the code in rxe_mcast.c is all executed in_task()
> context while the code in rxe_recv.c that refers to the lock
> executes in softirq context. This causes a lockdep warning in code
> that executes multicast I/O including blktests check srp.
>
> Change the locking of rxe->mcg_lock in rxe_mcast.c to use
> spin_(un)lock_bh().

Now RXE is not stable. We should focus on the problem of RXE.

Zhu Yanjun
On 4/13/22 00:58, Zhu Yanjun wrote:
> On Wed, Apr 13, 2022 at 1:31 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>
>> Change the locking of rxe->mcg_lock in rxe_mcast.c to use
>> spin_(un)lock_bh().
>
> Now RXE is not stable. We should focus on the problem of RXE.
>
> Zhu Yanjun

This is a real bug and triggers lockdep warnings when I run blktests srp.
The blktests test suite apparently uses multicast. It is obviously wrong;
you can't use both _bh and _irqsave variants on the same lock and pass
lockdep checking.

What tests are not running correctly for you?

Bob
On Wed, Apr 13, 2022 at 2:03 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>
> On 4/13/22 00:58, Zhu Yanjun wrote:
> > Now RXE is not stable. We should focus on the problem of RXE.
> >
> > Zhu Yanjun
>
> This is a real bug and triggers lockdep warnings when I run blktests srp.
> The blktests test suite apparently uses multicast. It is obviously wrong;
> you can't use both _bh and _irqsave variants on the same lock and pass
> lockdep checking.
>
> What tests are not running correctly for you?

Is the crash related to the mr resolved?

Zhu Yanjun
On 4/13/22 01:11, Zhu Yanjun wrote:
> On Wed, Apr 13, 2022 at 2:03 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>
>> This is a real bug and triggers lockdep warnings when I run blktests srp.
>> The blktests test suite apparently uses multicast. It is obviously wrong;
>> you can't use both _bh and _irqsave variants on the same lock and pass
>> lockdep checking.
>>
>> What tests are not running correctly for you?
>
> Is the crash related to the mr resolved?

I am pursuing getting everything to pass with the rcu_read_lock() patch.
Currently I have blktests, perftests and pyverbs tests all passing.
I have rping running but it hangs. I have WARN_ON's for mr = 0 but I am
not seeing them trigger, so there is absolutely no trace output from
rping. It just hangs after a few minutes with nothing in dmesg. I have
lockdep turned on and, as just mentioned, WARN_ON's for mr = 0. My
suspicion is that this is related to a race during shutdown, but I
haven't traced it to the root cause yet. I don't trust the responder
resources code at all.

I am done for today here though.

Bob
On Wed, Apr 13, 2022 at 2:18 PM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>
> I am pursuing getting everything to pass with the rcu_read_lock() patch.
> Currently I have blktests, perftests and pyverbs tests all passing.
> I have rping running but it hangs. I have WARN_ON's for mr = 0 but I am
> not seeing them trigger, so there is absolutely no trace output from
> rping. It just hangs after a few minutes with nothing in dmesg. I have
> lockdep turned on and, as just mentioned, WARN_ON's for mr = 0. My
> suspicion is that this is related to a race during shutdown, but I
> haven't traced it to the root cause yet.

Please focus on this mr crash. I have run tests with the related xarray
commit reverted, and the mr crash disappeared. It seems that this rping
mr crash is related to the xarray. I am still working on it.

Zhu Yanjun
On Wed, Apr 13, 2022 at 12:29:38AM -0500, Bob Pearson wrote:
> rxe_mcast.c currently uses _irqsave spinlocks for rxe->mcg_lock
> while rxe_recv.c uses _bh spinlocks for the same lock. Adding
> tracing shows that the code in rxe_mcast.c is all executed in_task()
> context while the code in rxe_recv.c that refers to the lock
> executes in softirq context. This causes a lockdep warning in code
> that executes multicast I/O including blktests check srp.

What is the lockdep warning? This patch looks OK, but irqsave should be
a universal lock - so why does lockdep complain? Is there nesting?

Jason
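The situation the changelog describes can be reduced to a minimal sketch: the same spinlock is taken with the _irqsave variant from process context in rxe_mcast.c and with the _bh variant from softirq context in rxe_recv.c. The sketch below uses illustrative names (mcg_lock, task_side_path, softirq_side_path); it is not the actual rxe code.

/*
 * Minimal sketch of the mixed usage described in the changelog.
 * Names are illustrative only, not taken from the driver.
 */
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(mcg_lock);

/* rxe_mcast.c style caller: runs in_task() */
static void task_side_path(void)
{
        unsigned long flags;

        spin_lock_irqsave(&mcg_lock, flags);    /* pre-patch locking */
        /* ... modify multicast group state ... */
        spin_unlock_irqrestore(&mcg_lock, flags);
}

/* rxe_recv.c style caller: runs in softirq context */
static void softirq_side_path(void)
{
        spin_lock_bh(&mcg_lock);
        /* ... look up the group for an incoming packet ... */
        spin_unlock_bh(&mcg_lock);
}

The patch converts the task-side acquisitions to spin_lock_bh()/spin_unlock_bh() as well, so every user of rxe->mcg_lock acquires it the same way.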
diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index ae8f11cb704a..7f50566b8d89 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -143,11 +143,10 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
 struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
 {
         struct rxe_mcg *mcg;
-        unsigned long flags;
 
-        spin_lock_irqsave(&rxe->mcg_lock, flags);
+        spin_lock_bh(&rxe->mcg_lock);
         mcg = __rxe_lookup_mcg(rxe, mgid);
-        spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+        spin_unlock_bh(&rxe->mcg_lock);
 
         return mcg;
 }
@@ -198,7 +197,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
 static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
 {
         struct rxe_mcg *mcg, *tmp;
-        unsigned long flags;
         int err;
 
         if (rxe->attr.max_mcast_grp == 0)
@@ -214,7 +212,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
         if (!mcg)
                 return ERR_PTR(-ENOMEM);
 
-        spin_lock_irqsave(&rxe->mcg_lock, flags);
+        spin_lock_bh(&rxe->mcg_lock);
         /* re-check to see if someone else just added it */
         tmp = __rxe_lookup_mcg(rxe, mgid);
         if (tmp) {
@@ -232,12 +230,12 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
         if (err)
                 goto err_dec;
 out:
-        spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+        spin_unlock_bh(&rxe->mcg_lock);
         return mcg;
 
 err_dec:
         atomic_dec(&rxe->mcg_num);
-        spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+        spin_unlock_bh(&rxe->mcg_lock);
         kfree(mcg);
         return ERR_PTR(err);
 }
@@ -280,11 +278,9 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
  */
 static void rxe_destroy_mcg(struct rxe_mcg *mcg)
 {
-        unsigned long flags;
-
-        spin_lock_irqsave(&mcg->rxe->mcg_lock, flags);
+        spin_lock_bh(&mcg->rxe->mcg_lock);
         __rxe_destroy_mcg(mcg);
-        spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags);
+        spin_unlock_bh(&mcg->rxe->mcg_lock);
 }
 
 /**
@@ -339,25 +335,24 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
 {
         struct rxe_dev *rxe = mcg->rxe;
         struct rxe_mca *mca, *tmp;
-        unsigned long flags;
         int err;
 
         /* check to see if the qp is already a member of the group */
-        spin_lock_irqsave(&rxe->mcg_lock, flags);
+        spin_lock_bh(&rxe->mcg_lock);
         list_for_each_entry(mca, &mcg->qp_list, qp_list) {
                 if (mca->qp == qp) {
-                        spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+                        spin_unlock_bh(&rxe->mcg_lock);
                         return 0;
                 }
         }
-        spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+        spin_unlock_bh(&rxe->mcg_lock);
 
         /* speculative alloc new mca without using GFP_ATOMIC */
         mca = kzalloc(sizeof(*mca), GFP_KERNEL);
         if (!mca)
                 return -ENOMEM;
 
-        spin_lock_irqsave(&rxe->mcg_lock, flags);
+        spin_lock_bh(&rxe->mcg_lock);
         /* re-check to see if someone else just attached qp */
         list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
                 if (tmp->qp == qp) {
@@ -371,7 +366,7 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
         if (err)
                 kfree(mca);
 out:
-        spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+        spin_unlock_bh(&rxe->mcg_lock);
         return err;
 }
 
@@ -405,9 +400,8 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
 {
         struct rxe_dev *rxe = mcg->rxe;
         struct rxe_mca *mca, *tmp;
-        unsigned long flags;
 
-        spin_lock_irqsave(&rxe->mcg_lock, flags);
+        spin_lock_bh(&rxe->mcg_lock);
         list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
                 if (mca->qp == qp) {
                         __rxe_cleanup_mca(mca, mcg);
@@ -421,13 +415,13 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
                         if (atomic_read(&mcg->qp_num) <= 0)
                                 __rxe_destroy_mcg(mcg);
 
-                        spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+                        spin_unlock_bh(&rxe->mcg_lock);
                         return 0;
                 }
         }
 
         /* we didn't find the qp on the list */
-        spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+        spin_unlock_bh(&rxe->mcg_lock);
         return -EINVAL;
 }
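The rxe_attach_mcg() hunk above also shows a pattern worth noting: the new mca is allocated with GFP_KERNEL outside the lock, and the list is re-checked after the lock is retaken in case another thread attached the same qp in the meantime. Below is a simplified, generic sketch of that pattern; the types and helper name are placeholders, not the rxe structures.

/*
 * Sketch of the "speculative alloc" pattern: allocate with GFP_KERNEL
 * outside the lock, then re-check under the lock for a racing insert.
 * Placeholder types, not the rxe code.
 */
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/list.h>

struct member {
        struct list_head list;
        void *key;
};

static int attach_member(spinlock_t *lock, struct list_head *head, void *key)
{
        struct member *m, *new;

        /* fast path: already present? */
        spin_lock_bh(lock);
        list_for_each_entry(m, head, list) {
                if (m->key == key) {
                        spin_unlock_bh(lock);
                        return 0;
                }
        }
        spin_unlock_bh(lock);

        /* allocate outside the lock so GFP_KERNEL can be used */
        new = kzalloc(sizeof(*new), GFP_KERNEL);
        if (!new)
                return -ENOMEM;

        /* re-check under the lock in case another thread attached first */
        spin_lock_bh(lock);
        list_for_each_entry(m, head, list) {
                if (m->key == key) {
                        spin_unlock_bh(lock);
                        kfree(new);
                        return 0;
                }
        }
        new->key = key;
        list_add(&new->list, head);
        spin_unlock_bh(lock);

        return 0;
}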
rxe_mcast.c currently uses _irqsave spinlocks for rxe->mcg_lock
while rxe_recv.c uses _bh spinlocks for the same lock. Adding
tracing shows that the code in rxe_mcast.c is all executed in_task()
context while the code in rxe_recv.c that refers to the lock
executes in softirq context. This causes a lockdep warning in code
that executes multicast I/O, including blktests check srp.

Change the locking of rxe->mcg_lock in rxe_mcast.c to use
spin_(un)lock_bh().

With this change the following locks in rdma_rxe, which are all _bh,
show no lockdep warnings:

        atomic_ops_lock
        mw->lock
        port->port_lock
        qp->state_lock
        rxe->mcg_lock
        rxe->mmap_offset_lock
        rxe->pending_lock
        task->state_lock

The only remaining lock is pool->xa.xa_lock, which must either be some
combination of _bh and _irq types depending on the object type (== ah
or not), or a plain spin_lock if the read side operations are converted
to use rcu_read_lock().

Fixes: 6090a0c4c7c6 ("RDMA/rxe: Cleanup rxe_mcast.c")
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_mcast.c | 36 +++++++++++----------------
 1 file changed, 15 insertions(+), 21 deletions(-)
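The last paragraph above mentions converting the pool->xa.xa_lock read side to rcu_read_lock(). A rough sketch of what such a read-side conversion typically looks like follows; the xarray, object type, and function names are placeholders, not the rxe pool code.

/*
 * Sketch of an RCU-protected read side: readers look up under
 * rcu_read_lock() and take a reference before leaving the critical
 * section; the writer still serializes on xa_lock (taken internally
 * by xa_erase()) and frees through a grace period.
 */
#include <linux/xarray.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct obj {
        u32 index;
        refcount_t refcnt;
        struct rcu_head rcu;
};

static DEFINE_XARRAY(obj_xa);

static struct obj *obj_lookup(u32 index)
{
        struct obj *o;

        rcu_read_lock();
        o = xa_load(&obj_xa, index);
        if (o && !refcount_inc_not_zero(&o->refcnt))
                o = NULL;       /* object is already on its way out */
        rcu_read_unlock();

        return o;
}

static void obj_remove(struct obj *o)
{
        xa_erase(&obj_xa, o->index);
        kfree_rcu(o, rcu);      /* free only after all readers are done */
}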