diff mbox

[for-rc,4/6] RDMA/bnxt_re: Synchronize destroy_qp with poll_cq

Message ID 1518758413-20850-5-git-send-email-selvin.xavier@broadcom.com (mailing list archive)
State Accepted
Headers show

Commit Message

Selvin Xavier Feb. 16, 2018, 5:20 a.m. UTC
Avoid system crash when destroy_qp is invoked while
the driver is processing the poll_cq. Synchronize these
functions using the cq_lock.

Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 39 +++++++++++++++++++++++++++++---
 drivers/infiniband/hw/bnxt_re/ib_verbs.h |  2 ++
 drivers/infiniband/hw/bnxt_re/qplib_fp.c | 21 +++++------------
 drivers/infiniband/hw/bnxt_re/qplib_fp.h |  4 +++-
 4 files changed, 47 insertions(+), 19 deletions(-)

Comments

Leon Romanovsky Feb. 16, 2018, 7:38 a.m. UTC | #1
On Thu, Feb 15, 2018 at 09:20:11PM -0800, Selvin Xavier wrote:
> Avoid system crash when destroy_qp is invoked while
> the driver is processing the poll_cq. Synchronize these
> functions using the cq_lock.
>
> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
> ---
>  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 39 +++++++++++++++++++++++++++++---
>  drivers/infiniband/hw/bnxt_re/ib_verbs.h |  2 ++
>  drivers/infiniband/hw/bnxt_re/qplib_fp.c | 21 +++++------------
>  drivers/infiniband/hw/bnxt_re/qplib_fp.h |  4 +++-
>  4 files changed, 47 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> index b7eb104..9cb9928 100644
> --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> @@ -786,20 +786,51 @@ int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
>  	return 0;
>  }
>
> +static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
> +	__acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&qp->scq->cq_lock, flags);
> +	if (qp->rcq != qp->scq)
> +		spin_lock(&qp->rcq->cq_lock);
> +	else
> +		__acquire(&qp->rcq->cq_lock);

Sorry for naive question, why do you need __acquire/__release? And why
can't spin_is_locked(&qp->rcq->cq_lock) be used for unlock code?


> +
> +	return flags;
> +}
> +
> +static void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
> +			       unsigned long flags)
> +	__releases(&qp->scq->cq_lock) __releases(&qp->rcq->cq_lock)
> +{
> +	if (qp->rcq != qp->scq)
> +		spin_unlock(&qp->rcq->cq_lock);
> +	else
> +		__release(&qp->rcq->cq_lock);
> +	spin_unlock_irqrestore(&qp->scq->cq_lock, flags);
> +}
> +

Thanks
Selvin Xavier Feb. 16, 2018, 11:39 a.m. UTC | #2
On Fri, Feb 16, 2018 at 1:08 PM, Leon Romanovsky <leon@kernel.org> wrote:
> On Thu, Feb 15, 2018 at 09:20:11PM -0800, Selvin Xavier wrote:
>> Avoid system crash when destroy_qp is invoked while
>> the driver is processing the poll_cq. Synchronize these
>> functions using the cq_lock.
>>
>> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
>> ---
>>  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 39 +++++++++++++++++++++++++++++---
>>  drivers/infiniband/hw/bnxt_re/ib_verbs.h |  2 ++
>>  drivers/infiniband/hw/bnxt_re/qplib_fp.c | 21 +++++------------
>>  drivers/infiniband/hw/bnxt_re/qplib_fp.h |  4 +++-
>>  4 files changed, 47 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
>> index b7eb104..9cb9928 100644
>> --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
>> +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
>> @@ -786,20 +786,51 @@ int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
>>       return 0;
>>  }
>>
>> +static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
>> +     __acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock)
>> +{
>> +     unsigned long flags;
>> +
>> +     spin_lock_irqsave(&qp->scq->cq_lock, flags);
>> +     if (qp->rcq != qp->scq)
>> +             spin_lock(&qp->rcq->cq_lock);
>> +     else
>> +             __acquire(&qp->rcq->cq_lock);
>
> Sorry for naive question, why do you need __acquire/__release? And why
> can't spin_is_locked(&qp->rcq->cq_lock) be used for unlock code?
>
I used __acquire and __release for satisfying the sparse. Sparse used to
give warnings  "context  imbalance - wrong count at exit" since the locking
and unlocking happens from two different functions. seen similar implementation
in other drivers too.

I feel spin_is_locked also can be used.. but it doesn't avoid the RCQ
!= SCQ check.
Also, if feel the code symmetry is better in this way.
>
>> +
>> +     return flags;
>> +}
>> +
>> +static void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
>> +                            unsigned long flags)
>> +     __releases(&qp->scq->cq_lock) __releases(&qp->rcq->cq_lock)
>> +{
>> +     if (qp->rcq != qp->scq)
>> +             spin_unlock(&qp->rcq->cq_lock);
>> +     else
>> +             __release(&qp->rcq->cq_lock);
>> +     spin_unlock_irqrestore(&qp->scq->cq_lock, flags);
>> +}
>> +
>
> Thanks
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky Feb. 16, 2018, 3:51 p.m. UTC | #3
On Fri, Feb 16, 2018 at 05:09:15PM +0530, Selvin Xavier wrote:
> On Fri, Feb 16, 2018 at 1:08 PM, Leon Romanovsky <leon@kernel.org> wrote:
> > On Thu, Feb 15, 2018 at 09:20:11PM -0800, Selvin Xavier wrote:
> >> Avoid system crash when destroy_qp is invoked while
> >> the driver is processing the poll_cq. Synchronize these
> >> functions using the cq_lock.
> >>
> >> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
> >> ---
> >>  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 39 +++++++++++++++++++++++++++++---
> >>  drivers/infiniband/hw/bnxt_re/ib_verbs.h |  2 ++
> >>  drivers/infiniband/hw/bnxt_re/qplib_fp.c | 21 +++++------------
> >>  drivers/infiniband/hw/bnxt_re/qplib_fp.h |  4 +++-
> >>  4 files changed, 47 insertions(+), 19 deletions(-)
> >>
> >> diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> >> index b7eb104..9cb9928 100644
> >> --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> >> +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> >> @@ -786,20 +786,51 @@ int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
> >>       return 0;
> >>  }
> >>
> >> +static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
> >> +     __acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock)
> >> +{
> >> +     unsigned long flags;
> >> +
> >> +     spin_lock_irqsave(&qp->scq->cq_lock, flags);
> >> +     if (qp->rcq != qp->scq)
> >> +             spin_lock(&qp->rcq->cq_lock);
> >> +     else
> >> +             __acquire(&qp->rcq->cq_lock);
> >
> > Sorry for naive question, why do you need __acquire/__release? And why
> > can't spin_is_locked(&qp->rcq->cq_lock) be used for unlock code?
> >
> I used __acquire and __release for satisfying the sparse. Sparse used to
> give warnings  "context  imbalance - wrong count at exit" since the locking
> and unlocking happens from two different functions. seen similar implementation
> in other drivers too.

I see, actually I think that sparse complain was right and you went to
far by wrapping standard calls. I see that it is very popular in bnxt_re
code and the problem with that, that it makes very hard to refactor such
code in case of some global change/improvement.

>
> I feel spin_is_locked also can be used.. but it doesn't avoid the RCQ
> != SCQ check.
> Also, if feel the code symmetry is better in this way.
> >
> >> +
> >> +     return flags;
> >> +}
> >> +
> >> +static void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
> >> +                            unsigned long flags)
> >> +     __releases(&qp->scq->cq_lock) __releases(&qp->rcq->cq_lock)
> >> +{
> >> +     if (qp->rcq != qp->scq)
> >> +             spin_unlock(&qp->rcq->cq_lock);
> >> +     else
> >> +             __release(&qp->rcq->cq_lock);
> >> +     spin_unlock_irqrestore(&qp->scq->cq_lock, flags);
> >> +}
> >> +
> >
> > Thanks
Doug Ledford Feb. 19, 2018, 8:11 p.m. UTC | #4
On Fri, 2018-02-16 at 17:51 +0200, Leon Romanovsky wrote:
> > >> +static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
> > >> +     __acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock)
> > >> +{
> > >> +     unsigned long flags;
> > >> +
> > >> +     spin_lock_irqsave(&qp->scq->cq_lock, flags);
> > >> +     if (qp->rcq != qp->scq)
> > >> +             spin_lock(&qp->rcq->cq_lock);
> > >> +     else
> > >> +             __acquire(&qp->rcq->cq_lock);
> > >
> > > Sorry for naive question, why do you need __acquire/__release? And why
> > > can't spin_is_locked(&qp->rcq->cq_lock) be used for unlock code?
> > >
> > I used __acquire and __release for satisfying the sparse. Sparse used to
> > give warnings  "context  imbalance - wrong count at exit" since the locking
> > and unlocking happens from two different functions. seen similar implementation
> > in other drivers too.
> 
> I see, actually I think that sparse complain was right and you went to
> far by wrapping standard calls. I see that it is very popular in bnxt_re
> code and the problem with that, that it makes very hard to refactor such
> code in case of some global change/improvement.

Leon, are you actually objecting here, or just noting that you would
prefer a different style?
Leon Romanovsky Feb. 20, 2018, 5:45 a.m. UTC | #5
On Mon, Feb 19, 2018 at 03:11:49PM -0500, Doug Ledford wrote:
> On Fri, 2018-02-16 at 17:51 +0200, Leon Romanovsky wrote:
> > > >> +static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
> > > >> +     __acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock)
> > > >> +{
> > > >> +     unsigned long flags;
> > > >> +
> > > >> +     spin_lock_irqsave(&qp->scq->cq_lock, flags);
> > > >> +     if (qp->rcq != qp->scq)
> > > >> +             spin_lock(&qp->rcq->cq_lock);
> > > >> +     else
> > > >> +             __acquire(&qp->rcq->cq_lock);
> > > >
> > > > Sorry for naive question, why do you need __acquire/__release? And why
> > > > can't spin_is_locked(&qp->rcq->cq_lock) be used for unlock code?
> > > >
> > > I used __acquire and __release for satisfying the sparse. Sparse used to
> > > give warnings  "context  imbalance - wrong count at exit" since the locking
> > > and unlocking happens from two different functions. seen similar implementation
> > > in other drivers too.
> >
> > I see, actually I think that sparse complain was right and you went to
> > far by wrapping standard calls. I see that it is very popular in bnxt_re
> > code and the problem with that, that it makes very hard to refactor such
> > code in case of some global change/improvement.
>
> Leon, are you actually objecting here, or just noting that you would
> prefer a different style?

Sometimes, it makes sense to provide wrappers e.g. different get/put,
to_..(), from_..()) calls for annotate ambiguous ++/--. But locks
are different, they need to be clear and can't be hided.

So yes, I'm objecting here for this lock/unlock wrapper and would like
to see more pushback from maintainers on extensive usage of ridiculous
wrappers (including mlx code).

Thanks

>
> --
> Doug Ledford <dledford@redhat.com>
>     GPG KeyID: B826A3330E572FDD
>     Key fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57 2FDD
Doug Ledford Feb. 20, 2018, 4:14 p.m. UTC | #6
On Tue, 2018-02-20 at 07:45 +0200, Leon Romanovsky wrote:
> On Mon, Feb 19, 2018 at 03:11:49PM -0500, Doug Ledford wrote:
> > On Fri, 2018-02-16 at 17:51 +0200, Leon Romanovsky wrote:
> > > > > > +static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
> > > > > > +     __acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock)
> > > > > > +{
> > > > > > +     unsigned long flags;
> > > > > > +
> > > > > > +     spin_lock_irqsave(&qp->scq->cq_lock, flags);
> > > > > > +     if (qp->rcq != qp->scq)
> > > > > > +             spin_lock(&qp->rcq->cq_lock);
> > > > > > +     else
> > > > > > +             __acquire(&qp->rcq->cq_lock);
> > > > > 
> > > > > Sorry for naive question, why do you need __acquire/__release? And why
> > > > > can't spin_is_locked(&qp->rcq->cq_lock) be used for unlock code?
> > > > > 
> > > > 
> > > > I used __acquire and __release for satisfying the sparse. Sparse used to
> > > > give warnings  "context  imbalance - wrong count at exit" since the locking
> > > > and unlocking happens from two different functions. seen similar implementation
> > > > in other drivers too.
> > > 
> > > I see, actually I think that sparse complain was right and you went to
> > > far by wrapping standard calls. I see that it is very popular in bnxt_re
> > > code and the problem with that, that it makes very hard to refactor such
> > > code in case of some global change/improvement.
> > 
> > Leon, are you actually objecting here, or just noting that you would
> > prefer a different style?
> 
> Sometimes, it makes sense to provide wrappers e.g. different get/put,
> to_..(), from_..()) calls for annotate ambiguous ++/--. But locks
> are different, they need to be clear and can't be hided.
> 
> So yes, I'm objecting here for this lock/unlock wrapper and would like
> to see more pushback from maintainers on extensive usage of ridiculous
> wrappers (including mlx code).

I've spent some time thinking about this, and I'm not sure I agree with
you Leon.  Getting locking right is important, and in this case it
doesn't really hide any details, but it gets all of the annotations
right.  I'm personally inclined to take the patch.  I'll look around at
some other places in the kernel, but wrappers for getting locking right
aren't that rare I don't think.

And your comment about hard to refactor in case of some global
change/improvement make no sense to me.  The locks are on their own
internal completion queues, and this locking function is nothing more
than "if you have one cq, you lock it, if you have two, you lock both
and in the right order".  I don't see that impacting a refactor.
Doug Ledford Feb. 20, 2018, 4:56 p.m. UTC | #7
On Tue, 2018-02-20 at 11:14 -0500, Doug Ledford wrote:
> On Tue, 2018-02-20 at 07:45 +0200, Leon Romanovsky wrote:
> > 
> > Sometimes, it makes sense to provide wrappers e.g. different get/put,
> > to_..(), from_..()) calls for annotate ambiguous ++/--. But locks
> > are different, they need to be clear and can't be hided.
> > 
> > So yes, I'm objecting here for this lock/unlock wrapper and would like
> > to see more pushback from maintainers on extensive usage of ridiculous
> > wrappers (including mlx code).
> 
> I've spent some time thinking about this, and I'm not sure I agree with
> you Leon.  Getting locking right is important, and in this case it
> doesn't really hide any details, but it gets all of the annotations
> right.  I'm personally inclined to take the patch.  I'll look around at
> some other places in the kernel, but wrappers for getting locking right
> aren't that rare I don't think.

I used the following command to do a quick scan of whether or not
wrappers for locking are common:

grep -r __acquires -B 2 --include=*.[hc]

After reviewing the output, I'm fine with this patch.  It shows that
some authors do simple locking directly in the functions that need to
take the lock, but by in large, when either the locking complexity goes
up or even just the annotation complexity goes up, locking helpers are
in fact very common.  As an example, kernel/sched/sched.h is full of
helpers.

I'm also going to disagree with you on the "more pushback".  It is going
to depend on the complexity of the locking and annotations as to whether
I might push back.
Leon Romanovsky Feb. 21, 2018, 5:53 a.m. UTC | #8
On Tue, Feb 20, 2018 at 11:56:29AM -0500, Doug Ledford wrote:
> On Tue, 2018-02-20 at 11:14 -0500, Doug Ledford wrote:
> > On Tue, 2018-02-20 at 07:45 +0200, Leon Romanovsky wrote:
> > >
> > > Sometimes, it makes sense to provide wrappers e.g. different get/put,
> > > to_..(), from_..()) calls for annotate ambiguous ++/--. But locks
> > > are different, they need to be clear and can't be hided.
> > >
> > > So yes, I'm objecting here for this lock/unlock wrapper and would like
> > > to see more pushback from maintainers on extensive usage of ridiculous
> > > wrappers (including mlx code).
> >
> > I've spent some time thinking about this, and I'm not sure I agree with
> > you Leon.  Getting locking right is important, and in this case it
> > doesn't really hide any details, but it gets all of the annotations
> > right.  I'm personally inclined to take the patch.  I'll look around at
> > some other places in the kernel, but wrappers for getting locking right
> > aren't that rare I don't think.
>
> I used the following command to do a quick scan of whether or not
> wrappers for locking are common:
>
> grep -r __acquires -B 2 --include=*.[hc]
>
> After reviewing the output, I'm fine with this patch.  It shows that
> some authors do simple locking directly in the functions that need to
> take the lock, but by in large, when either the locking complexity goes
> up or even just the annotation complexity goes up, locking helpers are
> in fact very common.  As an example, kernel/sched/sched.h is full of
> helpers.
>
> I'm also going to disagree with you on the "more pushback".  It is going
> to depend on the complexity of the locking and annotations as to whether
> I might push back.

No problem, different opinion is always good.

I don't think that sched (kernel core) subsystem code/requirements are the same
as some random driver code, but whatever.

Thanks

>
>
> --
> Doug Ledford <dledford@redhat.com>
>     GPG KeyID: B826A3330E572FDD
>     Key fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57 2FDD
Jason Gunthorpe Feb. 21, 2018, 3:42 p.m. UTC | #9
On Tue, Feb 20, 2018 at 11:56:29AM -0500, Doug Ledford wrote:

> After reviewing the output, I'm fine with this patch.  It shows that
> some authors do simple locking directly in the functions that need to
> take the lock, but by in large, when either the locking complexity goes
> up or even just the annotation complexity goes up, locking helpers are
> in fact very common.  As an example, kernel/sched/sched.h is full of
> helpers.

Generally speaking we should have pushback for insane locking, and
taking nested spinlocks might be the best solution or maybe not.

We've had several ABBA locking bugs lately directly because of too
much locking complexity.

Otherwise if two spinlocks are needed then I think having a helper to
obtain both is a better idea than open coding since it reduces the
chance to create an ABBA bug..

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index b7eb104..9cb9928 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -786,20 +786,51 @@  int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
 	return 0;
 }
 
+static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
+	__acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->scq->cq_lock, flags);
+	if (qp->rcq != qp->scq)
+		spin_lock(&qp->rcq->cq_lock);
+	else
+		__acquire(&qp->rcq->cq_lock);
+
+	return flags;
+}
+
+static void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
+			       unsigned long flags)
+	__releases(&qp->scq->cq_lock) __releases(&qp->rcq->cq_lock)
+{
+	if (qp->rcq != qp->scq)
+		spin_unlock(&qp->rcq->cq_lock);
+	else
+		__release(&qp->rcq->cq_lock);
+	spin_unlock_irqrestore(&qp->scq->cq_lock, flags);
+}
+
 /* Queue Pairs */
 int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
 {
 	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
 	struct bnxt_re_dev *rdev = qp->rdev;
 	int rc;
+	unsigned int flags;
 
 	bnxt_qplib_flush_cqn_wq(&qp->qplib_qp);
-	bnxt_qplib_del_flush_qp(&qp->qplib_qp);
 	rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
 	if (rc) {
 		dev_err(rdev_to_dev(rdev), "Failed to destroy HW QP");
 		return rc;
 	}
+
+	flags = bnxt_re_lock_cqs(qp);
+	bnxt_qplib_clean_qp(&qp->qplib_qp);
+	bnxt_re_unlock_cqs(qp, flags);
+	bnxt_qplib_free_qp_res(&rdev->qplib_res, &qp->qplib_qp);
+
 	if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) {
 		rc = bnxt_qplib_destroy_ah(&rdev->qplib_res,
 					   &rdev->sqp_ah->qplib_ah);
@@ -809,7 +840,7 @@  int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
 			return rc;
 		}
 
-		bnxt_qplib_del_flush_qp(&qp->qplib_qp);
+		bnxt_qplib_clean_qp(&qp->qplib_qp);
 		rc = bnxt_qplib_destroy_qp(&rdev->qplib_res,
 					   &rdev->qp1_sqp->qplib_qp);
 		if (rc) {
@@ -1068,6 +1099,7 @@  struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 			goto fail;
 		}
 		qp->qplib_qp.scq = &cq->qplib_cq;
+		qp->scq = cq;
 	}
 
 	if (qp_init_attr->recv_cq) {
@@ -1079,6 +1111,7 @@  struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 			goto fail;
 		}
 		qp->qplib_qp.rcq = &cq->qplib_cq;
+		qp->rcq = cq;
 	}
 
 	if (qp_init_attr->srq) {
@@ -1612,7 +1645,7 @@  int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 			dev_dbg(rdev_to_dev(rdev),
 				"Move QP = %p out of flush list\n",
 				qp);
-			bnxt_qplib_del_flush_qp(&qp->qplib_qp);
+			bnxt_qplib_clean_qp(&qp->qplib_qp);
 		}
 	}
 	if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 423ebe0..b88a48d 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -89,6 +89,8 @@  struct bnxt_re_qp {
 	/* QP1 */
 	u32			send_psn;
 	struct ib_ud_header	qp1_hdr;
+	struct bnxt_re_cq	*scq;
+	struct bnxt_re_cq	*rcq;
 };
 
 struct bnxt_re_cq {
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 8b5f11a..52df919 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -173,7 +173,7 @@  static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
 	}
 }
 
-void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
+void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp)
 {
 	unsigned long flags;
 
@@ -1417,7 +1417,6 @@  int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_destroy_qp req;
 	struct creq_destroy_qp_resp resp;
-	unsigned long flags;
 	u16 cmd_flags = 0;
 	int rc;
 
@@ -1435,19 +1434,12 @@  int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
 		return rc;
 	}
 
-	/* Must walk the associated CQs to nullified the QP ptr */
-	spin_lock_irqsave(&qp->scq->hwq.lock, flags);
-
-	__clean_cq(qp->scq, (u64)(unsigned long)qp);
-
-	if (qp->rcq && qp->rcq != qp->scq) {
-		spin_lock(&qp->rcq->hwq.lock);
-		__clean_cq(qp->rcq, (u64)(unsigned long)qp);
-		spin_unlock(&qp->rcq->hwq.lock);
-	}
-
-	spin_unlock_irqrestore(&qp->scq->hwq.lock, flags);
+	return 0;
+}
 
+void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res,
+			    struct bnxt_qplib_qp *qp)
+{
 	bnxt_qplib_free_qp_hdr_buf(res, qp);
 	bnxt_qplib_free_hwq(res->pdev, &qp->sq.hwq);
 	kfree(qp->sq.swq);
@@ -1460,7 +1452,6 @@  int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
 	if (qp->orrq.max_elements)
 		bnxt_qplib_free_hwq(res->pdev, &qp->orrq);
 
-	return 0;
 }
 
 void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 211b27a..ca0a2ff 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -478,6 +478,9 @@  int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
 int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
 int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
 int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp);
+void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res,
+			    struct bnxt_qplib_qp *qp);
 void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
 				struct bnxt_qplib_sge *sge);
 void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
@@ -500,7 +503,6 @@  void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type);
 void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
 int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq);
 void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp);
-void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp);
 void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp,
 				 unsigned long *flags);
 void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp,