diff mbox series

[RESEND,for-next,5/9] IB/{rdmavt, qib, hfi1}: Use new routine to release reference counts

Message ID 20190412134135.5959.38764.stgit@scvm10.sc.intel.com (mailing list archive)
State Accepted
Delegated to: Jason Gunthorpe
Headers show
Series IB/hfi1, rdmavt: Smattering of fixes and code cleanups/improvments | expand

Commit Message

Dennis Dalessandro April 12, 2019, 1:41 p.m. UTC
From: Mike Marciniszyn <mike.marciniszyn@intel.com>

The reference count adjustments on reference count completion
are open coded throughout.

Add a routine to do all reference count adjustments and use.

Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>

---
For some reason patch didn't make the list or patchworks. This
is just a re-send. All the rest seem to have made it.
---
 drivers/infiniband/hw/hfi1/rc.c    |    4 ++--
 drivers/infiniband/hw/qib/qib_rc.c |    4 ++--
 drivers/infiniband/sw/rdmavt/qp.c  |    9 ++-------
 include/rdma/rdmavt_qp.h           |   14 ++++++++++++++
 4 files changed, 20 insertions(+), 11 deletions(-)

Comments

Leon Romanovsky April 12, 2019, 3:43 p.m. UTC | #1
On Fri, Apr 12, 2019 at 06:41:42AM -0700, Dennis Dalessandro wrote:
> From: Mike Marciniszyn <mike.marciniszyn@intel.com>
>
> The reference count adjustments on reference count completion
> are open coded throughout.
>
> Add a routine to do all reference count adjustments and use.
>
> Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
> Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
>
> ---
> For some reason patch didn't make the list or patchworks. This
> is just a re-send. All the rest seem to have made it.
> ---
>  drivers/infiniband/hw/hfi1/rc.c    |    4 ++--
>  drivers/infiniband/hw/qib/qib_rc.c |    4 ++--
>  drivers/infiniband/sw/rdmavt/qp.c  |    9 ++-------
>  include/rdma/rdmavt_qp.h           |   14 ++++++++++++++
>  4 files changed, 20 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
> index 5ba39a9..a922edc 100644
> --- a/drivers/infiniband/hw/hfi1/rc.c
> +++ b/drivers/infiniband/hw/hfi1/rc.c
> @@ -1834,7 +1834,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
>  		qp->s_last = s_last;
>  		/* see post_send() */
>  		barrier();
> -		rvt_put_swqe(wqe);
> +		rvt_put_qp_swqe(qp, wqe);
>  		rvt_qp_swqe_complete(qp,
>  				     wqe,
>  				     ib_hfi1_wc_opcode[wqe->wr.opcode],
> @@ -1882,7 +1882,7 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
>  		u32 s_last;
>
>  		trdma_clean_swqe(qp, wqe);
> -		rvt_put_swqe(wqe);
> +		rvt_put_qp_swqe(qp, wqe);
>  		rvt_qp_wqe_unreserve(qp, wqe);
>  		s_last = qp->s_last;
>  		trace_hfi1_qp_send_completion(qp, wqe, s_last);
> diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
> index 50dd981..2ac4c67 100644
> --- a/drivers/infiniband/hw/qib/qib_rc.c
> +++ b/drivers/infiniband/hw/qib/qib_rc.c
> @@ -933,7 +933,7 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
>  		qp->s_last = s_last;
>  		/* see post_send() */
>  		barrier();
> -		rvt_put_swqe(wqe);
> +		rvt_put_qp_swqe(qp, wqe);
>  		rvt_qp_swqe_complete(qp,
>  				     wqe,
>  				     ib_qib_wc_opcode[wqe->wr.opcode],
> @@ -975,7 +975,7 @@ static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
>  	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
>  		u32 s_last;
>
> -		rvt_put_swqe(wqe);
> +		rvt_put_qp_swqe(qp, wqe);
>  		s_last = qp->s_last;
>  		if (++s_last >= qp->s_size)
>  			s_last = 0;
> diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
> index 2460303..31a2e65 100644
> --- a/drivers/infiniband/sw/rdmavt/qp.c
> +++ b/drivers/infiniband/sw/rdmavt/qp.c
> @@ -623,10 +623,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
>  		while (qp->s_last != qp->s_head) {
>  			struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);
>
> -			rvt_put_swqe(wqe);
> -			if (qp->allowed_ops == IB_OPCODE_UD)
> -				atomic_dec(&ibah_to_rvtah(
> -						wqe->ud_wr.ah)->refcount);
> +			rvt_put_qp_swqe(qp, wqe);
>  			if (++qp->s_last >= qp->s_size)
>  				qp->s_last = 0;
>  			smp_wmb(); /* see qp_set_savail */
> @@ -2683,9 +2680,7 @@ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
>  	qp->s_last = last;
>  	/* See post_send() */
>  	barrier();
> -	rvt_put_swqe(wqe);
> -	if (qp->allowed_ops == IB_OPCODE_UD)
> -		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
> +	rvt_put_qp_swqe(qp, wqe);
>
>  	rvt_qp_swqe_complete(qp,
>  			     wqe,
> diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
> index a00c46a..68e38c2 100644
> --- a/include/rdma/rdmavt_qp.h
> +++ b/include/rdma/rdmavt_qp.h
> @@ -723,6 +723,20 @@ static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
>  	return rvt_mod_retry_timer_ext(qp, 0);
>  }
>
> +/**
> + * rvt_put_qp_swqe - drop refs held by swqe
> + * @qp: the send qp
> + * @wqe: the send wqe
> + *
> + * This drops any references held by the swqe
> + */
> +static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
> +{
> +	rvt_put_swqe(wqe);
> +	if (qp->allowed_ops == IB_OPCODE_UD)
> +		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
> +}

In the context of my allocation change patches, I wanted to ask on which
objects should driver perform refcounting?

Thanks

> +
>  extern const int  ib_rvt_state_ops[];
>
>  struct rvt_dev_info;
>
Dennis Dalessandro April 15, 2019, 12:31 p.m. UTC | #2
On 4/12/2019 11:43 AM, Leon Romanovsky wrote:
> On Fri, Apr 12, 2019 at 06:41:42AM -0700, Dennis Dalessandro wrote:
>> From: Mike Marciniszyn <mike.marciniszyn@intel.com>
>>
>> The reference count adjustments on reference count completion
>> are open coded throughout.
>>
>> Add a routine to do all reference count adjustments and use.
>>
>> Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
>> Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
>> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
>>
>> ---
>> For some reason patch didn't make the list or patchworks. This
>> is just a re-send. All the rest seem to have made it.
>> ---
>>   drivers/infiniband/hw/hfi1/rc.c    |    4 ++--
>>   drivers/infiniband/hw/qib/qib_rc.c |    4 ++--
>>   drivers/infiniband/sw/rdmavt/qp.c  |    9 ++-------
>>   include/rdma/rdmavt_qp.h           |   14 ++++++++++++++
>>   4 files changed, 20 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
>> index 5ba39a9..a922edc 100644
>> --- a/drivers/infiniband/hw/hfi1/rc.c
>> +++ b/drivers/infiniband/hw/hfi1/rc.c
>> @@ -1834,7 +1834,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
>>   		qp->s_last = s_last;
>>   		/* see post_send() */
>>   		barrier();
>> -		rvt_put_swqe(wqe);
>> +		rvt_put_qp_swqe(qp, wqe);
>>   		rvt_qp_swqe_complete(qp,
>>   				     wqe,
>>   				     ib_hfi1_wc_opcode[wqe->wr.opcode],
>> @@ -1882,7 +1882,7 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
>>   		u32 s_last;
>>
>>   		trdma_clean_swqe(qp, wqe);
>> -		rvt_put_swqe(wqe);
>> +		rvt_put_qp_swqe(qp, wqe);
>>   		rvt_qp_wqe_unreserve(qp, wqe);
>>   		s_last = qp->s_last;
>>   		trace_hfi1_qp_send_completion(qp, wqe, s_last);
>> diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
>> index 50dd981..2ac4c67 100644
>> --- a/drivers/infiniband/hw/qib/qib_rc.c
>> +++ b/drivers/infiniband/hw/qib/qib_rc.c
>> @@ -933,7 +933,7 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
>>   		qp->s_last = s_last;
>>   		/* see post_send() */
>>   		barrier();
>> -		rvt_put_swqe(wqe);
>> +		rvt_put_qp_swqe(qp, wqe);
>>   		rvt_qp_swqe_complete(qp,
>>   				     wqe,
>>   				     ib_qib_wc_opcode[wqe->wr.opcode],
>> @@ -975,7 +975,7 @@ static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
>>   	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
>>   		u32 s_last;
>>
>> -		rvt_put_swqe(wqe);
>> +		rvt_put_qp_swqe(qp, wqe);
>>   		s_last = qp->s_last;
>>   		if (++s_last >= qp->s_size)
>>   			s_last = 0;
>> diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
>> index 2460303..31a2e65 100644
>> --- a/drivers/infiniband/sw/rdmavt/qp.c
>> +++ b/drivers/infiniband/sw/rdmavt/qp.c
>> @@ -623,10 +623,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
>>   		while (qp->s_last != qp->s_head) {
>>   			struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);
>>
>> -			rvt_put_swqe(wqe);
>> -			if (qp->allowed_ops == IB_OPCODE_UD)
>> -				atomic_dec(&ibah_to_rvtah(
>> -						wqe->ud_wr.ah)->refcount);
>> +			rvt_put_qp_swqe(qp, wqe);
>>   			if (++qp->s_last >= qp->s_size)
>>   				qp->s_last = 0;
>>   			smp_wmb(); /* see qp_set_savail */
>> @@ -2683,9 +2680,7 @@ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
>>   	qp->s_last = last;
>>   	/* See post_send() */
>>   	barrier();
>> -	rvt_put_swqe(wqe);
>> -	if (qp->allowed_ops == IB_OPCODE_UD)
>> -		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
>> +	rvt_put_qp_swqe(qp, wqe);
>>
>>   	rvt_qp_swqe_complete(qp,
>>   			     wqe,
>> diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
>> index a00c46a..68e38c2 100644
>> --- a/include/rdma/rdmavt_qp.h
>> +++ b/include/rdma/rdmavt_qp.h
>> @@ -723,6 +723,20 @@ static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
>>   	return rvt_mod_retry_timer_ext(qp, 0);
>>   }
>>
>> +/**
>> + * rvt_put_qp_swqe - drop refs held by swqe
>> + * @qp: the send qp
>> + * @wqe: the send wqe
>> + *
>> + * This drops any references held by the swqe
>> + */
>> +static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
>> +{
>> +	rvt_put_swqe(wqe);
>> +	if (qp->allowed_ops == IB_OPCODE_UD)
>> +		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
>> +}
> 
> In the context of my allocation change patches, I wanted to ask on which
> objects should driver perform refcounting?

Are you asking if we really need to do our own refcounting in the driver 
since your patch series went in to for-next?

-Denny
Leon Romanovsky April 15, 2019, 1:12 p.m. UTC | #3
On Mon, Apr 15, 2019 at 08:31:03AM -0400, Dennis Dalessandro wrote:
> On 4/12/2019 11:43 AM, Leon Romanovsky wrote:
> > On Fri, Apr 12, 2019 at 06:41:42AM -0700, Dennis Dalessandro wrote:
> > > From: Mike Marciniszyn <mike.marciniszyn@intel.com>
> > >
> > > The reference count adjustments on reference count completion
> > > are open coded throughout.
> > >
> > > Add a routine to do all reference count adjustments and use.
> > >
> > > Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
> > > Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
> > > Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
> > >
> > > ---
> > > For some reason patch didn't make the list or patchworks. This
> > > is just a re-send. All the rest seem to have made it.
> > > ---
> > >   drivers/infiniband/hw/hfi1/rc.c    |    4 ++--
> > >   drivers/infiniband/hw/qib/qib_rc.c |    4 ++--
> > >   drivers/infiniband/sw/rdmavt/qp.c  |    9 ++-------
> > >   include/rdma/rdmavt_qp.h           |   14 ++++++++++++++
> > >   4 files changed, 20 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
> > > index 5ba39a9..a922edc 100644
> > > --- a/drivers/infiniband/hw/hfi1/rc.c
> > > +++ b/drivers/infiniband/hw/hfi1/rc.c
> > > @@ -1834,7 +1834,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
> > >   		qp->s_last = s_last;
> > >   		/* see post_send() */
> > >   		barrier();
> > > -		rvt_put_swqe(wqe);
> > > +		rvt_put_qp_swqe(qp, wqe);
> > >   		rvt_qp_swqe_complete(qp,
> > >   				     wqe,
> > >   				     ib_hfi1_wc_opcode[wqe->wr.opcode],
> > > @@ -1882,7 +1882,7 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
> > >   		u32 s_last;
> > >
> > >   		trdma_clean_swqe(qp, wqe);
> > > -		rvt_put_swqe(wqe);
> > > +		rvt_put_qp_swqe(qp, wqe);
> > >   		rvt_qp_wqe_unreserve(qp, wqe);
> > >   		s_last = qp->s_last;
> > >   		trace_hfi1_qp_send_completion(qp, wqe, s_last);
> > > diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
> > > index 50dd981..2ac4c67 100644
> > > --- a/drivers/infiniband/hw/qib/qib_rc.c
> > > +++ b/drivers/infiniband/hw/qib/qib_rc.c
> > > @@ -933,7 +933,7 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
> > >   		qp->s_last = s_last;
> > >   		/* see post_send() */
> > >   		barrier();
> > > -		rvt_put_swqe(wqe);
> > > +		rvt_put_qp_swqe(qp, wqe);
> > >   		rvt_qp_swqe_complete(qp,
> > >   				     wqe,
> > >   				     ib_qib_wc_opcode[wqe->wr.opcode],
> > > @@ -975,7 +975,7 @@ static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
> > >   	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
> > >   		u32 s_last;
> > >
> > > -		rvt_put_swqe(wqe);
> > > +		rvt_put_qp_swqe(qp, wqe);
> > >   		s_last = qp->s_last;
> > >   		if (++s_last >= qp->s_size)
> > >   			s_last = 0;
> > > diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
> > > index 2460303..31a2e65 100644
> > > --- a/drivers/infiniband/sw/rdmavt/qp.c
> > > +++ b/drivers/infiniband/sw/rdmavt/qp.c
> > > @@ -623,10 +623,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
> > >   		while (qp->s_last != qp->s_head) {
> > >   			struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);
> > >
> > > -			rvt_put_swqe(wqe);
> > > -			if (qp->allowed_ops == IB_OPCODE_UD)
> > > -				atomic_dec(&ibah_to_rvtah(
> > > -						wqe->ud_wr.ah)->refcount);
> > > +			rvt_put_qp_swqe(qp, wqe);
> > >   			if (++qp->s_last >= qp->s_size)
> > >   				qp->s_last = 0;
> > >   			smp_wmb(); /* see qp_set_savail */
> > > @@ -2683,9 +2680,7 @@ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
> > >   	qp->s_last = last;
> > >   	/* See post_send() */
> > >   	barrier();
> > > -	rvt_put_swqe(wqe);
> > > -	if (qp->allowed_ops == IB_OPCODE_UD)
> > > -		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
> > > +	rvt_put_qp_swqe(qp, wqe);
> > >
> > >   	rvt_qp_swqe_complete(qp,
> > >   			     wqe,
> > > diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
> > > index a00c46a..68e38c2 100644
> > > --- a/include/rdma/rdmavt_qp.h
> > > +++ b/include/rdma/rdmavt_qp.h
> > > @@ -723,6 +723,20 @@ static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
> > >   	return rvt_mod_retry_timer_ext(qp, 0);
> > >   }
> > >
> > > +/**
> > > + * rvt_put_qp_swqe - drop refs held by swqe
> > > + * @qp: the send qp
> > > + * @wqe: the send wqe
> > > + *
> > > + * This drops any references held by the swqe
> > > + */
> > > +static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
> > > +{
> > > +	rvt_put_swqe(wqe);
> > > +	if (qp->allowed_ops == IB_OPCODE_UD)
> > > +		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
> > > +}
> >
> > In the context of my allocation change patches, I wanted to ask on which
> > objects should driver perform refcounting?
>
> Are you asking if we really need to do our own refcounting in the driver
> since your patch series went in to for-next?

For this AH code - yes, I don't think that refcounting is needed.

However my question is more general - I need to know in which objects
we can remove refcounting without any worries that I'll break some driver.

Thanks

>
> -Denny
>
>
>
Leon Romanovsky April 16, 2019, 6:20 a.m. UTC | #4
On Mon, Apr 15, 2019 at 08:52:52PM +0000, Ruhl, Michael J wrote:
> >-----Original Message-----
> >From: Leon Romanovsky [mailto:leon@kernel.org]
> >Sent: Monday, April 15, 2019 9:13 AM
> >To: Dalessandro, Dennis <dennis.dalessandro@intel.com>
> >Cc: jgg@ziepe.ca; dledford@redhat.com; linux-rdma@vger.kernel.org; Ruhl,
> >Michael J <michael.j.ruhl@intel.com>; Marciniszyn, Mike
> ><mike.marciniszyn@intel.com>
> >Subject: Re: [PATCH RESEND for-next 5/9] IB/{rdmavt, qib, hfi1}: Use new
> >routine to release reference counts
> >
> >On Mon, Apr 15, 2019 at 08:31:03AM -0400, Dennis Dalessandro wrote:
> >> On 4/12/2019 11:43 AM, Leon Romanovsky wrote:
> >> > On Fri, Apr 12, 2019 at 06:41:42AM -0700, Dennis Dalessandro wrote:
> >> > > From: Mike Marciniszyn <mike.marciniszyn@intel.com>
> >> > >
> >> > > The reference count adjustments on reference count completion
> >> > > are open coded throughout.
> >> > >
> >> > > Add a routine to do all reference count adjustments and use.
> >> > >
> >> > > Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
> >> > > Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
> >> > > Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
> >> > >
> >> > > ---
> >> > > For some reason patch didn't make the list or patchworks. This
> >> > > is just a re-send. All the rest seem to have made it.
> >> > > ---
> >> > >   drivers/infiniband/hw/hfi1/rc.c    |    4 ++--
> >> > >   drivers/infiniband/hw/qib/qib_rc.c |    4 ++--
> >> > >   drivers/infiniband/sw/rdmavt/qp.c  |    9 ++-------
> >> > >   include/rdma/rdmavt_qp.h           |   14 ++++++++++++++
> >> > >   4 files changed, 20 insertions(+), 11 deletions(-)
> >> > >
> >> > > diff --git a/drivers/infiniband/hw/hfi1/rc.c
> >b/drivers/infiniband/hw/hfi1/rc.c
> >> > > index 5ba39a9..a922edc 100644
> >> > > --- a/drivers/infiniband/hw/hfi1/rc.c
> >> > > +++ b/drivers/infiniband/hw/hfi1/rc.c
> >> > > @@ -1834,7 +1834,7 @@ void hfi1_rc_send_complete(struct rvt_qp
> >*qp, struct hfi1_opa_header *opah)
> >> > >   		qp->s_last = s_last;
> >> > >   		/* see post_send() */
> >> > >   		barrier();
> >> > > -		rvt_put_swqe(wqe);
> >> > > +		rvt_put_qp_swqe(qp, wqe);
> >> > >   		rvt_qp_swqe_complete(qp,
> >> > >   				     wqe,
> >> > >   				     ib_hfi1_wc_opcode[wqe->wr.opcode],
> >> > > @@ -1882,7 +1882,7 @@ struct rvt_swqe *do_rc_completion(struct
> >rvt_qp *qp,
> >> > >   		u32 s_last;
> >> > >
> >> > >   		trdma_clean_swqe(qp, wqe);
> >> > > -		rvt_put_swqe(wqe);
> >> > > +		rvt_put_qp_swqe(qp, wqe);
> >> > >   		rvt_qp_wqe_unreserve(qp, wqe);
> >> > >   		s_last = qp->s_last;
> >> > >   		trace_hfi1_qp_send_completion(qp, wqe, s_last);
> >> > > diff --git a/drivers/infiniband/hw/qib/qib_rc.c
> >b/drivers/infiniband/hw/qib/qib_rc.c
> >> > > index 50dd981..2ac4c67 100644
> >> > > --- a/drivers/infiniband/hw/qib/qib_rc.c
> >> > > +++ b/drivers/infiniband/hw/qib/qib_rc.c
> >> > > @@ -933,7 +933,7 @@ void qib_rc_send_complete(struct rvt_qp *qp,
> >struct ib_header *hdr)
> >> > >   		qp->s_last = s_last;
> >> > >   		/* see post_send() */
> >> > >   		barrier();
> >> > > -		rvt_put_swqe(wqe);
> >> > > +		rvt_put_qp_swqe(qp, wqe);
> >> > >   		rvt_qp_swqe_complete(qp,
> >> > >   				     wqe,
> >> > >   				     ib_qib_wc_opcode[wqe->wr.opcode],
> >> > > @@ -975,7 +975,7 @@ static inline void update_last_psn(struct rvt_qp
> >*qp, u32 psn)
> >> > >   	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
> >> > >   		u32 s_last;
> >> > >
> >> > > -		rvt_put_swqe(wqe);
> >> > > +		rvt_put_qp_swqe(qp, wqe);
> >> > >   		s_last = qp->s_last;
> >> > >   		if (++s_last >= qp->s_size)
> >> > >   			s_last = 0;
> >> > > diff --git a/drivers/infiniband/sw/rdmavt/qp.c
> >b/drivers/infiniband/sw/rdmavt/qp.c
> >> > > index 2460303..31a2e65 100644
> >> > > --- a/drivers/infiniband/sw/rdmavt/qp.c
> >> > > +++ b/drivers/infiniband/sw/rdmavt/qp.c
> >> > > @@ -623,10 +623,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp,
> >int clr_sends)
> >> > >   		while (qp->s_last != qp->s_head) {
> >> > >   			struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp-
> >>s_last);
> >> > >
> >> > > -			rvt_put_swqe(wqe);
> >> > > -			if (qp->allowed_ops == IB_OPCODE_UD)
> >> > > -				atomic_dec(&ibah_to_rvtah(
> >> > > -						wqe->ud_wr.ah)->refcount);
> >> > > +			rvt_put_qp_swqe(qp, wqe);
> >> > >   			if (++qp->s_last >= qp->s_size)
> >> > >   				qp->s_last = 0;
> >> > >   			smp_wmb(); /* see qp_set_savail */
> >> > > @@ -2683,9 +2680,7 @@ void rvt_send_complete(struct rvt_qp *qp,
> >struct rvt_swqe *wqe,
> >> > >   	qp->s_last = last;
> >> > >   	/* See post_send() */
> >> > >   	barrier();
> >> > > -	rvt_put_swqe(wqe);
> >> > > -	if (qp->allowed_ops == IB_OPCODE_UD)
> >> > > -		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
> >> > > +	rvt_put_qp_swqe(qp, wqe);
> >> > >
> >> > >   	rvt_qp_swqe_complete(qp,
> >> > >   			     wqe,
> >> > > diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
> >> > > index a00c46a..68e38c2 100644
> >> > > --- a/include/rdma/rdmavt_qp.h
> >> > > +++ b/include/rdma/rdmavt_qp.h
> >> > > @@ -723,6 +723,20 @@ static inline void rvt_mod_retry_timer(struct
> >rvt_qp *qp)
> >> > >   	return rvt_mod_retry_timer_ext(qp, 0);
> >> > >   }
> >> > >
> >> > > +/**
> >> > > + * rvt_put_qp_swqe - drop refs held by swqe
> >> > > + * @qp: the send qp
> >> > > + * @wqe: the send wqe
> >> > > + *
> >> > > + * This drops any references held by the swqe
> >> > > + */
> >> > > +static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe
> >*wqe)
> >> > > +{
> >> > > +	rvt_put_swqe(wqe);
> >> > > +	if (qp->allowed_ops == IB_OPCODE_UD)
> >> > > +		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
> >> > > +}
> >> >
> >> > In the context of my allocation change patches, I wanted to ask on which
> >> > objects should driver perform refcounting?
> >>
> >> Are you asking if we really need to do our own refcounting in the driver
> >> since your patch series went in to for-next?
> >
> >For this AH code - yes, I don't think that refcounting is needed.
>
> Hi Leon,
>
> We do need the reference count because the AH is used by the asynchronous
> send engine.
>
> I am going to work up a patch to implement the SLEEPABLE path in our destroy
> to deal with this.

I see, but it is not SLEEPABLE/not SLEEPABLE paths, there is a need to ensure
that destroy AH is synchronous operation.

Do you have time expectation when will it be ready?

It seems like my commit d345691471b4 ("RDMA: Handle AH
allocations by IB/core") assumed that all releases are synchronous.

There is use-after-free bug now in your driver.

>
> >However my question is more general - I need to know in which objects
> >we can remove refcounting without any worries that I'll break some driver.
>
> That is tougher to answer, and will need some more research.
>
> Do you have a list of objects in mind to start with?

The most challengeable are MR and QPs, everything else can definitely be
transformed to be without refcounting.

>
> Thanks,
>
> Mike
>
>
> >Thanks
> >
> >>
> >> -Denny
> >>
> >>
> >>
Jason Gunthorpe April 17, 2019, 6:20 a.m. UTC | #5
On Mon, Apr 15, 2019 at 08:52:52PM +0000, Ruhl, Michael J wrote:

> We do need the reference count because the AH is used by the asynchronous
> send engine.

Drivers cannot use refcounts to manage their object lifetimes. Destroy
must be synchronous.

So if you have AH's in use by another thread it must sleep during
destroy..

> I am going to work up a patch to implement the SLEEPABLE path in our destroy
> to deal with this.

I thought that was only for create? Did we get an expectation that
destroy is non-sleeping? I hope not..

Jason
Ruhl, Michael J April 17, 2019, 9:35 p.m. UTC | #6
>-----Original Message-----
>From: Jason Gunthorpe [mailto:jgg@ziepe.ca]
>Sent: Tuesday, April 16, 2019 11:20 PM
>To: Ruhl, Michael J <michael.j.ruhl@intel.com>
>Cc: Leon Romanovsky <leon@kernel.org>; Dalessandro, Dennis
><dennis.dalessandro@intel.com>; dledford@redhat.com; linux-
>rdma@vger.kernel.org; Marciniszyn, Mike <mike.marciniszyn@intel.com>
>Subject: Re: [PATCH RESEND for-next 5/9] IB/{rdmavt, qib, hfi1}: Use new
>routine to release reference counts
>
>On Mon, Apr 15, 2019 at 08:52:52PM +0000, Ruhl, Michael J wrote:
>
>> We do need the reference count because the AH is used by the
>asynchronous
>> send engine.
>
>Drivers cannot use refcounts to manage their object lifetimes. Destroy
>must be synchronous.

I think that that is what we are trying to address (make destroy synchronous).

>So if you have AH's in use by another thread it must sleep during
>destroy..

What must sleep, the other thread or the destroy?

>> I am going to work up a patch to implement the SLEEPABLE path in our destroy
>> to deal with this.
>
>I thought that was only for create? Did we get an expectation that
>destroy is non-sleeping? I hope not..

Umm.  The _destroy() interface has a destroy_flags parameter.

My assumption for this is that if RDMA_DESTROY_AH_SLEEPABLE is set,
the drivers _destroy() function must wait (sleep?) until all access (our internal refcount)
to the AH is compete.

Are you saying this mechanism is for the _create only?

Thanks,

Mike

>Jason
Ira Weiny April 18, 2019, 3:42 a.m. UTC | #7
On Wed, Apr 17, 2019 at 03:20:05AM -0300, Jason Gunthorpe wrote:
> On Mon, Apr 15, 2019 at 08:52:52PM +0000, Ruhl, Michael J wrote:
> 
> > We do need the reference count because the AH is used by the asynchronous
> > send engine.
> 
> Drivers cannot use refcounts to manage their object lifetimes. Destroy
> must be synchronous.

I admit that I'm not really following along but it seems wrong to tell drivers
that they can't refcount their objects...  One would think that a refcount of
their objects is safer than some of the locking hoops we have seen...

Do you mean they can't (don't need to) refcount objects which the core is
responsible for like QP, AH, etc or all objects?

Ira

> 
> So if you have AH's in use by another thread it must sleep during
> destroy..
> 
> > I am going to work up a patch to implement the SLEEPABLE path in our destroy
> > to deal with this.
> 
> I thought that was only for create? Did we get an expectation that
> destroy is non-sleeping? I hope not..
> 
> Jason
Jason Gunthorpe April 18, 2019, 5:45 a.m. UTC | #8
On Wed, Apr 17, 2019 at 08:42:52PM -0700, Ira Weiny wrote:
> On Wed, Apr 17, 2019 at 03:20:05AM -0300, Jason Gunthorpe wrote:
> > On Mon, Apr 15, 2019 at 08:52:52PM +0000, Ruhl, Michael J wrote:
> > 
> > > We do need the reference count because the AH is used by the asynchronous
> > > send engine.
> > 
> > Drivers cannot use refcounts to manage their object lifetimes. Destroy
> > must be synchronous.
> 
> I admit that I'm not really following along but it seems wrong to tell drivers
> that they can't refcount their objects...  One would think that a refcount of
> their objects is safer than some of the locking hoops we have seen...

It is not safer, it just pushed bugs into the ULPs.

Drivers cannot fail destroy - and they cannot continue to hold onto
related references after destroy returns (ie a QP cannot continue to
block destruction of a PD). In almost all cases this means the object
must actually be destroyed during destroy.

The most we could offer a driver is to hold the backing object memory
with a kref, so the driver could allow other threads to still see the
dead object after destroy succeeds. This might be what hfi needs.

Basically once destroyed an object can have no futher impact on any
other object.

> Do you mean they can't (don't need to) refcount objects which the core is
> responsible for like QP, AH, etc or all objects?

You can refcount them, but then you have to wait for the refcount to
go to zero during destroy, and odds are good that will cause the
driver to deadlock itself.

Strategies for destroy that gauarentee forward progress are better, ie
serialize with parallel thread, use locking, etc.

This is why I also don't like the idea of destroy being non-sleepable...

Jason
Jason Gunthorpe April 18, 2019, 5:49 a.m. UTC | #9
On Wed, Apr 17, 2019 at 09:35:45PM +0000, Ruhl, Michael J wrote:
> >So if you have AH's in use by another thread it must sleep during
> >destroy..
> 
> What must sleep, the other thread or the destroy?

The destroy..

> >> I am going to work up a patch to implement the SLEEPABLE path in our destroy
> >> to deal with this.
> >
> >I thought that was only for create? Did we get an expectation that
> >destroy is non-sleeping? I hope not..
> 
> Umm.  The _destroy() interface has a destroy_flags parameter.

Bleck, this AH thing is miserable. I wonder how many bugs we have here..

Well, you are stuck then, you have to make some kref based approach.

The general direction here is for the core code to provide a kref for
each object, so maybe we need to accelerate that for AH.

> My assumption for this is that if RDMA_DESTROY_AH_SLEEPABLE is set,
> the drivers _destroy() function must wait (sleep?) until all access (our internal refcount)
> to the AH is compete.

Not really, it is a foolish flag that we shouldn't have introduced :(

At best it just means the caller is in an atomic context - but if your
destroy requires sleep you can't just fail the destroy, so you pretty
much are screwed and can't make a sleeping destroy anyhow.

EFA is using this because they further assume that if the create is
done sleepable then the destroy is always also sleepable and thus they
cant alway sleep as they fail atomic create.

Jason
Ira Weiny April 18, 2019, 4:11 p.m. UTC | #10
> 
> On Wed, Apr 17, 2019 at 08:42:52PM -0700, Ira Weiny wrote:
> > On Wed, Apr 17, 2019 at 03:20:05AM -0300, Jason Gunthorpe wrote:
> > > On Mon, Apr 15, 2019 at 08:52:52PM +0000, Ruhl, Michael J wrote:
> > >
> > > > We do need the reference count because the AH is used by the
> > > > asynchronous send engine.
> > >
> > > Drivers cannot use refcounts to manage their object lifetimes.
> > > Destroy must be synchronous.
> >
> > I admit that I'm not really following along but it seems wrong to tell
> > drivers that they can't refcount their objects...  One would think
> > that a refcount of their objects is safer than some of the locking hoops we
> have seen...
> 
> It is not safer, it just pushed bugs into the ULPs.

Ok I see what you are saying and I agree...  For objects which are controlled by the core.  See below...

> 
> Drivers cannot fail destroy - and they cannot continue to hold onto related
> references after destroy returns (ie a QP cannot continue to block destruction
> of a PD). In almost all cases this means the object must actually be destroyed
> during destroy.
> 
> The most we could offer a driver is to hold the backing object memory with a
> kref, so the driver could allow other threads to still see the dead object after
> destroy succeeds. This might be what hfi needs.

Maybe.  I think this is part of the problem with having objects which the core controls but which the drivers have critical data in.  I'm not sure about performance but if the driver needs something to hold onto for internal use then that object should be separate from the core object.  Or as you say destroy must block.

The way things are structured now I agree with you.

> 
> Basically once destroyed an object can have no futher impact on any other
> object.

I agree.

> 
> > Do you mean they can't (don't need to) refcount objects which the core
> > is responsible for like QP, AH, etc or all objects?
> 
> You can refcount them, but then you have to wait for the refcount to go to
> zero during destroy, and odds are good that will cause the driver to deadlock
> itself.

Ok I think the answer to my question is "yes we are speaking of objects the core is tracking for the driver on behalf of the user".

> 
> Strategies for destroy that gauarentee forward progress are better, ie
> serialize with parallel thread, use locking, etc.

Agreed

> 
> This is why I also don't like the idea of destroy being non-sleepable...

Agreed...  It does depend on the situation.  I just wanted to make it clear that this is not some hard and fast rule that "RDMA drivers shall never do reference counting."  I think in some situations for __internal__ resources it is appropriate.

Make sense?
Ira

> 
> Jason
Ruhl, Michael J April 19, 2019, 6:56 p.m. UTC | #11
>-----Original Message-----
>From: Leon Romanovsky [mailto:leon@kernel.org]
>Sent: Monday, April 15, 2019 11:21 PM
>To: Ruhl, Michael J <michael.j.ruhl@intel.com>
>Cc: Dalessandro, Dennis <dennis.dalessandro@intel.com>; jgg@ziepe.ca;
>dledford@redhat.com; linux-rdma@vger.kernel.org; Marciniszyn, Mike
><mike.marciniszyn@intel.com>
>Subject: Re: [PATCH RESEND for-next 5/9] IB/{rdmavt, qib, hfi1}: Use new
>routine to release reference counts
>
>On Mon, Apr 15, 2019 at 08:52:52PM +0000, Ruhl, Michael J wrote:
>> >-----Original Message-----
>> >From: Leon Romanovsky [mailto:leon@kernel.org]
>> >Sent: Monday, April 15, 2019 9:13 AM
>> >To: Dalessandro, Dennis <dennis.dalessandro@intel.com>
>> >Cc: jgg@ziepe.ca; dledford@redhat.com; linux-rdma@vger.kernel.org;
>Ruhl,
>> >Michael J <michael.j.ruhl@intel.com>; Marciniszyn, Mike
>> ><mike.marciniszyn@intel.com>
>> >Subject: Re: [PATCH RESEND for-next 5/9] IB/{rdmavt, qib, hfi1}: Use new
>> >routine to release reference counts
>> >
>> >On Mon, Apr 15, 2019 at 08:31:03AM -0400, Dennis Dalessandro wrote:
>> >> On 4/12/2019 11:43 AM, Leon Romanovsky wrote:
>> >> > On Fri, Apr 12, 2019 at 06:41:42AM -0700, Dennis Dalessandro wrote:
>> >> > > From: Mike Marciniszyn <mike.marciniszyn@intel.com>
>> >> > >
>> >> > > The reference count adjustments on reference count completion
>> >> > > are open coded throughout.
>> >> > >
>> >> > > Add a routine to do all reference count adjustments and use.
>> >> > >
>> >> > > Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
>> >> > > Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
>> >> > > Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
>> >> > >
>> >> > > ---
>> >> > > For some reason patch didn't make the list or patchworks. This
>> >> > > is just a re-send. All the rest seem to have made it.
>> >> > > ---
>> >> > >   drivers/infiniband/hw/hfi1/rc.c    |    4 ++--
>> >> > >   drivers/infiniband/hw/qib/qib_rc.c |    4 ++--
>> >> > >   drivers/infiniband/sw/rdmavt/qp.c  |    9 ++-------
>> >> > >   include/rdma/rdmavt_qp.h           |   14 ++++++++++++++
>> >> > >   4 files changed, 20 insertions(+), 11 deletions(-)
>> >> > >
>> >> > > diff --git a/drivers/infiniband/hw/hfi1/rc.c
>> >b/drivers/infiniband/hw/hfi1/rc.c
>> >> > > index 5ba39a9..a922edc 100644
>> >> > > --- a/drivers/infiniband/hw/hfi1/rc.c
>> >> > > +++ b/drivers/infiniband/hw/hfi1/rc.c
>> >> > > @@ -1834,7 +1834,7 @@ void hfi1_rc_send_complete(struct rvt_qp
>> >*qp, struct hfi1_opa_header *opah)
>> >> > >   		qp->s_last = s_last;
>> >> > >   		/* see post_send() */
>> >> > >   		barrier();
>> >> > > -		rvt_put_swqe(wqe);
>> >> > > +		rvt_put_qp_swqe(qp, wqe);
>> >> > >   		rvt_qp_swqe_complete(qp,
>> >> > >   				     wqe,
>> >> > >   				     ib_hfi1_wc_opcode[wqe-
>>wr.opcode],
>> >> > > @@ -1882,7 +1882,7 @@ struct rvt_swqe *do_rc_completion(struct
>> >rvt_qp *qp,
>> >> > >   		u32 s_last;
>> >> > >
>> >> > >   		trdma_clean_swqe(qp, wqe);
>> >> > > -		rvt_put_swqe(wqe);
>> >> > > +		rvt_put_qp_swqe(qp, wqe);
>> >> > >   		rvt_qp_wqe_unreserve(qp, wqe);
>> >> > >   		s_last = qp->s_last;
>> >> > >   		trace_hfi1_qp_send_completion(qp, wqe, s_last);
>> >> > > diff --git a/drivers/infiniband/hw/qib/qib_rc.c
>> >b/drivers/infiniband/hw/qib/qib_rc.c
>> >> > > index 50dd981..2ac4c67 100644
>> >> > > --- a/drivers/infiniband/hw/qib/qib_rc.c
>> >> > > +++ b/drivers/infiniband/hw/qib/qib_rc.c
>> >> > > @@ -933,7 +933,7 @@ void qib_rc_send_complete(struct rvt_qp
>*qp,
>> >struct ib_header *hdr)
>> >> > >   		qp->s_last = s_last;
>> >> > >   		/* see post_send() */
>> >> > >   		barrier();
>> >> > > -		rvt_put_swqe(wqe);
>> >> > > +		rvt_put_qp_swqe(qp, wqe);
>> >> > >   		rvt_qp_swqe_complete(qp,
>> >> > >   				     wqe,
>> >> > >   				     ib_qib_wc_opcode[wqe-
>>wr.opcode],
>> >> > > @@ -975,7 +975,7 @@ static inline void update_last_psn(struct
>rvt_qp
>> >*qp, u32 psn)
>> >> > >   	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
>> >> > >   		u32 s_last;
>> >> > >
>> >> > > -		rvt_put_swqe(wqe);
>> >> > > +		rvt_put_qp_swqe(qp, wqe);
>> >> > >   		s_last = qp->s_last;
>> >> > >   		if (++s_last >= qp->s_size)
>> >> > >   			s_last = 0;
>> >> > > diff --git a/drivers/infiniband/sw/rdmavt/qp.c
>> >b/drivers/infiniband/sw/rdmavt/qp.c
>> >> > > index 2460303..31a2e65 100644
>> >> > > --- a/drivers/infiniband/sw/rdmavt/qp.c
>> >> > > +++ b/drivers/infiniband/sw/rdmavt/qp.c
>> >> > > @@ -623,10 +623,7 @@ static void rvt_clear_mr_refs(struct rvt_qp
>*qp,
>> >int clr_sends)
>> >> > >   		while (qp->s_last != qp->s_head) {
>> >> > >   			struct rvt_swqe *wqe =
>rvt_get_swqe_ptr(qp, qp-
>> >>s_last);
>> >> > >
>> >> > > -			rvt_put_swqe(wqe);
>> >> > > -			if (qp->allowed_ops == IB_OPCODE_UD)
>> >> > > -				atomic_dec(&ibah_to_rvtah(
>> >> > > -						wqe->ud_wr.ah)-
>>refcount);
>> >> > > +			rvt_put_qp_swqe(qp, wqe);
>> >> > >   			if (++qp->s_last >= qp->s_size)
>> >> > >   				qp->s_last = 0;
>> >> > >   			smp_wmb(); /* see qp_set_savail */
>> >> > > @@ -2683,9 +2680,7 @@ void rvt_send_complete(struct rvt_qp *qp,
>> >struct rvt_swqe *wqe,
>> >> > >   	qp->s_last = last;
>> >> > >   	/* See post_send() */
>> >> > >   	barrier();
>> >> > > -	rvt_put_swqe(wqe);
>> >> > > -	if (qp->allowed_ops == IB_OPCODE_UD)
>> >> > > -		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)-
>>refcount);
>> >> > > +	rvt_put_qp_swqe(qp, wqe);
>> >> > >
>> >> > >   	rvt_qp_swqe_complete(qp,
>> >> > >   			     wqe,
>> >> > > diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
>> >> > > index a00c46a..68e38c2 100644
>> >> > > --- a/include/rdma/rdmavt_qp.h
>> >> > > +++ b/include/rdma/rdmavt_qp.h
>> >> > > @@ -723,6 +723,20 @@ static inline void rvt_mod_retry_timer(struct
>> >rvt_qp *qp)
>> >> > >   	return rvt_mod_retry_timer_ext(qp, 0);
>> >> > >   }
>> >> > >
>> >> > > +/**
>> >> > > + * rvt_put_qp_swqe - drop refs held by swqe
>> >> > > + * @qp: the send qp
>> >> > > + * @wqe: the send wqe
>> >> > > + *
>> >> > > + * This drops any references held by the swqe
>> >> > > + */
>> >> > > +static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct
>rvt_swqe
>> >*wqe)
>> >> > > +{
>> >> > > +	rvt_put_swqe(wqe);
>> >> > > +	if (qp->allowed_ops == IB_OPCODE_UD)
>> >> > > +		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)-
>>refcount);
>> >> > > +}
>> >> >
>> >> > In the context of my allocation change patches, I wanted to ask on
>which
>> >> > objects should driver perform refcounting?
>> >>
>> >> Are you asking if we really need to do our own refcounting in the driver
>> >> since your patch series went in to for-next?
>> >
>> >For this AH code - yes, I don't think that refcounting is needed.
>>
>> Hi Leon,
>>
>> We do need the reference count because the AH is used by the
>asynchronous
>> send engine.
>>
>> I am going to work up a patch to implement the SLEEPABLE path in our
>destroy
>> to deal with this.
>
>I see, but it is not SLEEPABLE/not SLEEPABLE paths, there is a need to ensure
>that destroy AH is synchronous operation.
>
>Do you have time expectation when will it be ready?
>
>It seems like my commit d345691471b4 ("RDMA: Handle AH
>allocations by IB/core") assumed that all releases are synchronous.
>
>There is use-after-free bug now in your driver.

Hi Leon,

WRT this specific refcount, after further review, I am going to eliminate it
in an upcoming patch (hopefully available next week).

Because of the issues cleaned up with this patch set, I am basing
my patch on the patch set that includes this one (i.e. it is a follow on)

I will be looking into the other objects you mentioned (MR and QPs)
after the AH refcount is cleaned up to see if I can add any good info there.

Thanks,

Mike

>>
>> >However my question is more general - I need to know in which objects
>> >we can remove refcounting without any worries that I'll break some driver.
>>
>> That is tougher to answer, and will need some more research.
>>
>> Do you have a list of objects in mind to start with?
>
>The most challengeable are MR and QPs, everything else can definitely be
>transformed to be without refcounting.
>
>>
>> Thanks,
>>
>> Mike
>>
>>
>> >Thanks
>> >
>> >>
>> >> -Denny
>> >>
>> >>
>> >>
Jason Gunthorpe April 22, 2019, 12:33 p.m. UTC | #12
On Thu, Apr 18, 2019 at 04:11:56PM +0000, Weiny, Ira wrote:
> > 
> > On Wed, Apr 17, 2019 at 08:42:52PM -0700, Ira Weiny wrote:
> > > On Wed, Apr 17, 2019 at 03:20:05AM -0300, Jason Gunthorpe wrote:
> > > > On Mon, Apr 15, 2019 at 08:52:52PM +0000, Ruhl, Michael J wrote:
> > > >
> > > > > We do need the reference count because the AH is used by the
> > > > > asynchronous send engine.
> > > >
> > > > Drivers cannot use refcounts to manage their object lifetimes.
> > > > Destroy must be synchronous.
> > >
> > > I admit that I'm not really following along but it seems wrong to tell
> > > drivers that they can't refcount their objects...  One would think
> > > that a refcount of their objects is safer than some of the locking hoops we
> > have seen...
> > 
> > It is not safer, it just pushed bugs into the ULPs.
> 
> Ok I see what you are saying and I agree...  For objects which are
> controlled by the core.  See below...

No, for all objects that interact with the defined uverbs API. Ie
verbs objects. It doesn't matter if the core or driver allocates the
memory, the rules are the same.

> > Drivers cannot fail destroy - and they cannot continue to hold onto related
> > references after destroy returns (ie a QP cannot continue to block destruction
> > of a PD). In almost all cases this means the object must actually be destroyed
> > during destroy.
> > 
> > The most we could offer a driver is to hold the backing object memory with a
> > kref, so the driver could allow other threads to still see the dead object after
> > destroy succeeds. This might be what hfi needs.
> 
> Maybe.  I think this is part of the problem with having objects
> which the core controls but which the drivers have critical data in.
> I'm not sure about performance but if the driver needs something to
> hold onto for internal use then that object should be separate from
> the core object.  Or as you say destroy must block.

Or the driver works with the core to extend the life of the backing
memory (kref, rcu, etc) even though the object still must be
destroyed.

> > This is why I also don't like the idea of destroy being non-sleepable...
> 
> Agreed...  It does depend on the situation.  I just wanted to make
> it clear that this is not some hard and fast rule that "RDMA drivers
> shall never do reference counting."  I think in some situations for
> __internal__ resources it is appropriate.

Maybe, but most often cases I see trying to do 'refcounts' without
sleeping are just some kind of broken kref.

Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 5ba39a9..a922edc 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -1834,7 +1834,7 @@  void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
 		qp->s_last = s_last;
 		/* see post_send() */
 		barrier();
-		rvt_put_swqe(wqe);
+		rvt_put_qp_swqe(qp, wqe);
 		rvt_qp_swqe_complete(qp,
 				     wqe,
 				     ib_hfi1_wc_opcode[wqe->wr.opcode],
@@ -1882,7 +1882,7 @@  struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
 		u32 s_last;
 
 		trdma_clean_swqe(qp, wqe);
-		rvt_put_swqe(wqe);
+		rvt_put_qp_swqe(qp, wqe);
 		rvt_qp_wqe_unreserve(qp, wqe);
 		s_last = qp->s_last;
 		trace_hfi1_qp_send_completion(qp, wqe, s_last);
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 50dd981..2ac4c67 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -933,7 +933,7 @@  void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
 		qp->s_last = s_last;
 		/* see post_send() */
 		barrier();
-		rvt_put_swqe(wqe);
+		rvt_put_qp_swqe(qp, wqe);
 		rvt_qp_swqe_complete(qp,
 				     wqe,
 				     ib_qib_wc_opcode[wqe->wr.opcode],
@@ -975,7 +975,7 @@  static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
 	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
 		u32 s_last;
 
-		rvt_put_swqe(wqe);
+		rvt_put_qp_swqe(qp, wqe);
 		s_last = qp->s_last;
 		if (++s_last >= qp->s_size)
 			s_last = 0;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 2460303..31a2e65 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -623,10 +623,7 @@  static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
 		while (qp->s_last != qp->s_head) {
 			struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 
-			rvt_put_swqe(wqe);
-			if (qp->allowed_ops == IB_OPCODE_UD)
-				atomic_dec(&ibah_to_rvtah(
-						wqe->ud_wr.ah)->refcount);
+			rvt_put_qp_swqe(qp, wqe);
 			if (++qp->s_last >= qp->s_size)
 				qp->s_last = 0;
 			smp_wmb(); /* see qp_set_savail */
@@ -2683,9 +2680,7 @@  void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 	qp->s_last = last;
 	/* See post_send() */
 	barrier();
-	rvt_put_swqe(wqe);
-	if (qp->allowed_ops == IB_OPCODE_UD)
-		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
+	rvt_put_qp_swqe(qp, wqe);
 
 	rvt_qp_swqe_complete(qp,
 			     wqe,
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index a00c46a..68e38c2 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -723,6 +723,20 @@  static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
 	return rvt_mod_retry_timer_ext(qp, 0);
 }
 
+/**
+ * rvt_put_qp_swqe - drop refs held by swqe
+ * @qp: the send qp
+ * @wqe: the send wqe
+ *
+ * This drops any references held by the swqe
+ */
+static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+	rvt_put_swqe(wqe);
+	if (qp->allowed_ops == IB_OPCODE_UD)
+		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
+}
+
 extern const int  ib_rvt_state_ops[];
 
 struct rvt_dev_info;