diff mbox series

[for-next,v4] Subject: RDMA/rxe: Handle zero length rdma

Message ID 20230202044240.6304-1-rpearsonhpe@gmail.com (mailing list archive)
State Accepted
Delegated to: Jason Gunthorpe
Headers show
Series [for-next,v4] Subject: RDMA/rxe: Handle zero length rdma | expand

Commit Message

Bob Pearson Feb. 2, 2023, 4:42 a.m. UTC
Currently the rxe driver does not handle all cases of zero length
rdma operations correctly. The client does not have to provide an
rkey for zero length RDMA read or write operations so the rkey
provided may be invalid and should not be used to lookup an mr.

This patch corrects the driver to ignore the provided rkey if the
reth length is zero for read or write operations and make sure to
set the mr to NULL. In read_reply() if length is zero rxe_recheck_mr()
is not called. Warnings are added in the routines in rxe_mr.c to
catch NULL MRs when the length is non-zero.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
v4:
  Fixed a regression in flush operations because the rkey must be
  valid in that case even if the length is zero which maybe the
  case for selectivity level of the entire memory region.

Reported-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>

v3:
  Fixed my fat finger typing on v2. Mangled the patch.

v2:
  Rebased to current for-next.
  Cleaned up description to be a little more accurate.
---
 drivers/infiniband/sw/rxe/rxe_mr.c   |  7 ++++
 drivers/infiniband/sw/rxe/rxe_resp.c | 59 +++++++++++++++++++++-------
 2 files changed, 51 insertions(+), 15 deletions(-)


base-commit: 01884059c09fa770206ca03bf3660502f97d5fdd

Comments

Daisuke Matsuda (Fujitsu) Feb. 3, 2023, 10 a.m. UTC | #1
On Thu, Feb 2, 2023 1:43 PM Bob Pearson wrote:
> 
> Currently the rxe driver does not handle all cases of zero length
> rdma operations correctly. The client does not have to provide an
> rkey for zero length RDMA read or write operations so the rkey
> provided may be invalid and should not be used to lookup an mr.
> 
> This patch corrects the driver to ignore the provided rkey if the
> reth length is zero for read or write operations and make sure to
> set the mr to NULL. In read_reply() if length is zero rxe_recheck_mr()
> is not called. Warnings are added in the routines in rxe_mr.c to
> catch NULL MRs when the length is non-zero.
> 

<...>

> @@ -432,6 +435,10 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
>  	int err;
>  	u8 *va;
> 
> +	/* mr must be valid even if length is zero */
> +	if (WARN_ON(!mr))
> +		return -EINVAL;

If 'mr' is NULL, NULL pointer dereference can occur in process_flush()
before reaching here. Isn't it better to do the check in process_flush()?

> +
>  	if (length == 0)
>  		return 0;
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
> index cccf7c6c21e9..c8e7b4ca456b 100644
> --- a/drivers/infiniband/sw/rxe/rxe_resp.c
> +++ b/drivers/infiniband/sw/rxe/rxe_resp.c
> @@ -420,13 +420,23 @@ static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
>  	return RESPST_CHK_RKEY;
>  }
> 
> +/* if the reth length field is zero we can assume nothing
> + * about the rkey value and should not validate or use it.
> + * Instead set qp->resp.rkey to 0 which is an invalid rkey
> + * value since the minimum index part is 1.
> + */
>  static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
>  {
> +	unsigned int length = reth_len(pkt);
> +
>  	qp->resp.va = reth_va(pkt);
>  	qp->resp.offset = 0;
> -	qp->resp.rkey = reth_rkey(pkt);
> -	qp->resp.resid = reth_len(pkt);
> -	qp->resp.length = reth_len(pkt);
> +	qp->resp.resid = length;
> +	qp->resp.length = length;

As you know, the comment above this function is applicable only
to RDMA Read and Write. What do you say to mentioning FLUSH
here rather than at the one in rxe_flush_pmem_iova().

Thanks,
Daisuke

> +	if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0)
> +		qp->resp.rkey = 0;
> +	else
> +		qp->resp.rkey = reth_rkey(pkt);
>  }
> 

<...>
Bob Pearson Feb. 3, 2023, 6:47 p.m. UTC | #2
On 2/3/23 04:00, Daisuke Matsuda (Fujitsu) wrote:
> On Thu, Feb 2, 2023 1:43 PM Bob Pearson wrote:
>>
>> Currently the rxe driver does not handle all cases of zero length
>> rdma operations correctly. The client does not have to provide an
>> rkey for zero length RDMA read or write operations so the rkey
>> provided may be invalid and should not be used to lookup an mr.
>>
>> This patch corrects the driver to ignore the provided rkey if the
>> reth length is zero for read or write operations and make sure to
>> set the mr to NULL. In read_reply() if length is zero rxe_recheck_mr()
>> is not called. Warnings are added in the routines in rxe_mr.c to
>> catch NULL MRs when the length is non-zero.
>>
> 
> <...>
> 
>> @@ -432,6 +435,10 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
>>  	int err;
>>  	u8 *va;
>>
>> +	/* mr must be valid even if length is zero */
>> +	if (WARN_ON(!mr))
>> +		return -EINVAL;
> 
> If 'mr' is NULL, NULL pointer dereference can occur in process_flush()
> before reaching here. Isn't it better to do the check in process_flush()?
> 
>> +
>>  	if (length == 0)
>>  		return 0;
>>
>> diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
>> index cccf7c6c21e9..c8e7b4ca456b 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_resp.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_resp.c
>> @@ -420,13 +420,23 @@ static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
>>  	return RESPST_CHK_RKEY;
>>  }
>>
>> +/* if the reth length field is zero we can assume nothing
>> + * about the rkey value and should not validate or use it.
>> + * Instead set qp->resp.rkey to 0 which is an invalid rkey
>> + * value since the minimum index part is 1.
>> + */
>>  static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
>>  {
>> +	unsigned int length = reth_len(pkt);
>> +
>>  	qp->resp.va = reth_va(pkt);
>>  	qp->resp.offset = 0;
>> -	qp->resp.rkey = reth_rkey(pkt);
>> -	qp->resp.resid = reth_len(pkt);
>> -	qp->resp.length = reth_len(pkt);
>> +	qp->resp.resid = length;
>> +	qp->resp.length = length;
> 
> As you know, the comment above this function is applicable only
> to RDMA Read and Write. What do you say to mentioning FLUSH
> here rather than at the one in rxe_flush_pmem_iova().
> 
> Thanks,
> Daisuke
> 
>> +	if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0)
>> +		qp->resp.rkey = 0;
>> +	else
>> +		qp->resp.rkey = reth_rkey(pkt);
>>  }
>>
> 
> <...>
> 
Daisuke,

The updated patch no longer sets mr = NULL for flush. This check is mainly to defend against someone changing
it again in the future and is near where mr is dereferenced. You are correct about the comment I can change it.
Will give it a day or two to see if anything else comes in.

Regards,

Bob Pearson
Daisuke Matsuda (Fujitsu) Feb. 8, 2023, 7:12 a.m. UTC | #3
On Sat, Feb 4, 2023 3:47 AM Bob Pearson wrote:
> 
> On 2/3/23 04:00, Daisuke Matsuda (Fujitsu) wrote:
> > On Thu, Feb 2, 2023 1:43 PM Bob Pearson wrote:
> >>
> >> Currently the rxe driver does not handle all cases of zero length
> >> rdma operations correctly. The client does not have to provide an
> >> rkey for zero length RDMA read or write operations so the rkey
> >> provided may be invalid and should not be used to lookup an mr.
> >>
> >> This patch corrects the driver to ignore the provided rkey if the
> >> reth length is zero for read or write operations and make sure to
> >> set the mr to NULL. In read_reply() if length is zero rxe_recheck_mr()
> >> is not called. Warnings are added in the routines in rxe_mr.c to
> >> catch NULL MRs when the length is non-zero.
> >>

The change looks good. I will leave it to you whether to fix the comment.
Reviewed-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>

Thanks,
Daisuke

> >
> > <...>
> >
> >> @@ -432,6 +435,10 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
> >>  	int err;
> >>  	u8 *va;
> >>
> >> +	/* mr must be valid even if length is zero */
> >> +	if (WARN_ON(!mr))
> >> +		return -EINVAL;
> >
> > If 'mr' is NULL, NULL pointer dereference can occur in process_flush()
> > before reaching here. Isn't it better to do the check in process_flush()?
> >
> >> +
> >>  	if (length == 0)
> >>  		return 0;
> >>
> >> diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
> >> index cccf7c6c21e9..c8e7b4ca456b 100644
> >> --- a/drivers/infiniband/sw/rxe/rxe_resp.c
> >> +++ b/drivers/infiniband/sw/rxe/rxe_resp.c
> >> @@ -420,13 +420,23 @@ static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
> >>  	return RESPST_CHK_RKEY;
> >>  }
> >>
> >> +/* if the reth length field is zero we can assume nothing
> >> + * about the rkey value and should not validate or use it.
> >> + * Instead set qp->resp.rkey to 0 which is an invalid rkey
> >> + * value since the minimum index part is 1.
> >> + */
> >>  static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
> >>  {
> >> +	unsigned int length = reth_len(pkt);
> >> +
> >>  	qp->resp.va = reth_va(pkt);
> >>  	qp->resp.offset = 0;
> >> -	qp->resp.rkey = reth_rkey(pkt);
> >> -	qp->resp.resid = reth_len(pkt);
> >> -	qp->resp.length = reth_len(pkt);
> >> +	qp->resp.resid = length;
> >> +	qp->resp.length = length;
> >
> > As you know, the comment above this function is applicable only
> > to RDMA Read and Write. What do you say to mentioning FLUSH
> > here rather than at the one in rxe_flush_pmem_iova().
> >
> > Thanks,
> > Daisuke
> >
> >> +	if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0)
> >> +		qp->resp.rkey = 0;
> >> +	else
> >> +		qp->resp.rkey = reth_rkey(pkt);
> >>  }
> >>
> >
> > <...>
> >
> Daisuke,
> 
> The updated patch no longer sets mr = NULL for flush. This check is mainly to defend against someone changing
> it again in the future and is near where mr is dereferenced. You are correct about the comment I can change it.
> Will give it a day or two to see if anything else comes in.
> 
> Regards,
> 
> Bob Pearson
Zhijian Li (Fujitsu) Feb. 8, 2023, 8:41 a.m. UTC | #4
On 02/02/2023 12:42, Bob Pearson wrote:
> +/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
> + * if an invalid rkey is received or the rdma length is zero. For middle
> + * or last packets use the stored value of mr.
> + */
>   static enum resp_states check_rkey(struct rxe_qp *qp,
>   				   struct rxe_pkt_info *pkt)
>   {
> @@ -473,10 +487,12 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
>   		return RESPST_EXECUTE;
>   	}
>   
> -	/* A zero-byte op is not required to set an addr or rkey. See C9-88 */
> +	/* A zero-byte read or write op is not required to
> +	 * set an addr or rkey. See C9-88
> +	 */
>   	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
> -	    (pkt->mask & RXE_RETH_MASK) &&
> -	    reth_len(pkt) == 0) {
> +	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
> +		qp->resp.mr = NULL;

You are making sure 'qp->resp.mr = NULL', but I doubt the previous qp->resp.mr will leak after this assignment when its value is not NULL.


Thanks
Zhijian

>   		return RESPST_EXECUTE;
>   	}
>   
> @@ -555,6 +571,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
>   	return RESPST_EXECUTE;
>   
>   err:
> +	qp->resp.mr = NULL;
>   	if (mr)
Daisuke Matsuda (Fujitsu) Feb. 9, 2023, 8:14 a.m. UTC | #5
On Wed, Feb 8, 2023 5:41 PM Li, Zhijian/李 智坚 wrote:
> 
> On 02/02/2023 12:42, Bob Pearson wrote:
> > +/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
> > + * if an invalid rkey is received or the rdma length is zero. For middle
> > + * or last packets use the stored value of mr.
> > + */
> >   static enum resp_states check_rkey(struct rxe_qp *qp,
> >   				   struct rxe_pkt_info *pkt)
> >   {
> > @@ -473,10 +487,12 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
> >   		return RESPST_EXECUTE;
> >   	}
> >
> > -	/* A zero-byte op is not required to set an addr or rkey. See C9-88 */
> > +	/* A zero-byte read or write op is not required to
> > +	 * set an addr or rkey. See C9-88
> > +	 */
> >   	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
> > -	    (pkt->mask & RXE_RETH_MASK) &&
> > -	    reth_len(pkt) == 0) {
> > +	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
> > +		qp->resp.mr = NULL;
> 
> You are making sure 'qp->resp.mr = NULL', but I doubt the previous qp->resp.mr will leak after this assignment when its
> value is not NULL.

I do not see what you mean by " the previous qp->resp.mr ".
As far as I understand, 'qp->resp.mr' is set to NULL in cleanup()
before responder completes its work, so it is not supposed to be 
reused unlike 'res->read.rkey' being retained for multi-packet Read
responses. Could you elaborate on your view?

Daisuke

> 
> 
> Thanks
> Zhijian
> 
> >   		return RESPST_EXECUTE;
> >   	}
> >
> > @@ -555,6 +571,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
> >   	return RESPST_EXECUTE;
> >
> >   err:
> > +	qp->resp.mr = NULL;
> >   	if (mr)
Zhijian Li (Fujitsu) Feb. 9, 2023, 10:20 a.m. UTC | #6
On 09/02/2023 16:14, Matsuda, Daisuke/松田 大輔 wrote:
> On Wed, Feb 8, 2023 5:41 PM Li, Zhijian/李 智坚 wrote:
>>
>> On 02/02/2023 12:42, Bob Pearson wrote:
>>> +/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
>>> + * if an invalid rkey is received or the rdma length is zero. For middle
>>> + * or last packets use the stored value of mr.
>>> + */
>>>    static enum resp_states check_rkey(struct rxe_qp *qp,
>>>    				   struct rxe_pkt_info *pkt)
>>>    {
>>> @@ -473,10 +487,12 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
>>>    		return RESPST_EXECUTE;
>>>    	}
>>>
>>> -	/* A zero-byte op is not required to set an addr or rkey. See C9-88 */
>>> +	/* A zero-byte read or write op is not required to
>>> +	 * set an addr or rkey. See C9-88
>>> +	 */
>>>    	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
>>> -	    (pkt->mask & RXE_RETH_MASK) &&
>>> -	    reth_len(pkt) == 0) {
>>> +	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
>>> +		qp->resp.mr = NULL;
>>
>> You are making sure 'qp->resp.mr = NULL', but I doubt the previous qp->resp.mr will leak after this assignment when its
>> value is not NULL.
> 
> I do not see what you mean by " the previous qp->resp.mr ".
> As far as I understand, 'qp->resp.mr' is set to NULL in cleanup()
> before responder completes its work, so it is not supposed to be
> reused unlike 'res->read.rkey' being retained for multi-packet Read
> responses. Could you elaborate on your view?

IMO every 'qp->resp.mr' assignment will take a mr reference, so we should drop the this reference if we want to assign it a new mr again.


Theoretically, it should be changed to something like
if (qp->resp.mr) {
	rxe_put(qp->resp.mr)
         qp->resp.mr = NULL;
}




>>
>>>    		return RESPST_EXECUTE;
>>>    	}
>>>
>>> @@ -555,6 +571,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
>>>    	return RESPST_EXECUTE;
>>>
>>>    err:
>>> +	qp->resp.mr = NULL;

ditto

Thanks
Zhijian

>>>    	if (mr)
Daisuke Matsuda (Fujitsu) Feb. 13, 2023, 9:32 a.m. UTC | #7
On Thu, Feb 9, 2023 7:21 PM Li, Zhijian/李 智坚 wrote:
> 
> 
> 
> On 09/02/2023 16:14, Matsuda, Daisuke/松田 大輔 wrote:
> > On Wed, Feb 8, 2023 5:41 PM Li, Zhijian/李 智坚 wrote:
> >>
> >> On 02/02/2023 12:42, Bob Pearson wrote:
> >>> +/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
> >>> + * if an invalid rkey is received or the rdma length is zero. For middle
> >>> + * or last packets use the stored value of mr.
> >>> + */
> >>>    static enum resp_states check_rkey(struct rxe_qp *qp,
> >>>    				   struct rxe_pkt_info *pkt)
> >>>    {
> >>> @@ -473,10 +487,12 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
> >>>    		return RESPST_EXECUTE;
> >>>    	}
> >>>
> >>> -	/* A zero-byte op is not required to set an addr or rkey. See C9-88 */
> >>> +	/* A zero-byte read or write op is not required to
> >>> +	 * set an addr or rkey. See C9-88
> >>> +	 */
> >>>    	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
> >>> -	    (pkt->mask & RXE_RETH_MASK) &&
> >>> -	    reth_len(pkt) == 0) {
> >>> +	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
> >>> +		qp->resp.mr = NULL;
> >>
> >> You are making sure 'qp->resp.mr = NULL', but I doubt the previous qp->resp.mr will leak after this assignment
> when its
> >> value is not NULL.
> >
> > I do not see what you mean by " the previous qp->resp.mr ".
> > As far as I understand, 'qp->resp.mr' is set to NULL in cleanup()
> > before responder completes its work, so it is not supposed to be
> > reused unlike 'res->read.rkey' being retained for multi-packet Read
> > responses. Could you elaborate on your view?
> 
> IMO every 'qp->resp.mr' assignment will take a mr reference, so we should drop the this reference if we want to assign it
> a new mr again.
> 
> 
> Theoretically, it should be changed to something like
> if (qp->resp.mr) {
> 	rxe_put(qp->resp.mr)
>          qp->resp.mr = NULL;
> }

You are correct about what you wrote.
' qp->resp.mr = NULL;' and 'rxe_put(qp->resp.mr);' come in pairs in
other parts of rxe. I agree the same principle should be applied here.

However, it seems to me that this 'qp->resp.mr' is always NULL. For all 
operations other than multi-packet Read responses, cleanup() must be 
executed before responder completes the work. On the other hand, for
multi-packet Read replies, responder does not execute cleanup() and thus
does not drop the reference until the last reply is sent, but it does not
execute check_rkey() either since it uses the fast path jumping from
get_req() to read_reply().

From practical perspective, we can just remove ' qp->resp.mr = NULL;', 
and then 'qp->resp.mr' will still be NULL. From theoretical perspective,
we can make the change as Zhijian suggested. What other guys think
about this?

Thanks,
Daisuke

> 
> 
> 
> 
> >>
> >>>    		return RESPST_EXECUTE;
> >>>    	}
> >>>
> >>> @@ -555,6 +571,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
> >>>    	return RESPST_EXECUTE;
> >>>
> >>>    err:
> >>> +	qp->resp.mr = NULL;
> 
> ditto
> 
> Thanks
> Zhijian
> 
> >>>    	if (mr)
Bob Pearson Feb. 13, 2023, 5:41 p.m. UTC | #8
On 2/9/23 04:20, lizhijian@fujitsu.com wrote:
> 
> 
> On 09/02/2023 16:14, Matsuda, Daisuke/松田 大輔 wrote:
>> On Wed, Feb 8, 2023 5:41 PM Li, Zhijian/李 智坚 wrote:
>>>
>>> On 02/02/2023 12:42, Bob Pearson wrote:
>>>> +/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
>>>> + * if an invalid rkey is received or the rdma length is zero. For middle
>>>> + * or last packets use the stored value of mr.
>>>> + */
>>>>    static enum resp_states check_rkey(struct rxe_qp *qp,
>>>>    				   struct rxe_pkt_info *pkt)
>>>>    {
>>>> @@ -473,10 +487,12 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
>>>>    		return RESPST_EXECUTE;
>>>>    	}
>>>>
>>>> -	/* A zero-byte op is not required to set an addr or rkey. See C9-88 */
>>>> +	/* A zero-byte read or write op is not required to
>>>> +	 * set an addr or rkey. See C9-88
>>>> +	 */
>>>>    	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
>>>> -	    (pkt->mask & RXE_RETH_MASK) &&
>>>> -	    reth_len(pkt) == 0) {
>>>> +	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
>>>> +		qp->resp.mr = NULL;
>>>
>>> You are making sure 'qp->resp.mr = NULL', but I doubt the previous qp->resp.mr will leak after this assignment when its
>>> value is not NULL.
>>
>> I do not see what you mean by " the previous qp->resp.mr ".
>> As far as I understand, 'qp->resp.mr' is set to NULL in cleanup()
>> before responder completes its work, so it is not supposed to be
>> reused unlike 'res->read.rkey' being retained for multi-packet Read
>> responses. Could you elaborate on your view?
> 
> IMO every 'qp->resp.mr' assignment will take a mr reference, so we should drop the this reference if we want to assign it a new mr again.
> 
> 
> Theoretically, it should be changed to something like
> if (qp->resp.mr) {
> 	rxe_put(qp->resp.mr)
>          qp->resp.mr = NULL;
> }
> 
Generally I agree with the idea that a (few) assignments like this, even if after minutes of looking
over the code can be shown to be unneeded because it is already NULL or will get deleted, are helpful
if it makes the code easier to understand and prevents later changes to unintentionally break things.

Bob
> 
> 
> 
>>>
>>>>    		return RESPST_EXECUTE;
>>>>    	}
>>>>
>>>> @@ -555,6 +571,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
>>>>    	return RESPST_EXECUTE;
>>>>
>>>>    err:
>>>> +	qp->resp.mr = NULL;
> 
> ditto
> 
> Thanks
> Zhijian
> 
>>>>    	if (mr)
Zhu Yanjun Feb. 14, 2023, 1:13 a.m. UTC | #9
在 2023/2/14 1:41, Bob Pearson 写道:
> On 2/9/23 04:20, lizhijian@fujitsu.com wrote:
>>
>>
>> On 09/02/2023 16:14, Matsuda, Daisuke/松田 大輔 wrote:
>>> On Wed, Feb 8, 2023 5:41 PM Li, Zhijian/李 智坚 wrote:
>>>>
>>>> On 02/02/2023 12:42, Bob Pearson wrote:
>>>>> +/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
>>>>> + * if an invalid rkey is received or the rdma length is zero. For middle
>>>>> + * or last packets use the stored value of mr.
>>>>> + */
>>>>>     static enum resp_states check_rkey(struct rxe_qp *qp,
>>>>>     				   struct rxe_pkt_info *pkt)
>>>>>     {
>>>>> @@ -473,10 +487,12 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
>>>>>     		return RESPST_EXECUTE;
>>>>>     	}
>>>>>
>>>>> -	/* A zero-byte op is not required to set an addr or rkey. See C9-88 */
>>>>> +	/* A zero-byte read or write op is not required to
>>>>> +	 * set an addr or rkey. See C9-88
>>>>> +	 */
>>>>>     	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
>>>>> -	    (pkt->mask & RXE_RETH_MASK) &&
>>>>> -	    reth_len(pkt) == 0) {
>>>>> +	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
>>>>> +		qp->resp.mr = NULL;
>>>>
>>>> You are making sure 'qp->resp.mr = NULL', but I doubt the previous qp->resp.mr will leak after this assignment when its
>>>> value is not NULL.
>>>
>>> I do not see what you mean by " the previous qp->resp.mr ".
>>> As far as I understand, 'qp->resp.mr' is set to NULL in cleanup()
>>> before responder completes its work, so it is not supposed to be
>>> reused unlike 'res->read.rkey' being retained for multi-packet Read
>>> responses. Could you elaborate on your view?
>>
>> IMO every 'qp->resp.mr' assignment will take a mr reference, so we should drop the this reference if we want to assign it a new mr again.
>>
>>
>> Theoretically, it should be changed to something like
>> if (qp->resp.mr) {
>> 	rxe_put(qp->resp.mr)
>>           qp->resp.mr = NULL;
>> }

Without "qp->resp.mr = NULL;", it is possible to cause use after free 
error if qp->resp.mr is used again.

Zhu Yanjun

>>
> Generally I agree with the idea that a (few) assignments like this, even if after minutes of looking
> over the code can be shown to be unneeded because it is already NULL or will get deleted, are helpful
> if it makes the code easier to understand and prevents later changes to unintentionally break things.
> 
> Bob
>>
>>
>>
>>>>
>>>>>     		return RESPST_EXECUTE;
>>>>>     	}
>>>>>
>>>>> @@ -555,6 +571,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
>>>>>     	return RESPST_EXECUTE;
>>>>>
>>>>>     err:
>>>>> +	qp->resp.mr = NULL;
>>
>> ditto
>>
>> Thanks
>> Zhijian
>>
>>>>>     	if (mr)
>
Jason Gunthorpe Feb. 16, 2023, 3:58 p.m. UTC | #10
On Wed, Feb 01, 2023 at 10:42:41PM -0600, Bob Pearson wrote:
> Currently the rxe driver does not handle all cases of zero length
> rdma operations correctly. The client does not have to provide an
> rkey for zero length RDMA read or write operations so the rkey
> provided may be invalid and should not be used to lookup an mr.
> 
> This patch corrects the driver to ignore the provided rkey if the
> reth length is zero for read or write operations and make sure to
> set the mr to NULL. In read_reply() if length is zero rxe_recheck_mr()
> is not called. Warnings are added in the routines in rxe_mr.c to
> catch NULL MRs when the length is non-zero.
> 
> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> Reviewed-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>
> ---
> v4:
>   Fixed a regression in flush operations because the rkey must be
>   valid in that case even if the length is zero which maybe the
>   case for selectivity level of the entire memory region.
> 
> Reported-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>

Applied to for-next, I added a fixes line for day 0

Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index c80458634962..5e9a03831bf9 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -314,6 +314,9 @@  int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
 	if (length == 0)
 		return 0;
 
+	if (WARN_ON(!mr))
+		return -EINVAL;
+
 	if (mr->ibmr.type == IB_MR_TYPE_DMA) {
 		rxe_mr_copy_dma(mr, iova, addr, length, dir);
 		return 0;
@@ -432,6 +435,10 @@  int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
 	int err;
 	u8 *va;
 
+	/* mr must be valid even if length is zero */
+	if (WARN_ON(!mr))
+		return -EINVAL;
+
 	if (length == 0)
 		return 0;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index cccf7c6c21e9..c8e7b4ca456b 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -420,13 +420,23 @@  static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
 	return RESPST_CHK_RKEY;
 }
 
+/* if the reth length field is zero we can assume nothing
+ * about the rkey value and should not validate or use it.
+ * Instead set qp->resp.rkey to 0 which is an invalid rkey
+ * value since the minimum index part is 1.
+ */
 static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 {
+	unsigned int length = reth_len(pkt);
+
 	qp->resp.va = reth_va(pkt);
 	qp->resp.offset = 0;
-	qp->resp.rkey = reth_rkey(pkt);
-	qp->resp.resid = reth_len(pkt);
-	qp->resp.length = reth_len(pkt);
+	qp->resp.resid = length;
+	qp->resp.length = length;
+	if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0)
+		qp->resp.rkey = 0;
+	else
+		qp->resp.rkey = reth_rkey(pkt);
 }
 
 static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
@@ -437,6 +447,10 @@  static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 	qp->resp.resid = sizeof(u64);
 }
 
+/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
+ * if an invalid rkey is received or the rdma length is zero. For middle
+ * or last packets use the stored value of mr.
+ */
 static enum resp_states check_rkey(struct rxe_qp *qp,
 				   struct rxe_pkt_info *pkt)
 {
@@ -473,10 +487,12 @@  static enum resp_states check_rkey(struct rxe_qp *qp,
 		return RESPST_EXECUTE;
 	}
 
-	/* A zero-byte op is not required to set an addr or rkey. See C9-88 */
+	/* A zero-byte read or write op is not required to
+	 * set an addr or rkey. See C9-88
+	 */
 	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
-	    (pkt->mask & RXE_RETH_MASK) &&
-	    reth_len(pkt) == 0) {
+	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
+		qp->resp.mr = NULL;
 		return RESPST_EXECUTE;
 	}
 
@@ -555,6 +571,7 @@  static enum resp_states check_rkey(struct rxe_qp *qp,
 	return RESPST_EXECUTE;
 
 err:
+	qp->resp.mr = NULL;
 	if (mr)
 		rxe_put(mr);
 	if (mw)
@@ -889,7 +906,11 @@  static enum resp_states read_reply(struct rxe_qp *qp,
 	}
 
 	if (res->state == rdatm_res_state_new) {
-		if (!res->replay) {
+		if (!res->replay || qp->resp.length == 0) {
+			/* if length == 0 mr will be NULL (is ok)
+			 * otherwise qp->resp.mr holds a ref on mr
+			 * which we transfer to mr and drop below.
+			 */
 			mr = qp->resp.mr;
 			qp->resp.mr = NULL;
 		} else {
@@ -903,6 +924,10 @@  static enum resp_states read_reply(struct rxe_qp *qp,
 		else
 			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
 	} else {
+		/* re-lookup mr from rkey on all later packets.
+		 * length will be non-zero. This can fail if someone
+		 * modifies or destroys the mr since the first packet.
+		 */
 		mr = rxe_recheck_mr(qp, res->read.rkey);
 		if (!mr)
 			return RESPST_ERR_RKEY_VIOLATION;
@@ -920,18 +945,16 @@  static enum resp_states read_reply(struct rxe_qp *qp,
 	skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
 				 res->cur_psn, AETH_ACK_UNLIMITED);
 	if (!skb) {
-		if (mr)
-			rxe_put(mr);
-		return RESPST_ERR_RNR;
+		state = RESPST_ERR_RNR;
+		goto err_out;
 	}
 
 	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
 			  payload, RXE_FROM_MR_OBJ);
-	if (mr)
-		rxe_put(mr);
 	if (err) {
 		kfree_skb(skb);
-		return RESPST_ERR_RKEY_VIOLATION;
+		state = RESPST_ERR_RKEY_VIOLATION;
+		goto err_out;
 	}
 
 	if (bth_pad(&ack_pkt)) {
@@ -940,9 +963,12 @@  static enum resp_states read_reply(struct rxe_qp *qp,
 		memset(pad, 0, bth_pad(&ack_pkt));
 	}
 
+	/* rxe_xmit_packet always consumes the skb */
 	err = rxe_xmit_packet(qp, &ack_pkt, skb);
-	if (err)
-		return RESPST_ERR_RNR;
+	if (err) {
+		state = RESPST_ERR_RNR;
+		goto err_out;
+	}
 
 	res->read.va += payload;
 	res->read.resid -= payload;
@@ -959,6 +985,9 @@  static enum resp_states read_reply(struct rxe_qp *qp,
 		state = RESPST_CLEANUP;
 	}
 
+err_out:
+	if (mr)
+		rxe_put(mr);
 	return state;
 }