[1/2] SUNRPC: Fix buffer handling of GSS MIC with less slack
diff mbox series

Message ID 043d2ca649c3d81cdf0b43b149cd43069ad1c1e2.1568307763.git.bcodding@redhat.com
State New
Headers show
Series
  • [1/2] SUNRPC: Fix buffer handling of GSS MIC with less slack
Related show

Commit Message

Benjamin Coddington Sept. 12, 2019, 5:07 p.m. UTC
The GSS Message Integrity Check data for krb5i may lie partially in the XDR
reply buffer's pages and tail.  If so, we try to copy the entire MIC into
free space in the tail.  But as the estimations of the slack space required
for authentication and verification have improved there may be less free
space in the tail to complete this copy -- see commit 2c94b8eca1a2
("SUNRPC: Use au_rslack when computing reply buffer size").  In fact, there
may only be room in the tail for a single copy of the MIC, and not part of
the MIC and then another complete copy.

The real world failure reported is that `ls` of a directory on NFS may
sometimes return -EIO, which can be traced back to xdr_buf_read_netobj()
failing to find available free space in the tail to copy the MIC.

Fix this by checking for the case of the MIC crossing the boundaries of
head, pages, and tail. If so, shift the buffer until the MIC is contained
completely within the pages or tail.  This allows the remainder of the
function to create a sub buffer that directly addresses the complete MIC.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Cc: stable@vger.kernel.org
---
 net/sunrpc/xdr.c | 45 +++++++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 18 deletions(-)

Comments

Chuck Lever Sept. 13, 2019, 2:41 p.m. UTC | #1
> On Sep 12, 2019, at 1:07 PM, Benjamin Coddington <bcodding@redhat.com> wrote:
> 
> The GSS Message Integrity Check data for krb5i may lie partially in the XDR
> reply buffer's pages and tail.  If so, we try to copy the entire MIC into
> free space in the tail.  But as the estimations of the slack space required
> for authentication and verification have improved there may be less free
> space in the tail to complete this copy -- see commit 2c94b8eca1a2
> ("SUNRPC: Use au_rslack when computing reply buffer size").  In fact, there
> may only be room in the tail for a single copy of the MIC, and not part of
> the MIC and then another complete copy.
> 
> The real world failure reported is that `ls` of a directory on NFS may
> sometimes return -EIO, which can be traced back to xdr_buf_read_netobj()
> failing to find available free space in the tail to copy the MIC.
> 
> Fix this by checking for the case of the MIC crossing the boundaries of
> head, pages, and tail. If so, shift the buffer until the MIC is contained
> completely within the pages or tail.  This allows the remainder of the
> function to create a sub buffer that directly addresses the complete MIC.
> 
> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> Cc: stable@vger.kernel.org

# v5.1 ?


> ---
> net/sunrpc/xdr.c | 45 +++++++++++++++++++++++++++------------------
> 1 file changed, 27 insertions(+), 18 deletions(-)
> 
> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
> index 48c93b9e525e..6e05a9693568 100644
> --- a/net/sunrpc/xdr.c
> +++ b/net/sunrpc/xdr.c
> @@ -1237,39 +1237,48 @@ xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
> EXPORT_SYMBOL_GPL(xdr_encode_word);
> 
> /* If the netobj starting offset bytes from the start of xdr_buf is contained
> - * entirely in the head or the tail, set object to point to it; otherwise
> - * try to find space for it at the end of the tail, copy it there, and
> - * set obj to point to it. */
> + * entirely in the head, pages, or tail, set object to point to it; otherwise
> + * shift the buffer until it is contained entirely within the pages or tail.
> + */
> int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned int offset)
> {
> 	struct xdr_buf subbuf;
> +	unsigned int len_to_boundary;
> 
> 	if (xdr_decode_word(buf, offset, &obj->len))
> 		return -EFAULT;
> -	if (xdr_buf_subsegment(buf, &subbuf, offset + 4, obj->len))
> +
> +	offset += 4;
> +
> +	/* Is the obj partially in the head? */
> +	len_to_boundary = buf->head->iov_len - offset;
> +	if (len_to_boundary > 0 && len_to_boundary < obj->len)
> +		xdr_shift_buf(buf, len_to_boundary);
> +
> +	/* Is the obj partially in the pages? */
> +	len_to_boundary = buf->head->iov_len + buf->page_len - offset;
> +	if (len_to_boundary > 0 && len_to_boundary < obj->len)
> +		xdr_shrink_pagelen(buf, len_to_boundary);

Do you need to check if the obj is entirely in ->pages but crosses a page boundary?


> +
> +	if (xdr_buf_subsegment(buf, &subbuf, offset, obj->len))
> 		return -EFAULT;
> 
> -	/* Is the obj contained entirely in the head? */
> -	obj->data = subbuf.head[0].iov_base;
> -	if (subbuf.head[0].iov_len == obj->len)
> -		return 0;
> -	/* ..or is the obj contained entirely in the tail? */
> +	/* Most likely: is the obj contained entirely in the tail? */
> 	obj->data = subbuf.tail[0].iov_base;
> 	if (subbuf.tail[0].iov_len == obj->len)
> 		return 0;
> 
> -	/* use end of tail as storage for obj:
> -	 * (We don't copy to the beginning because then we'd have
> -	 * to worry about doing a potentially overlapping copy.
> -	 * This assumes the object is at most half the length of the
> -	 * tail.) */
> +	/* ..or is the obj contained entirely in the head? */
> +	obj->data = subbuf.head[0].iov_base;
> +	if (subbuf.head[0].iov_len == obj->len)
> +		return 0;
> +
> +	/* obj is in the pages: move to tail */
> 	if (obj->len > buf->buflen - buf->len)
> 		return -ENOMEM;
> -	if (buf->tail[0].iov_len != 0)
> -		obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len;
> -	else
> -		obj->data = buf->head[0].iov_base + buf->head[0].iov_len;
> +	obj->data = buf->head[0].iov_base + buf->head[0].iov_len;
> 	__read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len);
> +
> 	return 0;
> }
> EXPORT_SYMBOL_GPL(xdr_buf_read_netobj);
> -- 
> 2.20.1
> 

--
Chuck Lever
Benjamin Coddington Sept. 13, 2019, 2:49 p.m. UTC | #2
On 13 Sep 2019, at 10:41, Chuck Lever wrote:

>> On Sep 12, 2019, at 1:07 PM, Benjamin Coddington 
>> <bcodding@redhat.com> wrote:
>>
>> The GSS Message Integrity Check data for krb5i may lie partially in 
>> the XDR
>> reply buffer's pages and tail.  If so, we try to copy the entire MIC 
>> into
>> free space in the tail.  But as the estimations of the slack space 
>> required
>> for authentication and verification have improved there may be less 
>> free
>> space in the tail to complete this copy -- see commit 2c94b8eca1a2
>> ("SUNRPC: Use au_rslack when computing reply buffer size").  In fact, 
>> there
>> may only be room in the tail for a single copy of the MIC, and not 
>> part of
>> the MIC and then another complete copy.
>>
>> The real world failure reported is that `ls` of a directory on NFS 
>> may
>> sometimes return -EIO, which can be traced back to 
>> xdr_buf_read_netobj()
>> failing to find available free space in the tail to copy the MIC.
>>
>> Fix this by checking for the case of the MIC crossing the boundaries 
>> of
>> head, pages, and tail. If so, shift the buffer until the MIC is 
>> contained
>> completely within the pages or tail.  This allows the remainder of 
>> the
>> function to create a sub buffer that directly addresses the complete 
>> MIC.
>>
>> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
>> Cc: stable@vger.kernel.org
>
> # v5.1 ?

That makes sense to match the changes to rslack.

>> ---
>> net/sunrpc/xdr.c | 45 +++++++++++++++++++++++++++------------------
>> 1 file changed, 27 insertions(+), 18 deletions(-)
>>
>> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
>> index 48c93b9e525e..6e05a9693568 100644
>> --- a/net/sunrpc/xdr.c
>> +++ b/net/sunrpc/xdr.c
>> @@ -1237,39 +1237,48 @@ xdr_encode_word(struct xdr_buf *buf, unsigned 
>> int base, u32 obj)
>> EXPORT_SYMBOL_GPL(xdr_encode_word);
>>
>> /* If the netobj starting offset bytes from the start of xdr_buf is 
>> contained
>> - * entirely in the head or the tail, set object to point to it; 
>> otherwise
>> - * try to find space for it at the end of the tail, copy it there, 
>> and
>> - * set obj to point to it. */
>> + * entirely in the head, pages, or tail, set object to point to it; 
>> otherwise
>> + * shift the buffer until it is contained entirely within the pages 
>> or tail.
>> + */
>> int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, 
>> unsigned int offset)
>> {
>> 	struct xdr_buf subbuf;
>> +	unsigned int len_to_boundary;
>>
>> 	if (xdr_decode_word(buf, offset, &obj->len))
>> 		return -EFAULT;
>> -	if (xdr_buf_subsegment(buf, &subbuf, offset + 4, obj->len))
>> +
>> +	offset += 4;
>> +
>> +	/* Is the obj partially in the head? */
>> +	len_to_boundary = buf->head->iov_len - offset;
>> +	if (len_to_boundary > 0 && len_to_boundary < obj->len)
>> +		xdr_shift_buf(buf, len_to_boundary);
>> +
>> +	/* Is the obj partially in the pages? */
>> +	len_to_boundary = buf->head->iov_len + buf->page_len - offset;
>> +	if (len_to_boundary > 0 && len_to_boundary < obj->len)
>> +		xdr_shrink_pagelen(buf, len_to_boundary);
>
> Do you need to check if the obj is entirely in ->pages but crosses a 
> page boundary?

We're going to copy it out into the tail in that case.  I'm assuming
read_bytes_from_xdr_buf() can handle reading across page boundaries.

So unless I'm missing something, I don't think we need to check.

Ben
Chuck Lever Sept. 13, 2019, 4:05 p.m. UTC | #3
Hi Ben-

A few review comments below.


> On Sep 12, 2019, at 1:07 PM, Benjamin Coddington <bcodding@redhat.com> wrote:
> 
> The GSS Message Integrity Check data for krb5i may lie partially in the XDR
> reply buffer's pages and tail.  If so, we try to copy the entire MIC into
> free space in the tail.  But as the estimations of the slack space required
> for authentication and verification have improved there may be less free
> space in the tail to complete this copy -- see commit 2c94b8eca1a2
> ("SUNRPC: Use au_rslack when computing reply buffer size").  In fact, there
> may only be room in the tail for a single copy of the MIC, and not part of
> the MIC and then another complete copy.
> 
> The real world failure reported is that `ls` of a directory on NFS may
> sometimes return -EIO, which can be traced back to xdr_buf_read_netobj()
> failing to find available free space in the tail to copy the MIC.
> 
> Fix this by checking for the case of the MIC crossing the boundaries of
> head, pages, and tail. If so, shift the buffer until the MIC is contained
> completely within the pages or tail.  This allows the remainder of the
> function to create a sub buffer that directly addresses the complete MIC.
> 
> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> Cc: stable@vger.kernel.org
> ---
> net/sunrpc/xdr.c | 45 +++++++++++++++++++++++++++------------------
> 1 file changed, 27 insertions(+), 18 deletions(-)
> 
> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
> index 48c93b9e525e..6e05a9693568 100644
> --- a/net/sunrpc/xdr.c
> +++ b/net/sunrpc/xdr.c
> @@ -1237,39 +1237,48 @@ xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
> EXPORT_SYMBOL_GPL(xdr_encode_word);
> 
> /* If the netobj starting offset bytes from the start of xdr_buf is contained
> - * entirely in the head or the tail, set object to point to it; otherwise
> - * try to find space for it at the end of the tail, copy it there, and
> - * set obj to point to it. */
> + * entirely in the head, pages, or tail, set object to point to it; otherwise
> + * shift the buffer until it is contained entirely within the pages or tail.
> + */

Nit: I would explicitly note in this comment that there is no need
for the caller to free @obj, and perhaps it should be noted that
the organization of @buf can be changed by this function.

Maybe more appropriate for 2/2.


> int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned int offset)
> {
> 	struct xdr_buf subbuf;
> +	unsigned int len_to_boundary;
> 
> 	if (xdr_decode_word(buf, offset, &obj->len))
> 		return -EFAULT;
> -	if (xdr_buf_subsegment(buf, &subbuf, offset + 4, obj->len))
> +
> +	offset += 4;

Nit: No blank line before "offset += 4;" would help me understand
how the offset bump is related to xdr_decode_word(). It took me
a few blinks to see.


> +
> +	/* Is the obj partially in the head? */
> +	len_to_boundary = buf->head->iov_len - offset;
> +	if (len_to_boundary > 0 && len_to_boundary < obj->len)

I'm not especially excited about the integer underflow when offset
is larger than buf->head->iov_len. This might be more explicit:

        if (offset < buf->head[0].iov_len &&
            offset + obj->len > buf->head[0].iov_len)

> +		xdr_shift_buf(buf, len_to_boundary);
> +
> +	/* Is the obj partially in the pages? */
> +	len_to_boundary = buf->head->iov_len + buf->page_len - offset;
> +	if (len_to_boundary > 0 && len_to_boundary < obj->len)

Ditto.


> +		xdr_shrink_pagelen(buf, len_to_boundary);
> +
> +	if (xdr_buf_subsegment(buf, &subbuf, offset, obj->len))
> 		return -EFAULT;
> 
> -	/* Is the obj contained entirely in the head? */
> -	obj->data = subbuf.head[0].iov_base;
> -	if (subbuf.head[0].iov_len == obj->len)
> -		return 0;
> -	/* ..or is the obj contained entirely in the tail? */
> +	/* Most likely: is the obj contained entirely in the tail? */
> 	obj->data = subbuf.tail[0].iov_base;
> 	if (subbuf.tail[0].iov_len == obj->len)
> 		return 0;
> 
> -	/* use end of tail as storage for obj:
> -	 * (We don't copy to the beginning because then we'd have
> -	 * to worry about doing a potentially overlapping copy.
> -	 * This assumes the object is at most half the length of the
> -	 * tail.) */
> +	/* ..or is the obj contained entirely in the head? */
> +	obj->data = subbuf.head[0].iov_base;
> +	if (subbuf.head[0].iov_len == obj->len)
> +		return 0;

It looks like you're reversing these two tests as a micro-optimization?
Maybe that should be left for another patch, since this is supposed to
be a narrow fix.

Also, I found the new comments confusing: here they refer to the head
and tail of @subbuf; above they refer to head and tail of @buf. Note for
2/2, I guess.


> +
> +	/* obj is in the pages: move to tail */
> 	if (obj->len > buf->buflen - buf->len)
> 		return -ENOMEM;
> -	if (buf->tail[0].iov_len != 0)
> -		obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len;
> -	else
> -		obj->data = buf->head[0].iov_base + buf->head[0].iov_len;
> +	obj->data = buf->head[0].iov_base + buf->head[0].iov_len;

Not sure this is a safe change. It's possible that the head buffer
and tail buffer are not contiguous, which is what the buf->tail.iov_len
check is looking for, IMO. Can this hunk be left out?


> 	__read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len);
> +
> 	return 0;
> }
> EXPORT_SYMBOL_GPL(xdr_buf_read_netobj);
> -- 
> 2.20.1
> 

--
Chuck Lever
Benjamin Coddington Sept. 13, 2019, 5:39 p.m. UTC | #4
On 13 Sep 2019, at 12:05, Chuck Lever wrote:

> Hi Ben-
>
> A few review comments below.
>
>
>> On Sep 12, 2019, at 1:07 PM, Benjamin Coddington 
>> <bcodding@redhat.com> wrote:
>>
>> The GSS Message Integrity Check data for krb5i may lie partially in 
>> the XDR
>> reply buffer's pages and tail.  If so, we try to copy the entire MIC 
>> into
>> free space in the tail.  But as the estimations of the slack space 
>> required
>> for authentication and verification have improved there may be less 
>> free
>> space in the tail to complete this copy -- see commit 2c94b8eca1a2
>> ("SUNRPC: Use au_rslack when computing reply buffer size").  In fact, 
>> there
>> may only be room in the tail for a single copy of the MIC, and not 
>> part of
>> the MIC and then another complete copy.
>>
>> The real world failure reported is that `ls` of a directory on NFS 
>> may
>> sometimes return -EIO, which can be traced back to 
>> xdr_buf_read_netobj()
>> failing to find available free space in the tail to copy the MIC.
>>
>> Fix this by checking for the case of the MIC crossing the boundaries 
>> of
>> head, pages, and tail. If so, shift the buffer until the MIC is 
>> contained
>> completely within the pages or tail.  This allows the remainder of 
>> the
>> function to create a sub buffer that directly addresses the complete 
>> MIC.
>>
>> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
>> Cc: stable@vger.kernel.org
>> ---
>> net/sunrpc/xdr.c | 45 +++++++++++++++++++++++++++------------------
>> 1 file changed, 27 insertions(+), 18 deletions(-)
>>
>> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
>> index 48c93b9e525e..6e05a9693568 100644
>> --- a/net/sunrpc/xdr.c
>> +++ b/net/sunrpc/xdr.c
>> @@ -1237,39 +1237,48 @@ xdr_encode_word(struct xdr_buf *buf, unsigned 
>> int base, u32 obj)
>> EXPORT_SYMBOL_GPL(xdr_encode_word);
>>
>> /* If the netobj starting offset bytes from the start of xdr_buf is 
>> contained
>> - * entirely in the head or the tail, set object to point to it; 
>> otherwise
>> - * try to find space for it at the end of the tail, copy it there, 
>> and
>> - * set obj to point to it. */
>> + * entirely in the head, pages, or tail, set object to point to it; 
>> otherwise
>> + * shift the buffer until it is contained entirely within the pages 
>> or tail.
>> + */
>
> Nit: I would explicitly note in this comment that there is no need
> for the caller to free @obj, and perhaps it should be noted that
> the organization of @buf can be changed by this function.
>
> Maybe more appropriate for 2/2.

Ok.. yes.. though I don't feel strongly about noting that *obj doesn't
need to be freed.


>> int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, 
>> unsigned int offset)
>> {
>> 	struct xdr_buf subbuf;
>> +	unsigned int len_to_boundary;
>>
>> 	if (xdr_decode_word(buf, offset, &obj->len))
>> 		return -EFAULT;
>> -	if (xdr_buf_subsegment(buf, &subbuf, offset + 4, obj->len))
>> +
>> +	offset += 4;
>
> Nit: No blank line before "offset += 4;" would help me understand
> how the offset bump is related to xdr_decode_word(). It took me
> a few blinks to see.
>
>
>> +
>> +	/* Is the obj partially in the head? */
>> +	len_to_boundary = buf->head->iov_len - offset;
>> +	if (len_to_boundary > 0 && len_to_boundary < obj->len)
>
> I'm not especially excited about the integer underflow when offset
> is larger than buf->head->iov_len. This might be more explicit:
>
>         if (offset < buf->head[0].iov_len &&
>             offset + obj->len > buf->head[0].iov_len)

Yep, makes sense - and I prefer the clarity.

>> +		xdr_shift_buf(buf, len_to_boundary);
>> +
>> +	/* Is the obj partially in the pages? */
>> +	len_to_boundary = buf->head->iov_len + buf->page_len - offset;
>> +	if (len_to_boundary > 0 && len_to_boundary < obj->len)
>
> Ditto.
>
>
>> +		xdr_shrink_pagelen(buf, len_to_boundary);
>> +
>> +	if (xdr_buf_subsegment(buf, &subbuf, offset, obj->len))
>> 		return -EFAULT;
>>
>> -	/* Is the obj contained entirely in the head? */
>> -	obj->data = subbuf.head[0].iov_base;
>> -	if (subbuf.head[0].iov_len == obj->len)
>> -		return 0;
>> -	/* ..or is the obj contained entirely in the tail? */
>> +	/* Most likely: is the obj contained entirely in the tail? */
>> 	obj->data = subbuf.tail[0].iov_base;
>> 	if (subbuf.tail[0].iov_len == obj->len)
>> 		return 0;
>>
>> -	/* use end of tail as storage for obj:
>> -	 * (We don't copy to the beginning because then we'd have
>> -	 * to worry about doing a potentially overlapping copy.
>> -	 * This assumes the object is at most half the length of the
>> -	 * tail.) */
>> +	/* ..or is the obj contained entirely in the head? */
>> +	obj->data = subbuf.head[0].iov_base;
>> +	if (subbuf.head[0].iov_len == obj->len)
>> +		return 0;
>
> It looks like you're reversing these two tests as a 
> micro-optimization?
> Maybe that should be left for another patch, since this is supposed to
> be a narrow fix.

Yes, if not done here - is it even worth another patch?

> Also, I found the new comments confusing: here they refer to the head
> and tail of @subbuf; above they refer to head and tail of @buf. Note 
> for
> 2/2, I guess.

I can clarify in 2/2.

>> +
>> +	/* obj is in the pages: move to tail */
>> 	if (obj->len > buf->buflen - buf->len)
>> 		return -ENOMEM;
>> -	if (buf->tail[0].iov_len != 0)
>> -		obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len;
>> -	else
>> -		obj->data = buf->head[0].iov_base + buf->head[0].iov_len;
>> +	obj->data = buf->head[0].iov_base + buf->head[0].iov_len;
>
> Not sure this is a safe change. It's possible that the head buffer
> and tail buffer are not contiguous, which is what the 
> buf->tail.iov_len
> check is looking for, IMO. Can this hunk be left out?

That's something I missed somehow, thanks for pointing it out.  I see now
that the transport can allocate them any way it likes.

Thanks for the review, I'll send a v2.

Ben
Benjamin Coddington Sept. 15, 2019, 2:08 p.m. UTC | #5
On 13 Sep 2019, at 13:39, Benjamin Coddington wrote:

> On 13 Sep 2019, at 12:05, Chuck Lever wrote:
>
>> Hi Ben-
>>
>> A few review comments below.
>>
>>
>>> On Sep 12, 2019, at 1:07 PM, Benjamin Coddington 
>>> <bcodding@redhat.com> wrote:
>>>
>>> The GSS Message Integrity Check data for krb5i may lie partially in 
>>> the XDR
>>> reply buffer's pages and tail.  If so, we try to copy the entire MIC 
>>> into
>>> free space in the tail.  But as the estimations of the slack space 
>>> required
>>> for authentication and verification have improved there may be less 
>>> free
>>> space in the tail to complete this copy -- see commit 2c94b8eca1a2
>>> ("SUNRPC: Use au_rslack when computing reply buffer size").  In 
>>> fact, there
>>> may only be room in the tail for a single copy of the MIC, and not 
>>> part of
>>> the MIC and then another complete copy.
>>>
>>> The real world failure reported is that `ls` of a directory on NFS 
>>> may
>>> sometimes return -EIO, which can be traced back to 
>>> xdr_buf_read_netobj()
>>> failing to find available free space in the tail to copy the MIC.
>>>
>>> Fix this by checking for the case of the MIC crossing the boundaries 
>>> of
>>> head, pages, and tail. If so, shift the buffer until the MIC is 
>>> contained
>>> completely within the pages or tail.  This allows the remainder of 
>>> the
>>> function to create a sub buffer that directly addresses the complete 
>>> MIC.
>>>
>>> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
>>> Cc: stable@vger.kernel.org
>>> ---
>>> net/sunrpc/xdr.c | 45 +++++++++++++++++++++++++++------------------
>>> 1 file changed, 27 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
>>> index 48c93b9e525e..6e05a9693568 100644
>>> --- a/net/sunrpc/xdr.c
>>> +++ b/net/sunrpc/xdr.c
>>> @@ -1237,39 +1237,48 @@ xdr_encode_word(struct xdr_buf *buf, 
>>> unsigned int base, u32 obj)
>>> EXPORT_SYMBOL_GPL(xdr_encode_word);
>>>
>>> /* If the netobj starting offset bytes from the start of xdr_buf is 
>>> contained
>>> - * entirely in the head or the tail, set object to point to it; 
>>> otherwise
>>> - * try to find space for it at the end of the tail, copy it there, 
>>> and
>>> - * set obj to point to it. */
>>> + * entirely in the head, pages, or tail, set object to point to it; 
>>> otherwise
>>> + * shift the buffer until it is contained entirely within the pages 
>>> or tail.
>>> + */
>>
>> Nit: I would explicitly note in this comment that there is no need
>> for the caller to free @obj, and perhaps it should be noted that
>> the organization of @buf can be changed by this function.
>>
>> Maybe more appropriate for 2/2.
>
> Ok.. yes.. though I don't feel strongly about noting that *obj doesn't
> need to be freed.
>
>
>>> int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, 
>>> unsigned int offset)
>>> {
>>> 	struct xdr_buf subbuf;
>>> +	unsigned int len_to_boundary;
>>>
>>> 	if (xdr_decode_word(buf, offset, &obj->len))
>>> 		return -EFAULT;
>>> -	if (xdr_buf_subsegment(buf, &subbuf, offset + 4, obj->len))
>>> +
>>> +	offset += 4;
>>
>> Nit: No blank line before "offset += 4;" would help me understand
>> how the offset bump is related to xdr_decode_word(). It took me
>> a few blinks to see.
>>
>>
>>> +
>>> +	/* Is the obj partially in the head? */
>>> +	len_to_boundary = buf->head->iov_len - offset;
>>> +	if (len_to_boundary > 0 && len_to_boundary < obj->len)
>>
>> I'm not especially excited about the integer underflow when offset
>> is larger than buf->head->iov_len. This might be more explicit:
>>
>>         if (offset < buf->head[0].iov_len &&
>>             offset + obj->len > buf->head[0].iov_len)
>
> Yep, makes sense - and I prefer the clarity.
>
>>> +		xdr_shift_buf(buf, len_to_boundary);
>>> +
>>> +	/* Is the obj partially in the pages? */
>>> +	len_to_boundary = buf->head->iov_len + buf->page_len - offset;
>>> +	if (len_to_boundary > 0 && len_to_boundary < obj->len)
>>
>> Ditto.
>>
>>
>>> +		xdr_shrink_pagelen(buf, len_to_boundary);
>>> +
>>> +	if (xdr_buf_subsegment(buf, &subbuf, offset, obj->len))
>>> 		return -EFAULT;
>>>
>>> -	/* Is the obj contained entirely in the head? */
>>> -	obj->data = subbuf.head[0].iov_base;
>>> -	if (subbuf.head[0].iov_len == obj->len)
>>> -		return 0;
>>> -	/* ..or is the obj contained entirely in the tail? */
>>> +	/* Most likely: is the obj contained entirely in the tail? */
>>> 	obj->data = subbuf.tail[0].iov_base;
>>> 	if (subbuf.tail[0].iov_len == obj->len)
>>> 		return 0;
>>>
>>> -	/* use end of tail as storage for obj:
>>> -	 * (We don't copy to the beginning because then we'd have
>>> -	 * to worry about doing a potentially overlapping copy.
>>> -	 * This assumes the object is at most half the length of the
>>> -	 * tail.) */
>>> +	/* ..or is the obj contained entirely in the head? */
>>> +	obj->data = subbuf.head[0].iov_base;
>>> +	if (subbuf.head[0].iov_len == obj->len)
>>> +		return 0;
>>
>> It looks like you're reversing these two tests as a 
>> micro-optimization?
>> Maybe that should be left for another patch, since this is supposed 
>> to
>> be a narrow fix.
>
> Yes, if not done here - is it even worth another patch?
>
>> Also, I found the new comments confusing: here they refer to the head
>> and tail of @subbuf; above they refer to head and tail of @buf. Note 
>> for
>> 2/2, I guess.
>
> I can clarify in 2/2.
>
>>> +
>>> +	/* obj is in the pages: move to tail */
>>> 	if (obj->len > buf->buflen - buf->len)
>>> 		return -ENOMEM;
>>> -	if (buf->tail[0].iov_len != 0)
>>> -		obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len;
>>> -	else
>>> -		obj->data = buf->head[0].iov_base + buf->head[0].iov_len;
>>> +	obj->data = buf->head[0].iov_base + buf->head[0].iov_len;
>>
>> Not sure this is a safe change. It's possible that the head buffer
>> and tail buffer are not contiguous, which is what the 
>> buf->tail.iov_len
>> check is looking for, IMO. Can this hunk be left out?
>
> That's something I missed somehow, thanks for pointing it out.  I see 
> now
> that the transport can allocate them any way it likes.

Just looking at this again today -- we can definitely keep the check, but
the second half of the statement also assumes a contiguous head/tail range.
I think it's safe to just remove the test altogether and place the netobj at
the end of the tail.  Then in 2/2, we'll just place it at the beginning of
the tail because the function is specialized for the MIC.

Ben

Patch
diff mbox series

diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 48c93b9e525e..6e05a9693568 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1237,39 +1237,48 @@  xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
 EXPORT_SYMBOL_GPL(xdr_encode_word);
 
 /* If the netobj starting offset bytes from the start of xdr_buf is contained
- * entirely in the head or the tail, set object to point to it; otherwise
- * try to find space for it at the end of the tail, copy it there, and
- * set obj to point to it. */
+ * entirely in the head, pages, or tail, set object to point to it; otherwise
+ * shift the buffer until it is contained entirely within the pages or tail.
+ */
 int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned int offset)
 {
 	struct xdr_buf subbuf;
+	unsigned int len_to_boundary;
 
 	if (xdr_decode_word(buf, offset, &obj->len))
 		return -EFAULT;
-	if (xdr_buf_subsegment(buf, &subbuf, offset + 4, obj->len))
+
+	offset += 4;
+
+	/* Is the obj partially in the head? */
+	len_to_boundary = buf->head->iov_len - offset;
+	if (len_to_boundary > 0 && len_to_boundary < obj->len)
+		xdr_shift_buf(buf, len_to_boundary);
+
+	/* Is the obj partially in the pages? */
+	len_to_boundary = buf->head->iov_len + buf->page_len - offset;
+	if (len_to_boundary > 0 && len_to_boundary < obj->len)
+		xdr_shrink_pagelen(buf, len_to_boundary);
+
+	if (xdr_buf_subsegment(buf, &subbuf, offset, obj->len))
 		return -EFAULT;
 
-	/* Is the obj contained entirely in the head? */
-	obj->data = subbuf.head[0].iov_base;
-	if (subbuf.head[0].iov_len == obj->len)
-		return 0;
-	/* ..or is the obj contained entirely in the tail? */
+	/* Most likely: is the obj contained entirely in the tail? */
 	obj->data = subbuf.tail[0].iov_base;
 	if (subbuf.tail[0].iov_len == obj->len)
 		return 0;
 
-	/* use end of tail as storage for obj:
-	 * (We don't copy to the beginning because then we'd have
-	 * to worry about doing a potentially overlapping copy.
-	 * This assumes the object is at most half the length of the
-	 * tail.) */
+	/* ..or is the obj contained entirely in the head? */
+	obj->data = subbuf.head[0].iov_base;
+	if (subbuf.head[0].iov_len == obj->len)
+		return 0;
+
+	/* obj is in the pages: move to tail */
 	if (obj->len > buf->buflen - buf->len)
 		return -ENOMEM;
-	if (buf->tail[0].iov_len != 0)
-		obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len;
-	else
-		obj->data = buf->head[0].iov_base + buf->head[0].iov_len;
+	obj->data = buf->head[0].iov_base + buf->head[0].iov_len;
 	__read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xdr_buf_read_netobj);