diff mbox series

[v2] skb_expand_head() adjust skb->truesize incorrectly

Message ID 860513d5-fd02-832b-1c4c-ea2b17477d76@virtuozzo.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [v2] skb_expand_head() adjust skb->truesize incorrectly | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Guessed tree name to be net-next
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cc_maintainers warning 6 maintainers not CCed: alobakin@pm.me jonathan.lemon@gmail.com pabeni@redhat.com gnault@redhat.com cong.wang@bytedance.com willemb@google.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit fail Errors and warnings before: 1 this patch: 3
netdev/kdoc fail Errors and warnings before: 0 this patch: 2
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 82 lines checked
netdev/build_allmodconfig_warn fail Errors and warnings before: 1 this patch: 3
netdev/header_inline success Link

Commit Message

Vasily Averin Aug. 29, 2021, 12:59 p.m. UTC
Christoph Paasch reports [1] an incorrect skb->truesize
after the skb_expand_head() call in ip6_xmit.
This may happen for two reasons:
- skb_set_owner_w() for the newly cloned skb is called too early,
before pskb_expand_head(), where truesize is adjusted for the (!skb->sk) case.
- pskb_expand_head() does not adjust truesize in the (skb->sk) case.
In this case sk->sk_wmem_alloc should be adjusted too.

[1] https://lkml.org/lkml/2021/8/20/1082

Reported-by: Christoph Paasch <christoph.paasch@gmail.com>
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
---
v2: based on patch version from Eric Dumazet,
    added __pskb_expand_head() function, which can be forced
    to adjust skb->truesize and sk->sk_wmem_alloc.
---
 net/core/skbuff.c | 43 +++++++++++++++++++++++++++++--------------
 1 file changed, 29 insertions(+), 14 deletions(-)

Comments

Vasily Averin Aug. 30, 2021, 5:52 a.m. UTC | #1
1) I forgot to specify that the patch is intended for the net-next git tree.
2) I forgot to add Alexey Kuznetsov to cc. I resent the patch to him
  in a separate letter and received his consent.
3) I forgot to add the Fixes tag:
Fixes: f1260ff15a71 ("skbuff: introduce skb_expand_head()")

Thank you,
	Vasily Averin

On 8/29/21 3:59 PM, Vasily Averin wrote:
> Christoph Paasch reports [1] about incorrect skb->truesize
> after skb_expand_head() call in ip6_xmit.
> This may happen because of two reasons:
> - skb_set_owner_w() for newly cloned skb is called too early,
> before pskb_expand_head() where truesize is adjusted for (!skb-sk) case.
> - pskb_expand_head() does not adjust truesize in (skb->sk) case.
> In this case sk->sk_wmem_alloc should be adjusted too.
> 
> [1] https://lkml.org/lkml/2021/8/20/1082
> 
> Reported-by: Christoph Paasch <christoph.paasch@gmail.com>
> Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
> ---
> v2: based on patch version from Eric Dumazet,
>     added __pskb_expand_head() function, which can be forced
>     to adjust skb->truesize and sk->sk_wmem_alloc.
> ---
>  net/core/skbuff.c | 43 +++++++++++++++++++++++++++++--------------
>  1 file changed, 29 insertions(+), 14 deletions(-)
> 
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index f931176..4691023 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -1681,10 +1681,10 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
>   *	reloaded after call to this function.
>   */
>  
> -int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
> -		     gfp_t gfp_mask)
> +static int __pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
> +			      gfp_t gfp_mask, bool update_truesize)
>  {
> -	int i, osize = skb_end_offset(skb);
> +	int delta, i, osize = skb_end_offset(skb);
>  	int size = osize + nhead + ntail;
>  	long off;
>  	u8 *data;
> @@ -1756,9 +1756,13 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
>  	 * For the moment, we really care of rx path, or
>  	 * when skb is orphaned (not attached to a socket).
>  	 */
> -	if (!skb->sk || skb->destructor == sock_edemux)
> -		skb->truesize += size - osize;
> -
> +	delta = size - osize;
> +	if (!skb->sk || skb->destructor == sock_edemux) {
> +		skb->truesize += delta;
> +	} else if (update_truesize) {
> +		refcount_add(delta, &skb->sk->sk_wmem_alloc);
> +		skb->truesize += delta;
> +	}
>  	return 0;
>  
>  nofrags:
> @@ -1766,6 +1770,12 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
>  nodata:
>  	return -ENOMEM;
>  }
> +
> +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
> +		     gfp_t gfp_mask)
> +{
> +	return __pskb_expand_head(skb, nhead, ntail, gfp_mask, false);
> +}
>  EXPORT_SYMBOL(pskb_expand_head);
>  
>  /* Make private copy of skb with writable head and some headroom */
> @@ -1804,28 +1814,33 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
>  struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
>  {
>  	int delta = headroom - skb_headroom(skb);
> +	struct sk_buff *oskb = NULL;
>  
>  	if (WARN_ONCE(delta <= 0,
>  		      "%s is expecting an increase in the headroom", __func__))
>  		return skb;
>  
> +	delta = SKB_DATA_ALIGN(delta);
>  	/* pskb_expand_head() might crash, if skb is shared */
>  	if (skb_shared(skb)) {
>  		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
>  
> -		if (likely(nskb)) {
> -			if (skb->sk)
> -				skb_set_owner_w(nskb, skb->sk);
> -			consume_skb(skb);
> -		} else {
> +		if (unlikely(!nskb)) {
>  			kfree_skb(skb);
> +			return NULL;
>  		}
> +		oskb = skb;
>  		skb = nskb;
>  	}
> -	if (skb &&
> -	    pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
> +	if (__pskb_expand_head(skb, delta, 0, GFP_ATOMIC, true)) {
>  		kfree_skb(skb);
> -		skb = NULL;
> +		kfree_skb(oskb);
> +		return NULL;
> +	}
> +	if (oskb) {
> +		if (oskb->sk)
> +			skb_set_owner_w(skb, oskb->sk);
> +		consume_skb(oskb);
>  	}
>  	return skb;
>  }
>
Eric Dumazet Aug. 30, 2021, 4:01 p.m. UTC | #2
On 8/29/21 5:59 AM, Vasily Averin wrote:
> Christoph Paasch reports [1] about incorrect skb->truesize
> after skb_expand_head() call in ip6_xmit.
> This may happen because of two reasons:
> - skb_set_owner_w() for newly cloned skb is called too early,
> before pskb_expand_head() where truesize is adjusted for (!skb-sk) case.
> - pskb_expand_head() does not adjust truesize in (skb->sk) case.
> In this case sk->sk_wmem_alloc should be adjusted too.
> 
> [1] https://lkml.org/lkml/2021/8/20/1082
> 
> Reported-by: Christoph Paasch <christoph.paasch@gmail.com>
> Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
> ---
> v2: based on patch version from Eric Dumazet,
>     added __pskb_expand_head() function, which can be forced
>     to adjust skb->truesize and sk->sk_wmem_alloc.
> ---
>  net/core/skbuff.c | 43 +++++++++++++++++++++++++++++--------------
>  1 file changed, 29 insertions(+), 14 deletions(-)
> 
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index f931176..4691023 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -1681,10 +1681,10 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
>   *	reloaded after call to this function.
>   */
>  
> -int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
> -		     gfp_t gfp_mask)
> +static int __pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
> +			      gfp_t gfp_mask, bool update_truesize)
>  {
> -	int i, osize = skb_end_offset(skb);
> +	int delta, i, osize = skb_end_offset(skb);
>  	int size = osize + nhead + ntail;
>  	long off;
>  	u8 *data;
> @@ -1756,9 +1756,13 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
>  	 * For the moment, we really care of rx path, or
>  	 * when skb is orphaned (not attached to a socket).
>  	 */
> -	if (!skb->sk || skb->destructor == sock_edemux)
> -		skb->truesize += size - osize;
> -
> +	delta = size - osize;
> +	if (!skb->sk || skb->destructor == sock_edemux) {
> +		skb->truesize += delta;
> +	} else if (update_truesize) {

Unfortunately we can not always do this sk_wmem_alloc change here.

Some skbs have skb->sk set, but the 'reference on socket' is not held through sk_wmem_alloc.

It seems you need a helper to make sure skb->destructor is one of
the destructors that use skb->truesize and sk->sk_wmem_alloc

For instance, skb_orphan_partial() could have been used.



> +		refcount_add(delta, &skb->sk->sk_wmem_alloc);
> +		skb->truesize += delta;
> +	}
>  	return 0;
>  
>  nofrags:
> @@ -1766,6 +1770,12 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
>  nodata:
>  	return -ENOMEM;
>  }
> +
> +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
> +		     gfp_t gfp_mask)
> +{
> +	return __pskb_expand_head(skb, nhead, ntail, gfp_mask, false);
> +}
>  EXPORT_SYMBOL(pskb_expand_head);
>  
>  /* Make private copy of skb with writable head and some headroom */
> @@ -1804,28 +1814,33 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
>  struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
>  {
>  	int delta = headroom - skb_headroom(skb);
> +	struct sk_buff *oskb = NULL;
>  
>  	if (WARN_ONCE(delta <= 0,
>  		      "%s is expecting an increase in the headroom", __func__))
>  		return skb;
>  
> +	delta = SKB_DATA_ALIGN(delta);
>  	/* pskb_expand_head() might crash, if skb is shared */
>  	if (skb_shared(skb)) {
>  		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
>  
> -		if (likely(nskb)) {
> -			if (skb->sk)
> -				skb_set_owner_w(nskb, skb->sk);
> -			consume_skb(skb);
> -		} else {
> +		if (unlikely(!nskb)) {
>  			kfree_skb(skb);
> +			return NULL;
>  		}
> +		oskb = skb;
>  		skb = nskb;
>  	}
> -	if (skb &&
> -	    pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
> +	if (__pskb_expand_head(skb, delta, 0, GFP_ATOMIC, true)) {
>  		kfree_skb(skb);
> -		skb = NULL;
> +		kfree_skb(oskb);
> +		return NULL;
> +	}
> +	if (oskb) {
> +		if (oskb->sk)
> +			skb_set_owner_w(skb, oskb->sk);
> +		consume_skb(oskb);
>  	}
>  	return skb;
>  }
>
Vasily Averin Aug. 30, 2021, 6:09 p.m. UTC | #3
On 8/30/21 7:01 PM, Eric Dumazet wrote:
> On 8/29/21 5:59 AM, Vasily Averin wrote:
>> Christoph Paasch reports [1] about incorrect skb->truesize
>> after skb_expand_head() call in ip6_xmit.
>> This may happen because of two reasons:
>> - skb_set_owner_w() for newly cloned skb is called too early,
>> before pskb_expand_head() where truesize is adjusted for (!skb-sk) case.
>> - pskb_expand_head() does not adjust truesize in (skb->sk) case.
>> In this case sk->sk_wmem_alloc should be adjusted too.
>>
>> [1] https://lkml.org/lkml/2021/8/20/1082
>> @@ -1756,9 +1756,13 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
>>  	 * For the moment, we really care of rx path, or
>>  	 * when skb is orphaned (not attached to a socket).
>>  	 */
>> -	if (!skb->sk || skb->destructor == sock_edemux)
>> -		skb->truesize += size - osize;
>> -
>> +	delta = size - osize;
>> +	if (!skb->sk || skb->destructor == sock_edemux) {
>> +		skb->truesize += delta;
>> +	} else if (update_truesize) {
> 
> Unfortunately we can not always do this sk_wmem_alloc change here.
> 
> Some skb have skb->sk set, but the 'reference on socket' is not through sk_wmem_alloc

Could you please provide an example?
In the past, in all handled cases, we cloned the original skb and then unconditionally assigned the sock_wfree destructor to the skb.
Are you saying that it worked correctly somehow?

I expected that if we set sock_wfree, we have a guarantee that the old skb adjusted sk_wmem_alloc.
Am I wrong?
Could you please point to such a case?

> It seems you need a helper to make sure skb->destructor is one of
> the destructors that use skb->truesize and sk->sk_wmem_alloc
> 
> For instance, skb_orphan_partial() could have been used.

Thank you, will investigate.
	Vasily Averin
Vasily Averin Aug. 30, 2021, 6:37 p.m. UTC | #4
On 8/30/21 9:09 PM, Vasily Averin wrote:
> On 8/30/21 7:01 PM, Eric Dumazet wrote:
>> On 8/29/21 5:59 AM, Vasily Averin wrote:
>>> Christoph Paasch reports [1] about incorrect skb->truesize
>>> after skb_expand_head() call in ip6_xmit.
>>> This may happen because of two reasons:
>>> - skb_set_owner_w() for newly cloned skb is called too early,
>>> before pskb_expand_head() where truesize is adjusted for (!skb-sk) case.
>>> - pskb_expand_head() does not adjust truesize in (skb->sk) case.
>>> In this case sk->sk_wmem_alloc should be adjusted too.
>>>
>>> [1] https://lkml.org/lkml/2021/8/20/1082
>>> @@ -1756,9 +1756,13 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
>>>  	 * For the moment, we really care of rx path, or
>>>  	 * when skb is orphaned (not attached to a socket).
>>>  	 */
>>> -	if (!skb->sk || skb->destructor == sock_edemux)
>>> -		skb->truesize += size - osize;
>>> -
>>> +	delta = size - osize;
>>> +	if (!skb->sk || skb->destructor == sock_edemux) {
>>> +		skb->truesize += delta;
>>> +	} else if (update_truesize) {
>>
>> Unfortunately we can not always do this sk_wmem_alloc change here.
>>
>> Some skb have skb->sk set, but the 'reference on socket' is not through sk_wmem_alloc
> 
> Could you please provide some example?
> In past in all handeled cases we have cloned original skb and then unconditionally assigned skb sock_wfree destructor.
> Do you want to say that it worked correctly somehow?
> 
> I expected if we set sock_wfree, we have guarantee that old skb adjusted sk_wmem_alloc.
> Am I wrong?
> Could you please point on such case?

However, if that is true, it is not enough to adjust sk_wmem_alloc for the proper destructors,
because other destructors may need to do something else.
In that case I can check the destructor first and clone the skb before the pskb_expand_head() call,
as was done before.

>> It seems you need a helper to make sure skb->destructor is one of
>> the destructors that use skb->truesize and sk->sk_wmem_alloc
>>
>> For instance, skb_orphan_partial() could have been used.
> 
> Thank you, will investigate.
> 	Vasily Averin
>
Eric Dumazet Aug. 30, 2021, 7:58 p.m. UTC | #5
On 8/30/21 11:09 AM, Vasily Averin wrote:
> On 8/30/21 7:01 PM, Eric Dumazet wrote:
>> On 8/29/21 5:59 AM, Vasily Averin wrote:
>>> Christoph Paasch reports [1] about incorrect skb->truesize
>>> after skb_expand_head() call in ip6_xmit.
>>> This may happen because of two reasons:
>>> - skb_set_owner_w() for newly cloned skb is called too early,
>>> before pskb_expand_head() where truesize is adjusted for (!skb-sk) case.
>>> - pskb_expand_head() does not adjust truesize in (skb->sk) case.
>>> In this case sk->sk_wmem_alloc should be adjusted too.
>>>
>>> [1] https://lkml.org/lkml/2021/8/20/1082
>>> @@ -1756,9 +1756,13 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
>>>  	 * For the moment, we really care of rx path, or
>>>  	 * when skb is orphaned (not attached to a socket).
>>>  	 */
>>> -	if (!skb->sk || skb->destructor == sock_edemux)
>>> -		skb->truesize += size - osize;
>>> -
>>> +	delta = size - osize;
>>> +	if (!skb->sk || skb->destructor == sock_edemux) {
>>> +		skb->truesize += delta;
>>> +	} else if (update_truesize) {
>>
>> Unfortunately we can not always do this sk_wmem_alloc change here.
>>
>> Some skb have skb->sk set, but the 'reference on socket' is not through sk_wmem_alloc
> 
> Could you please provide some example?
> In past in all handeled cases we have cloned original skb and then unconditionally assigned skb sock_wfree destructor.

In the past we ignored old value of skb->destructor,
since the clone got a NULL destructor.

In your patch you assume it is sock_wfree, or one of the other destructors changing sk_wmem_alloc.


You need to make sure skb->destructor is one of the known destructors which 
will basically remove skb->truesize from sk->sk_wmem_alloc.

This will also make sure skb->sk is a 'full socket'

If not, you should not change sk->sk_wmem_alloc

> Do you want to say that it worked correctly somehow?

I am simply saying your patch adds a wrong assumption.

> 
> I expected if we set sock_wfree, we have guarantee that old skb adjusted sk_wmem_alloc.
> Am I wrong?
> Could you please point on such case?
> 
>> It seems you need a helper to make sure skb->destructor is one of
>> the destructors that use skb->truesize and sk->sk_wmem_alloc
>>
>> For instance, skb_orphan_partial() could have been used.
> 
> Thank you, will investigate.
> 	Vasily Averin
>
diff mbox series

Patch

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f931176..4691023 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1681,10 +1681,10 @@  struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
  *	reloaded after call to this function.
  */
 
-int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
-		     gfp_t gfp_mask)
+static int __pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
+			      gfp_t gfp_mask, bool update_truesize)
 {
-	int i, osize = skb_end_offset(skb);
+	int delta, i, osize = skb_end_offset(skb);
 	int size = osize + nhead + ntail;
 	long off;
 	u8 *data;
@@ -1756,9 +1756,13 @@  int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	 * For the moment, we really care of rx path, or
 	 * when skb is orphaned (not attached to a socket).
 	 */
-	if (!skb->sk || skb->destructor == sock_edemux)
-		skb->truesize += size - osize;
-
+	delta = size - osize;
+	if (!skb->sk || skb->destructor == sock_edemux) {
+		skb->truesize += delta;
+	} else if (update_truesize) {
+		refcount_add(delta, &skb->sk->sk_wmem_alloc);
+		skb->truesize += delta;
+	}
 	return 0;
 
 nofrags:
@@ -1766,6 +1770,12 @@  int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 nodata:
 	return -ENOMEM;
 }
+
+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
+		     gfp_t gfp_mask)
+{
+	return __pskb_expand_head(skb, nhead, ntail, gfp_mask, false);
+}
 EXPORT_SYMBOL(pskb_expand_head);
 
 /* Make private copy of skb with writable head and some headroom */
@@ -1804,28 +1814,33 @@  struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
 struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
 {
 	int delta = headroom - skb_headroom(skb);
+	struct sk_buff *oskb = NULL;
 
 	if (WARN_ONCE(delta <= 0,
 		      "%s is expecting an increase in the headroom", __func__))
 		return skb;
 
+	delta = SKB_DATA_ALIGN(delta);
 	/* pskb_expand_head() might crash, if skb is shared */
 	if (skb_shared(skb)) {
 		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
 
-		if (likely(nskb)) {
-			if (skb->sk)
-				skb_set_owner_w(nskb, skb->sk);
-			consume_skb(skb);
-		} else {
+		if (unlikely(!nskb)) {
 			kfree_skb(skb);
+			return NULL;
 		}
+		oskb = skb;
 		skb = nskb;
 	}
-	if (skb &&
-	    pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
+	if (__pskb_expand_head(skb, delta, 0, GFP_ATOMIC, true)) {
 		kfree_skb(skb);
-		skb = NULL;
+		kfree_skb(oskb);
+		return NULL;
+	}
+	if (oskb) {
+		if (oskb->sk)
+			skb_set_owner_w(skb, oskb->sk);
+		consume_skb(oskb);
 	}
 	return skb;
 }