diff mbox series

[RFC,net-next,v2,1/3] net: skb: plumb napi state thru skb freeing paths

Message ID 20230405232100.103392-2-kuba@kernel.org (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series page_pool: allow caching from safely localized NAPI | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 20 this patch: 20
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 18 this patch: 18
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 20 this patch: 20
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 134 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Jakub Kicinski April 5, 2023, 11:20 p.m. UTC
We maintain a NAPI-local cache of skbs which is fed by napi_consume_skb().
Going forward we will also try to cache head and data pages.
Plumb the "are we in a normal NAPI context" information thru
deeper into the freeing path, up to skb_release_data() and
skb_free_head()/skb_pp_recycle().

Use "bool in_normal_napi" rather than bare "int budget",
the further we get from NAPI the more confusing the budget
argument may seem (particularly whether 0 or MAX is the
correct value to pass in when not in NAPI).

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/skbuff.c | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

Comments

Yunsheng Lin April 7, 2023, 9:15 a.m. UTC | #1
On 2023/4/6 7:20, Jakub Kicinski wrote:
> We maintain a NAPI-local cache of skbs which is fed by napi_consume_skb().
> Going forward we will also try to cache head and data pages.
> Plumb the "are we in a normal NAPI context" information thru
> deeper into the freeing path, up to skb_release_data() and
> skb_free_head()/skb_pp_recycle().
> 
> Use "bool in_normal_napi" rather than bare "int budget",
> the further we get from NAPI the more confusing the budget
> argument may seem (particularly whether 0 or MAX is the
> correct value to pass in when not in NAPI).
> 
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
>  net/core/skbuff.c | 38 ++++++++++++++++++++------------------
>  1 file changed, 20 insertions(+), 18 deletions(-)
> 
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 050a875d09c5..8d5377b4112f 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -839,7 +839,7 @@ static void skb_clone_fraglist(struct sk_buff *skb)
>  		skb_get(list);
>  }
>  
> -static bool skb_pp_recycle(struct sk_buff *skb, void *data)
> +static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool in_normal_napi)

What does *normal* mean in 'in_normal_napi'?
can we just use in_napi?

>  {
>  	if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
>  		return false;
> @@ -856,12 +856,12 @@ static void skb_kfree_head(void *head, unsigned int end_offset)
>  		kfree(head);
>  }
>  
> -static void skb_free_head(struct sk_buff *skb)
> +static void skb_free_head(struct sk_buff *skb, bool in_normal_napi)
>  {
>  	unsigned char *head = skb->head;
>  
>  	if (skb->head_frag) {
> -		if (skb_pp_recycle(skb, head))
> +		if (skb_pp_recycle(skb, head, in_normal_napi))
>  			return;
>  		skb_free_frag(head);
>  	} else {
> @@ -869,7 +869,8 @@ static void skb_free_head(struct sk_buff *skb)
>  	}
>  }
>  
> -static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason)
> +static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason,
> +			     bool in_normal_napi)
>  {
>  	struct skb_shared_info *shinfo = skb_shinfo(skb);
>  	int i;
> @@ -894,7 +895,7 @@ static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason)
>  	if (shinfo->frag_list)
>  		kfree_skb_list_reason(shinfo->frag_list, reason);
>  
> -	skb_free_head(skb);
> +	skb_free_head(skb, in_normal_napi);
>  exit:
>  	/* When we clone an SKB we copy the reycling bit. The pp_recycle
>  	 * bit is only set on the head though, so in order to avoid races
> @@ -955,11 +956,12 @@ void skb_release_head_state(struct sk_buff *skb)
>  }
>  
>  /* Free everything but the sk_buff shell. */
> -static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason)
> +static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason,
> +			    bool in_normal_napi)
>  {
>  	skb_release_head_state(skb);
>  	if (likely(skb->head))
> -		skb_release_data(skb, reason);
> +		skb_release_data(skb, reason, in_normal_napi);
>  }
>  
>  /**
> @@ -973,7 +975,7 @@ static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason)
>  
>  void __kfree_skb(struct sk_buff *skb)
>  {
> -	skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED);
> +	skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED, false);
>  	kfree_skbmem(skb);
>  }
>  EXPORT_SYMBOL(__kfree_skb);
> @@ -1027,7 +1029,7 @@ static void kfree_skb_add_bulk(struct sk_buff *skb,
>  		return;
>  	}
>  
> -	skb_release_all(skb, reason);
> +	skb_release_all(skb, reason, false);
>  	sa->skb_array[sa->skb_count++] = skb;
>  
>  	if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) {
> @@ -1201,7 +1203,7 @@ EXPORT_SYMBOL(consume_skb);
>  void __consume_stateless_skb(struct sk_buff *skb)
>  {
>  	trace_consume_skb(skb, __builtin_return_address(0));
> -	skb_release_data(skb, SKB_CONSUMED);
> +	skb_release_data(skb, SKB_CONSUMED, false);
>  	kfree_skbmem(skb);
>  }
>  
> @@ -1226,7 +1228,7 @@ static void napi_skb_cache_put(struct sk_buff *skb)
>  
>  void __kfree_skb_defer(struct sk_buff *skb)
>  {
> -	skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED);
> +	skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED, false);
>  	napi_skb_cache_put(skb);

Is there any reason not to call skb_release_all() with in_normal_napi
being true while napi_skb_cache_put() is called here?

>  }
>  
> @@ -1264,7 +1266,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
>  		return;
>  	}
>  
> -	skb_release_all(skb, SKB_CONSUMED);
> +	skb_release_all(skb, SKB_CONSUMED, !!budget);
>  	napi_skb_cache_put(skb);
>  }
>  EXPORT_SYMBOL(napi_consume_skb);
> @@ -1395,7 +1397,7 @@ EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
>   */
>  struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
>  {
> -	skb_release_all(dst, SKB_CONSUMED);
> +	skb_release_all(dst, SKB_CONSUMED, false);
>  	return __skb_clone(dst, src);
>  }
>  EXPORT_SYMBOL_GPL(skb_morph);
> @@ -2018,9 +2020,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
>  		if (skb_has_frag_list(skb))
>  			skb_clone_fraglist(skb);
>  
> -		skb_release_data(skb, SKB_CONSUMED);
> +		skb_release_data(skb, SKB_CONSUMED, false);
>  	} else {
> -		skb_free_head(skb);
> +		skb_free_head(skb, false);
>  	}
>  	off = (data + nhead) - skb->head;
>  
> @@ -6389,12 +6391,12 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
>  			skb_frag_ref(skb, i);
>  		if (skb_has_frag_list(skb))
>  			skb_clone_fraglist(skb);
> -		skb_release_data(skb, SKB_CONSUMED);
> +		skb_release_data(skb, SKB_CONSUMED, false);
>  	} else {
>  		/* we can reuse existing recount- all we did was
>  		 * relocate values
>  		 */
> -		skb_free_head(skb);
> +		skb_free_head(skb, false);
>  	}
>  
>  	skb->head = data;
> @@ -6529,7 +6531,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
>  		skb_kfree_head(data, size);
>  		return -ENOMEM;
>  	}
> -	skb_release_data(skb, SKB_CONSUMED);
> +	skb_release_data(skb, SKB_CONSUMED, false);
>  
>  	skb->head = data;
>  	skb->head_frag = 0;
>
Jakub Kicinski April 7, 2023, 2:14 p.m. UTC | #2
On Fri, 7 Apr 2023 17:15:15 +0800 Yunsheng Lin wrote:
> On 2023/4/6 7:20, Jakub Kicinski wrote:
> > We maintain a NAPI-local cache of skbs which is fed by napi_consume_skb().
> > Going forward we will also try to cache head and data pages.
> > Plumb the "are we in a normal NAPI context" information thru
> > deeper into the freeing path, up to skb_release_data() and
> > skb_free_head()/skb_pp_recycle().
> > 
> > Use "bool in_normal_napi" rather than bare "int budget",
> > the further we get from NAPI the more confusing the budget
> > argument may seem (particularly whether 0 or MAX is the
> > correct value to pass in when not in NAPI).

> > @@ -839,7 +839,7 @@ static void skb_clone_fraglist(struct sk_buff *skb)
> >  		skb_get(list);
> >  }
> >  
> > -static bool skb_pp_recycle(struct sk_buff *skb, void *data)
> > +static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool in_normal_napi)  
> 
> What does *normal* means in 'in_normal_napi'?
> can we just use in_napi?

Technically netpoll also calls NAPI, that's why I threw in the
"normal". If folks prefer in_napi or some other name I'm more 
than happy to change. Naming is hard.

> > @@ -1226,7 +1228,7 @@ static void napi_skb_cache_put(struct sk_buff *skb)
> >  
> >  void __kfree_skb_defer(struct sk_buff *skb)
> >  {
> > -	skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED);
> > +	skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED, false);
> >  	napi_skb_cache_put(skb);  
> 
> Is there any reason not to call skb_release_all() with in_normal_napi
> being true while napi_skb_cache_put() is called here?

True, __kfree_skb_defer() needs more work also. I'll set it to true 
in the PATCH posting and clean up the function in a follow-up.
Jakub Kicinski April 7, 2023, 3:28 p.m. UTC | #3
On Fri, 7 Apr 2023 07:14:02 -0700 Jakub Kicinski wrote:
> > > -static bool skb_pp_recycle(struct sk_buff *skb, void *data)
> > > +static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool in_normal_napi)    
> > 
> > What does *normal* means in 'in_normal_napi'?
> > can we just use in_napi?  
> 
> Technically netpoll also calls NAPI, that's why I threw in the
> "normal". If folks prefer in_napi or some other name I'm more 
> than happy to change. Naming is hard.

Maybe I should rename it to in_softirq ? Or napi_safe ?
Because __kfree_skb_defer() gets called from the Tx side.
And even the Rx deferred free isn't really *in* NAPI.
Jesper Dangaard Brouer April 9, 2023, 5:28 p.m. UTC | #4
On 07/04/2023 17.28, Jakub Kicinski wrote:
> On Fri, 7 Apr 2023 07:14:02 -0700 Jakub Kicinski wrote:
>>>> -static bool skb_pp_recycle(struct sk_buff *skb, void *data)
>>>> +static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool in_normal_napi)
>>>
>>> What does *normal* means in 'in_normal_napi'?
>>> can we just use in_napi?
>>
>> Technically netpoll also calls NAPI, that's why I threw in the
>> "normal". If folks prefer in_napi or some other name I'm more
>> than happy to change. Naming is hard.
> 
> Maybe I should rename it to in_softirq ? Or napi_safe ?
> Because __kfree_skb_defer() gets called from the Tx side.
> And even the Rx deferred free isn't really *in* NAPI.
> 

I like the name "napi_safe".

--Jesper
Yunsheng Lin April 10, 2023, 9:20 a.m. UTC | #5
On 2023/4/10 1:28, Jesper Dangaard Brouer wrote:
> 
> 
> On 07/04/2023 17.28, Jakub Kicinski wrote:
>> On Fri, 7 Apr 2023 07:14:02 -0700 Jakub Kicinski wrote:
>>>>> -static bool skb_pp_recycle(struct sk_buff *skb, void *data)
>>>>> +static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool in_normal_napi)
>>>>
>>>> What does *normal* means in 'in_normal_napi'?
>>>> can we just use in_napi?
>>>
>>> Technically netpoll also calls NAPI, that's why I threw in the
>>> "normal". If folks prefer in_napi or some other name I'm more
>>> than happy to change. Naming is hard.
>>
>> Maybe I should rename it to in_softirq ? Or napi_safe ?
>> Because __kfree_skb_defer() gets called from the Tx side.
>> And even the Rx deferred free isn't really *in* NAPI.
>>
> 
> I like the name "napi_safe".

+1.

> 
> --Jesper
> 
> .
>
diff mbox series

Patch

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 050a875d09c5..8d5377b4112f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -839,7 +839,7 @@  static void skb_clone_fraglist(struct sk_buff *skb)
 		skb_get(list);
 }
 
-static bool skb_pp_recycle(struct sk_buff *skb, void *data)
+static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool in_normal_napi)
 {
 	if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
 		return false;
@@ -856,12 +856,12 @@  static void skb_kfree_head(void *head, unsigned int end_offset)
 		kfree(head);
 }
 
-static void skb_free_head(struct sk_buff *skb)
+static void skb_free_head(struct sk_buff *skb, bool in_normal_napi)
 {
 	unsigned char *head = skb->head;
 
 	if (skb->head_frag) {
-		if (skb_pp_recycle(skb, head))
+		if (skb_pp_recycle(skb, head, in_normal_napi))
 			return;
 		skb_free_frag(head);
 	} else {
@@ -869,7 +869,8 @@  static void skb_free_head(struct sk_buff *skb)
 	}
 }
 
-static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason)
+static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason,
+			     bool in_normal_napi)
 {
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	int i;
@@ -894,7 +895,7 @@  static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason)
 	if (shinfo->frag_list)
 		kfree_skb_list_reason(shinfo->frag_list, reason);
 
-	skb_free_head(skb);
+	skb_free_head(skb, in_normal_napi);
 exit:
 	/* When we clone an SKB we copy the reycling bit. The pp_recycle
 	 * bit is only set on the head though, so in order to avoid races
@@ -955,11 +956,12 @@  void skb_release_head_state(struct sk_buff *skb)
 }
 
 /* Free everything but the sk_buff shell. */
-static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason)
+static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason,
+			    bool in_normal_napi)
 {
 	skb_release_head_state(skb);
 	if (likely(skb->head))
-		skb_release_data(skb, reason);
+		skb_release_data(skb, reason, in_normal_napi);
 }
 
 /**
@@ -973,7 +975,7 @@  static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason)
 
 void __kfree_skb(struct sk_buff *skb)
 {
-	skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED);
+	skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED, false);
 	kfree_skbmem(skb);
 }
 EXPORT_SYMBOL(__kfree_skb);
@@ -1027,7 +1029,7 @@  static void kfree_skb_add_bulk(struct sk_buff *skb,
 		return;
 	}
 
-	skb_release_all(skb, reason);
+	skb_release_all(skb, reason, false);
 	sa->skb_array[sa->skb_count++] = skb;
 
 	if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) {
@@ -1201,7 +1203,7 @@  EXPORT_SYMBOL(consume_skb);
 void __consume_stateless_skb(struct sk_buff *skb)
 {
 	trace_consume_skb(skb, __builtin_return_address(0));
-	skb_release_data(skb, SKB_CONSUMED);
+	skb_release_data(skb, SKB_CONSUMED, false);
 	kfree_skbmem(skb);
 }
 
@@ -1226,7 +1228,7 @@  static void napi_skb_cache_put(struct sk_buff *skb)
 
 void __kfree_skb_defer(struct sk_buff *skb)
 {
-	skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED);
+	skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED, false);
 	napi_skb_cache_put(skb);
 }
 
@@ -1264,7 +1266,7 @@  void napi_consume_skb(struct sk_buff *skb, int budget)
 		return;
 	}
 
-	skb_release_all(skb, SKB_CONSUMED);
+	skb_release_all(skb, SKB_CONSUMED, !!budget);
 	napi_skb_cache_put(skb);
 }
 EXPORT_SYMBOL(napi_consume_skb);
@@ -1395,7 +1397,7 @@  EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
  */
 struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 {
-	skb_release_all(dst, SKB_CONSUMED);
+	skb_release_all(dst, SKB_CONSUMED, false);
 	return __skb_clone(dst, src);
 }
 EXPORT_SYMBOL_GPL(skb_morph);
@@ -2018,9 +2020,9 @@  int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 		if (skb_has_frag_list(skb))
 			skb_clone_fraglist(skb);
 
-		skb_release_data(skb, SKB_CONSUMED);
+		skb_release_data(skb, SKB_CONSUMED, false);
 	} else {
-		skb_free_head(skb);
+		skb_free_head(skb, false);
 	}
 	off = (data + nhead) - skb->head;
 
@@ -6389,12 +6391,12 @@  static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
 			skb_frag_ref(skb, i);
 		if (skb_has_frag_list(skb))
 			skb_clone_fraglist(skb);
-		skb_release_data(skb, SKB_CONSUMED);
+		skb_release_data(skb, SKB_CONSUMED, false);
 	} else {
 		/* we can reuse existing recount- all we did was
 		 * relocate values
 		 */
-		skb_free_head(skb);
+		skb_free_head(skb, false);
 	}
 
 	skb->head = data;
@@ -6529,7 +6531,7 @@  static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
 		skb_kfree_head(data, size);
 		return -ENOMEM;
 	}
-	skb_release_data(skb, SKB_CONSUMED);
+	skb_release_data(skb, SKB_CONSUMED, false);
 
 	skb->head = data;
 	skb->head_frag = 0;