[net-next] net: skbuff: sprinkle more __GFP_NOWARN on ingress allocs

Message ID 20240802001956.566242-1-kuba@kernel.org (mailing list archive)
State Accepted
Delegated to: Netdev Maintainers
Headers show
Series [net-next] net: skbuff: sprinkle more __GFP_NOWARN on ingress allocs | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 29 this patch: 29
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 4 of 4 maintainers
netdev/build_clang success Errors and warnings before: 29 this patch: 29
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 29 this patch: 29
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 45 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 54 this patch: 54
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-08-03--00-00 (tests: 701)

Commit Message

Jakub Kicinski Aug. 2, 2024, 12:19 a.m. UTC
build_skb() and frag allocations done with GFP_ATOMIC will
fail in real life, when the system is under memory pressure,
and there's nothing we can do about that. So there's no point
in printing warnings.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/skbuff.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)
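
For context, a minimal sketch (not part of the patch) of the pattern
being applied: ingress-path allocations run in atomic context, cannot
reclaim, and are expected to fail under pressure, so they pass
__GFP_NOWARN and handle NULL by dropping. The rx_alloc_example()
helper below is hypothetical.

#include <linux/skbuff.h>

/* Hypothetical RX helper mirroring the convention in this patch.
 * GFP_ATOMIC allocations cannot sleep or reclaim, so NULL here is
 * expected under memory pressure; __GFP_NOWARN suppresses the
 * allocation-failure splat that would otherwise be printed.
 */
static struct sk_buff *rx_alloc_example(unsigned int len)
{
	struct sk_buff *skb;

	skb = __netdev_alloc_skb(NULL, len, GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(!skb))
		return NULL;	/* expected under pressure; frame dropped */

	return skb;
}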

Comments

Jason Xing Aug. 2, 2024, 4:52 a.m. UTC | #1
Hello Jakub,

On Fri, Aug 2, 2024 at 8:20 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> build_skb() and frag allocations done with GFP_ATOMIC will
> fail in real life, when the system is under memory pressure,

It's true. It can happen frequently under heavy memory pressure.

> and there's nothing we can do about that. So there's no point
> in printing warnings.

As you said, we cannot handle the failure because of that flag, but I
wonder if we could at least let users/admins know about it, e.g. by
adding a MIB counter or a trace_alloc_skb() tracepoint, which would
also avoid printing too many useless/unnecessary warnings. Otherwise,
people won't know what exactly happened in the kernel.
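
Something like this rough sketch, say (the event name, arguments and
placement are only illustrative, and the TRACE_SYSTEM/define_trace.h
boilerplate a real trace header needs is omitted):

#include <linux/tracepoint.h>

/* Hypothetical trace event fired when an ingress skb/frag
 * allocation fails; not an existing kernel tracepoint.
 */
TRACE_EVENT(alloc_skb_fail,
	TP_PROTO(unsigned int size, gfp_t flags),
	TP_ARGS(size, flags),
	TP_STRUCT__entry(
		__field(unsigned int, size)
		__field(unsigned long, flags)
	),
	TP_fast_assign(
		__entry->size = size;
		__entry->flags = (__force unsigned long)flags;
	),
	TP_printk("size=%u gfp=%#lx", __entry->size, __entry->flags)
);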

Thanks,
Jason

>
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
>  net/core/skbuff.c | 15 +++++++++------
>  1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 83f8cd8aa2d1..de2a044cc665 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -314,8 +314,8 @@ void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
>         fragsz = SKB_DATA_ALIGN(fragsz);
>
>         local_lock_nested_bh(&napi_alloc_cache.bh_lock);
> -       data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC,
> -                                      align_mask);
> +       data = __page_frag_alloc_align(&nc->page, fragsz,
> +                                      GFP_ATOMIC | __GFP_NOWARN, align_mask);
>         local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
>         return data;
>
> @@ -330,7 +330,8 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
>                 struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache);
>
>                 fragsz = SKB_DATA_ALIGN(fragsz);
> -               data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC,
> +               data = __page_frag_alloc_align(nc, fragsz,
> +                                              GFP_ATOMIC | __GFP_NOWARN,
>                                                align_mask);
>         } else {
>                 local_bh_disable();
> @@ -349,7 +350,7 @@ static struct sk_buff *napi_skb_cache_get(void)
>         local_lock_nested_bh(&napi_alloc_cache.bh_lock);
>         if (unlikely(!nc->skb_count)) {
>                 nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
> -                                                     GFP_ATOMIC,
> +                                                     GFP_ATOMIC | __GFP_NOWARN,
>                                                       NAPI_SKB_CACHE_BULK,
>                                                       nc->skb_cache);
>                 if (unlikely(!nc->skb_count)) {
> @@ -418,7 +419,8 @@ struct sk_buff *slab_build_skb(void *data)
>         struct sk_buff *skb;
>         unsigned int size;
>
> -       skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
> +       skb = kmem_cache_alloc(net_hotdata.skbuff_cache,
> +                              GFP_ATOMIC | __GFP_NOWARN);
>         if (unlikely(!skb))
>                 return NULL;
>
> @@ -469,7 +471,8 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
>  {
>         struct sk_buff *skb;
>
> -       skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
> +       skb = kmem_cache_alloc(net_hotdata.skbuff_cache,
> +                              GFP_ATOMIC | __GFP_NOWARN);
>         if (unlikely(!skb))
>                 return NULL;
>
> --
> 2.45.2
>
>
Eric Dumazet Aug. 2, 2024, 7:18 a.m. UTC | #2
On Fri, Aug 2, 2024 at 2:20 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> build_skb() and frag allocations done with GFP_ATOMIC will
> fail in real life, when the system is under memory pressure,
> and there's nothing we can do about that. So there's no point
> in printing warnings.
>
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Reviewed-by: Eric Dumazet <edumazet@google.com>
Jakub Kicinski Aug. 2, 2024, 2:48 p.m. UTC | #3
On Fri, 2 Aug 2024 12:52:06 +0800 Jason Xing wrote:
> > and there's nothing we can do about that. So there's no point
> > in printing warnings.
> 
> As you said, we cannot handle the failure because of that flag, but I
> wonder if we could at least let users/admins know about it, e.g. by
> adding a MIB counter or a trace_alloc_skb() tracepoint, which would
> also avoid printing too many useless/unnecessary warnings. Otherwise,
> people won't know what exactly happened in the kernel.

Hm, maybe... I prefer not to add counters and trace points upstream
until they have sat for a few months in production and proven their
usefulness.

We also have a driver-level, per-device counter already:
https://docs.kernel.org/next/networking/netlink_spec/netdev.html#rx-alloc-fail-uint
and I'm pretty sure system-level OOM tracking will indicate severe
OOM conditions as well.
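
For reference, drivers fill that counter in via the queue-stats ops,
roughly like this (the drv_* names below are made up):

#include <net/netdev_queues.h>

/* Hypothetical per-queue driver state; alloc_fail is bumped
 * wherever an RX allocation (build_skb(), frag alloc, ...)
 * returns NULL.
 */
struct drv_rx_queue {
	u64 alloc_fail;
};

static struct drv_rx_queue drv_rxq[8];	/* illustrative */

static void drv_get_queue_stats_rx(struct net_device *dev, int idx,
				   struct netdev_queue_stats_rx *stats)
{
	stats->alloc_fail = drv_rxq[idx].alloc_fail;
}

static const struct netdev_stat_ops drv_stat_ops = {
	.get_queue_stats_rx = drv_get_queue_stats_rx,
};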
Jason Xing Aug. 5, 2024, 8:06 a.m. UTC | #4
On Fri, Aug 2, 2024 at 10:48 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Fri, 2 Aug 2024 12:52:06 +0800 Jason Xing wrote:
> > > and there's nothing we can do about that. So there's no point
> > > in printing warnings.
> >
> > As you said, we cannot handle the failure because of that flag, but I
> > wonder if we could at least let users/admins know about it, e.g. by
> > adding a MIB counter or a trace_alloc_skb() tracepoint, which would
> > also avoid printing too many useless/unnecessary warnings. Otherwise,
> > people won't know what exactly happened in the kernel.
>
> Hm, maybe... I prefer not to add counters and trace points upstream
> until they have sat for a few months in production and proven their
> usefulness.
>
> We also have a driver-level, per-device counter already:
> https://docs.kernel.org/next/networking/netlink_spec/netdev.html#rx-alloc-fail-uint
> and I'm pretty sure system-level OOM tracking will indicate severe
> OOM conditions as well.

Thanks for your explanation. I see.

Patch

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 83f8cd8aa2d1..de2a044cc665 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -314,8 +314,8 @@  void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
 	fragsz = SKB_DATA_ALIGN(fragsz);
 
 	local_lock_nested_bh(&napi_alloc_cache.bh_lock);
-	data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC,
-				       align_mask);
+	data = __page_frag_alloc_align(&nc->page, fragsz,
+				       GFP_ATOMIC | __GFP_NOWARN, align_mask);
 	local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
 	return data;
 
@@ -330,7 +330,8 @@  void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
 		struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache);
 
 		fragsz = SKB_DATA_ALIGN(fragsz);
-		data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC,
+		data = __page_frag_alloc_align(nc, fragsz,
+					       GFP_ATOMIC | __GFP_NOWARN,
 					       align_mask);
 	} else {
 		local_bh_disable();
@@ -349,7 +350,7 @@  static struct sk_buff *napi_skb_cache_get(void)
 	local_lock_nested_bh(&napi_alloc_cache.bh_lock);
 	if (unlikely(!nc->skb_count)) {
 		nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
-						      GFP_ATOMIC,
+						      GFP_ATOMIC | __GFP_NOWARN,
 						      NAPI_SKB_CACHE_BULK,
 						      nc->skb_cache);
 		if (unlikely(!nc->skb_count)) {
@@ -418,7 +419,8 @@  struct sk_buff *slab_build_skb(void *data)
 	struct sk_buff *skb;
 	unsigned int size;
 
-	skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
+	skb = kmem_cache_alloc(net_hotdata.skbuff_cache,
+			       GFP_ATOMIC | __GFP_NOWARN);
 	if (unlikely(!skb))
 		return NULL;
 
@@ -469,7 +471,8 @@  struct sk_buff *__build_skb(void *data, unsigned int frag_size)
 {
 	struct sk_buff *skb;
 
-	skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
+	skb = kmem_cache_alloc(net_hotdata.skbuff_cache,
+			       GFP_ATOMIC | __GFP_NOWARN);
 	if (unlikely(!skb))
 		return NULL;