
[net-next,4/4] net: add dedicated kmem_cache for typical/small skb->head

Message ID 20230202185801.4179599-5-edumazet@google.com
State Superseded
Delegated to: Netdev Maintainers
Series net: core: use a dedicated kmem_cache for skb head allocs

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2 this patch: 2
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 1 this patch: 1
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 2 this patch: 2
netdev/checkpatch warning CHECK: Alignment should match open parenthesis; CHECK: Blank lines aren't necessary after an open brace '{'
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Eric Dumazet Feb. 2, 2023, 6:58 p.m. UTC
Recent removal of ksize() in alloc_skb() increased
performance because we no longer read
the associated struct page.

We have an equivalent cost at kfree_skb() time.

kfree(skb->head) has to access a struct page,
often cold in cpu caches, to get the owning
struct kmem_cache.

Considering that many allocations are small,
we can have our own kmem_cache to avoid the cache line miss.

This also saves memory because these small heads
are no longer padded to 1024 bytes.

CONFIG_SLUB=y
$ grep skbuff_small_head /proc/slabinfo
skbuff_small_head   2907   2907    640   51    8 : tunables    0    0    0 : slabdata     57     57      0

CONFIG_SLAB=y
$ grep skbuff_small_head /proc/slabinfo
skbuff_small_head    607    624    640    6    1 : tunables   54   27    8 : slabdata    104    104      5

Note: after Kees Cook patches and this one, we might
be able to revert commit
dbae2b062824 ("net: skb: introduce and use a single page frag cache")
because GRO_MAX_HEAD is also small.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
---
 net/core/skbuff.c | 52 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 5 deletions(-)
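
For reference, the 640-byte object size in the slabinfo output above
follows from the head-size arithmetic. A sketch, assuming a 64-bit
build where MAX_TCP_HEADER evaluates to 304 and
sizeof(struct skb_shared_info) is 320 (both values are
config-dependent):

        /* SKB_HEAD_ALIGN() rounds the head and the trailing
         * skb_shared_info separately to SMP_CACHE_BYTES (64 here):
         *
         *   SKB_SMALL_HEAD_SIZE = SKB_HEAD_ALIGN(304)
         *                       = SKB_DATA_ALIGN(304) + SKB_DATA_ALIGN(320)
         *                       = 320 + 320 = 640
         *
         * 640 is not a power of two, so SKB_SMALL_HEAD_CACHE_SIZE stays
         * 640, instead of the kmalloc-1k slab such heads previously
         * landed in.
         */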

Comments

Soheil Hassas Yeganeh Feb. 2, 2023, 8:14 p.m. UTC | #1
On Thu, Feb 2, 2023 at 1:58 PM Eric Dumazet <edumazet@google.com> wrote:
>
> Recent removal of ksize() in alloc_skb() increased
> performance because we no longer read
> the associated struct page.
>
> We have an equivalent cost at kfree_skb() time.
>
> kfree(skb->head) has to access a struct page,
> often cold in cpu caches, to get the owning
> struct kmem_cache.
>
> Considering that many allocations are small,
> we can have our own kmem_cache to avoid the cache line miss.
>
> This also saves memory because these small heads
> are no longer padded to 1024 bytes.
>
> CONFIG_SLUB=y
> $ grep skbuff_small_head /proc/slabinfo
> skbuff_small_head   2907   2907    640   51    8 : tunables    0    0    0 : slabdata     57     57      0
>
> CONFIG_SLAB=y
> $ grep skbuff_small_head /proc/slabinfo
> skbuff_small_head    607    624    640    6    1 : tunables   54   27    8 : slabdata    104    104      5
>
> Note: after Kees Cook patches and this one, we might
> be able to revert commit
> dbae2b062824 ("net: skb: introduce and use a single page frag cache")
> because GRO_MAX_HEAD is also small.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Paolo Abeni <pabeni@redhat.com>

Acked-by: Soheil Hassas Yeganeh <soheil@google.com>

Very nice!

> ---
>  net/core/skbuff.c | 52 ++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 47 insertions(+), 5 deletions(-)
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index ae0b2aa1f01e8060cc4fe69137e9bd98e44280cc..3e540b4924701cc57b6fbd1b668bab3b652ee94c 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -89,6 +89,19 @@ static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
>  #ifdef CONFIG_SKB_EXTENSIONS
>  static struct kmem_cache *skbuff_ext_cache __ro_after_init;
>  #endif
> +static struct kmem_cache *skb_small_head_cache __ro_after_init;
> +
> +#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
> +
> +/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. */
> +#define SKB_SMALL_HEAD_CACHE_SIZE                                      \
> +       (is_power_of_2(SKB_SMALL_HEAD_SIZE) ?                   \
> +               (SKB_SMALL_HEAD_SIZE + L1_CACHE_BYTES) :        \
> +               SKB_SMALL_HEAD_SIZE)
> +
> +#define SKB_SMALL_HEAD_HEADROOM                                                \
> +       SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
> +
>  int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
>  EXPORT_SYMBOL(sysctl_max_skb_frags);
>
> @@ -486,6 +499,21 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
>         void *obj;
>
>         obj_size = SKB_HEAD_ALIGN(*size);
> +       if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
> +           !(flags & KMALLOC_NOT_NORMAL_BITS)) {
> +
> +               /* skb_small_head_cache has non power of two size,
> +                * likely forcing SLUB to use order-3 pages.
> +                * We deliberately attempt a NOMEMALLOC allocation only.
> +                */
> +               obj = kmem_cache_alloc_node(skb_small_head_cache,
> +                               flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
> +                               node);
> +               if (obj) {
> +                       *size = SKB_SMALL_HEAD_CACHE_SIZE;
> +                       goto out;
> +               }
> +       }
>         *size = obj_size = kmalloc_size_roundup(obj_size);
>         /*
>          * Try a regular allocation, when that fails and we're not entitled
> @@ -805,6 +833,14 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data)
>         return page_pool_return_skb_page(virt_to_page(data));
>  }
>
> +static void skb_kfree_head(void *head, unsigned int end_offset)
> +{
> +       if (end_offset == SKB_SMALL_HEAD_HEADROOM)
> +               kmem_cache_free(skb_small_head_cache, head);
> +       else
> +               kfree(head);
> +}
> +
>  static void skb_free_head(struct sk_buff *skb)
>  {
>         unsigned char *head = skb->head;
> @@ -814,7 +850,7 @@ static void skb_free_head(struct sk_buff *skb)
>                         return;
>                 skb_free_frag(head);
>         } else {
> -               kfree(head);
> +               skb_kfree_head(head, skb_end_offset(skb));
>         }
>  }
>
> @@ -1995,7 +2031,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
>         return 0;
>
>  nofrags:
> -       kfree(data);
> +       skb_kfree_head(data, size);
>  nodata:
>         return -ENOMEM;
>  }
> @@ -4633,6 +4669,12 @@ void __init skb_init(void)
>                                                 0,
>                                                 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
>                                                 NULL);
> +       skb_small_head_cache = kmem_cache_create("skbuff_small_head",
> +                                               SKB_SMALL_HEAD_CACHE_SIZE,
> +                                               0,
> +                                               SLAB_HWCACHE_ALIGN | SLAB_PANIC,
> +                                               NULL);
> +
>         skb_extensions_init();
>  }
>
> @@ -6297,7 +6339,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
>         if (skb_cloned(skb)) {
>                 /* drop the old head gracefully */
>                 if (skb_orphan_frags(skb, gfp_mask)) {
> -                       kfree(data);
> +                       skb_kfree_head(data, size);
>                         return -ENOMEM;
>                 }
>                 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
> @@ -6405,7 +6447,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
>         memcpy((struct skb_shared_info *)(data + size),
>                skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
>         if (skb_orphan_frags(skb, gfp_mask)) {
> -               kfree(data);
> +               skb_kfree_head(data, size);
>                 return -ENOMEM;
>         }
>         shinfo = (struct skb_shared_info *)(data + size);
> @@ -6441,7 +6483,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
>                 /* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */
>                 if (skb_has_frag_list(skb))
>                         kfree_skb_list(skb_shinfo(skb)->frag_list);
> -               kfree(data);
> +               skb_kfree_head(data, size);
>                 return -ENOMEM;
>         }
>         skb_release_data(skb, SKB_CONSUMED);
> --
> 2.39.1.456.gfc5497dd1b-goog
>
Jakub Kicinski Feb. 3, 2023, 5:11 a.m. UTC | #2
On Thu,  2 Feb 2023 18:58:01 +0000 Eric Dumazet wrote:
> +/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. */

Why is that?  Is it to prevent potential mixing up of objects from 
the cache with objects from general slabs (since we only do an
end_offset == SKB_SMALL_HEAD_HEADROOM check)?
Eric Dumazet Feb. 3, 2023, 7 a.m. UTC | #3
On Fri, Feb 3, 2023 at 6:11 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Thu,  2 Feb 2023 18:58:01 +0000 Eric Dumazet wrote:
> > +/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. */
>
> Why is that?  Is it to prevent potential mixing up of objects from
> the cache with objects from general slabs (since we only do an
> end_offset == SKB_SMALL_HEAD_HEADROOM check)?

Good question.

Some alloc_skb() callers use GFP_DMA (or __GFP_ACCOUNT);
we can not use the dedicated kmem_cache for them.

They could get an object of size 512 or 1024.

Since I chose not to add yet another
skb->head_has_been_allocated_from_small_head_cache bit,
we want to make sure we will not have issues in the future if
SKB_HEAD_ALIGN(MAX_TCP_HEADER)
becomes a power of two (for example, for some of us increasing
MAX_SKB_FRAGS).

An alternative would be to add a check at boot time, making sure
no standard cache has the same object size.

This might have an issue with CONFIG_SLOB=y; I wish it was gone already...
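
A minimal sketch of such a boot-time check (an editorial illustration,
not part of this patch; skb_small_head_sanity_check() is a made-up
name) could use kmalloc_size_roundup(), which reports the object size
kmalloc() would really return:

        static void __init skb_small_head_sanity_check(void)
        {
                /* Equality means some standard kmalloc slab has exactly
                 * our object size, so a fallback head could alias the
                 * dedicated cache in the skb_end_offset() comparison.
                 */
                if (kmalloc_size_roundup(SKB_SMALL_HEAD_CACHE_SIZE) ==
                    SKB_SMALL_HEAD_CACHE_SIZE)
                        panic("skbuff_small_head collides with a kmalloc slab");
        }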
Paolo Abeni Feb. 3, 2023, 7:59 a.m. UTC | #4
On Thu, 2023-02-02 at 18:58 +0000, Eric Dumazet wrote:
> Note: after Kees Cook patches and this one, we might
> be able to revert commit
> dbae2b062824 ("net: skb: introduce and use a single page frag cache")
> because GRO_MAX_HEAD is also small.

I guess I'll need some time to do the relevant benchmarks, but I'm not
able to schedule them very soon.

> @@ -486,6 +499,21 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
>  	void *obj;
>  
>  	obj_size = SKB_HEAD_ALIGN(*size);
> +	if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
> +	    !(flags & KMALLOC_NOT_NORMAL_BITS)) {
> +
> +		/* skb_small_head_cache has non power of two size,
> +		 * likely forcing SLUB to use order-3 pages.
> +		 * We deliberately attempt a NOMEMALLOC allocation only.
> +		 */
> +		obj = kmem_cache_alloc_node(skb_small_head_cache,
> +				flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
> +				node);
> +		if (obj) {
> +			*size = SKB_SMALL_HEAD_CACHE_SIZE;
> +			goto out;
> +		}

In case of kmem_cache allocation failure, should we try to skip the 2nd
__GFP_NOMEMALLOC attempt below?

I *think* the non power of two size is also required to avoid an issue
with plain (no GFP_DMA nor __GFP_ACCOUNT) allocations in case of a
fallback to kmalloc(): it prevents skb_kfree_head() from
mis-interpreting skb->head as kmem_cache allocated.

Thanks!

Paolo
Eric Dumazet Feb. 3, 2023, 8:17 a.m. UTC | #5
On Fri, Feb 3, 2023 at 8:59 AM Paolo Abeni <pabeni@redhat.com> wrote:
>
> On Thu, 2023-02-02 at 18:58 +0000, Eric Dumazet wrote:
> > Note: after Kees Cook patches and this one, we might
> > be able to revert commit
> > dbae2b062824 ("net: skb: introduce and use a single page frag cache")
> > because GRO_MAX_HEAD is also small.
>
> I guess I'll need some time to do the relevant benchmarks, but I'm not
> able to schedule them very soon.

No worries, this can be done later.
Note that the results might depend on the SLUB/SLAB choice.

>
> > @@ -486,6 +499,21 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
> >       void *obj;
> >
> >       obj_size = SKB_HEAD_ALIGN(*size);
> > +     if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
> > +         !(flags & KMALLOC_NOT_NORMAL_BITS)) {
> > +
> > +             /* skb_small_head_cache has non power of two size,
> > +              * likely forcing SLUB to use order-3 pages.
> > +              * We deliberately attempt a NOMEMALLOC allocation only.
> > +              */
> > +             obj = kmem_cache_alloc_node(skb_small_head_cache,
> > +                             flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
> > +                             node);
> > +             if (obj) {
> > +                     *size = SKB_SMALL_HEAD_CACHE_SIZE;
> > +                     goto out;
> > +             }
>
> In case kmem allocation failure, should we try to skip the 2nd
> __GFP_NOMEMALLOC attempt below?

We could, but my reasoning was that we might find an object in the
other kmem_cache freelists.

kmalloc-1k objects tend to use smaller page orders, so they are more
likely to succeed if there is no order-3 page available in the buddy
allocator. (I mentioned this in the code comment.)


>
> I *think* the non power of two size is also required to avoid an issue
> with plain (no GFP_DMA nor __GFP_ACCOUNT) allocations in case of a
> fallback to kmalloc(): it prevents skb_kfree_head() from
> mis-interpreting skb->head as kmem_cache allocated.

Indeed there are multiple cases explaining why SKB_SMALL_HEAD_CACHE_SIZE
needs to be a non power of two.
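
To spell out the failure mode being described (sizes below are
illustrative, assuming SKB_SMALL_HEAD_CACHE_SIZE were a power of two
such as 1024):

        /* 1. A GFP_DMA request of 600 bytes must skip the dedicated
         *    cache and fall back to kmalloc().
         * 2. kmalloc_size_roundup(600) == 1024, so this kmalloc() head
         *    gets skb_end_offset() == SKB_WITH_OVERHEAD(1024), which
         *    would equal SKB_SMALL_HEAD_HEADROOM.
         * 3. skb_kfree_head() would then pass kmalloc() memory to
         *    kmem_cache_free(skb_small_head_cache, ...), corrupting both
         *    allocators.
         * With a non power of two size (640), no kmalloc slab can share
         * the object size, so step 2 can never alias.
         */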
kernel test robot Feb. 3, 2023, 7:37 p.m. UTC | #6
Hi Eric,

I love your patch! Yet something to improve:

[auto build test ERROR on net-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Eric-Dumazet/net-add-SKB_HEAD_ALIGN-helper/20230203-031126
patch link:    https://lore.kernel.org/r/20230202185801.4179599-5-edumazet%40google.com
patch subject: [PATCH net-next 4/4] net: add dedicated kmem_cache for typical/small skb->head
config: arc-randconfig-r014-20230204 (https://download.01.org/0day-ci/archive/20230204/202302040329.E10xZHbY-lkp@intel.com/config)
compiler: arceb-elf-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/002bb9238e98f3fbeb0402c97f711420c3321b93
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Eric-Dumazet/net-add-SKB_HEAD_ALIGN-helper/20230203-031126
        git checkout 002bb9238e98f3fbeb0402c97f711420c3321b93
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arc olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arc SHELL=/bin/bash net/core/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   net/core/skbuff.c: In function 'kmalloc_reserve':
>> net/core/skbuff.c:503:23: error: 'KMALLOC_NOT_NORMAL_BITS' undeclared (first use in this function)
     503 |             !(flags & KMALLOC_NOT_NORMAL_BITS)) {
         |                       ^~~~~~~~~~~~~~~~~~~~~~~
   net/core/skbuff.c:503:23: note: each undeclared identifier is reported only once for each function it appears in


vim +/KMALLOC_NOT_NORMAL_BITS +503 net/core/skbuff.c

   486	
   487	/*
   488	 * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
   489	 * the caller if emergency pfmemalloc reserves are being used. If it is and
   490	 * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
   491	 * may be used. Otherwise, the packet data may be discarded until enough
   492	 * memory is free
   493	 */
   494	static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
   495				     bool *pfmemalloc)
   496	{
   497		bool ret_pfmemalloc = false;
   498		unsigned int obj_size;
   499		void *obj;
   500	
   501		obj_size = SKB_HEAD_ALIGN(*size);
   502		if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
 > 503		    !(flags & KMALLOC_NOT_NORMAL_BITS)) {
   504	
   505			/* skb_small_head_cache has non power of two size,
   506			 * likely forcing SLUB to use order-3 pages.
   507			 * We deliberately attempt a NOMEMALLOC allocation only.
   508			 */
   509			obj = kmem_cache_alloc_node(skb_small_head_cache,
   510					flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
   511					node);
   512			if (obj) {
   513				*size = SKB_SMALL_HEAD_CACHE_SIZE;
   514				goto out;
   515			}
   516		}
   517		*size = obj_size = kmalloc_size_roundup(obj_size);
   518		/*
   519		 * Try a regular allocation, when that fails and we're not entitled
   520		 * to the reserves, fail.
   521		 */
   522		obj = kmalloc_node_track_caller(obj_size,
   523						flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
   524						node);
   525		if (obj || !(gfp_pfmemalloc_allowed(flags)))
   526			goto out;
   527	
   528		/* Try again but now we are using pfmemalloc reserves */
   529		ret_pfmemalloc = true;
   530		obj = kmalloc_node_track_caller(obj_size, flags, node);
   531	
   532	out:
   533		if (pfmemalloc)
   534			*pfmemalloc = ret_pfmemalloc;
   535	
   536		return obj;
   537	}
   538
Jakub Kicinski Feb. 3, 2023, 7:42 p.m. UTC | #7
On Sat, 4 Feb 2023 03:37:10 +0800 kernel test robot wrote:
> All errors (new ones prefixed by >>):
> 
>    net/core/skbuff.c: In function 'kmalloc_reserve':
> >> net/core/skbuff.c:503:23: error: 'KMALLOC_NOT_NORMAL_BITS' undeclared (first use in this function)  
>      503 |             !(flags & KMALLOC_NOT_NORMAL_BITS)) {
>          |                       ^~~~~~~~~~~~~~~~~~~~~~~
>    net/core/skbuff.c:503:23: note: each undeclared identifier is reported only once for each function it appears in

diff --git a/net/Kconfig b/net/Kconfig
index 27bc49c7ad73..41e27a4805a8 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -8,6 +8,7 @@ menuconfig NET
        select NLATTR
        select GENERIC_NET_UTILS
        select BPF
+       depends on !SLOB
        help
          Unless you really know what you are doing, you should say Y here.
          The reason is that some programs need kernel networking support even

? :)
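
A hypothetical, less drastic workaround (an editorial sketch, not the
fix that was actually posted) would be to open-code the GFP bits,
since KMALLOC_NOT_NORMAL_BITS is a slab-internal macro that SLOB
builds do not define:

        /* skb_head_gfp_is_plain() is a made-up helper name; the bit
         * mask mirrors what KMALLOC_NOT_NORMAL_BITS covers on SLAB/SLUB.
         */
        static inline bool skb_head_gfp_is_plain(gfp_t flags)
        {
                return !(flags & (__GFP_RECLAIMABLE | __GFP_DMA |
                                  __GFP_ACCOUNT));
        }

kmalloc_reserve() could then test skb_head_gfp_is_plain(flags) instead
of !(flags & KMALLOC_NOT_NORMAL_BITS) on every slab allocator.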
Jakub Kicinski Feb. 3, 2023, 7:47 p.m. UTC | #8
On Fri, 3 Feb 2023 09:17:55 +0100 Eric Dumazet wrote:
> > I *think* the non power of two size is also required to avoid an issue
> > with plain (no GFP_DMA nor __GFP_ACCOUNT) allocations in case of a
> > fallback to kmalloc(): it prevents skb_kfree_head() from
> > mis-interpreting skb->head as kmem_cache allocated.
> 
> Indeed there are multiple cases explaining why SKB_SMALL_HEAD_CACHE_SIZE
> needs to be a non power of two.

Since you may need to respin, would you mind spelling this out a bit
more in the comment? Maybe:

-/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. */
+/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two.
+ * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique
+ * size, and we can differentiate heads from skb_small_head_cache
+ * vs system slabs by looking at their size (skb_end_offset()).
+ */

Patch

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ae0b2aa1f01e8060cc4fe69137e9bd98e44280cc..3e540b4924701cc57b6fbd1b668bab3b652ee94c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -89,6 +89,19 @@  static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
 #ifdef CONFIG_SKB_EXTENSIONS
 static struct kmem_cache *skbuff_ext_cache __ro_after_init;
 #endif
+static struct kmem_cache *skb_small_head_cache __ro_after_init;
+
+#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
+
+/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. */
+#define SKB_SMALL_HEAD_CACHE_SIZE					\
+	(is_power_of_2(SKB_SMALL_HEAD_SIZE) ?			\
+		(SKB_SMALL_HEAD_SIZE + L1_CACHE_BYTES) :	\
+		SKB_SMALL_HEAD_SIZE)
+
+#define SKB_SMALL_HEAD_HEADROOM						\
+	SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
+
 int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
 EXPORT_SYMBOL(sysctl_max_skb_frags);
 
@@ -486,6 +499,21 @@  static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
 	void *obj;
 
 	obj_size = SKB_HEAD_ALIGN(*size);
+	if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
+	    !(flags & KMALLOC_NOT_NORMAL_BITS)) {
+
+		/* skb_small_head_cache has non power of two size,
+		 * likely forcing SLUB to use order-3 pages.
+		 * We deliberately attempt a NOMEMALLOC allocation only.
+		 */
+		obj = kmem_cache_alloc_node(skb_small_head_cache,
+				flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
+				node);
+		if (obj) {
+			*size = SKB_SMALL_HEAD_CACHE_SIZE;
+			goto out;
+		}
+	}
 	*size = obj_size = kmalloc_size_roundup(obj_size);
 	/*
 	 * Try a regular allocation, when that fails and we're not entitled
@@ -805,6 +833,14 @@  static bool skb_pp_recycle(struct sk_buff *skb, void *data)
 	return page_pool_return_skb_page(virt_to_page(data));
 }
 
+static void skb_kfree_head(void *head, unsigned int end_offset)
+{
+	if (end_offset == SKB_SMALL_HEAD_HEADROOM)
+		kmem_cache_free(skb_small_head_cache, head);
+	else
+		kfree(head);
+}
+
 static void skb_free_head(struct sk_buff *skb)
 {
 	unsigned char *head = skb->head;
@@ -814,7 +850,7 @@  static void skb_free_head(struct sk_buff *skb)
 			return;
 		skb_free_frag(head);
 	} else {
-		kfree(head);
+		skb_kfree_head(head, skb_end_offset(skb));
 	}
 }
 
@@ -1995,7 +2031,7 @@  int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	return 0;
 
 nofrags:
-	kfree(data);
+	skb_kfree_head(data, size);
 nodata:
 	return -ENOMEM;
 }
@@ -4633,6 +4669,12 @@  void __init skb_init(void)
 						0,
 						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 						NULL);
+	skb_small_head_cache = kmem_cache_create("skbuff_small_head",
+						SKB_SMALL_HEAD_CACHE_SIZE,
+						0,
+						SLAB_HWCACHE_ALIGN | SLAB_PANIC,
+						NULL);
+
 	skb_extensions_init();
 }
 
@@ -6297,7 +6339,7 @@  static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
 	if (skb_cloned(skb)) {
 		/* drop the old head gracefully */
 		if (skb_orphan_frags(skb, gfp_mask)) {
-			kfree(data);
+			skb_kfree_head(data, size);
 			return -ENOMEM;
 		}
 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
@@ -6405,7 +6447,7 @@  static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
 	memcpy((struct skb_shared_info *)(data + size),
 	       skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
 	if (skb_orphan_frags(skb, gfp_mask)) {
-		kfree(data);
+		skb_kfree_head(data, size);
 		return -ENOMEM;
 	}
 	shinfo = (struct skb_shared_info *)(data + size);
@@ -6441,7 +6483,7 @@  static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
 		/* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */
 		if (skb_has_frag_list(skb))
 			kfree_skb_list(skb_shinfo(skb)->frag_list);
-		kfree(data);
+		skb_kfree_head(data, size);
 		return -ENOMEM;
 	}
 	skb_release_data(skb, SKB_CONSUMED);