diff mbox series

[net-next,v9,02/13] mm: move the page fragment allocator from page_alloc into its own file

Message ID 20240625135216.47007-3-linyunsheng@huawei.com (mailing list archive)
State New
Headers show
Series [net-next,v9,01/13] mm: page_frag: add a test module for page_frag | expand

Commit Message

Yunsheng Lin June 25, 2024, 1:52 p.m. UTC
Inspired by [1], move the page fragment allocator from page_alloc
into its own C file and header file, as we are about to make more
changes to it so that it can replace another page_frag implementation
in sock.c.

1. https://lore.kernel.org/all/20230411160902.4134381-3-dhowells@redhat.com/

CC: David Howells <dhowells@redhat.com>
CC: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
 include/linux/gfp.h             |  22 -----
 include/linux/mm_types.h        |  18 ----
 include/linux/page_frag_cache.h |  47 +++++++++++
 include/linux/skbuff.h          |   1 +
 mm/Makefile                     |   1 +
 mm/page_alloc.c                 | 136 ------------------------------
 mm/page_frag_cache.c            | 144 ++++++++++++++++++++++++++++++++
 mm/page_frag_test.c             |   1 +
 8 files changed, 194 insertions(+), 176 deletions(-)
 create mode 100644 include/linux/page_frag_cache.h
 create mode 100644 mm/page_frag_cache.c

Comments

Alexander Duyck July 1, 2024, 11:10 p.m. UTC | #1
On Tue, 2024-06-25 at 21:52 +0800, Yunsheng Lin wrote:
> Inspired by [1], move the page fragment allocator from page_alloc
> into its own c file and header file, as we are about to make more
> change for it to replace another page_frag implementation in
> sock.c
> 
> 1. https://lore.kernel.org/all/20230411160902.4134381-3-dhowells@redhat.com/
> 
> CC: David Howells <dhowells@redhat.com>
> CC: Alexander Duyck <alexander.duyck@gmail.com>
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>

So one thing that I think might have been overlooked in the previous
reviews is the fact that the headers weren't necessarily
self-sufficient. You were introducing dependencies that had to be
fulfilled by other headers.

One thing you might try doing as part of your testing would be to add a
C file that just includes your header and calls your functions to verify
that there aren't any unincluded dependencies.

> ---
>  include/linux/gfp.h             |  22 -----
>  include/linux/mm_types.h        |  18 ----
>  include/linux/page_frag_cache.h |  47 +++++++++++
>  include/linux/skbuff.h          |   1 +
>  mm/Makefile                     |   1 +
>  mm/page_alloc.c                 | 136 ------------------------------
>  mm/page_frag_cache.c            | 144 ++++++++++++++++++++++++++++++++
>  mm/page_frag_test.c             |   1 +
>  8 files changed, 194 insertions(+), 176 deletions(-)
>  create mode 100644 include/linux/page_frag_cache.h
>  create mode 100644 mm/page_frag_cache.c
> 
...

> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
> new file mode 100644
> index 000000000000..3a44bfc99750
> --- /dev/null
> +++ b/include/linux/page_frag_cache.h
> @@ -0,0 +1,47 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef _LINUX_PAGE_FRAG_CACHE_H
> +#define _LINUX_PAGE_FRAG_CACHE_H
> +
> +#include <linux/gfp_types.h>
> +

gfp_types.h only really gives you the flag values you pass in
gfp_mask. Did you mean to include linux/types.h to get the gfp_t
typedef?

> +#define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)

You should probably include linux/align.h to pull in the __ALIGN_MASK.

> +#define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)

I am pretty sure get_order is from asm/page.h as well.

> +
> +struct page_frag_cache {
> +	void *va;
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)

I am pretty sure PAGE_SIZE comes from asm/page.h.

> +	__u16 offset;
> +	__u16 size;
> +#else
> +	__u32 offset;
> +#endif
> +	/* we maintain a pagecount bias, so that we dont dirty cache line
> +	 * containing page->_refcount every time we allocate a fragment.
> +	 */
> +	unsigned int		pagecnt_bias;
> +	bool pfmemalloc;
> +};
> +
> +void page_frag_cache_drain(struct page_frag_cache *nc);
> +void __page_frag_cache_drain(struct page *page, unsigned int count);
> +void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
> +			      gfp_t gfp_mask, unsigned int align_mask);
> +
> +static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
> +					  unsigned int fragsz, gfp_t gfp_mask,
> +					  unsigned int align)
> +{
> +	WARN_ON_ONCE(!is_power_of_2(align));

To get is_power_of_2 you should be including linux/log2.h.

> +	return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
> +}
> +
> +static inline void *page_frag_alloc(struct page_frag_cache *nc,
> +				    unsigned int fragsz, gfp_t gfp_mask)
> +{
> +	return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
> +}
> +
> +void page_frag_free(void *addr);
> +
> +#endif
> 

...

> diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
> new file mode 100644
> index 000000000000..88f567ef0e29
> --- /dev/null
> +++ b/mm/page_frag_cache.c
> @@ -0,0 +1,144 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Page fragment allocator
> + *
> + * Page Fragment:
> + *  An arbitrary-length arbitrary-offset area of memory which resides within a
> + *  0 or higher order page.  Multiple fragments within that page are
> + *  individually refcounted, in the page's reference counter.
> + *
> + * The page_frag functions provide a simple allocation framework for page
> + * fragments.  This is used by the network stack and network device drivers to
> + * provide a backing region of memory for use as either an sk_buff->head, or to
> + * be used in the "frags" portion of skb_shared_info.
> + */
> +
> +#include <linux/export.h>
> +#include <linux/init.h>
> +#include <linux/mm.h>
> +#include <linux/page_frag_cache.h>
> +#include "internal.h"

You could probably include gfp_types.h here since this is where you are
using the GFP_XXX values.

> +
> +static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
> +					     gfp_t gfp_mask)
> +{
> +	struct page *page = NULL;
> +	gfp_t gfp = gfp_mask;
> +
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> +	gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) |  __GFP_COMP |
> +		   __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
> +	page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
> +				PAGE_FRAG_CACHE_MAX_ORDER);
> +	nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
> +#endif
> +	if (unlikely(!page))
> +		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
> +
> +	nc->va = page ? page_address(page) : NULL;
> +
> +	return page;
> +}
> +
> +void page_frag_cache_drain(struct page_frag_cache *nc)
> +{
> +	if (!nc->va)
> +		return;
> +
> +	__page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
> +	nc->va = NULL;
> +}
> +EXPORT_SYMBOL(page_frag_cache_drain);
> +
> +void __page_frag_cache_drain(struct page *page, unsigned int count)
> +{
> +	VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
> +
> +	if (page_ref_sub_and_test(page, count))
> +		free_unref_page(page, compound_order(page));
> +}
> +EXPORT_SYMBOL(__page_frag_cache_drain);
> +
> +void *__page_frag_alloc_align(struct page_frag_cache *nc,
> +			      unsigned int fragsz, gfp_t gfp_mask,
> +			      unsigned int align_mask)
> +{
> +	unsigned int size = PAGE_SIZE;
> +	struct page *page;
> +	int offset;
> +
> +	if (unlikely(!nc->va)) {
> +refill:
> +		page = __page_frag_cache_refill(nc, gfp_mask);
> +		if (!page)
> +			return NULL;
> +
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> +		/* if size can vary use size else just use PAGE_SIZE */
> +		size = nc->size;
> +#endif
> +		/* Even if we own the page, we do not use atomic_set().
> +		 * This would break get_page_unless_zero() users.
> +		 */
> +		page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
> +
> +		/* reset page count bias and offset to start of new frag */
> +		nc->pfmemalloc = page_is_pfmemalloc(page);
> +		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
> +		nc->offset = size;
> +	}
> +
> +	offset = nc->offset - fragsz;
> +	if (unlikely(offset < 0)) {
> +		page = virt_to_page(nc->va);
> +
> +		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
> +			goto refill;
> +
> +		if (unlikely(nc->pfmemalloc)) {
> +			free_unref_page(page, compound_order(page));
> +			goto refill;
> +		}
> +
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> +		/* if size can vary use size else just use PAGE_SIZE */
> +		size = nc->size;
> +#endif
> +		/* OK, page count is 0, we can safely set it */
> +		set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
> +
> +		/* reset page count bias and offset to start of new frag */
> +		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
> +		offset = size - fragsz;
> +		if (unlikely(offset < 0)) {
> +			/*
> +			 * The caller is trying to allocate a fragment
> +			 * with fragsz > PAGE_SIZE but the cache isn't big
> +			 * enough to satisfy the request, this may
> +			 * happen in low memory conditions.
> +			 * We don't release the cache page because
> +			 * it could make memory pressure worse
> +			 * so we simply return NULL here.
> +			 */
> +			return NULL;
> +		}
> +	}
> +
> +	nc->pagecnt_bias--;
> +	offset &= align_mask;
> +	nc->offset = offset;
> +
> +	return nc->va + offset;
> +}
> +EXPORT_SYMBOL(__page_frag_alloc_align);
> +
> +/*
> + * Frees a page fragment allocated out of either a compound or order 0 page.
> + */
> +void page_frag_free(void *addr)
> +{
> +	struct page *page = virt_to_head_page(addr);
> +
> +	if (unlikely(put_page_testzero(page)))
> +		free_unref_page(page, compound_order(page));
> +}
> +EXPORT_SYMBOL(page_frag_free);
> diff --git a/mm/page_frag_test.c b/mm/page_frag_test.c
> index 5ee3f33b756d..07748ee0a21f 100644
> --- a/mm/page_frag_test.c
> +++ b/mm/page_frag_test.c
> @@ -16,6 +16,7 @@
>  #include <linux/log2.h>
>  #include <linux/completion.h>
>  #include <linux/kthread.h>
> +#include <linux/page_frag_cache.h>
>  
>  #define OBJPOOL_NR_OBJECT_MAX	BIT(24)
>
Yunsheng Lin July 2, 2024, 12:27 p.m. UTC | #2
On 2024/7/2 7:10, Alexander H Duyck wrote:
> On Tue, 2024-06-25 at 21:52 +0800, Yunsheng Lin wrote:
>> Inspired by [1], move the page fragment allocator from page_alloc
>> into its own c file and header file, as we are about to make more
>> change for it to replace another page_frag implementation in
>> sock.c
>>
>> 1. https://lore.kernel.org/all/20230411160902.4134381-3-dhowells@redhat.com/
>>
>> CC: David Howells <dhowells@redhat.com>
>> CC: Alexander Duyck <alexander.duyck@gmail.com>
>> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> 
> So one thing that I think might have been overlooked in the previous
> reviews is the fact that the headers weren't necessarily self
> sufficient. You were introducing dependencies that had to be fulfilled
> by other headers.
> 
> One thing you might try doing as part of your testing would be to add a
> C file that just adds your header and calls your functions to verify
> that there aren't any unincluded dependencies.

Sure, will do.

>
diff mbox series

Patch

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 7f9691d375f0..3d8f9dc6c6ee 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -363,28 +363,6 @@  __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mas
 extern void __free_pages(struct page *page, unsigned int order);
 extern void free_pages(unsigned long addr, unsigned int order);
 
-struct page_frag_cache;
-void page_frag_cache_drain(struct page_frag_cache *nc);
-extern void __page_frag_cache_drain(struct page *page, unsigned int count);
-void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
-			      gfp_t gfp_mask, unsigned int align_mask);
-
-static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
-					  unsigned int fragsz, gfp_t gfp_mask,
-					  unsigned int align)
-{
-	WARN_ON_ONCE(!is_power_of_2(align));
-	return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
-}
-
-static inline void *page_frag_alloc(struct page_frag_cache *nc,
-			     unsigned int fragsz, gfp_t gfp_mask)
-{
-	return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
-}
-
-extern void page_frag_free(void *addr);
-
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index af3a0256fa93..7a4e695a7a1e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -505,9 +505,6 @@  static_assert(sizeof(struct ptdesc) <= sizeof(struct page));
  */
 #define STRUCT_PAGE_MAX_SHIFT	(order_base_2(sizeof(struct page)))
 
-#define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
-#define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
-
 /*
  * page_private can be used on tail pages.  However, PagePrivate is only
  * checked by the VM on the head page.  So page_private on the tail pages
@@ -526,21 +523,6 @@  static inline void *folio_get_private(struct folio *folio)
 	return folio->private;
 }
 
-struct page_frag_cache {
-	void * va;
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-	__u16 offset;
-	__u16 size;
-#else
-	__u32 offset;
-#endif
-	/* we maintain a pagecount bias, so that we dont dirty cache line
-	 * containing page->_refcount every time we allocate a fragment.
-	 */
-	unsigned int		pagecnt_bias;
-	bool pfmemalloc;
-};
-
 typedef unsigned long vm_flags_t;
 
 /*
diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
new file mode 100644
index 000000000000..3a44bfc99750
--- /dev/null
+++ b/include/linux/page_frag_cache.h
@@ -0,0 +1,47 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_PAGE_FRAG_CACHE_H
+#define _LINUX_PAGE_FRAG_CACHE_H
+
+#include <linux/gfp_types.h>
+
+#define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
+#define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
+
+struct page_frag_cache {
+	void *va;
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+	__u16 offset;
+	__u16 size;
+#else
+	__u32 offset;
+#endif
+	/* we maintain a pagecount bias, so that we dont dirty cache line
+	 * containing page->_refcount every time we allocate a fragment.
+	 */
+	unsigned int		pagecnt_bias;
+	bool pfmemalloc;
+};
+
+void page_frag_cache_drain(struct page_frag_cache *nc);
+void __page_frag_cache_drain(struct page *page, unsigned int count);
+void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
+			      gfp_t gfp_mask, unsigned int align_mask);
+
+static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
+					  unsigned int fragsz, gfp_t gfp_mask,
+					  unsigned int align)
+{
+	WARN_ON_ONCE(!is_power_of_2(align));
+	return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
+}
+
+static inline void *page_frag_alloc(struct page_frag_cache *nc,
+				    unsigned int fragsz, gfp_t gfp_mask)
+{
+	return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
+}
+
+void page_frag_free(void *addr);
+
+#endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f4cda3fbdb75..eb8ae8292c48 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -31,6 +31,7 @@ 
 #include <linux/in6.h>
 #include <linux/if_packet.h>
 #include <linux/llist.h>
+#include <linux/page_frag_cache.h>
 #include <net/flow.h>
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 #include <linux/netfilter/nf_conntrack_common.h>
diff --git a/mm/Makefile b/mm/Makefile
index 29d9f7618a33..3080257a0a75 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -64,6 +64,7 @@  page-alloc-$(CONFIG_SHUFFLE_PAGE_ALLOCATOR) += shuffle.o
 memory-hotplug-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 
 obj-y += page-alloc.o
+obj-y += page_frag_cache.o
 obj-y += init-mm.o
 obj-y += memblock.o
 obj-y += $(memory-hotplug-y)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7300aa9f14b0..f9d4cfc9aeb7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4781,142 +4781,6 @@  void free_pages(unsigned long addr, unsigned int order)
 
 EXPORT_SYMBOL(free_pages);
 
-/*
- * Page Fragment:
- *  An arbitrary-length arbitrary-offset area of memory which resides
- *  within a 0 or higher order page.  Multiple fragments within that page
- *  are individually refcounted, in the page's reference counter.
- *
- * The page_frag functions below provide a simple allocation framework for
- * page fragments.  This is used by the network stack and network device
- * drivers to provide a backing region of memory for use as either an
- * sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
- */
-static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
-					     gfp_t gfp_mask)
-{
-	struct page *page = NULL;
-	gfp_t gfp = gfp_mask;
-
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-	gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) |  __GFP_COMP |
-		   __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
-	page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
-				PAGE_FRAG_CACHE_MAX_ORDER);
-	nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
-#endif
-	if (unlikely(!page))
-		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
-
-	nc->va = page ? page_address(page) : NULL;
-
-	return page;
-}
-
-void page_frag_cache_drain(struct page_frag_cache *nc)
-{
-	if (!nc->va)
-		return;
-
-	__page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
-	nc->va = NULL;
-}
-EXPORT_SYMBOL(page_frag_cache_drain);
-
-void __page_frag_cache_drain(struct page *page, unsigned int count)
-{
-	VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
-
-	if (page_ref_sub_and_test(page, count))
-		free_unref_page(page, compound_order(page));
-}
-EXPORT_SYMBOL(__page_frag_cache_drain);
-
-void *__page_frag_alloc_align(struct page_frag_cache *nc,
-			      unsigned int fragsz, gfp_t gfp_mask,
-			      unsigned int align_mask)
-{
-	unsigned int size = PAGE_SIZE;
-	struct page *page;
-	int offset;
-
-	if (unlikely(!nc->va)) {
-refill:
-		page = __page_frag_cache_refill(nc, gfp_mask);
-		if (!page)
-			return NULL;
-
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-		/* if size can vary use size else just use PAGE_SIZE */
-		size = nc->size;
-#endif
-		/* Even if we own the page, we do not use atomic_set().
-		 * This would break get_page_unless_zero() users.
-		 */
-		page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
-
-		/* reset page count bias and offset to start of new frag */
-		nc->pfmemalloc = page_is_pfmemalloc(page);
-		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
-		nc->offset = size;
-	}
-
-	offset = nc->offset - fragsz;
-	if (unlikely(offset < 0)) {
-		page = virt_to_page(nc->va);
-
-		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
-			goto refill;
-
-		if (unlikely(nc->pfmemalloc)) {
-			free_unref_page(page, compound_order(page));
-			goto refill;
-		}
-
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-		/* if size can vary use size else just use PAGE_SIZE */
-		size = nc->size;
-#endif
-		/* OK, page count is 0, we can safely set it */
-		set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
-
-		/* reset page count bias and offset to start of new frag */
-		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
-		offset = size - fragsz;
-		if (unlikely(offset < 0)) {
-			/*
-			 * The caller is trying to allocate a fragment
-			 * with fragsz > PAGE_SIZE but the cache isn't big
-			 * enough to satisfy the request, this may
-			 * happen in low memory conditions.
-			 * We don't release the cache page because
-			 * it could make memory pressure worse
-			 * so we simply return NULL here.
-			 */
-			return NULL;
-		}
-	}
-
-	nc->pagecnt_bias--;
-	offset &= align_mask;
-	nc->offset = offset;
-
-	return nc->va + offset;
-}
-EXPORT_SYMBOL(__page_frag_alloc_align);
-
-/*
- * Frees a page fragment allocated out of either a compound or order 0 page.
- */
-void page_frag_free(void *addr)
-{
-	struct page *page = virt_to_head_page(addr);
-
-	if (unlikely(put_page_testzero(page)))
-		free_unref_page(page, compound_order(page));
-}
-EXPORT_SYMBOL(page_frag_free);
-
 static void *make_alloc_exact(unsigned long addr, unsigned int order,
 		size_t size)
 {
diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
new file mode 100644
index 000000000000..88f567ef0e29
--- /dev/null
+++ b/mm/page_frag_cache.c
@@ -0,0 +1,144 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/* Page fragment allocator
+ *
+ * Page Fragment:
+ *  An arbitrary-length arbitrary-offset area of memory which resides within a
+ *  0 or higher order page.  Multiple fragments within that page are
+ *  individually refcounted, in the page's reference counter.
+ *
+ * The page_frag functions provide a simple allocation framework for page
+ * fragments.  This is used by the network stack and network device drivers to
+ * provide a backing region of memory for use as either an sk_buff->head, or to
+ * be used in the "frags" portion of skb_shared_info.
+ */
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/page_frag_cache.h>
+#include "internal.h"
+
+static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
+					     gfp_t gfp_mask)
+{
+	struct page *page = NULL;
+	gfp_t gfp = gfp_mask;
+
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+	gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) |  __GFP_COMP |
+		   __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
+	page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
+				PAGE_FRAG_CACHE_MAX_ORDER);
+	nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
+#endif
+	if (unlikely(!page))
+		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+
+	nc->va = page ? page_address(page) : NULL;
+
+	return page;
+}
+
+void page_frag_cache_drain(struct page_frag_cache *nc)
+{
+	if (!nc->va)
+		return;
+
+	__page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
+	nc->va = NULL;
+}
+EXPORT_SYMBOL(page_frag_cache_drain);
+
+void __page_frag_cache_drain(struct page *page, unsigned int count)
+{
+	VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
+
+	if (page_ref_sub_and_test(page, count))
+		free_unref_page(page, compound_order(page));
+}
+EXPORT_SYMBOL(__page_frag_cache_drain);
+
+void *__page_frag_alloc_align(struct page_frag_cache *nc,
+			      unsigned int fragsz, gfp_t gfp_mask,
+			      unsigned int align_mask)
+{
+	unsigned int size = PAGE_SIZE;
+	struct page *page;
+	int offset;
+
+	if (unlikely(!nc->va)) {
+refill:
+		page = __page_frag_cache_refill(nc, gfp_mask);
+		if (!page)
+			return NULL;
+
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+		/* if size can vary use size else just use PAGE_SIZE */
+		size = nc->size;
+#endif
+		/* Even if we own the page, we do not use atomic_set().
+		 * This would break get_page_unless_zero() users.
+		 */
+		page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
+
+		/* reset page count bias and offset to start of new frag */
+		nc->pfmemalloc = page_is_pfmemalloc(page);
+		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
+		nc->offset = size;
+	}
+
+	offset = nc->offset - fragsz;
+	if (unlikely(offset < 0)) {
+		page = virt_to_page(nc->va);
+
+		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
+			goto refill;
+
+		if (unlikely(nc->pfmemalloc)) {
+			free_unref_page(page, compound_order(page));
+			goto refill;
+		}
+
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+		/* if size can vary use size else just use PAGE_SIZE */
+		size = nc->size;
+#endif
+		/* OK, page count is 0, we can safely set it */
+		set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
+
+		/* reset page count bias and offset to start of new frag */
+		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
+		offset = size - fragsz;
+		if (unlikely(offset < 0)) {
+			/*
+			 * The caller is trying to allocate a fragment
+			 * with fragsz > PAGE_SIZE but the cache isn't big
+			 * enough to satisfy the request, this may
+			 * happen in low memory conditions.
+			 * We don't release the cache page because
+			 * it could make memory pressure worse
+			 * so we simply return NULL here.
+			 */
+			return NULL;
+		}
+	}
+
+	nc->pagecnt_bias--;
+	offset &= align_mask;
+	nc->offset = offset;
+
+	return nc->va + offset;
+}
+EXPORT_SYMBOL(__page_frag_alloc_align);
+
+/*
+ * Frees a page fragment allocated out of either a compound or order 0 page.
+ */
+void page_frag_free(void *addr)
+{
+	struct page *page = virt_to_head_page(addr);
+
+	if (unlikely(put_page_testzero(page)))
+		free_unref_page(page, compound_order(page));
+}
+EXPORT_SYMBOL(page_frag_free);
diff --git a/mm/page_frag_test.c b/mm/page_frag_test.c
index 5ee3f33b756d..07748ee0a21f 100644
--- a/mm/page_frag_test.c
+++ b/mm/page_frag_test.c
@@ -16,6 +16,7 @@ 
 #include <linux/log2.h>
 #include <linux/completion.h>
 #include <linux/kthread.h>
+#include <linux/page_frag_cache.h>
 
 #define OBJPOOL_NR_OBJECT_MAX	BIT(24)