diff mbox series

[RFC,v11,07/14] mm: page_frag: reuse existing space for 'size' and 'pfmemalloc'

Message ID 20240719093338.55117-8-linyunsheng@huawei.com (mailing list archive)
State Superseded
Headers show
Series Replace page_frag with page_frag_cache for sk_page_frag() | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Guessed tree name to be net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1313 this patch: 1313
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 3 of 3 maintainers
netdev/build_clang success Errors and warnings before: 1817 this patch: 1817
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 16107 this patch: 16107
netdev/checkpatch warning WARNING: Do not crash the kernel unless it is absolutely unavoidable--use WARN_ON_ONCE() plus recovery code (if feasible) instead of BUG() or variants
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Yunsheng Lin July 19, 2024, 9:33 a.m. UTC
Currently there is one 'struct page_frag' for every 'struct
sock' and 'struct task_struct', we are about to replace the
'struct page_frag' with 'struct page_frag_cache' for them.
Before begin the replacing, we need to ensure the size of
'struct page_frag_cache' is not bigger than the size of
'struct page_frag', as there may be tens of thousands of
'struct sock' and 'struct task_struct' instances in the
system.

By or'ing the page order & pfmemalloc with lower bits of
'va' instead of using 'u16' or 'u32' for page size and 'u8'
for pfmemalloc, we are able to avoid 3 or 5 bytes space waste.
And page address & pfmemalloc & order is unchanged for the
same page in the same 'page_frag_cache' instance, it makes
sense to fit them together.

After this patch, the size of 'struct page_frag_cache' should be
the same as the size of 'struct page_frag'.

CC: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
 include/linux/mm_types_task.h   | 16 +++++------
 include/linux/page_frag_cache.h | 49 +++++++++++++++++++++++++++++++--
 mm/page_frag_cache.c            | 49 +++++++++++++++------------------
 3 files changed, 77 insertions(+), 37 deletions(-)

Comments

Alexander Duyck July 21, 2024, 10:59 p.m. UTC | #1
On Fri, 2024-07-19 at 17:33 +0800, Yunsheng Lin wrote:
> Currently there is one 'struct page_frag' for every 'struct
> sock' and 'struct task_struct', we are about to replace the
> 'struct page_frag' with 'struct page_frag_cache' for them.
> Before begin the replacing, we need to ensure the size of
> 'struct page_frag_cache' is not bigger than the size of
> 'struct page_frag', as there may be tens of thousands of
> 'struct sock' and 'struct task_struct' instances in the
> system.
> 
> By or'ing the page order & pfmemalloc with lower bits of
> 'va' instead of using 'u16' or 'u32' for page size and 'u8'
> for pfmemalloc, we are able to avoid 3 or 5 bytes space waste.
> And page address & pfmemalloc & order is unchanged for the
> same page in the same 'page_frag_cache' instance, it makes
> sense to fit them together.
> 
> After this patch, the size of 'struct page_frag_cache' should be
> the same as the size of 'struct page_frag'.
> 
> CC: Alexander Duyck <alexander.duyck@gmail.com>
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> ---
>  include/linux/mm_types_task.h   | 16 +++++------
>  include/linux/page_frag_cache.h | 49 +++++++++++++++++++++++++++++++--
>  mm/page_frag_cache.c            | 49 +++++++++++++++------------------
>  3 files changed, 77 insertions(+), 37 deletions(-)
> 
> diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
> index b1c54b2b9308..f2610112a642 100644
> --- a/include/linux/mm_types_task.h
> +++ b/include/linux/mm_types_task.h
> @@ -50,18 +50,18 @@ struct page_frag {
>  #define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
>  #define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
>  struct page_frag_cache {
> -	void *va;
> -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> +	/* encoded_va consists of the virtual address, pfmemalloc bit and order
> +	 * of a page.
> +	 */
> +	unsigned long encoded_va;
> +
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
>  	__u16 remaining;
> -	__u16 size;
> +	__u16 pagecnt_bias;
>  #else
>  	__u32 remaining;
> +	__u32 pagecnt_bias;
>  #endif
> -	/* we maintain a pagecount bias, so that we dont dirty cache line
> -	 * containing page->_refcount every time we allocate a fragment.
> -	 */
> -	unsigned int		pagecnt_bias;
> -	bool pfmemalloc;
>  };
>  
>  /* Track pages that require TLB flushes */
> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
> index ef1572f11248..12a16f8e8ad0 100644
> --- a/include/linux/page_frag_cache.h
> +++ b/include/linux/page_frag_cache.h
> @@ -3,19 +3,64 @@
>  #ifndef _LINUX_PAGE_FRAG_CACHE_H
>  #define _LINUX_PAGE_FRAG_CACHE_H
>  
> +#include <linux/bits.h>
> +#include <linux/build_bug.h>
>  #include <linux/log2.h>
>  #include <linux/types.h>
>  #include <linux/mm_types_task.h>
>  #include <asm/page.h>
>  
> +#define PAGE_FRAG_CACHE_ORDER_MASK		GENMASK(7, 0)

I would pull the PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE check from below
and use it to wrap this mask definition. If we don't need order you
could define the mask as 0. With that you get the benefit of the
compiler being able to figure out we don't read things as any value
ANDed with 0 is 0.

Also a comment explaining why you want it to be a full byte here would
be useful. I am assuming this is for assembler optimization as the
shift operation is usually expecting a byte.

> +#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT		BIT(8)
> +#define PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT	8
> +
> +static inline unsigned long encode_aligned_va(void *va, unsigned int order,
> +					      bool pfmemalloc)
> +{
> +	BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK);
> +	BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT >= PAGE_SHIFT);
> +
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> +	return (unsigned long)va | order |
> +		(pfmemalloc << PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT);
> +#else
> +	return (unsigned long)va |
> +		(pfmemalloc << PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT);
> +#endif

So with the mask trick I called out above you could just have (order &
PAGE_FRAG_CACHE_ORDER_MASK) be one of your inputs. If ORDER_MASK is 0
it should just strip the compiler will know it will turn out 0.

Also doing a shift on a bool is a risky action. What you might look at
doing instead would be something like a multiplication of a unsigned
long bit by a bool, or at least you need to recast pfmemalloc to
something other than a bool.

> +}
> +
> +static inline unsigned long encoded_page_order(unsigned long encoded_va)
> +{
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> +	return encoded_va & PAGE_FRAG_CACHE_ORDER_MASK;
> +#else
> +	return 0;
> +#endif
> +}
> +

As mentioned above, if the mask takes care of it for us it should just
return 0 automatically and cut out this code without the #if/else
logic.

> +static inline bool encoded_page_pfmemalloc(unsigned long encoded_va)
> +{
> +	return encoded_va & PAGE_FRAG_CACHE_PFMEMALLOC_BIT;
> +}
> +

Technically you aren't returning a bool here, you are returning an
unsigned long. It would be best to wrap this in "!!()".

> +static inline void *encoded_page_address(unsigned long encoded_va)
> +{
> +	return (void *)(encoded_va & PAGE_MASK);
> +}
> +
>  static inline void page_frag_cache_init(struct page_frag_cache *nc)
>  {
> -	nc->va = NULL;
> +	nc->encoded_va = 0;
>  }
>  
>  static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
>  {
> -	return !!nc->pfmemalloc;
> +	return encoded_page_pfmemalloc(nc->encoded_va);
> +}
> +
> +static inline unsigned int page_frag_cache_page_size(unsigned long encoded_va)
> +{
> +	return PAGE_SIZE << encoded_page_order(encoded_va);
>  }
>  
>  void page_frag_cache_drain(struct page_frag_cache *nc);
> diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
> index b12496f05c4a..7928e5d50711 100644
> --- a/mm/page_frag_cache.c
> +++ b/mm/page_frag_cache.c
> @@ -22,7 +22,7 @@
>  static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
>  					     gfp_t gfp_mask)
>  {
> -	unsigned int page_size = PAGE_FRAG_CACHE_MAX_SIZE;
> +	unsigned long order = PAGE_FRAG_CACHE_MAX_ORDER;
>  	struct page *page = NULL;
>  	gfp_t gfp = gfp_mask;
>  
> @@ -35,28 +35,27 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
>  	if (unlikely(!page)) {
>  		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
>  		if (unlikely(!page)) {
> -			nc->va = NULL;
> +			nc->encoded_va = 0;
>  			return NULL;
>  		}
>  
> -		page_size = PAGE_SIZE;
> +		order = 0;
>  	}
>  
> -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> -	nc->size = page_size;
> -#endif
> -	nc->va = page_address(page);
> +	nc->encoded_va = encode_aligned_va(page_address(page), order,
> +					   page_is_pfmemalloc(page));
>  
>  	return page;
>  }
>  
>  void page_frag_cache_drain(struct page_frag_cache *nc)
>  {
> -	if (!nc->va)
> +	if (!nc->encoded_va)
>  		return;
>  
> -	__page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
> -	nc->va = NULL;
> +	__page_frag_cache_drain(virt_to_head_page((void *)nc->encoded_va),
> +				nc->pagecnt_bias);
> +	nc->encoded_va = 0;
>  }
>  EXPORT_SYMBOL(page_frag_cache_drain);
>  
> @@ -73,36 +72,30 @@ void *__page_frag_alloc_va_align(struct page_frag_cache *nc,
>  				 unsigned int fragsz, gfp_t gfp_mask,
>  				 unsigned int align_mask)
>  {
> -	unsigned int size = PAGE_SIZE;
> -	unsigned int remaining;
> +	unsigned long encoded_va = nc->encoded_va;
> +	unsigned int size, remaining;
>  	struct page *page;
>  
> -	if (unlikely(!nc->va)) {
> +	if (unlikely(!encoded_va)) {
>  refill:
>  		page = __page_frag_cache_refill(nc, gfp_mask);
>  		if (!page)
>  			return NULL;
>  
> -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> -		/* if size can vary use size else just use PAGE_SIZE */
> -		size = nc->size;
> -#endif
> +		encoded_va = nc->encoded_va;
> +		size = page_frag_cache_page_size(encoded_va);
> +
>  		/* Even if we own the page, we do not use atomic_set().
>  		 * This would break get_page_unless_zero() users.
>  		 */
>  		page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
>  
>  		/* reset page count bias and remaining to start of new frag */
> -		nc->pfmemalloc = page_is_pfmemalloc(page);
>  		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
>  		nc->remaining = size;
>  	}
>  
> -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> -	/* if size can vary use size else just use PAGE_SIZE */
> -	size = nc->size;
> -#endif
> -
> +	size = page_frag_cache_page_size(encoded_va);

As I think I mentioned in an earlier patch it would probably be better
to do this before the if statement above. That way you avoid
recomputing size when you allocate a new page. With any luck the
compiler will realize that this is essentially an "else" for the if
statement above. Either that or just make this an else for the
allocation block above.

>  	remaining = nc->remaining & align_mask;
>  	if (unlikely(remaining < fragsz)) {
>  		if (unlikely(fragsz > PAGE_SIZE)) {
> @@ -118,13 +111,15 @@ void *__page_frag_alloc_va_align(struct page_frag_cache *nc,
>  			return NULL;
>  		}
>  
> -		page = virt_to_page(nc->va);
> +		page = virt_to_page((void *)encoded_va);
>  
>  		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
>  			goto refill;
>  
> -		if (unlikely(nc->pfmemalloc)) {
> -			free_unref_page(page, compound_order(page));
> +		if (unlikely(encoded_page_pfmemalloc(encoded_va))) {
> +			VM_BUG_ON(compound_order(page) !=
> +				  encoded_page_order(encoded_va));
> +			free_unref_page(page, encoded_page_order(encoded_va));
>  			goto refill;
>  		}
>  
> @@ -141,7 +136,7 @@ void *__page_frag_alloc_va_align(struct page_frag_cache *nc,
>  	nc->pagecnt_bias--;
>  	nc->remaining = remaining - fragsz;
>  
> -	return nc->va + (size - remaining);
> +	return encoded_page_address(encoded_va) + (size - remaining);
>  }
>  EXPORT_SYMBOL(__page_frag_alloc_va_align);
>
diff mbox series

Patch

diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index b1c54b2b9308..f2610112a642 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -50,18 +50,18 @@  struct page_frag {
 #define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
 #define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
 struct page_frag_cache {
-	void *va;
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+	/* encoded_va consists of the virtual address, pfmemalloc bit and order
+	 * of a page.
+	 */
+	unsigned long encoded_va;
+
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
 	__u16 remaining;
-	__u16 size;
+	__u16 pagecnt_bias;
 #else
 	__u32 remaining;
+	__u32 pagecnt_bias;
 #endif
-	/* we maintain a pagecount bias, so that we dont dirty cache line
-	 * containing page->_refcount every time we allocate a fragment.
-	 */
-	unsigned int		pagecnt_bias;
-	bool pfmemalloc;
 };
 
 /* Track pages that require TLB flushes */
diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
index ef1572f11248..12a16f8e8ad0 100644
--- a/include/linux/page_frag_cache.h
+++ b/include/linux/page_frag_cache.h
@@ -3,19 +3,64 @@ 
 #ifndef _LINUX_PAGE_FRAG_CACHE_H
 #define _LINUX_PAGE_FRAG_CACHE_H
 
+#include <linux/bits.h>
+#include <linux/build_bug.h>
 #include <linux/log2.h>
 #include <linux/types.h>
 #include <linux/mm_types_task.h>
 #include <asm/page.h>
 
+#define PAGE_FRAG_CACHE_ORDER_MASK		GENMASK(7, 0)
+#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT		BIT(8)
+#define PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT	8
+
+static inline unsigned long encode_aligned_va(void *va, unsigned int order,
+					      bool pfmemalloc)
+{
+	BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK);
+	BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT >= PAGE_SHIFT);
+
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+	return (unsigned long)va | order |
+		(pfmemalloc << PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT);
+#else
+	return (unsigned long)va |
+		(pfmemalloc << PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT);
+#endif
+}
+
+static inline unsigned long encoded_page_order(unsigned long encoded_va)
+{
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+	return encoded_va & PAGE_FRAG_CACHE_ORDER_MASK;
+#else
+	return 0;
+#endif
+}
+
+static inline bool encoded_page_pfmemalloc(unsigned long encoded_va)
+{
+	return encoded_va & PAGE_FRAG_CACHE_PFMEMALLOC_BIT;
+}
+
+static inline void *encoded_page_address(unsigned long encoded_va)
+{
+	return (void *)(encoded_va & PAGE_MASK);
+}
+
 static inline void page_frag_cache_init(struct page_frag_cache *nc)
 {
-	nc->va = NULL;
+	nc->encoded_va = 0;
 }
 
 static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
 {
-	return !!nc->pfmemalloc;
+	return encoded_page_pfmemalloc(nc->encoded_va);
+}
+
+static inline unsigned int page_frag_cache_page_size(unsigned long encoded_va)
+{
+	return PAGE_SIZE << encoded_page_order(encoded_va);
 }
 
 void page_frag_cache_drain(struct page_frag_cache *nc);
diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
index b12496f05c4a..7928e5d50711 100644
--- a/mm/page_frag_cache.c
+++ b/mm/page_frag_cache.c
@@ -22,7 +22,7 @@ 
 static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
 					     gfp_t gfp_mask)
 {
-	unsigned int page_size = PAGE_FRAG_CACHE_MAX_SIZE;
+	unsigned long order = PAGE_FRAG_CACHE_MAX_ORDER;
 	struct page *page = NULL;
 	gfp_t gfp = gfp_mask;
 
@@ -35,28 +35,27 @@  static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
 	if (unlikely(!page)) {
 		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
 		if (unlikely(!page)) {
-			nc->va = NULL;
+			nc->encoded_va = 0;
 			return NULL;
 		}
 
-		page_size = PAGE_SIZE;
+		order = 0;
 	}
 
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-	nc->size = page_size;
-#endif
-	nc->va = page_address(page);
+	nc->encoded_va = encode_aligned_va(page_address(page), order,
+					   page_is_pfmemalloc(page));
 
 	return page;
 }
 
 void page_frag_cache_drain(struct page_frag_cache *nc)
 {
-	if (!nc->va)
+	if (!nc->encoded_va)
 		return;
 
-	__page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
-	nc->va = NULL;
+	__page_frag_cache_drain(virt_to_head_page((void *)nc->encoded_va),
+				nc->pagecnt_bias);
+	nc->encoded_va = 0;
 }
 EXPORT_SYMBOL(page_frag_cache_drain);
 
@@ -73,36 +72,30 @@  void *__page_frag_alloc_va_align(struct page_frag_cache *nc,
 				 unsigned int fragsz, gfp_t gfp_mask,
 				 unsigned int align_mask)
 {
-	unsigned int size = PAGE_SIZE;
-	unsigned int remaining;
+	unsigned long encoded_va = nc->encoded_va;
+	unsigned int size, remaining;
 	struct page *page;
 
-	if (unlikely(!nc->va)) {
+	if (unlikely(!encoded_va)) {
 refill:
 		page = __page_frag_cache_refill(nc, gfp_mask);
 		if (!page)
 			return NULL;
 
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-		/* if size can vary use size else just use PAGE_SIZE */
-		size = nc->size;
-#endif
+		encoded_va = nc->encoded_va;
+		size = page_frag_cache_page_size(encoded_va);
+
 		/* Even if we own the page, we do not use atomic_set().
 		 * This would break get_page_unless_zero() users.
 		 */
 		page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
 
 		/* reset page count bias and remaining to start of new frag */
-		nc->pfmemalloc = page_is_pfmemalloc(page);
 		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
 		nc->remaining = size;
 	}
 
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-	/* if size can vary use size else just use PAGE_SIZE */
-	size = nc->size;
-#endif
-
+	size = page_frag_cache_page_size(encoded_va);
 	remaining = nc->remaining & align_mask;
 	if (unlikely(remaining < fragsz)) {
 		if (unlikely(fragsz > PAGE_SIZE)) {
@@ -118,13 +111,15 @@  void *__page_frag_alloc_va_align(struct page_frag_cache *nc,
 			return NULL;
 		}
 
-		page = virt_to_page(nc->va);
+		page = virt_to_page((void *)encoded_va);
 
 		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
 			goto refill;
 
-		if (unlikely(nc->pfmemalloc)) {
-			free_unref_page(page, compound_order(page));
+		if (unlikely(encoded_page_pfmemalloc(encoded_va))) {
+			VM_BUG_ON(compound_order(page) !=
+				  encoded_page_order(encoded_va));
+			free_unref_page(page, encoded_page_order(encoded_va));
 			goto refill;
 		}
 
@@ -141,7 +136,7 @@  void *__page_frag_alloc_va_align(struct page_frag_cache *nc,
 	nc->pagecnt_bias--;
 	nc->remaining = remaining - fragsz;
 
-	return nc->va + (size - remaining);
+	return encoded_page_address(encoded_va) + (size - remaining);
 }
 EXPORT_SYMBOL(__page_frag_alloc_va_align);