diff mbox series

[net-next,v2,10/15] mm: page_frag: reuse existing bit field of 'va' for pagecnt_bias

Message ID 20240415131941.51153-11-linyunsheng@huawei.com (mailing list archive)
State New
Headers show
Series [net-next,v2,01/15] mm: page_frag: add a test module for page_frag | expand

Commit Message

Yunsheng Lin April 15, 2024, 1:19 p.m. UTC
As 'va' is always aligned to the order of the page allocated,
we can reuse its LSB bits for the pagecount bias, and remove
the original space needed by 'pagecnt_bias'.
Also limit the 'fragsz' to be at least the size of
'unsigned int' to match the limited pagecnt_bias.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
 include/linux/page_frag_cache.h | 20 +++++++----
 mm/page_frag_cache.c            | 63 +++++++++++++++++++--------------
 2 files changed, 50 insertions(+), 33 deletions(-)

Comments

Alexander Duyck April 16, 2024, 4:33 p.m. UTC | #1
On Mon, 2024-04-15 at 21:19 +0800, Yunsheng Lin wrote:
> As alignment of 'va' is always aligned with the order of the
> page allocated, we can reuse the LSB bits for the pagecount
> bias, and remove the original space needed by 'pagecnt_bias'.
> Also limit the 'fragsz' to be at least the size of
> 'unsigned int' to match the limited pagecnt_bias.
> 
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>

What is the point of this? You are trading off space for size on a data
structure that is only something like 24B in size and only allocated a
few times.

> ---
>  include/linux/page_frag_cache.h | 20 +++++++----
>  mm/page_frag_cache.c            | 63 +++++++++++++++++++--------------
>  2 files changed, 50 insertions(+), 33 deletions(-)
> 
> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
> index 40a7d6da9ef0..a97a1ac017d6 100644
> --- a/include/linux/page_frag_cache.h
> +++ b/include/linux/page_frag_cache.h
> @@ -9,7 +9,18 @@
>  #define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
>  
>  struct page_frag_cache {
> -	void *va;
> +	union {
> +		void *va;
> +		/* we maintain a pagecount bias, so that we dont dirty cache
> +		 * line containing page->_refcount every time we allocate a
> +		 * fragment. As 'va' is always aligned with the order of the
> +		 * page allocated, we can reuse the LSB bits for the pagecount
> +		 * bias, and its bit width happens to be indicated by the
> +		 * 'size_mask' below.
> +		 */
> +		unsigned long pagecnt_bias;
> +
> +	};

Both va and pagecnt_bias are frequently accessed items. If pagecnt_bias
somehow ends up exceeding the alignment of the page we run the risk of
corrupting data or creating a page fault.

In my opinion this is not worth the risk especially since with the
previous change your new change results in 0 size savings on 64b
systems as the structure will be aligned to the size of the pointer.

>  #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
>  	__u16 offset;
>  	__u16 size_mask:15;
> @@ -18,10 +29,6 @@ struct page_frag_cache {
>  	__u32 offset:31;
>  	__u32 pfmemalloc:1;
>  #endif
> -	/* we maintain a pagecount bias, so that we dont dirty cache line
> -	 * containing page->_refcount every time we allocate a fragment.
> -	 */
> -	unsigned int		pagecnt_bias;
>  };
>  
>  static inline void page_frag_cache_init(struct page_frag_cache *nc)
> @@ -56,7 +63,8 @@ static inline void *page_frag_alloc_va_align(struct page_frag_cache *nc,
>  					     gfp_t gfp_mask,
>  					     unsigned int align)
>  {
> -	WARN_ON_ONCE(!is_power_of_2(align) || align >= PAGE_SIZE);
> +	WARN_ON_ONCE(!is_power_of_2(align) || align >= PAGE_SIZE ||
> +		     fragsz < sizeof(unsigned int));

What is the reason for this change? Seems like it is to account for an
issue somewhere.

>  
>  	return __page_frag_alloc_va_align(nc, fragsz, gfp_mask, align);
>  }
> diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
> index 8d93029116e1..5f7f96c88163 100644
> --- a/mm/page_frag_cache.c
> +++ b/mm/page_frag_cache.c
> @@ -18,8 +18,8 @@
>  #include <linux/page_frag_cache.h>
>  #include "internal.h"
>  
> -static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
> -					     gfp_t gfp_mask)
> +static bool __page_frag_cache_refill(struct page_frag_cache *nc,
> +				     gfp_t gfp_mask)
>  {
>  	struct page *page = NULL;
>  	gfp_t gfp = gfp_mask;
> @@ -38,9 +38,26 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
>  	if (unlikely(!page))
>  		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
>  
> -	nc->va = page ? page_address(page) : NULL;
> +	if (unlikely(!page)) {
> +		nc->va = NULL;
> +		return false;
> +	}
> +
> +	nc->va = page_address(page);
>  
> -	return page;
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> +	VM_BUG_ON(nc->pagecnt_bias & nc->size_mask);
> +	page_ref_add(page, nc->size_mask - 1);
> +	nc->pagecnt_bias |= nc->size_mask;
> +#else
> +	VM_BUG_ON(nc->pagecnt_bias & (PAGE_SIZE - 1));
> +	page_ref_add(page, PAGE_SIZE - 2);
> +	nc->pagecnt_bias |= (PAGE_SIZE - 1);
> +#endif
> +
> +	nc->pfmemalloc = page_is_pfmemalloc(page);
> +	nc->offset = 0;
> +	return true;
>  }
>  
>  void page_frag_cache_drain(struct page_frag_cache *nc)
> @@ -65,38 +82,31 @@ EXPORT_SYMBOL(__page_frag_cache_drain);
>  void *page_frag_alloc_va(struct page_frag_cache *nc, unsigned int fragsz,
>  			 gfp_t gfp_mask)
>  {
> -	unsigned int size, offset;
> +	unsigned long size_mask;
> +	unsigned int offset;
>  	struct page *page;
> +	void *va;
>  
>  	if (unlikely(!nc->va)) {
>  refill:
> -		page = __page_frag_cache_refill(nc, gfp_mask);
> -		if (!page)
> +		if (!__page_frag_cache_refill(nc, gfp_mask))
>  			return NULL;
> -
> -		/* Even if we own the page, we do not use atomic_set().
> -		 * This would break get_page_unless_zero() users.
> -		 */
> -		page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
> -
> -		/* reset page count bias and offset to start of new frag */
> -		nc->pfmemalloc = page_is_pfmemalloc(page);
> -		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
> -		nc->offset = 0;
>  	}
>  
>  #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
>  	/* if size can vary use size else just use PAGE_SIZE */
> -	size = nc->size_mask + 1;
> +	size_mask = nc->size_mask;
>  #else
> -	size = PAGE_SIZE;
> +	size_mask = PAGE_SIZE - 1;
>  #endif
>  
> +	va = (void *)((unsigned long)nc->va & ~size_mask);
>  	offset = nc->offset;
> -	if (unlikely(offset + fragsz > size)) {
> -		page = virt_to_page(nc->va);
>  
> -		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
> +	if (unlikely(offset + fragsz > (size_mask + 1))) {
> +		page = virt_to_page(va);
> +
> +		if (!page_ref_sub_and_test(page, nc->pagecnt_bias & size_mask))
>  			goto refill;
>  
>  		if (unlikely(nc->pfmemalloc)) {
> @@ -105,12 +115,11 @@ void *page_frag_alloc_va(struct page_frag_cache *nc, unsigned int fragsz,
>  		}
>  
>  		/* OK, page count is 0, we can safely set it */
> -		set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
> +		set_page_count(page, size_mask);
> +		nc->pagecnt_bias |= size_mask;
>  
> -		/* reset page count bias and offset to start of new frag */
> -		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
>  		offset = 0;
> -		if (unlikely(fragsz > size)) {
> +		if (unlikely(fragsz > (size_mask + 1))) {
>  			/*
>  			 * The caller is trying to allocate a fragment
>  			 * with fragsz > PAGE_SIZE but the cache isn't big
> @@ -127,7 +136,7 @@ void *page_frag_alloc_va(struct page_frag_cache *nc, unsigned int fragsz,
>  	nc->pagecnt_bias--;
>  	nc->offset = offset + fragsz;
>  
> -	return nc->va + offset;
> +	return va + offset;
>  }
>  EXPORT_SYMBOL(page_frag_alloc_va);
>  

The rest of this seems like unnecessary obfuscation and change.
Basically it is adding more overhead to page allocation for no reward.
Yunsheng Lin April 17, 2024, 1:23 p.m. UTC | #2
On 2024/4/17 0:33, Alexander H Duyck wrote:
> On Mon, 2024-04-15 at 21:19 +0800, Yunsheng Lin wrote:
>> As alignment of 'va' is always aligned with the order of the
>> page allocated, we can reuse the LSB bits for the pagecount
>> bias, and remove the original space needed by 'pagecnt_bias'.
>> Also limit the 'fragsz' to be at least the size of
>> 'unsigned int' to match the limited pagecnt_bias.
>>
>> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> 
> What is the point of this? You are trading off space for size on a data
> structure that is only something like 24B in size and only allocated a
> few times.

As we are going to replace page_frag with page_frag_cache in patch 13,
it will no longer be allocated only a few times, as mentioned.

> 
>> ---
>>  include/linux/page_frag_cache.h | 20 +++++++----
>>  mm/page_frag_cache.c            | 63 +++++++++++++++++++--------------
>>  2 files changed, 50 insertions(+), 33 deletions(-)
>>
>> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
>> index 40a7d6da9ef0..a97a1ac017d6 100644
>> --- a/include/linux/page_frag_cache.h
>> +++ b/include/linux/page_frag_cache.h
>> @@ -9,7 +9,18 @@
>>  #define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
>>  
>>  struct page_frag_cache {
>> -	void *va;
>> +	union {
>> +		void *va;
>> +		/* we maintain a pagecount bias, so that we dont dirty cache
>> +		 * line containing page->_refcount every time we allocate a
>> +		 * fragment. As 'va' is always aligned with the order of the
>> +		 * page allocated, we can reuse the LSB bits for the pagecount
>> +		 * bias, and its bit width happens to be indicated by the
>> +		 * 'size_mask' below.
>> +		 */
>> +		unsigned long pagecnt_bias;
>> +
>> +	};
> 
> Both va and pagecnt_bias are frequently accessed items. If pagecnt_bias
> somehow ends up exceeding the alignment of the page we run the risk of
> corrupting data or creating a page fault.
> 
> In my opinion this is not worth the risk especially since with the
> previous change your new change results in 0 size savings on 64b
> systems as the structure will be aligned to the size of the pointer.

But don't we avoid using an extra register and an extra load by reusing
the lower bits of 'va' on 64b systems? An added benefit is the memory
saving on 32b systems, as mentioned in the previous patch.

> 
>>  #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
>>  	__u16 offset;
>>  	__u16 size_mask:15;
>> @@ -18,10 +29,6 @@ struct page_frag_cache {
>>  	__u32 offset:31;
>>  	__u32 pfmemalloc:1;
>>  #endif
>> -	/* we maintain a pagecount bias, so that we dont dirty cache line
>> -	 * containing page->_refcount every time we allocate a fragment.
>> -	 */
>> -	unsigned int		pagecnt_bias;
>>  };
>>  
>>  static inline void page_frag_cache_init(struct page_frag_cache *nc)
>> @@ -56,7 +63,8 @@ static inline void *page_frag_alloc_va_align(struct page_frag_cache *nc,
>>  					     gfp_t gfp_mask,
>>  					     unsigned int align)
>>  {
>> -	WARN_ON_ONCE(!is_power_of_2(align) || align >= PAGE_SIZE);
>> +	WARN_ON_ONCE(!is_power_of_2(align) || align >= PAGE_SIZE ||
>> +		     fragsz < sizeof(unsigned int));
> 
> What is the reason for this change? Seems like it is to account for an
> issue somewhere.

If the fragsz is one, we might not have enough pagecnt_bias for it,
as we are using the lower bits of 'va' now.

> 
>>  
>>  	return __page_frag_alloc_va_align(nc, fragsz, gfp_mask, align);
diff mbox series

Patch

diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
index 40a7d6da9ef0..a97a1ac017d6 100644
--- a/include/linux/page_frag_cache.h
+++ b/include/linux/page_frag_cache.h
@@ -9,7 +9,18 @@ 
 #define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
 
 struct page_frag_cache {
-	void *va;
+	union {
+		void *va;
+		/* we maintain a pagecount bias, so that we dont dirty cache
+		 * line containing page->_refcount every time we allocate a
+		 * fragment. As 'va' is always aligned with the order of the
+		 * page allocated, we can reuse the LSB bits for the pagecount
+		 * bias, and its bit width happens to be indicated by the
+		 * 'size_mask' below.
+		 */
+		unsigned long pagecnt_bias;
+
+	};
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
 	__u16 offset;
 	__u16 size_mask:15;
@@ -18,10 +29,6 @@  struct page_frag_cache {
 	__u32 offset:31;
 	__u32 pfmemalloc:1;
 #endif
-	/* we maintain a pagecount bias, so that we dont dirty cache line
-	 * containing page->_refcount every time we allocate a fragment.
-	 */
-	unsigned int		pagecnt_bias;
 };
 
 static inline void page_frag_cache_init(struct page_frag_cache *nc)
@@ -56,7 +63,8 @@  static inline void *page_frag_alloc_va_align(struct page_frag_cache *nc,
 					     gfp_t gfp_mask,
 					     unsigned int align)
 {
-	WARN_ON_ONCE(!is_power_of_2(align) || align >= PAGE_SIZE);
+	WARN_ON_ONCE(!is_power_of_2(align) || align >= PAGE_SIZE ||
+		     fragsz < sizeof(unsigned int));
 
 	return __page_frag_alloc_va_align(nc, fragsz, gfp_mask, align);
 }
diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
index 8d93029116e1..5f7f96c88163 100644
--- a/mm/page_frag_cache.c
+++ b/mm/page_frag_cache.c
@@ -18,8 +18,8 @@ 
 #include <linux/page_frag_cache.h>
 #include "internal.h"
 
-static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
-					     gfp_t gfp_mask)
+static bool __page_frag_cache_refill(struct page_frag_cache *nc,
+				     gfp_t gfp_mask)
 {
 	struct page *page = NULL;
 	gfp_t gfp = gfp_mask;
@@ -38,9 +38,26 @@  static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
 	if (unlikely(!page))
 		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
 
-	nc->va = page ? page_address(page) : NULL;
+	if (unlikely(!page)) {
+		nc->va = NULL;
+		return false;
+	}
+
+	nc->va = page_address(page);
 
-	return page;
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+	VM_BUG_ON(nc->pagecnt_bias & nc->size_mask);
+	page_ref_add(page, nc->size_mask - 1);
+	nc->pagecnt_bias |= nc->size_mask;
+#else
+	VM_BUG_ON(nc->pagecnt_bias & (PAGE_SIZE - 1));
+	page_ref_add(page, PAGE_SIZE - 2);
+	nc->pagecnt_bias |= (PAGE_SIZE - 1);
+#endif
+
+	nc->pfmemalloc = page_is_pfmemalloc(page);
+	nc->offset = 0;
+	return true;
 }
 
 void page_frag_cache_drain(struct page_frag_cache *nc)
@@ -65,38 +82,31 @@  EXPORT_SYMBOL(__page_frag_cache_drain);
 void *page_frag_alloc_va(struct page_frag_cache *nc, unsigned int fragsz,
 			 gfp_t gfp_mask)
 {
-	unsigned int size, offset;
+	unsigned long size_mask;
+	unsigned int offset;
 	struct page *page;
+	void *va;
 
 	if (unlikely(!nc->va)) {
 refill:
-		page = __page_frag_cache_refill(nc, gfp_mask);
-		if (!page)
+		if (!__page_frag_cache_refill(nc, gfp_mask))
 			return NULL;
-
-		/* Even if we own the page, we do not use atomic_set().
-		 * This would break get_page_unless_zero() users.
-		 */
-		page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
-
-		/* reset page count bias and offset to start of new frag */
-		nc->pfmemalloc = page_is_pfmemalloc(page);
-		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
-		nc->offset = 0;
 	}
 
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
 	/* if size can vary use size else just use PAGE_SIZE */
-	size = nc->size_mask + 1;
+	size_mask = nc->size_mask;
 #else
-	size = PAGE_SIZE;
+	size_mask = PAGE_SIZE - 1;
 #endif
 
+	va = (void *)((unsigned long)nc->va & ~size_mask);
 	offset = nc->offset;
-	if (unlikely(offset + fragsz > size)) {
-		page = virt_to_page(nc->va);
 
-		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
+	if (unlikely(offset + fragsz > (size_mask + 1))) {
+		page = virt_to_page(va);
+
+		if (!page_ref_sub_and_test(page, nc->pagecnt_bias & size_mask))
 			goto refill;
 
 		if (unlikely(nc->pfmemalloc)) {
@@ -105,12 +115,11 @@  void *page_frag_alloc_va(struct page_frag_cache *nc, unsigned int fragsz,
 		}
 
 		/* OK, page count is 0, we can safely set it */
-		set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
+		set_page_count(page, size_mask);
+		nc->pagecnt_bias |= size_mask;
 
-		/* reset page count bias and offset to start of new frag */
-		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
 		offset = 0;
-		if (unlikely(fragsz > size)) {
+		if (unlikely(fragsz > (size_mask + 1))) {
 			/*
 			 * The caller is trying to allocate a fragment
 			 * with fragsz > PAGE_SIZE but the cache isn't big
@@ -127,7 +136,7 @@  void *page_frag_alloc_va(struct page_frag_cache *nc, unsigned int fragsz,
 	nc->pagecnt_bias--;
 	nc->offset = offset + fragsz;
 
-	return nc->va + offset;
+	return va + offset;
 }
 EXPORT_SYMBOL(page_frag_alloc_va);