diff mbox series

[net-next,v5,1/5] mm: add a signature in struct page

Message ID 20210513165846.23722-2-mcroce@linux.microsoft.com (mailing list archive)
State New, archived
Headers show
Series page_pool: recycle buffers | expand

Commit Message

Matteo Croce May 13, 2021, 4:58 p.m. UTC
From: Matteo Croce <mcroce@microsoft.com>

This is needed by the page_pool to avoid recycling a page not allocated
via page_pool.

The page->pp_magic field is aliased to page->lru.next and
page->compound_head, but it can't be set by mistake because the
signature value is a bad pointer, and it can't trigger a false positive
in PageTail() because the last bit is 0.

Co-developed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
---
 include/linux/mm.h       | 12 +++++++-----
 include/linux/mm_types.h | 12 ++++++++++++
 include/net/page_pool.h  |  2 ++
 net/core/page_pool.c     |  4 ++++
 4 files changed, 25 insertions(+), 5 deletions(-)

Comments

Matthew Wilcox May 14, 2021, 1 a.m. UTC | #1
On Thu, May 13, 2021 at 06:58:42PM +0200, Matteo Croce wrote:
>  		struct {	/* page_pool used by netstack */
> +			/**
> +			 * @pp_magic: magic value to avoid recycling non
> +			 * page_pool allocated pages.
> +			 * It aliases with page->lru.next

I'm not really keen on documenting what aliases with what.
pp_magic also aliases with compound_head, 'next' (for slab),
and dev_pagemap.  This is an O(n^2) documentation problem ...

I feel like I want to document the pfmemalloc bit in mm_types.h,
but I don't have a concrete suggestion yet.

> +++ b/include/net/page_pool.h
> @@ -63,6 +63,8 @@
>   */
>  #define PP_ALLOC_CACHE_SIZE	128
>  #define PP_ALLOC_CACHE_REFILL	64
> +#define PP_SIGNATURE		(POISON_POINTER_DELTA + 0x40)

I wonder if this wouldn't be better in linux/poison.h?
Matteo Croce May 14, 2021, 1:34 a.m. UTC | #2
On Fri, May 14, 2021 at 3:01 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Thu, May 13, 2021 at 06:58:42PM +0200, Matteo Croce wrote:
> >               struct {        /* page_pool used by netstack */
> > +                     /**
> > +                      * @pp_magic: magic value to avoid recycling non
> > +                      * page_pool allocated pages.
> > +                      * It aliases with page->lru.next
>
> I'm not really keen on documenting what aliases with what.
> pp_magic also aliases with compound_head, 'next' (for slab),
> and dev_pagemap.  This is an O(n^2) documentation problem ...
>

Eric asked to document what page->signature aliases, so I did it in
the commit message and in a comment.
I can drop the code comment and leave it just in the commit message.

> I feel like I want to document the pfmemalloc bit in mm_types.h,
> but I don't have a concrete suggestion yet.
>
> > +++ b/include/net/page_pool.h
> > @@ -63,6 +63,8 @@
> >   */
> >  #define PP_ALLOC_CACHE_SIZE  128
> >  #define PP_ALLOC_CACHE_REFILL        64
> > +#define PP_SIGNATURE         (POISON_POINTER_DELTA + 0x40)
>
> I wonder if this wouldn't be better in linux/poison.h?
>

I was thinking the same; I'll do it in v6.

Regards,
Matteo Croce May 18, 2021, 3:44 p.m. UTC | #3
On Fri, May 14, 2021 at 3:01 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> I feel like I want to document the pfmemalloc bit in mm_types.h,
> but I don't have a concrete suggestion yet.
>

Maybe simply:

/* Bit zero is set
 * Bit one if pfmemalloc page
 */
 unsigned long compound_head;

Regards,
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 322ec61d0da7..48268d2d0282 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1668,10 +1668,12 @@  struct address_space *page_mapping(struct page *page);
 static inline bool page_is_pfmemalloc(const struct page *page)
 {
 	/*
-	 * Page index cannot be this large so this must be
-	 * a pfmemalloc page.
+	 * This is not a tail page; compound_head of a head page is unused
+	 * at return from the page allocator, and will be overwritten
+	 * by callers who do not care whether the page came from the
+	 * reserves.
 	 */
-	return page->index == -1UL;
+	return page->compound_head & 2;
 }
 
 /*
@@ -1680,12 +1682,12 @@  static inline bool page_is_pfmemalloc(const struct page *page)
  */
 static inline void set_page_pfmemalloc(struct page *page)
 {
-	page->index = -1UL;
+	page->compound_head = 2;
 }
 
 static inline void clear_page_pfmemalloc(struct page *page)
 {
-	page->index = 0;
+	page->compound_head = 0;
 }
 
 /*
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5aacc1c10a45..44cf328e94e2 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -96,6 +96,18 @@  struct page {
 			unsigned long private;
 		};
 		struct {	/* page_pool used by netstack */
+			/**
+			 * @pp_magic: magic value to avoid recycling non
+			 * page_pool allocated pages.
+			 * It aliases with page->lru.next
+			 */
+			unsigned long pp_magic;
+			/**
+			 * @pp: pointer to page_pool.
+			 * It aliases with page->lru.prev
+			 */
+			struct page_pool *pp;
+			unsigned long _pp_mapping_pad;
 			/**
 			 * @dma_addr: might require a 64-bit value on
 			 * 32-bit architectures.
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index b4b6de909c93..24b3d42c62c0 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -63,6 +63,8 @@ 
  */
 #define PP_ALLOC_CACHE_SIZE	128
 #define PP_ALLOC_CACHE_REFILL	64
+#define PP_SIGNATURE		(POISON_POINTER_DELTA + 0x40)
+
 struct pp_alloc_cache {
 	u32 count;
 	struct page *cache[PP_ALLOC_CACHE_SIZE];
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 3c4c4c7a0402..9de5d8c08c17 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -221,6 +221,8 @@  static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
 		return NULL;
 	}
 
+	page->pp_magic = PP_SIGNATURE;
+
 	/* Track how many pages are held 'in-flight' */
 	pool->pages_state_hold_cnt++;
 	trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
@@ -341,6 +343,8 @@  void page_pool_release_page(struct page_pool *pool, struct page *page)
 			     DMA_ATTR_SKIP_CPU_SYNC);
 	page_pool_set_dma_addr(page, 0);
 skip_dma_unmap:
+	page->pp_magic = 0;
+
 	/* This may be the last page returned, releasing the pool, so
 	 * it is not safe to reference pool afterwards.
 	 */