diff mbox series

bootmem: Stop using page->index

Message ID 20240912185602.2342148-1-willy@infradead.org (mailing list archive)
State New
Headers show
Series bootmem: Stop using page->index | expand

Commit Message

Matthew Wilcox Sept. 12, 2024, 6:55 p.m. UTC
Encode the type into the bottom four bits of page->private and the
info into the remaining bits.  Also turn the bootmem type into a
named enum.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 arch/x86/mm/init_64.c        |  9 ++++-----
 include/linux/bootmem_info.h | 25 +++++++++++++++++--------
 mm/bootmem_info.c            | 11 ++++++-----
 mm/sparse.c                  |  8 ++++----
 4 files changed, 31 insertions(+), 22 deletions(-)

Comments

Yosry Ahmed Sept. 12, 2024, 7:04 p.m. UTC | #1
On Thu, Sep 12, 2024 at 11:56 AM Matthew Wilcox (Oracle)
<willy@infradead.org> wrote:
>
> Encode the type into the bottom four bits of page->private and the
> info into the remaining bits.  Also turn the bootmem type into a
> named enum.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  arch/x86/mm/init_64.c        |  9 ++++-----
>  include/linux/bootmem_info.h | 25 +++++++++++++++++--------
>  mm/bootmem_info.c            | 11 ++++++-----
>  mm/sparse.c                  |  8 ++++----
>  4 files changed, 31 insertions(+), 22 deletions(-)
>
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index ff253648706f..4d5fde324136 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -987,13 +987,12 @@ int arch_add_memory(int nid, u64 start, u64 size,
>
>  static void __meminit free_pagetable(struct page *page, int order)
>  {
> -       unsigned long magic;
> -       unsigned int nr_pages = 1 << order;
> -
>         /* bootmem page has reserved flag */
>         if (PageReserved(page)) {
> -               magic = page->index;
> -               if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
> +               enum bootmem_type type = bootmem_type(page);
> +               unsigned long nr_pages = 1 << order;
> +
> +               if (type == SECTION_INFO || type == MIX_SECTION_INFO) {
>                         while (nr_pages--)
>                                 put_page_bootmem(page++);
>                 } else
> diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h
> index cffa38a73618..e2fe5de93dcc 100644
> --- a/include/linux/bootmem_info.h
> +++ b/include/linux/bootmem_info.h
> @@ -6,11 +6,10 @@
>  #include <linux/kmemleak.h>
>
>  /*
> - * Types for free bootmem stored in page->lru.next. These have to be in
> - * some random range in unsigned long space for debugging purposes.
> + * Types for free bootmem stored in the low bits of page->private.
>   */
> -enum {
> -       MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12,
> +enum bootmem_type {
> +       MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 1,
>         SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE,
>         MIX_SECTION_INFO,
>         NODE_INFO,
> @@ -21,9 +20,19 @@ enum {
>  void __init register_page_bootmem_info_node(struct pglist_data *pgdat);
>
>  void get_page_bootmem(unsigned long info, struct page *page,
> -                     unsigned long type);
> +               enum bootmem_type type);
>  void put_page_bootmem(struct page *page);
>
> +static inline enum bootmem_type bootmem_type(const struct page *page)
> +{
> +       return (unsigned long)page->private & 0xf;
> +}
> +
> +static inline unsigned long bootmem_info(const struct page *page)
> +{
> +       return (unsigned long)page->private >> 4;
> +}

Would it be better to define the number of bits used for the type as a
macro and use it throughout (bootmem_type(), bootmem_info(),
get_page_bootmem())?.

We can also add a static assertion or build bug in case 4 bits is not
enough anymore (e.g. if someone changes the enum values).

> +
>  /*
>   * Any memory allocated via the memblock allocator and not via the
>   * buddy will be marked reserved already in the memmap. For those
> @@ -31,7 +40,7 @@ void put_page_bootmem(struct page *page);
>   */
>  static inline void free_bootmem_page(struct page *page)
>  {
> -       unsigned long magic = page->index;
> +       enum bootmem_type type = bootmem_type(page);
>
>         /*
>          * The reserve_bootmem_region sets the reserved flag on bootmem
> @@ -39,7 +48,7 @@ static inline void free_bootmem_page(struct page *page)
>          */
>         VM_BUG_ON_PAGE(page_ref_count(page) != 2, page);
>
> -       if (magic == SECTION_INFO || magic == MIX_SECTION_INFO)
> +       if (type == SECTION_INFO || type == MIX_SECTION_INFO)
>                 put_page_bootmem(page);
>         else
>                 VM_BUG_ON_PAGE(1, page);
> @@ -54,7 +63,7 @@ static inline void put_page_bootmem(struct page *page)
>  }
>
>  static inline void get_page_bootmem(unsigned long info, struct page *page,
> -                                   unsigned long type)
> +                                   enum bootmem_type type)
>  {
>  }
>
> diff --git a/mm/bootmem_info.c b/mm/bootmem_info.c
> index fa7cb0c87c03..95f288169a38 100644
> --- a/mm/bootmem_info.c
> +++ b/mm/bootmem_info.c
> @@ -14,23 +14,24 @@
>  #include <linux/memory_hotplug.h>
>  #include <linux/kmemleak.h>
>
> -void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
> +void get_page_bootmem(unsigned long info, struct page *page,
> +               enum bootmem_type type)
>  {
> -       page->index = type;
> +       BUG_ON(type > 0xf);
> +       BUG_ON(info > (ULONG_MAX >> 4));
>         SetPagePrivate(page);
> -       set_page_private(page, info);
> +       set_page_private(page, info << 4 | type);
>         page_ref_inc(page);
>  }
>
>  void put_page_bootmem(struct page *page)
>  {
> -       unsigned long type = page->index;
> +       enum bootmem_type type = bootmem_type(page);
>
>         BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
>                type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
>
>         if (page_ref_dec_return(page) == 1) {
> -               page->index = 0;
>                 ClearPagePrivate(page);
>                 set_page_private(page, 0);
>                 INIT_LIST_HEAD(&page->lru);
> diff --git a/mm/sparse.c b/mm/sparse.c
> index dc38539f8560..6ba5354cf2e1 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -720,19 +720,19 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
>  static void free_map_bootmem(struct page *memmap)
>  {
>         unsigned long maps_section_nr, removing_section_nr, i;
> -       unsigned long magic, nr_pages;
> +       unsigned long type, nr_pages;
>         struct page *page = virt_to_page(memmap);
>
>         nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
>                 >> PAGE_SHIFT;
>
>         for (i = 0; i < nr_pages; i++, page++) {
> -               magic = page->index;
> +               type = bootmem_type(page);
>
> -               BUG_ON(magic == NODE_INFO);
> +               BUG_ON(type == NODE_INFO);
>
>                 maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
> -               removing_section_nr = page_private(page);
> +               removing_section_nr = bootmem_info(page);
>
>                 /*
>                  * When this function is called, the removing section is
> --
> 2.43.0
>
>
Matthew Wilcox Sept. 12, 2024, 9:51 p.m. UTC | #2
On Thu, Sep 12, 2024 at 12:04:18PM -0700, Yosry Ahmed wrote:
> On Thu, Sep 12, 2024 at 11:56 AM Matthew Wilcox (Oracle)
> <willy@infradead.org> wrote:
> >
> > Encode the type into the bottom four bits of page->private and the
> > info into the remaining bits.  Also turn the bootmem type into a
> > named enum.

> > @@ -21,9 +20,19 @@ enum {
> >  void __init register_page_bootmem_info_node(struct pglist_data *pgdat);
> >
> >  void get_page_bootmem(unsigned long info, struct page *page,
> > -                     unsigned long type);
> > +               enum bootmem_type type);
> >  void put_page_bootmem(struct page *page);
> >
> > +static inline enum bootmem_type bootmem_type(const struct page *page)
> > +{
> > +       return (unsigned long)page->private & 0xf;
> > +}
> > +
> > +static inline unsigned long bootmem_info(const struct page *page)
> > +{
> > +       return (unsigned long)page->private >> 4;
> > +}
> 
> Would it be better to define the number of bits used for the type as a
> macro and use it throughout (bootmem_type(), bootmem_info(),
> get_page_bootmem())?.

I think this is an adequate abstraction already.
Yosry Ahmed Sept. 12, 2024, 9:56 p.m. UTC | #3
On Thu, Sep 12, 2024 at 2:51 PM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Thu, Sep 12, 2024 at 12:04:18PM -0700, Yosry Ahmed wrote:
> > On Thu, Sep 12, 2024 at 11:56 AM Matthew Wilcox (Oracle)
> > <willy@infradead.org> wrote:
> > >
> > > Encode the type into the bottom four bits of page->private and the
> > > info into the remaining bits.  Also turn the bootmem type into a
> > > named enum.
>
> > > @@ -21,9 +20,19 @@ enum {
> > >  void __init register_page_bootmem_info_node(struct pglist_data *pgdat);
> > >
> > >  void get_page_bootmem(unsigned long info, struct page *page,
> > > -                     unsigned long type);
> > > +               enum bootmem_type type);
> > >  void put_page_bootmem(struct page *page);
> > >
> > > +static inline enum bootmem_type bootmem_type(const struct page *page)
> > > +{
> > > +       return (unsigned long)page->private & 0xf;
> > > +}
> > > +
> > > +static inline unsigned long bootmem_info(const struct page *page)
> > > +{
> > > +       return (unsigned long)page->private >> 4;
> > > +}
> >
> > Would it be better to define the number of bits used for the type as a
> > macro and use it throughout (bootmem_type(), bootmem_info(),
> > get_page_bootmem())?.
>
> I think this is an adequate abstraction already.

It's not a big deal, but if we want to change the number of bits later
we change one macro definition instead of 5 lines of code across two
different files.
Matthew Wilcox Sept. 24, 2024, 4:51 p.m. UTC | #4
On Thu, Sep 12, 2024 at 02:56:44PM -0700, Yosry Ahmed wrote:
> On Thu, Sep 12, 2024 at 2:51 PM Matthew Wilcox <willy@infradead.org> wrote:
> >
> > On Thu, Sep 12, 2024 at 12:04:18PM -0700, Yosry Ahmed wrote:
> > > On Thu, Sep 12, 2024 at 11:56 AM Matthew Wilcox (Oracle)
> > > <willy@infradead.org> wrote:
> > > >
> > > > Encode the type into the bottom four bits of page->private and the
> > > > info into the remaining bits.  Also turn the bootmem type into a
> > > > named enum.
> >
> > > > @@ -21,9 +20,19 @@ enum {
> > > >  void __init register_page_bootmem_info_node(struct pglist_data *pgdat);
> > > >
> > > >  void get_page_bootmem(unsigned long info, struct page *page,
> > > > -                     unsigned long type);
> > > > +               enum bootmem_type type);
> > > >  void put_page_bootmem(struct page *page);
> > > >
> > > > +static inline enum bootmem_type bootmem_type(const struct page *page)
> > > > +{
> > > > +       return (unsigned long)page->private & 0xf;
> > > > +}
> > > > +
> > > > +static inline unsigned long bootmem_info(const struct page *page)
> > > > +{
> > > > +       return (unsigned long)page->private >> 4;
> > > > +}
> > >
> > > Would it be better to define the number of bits used for the type as a
> > > macro and use it throughout (bootmem_type(), bootmem_info(),
> > > get_page_bootmem())?.
> >
> > I think this is an adequate abstraction already.
> 
> It's not a big deal, but if we want to change the number of bits later
> we change one macro definition instead of 5 lines of code across two
> different files.

That doesn't sound like an obvious win to me.  Look, if you care, send
a patch.  What I have is definitely an improvement over what is there
now; you're just complaining that you think it could be even better,
and I disagree that what you want will be better than what I have.
The only reasonable approach is to merge my patch and then discuss
yours separately.
diff mbox series

Patch

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ff253648706f..4d5fde324136 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -987,13 +987,12 @@  int arch_add_memory(int nid, u64 start, u64 size,
 
 static void __meminit free_pagetable(struct page *page, int order)
 {
-	unsigned long magic;
-	unsigned int nr_pages = 1 << order;
-
 	/* bootmem page has reserved flag */
 	if (PageReserved(page)) {
-		magic = page->index;
-		if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+		enum bootmem_type type = bootmem_type(page);
+		unsigned long nr_pages = 1 << order;
+
+		if (type == SECTION_INFO || type == MIX_SECTION_INFO) {
 			while (nr_pages--)
 				put_page_bootmem(page++);
 		} else
diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h
index cffa38a73618..e2fe5de93dcc 100644
--- a/include/linux/bootmem_info.h
+++ b/include/linux/bootmem_info.h
@@ -6,11 +6,10 @@ 
 #include <linux/kmemleak.h>
 
 /*
- * Types for free bootmem stored in page->lru.next. These have to be in
- * some random range in unsigned long space for debugging purposes.
+ * Types for free bootmem stored in the low bits of page->private.
  */
-enum {
-	MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12,
+enum bootmem_type {
+	MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 1,
 	SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE,
 	MIX_SECTION_INFO,
 	NODE_INFO,
@@ -21,9 +20,19 @@  enum {
 void __init register_page_bootmem_info_node(struct pglist_data *pgdat);
 
 void get_page_bootmem(unsigned long info, struct page *page,
-		      unsigned long type);
+		enum bootmem_type type);
 void put_page_bootmem(struct page *page);
 
+static inline enum bootmem_type bootmem_type(const struct page *page)
+{
+	return (unsigned long)page->private & 0xf;
+}
+
+static inline unsigned long bootmem_info(const struct page *page)
+{
+	return (unsigned long)page->private >> 4;
+}
+
 /*
  * Any memory allocated via the memblock allocator and not via the
  * buddy will be marked reserved already in the memmap. For those
@@ -31,7 +40,7 @@  void put_page_bootmem(struct page *page);
  */
 static inline void free_bootmem_page(struct page *page)
 {
-	unsigned long magic = page->index;
+	enum bootmem_type type = bootmem_type(page);
 
 	/*
 	 * The reserve_bootmem_region sets the reserved flag on bootmem
@@ -39,7 +48,7 @@  static inline void free_bootmem_page(struct page *page)
 	 */
 	VM_BUG_ON_PAGE(page_ref_count(page) != 2, page);
 
-	if (magic == SECTION_INFO || magic == MIX_SECTION_INFO)
+	if (type == SECTION_INFO || type == MIX_SECTION_INFO)
 		put_page_bootmem(page);
 	else
 		VM_BUG_ON_PAGE(1, page);
@@ -54,7 +63,7 @@  static inline void put_page_bootmem(struct page *page)
 }
 
 static inline void get_page_bootmem(unsigned long info, struct page *page,
-				    unsigned long type)
+				    enum bootmem_type type)
 {
 }
 
diff --git a/mm/bootmem_info.c b/mm/bootmem_info.c
index fa7cb0c87c03..95f288169a38 100644
--- a/mm/bootmem_info.c
+++ b/mm/bootmem_info.c
@@ -14,23 +14,24 @@ 
 #include <linux/memory_hotplug.h>
 #include <linux/kmemleak.h>
 
-void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
+void get_page_bootmem(unsigned long info, struct page *page,
+		enum bootmem_type type)
 {
-	page->index = type;
+	BUG_ON(type > 0xf);
+	BUG_ON(info > (ULONG_MAX >> 4));
 	SetPagePrivate(page);
-	set_page_private(page, info);
+	set_page_private(page, info << 4 | type);
 	page_ref_inc(page);
 }
 
 void put_page_bootmem(struct page *page)
 {
-	unsigned long type = page->index;
+	enum bootmem_type type = bootmem_type(page);
 
 	BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
 	       type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
 
 	if (page_ref_dec_return(page) == 1) {
-		page->index = 0;
 		ClearPagePrivate(page);
 		set_page_private(page, 0);
 		INIT_LIST_HEAD(&page->lru);
diff --git a/mm/sparse.c b/mm/sparse.c
index dc38539f8560..6ba5354cf2e1 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -720,19 +720,19 @@  static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
 static void free_map_bootmem(struct page *memmap)
 {
 	unsigned long maps_section_nr, removing_section_nr, i;
-	unsigned long magic, nr_pages;
+	unsigned long type, nr_pages;
 	struct page *page = virt_to_page(memmap);
 
 	nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
 		>> PAGE_SHIFT;
 
 	for (i = 0; i < nr_pages; i++, page++) {
-		magic = page->index;
+		type = bootmem_type(page);
 
-		BUG_ON(magic == NODE_INFO);
+		BUG_ON(type == NODE_INFO);
 
 		maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
-		removing_section_nr = page_private(page);
+		removing_section_nr = bootmem_info(page);
 
 		/*
 		 * When this function is called, the removing section is