diff mbox series

[v10,10/11] mm/hugetlb: Gather discrete indexes of tail page

Message ID 20201217121303.13386-11-songmuchun@bytedance.com (mailing list archive)
State New, archived
Headers show
Series Free some vmemmap pages of HugeTLB page | expand

Commit Message

Muchun Song Dec. 17, 2020, 12:13 p.m. UTC
For a HugeTLB page, there is more metadata to save in the struct page.
But the head struct page cannot meet our needs, so we have to abuse
other tail struct pages to store the metadata. In order to avoid
conflicts caused by subsequent use of more tail struct pages, we can
gather these discrete indexes of the tail struct pages in one place.
This way, it will be easier to add a new tail page index later.

There are only (RESERVE_VMEMMAP_SIZE / sizeof(struct page)) struct
pages that can be used when CONFIG_HUGETLB_PAGE_FREE_VMEMMAP is enabled,
so add a BUILD_BUG_ON to catch invalid usage of the tail struct pages.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
---
 include/linux/hugetlb.h        | 13 +++++++++++++
 include/linux/hugetlb_cgroup.h | 15 +++++++++------
 mm/hugetlb.c                   | 16 ++++++++--------
 mm/hugetlb_vmemmap.c           |  8 ++++++++
 4 files changed, 38 insertions(+), 14 deletions(-)

Comments

Oscar Salvador Dec. 18, 2020, 9:06 a.m. UTC | #1
On Thu, Dec 17, 2020 at 08:13:02PM +0800, Muchun Song wrote:
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 6c02f49959fd..78dd88dda857 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1360,7 +1360,7 @@ static inline void hwpoison_subpage_deliver(struct hstate *h, struct page *head)
>  	if (!PageHWPoison(head) || !free_vmemmap_pages_per_hpage(h))
>  		return;
>  
> -	page = head + page_private(head + 4);
> +	page = head + page_private(head + SUBPAGE_INDEX_HWPOISON);
>  
>  	/*
>  	 * Move PageHWPoison flag from head page to the raw error page,
> @@ -1379,7 +1379,7 @@ static inline void hwpoison_subpage_set(struct hstate *h, struct page *head,
>  		return;
>  
>  	if (free_vmemmap_pages_per_hpage(h)) {
> -		set_page_private(head + 4, page - head);
> +		set_page_private(head + SUBPAGE_INDEX_HWPOISON, page - head);

Ok, I was too eager here.

If CONFIG_HUGETLB_PAGE_FREE_VMEMMAP is not set for whatever reason
(e.g: CONFIG_MEMORY_HOTREMOVE is disabled), when you convert "+4"
to its index (SUBPAGE_INDEX_HWPOISON), this will no longer build
since we only define SUBPAGE_INDEX_HWPOISON when the config
option CONFIG_HUGETLB_PAGE_FREE_VMEMMAP is set.

Different things can be done to fix this:

e.g:

 - Define two different versions of hwpoison_subpage_{deliver,set}
   and have them under
   #ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
   ...
   #else
   ...
   #endif

 - Work around it as is with IS_ENABLED(CONFIG_HUGETLB_...
 - Have a common entry and decide depending on whether
   the config is enabled.

I guess option #1 might be cleaner.
Muchun Song Dec. 18, 2020, 9:41 a.m. UTC | #2
On Fri, Dec 18, 2020 at 5:06 PM Oscar Salvador <osalvador@suse.de> wrote:
>
> On Thu, Dec 17, 2020 at 08:13:02PM +0800, Muchun Song wrote:
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index 6c02f49959fd..78dd88dda857 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -1360,7 +1360,7 @@ static inline void hwpoison_subpage_deliver(struct hstate *h, struct page *head)
> >       if (!PageHWPoison(head) || !free_vmemmap_pages_per_hpage(h))
> >               return;
> >
> > -     page = head + page_private(head + 4);
> > +     page = head + page_private(head + SUBPAGE_INDEX_HWPOISON);
> >
> >       /*
> >        * Move PageHWPoison flag from head page to the raw error page,
> > @@ -1379,7 +1379,7 @@ static inline void hwpoison_subpage_set(struct hstate *h, struct page *head,
> >               return;
> >
> >       if (free_vmemmap_pages_per_hpage(h)) {
> > -             set_page_private(head + 4, page - head);
> > +             set_page_private(head + SUBPAGE_INDEX_HWPOISON, page - head);
>
> Ok, I was too eager here.
>
> If CONFIG_HUGETLB_PAGE_FREE_VMEMMAP is not set for whatever reason
> (e.g: CONFIG_MEMORY_HOTREMOVE is disabled), when you convert "+4"
> to its index (SUBPAGE_INDEX_HWPOISON), this will no longer build
> since we only define SUBPAGE_INDEX_HWPOISON when the config
> option CONFIG_HUGETLB_PAGE_FREE_VMEMMAP is set.

Yeah, it is my mistake. Thanks for pointing that out.

>
> Different things can be done to fix this:
>
> e.g:
>
>  - Define a two different hwpoison_subpage_{deliver,set}
>    and have them under
>    #ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
>    ...
>    #else
>    ...
>    #endif
>
>  - Work it around as is with IS_ENABLED(CONFIG_HUGETLB_...
>  - Have a common entry and decide depending on whether
>    the config is enabled.
>
> I guess option #1 might be cleaner.

Thanks for your suggestion. I also prefer option #1.

>
> --
> Oscar Salvador
> SUSE L3
diff mbox series

Patch

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 66d82ae7b712..7295f6b3d55e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -28,6 +28,19 @@  typedef struct { unsigned long pd; } hugepd_t;
 #include <linux/shm.h>
 #include <asm/tlbflush.h>
 
+enum {
+	SUBPAGE_INDEX_ACTIVE = 1,	/* reuse page flags of PG_private */
+	SUBPAGE_INDEX_TEMPORARY,	/* reuse page->mapping */
+#ifdef CONFIG_CGROUP_HUGETLB
+	SUBPAGE_INDEX_CGROUP = SUBPAGE_INDEX_TEMPORARY,/* reuse page->private */
+	SUBPAGE_INDEX_CGROUP_RSVD,	/* reuse page->private */
+#endif
+#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
+	SUBPAGE_INDEX_HWPOISON,		/* reuse page->private */
+#endif
+	NR_USED_SUBPAGE,
+};
+
 struct hugepage_subpool {
 	spinlock_t lock;
 	long count;
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 2ad6e92f124a..3d3c1c49efe4 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -24,8 +24,9 @@  struct file_region;
 /*
  * Minimum page order trackable by hugetlb cgroup.
  * At least 4 pages are necessary for all the tracking information.
- * The second tail page (hpage[2]) is the fault usage cgroup.
- * The third tail page (hpage[3]) is the reservation usage cgroup.
+ * The second tail page (hpage[SUBPAGE_INDEX_CGROUP]) is the fault
+ * usage cgroup. The third tail page (hpage[SUBPAGE_INDEX_CGROUP_RSVD])
+ * is the reservation usage cgroup.
  */
 #define HUGETLB_CGROUP_MIN_ORDER	2
 
@@ -66,9 +67,9 @@  __hugetlb_cgroup_from_page(struct page *page, bool rsvd)
 	if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
 		return NULL;
 	if (rsvd)
-		return (struct hugetlb_cgroup *)page[3].private;
+		return (void *)page_private(page + SUBPAGE_INDEX_CGROUP_RSVD);
 	else
-		return (struct hugetlb_cgroup *)page[2].private;
+		return (void *)page_private(page + SUBPAGE_INDEX_CGROUP);
 }
 
 static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
@@ -90,9 +91,11 @@  static inline int __set_hugetlb_cgroup(struct page *page,
 	if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
 		return -1;
 	if (rsvd)
-		page[3].private = (unsigned long)h_cg;
+		set_page_private(page + SUBPAGE_INDEX_CGROUP_RSVD,
+				 (unsigned long)h_cg);
 	else
-		page[2].private = (unsigned long)h_cg;
+		set_page_private(page + SUBPAGE_INDEX_CGROUP,
+				 (unsigned long)h_cg);
 	return 0;
 }
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6c02f49959fd..78dd88dda857 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1360,7 +1360,7 @@  static inline void hwpoison_subpage_deliver(struct hstate *h, struct page *head)
 	if (!PageHWPoison(head) || !free_vmemmap_pages_per_hpage(h))
 		return;
 
-	page = head + page_private(head + 4);
+	page = head + page_private(head + SUBPAGE_INDEX_HWPOISON);
 
 	/*
 	 * Move PageHWPoison flag from head page to the raw error page,
@@ -1379,7 +1379,7 @@  static inline void hwpoison_subpage_set(struct hstate *h, struct page *head,
 		return;
 
 	if (free_vmemmap_pages_per_hpage(h)) {
-		set_page_private(head + 4, page - head);
+		set_page_private(head + SUBPAGE_INDEX_HWPOISON, page - head);
 	} else if (page != head) {
 		/*
 		 * Move PageHWPoison flag from head page to the raw error page,
@@ -1459,20 +1459,20 @@  struct hstate *size_to_hstate(unsigned long size)
 bool page_huge_active(struct page *page)
 {
 	VM_BUG_ON_PAGE(!PageHuge(page), page);
-	return PageHead(page) && PagePrivate(&page[1]);
+	return PageHead(page) && PagePrivate(&page[SUBPAGE_INDEX_ACTIVE]);
 }
 
 /* never called for tail page */
 static void set_page_huge_active(struct page *page)
 {
 	VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
-	SetPagePrivate(&page[1]);
+	SetPagePrivate(&page[SUBPAGE_INDEX_ACTIVE]);
 }
 
 static void clear_page_huge_active(struct page *page)
 {
 	VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
-	ClearPagePrivate(&page[1]);
+	ClearPagePrivate(&page[SUBPAGE_INDEX_ACTIVE]);
 }
 
 /*
@@ -1484,17 +1484,17 @@  static inline bool PageHugeTemporary(struct page *page)
 	if (!PageHuge(page))
 		return false;
 
-	return (unsigned long)page[2].mapping == -1U;
+	return (unsigned long)page[SUBPAGE_INDEX_TEMPORARY].mapping == -1U;
 }
 
 static inline void SetPageHugeTemporary(struct page *page)
 {
-	page[2].mapping = (void *)-1U;
+	page[SUBPAGE_INDEX_TEMPORARY].mapping = (void *)-1U;
 }
 
 static inline void ClearPageHugeTemporary(struct page *page)
 {
-	page[2].mapping = NULL;
+	page[SUBPAGE_INDEX_TEMPORARY].mapping = NULL;
 }
 
 static void __free_huge_page(struct page *page)
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 3ebfe1706c77..ad123b760245 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -234,6 +234,14 @@  void __init hugetlb_vmemmap_init(struct hstate *h)
 	unsigned int nr_pages = pages_per_huge_page(h);
 	unsigned int vmemmap_pages;
 
+	/*
+	 * There are only (RESERVE_VMEMMAP_SIZE / sizeof(struct page)) struct
+	 * page structs that can be used when CONFIG_HUGETLB_PAGE_FREE_VMEMMAP,
+	 * so add a BUILD_BUG_ON to catch invalid usage of the tail struct page.
+	 */
+	BUILD_BUG_ON(NR_USED_SUBPAGE >=
+		     RESERVE_VMEMMAP_SIZE / sizeof(struct page));
+
 	if (!hugetlb_free_vmemmap_enabled)
 		return;