diff mbox series

[RFC,v2,31/47] hugetlb: sort hstates in hugetlb_init_hstates

Message ID 20221021163703.3218176-32-jthoughton@google.com (mailing list archive)
State New
Headers show
Series hugetlb: introduce HugeTLB high-granularity mapping | expand

Commit Message

James Houghton Oct. 21, 2022, 4:36 p.m. UTC
When using HugeTLB high-granularity mapping, we need to go through the
supported hugepage sizes in decreasing order so that we pick the largest
size that works. Consider the case where we're faulting in a 1G hugepage
for the first time: we want hugetlb_fault/hugetlb_no_page to map it with
a PUD. By going through the sizes in decreasing order, we will find that
PUD_SIZE works before finding out that PMD_SIZE or PAGE_SIZE work too.

This commit also changes bootmem hugepages from storing hstate pointers
directly to storing the hstate sizes. Sorting moves entries within the
hstates array, so an hstate pointer saved for a boot-time-allocated
hugepage may refer to a different hstate once the array has been sorted.
`gather_bootmem_prealloc`, which is called after the hstates have been
sorted, now converts the saved size back to the correct hstate.

Signed-off-by: James Houghton <jthoughton@google.com>
---
 include/linux/hugetlb.h |  2 +-
 mm/hugetlb.c            | 49 ++++++++++++++++++++++++++++++++---------
 2 files changed, 40 insertions(+), 11 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d305742e9d44..e25f97cdd086 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -772,7 +772,7 @@  struct hstate {
 
 struct huge_bootmem_page {
 	struct list_head list;
-	struct hstate *hstate;
+	unsigned long hstate_sz;	/* huge_page_size() of the page's hstate */
 };
 
 int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bb0005d57cab..d6f07968156c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -34,6 +34,7 @@ 
 #include <linux/nospec.h>
 #include <linux/delayacct.h>
 #include <linux/memory.h>
+#include <linux/sort.h>
 
 #include <asm/page.h>
 #include <asm/pgalloc.h>
@@ -49,6 +50,10 @@ 
 
 int hugetlb_max_hstate __read_mostly;
 unsigned int default_hstate_idx;
+/*
+ * After hugetlb_init_hstates is called, hstates will be sorted from largest
+ * to smallest.
+ */
 struct hstate hstates[HUGE_MAX_HSTATE];
 
 #ifdef CONFIG_CMA
@@ -3189,7 +3194,7 @@  int __alloc_bootmem_huge_page(struct hstate *h, int nid)
 	/* Put them into a private list first because mem_map is not up yet */
 	INIT_LIST_HEAD(&m->list);
 	list_add(&m->list, &huge_boot_pages);
-	m->hstate = h;
+	m->hstate_sz = huge_page_size(h);
 	return 1;
 }
 
@@ -3203,7 +3208,7 @@  static void __init gather_bootmem_prealloc(void)
 
 	list_for_each_entry(m, &huge_boot_pages, list) {
 		struct page *page = virt_to_page(m);
-		struct hstate *h = m->hstate;
+		struct hstate *h = size_to_hstate(m->hstate_sz);
 
 		VM_BUG_ON(!hstate_is_gigantic(h));
 		WARN_ON(page_count(page) != 1);
@@ -3319,9 +3324,38 @@  static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 	kfree(node_alloc_noretry);
 }
 
+static int compare_hstates_decreasing(const void *a, const void *b)
+{
+	const struct hstate *lhs = a, *rhs = b;
+	unsigned long lhs_sz = huge_page_size(lhs);
+	unsigned long rhs_sz = huge_page_size(rhs);
+
+	/*
+	 * Descending order: -1 if a is larger, 1 if a is smaller, 0 if equal.
+	 */
+	return (lhs_sz < rhs_sz) - (lhs_sz > rhs_sz);
+}
+
+static void __init sort_hstates(void)
+{
+	unsigned long default_hstate_sz = huge_page_size(&default_hstate);
+
+	/*
+	 * Largest first. NOTE(review): sort() moves whole entries; confirm
+	 * struct hstate holds no self-referential state (list heads) yet.
+	 */
+	sort(hstates, hugetlb_max_hstate, sizeof(*hstates),
+	     compare_hstates_decreasing, NULL);
+
+	/* The default hstate may have moved; look it up again by its size. */
+	default_hstate_idx = hstate_index(size_to_hstate(default_hstate_sz));
+}
+
 static void __init hugetlb_init_hstates(void)
 {
-	struct hstate *h, *h2;
+	struct hstate *h;
+
+	sort_hstates();
 
 	for_each_hstate(h) {
 		/* oversize hugepages were init'ed in early boot */
@@ -3340,13 +3374,8 @@  static void __init hugetlb_init_hstates(void)
 			continue;
 		if (hugetlb_cma_size && h->order <= HUGETLB_PAGE_ORDER)
 			continue;
-		for_each_hstate(h2) {
-			if (h2 == h)
-				continue;
-			if (h2->order < h->order &&
-			    h2->order > h->demote_order)
-				h->demote_order = h2->order;
-		}
+		if (h + 1 < &hstates[hugetlb_max_hstate]) /* next smaller size */
+			h->demote_order = huge_page_order(h + 1);
 	}
 }