diff mbox series

[v3,11/14] mm/mempolicy: huge-page allocation for many preferred

Message ID 1614766858-90344-12-git-send-email-feng.tang@intel.com (mailing list archive)
State New
Headers show
Series Introduced multi-preference mempolicy | expand

Commit Message

Feng Tang March 3, 2021, 10:20 a.m. UTC
From: Ben Widawsky <ben.widawsky@intel.com>

Implement the missing huge page allocation functionality while obeying
the preferred node semantics.

This uses a fallback mechanism to try multiple preferred nodes first,
and then all other nodes. It cannot use the helper function that was
introduced because huge page allocation already has its own helpers and
it was more LOC, and effort to try to consolidate that.

The weirdness is MPOL_PREFERRED_MANY can't be called yet because it is
part of the UAPI we haven't yet exposed. Instead of make that define
global, it's simply changed with the UAPI patch.

v3: add __GFP_NOWARN for first try of prefer_many allocation (Feng)

Link: https://lore.kernel.org/r/20200630212517.308045-12-ben.widawsky@intel.com
Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
Signed-off-by: Feng Tang <feng.tang@intel.com>
---
 mm/hugetlb.c   | 22 +++++++++++++++++++---
 mm/mempolicy.c |  3 ++-
 2 files changed, 21 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4bdb58a..c7c9ef3 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1110,7 +1110,7 @@  static struct page *dequeue_huge_page_vma(struct hstate *h,
 				unsigned long address, int avoid_reserve,
 				long chg)
 {
-	struct page *page;
+	struct page *page = NULL;
 	struct mempolicy *mpol;
 	gfp_t gfp_mask;
 	nodemask_t *nodemask;
@@ -1131,7 +1131,15 @@  static struct page *dequeue_huge_page_vma(struct hstate *h,
 
 	gfp_mask = htlb_alloc_mask(h);
 	nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
-	page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+	if (mpol->mode != MPOL_BIND && nodemask) { /* AKA MPOL_PREFERRED_MANY */
+		page = dequeue_huge_page_nodemask(h,
+				gfp_mask | __GFP_RETRY_MAYFAIL | __GFP_NOWARN,
+				nid, nodemask);
+		if (!page)
+			page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL);
+	} else {
+		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+	}
 	if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
 		SetPagePrivate(page);
 		h->resv_huge_pages--;
@@ -1935,7 +1943,15 @@  struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
 	nodemask_t *nodemask;
 
 	nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
-	page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask);
+	if (mpol->mode != MPOL_BIND && nodemask) { /* AKA MPOL_PREFERRED_MANY */
+		page = alloc_surplus_huge_page(h,
+				gfp_mask | __GFP_RETRY_MAYFAIL | __GFP_NOWARN,
+				nid, nodemask);
+		if (!page)
+			alloc_surplus_huge_page(h, gfp_mask, nid, NULL);
+	} else {
+		page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask);
+	}
 	mpol_cond_put(mpol);
 
 	return page;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 0cb92ab..f9b2167 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2075,7 +2075,8 @@  int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags,
 					huge_page_shift(hstate_vma(vma)));
 	} else {
 		nid = policy_node(gfp_flags, *mpol, numa_node_id());
-		if ((*mpol)->mode == MPOL_BIND)
+		if ((*mpol)->mode == MPOL_BIND ||
+		    (*mpol)->mode == MPOL_PREFERRED_MANY)
 			*nodemask = &(*mpol)->nodes;
 	}
 	return nid;