diff mbox series

[11/12] mm/mempolicy: huge-page allocation for many preferred

Message ID 20200630212517.308045-12-ben.widawsky@intel.com (mailing list archive)
State New, archived
Headers show
Series Introduced multi-preference mempolicy | expand

Commit Message

Ben Widawsky June 30, 2020, 9:25 p.m. UTC
This patch implements the missing huge page allocation functionality
while obeying the preferred node semantics.

Like the previous patches, this uses a fallback mechanism to try
multiple preferred nodes first, and then all other nodes. It cannot use
the helper function that was introduced earlier because huge page
allocation already has its own helpers, and consolidating the two would
have taken more LOC and effort.

The weirdness in this patch is that mm/hugetlb.c cannot yet use
MPOL_PREFERRED_MANY because it is part of the UAPI we haven't yet exposed.
Instead of making that define global, the check is simply changed with the
UAPI patch.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
---
 mm/hugetlb.c   | 20 +++++++++++++++++---
 mm/mempolicy.c |  3 ++-
 2 files changed, 19 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 57ece74e3aae..46e94675de44 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1103,7 +1103,7 @@  static struct page *dequeue_huge_page_vma(struct hstate *h,
 				unsigned long address, int avoid_reserve,
 				long chg)
 {
-	struct page *page;
+	struct page *page = NULL;
 	struct mempolicy *mpol;
 	gfp_t gfp_mask;
 	nodemask_t *nodemask;
@@ -1124,7 +1124,14 @@  static struct page *dequeue_huge_page_vma(struct hstate *h,
 
 	gfp_mask = htlb_alloc_mask(h);
 	nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
-	page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+	if (mpol->mode != MPOL_BIND && nodemask) { /* AKA MPOL_PREFERRED_MANY */
+		page = dequeue_huge_page_nodemask(h, gfp_mask | __GFP_RETRY_MAYFAIL,
+						  nid, nodemask);
+		if (!page)
+			page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL);
+	} else {
+		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+	}
 	if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
 		SetPagePrivate(page);
 		h->resv_huge_pages--;
@@ -1972,7 +1979,14 @@  struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
 	nodemask_t *nodemask;
 
 	nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
-	page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask);
+	if (mpol->mode != MPOL_BIND && nodemask) { /* AKA MPOL_PREFERRED_MANY */
+		page = alloc_surplus_huge_page(h, gfp_mask | __GFP_RETRY_MAYFAIL,
+					       nid, nodemask);
+		if (!page)
+			alloc_surplus_huge_page(h, gfp_mask, nid, NULL);
+	} else {
+		page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask);
+	}
 	mpol_cond_put(mpol);
 
 	return page;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 51ac0d4a2eda..53390c2e0aca 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2097,7 +2097,8 @@  int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags,
 					huge_page_shift(hstate_vma(vma)));
 	} else {
 		nid = policy_node(gfp_flags, *mpol, numa_node_id());
-		if ((*mpol)->mode == MPOL_BIND)
+		if ((*mpol)->mode == MPOL_BIND ||
+		    (*mpol)->mode == MPOL_PREFERRED_MANY)
 			*nodemask = &(*mpol)->nodes;
 	}
 	return nid;