diff mbox series

[RFC,v2,20/30] mm: page_vma_walk: teach it about PMD-mapped PUD THP.

Message ID 20200928175428.4110504-21-zi.yan@sent.com (mailing list archive)
State New, archived
Headers show
Series 1GB PUD THP support on x86_64 | expand

Commit Message

Zi Yan Sept. 28, 2020, 5:54 p.m. UTC
From: Zi Yan <ziy@nvidia.com>

We now have PMD-mapped PUD THP and PTE-mapped PUD THP, page_vma_walk
should handle them properly.

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
 mm/page_vma_mapped.c | 152 +++++++++++++++++++++++++++++++++----------
 1 file changed, 118 insertions(+), 34 deletions(-)
diff mbox series

Patch

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index f88e845ad5e6..5a3c1b561ff5 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -7,6 +7,12 @@ 
 
 #include "internal.h"
 
+enum check_pmd_result {
+	PVM_NOT_MAPPED = 0,
+	PVM_LEAF_ENTRY,
+	PVM_NONLEAF_ENTRY,
+};
+
 static inline bool not_found(struct page_vma_mapped_walk *pvmw)
 {
 	page_vma_mapped_walk_done(pvmw);
@@ -52,6 +58,22 @@  static bool map_pte(struct page_vma_mapped_walk *pvmw)
 	return true;
 }
 
+static bool map_pmd(struct page_vma_mapped_walk *pvmw)
+{
+	pmd_t pmde;
+
+	pvmw->pmd = pmd_offset(pvmw->pud, pvmw->address);
+	pmde = READ_ONCE(*pvmw->pmd);
+	if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
+		pvmw->ptl = pmd_lock(pvmw->vma->vm_mm, pvmw->pmd);
+		return true;
+	} else if (!pmd_present(pmde))
+		return false;
+
+	pvmw->ptl = pmd_lock(pvmw->vma->vm_mm, pvmw->pmd);
+	return true;
+}
+
 static inline bool pfn_is_match(struct page *page, unsigned long pfn)
 {
 	unsigned long page_pfn = page_to_pfn(page);
@@ -115,6 +137,57 @@  static bool check_pte(struct page_vma_mapped_walk *pvmw)
 	return pfn_is_match(pvmw->page, pfn);
 }
 
+/**
+ * check_pmd - check if @pvmw->page is mapped at the @pvmw->pmd
+ *
+ * page_vma_mapped_walk() found a place where @pvmw->page is *potentially*
+ * mapped. check_pmd() has to validate this.
+ *
+ * @pvmw->pmd may point to empty PMD, migraiton PMD, PMD pointing to arbitrary
+ * huge page, or PMD pointing to a PTE page table page.
+ *
+ * If PVMW_MIGRATION flag is set, returns PVM_LEAF_ENTRY if @pvmw->pmd contains
+ * migration entry that points to @pvmw->page.
+ *
+ * If PVMW_MIGRATION flag is not set, returns PVM_LEAF_ENTRY if @pvmw->pmd
+ * points to @pvmw->page.
+ *
+ * If @pvmw->pmd points to a PTE page table page, returns PVM_NONLEAF_ENTRY.
+ *
+ * Otherwise, return PVM_NOT_MAPPED.
+ *
+ */
+static enum check_pmd_result check_pmd(struct page_vma_mapped_walk *pvmw)
+{
+	unsigned long pfn;
+
+	if (likely(pmd_trans_huge(*pvmw->pmd))) {
+		if (pvmw->flags & PVMW_MIGRATION)
+			return 0;
+		pfn = pmd_pfn(*pvmw->pmd);
+		if (!pfn_is_match(pvmw->page, pfn))
+			return PVM_NOT_MAPPED;
+		return PVM_LEAF_ENTRY;
+	} else if (!pmd_present(*pvmw->pmd)) {
+		if (thp_migration_supported()) {
+			if (!(pvmw->flags & PVMW_MIGRATION))
+				return 0;
+			if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) {
+				swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd);
+
+				pfn = migration_entry_to_pfn(entry);
+				if (!pfn_is_match(pvmw->page, pfn))
+					return PVM_NOT_MAPPED;
+				return PVM_LEAF_ENTRY;
+			}
+		}
+		return 0;
+	}
+	/* THP pmd was split under us: handle on pte level */
+	spin_unlock(pvmw->ptl);
+	pvmw->ptl = NULL;
+	return PVM_NONLEAF_ENTRY;
+}
 /**
  * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
  * @pvmw->address
@@ -146,14 +219,14 @@  bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	pgd_t *pgd;
 	p4d_t *p4d;
 	pud_t pude;
-	pmd_t pmde;
+	enum check_pmd_result pmd_check_res;
 
 	if (!pvmw->pte && !pvmw->pmd && pvmw->pud)
 		return not_found(pvmw);
 
 	/* The only possible pmd mapping has been handled on last iteration */
 	if (pvmw->pmd && !pvmw->pte)
-		return not_found(pvmw);
+		goto next_pmd;
 
 	if (pvmw->pte)
 		goto next_pte;
@@ -202,42 +275,47 @@  bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	} else if (!pud_present(pude))
 		return false;
 
-	pvmw->pmd = pmd_offset(pvmw->pud, pvmw->address);
-	/*
-	 * Make sure the pmd value isn't cached in a register by the
-	 * compiler and used as a stale value after we've observed a
-	 * subsequent update.
-	 */
-	pmde = READ_ONCE(*pvmw->pmd);
-	if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
-		pvmw->ptl = pmd_lock(mm, pvmw->pmd);
-		if (likely(pmd_trans_huge(*pvmw->pmd))) {
-			if (pvmw->flags & PVMW_MIGRATION)
-				return not_found(pvmw);
-			if (pmd_page(*pvmw->pmd) != page)
-				return not_found(pvmw);
+	if (!map_pmd(pvmw))
+		goto next_pmd;
+	/* pmd locked after map_pmd  */
+	while (1) {
+		pmd_check_res = check_pmd(pvmw);
+		if (pmd_check_res == PVM_LEAF_ENTRY)
 			return true;
-		} else if (!pmd_present(*pvmw->pmd)) {
-			if (thp_migration_supported()) {
-				if (!(pvmw->flags & PVMW_MIGRATION))
-					return not_found(pvmw);
-				if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) {
-					swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd);
-
-					if (migration_entry_to_page(entry) != page)
-						return not_found(pvmw);
-					return true;
+		else if (pmd_check_res == PVM_NONLEAF_ENTRY)
+			goto pte_level;
+next_pmd:
+		/* Only PMD-mapped PUD THP has next pmd. */
+		if (!(PageTransHuge(pvmw->page) && compound_order(pvmw->page) == HPAGE_PUD_ORDER))
+			return not_found(pvmw);
+		do {
+			pvmw->address += HPAGE_PMD_SIZE;
+			if (pvmw->address >= pvmw->vma->vm_end ||
+			    pvmw->address >=
+					__vma_address(pvmw->page, pvmw->vma) +
+					thp_nr_pages(pvmw->page) * PAGE_SIZE)
+				return not_found(pvmw);
+			/* Did we cross page table boundary? */
+			if (pvmw->address % PUD_SIZE == 0) {
+				/*
+				 * Reset pmd here, so we will no stay at PMD
+				 * level after restart.
+				 */
+				pvmw->pmd = NULL;
+				if (pvmw->ptl) {
+					spin_unlock(pvmw->ptl);
+					pvmw->ptl = NULL;
 				}
+				goto restart;
+			} else {
+				pvmw->pmd++;
 			}
-			return not_found(pvmw);
-		} else {
-			/* THP pmd was split under us: handle on pte level */
-			spin_unlock(pvmw->ptl);
-			pvmw->ptl = NULL;
-		}
-	} else if (!pmd_present(pmde)) {
-		return false;
+		} while (pmd_none(*pvmw->pmd));
+
+		if (!pvmw->ptl)
+			pvmw->ptl = pmd_lock(mm, pvmw->pmd);
 	}
+pte_level:
 	if (!map_pte(pvmw))
 		goto next_pte;
 	while (1) {
@@ -257,6 +335,12 @@  bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 			/* Did we cross page table boundary? */
 			if (pvmw->address % PMD_SIZE == 0) {
 				pte_unmap(pvmw->pte);
+				/*
+				 * In the case of PTE-mapped PUD THP, next entry
+				 * can be PMD. Reset pte here, so we will not
+				 * stay at PTE level after restart.
+				 */
+				pvmw->pte = NULL;
 				if (pvmw->ptl) {
 					spin_unlock(pvmw->ptl);
 					pvmw->ptl = NULL;