diff mbox series

[v2,29/46] hugetlb: update page_vma_mapped to do high-granularity walks

Message ID 20230218002819.1486479-30-jthoughton@google.com (mailing list archive)
State New
Headers show
Series hugetlb: introduce HugeTLB high-granularity mapping | expand

Commit Message

James Houghton Feb. 18, 2023, 12:28 a.m. UTC
Update the HugeTLB logic to look a lot more like the PTE-mapped THP
logic. When a user calls us in a loop, we will update pvmw->address to
walk to each page table entry that could possibly map the hugepage
containing pvmw->pfn.

Make use of the new pte_order so callers know what size PTE
they're getting.

The !pte failure case is changed to call not_found() instead of just
returning false. This should be a no-op, but if somehow the hstate-level
PTE were deallocated between iterations, not_found() should be called to
drop locks.

Signed-off-by: James Houghton <jthoughton@google.com>
diff mbox series

Patch

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 08295b122ad6..03e8a4987272 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -133,7 +133,8 @@  static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
  *
  * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point
  * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is
- * adjusted if needed (for PTE-mapped THPs).
+ * adjusted if needed (for PTE-mapped THPs and high-granularity-mapped HugeTLB
+ * pages).
  *
  * If @pvmw->pmd is set but @pvmw->pte is not, you have found PMD-mapped page
  * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in
@@ -165,23 +166,47 @@  bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 
 	if (unlikely(is_vm_hugetlb_page(vma))) {
 		struct hstate *hstate = hstate_vma(vma);
-		unsigned long size = huge_page_size(hstate);
-		/* The only possible mapping was handled on last iteration */
-		if (pvmw->pte)
-			return not_found(pvmw);
-		/*
-		 * All callers that get here will already hold the
-		 * i_mmap_rwsem.  Therefore, no additional locks need to be
-		 * taken before calling hugetlb_walk().
-		 */
-		pvmw->pte = hugetlb_walk(vma, pvmw->address, size);
-		if (!pvmw->pte)
-			return false;
+		struct hugetlb_pte hpte;
+		pte_t pteval;
+
+		end = (pvmw->address & huge_page_mask(hstate)) +
+			huge_page_size(hstate);
+
+		do {
+			if (pvmw->pte) {
+				if (pvmw->ptl)
+					spin_unlock(pvmw->ptl);
+				pvmw->ptl = NULL;
+				pvmw->address += PAGE_SIZE << pvmw->pte_order;
+				if (pvmw->address >= end)
+					return not_found(pvmw);
+			}
 
-		pvmw->pte_order = huge_page_order(hstate);
-		pvmw->ptl = huge_pte_lock(hstate, mm, pvmw->pte);
-		if (!check_pte(pvmw))
-			return not_found(pvmw);
+			/*
+			 * All callers that get here will already hold the
+			 * i_mmap_rwsem. Therefore, no additional locks need to
+			 * be taken before calling hugetlb_walk().
+			 */
+			if (hugetlb_full_walk(&hpte, vma, pvmw->address))
+				return not_found(pvmw);
+
+retry:
+			pvmw->pte = hpte.ptep;
+			pvmw->pte_order = hpte.shift - PAGE_SHIFT;
+			pvmw->ptl = hugetlb_pte_lock(&hpte);
+			pteval = huge_ptep_get(hpte.ptep);
+			if (pte_present(pteval) && !hugetlb_pte_present_leaf(
+						&hpte, pteval)) {
+				/*
+				 * Someone split from under us, so keep
+				 * walking.
+				 */
+				spin_unlock(pvmw->ptl);
+				hugetlb_full_walk_continue(&hpte, vma,
+						pvmw->address);
+				goto retry;
+			}
+		} while (!check_pte(pvmw));
 		return true;
 	}