
[v3,2/2] mm/khugepaged: map exclusive anonymous pte-mapped THPs by pmds

Message ID b15ab2928515f8fbbddd89434baf00c7c8117968.1702882426.git.xuyu@linux.alibaba.com (mailing list archive)
State New
Series mm/khugepaged: map anonymous pte-mapped THPs by pmds

Commit Message

Xu Yu Dec. 18, 2023, 7:06 a.m. UTC
As David points out, this adds another case which is suitable for
mapping anonymous pte-mapped THPs by pmds: when all subpages are
PageAnonExclusive (PTEs may be either R/O or R/W), we can clear
PageAnonExclusive on all subpages except the first (head) page and
collapse to a R/W PMD with VM_WRITE, or a R/O PMD without VM_WRITE.

Signed-off-by: Xu Yu <xuyu@linux.alibaba.com>
---
 mm/khugepaged.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)
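
For reference, below is a minimal user-space sketch (not kernel code) of the
collapse eligibility check that this patch extends. The helper name
can_collapse is hypothetical, and HPAGE_PMD_NR is hard-coded to 512 (the
usual x86-64 value) purely for illustration:

	/*
	 * Simplified stand-alone illustration of the two cases accepted by
	 * collapse_pte_mapped_anon_thp() after this patch.
	 */
	#include <stdbool.h>
	#include <stdio.h>

	#define HPAGE_PMD_NR 512	/* illustrative value only */

	/*
	 * Case 1: no subpage is PageAnonExclusive and no PTE is writable
	 *         -> collapse into a R/O PMD without further action.
	 * Case 2: every subpage is PageAnonExclusive (PTEs R/O or R/W)
	 *         -> clear PageAnonExclusive on the tail pages and collapse
	 *            to a R/W PMD with VM_WRITE or a R/O PMD without it.
	 */
	static bool can_collapse(int exclusive, bool writable)
	{
		return (exclusive == 0 && !writable) ||
		       (exclusive == HPAGE_PMD_NR);
	}

	int main(void)
	{
		printf("none exclusive, read-only : %d\n", can_collapse(0, false));
		printf("partially exclusive       : %d\n", can_collapse(7, true));
		printf("fully exclusive, writable : %d\n",
		       can_collapse(HPAGE_PMD_NR, true));
		return 0;
	}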

Patch

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 57e261387124..d843c1e3ec39 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1285,6 +1285,7 @@  static int collapse_pte_mapped_anon_thp(struct mm_struct *mm,
 	unsigned long addr;
 	int exclusive = 0;
 	bool writable = false;
+	rmap_t rmap_flags = RMAP_COMPOUND;
 	int result, i;
 
 	/* Fast check before locking folio if already PMD-mapped */
@@ -1366,8 +1367,14 @@  static int collapse_pte_mapped_anon_thp(struct mm_struct *mm,
 	 * Case 1:
 	 * No subpages are PageAnonExclusive (PTEs must be R/O), we can
 	 * collapse into a R/O PMD without further action.
+	 *
+	 * Case 2:
+	 * All subpages are PageAnonExclusive (PTEs may be either R/O or R/W),
+	 * we clear PageAnonExclusive on all tail pages but the head page and
+	 * collapse to a R/W PMD with VM_WRITE or a R/O PMD without VM_WRITE.
 	 */
-	if (!(exclusive == 0 && !writable))
+	if (!((exclusive == 0 && !writable) ||
+	      (exclusive == HPAGE_PMD_NR)))
 		goto up_write;
 
 	/* Collapse pmd entry */
@@ -1390,7 +1397,9 @@  static int collapse_pte_mapped_anon_thp(struct mm_struct *mm,
 	 * false-negative page_mapped().
 	 */
 	folio_get(folio);
-	page_add_anon_rmap(&folio->page, vma, haddr, RMAP_COMPOUND);
+	if (exclusive == HPAGE_PMD_NR)
+		rmap_flags |= RMAP_EXCLUSIVE;
+	page_add_anon_rmap(&folio->page, vma, haddr, rmap_flags);
 
 	start_pte = pte_offset_map_lock(mm, &pmdval, haddr, &ptl);
 	for (i = 0, addr = haddr, pte = start_pte;
@@ -1401,6 +1410,10 @@  static int collapse_pte_mapped_anon_thp(struct mm_struct *mm,
 		ptep_clear(mm, addr, pte);
 		subpage = vm_normal_page(vma, addr, pteval);
 		page_remove_rmap(subpage, vma, false);
+
+		/* Clear PageAnonExclusive on tail pages */
+		if (exclusive == HPAGE_PMD_NR && i != 0)
+			ClearPageAnonExclusive(subpage);
 	}
 	pte_unmap_unlock(start_pte, ptl);
 	folio_ref_sub(folio, HPAGE_PMD_NR);
@@ -1408,6 +1421,8 @@  static int collapse_pte_mapped_anon_thp(struct mm_struct *mm,
 	/* Install pmd entry */
 	pgtable = pmd_pgtable(pmdval);
 	pmdval = mk_huge_pmd(&folio->page, vma->vm_page_prot);
+	if (exclusive == HPAGE_PMD_NR)
+		pmdval = maybe_pmd_mkwrite(pmd_mkdirty(pmdval), vma);
 	spin_lock(pml);
 	pgtable_trans_huge_deposit(mm, pmd, pgtable);
 	set_pmd_at(mm, haddr, pmd, pmdval);
@@ -1595,7 +1610,9 @@  static int hpage_collapse_scan_pmd(struct mm_struct *mm,
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
 
-	if (is_hpage && (exclusive == 0 && !writable)) {
+	if (is_hpage &&
+	    ((exclusive == 0 && !writable) ||
+	     (exclusive == HPAGE_PMD_NR))) {
 		int res;
 
 		res = collapse_pte_mapped_anon_thp(mm, vma, address,