diff mbox series

[01/12] mm/khugepaged: record SCAN_PMD_MAPPED when scan_pmd() finds THP

Message ID 20220410135445.3897054-2-zokeefe@google.com (mailing list archive)
State New
Headers show
Series mm: userspace hugepage collapse | expand

Commit Message

Zach O'Keefe April 10, 2022, 1:54 p.m. UTC
When scanning an anon pmd to see if it's eligible for collapse, return
SCAN_PMD_MAPPED if the pmd already maps a THP.  Note that
SCAN_PMD_MAPPED is different from SCAN_PAGE_COMPOUND used in the
file-collapse path, since the latter might identify pte-mapped compound
pages.  This is required by MADV_COLLAPSE which necessarily needs to
know what hugepage-aligned/sized regions are already pmd-mapped.

Signed-off-by: Zach O'Keefe <zokeefe@google.com>
---
 include/trace/events/huge_memory.h |  3 ++-
 mm/internal.h                      |  1 +
 mm/khugepaged.c                    | 30 ++++++++++++++++++++++++++----
 mm/rmap.c                          | 15 +++++++++++++--
 4 files changed, 42 insertions(+), 7 deletions(-)
diff mbox series

Patch

diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
index d651f3437367..9faa678e0a5b 100644
--- a/include/trace/events/huge_memory.h
+++ b/include/trace/events/huge_memory.h
@@ -33,7 +33,8 @@ 
 	EM( SCAN_ALLOC_HUGE_PAGE_FAIL,	"alloc_huge_page_failed")	\
 	EM( SCAN_CGROUP_CHARGE_FAIL,	"ccgroup_charge_failed")	\
 	EM( SCAN_TRUNCATED,		"truncated")			\
-	EMe(SCAN_PAGE_HAS_PRIVATE,	"page_has_private")		\
+	EM( SCAN_PAGE_HAS_PRIVATE,	"page_has_private")		\
+	EMe(SCAN_PMD_MAPPED,		"page_pmd_mapped")		\
 
 #undef EM
 #undef EMe
diff --git a/mm/internal.h b/mm/internal.h
index 1d3fb3c0f971..db594d611925 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -172,6 +172,7 @@  extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason
 /*
  * in mm/rmap.c:
  */
+pmd_t *mm_find_pmd_raw(struct mm_struct *mm, unsigned long address);
 extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
 
 /*
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 0cde4b44d799..b403f056a847 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -51,6 +51,7 @@  enum scan_result {
 	SCAN_CGROUP_CHARGE_FAIL,
 	SCAN_TRUNCATED,
 	SCAN_PAGE_HAS_PRIVATE,
+	SCAN_PMD_MAPPED,
 };
 
 #define CREATE_TRACE_POINTS
@@ -987,6 +988,29 @@  static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
 	return 0;
 }
 
+static int find_pmd_or_thp_or_none(struct mm_struct *mm,
+				   unsigned long address,
+				   pmd_t **pmd)
+{
+	*pmd = mm_find_pmd_raw(mm, address);
+	pmd_t pmde;
+
+	if (!*pmd)
+		return SCAN_PMD_NULL;
+
+	pmde = pmd_read_atomic(*pmd);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	/* See comments in pmd_none_or_trans_huge_or_clear_bad() */
+	barrier();
+#endif
+	if (!pmd_present(pmde) || pmd_none(pmde))
+		return SCAN_PMD_NULL;
+	if (pmd_trans_huge(pmde))
+		return SCAN_PMD_MAPPED;
+	return SCAN_SUCCEED;
+}
+
 /*
  * Bring missing pages in from swap, to complete THP collapse.
  * Only done if khugepaged_scan_pmd believes it is worthwhile.
@@ -1238,11 +1262,9 @@  static int khugepaged_scan_pmd(struct mm_struct *mm,
 
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
-	pmd = mm_find_pmd(mm, address);
-	if (!pmd) {
-		result = SCAN_PMD_NULL;
+	result = find_pmd_or_thp_or_none(mm, address, &pmd);
+	if (result != SCAN_SUCCEED)
 		goto out;
-	}
 
 	memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
 	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
diff --git a/mm/rmap.c b/mm/rmap.c
index a1211fa879cf..fb47443f44c6 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -758,13 +758,12 @@  unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 	return vma_address(page, vma);
 }
 
-pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
+pmd_t *mm_find_pmd_raw(struct mm_struct *mm, unsigned long address)
 {
 	pgd_t *pgd;
 	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd = NULL;
-	pmd_t pmde;
 
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
@@ -779,6 +778,18 @@  pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
 		goto out;
 
 	pmd = pmd_offset(pud, address);
+out:
+	return pmd;
+}
+
+pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
+{
+	pmd_t pmde;
+	pmd_t *pmd;
+
+	pmd = mm_find_pmd_raw(mm, address);
+	if (!pmd)
+		goto out;
 	/*
 	 * Some THP functions use the sequence pmdp_huge_clear_flush(), set_pmd_at()
 	 * without holding anon_vma lock for write.  So when looking for a