
[11/45] fs/proc: Enable smaps_pte_entry to handle cont-pte mapped hugetlb vmas

Message ID 20240704043132.28501-12-osalvador@suse.de (mailing list archive)
State New
Series hugetlb pagewalk unification

Commit Message

Oscar Salvador July 4, 2024, 4:30 a.m. UTC
HugeTLB pages can be cont-pte mapped, so teach smaps_pte_entry to handle
them.
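For example, on arm64 with 4KiB base pages, a 64KiB hugetlb page is mapped
by CONT_PTES = 16 contiguous PTEs, so a single entry visited by the walker
accounts for 16 * 4KiB = 64KiB rather than just PAGE_SIZE.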

Signed-off-by: Oscar Salvador <osalvador@suse.de>
---
 fs/proc/task_mmu.c      | 19 +++++++++++++------
 include/linux/pgtable.h | 12 ++++++++++++
 2 files changed, 25 insertions(+), 6 deletions(-)

Comments

David Hildenbrand July 4, 2024, 10:30 a.m. UTC | #1
>   
>   #ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
> @@ -952,6 +956,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
>   	struct vm_area_struct *vma = walk->vma;
>   	pte_t *pte;
>   	spinlock_t *ptl;
> +	unsigned long size, cont_ptes;
>   
>   	ptl = pmd_huge_lock(pmd, vma);
>   	if (ptl) {
> @@ -965,7 +970,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
>   		walk->action = ACTION_AGAIN;
>   		return 0;
>   	}
> -	for (; addr != end; pte++, addr += PAGE_SIZE)
> +	size = pte_cont(ptep_get(pte)) ? PAGE_SIZE * CONT_PTES : PAGE_SIZE;
> +	cont_ptes = pte_cont(ptep_get(pte)) ? CONT_PTES : 1;
> +	for (; addr != end; pte += cont_ptes, addr += size)

The better way to do this is to actually batch PTEs also when cont-pte 
is not around (e.g., x86). folio_pte_batch() does that and is optimized 
automatically for the cont-pte bit -- which should only apply if we have 
a present folio.

So this code might need some slight reshuffling: look up the folio first; 
if it's large, use folio_pte_batch(); otherwise (small or no normal folio) 
process individual PTEs.
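
Roughly, that could look like the following (untested sketch; it assumes 
the current folio_pte_batch() signature, and note that the helper lives in 
mm/internal.h, so it would have to be made available to fs/proc first):

	int nr;

	for (; addr != end; pte += nr, addr += nr * PAGE_SIZE) {
		pte_t ptent = ptep_get(pte);

		nr = 1;
		if (pte_present(ptent)) {
			struct folio *folio = vm_normal_folio(vma, addr, ptent);

			/*
			 * Batch only large folios; for a small folio or no
			 * normal folio we keep processing one PTE at a time.
			 */
			if (folio && folio_test_large(folio))
				nr = folio_pte_batch(folio, addr, pte, ptent,
						     (end - addr) >> PAGE_SHIFT,
						     0, NULL, NULL, NULL);
		}
		/*
		 * smaps_pte_entry() would also have to learn to account
		 * 'nr' pages at once for this to be a real win.
		 */
		smaps_pte_entry(pte, addr, walk);
	}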

Patch

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3f3460ff03b0..4d94b6ce58dd 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -825,6 +825,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 	struct page *page = NULL;
 	bool present = false, young = false, dirty = false;
 	pte_t ptent = ptep_get(pte);
+	unsigned long size = pte_cont(ptent) ? PAGE_SIZE * CONT_PTES : PAGE_SIZE;
 
 	if (pte_present(ptent)) {
 		page = vm_normal_page(vma, addr, ptent);
@@ -834,18 +835,18 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 	} else if (is_swap_pte(ptent)) {
 		swp_entry_t swpent = pte_to_swp_entry(ptent);
 
-		if (!non_swap_entry(swpent)) {
+		if (!is_vm_hugetlb_page(vma) && !non_swap_entry(swpent)) {
 			int mapcount;
 
-			mss->swap += PAGE_SIZE;
+			mss->swap += size;
 			mapcount = swp_swapcount(swpent);
 			if (mapcount >= 2) {
-				u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT;
+				u64 pss_delta = (u64)size << PSS_SHIFT;
 
 				do_div(pss_delta, mapcount);
 				mss->swap_pss += pss_delta;
 			} else {
-				mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
+				mss->swap_pss += (u64)size << PSS_SHIFT;
 			}
 		} else if (is_pfn_swap_entry(swpent)) {
 			if (is_device_private_entry(swpent))
@@ -860,7 +861,10 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 	if (!page)
 		return;
 
-	smaps_account(mss, page, false, young, dirty, locked, present);
+	if (is_vm_hugetlb_page(vma))
+		mss_hugetlb_update(mss, page_folio(page), vma, pte);
+	else
+		smaps_account(mss, page, false, young, dirty, locked, present);
 }
 
 #ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
@@ -952,6 +956,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	struct vm_area_struct *vma = walk->vma;
 	pte_t *pte;
 	spinlock_t *ptl;
+	unsigned long size, cont_ptes;
 
 	ptl = pmd_huge_lock(pmd, vma);
 	if (ptl) {
@@ -965,7 +970,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 		walk->action = ACTION_AGAIN;
 		return 0;
 	}
-	for (; addr != end; pte++, addr += PAGE_SIZE)
+	size = pte_cont(ptep_get(pte)) ? PAGE_SIZE * CONT_PTES : PAGE_SIZE;
+	cont_ptes = pte_cont(ptep_get(pte)) ? CONT_PTES : 1;
+	for (; addr != end; pte += cont_ptes, addr += size)
 		smaps_pte_entry(pte, addr, walk);
 	pte_unmap_unlock(pte - 1, ptl);
 out:
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index a9edeb86b7fe..991137dab87e 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1926,6 +1926,18 @@ typedef unsigned int pgtbl_mod_mask;
 #define CONT_PMDS 0
 #endif
 
+#ifndef pte_cont
+#define pte_cont(x) false
+#endif
+
+#ifndef CONT_PTE_SIZE
+#define CONT_PTE_SIZE 0
+#endif
+
+#ifndef CONT_PTES
+#define CONT_PTES 0
+#endif
+
 /*
  * We always define pmd_pfn for all archs as it's used in lots of generic
  * code.  Now it happens too for pud_pfn (and can happen for larger