diff mbox series

[v6,3/5] mm: do not remap clean subpages when splitting isolated thp

Message ID ddc216d41ff8cf2953488a2e041856c6e8dbd51e.1667454613.git.alexlzhu@fb.com (mailing list archive)
State New
Headers show
Series THP Shrinker | expand

Commit Message

Alex Zhu (Kernel) Nov. 3, 2022, 6:01 a.m. UTC
From: Alexander Zhu <alexlzhu@fb.com>

Avoid remapping zero-filled subpages that are freed in split_huge_page().
Pages are not remapped except in the case of userfaultfd. In the case
of userfaultfd we remap to the shared zero page, similar to what is
done by KSM.

Signed-off-by: Alexander Zhu <alexlzhu@fb.com>
---
 include/linux/rmap.h          |  2 +-
 include/linux/vm_event_item.h |  2 +
 mm/huge_memory.c              |  8 ++--
 mm/migrate.c                  | 73 +++++++++++++++++++++++++++++++----
 mm/migrate_device.c           |  4 +-
 mm/vmstat.c                   |  2 +
 6 files changed, 77 insertions(+), 14 deletions(-)

Comments

kernel test robot Nov. 3, 2022, 12:48 p.m. UTC | #1
Hi,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v6.1-rc3]
[cannot apply to shuah-kselftest/next next-20221103]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/alexlzhu-fb-com/THP-Shrinker/20221103-140549
patch link:    https://lore.kernel.org/r/ddc216d41ff8cf2953488a2e041856c6e8dbd51e.1667454613.git.alexlzhu%40fb.com
patch subject: [PATCH v6 3/5] mm: do not remap clean subpages when splitting isolated thp
config: ia64-randconfig-r006-20221102
compiler: ia64-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/4048042cde24d1c71ae7ce0770af3098195cc45c
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review alexlzhu-fb-com/THP-Shrinker/20221103-140549
        git checkout 4048042cde24d1c71ae7ce0770af3098195cc45c
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=ia64 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   mm/migrate.c: In function 'try_to_unmap_clean':
>> mm/migrate.c:206:32: error: 'THP_SPLIT_REMAP_READONLY_ZERO_PAGE' undeclared (first use in this function)
     206 |                 count_vm_event(THP_SPLIT_REMAP_READONLY_ZERO_PAGE);
         |                                ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   mm/migrate.c:206:32: note: each undeclared identifier is reported only once for each function it appears in
>> mm/migrate.c:211:24: error: 'THP_SPLIT_UNMAP' undeclared (first use in this function)
     211 |         count_vm_event(THP_SPLIT_UNMAP);
         |                        ^~~~~~~~~~~~~~~


vim +/THP_SPLIT_REMAP_READONLY_ZERO_PAGE +206 mm/migrate.c

   171	
   172	static bool try_to_unmap_clean(struct page_vma_mapped_walk *pvmw, struct page *page)
   173	{
   174		void *addr;
   175		bool dirty;
   176		pte_t newpte;
   177	
   178		VM_BUG_ON_PAGE(PageCompound(page), page);
   179		VM_BUG_ON_PAGE(!PageAnon(page), page);
   180		VM_BUG_ON_PAGE(!PageLocked(page), page);
   181		VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
   182	
   183		if (PageMlocked(page) || (pvmw->vma->vm_flags & VM_LOCKED))
   184			return false;
   185	
   186		/*
   187		 * The pmd entry mapping the old thp was flushed and the pte mapping
   188		 * this subpage has been non present. Therefore, this subpage is
   189		 * inaccessible. We don't need to remap it if it contains only zeros.
   190		 */
   191		addr = kmap_local_page(page);
   192		dirty = memchr_inv(addr, 0, PAGE_SIZE);
   193		kunmap_local(addr);
   194	
   195		if (dirty)
   196			return false;
   197	
   198		pte_clear_not_present_full(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, false);
   199	
   200		if (userfaultfd_armed(pvmw->vma)) {
   201			newpte = pte_mkspecial(pfn_pte(page_to_pfn(ZERO_PAGE(pvmw->address)),
   202						       pvmw->vma->vm_page_prot));
   203			ptep_clear_flush(pvmw->vma, pvmw->address, pvmw->pte);
   204			set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);
   205			dec_mm_counter(pvmw->vma->vm_mm, MM_ANONPAGES);
 > 206			count_vm_event(THP_SPLIT_REMAP_READONLY_ZERO_PAGE);
   207			return true;
   208		}
   209	
   210		dec_mm_counter(pvmw->vma->vm_mm, mm_counter(page));
 > 211		count_vm_event(THP_SPLIT_UNMAP);
   212		return true;
   213	}
   214
kernel test robot Nov. 3, 2022, 1:19 p.m. UTC | #2
Hi,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v6.1-rc3]
[cannot apply to shuah-kselftest/next next-20221103]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/alexlzhu-fb-com/THP-Shrinker/20221103-140549
patch link:    https://lore.kernel.org/r/ddc216d41ff8cf2953488a2e041856c6e8dbd51e.1667454613.git.alexlzhu%40fb.com
patch subject: [PATCH v6 3/5] mm: do not remap clean subpages when splitting isolated thp
config: i386-randconfig-a004
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/4048042cde24d1c71ae7ce0770af3098195cc45c
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review alexlzhu-fb-com/THP-Shrinker/20221103-140549
        git checkout 4048042cde24d1c71ae7ce0770af3098195cc45c
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> mm/migrate.c:206:18: error: use of undeclared identifier 'THP_SPLIT_REMAP_READONLY_ZERO_PAGE'
                   count_vm_event(THP_SPLIT_REMAP_READONLY_ZERO_PAGE);
                                  ^
>> mm/migrate.c:211:17: error: use of undeclared identifier 'THP_SPLIT_UNMAP'
           count_vm_event(THP_SPLIT_UNMAP);
                          ^
   2 errors generated.


vim +/THP_SPLIT_REMAP_READONLY_ZERO_PAGE +206 mm/migrate.c

   171	
   172	static bool try_to_unmap_clean(struct page_vma_mapped_walk *pvmw, struct page *page)
   173	{
   174		void *addr;
   175		bool dirty;
   176		pte_t newpte;
   177	
   178		VM_BUG_ON_PAGE(PageCompound(page), page);
   179		VM_BUG_ON_PAGE(!PageAnon(page), page);
   180		VM_BUG_ON_PAGE(!PageLocked(page), page);
   181		VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
   182	
   183		if (PageMlocked(page) || (pvmw->vma->vm_flags & VM_LOCKED))
   184			return false;
   185	
   186		/*
   187		 * The pmd entry mapping the old thp was flushed and the pte mapping
   188		 * this subpage has been non present. Therefore, this subpage is
   189		 * inaccessible. We don't need to remap it if it contains only zeros.
   190		 */
   191		addr = kmap_local_page(page);
   192		dirty = memchr_inv(addr, 0, PAGE_SIZE);
   193		kunmap_local(addr);
   194	
   195		if (dirty)
   196			return false;
   197	
   198		pte_clear_not_present_full(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, false);
   199	
   200		if (userfaultfd_armed(pvmw->vma)) {
   201			newpte = pte_mkspecial(pfn_pte(page_to_pfn(ZERO_PAGE(pvmw->address)),
   202						       pvmw->vma->vm_page_prot));
   203			ptep_clear_flush(pvmw->vma, pvmw->address, pvmw->pte);
   204			set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);
   205			dec_mm_counter(pvmw->vma->vm_mm, MM_ANONPAGES);
 > 206			count_vm_event(THP_SPLIT_REMAP_READONLY_ZERO_PAGE);
   207			return true;
   208		}
   209	
   210		dec_mm_counter(pvmw->vma->vm_mm, mm_counter(page));
 > 211		count_vm_event(THP_SPLIT_UNMAP);
   212		return true;
   213	}
   214
diff mbox series

Patch

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bd3504d11b15..3f83bbcf1333 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -428,7 +428,7 @@  int folio_mkclean(struct folio *);
 int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
 		      struct vm_area_struct *vma);
 
-void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked);
+void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked, bool unmap_clean);
 
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index f733ffc5f6f3..3618b10ddec9 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -112,6 +112,8 @@  enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		THP_SPLIT_PUD,
 #endif
 		THP_SPLIT_FREE,
+		THP_SPLIT_UNMAP,
+		THP_SPLIT_REMAP_READONLY_ZERO_PAGE,
 		THP_ZERO_PAGE_ALLOC,
 		THP_ZERO_PAGE_ALLOC_FAILED,
 		THP_SWPOUT,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 6a5c70080c07..cba0bbbb2a93 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2373,7 +2373,7 @@  static void unmap_folio(struct folio *folio)
 		try_to_unmap(folio, ttu_flags | TTU_IGNORE_MLOCK);
 }
 
-static void remap_page(struct folio *folio, unsigned long nr)
+static void remap_page(struct folio *folio, unsigned long nr, bool unmap_clean)
 {
 	int i = 0;
 
@@ -2381,7 +2381,7 @@  static void remap_page(struct folio *folio, unsigned long nr)
 	if (!folio_test_anon(folio))
 		return;
 	for (;;) {
-		remove_migration_ptes(folio, folio, true);
+		remove_migration_ptes(folio, folio, true, unmap_clean);
 		i += folio_nr_pages(folio);
 		if (i >= nr)
 			break;
@@ -2569,7 +2569,7 @@  static void __split_huge_page(struct page *page, struct list_head *list,
 	}
 	local_irq_enable();
 
-	remap_page(folio, nr);
+	remap_page(folio, nr, PageAnon(head));
 
 	if (PageSwapCache(head)) {
 		swp_entry_t entry = { .val = page_private(head) };
@@ -2798,7 +2798,7 @@  int split_huge_page_to_list(struct page *page, struct list_head *list)
 		if (mapping)
 			xas_unlock(&xas);
 		local_irq_enable();
-		remap_page(folio, folio_nr_pages(folio));
+		remap_page(folio, folio_nr_pages(folio), false);
 		ret = -EBUSY;
 	}
 
diff --git a/mm/migrate.c b/mm/migrate.c
index dff333593a8a..2764b14d3383 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -30,6 +30,7 @@ 
 #include <linux/writeback.h>
 #include <linux/mempolicy.h>
 #include <linux/vmalloc.h>
+#include <linux/vm_event_item.h>
 #include <linux/security.h>
 #include <linux/backing-dev.h>
 #include <linux/compaction.h>
@@ -168,13 +169,62 @@  void putback_movable_pages(struct list_head *l)
 	}
 }
 
+static bool try_to_unmap_clean(struct page_vma_mapped_walk *pvmw, struct page *page)
+{
+	void *addr;
+	bool dirty;
+	pte_t newpte;
+
+	VM_BUG_ON_PAGE(PageCompound(page), page);
+	VM_BUG_ON_PAGE(!PageAnon(page), page);
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
+	VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
+
+	if (PageMlocked(page) || (pvmw->vma->vm_flags & VM_LOCKED))
+		return false;
+
+	/*
+	 * The pmd entry mapping the old thp was flushed and the pte mapping
+	 * this subpage has been non present. Therefore, this subpage is
+	 * inaccessible. We don't need to remap it if it contains only zeros.
+	 */
+	addr = kmap_local_page(page);
+	dirty = memchr_inv(addr, 0, PAGE_SIZE);
+	kunmap_local(addr);
+
+	if (dirty)
+		return false;
+
+	pte_clear_not_present_full(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, false);
+
+	if (userfaultfd_armed(pvmw->vma)) {
+		newpte = pte_mkspecial(pfn_pte(page_to_pfn(ZERO_PAGE(pvmw->address)),
+					       pvmw->vma->vm_page_prot));
+		ptep_clear_flush(pvmw->vma, pvmw->address, pvmw->pte);
+		set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);
+		dec_mm_counter(pvmw->vma->vm_mm, MM_ANONPAGES);
+		count_vm_event(THP_SPLIT_REMAP_READONLY_ZERO_PAGE);
+		return true;
+	}
+
+	dec_mm_counter(pvmw->vma->vm_mm, mm_counter(page));
+	count_vm_event(THP_SPLIT_UNMAP);
+	return true;
+}
+
+struct rmap_walk_arg {
+	struct folio *folio;
+	bool unmap_clean;
+};
+
 /*
  * Restore a potential migration pte to a working pte entry
  */
 static bool remove_migration_pte(struct folio *folio,
-		struct vm_area_struct *vma, unsigned long addr, void *old)
+		struct vm_area_struct *vma, unsigned long addr, void *arg)
 {
-	DEFINE_FOLIO_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
+	struct rmap_walk_arg *rmap_walk_arg = arg;
+	DEFINE_FOLIO_VMA_WALK(pvmw, rmap_walk_arg->folio, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
 
 	while (page_vma_mapped_walk(&pvmw)) {
 		rmap_t rmap_flags = RMAP_NONE;
@@ -197,6 +247,8 @@  static bool remove_migration_pte(struct folio *folio,
 			continue;
 		}
 #endif
+		if (rmap_walk_arg->unmap_clean && try_to_unmap_clean(&pvmw, new))
+			continue;
 
 		folio_get(folio);
 		pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
@@ -272,13 +324,20 @@  static bool remove_migration_pte(struct folio *folio,
  * Get rid of all migration entries and replace them by
  * references to the indicated page.
  */
-void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked)
+void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked, bool unmap_clean)
 {
+	struct rmap_walk_arg rmap_walk_arg = {
+		.folio = src,
+		.unmap_clean = unmap_clean,
+	};
+
 	struct rmap_walk_control rwc = {
 		.rmap_one = remove_migration_pte,
-		.arg = src,
+		.arg = &rmap_walk_arg,
 	};
 
+	VM_BUG_ON_FOLIO(unmap_clean && src != dst, src);
+
 	if (locked)
 		rmap_walk_locked(dst, &rwc);
 	else
@@ -872,7 +931,7 @@  static int writeout(struct address_space *mapping, struct folio *folio)
 	 * At this point we know that the migration attempt cannot
 	 * be successful.
 	 */
-	remove_migration_ptes(folio, folio, false);
+	remove_migration_ptes(folio, folio, false, false);
 
 	rc = mapping->a_ops->writepage(&folio->page, &wbc);
 
@@ -1128,7 +1187,7 @@  static int __unmap_and_move(struct folio *src, struct folio *dst,
 
 	if (page_was_mapped)
 		remove_migration_ptes(src,
-			rc == MIGRATEPAGE_SUCCESS ? dst : src, false);
+			rc == MIGRATEPAGE_SUCCESS ? dst : src, false, false);
 
 out_unlock_both:
 	folio_unlock(dst);
@@ -1338,7 +1397,7 @@  static int unmap_and_move_huge_page(new_page_t get_new_page,
 
 	if (page_was_mapped)
 		remove_migration_ptes(src,
-			rc == MIGRATEPAGE_SUCCESS ? dst : src, false);
+			rc == MIGRATEPAGE_SUCCESS ? dst : src, false, false);
 
 unlock_put_anon:
 	folio_unlock(dst);
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 6fa682eef7a0..6508a083d7fd 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -421,7 +421,7 @@  static unsigned long migrate_device_unmap(unsigned long *src_pfns,
 			continue;
 
 		folio = page_folio(page);
-		remove_migration_ptes(folio, folio, false);
+		remove_migration_ptes(folio, folio, false, false);
 
 		src_pfns[i] = 0;
 		folio_unlock(folio);
@@ -847,7 +847,7 @@  void migrate_device_finalize(unsigned long *src_pfns,
 
 		src = page_folio(page);
 		dst = page_folio(newpage);
-		remove_migration_ptes(src, dst, false);
+		remove_migration_ptes(src, dst, false, false);
 		folio_unlock(src);
 
 		if (is_zone_device_page(page))
diff --git a/mm/vmstat.c b/mm/vmstat.c
index a2ba5d7922f4..3d802eb6754d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1360,6 +1360,8 @@  const char * const vmstat_text[] = {
 	"thp_split_pud",
 #endif
 	"thp_split_free",
+	"thp_split_unmap",
+	"thp_split_remap_readonly_zero_page",
 	"thp_zero_page_alloc",
 	"thp_zero_page_alloc_failed",
 	"thp_swpout",