diff mbox series

[3/3] mm: don't remap clean subpages when splitting isolated thp

Message ID 20210731063938.1391602-4-yuzhao@google.com (mailing list archive)
State New
Headers show
Series mm: optimize thp for reclaim and migration | expand

Commit Message

Yu Zhao July 31, 2021, 6:39 a.m. UTC
Here, being clean means containing only zeros and being inaccessible to
userspace. When splitting an isolated thp under reclaim or migration,
there is no need to remap its clean subpages because they can be
faulted in anew. Not remapping them avoids writeback or copying during
reclaim or migration. This is particularly helpful when the internal
fragmentation of a thp is high, i.e., it has many untouched subpages.

Signed-off-by: Yu Zhao <yuzhao@google.com>
Tested-by: Shuang Zhai <zhais@google.com>
---
 include/linux/rmap.h          |  2 +-
 include/linux/vm_event_item.h |  1 +
 mm/huge_memory.c              | 10 +++---
 mm/migrate.c                  | 65 ++++++++++++++++++++++++++++++-----
 mm/vmstat.c                   |  1 +
 5 files changed, 64 insertions(+), 15 deletions(-)

Comments

kernel test robot July 31, 2021, 9:53 a.m. UTC | #1
Hi Yu,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.14-rc3]
[cannot apply to hnaz-linux-mm/master linux/master next-20210730]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Yu-Zhao/mm-optimize-thp-for-reclaim-and-migration/20210731-144129
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git c7d102232649226a69dddd58a4942cf13cff4f7c
config: x86_64-randconfig-a001-20210730 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 4f71f59bf3d9914188a11d0c41bedbb339d36ff5)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/e4e76c4915b364558aacae2cf320a43306a20fa1
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Yu-Zhao/mm-optimize-thp-for-reclaim-and-migration/20210731-144129
        git checkout e4e76c4915b364558aacae2cf320a43306a20fa1
        # save the attached .config to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> mm/migrate.c:199:17: error: use of undeclared identifier 'THP_SPLIT_UNMAP'
           count_vm_event(THP_SPLIT_UNMAP);
                          ^
   mm/migrate.c:2606:16: warning: variable 'addr' set but not used [-Wunused-but-set-variable]
           unsigned long addr, i, restore = 0;
                         ^
   1 warning and 1 error generated.


vim +/THP_SPLIT_UNMAP +199 mm/migrate.c

   170	
   171	static bool try_to_unmap_clean(struct page_vma_mapped_walk *pvmw, struct page *page)
   172	{
   173		void *addr;
   174		bool dirty;
   175	
   176		VM_BUG_ON_PAGE(PageLRU(page), page);
   177		VM_BUG_ON_PAGE(PageCompound(page), page);
   178		VM_BUG_ON_PAGE(!PageAnon(page), page);
   179		VM_BUG_ON_PAGE(!PageLocked(page), page);
   180		VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
   181	
   182		if (PageMlocked(page) || (pvmw->vma->vm_flags & VM_LOCKED))
   183			return false;
   184	
   185		/*
   186		 * The pmd entry mapping the old thp was flushed and the pte mapping
   187		 * this subpage has been non present. Therefore, this subpage is
   188		 * inaccessible. We don't need to remap it if it contains only zeros.
   189		 */
   190		addr = kmap_atomic(page);
   191		dirty = !!memchr_inv(addr, 0, PAGE_SIZE);
   192		kunmap_atomic(addr);
   193	
   194		if (dirty)
   195			return false;
   196	
   197		pte_clear_not_present_full(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, false);
   198		dec_mm_counter(pvmw->vma->vm_mm, mm_counter(page));
 > 199		count_vm_event(THP_SPLIT_UNMAP);
   200	
   201		return true;
   202	}
   203	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot July 31, 2021, 3:45 p.m. UTC | #2
Hi Yu,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.14-rc3]
[cannot apply to hnaz-linux-mm/master linux/master next-20210730]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Yu-Zhao/mm-optimize-thp-for-reclaim-and-migration/20210731-144129
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git c7d102232649226a69dddd58a4942cf13cff4f7c
config: i386-randconfig-a003-20210730 (attached as .config)
compiler: gcc-10 (Ubuntu 10.3.0-1ubuntu1~20.04) 10.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/e4e76c4915b364558aacae2cf320a43306a20fa1
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Yu-Zhao/mm-optimize-thp-for-reclaim-and-migration/20210731-144129
        git checkout e4e76c4915b364558aacae2cf320a43306a20fa1
        # save the attached .config to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   mm/migrate.c: In function 'try_to_unmap_clean':
>> mm/migrate.c:199:17: error: 'THP_SPLIT_UNMAP' undeclared (first use in this function)
     199 |  count_vm_event(THP_SPLIT_UNMAP);
         |                 ^~~~~~~~~~~~~~~
   mm/migrate.c:199:17: note: each undeclared identifier is reported only once for each function it appears in


vim +/THP_SPLIT_UNMAP +199 mm/migrate.c

   170	
   171	static bool try_to_unmap_clean(struct page_vma_mapped_walk *pvmw, struct page *page)
   172	{
   173		void *addr;
   174		bool dirty;
   175	
   176		VM_BUG_ON_PAGE(PageLRU(page), page);
   177		VM_BUG_ON_PAGE(PageCompound(page), page);
   178		VM_BUG_ON_PAGE(!PageAnon(page), page);
   179		VM_BUG_ON_PAGE(!PageLocked(page), page);
   180		VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
   181	
   182		if (PageMlocked(page) || (pvmw->vma->vm_flags & VM_LOCKED))
   183			return false;
   184	
   185		/*
   186		 * The pmd entry mapping the old thp was flushed and the pte mapping
   187		 * this subpage has been non present. Therefore, this subpage is
   188		 * inaccessible. We don't need to remap it if it contains only zeros.
   189		 */
   190		addr = kmap_atomic(page);
   191		dirty = !!memchr_inv(addr, 0, PAGE_SIZE);
   192		kunmap_atomic(addr);
   193	
   194		if (dirty)
   195			return false;
   196	
   197		pte_clear_not_present_full(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, false);
   198		dec_mm_counter(pvmw->vma->vm_mm, mm_counter(page));
 > 199		count_vm_event(THP_SPLIT_UNMAP);
   200	
   201		return true;
   202	}
   203	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Matthew Wilcox (Oracle) Aug. 3, 2021, 11:25 a.m. UTC | #3
On Sat, Jul 31, 2021 at 12:39:38AM -0600, Yu Zhao wrote:
> +++ b/mm/migrate.c
> @@ -168,14 +168,53 @@ void putback_movable_pages(struct list_head *l)
>  	}
>  }
>  
> +static bool try_to_unmap_clean(struct page_vma_mapped_walk *pvmw, struct page *page)
> +{
> +	void *addr;
> +	bool dirty;
> +
> +	VM_BUG_ON_PAGE(PageLRU(page), page);
> +	VM_BUG_ON_PAGE(PageCompound(page), page);
> +	VM_BUG_ON_PAGE(!PageAnon(page), page);
> +	VM_BUG_ON_PAGE(!PageLocked(page), page);
> +	VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
> +
> +	if (PageMlocked(page) || (pvmw->vma->vm_flags & VM_LOCKED))
> +		return false;
> +
> +	/*
> +	 * The pmd entry mapping the old thp was flushed and the pte mapping
> +	 * this subpage has been non present. Therefore, this subpage is
> +	 * inaccessible. We don't need to remap it if it contains only zeros.
> +	 */
> +	addr = kmap_atomic(page);
> +	dirty = !!memchr_inv(addr, 0, PAGE_SIZE);
> +	kunmap_atomic(addr);

kmap_local() is preferred now.  Also, Linus has a particular hatred for
the !! idiom; just compare against NULL.
Matthew Wilcox (Oracle) Aug. 3, 2021, 11:36 a.m. UTC | #4
On Sat, Jul 31, 2021 at 12:39:38AM -0600, Yu Zhao wrote:
> +++ b/include/linux/rmap.h
> @@ -243,7 +243,7 @@ int page_mkclean(struct page *);
>   */
>  void page_mlock(struct page *page);
>  
> -void remove_migration_ptes(struct page *old, struct page *new, bool locked);
> +void remove_migration_ptes(struct page *old, struct page *new, bool locked, bool unmap_clean);

I'm not a big fan of 'bool, bool'.  Could we use a flag word instead?

#define MIGRATE_REMOVE_LOCKED	1
#define MIGRATE_UNMAP_CLEAN	2
Kirill A. Shutemov Aug. 4, 2021, 2:27 p.m. UTC | #5
On Sat, Jul 31, 2021 at 12:39:38AM -0600, Yu Zhao wrote:
> @@ -267,13 +308,19 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
>   * Get rid of all migration entries and replace them by
>   * references to the indicated page.
>   */
> -void remove_migration_ptes(struct page *old, struct page *new, bool locked)
> +void remove_migration_ptes(struct page *old, struct page *new, bool locked, bool unmap_clean)
>  {
> +	struct rmap_walk_arg rmap_walk_arg = {
> +		.page = old,
> +		.unmap_clean = unmap_clean,
> +	};
>  	struct rmap_walk_control rwc = {
>  		.rmap_one = remove_migration_pte,
> -		.arg = old,
> +		.arg = &rmap_walk_arg,
>  	};

'old' pointer has a few low always-zero bits :P
Yu Zhao Aug. 8, 2021, 5:20 p.m. UTC | #6
On Wed, Aug 4, 2021 at 8:27 AM Kirill A. Shutemov <kirill@shutemov.name>
wrote:

> On Sat, Jul 31, 2021 at 12:39:38AM -0600, Yu Zhao wrote:
> > @@ -267,13 +308,19 @@ static bool remove_migration_pte(struct page
> *page, struct vm_area_struct *vma,
> >   * Get rid of all migration entries and replace them by
> >   * references to the indicated page.
> >   */
> > -void remove_migration_ptes(struct page *old, struct page *new, bool
> locked)
> > +void remove_migration_ptes(struct page *old, struct page *new, bool
> locked, bool unmap_clean)
> >  {
> > +     struct rmap_walk_arg rmap_walk_arg = {
> > +             .page = old,
> > +             .unmap_clean = unmap_clean,
> > +     };
> >       struct rmap_walk_control rwc = {
> >               .rmap_one = remove_migration_pte,
> > -             .arg = old,
> > +             .arg = &rmap_walk_arg,
> >       };
>
> 'old' pointer has few low always-zero bit :P
>

Will do. Thanks.
Yu Zhao Aug. 8, 2021, 5:21 p.m. UTC | #7
On Tue, Aug 3, 2021 at 5:38 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Sat, Jul 31, 2021 at 12:39:38AM -0600, Yu Zhao wrote:
> > +++ b/include/linux/rmap.h
> > @@ -243,7 +243,7 @@ int page_mkclean(struct page *);
> >   */
> >  void page_mlock(struct page *page);
> >
> > -void remove_migration_ptes(struct page *old, struct page *new, bool locked);
> > +void remove_migration_ptes(struct page *old, struct page *new, bool locked, bool unmap_clean);
>
> I'm not a big fan of 'bool, bool'.  Could we use a flag word instead?
>
> #define MIGRATE_REMOVE_LOCKED   1
> #define MIGRATE_UNMAP_CLEAN     2

Will do. Thanks.
diff mbox series

Patch

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index c976cc6de257..4c2789d3fafb 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -243,7 +243,7 @@  int page_mkclean(struct page *);
  */
 void page_mlock(struct page *page);
 
-void remove_migration_ptes(struct page *old, struct page *new, bool locked);
+void remove_migration_ptes(struct page *old, struct page *new, bool locked, bool unmap_clean);
 
 /*
  * Called by memory-failure.c to kill processes.
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 829eeac84094..a08fa70d8026 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -100,6 +100,7 @@  enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		THP_SPLIT_PUD,
 #endif
 		THP_SPLIT_FREE,
+		THP_SPLIT_UNMAP,
 		THP_ZERO_PAGE_ALLOC,
 		THP_ZERO_PAGE_ALLOC_FAILED,
 		THP_SWPOUT,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5120478bca41..1653b84dc800 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2327,7 +2327,7 @@  static void unmap_page(struct page *page)
 	VM_WARN_ON_ONCE_PAGE(page_mapped(page), page);
 }
 
-static void remap_page(struct page *page, unsigned int nr)
+static void remap_page(struct page *page, unsigned int nr, bool unmap_clean)
 {
 	int i;
 
@@ -2335,10 +2335,10 @@  static void remap_page(struct page *page, unsigned int nr)
 	if (!PageAnon(page))
 		return;
 	if (PageTransHuge(page)) {
-		remove_migration_ptes(page, page, true);
+		remove_migration_ptes(page, page, true, false);
 	} else {
 		for (i = 0; i < nr; i++)
-			remove_migration_ptes(page + i, page + i, true);
+			remove_migration_ptes(page + i, page + i, true, unmap_clean);
 	}
 }
 
@@ -2494,7 +2494,7 @@  static void __split_huge_page(struct page *page, struct list_head *list,
 	}
 	local_irq_enable();
 
-	remap_page(head, nr);
+	remap_page(head, nr, !!list);
 
 	if (PageSwapCache(head)) {
 		swp_entry_t entry = { .val = page_private(head) };
@@ -2769,7 +2769,7 @@  int split_huge_page_to_list(struct page *page, struct list_head *list)
 		if (mapping)
 			xa_unlock(&mapping->i_pages);
 		local_irq_enable();
-		remap_page(head, thp_nr_pages(head));
+		remap_page(head, thp_nr_pages(head), false);
 		ret = -EBUSY;
 	}
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 7e240437e7d9..46c2a54c2ac1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -168,14 +168,53 @@  void putback_movable_pages(struct list_head *l)
 	}
 }
 
+static bool try_to_unmap_clean(struct page_vma_mapped_walk *pvmw, struct page *page)
+{
+	void *addr;
+	bool dirty;
+
+	VM_BUG_ON_PAGE(PageLRU(page), page);
+	VM_BUG_ON_PAGE(PageCompound(page), page);
+	VM_BUG_ON_PAGE(!PageAnon(page), page);
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
+	VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
+
+	if (PageMlocked(page) || (pvmw->vma->vm_flags & VM_LOCKED))
+		return false;
+
+	/*
+	 * The pmd entry mapping the old thp was flushed and the pte mapping
+	 * this subpage has been non present. Therefore, this subpage is
+	 * inaccessible. We don't need to remap it if it contains only zeros.
+	 */
+	addr = kmap_atomic(page);
+	dirty = !!memchr_inv(addr, 0, PAGE_SIZE);
+	kunmap_atomic(addr);
+
+	if (dirty)
+		return false;
+
+	pte_clear_not_present_full(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, false);
+	dec_mm_counter(pvmw->vma->vm_mm, mm_counter(page));
+	count_vm_event(THP_SPLIT_UNMAP);
+
+	return true;
+}
+
+struct rmap_walk_arg {
+	struct page *page;
+	bool unmap_clean;
+};
+
 /*
  * Restore a potential migration pte to a working pte entry
  */
 static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
-				 unsigned long addr, void *old)
+				 unsigned long addr, void *arg)
 {
+	struct rmap_walk_arg *rmap_walk_arg = arg;
 	struct page_vma_mapped_walk pvmw = {
-		.page = old,
+		.page = rmap_walk_arg->page,
 		.vma = vma,
 		.address = addr,
 		.flags = PVMW_SYNC | PVMW_MIGRATION,
@@ -200,6 +239,8 @@  static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 			continue;
 		}
 #endif
+		if (rmap_walk_arg->unmap_clean && try_to_unmap_clean(&pvmw, new))
+			continue;
 
 		get_page(new);
 		pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
@@ -267,13 +308,19 @@  static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
  * Get rid of all migration entries and replace them by
  * references to the indicated page.
  */
-void remove_migration_ptes(struct page *old, struct page *new, bool locked)
+void remove_migration_ptes(struct page *old, struct page *new, bool locked, bool unmap_clean)
 {
+	struct rmap_walk_arg rmap_walk_arg = {
+		.page = old,
+		.unmap_clean = unmap_clean,
+	};
 	struct rmap_walk_control rwc = {
 		.rmap_one = remove_migration_pte,
-		.arg = old,
+		.arg = &rmap_walk_arg,
 	};
 
+	VM_BUG_ON_PAGE(unmap_clean && old != new, old);
+
 	if (locked)
 		rmap_walk_locked(new, &rwc);
 	else
@@ -827,7 +874,7 @@  static int writeout(struct address_space *mapping, struct page *page)
 	 * At this point we know that the migration attempt cannot
 	 * be successful.
 	 */
-	remove_migration_ptes(page, page, false);
+	remove_migration_ptes(page, page, false, false);
 
 	rc = mapping->a_ops->writepage(page, &wbc);
 
@@ -1070,7 +1117,7 @@  static int __unmap_and_move(struct page *page, struct page *newpage,
 
 	if (page_was_mapped)
 		remove_migration_ptes(page,
-			rc == MIGRATEPAGE_SUCCESS ? newpage : page, false);
+			rc == MIGRATEPAGE_SUCCESS ? newpage : page, false, false);
 
 out_unlock_both:
 	unlock_page(newpage);
@@ -1292,7 +1339,7 @@  static int unmap_and_move_huge_page(new_page_t get_new_page,
 
 	if (page_was_mapped)
 		remove_migration_ptes(hpage,
-			rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false);
+			rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false, false);
 
 unlock_put_anon:
 	unlock_page(new_hpage);
@@ -2585,7 +2632,7 @@  static void migrate_vma_unmap(struct migrate_vma *migrate)
 		if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
 			continue;
 
-		remove_migration_ptes(page, page, false);
+		remove_migration_ptes(page, page, false, false);
 
 		migrate->src[i] = 0;
 		unlock_page(page);
@@ -2963,7 +3010,7 @@  void migrate_vma_finalize(struct migrate_vma *migrate)
 			newpage = page;
 		}
 
-		remove_migration_ptes(page, newpage, false);
+		remove_migration_ptes(page, newpage, false, false);
 		unlock_page(page);
 
 		if (is_zone_device_page(page))
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f486e5d98d96..a83cbb6a4d70 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1301,6 +1301,7 @@  const char * const vmstat_text[] = {
 	"thp_split_pud",
 #endif
 	"thp_split_free",
+	"thp_split_unmap",
 	"thp_zero_page_alloc",
 	"thp_zero_page_alloc_failed",
 	"thp_swpout",