diff mbox series

[03/12] mm/khugepaged: make hugepage allocation context-specific

Message ID 20220410135445.3897054-4-zokeefe@google.com (mailing list archive)
State New
Headers show
Series mm: userspace hugepage collapse | expand

Commit Message

Zach O'Keefe April 10, 2022, 1:54 p.m. UTC
Add hugepage allocation context to struct collapse_control, allowing
different collapse contexts to allocate hugepages differently.  For
example, khugepaged decides to allocate differently in NUMA and UMA
configurations, and other collapse contexts shouldn't be coupled to this
decision.

Additionally, move [pre]allocated hugepage pointer into
struct collapse_control.

Signed-off-by: Zach O'Keefe <zokeefe@google.com>
---
 mm/khugepaged.c | 96 ++++++++++++++++++++++++-------------------------
 1 file changed, 48 insertions(+), 48 deletions(-)

Comments

kernel test robot April 10, 2022, 5:47 p.m. UTC | #1
Hi Zach,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on hnaz-mm/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220410-215722
base:   https://github.com/hnaz/linux-mm master
config: i386-randconfig-a002 (https://download.01.org/0day-ci/archive/20220411/202204110122.yKx76cVq-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project 256c6b0ba14e8a7ab6373b61b7193ea8c0a3651c)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/93731be575c612b28ee4c7711ebab9e81960f213
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220410-215722
        git checkout 93731be575c612b28ee4c7711ebab9e81960f213
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   mm/khugepaged.c:1006:8: warning: mixing declarations and code is incompatible with standards before C99 [-Wdeclaration-after-statement]
           pmd_t pmde;
                 ^
>> mm/khugepaged.c:2339:18: error: incompatible function pointer types initializing 'struct page *(*)(struct collapse_control *, gfp_t, int)' (aka 'struct page *(*)(struct collapse_control *, unsigned int, int)') with an expression of type 'struct page *(*)(struct collapse_control *, gfp_t)' (aka 'struct page *(*)(struct collapse_control *, unsigned int)') [-Werror,-Wincompatible-function-pointer-types]
                   .alloc_hpage = &khugepaged_alloc_page,
                                  ^~~~~~~~~~~~~~~~~~~~~~
   1 warning and 1 error generated.


vim +2339 mm/khugepaged.c

  2333	
  2334	static int khugepaged(void *none)
  2335	{
  2336		struct mm_slot *mm_slot;
  2337		struct collapse_control cc = {
  2338			.last_target_node = NUMA_NO_NODE,
> 2339			.alloc_hpage = &khugepaged_alloc_page,
  2340		};
  2341	
  2342		set_freezable();
  2343		set_user_nice(current, MAX_NICE);
  2344	
  2345		while (!kthread_should_stop()) {
  2346			khugepaged_do_scan(&cc);
  2347			khugepaged_wait_work();
  2348		}
  2349	
  2350		spin_lock(&khugepaged_mm_lock);
  2351		mm_slot = khugepaged_scan.mm_slot;
  2352		khugepaged_scan.mm_slot = NULL;
  2353		if (mm_slot)
  2354			collect_mm_slot(mm_slot);
  2355		spin_unlock(&khugepaged_mm_lock);
  2356		return 0;
  2357	}
  2358
kernel test robot April 10, 2022, 5:47 p.m. UTC | #2
Hi Zach,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on hnaz-mm/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220410-215722
base:   https://github.com/hnaz/linux-mm master
config: i386-randconfig-a001 (https://download.01.org/0day-ci/archive/20220411/202204110146.7vOFQ9VD-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.2.0-19) 11.2.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/93731be575c612b28ee4c7711ebab9e81960f213
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220410-215722
        git checkout 93731be575c612b28ee4c7711ebab9e81960f213
        # save the config file to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   mm/khugepaged.c: In function 'find_pmd_or_thp_or_none':
   mm/khugepaged.c:1006:9: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
    1006 |         pmd_t pmde;
         |         ^~~~~
   mm/khugepaged.c: In function 'khugepaged':
>> mm/khugepaged.c:2339:32: error: initialization of 'struct page * (*)(struct collapse_control *, gfp_t,  int)' {aka 'struct page * (*)(struct collapse_control *, unsigned int,  int)'} from incompatible pointer type 'struct page * (*)(struct collapse_control *, gfp_t)' {aka 'struct page * (*)(struct collapse_control *, unsigned int)'} [-Werror=incompatible-pointer-types]
    2339 |                 .alloc_hpage = &khugepaged_alloc_page,
         |                                ^
   mm/khugepaged.c:2339:32: note: (near initialization for 'cc.alloc_hpage')
   cc1: some warnings being treated as errors


vim +2339 mm/khugepaged.c

  2333	
  2334	static int khugepaged(void *none)
  2335	{
  2336		struct mm_slot *mm_slot;
  2337		struct collapse_control cc = {
  2338			.last_target_node = NUMA_NO_NODE,
> 2339			.alloc_hpage = &khugepaged_alloc_page,
  2340		};
  2341	
  2342		set_freezable();
  2343		set_user_nice(current, MAX_NICE);
  2344	
  2345		while (!kthread_should_stop()) {
  2346			khugepaged_do_scan(&cc);
  2347			khugepaged_wait_work();
  2348		}
  2349	
  2350		spin_lock(&khugepaged_mm_lock);
  2351		mm_slot = khugepaged_scan.mm_slot;
  2352		khugepaged_scan.mm_slot = NULL;
  2353		if (mm_slot)
  2354			collect_mm_slot(mm_slot);
  2355		spin_unlock(&khugepaged_mm_lock);
  2356		return 0;
  2357	}
  2358
Zach O'Keefe April 11, 2022, 5:28 p.m. UTC | #3
Sorry about this. I thought I had built with !NUMA and
TRANSPARENT_HUGEPAGE. Fixed.

On Sun, Apr 10, 2022 at 12:48 PM kernel test robot <lkp@intel.com> wrote:
>
> Hi Zach,
>
> Thank you for the patch! Yet something to improve:
>
> [auto build test ERROR on hnaz-mm/master]
>
> url:    https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220410-215722
> base:   https://github.com/hnaz/linux-mm master
> config: i386-randconfig-a001 (https://download.01.org/0day-ci/archive/20220411/202204110146.7vOFQ9VD-lkp@intel.com/config)
> compiler: gcc-11 (Debian 11.2.0-19) 11.2.0
> reproduce (this is a W=1 build):
>         # https://github.com/intel-lab-lkp/linux/commit/93731be575c612b28ee4c7711ebab9e81960f213
>         git remote add linux-review https://github.com/intel-lab-lkp/linux
>         git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220410-215722
>         git checkout 93731be575c612b28ee4c7711ebab9e81960f213
>         # save the config file to linux build tree
>         mkdir build_dir
>         make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
>
> All errors (new ones prefixed by >>):
>
>    mm/khugepaged.c: In function 'find_pmd_or_thp_or_none':
>    mm/khugepaged.c:1006:9: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
>     1006 |         pmd_t pmde;
>          |         ^~~~~
>    mm/khugepaged.c: In function 'khugepaged':
> >> mm/khugepaged.c:2339:32: error: initialization of 'struct page * (*)(struct collapse_control *, gfp_t,  int)' {aka 'struct page * (*)(struct collapse_control *, unsigned int,  int)'} from incompatible pointer type 'struct page * (*)(struct collapse_control *, gfp_t)' {aka 'struct page * (*)(struct collapse_control *, unsigned int)'} [-Werror=incompatible-pointer-types]
>     2339 |                 .alloc_hpage = &khugepaged_alloc_page,
>          |                                ^
>    mm/khugepaged.c:2339:32: note: (near initialization for 'cc.alloc_hpage')
>    cc1: some warnings being treated as errors
>
>
> vim +2339 mm/khugepaged.c
>
>   2333
>   2334  static int khugepaged(void *none)
>   2335  {
>   2336          struct mm_slot *mm_slot;
>   2337          struct collapse_control cc = {
>   2338                  .last_target_node = NUMA_NO_NODE,
> > 2339                  .alloc_hpage = &khugepaged_alloc_page,
>   2340          };
>   2341
>   2342          set_freezable();
>   2343          set_user_nice(current, MAX_NICE);
>   2344
>   2345          while (!kthread_should_stop()) {
>   2346                  khugepaged_do_scan(&cc);
>   2347                  khugepaged_wait_work();
>   2348          }
>   2349
>   2350          spin_lock(&khugepaged_mm_lock);
>   2351          mm_slot = khugepaged_scan.mm_slot;
>   2352          khugepaged_scan.mm_slot = NULL;
>   2353          if (mm_slot)
>   2354                  collect_mm_slot(mm_slot);
>   2355          spin_unlock(&khugepaged_mm_lock);
>   2356          return 0;
>   2357  }
>   2358
>
> --
> 0-DAY CI Kernel Test Service
> https://01.org/lkp
>
diff mbox series

Patch

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index eca61eb88dda..180d99a6b571 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -92,6 +92,10 @@  struct collapse_control {
 
 	/* Last target selected in khugepaged_find_target_node() for this scan */
 	int last_target_node;
+
+	struct page *hpage;
+	struct page* (*alloc_hpage)(struct collapse_control *cc, gfp_t gfp,
+				    int node);
 };
 
 /**
@@ -877,21 +881,21 @@  static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
 	return true;
 }
 
-static struct page *
-khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
+static struct page *khugepaged_alloc_page(struct collapse_control *cc,
+					  gfp_t gfp, int node)
 {
-	VM_BUG_ON_PAGE(*hpage, *hpage);
+	VM_BUG_ON_PAGE(cc->hpage, cc->hpage);
 
-	*hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
-	if (unlikely(!*hpage)) {
+	cc->hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
+	if (unlikely(!cc->hpage)) {
 		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
-		*hpage = ERR_PTR(-ENOMEM);
+		cc->hpage = ERR_PTR(-ENOMEM);
 		return NULL;
 	}
 
-	prep_transhuge_page(*hpage);
+	prep_transhuge_page(cc->hpage);
 	count_vm_event(THP_COLLAPSE_ALLOC);
-	return *hpage;
+	return cc->hpage;
 }
 #else
 static int khugepaged_find_target_node(struct collapse_control *cc)
@@ -953,12 +957,12 @@  static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
 	return true;
 }
 
-static struct page *
-khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
+static struct page *khugepaged_alloc_page(struct collapse_control *cc,
+					  gfp_t gfp, int node)
 {
-	VM_BUG_ON(!*hpage);
+	VM_BUG_ON(!cc->hpage);
 
-	return  *hpage;
+	return cc->hpage;
 }
 #endif
 
@@ -1080,10 +1084,9 @@  static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 	return true;
 }
 
-static void collapse_huge_page(struct mm_struct *mm,
-				   unsigned long address,
-				   struct page **hpage,
-				   int node, int referenced, int unmapped)
+static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
+			       struct collapse_control *cc, int referenced,
+			       int unmapped)
 {
 	LIST_HEAD(compound_pagelist);
 	pmd_t *pmd, _pmd;
@@ -1096,6 +1099,7 @@  static void collapse_huge_page(struct mm_struct *mm,
 	struct mmu_notifier_range range;
 	gfp_t gfp;
 	const struct cpumask *cpumask;
+	int node;
 
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
@@ -1110,13 +1114,14 @@  static void collapse_huge_page(struct mm_struct *mm,
 	 */
 	mmap_read_unlock(mm);
 
+	node = khugepaged_find_target_node(cc);
 	/* sched to specified node before huage page memory copy */
 	if (task_node(current) != node) {
 		cpumask = cpumask_of_node(node);
 		if (!cpumask_empty(cpumask))
 			set_cpus_allowed_ptr(current, cpumask);
 	}
-	new_page = khugepaged_alloc_page(hpage, gfp, node);
+	new_page = cc->alloc_hpage(cc, gfp, node);
 	if (!new_page) {
 		result = SCAN_ALLOC_HUGE_PAGE_FAIL;
 		goto out_nolock;
@@ -1238,15 +1243,15 @@  static void collapse_huge_page(struct mm_struct *mm,
 	update_mmu_cache_pmd(vma, address, pmd);
 	spin_unlock(pmd_ptl);
 
-	*hpage = NULL;
+	cc->hpage = NULL;
 
 	khugepaged_pages_collapsed++;
 	result = SCAN_SUCCEED;
 out_up_write:
 	mmap_write_unlock(mm);
 out_nolock:
-	if (!IS_ERR_OR_NULL(*hpage))
-		mem_cgroup_uncharge(page_folio(*hpage));
+	if (!IS_ERR_OR_NULL(cc->hpage))
+		mem_cgroup_uncharge(page_folio(cc->hpage));
 	trace_mm_collapse_huge_page(mm, isolated, result);
 	return;
 }
@@ -1254,7 +1259,6 @@  static void collapse_huge_page(struct mm_struct *mm,
 static int khugepaged_scan_pmd(struct mm_struct *mm,
 			       struct vm_area_struct *vma,
 			       unsigned long address,
-			       struct page **hpage,
 			       struct collapse_control *cc)
 {
 	pmd_t *pmd;
@@ -1399,10 +1403,8 @@  static int khugepaged_scan_pmd(struct mm_struct *mm,
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
 	if (ret) {
-		node = khugepaged_find_target_node(cc);
 		/* collapse_huge_page will return with the mmap_lock released */
-		collapse_huge_page(mm, address, hpage, node,
-				referenced, unmapped);
+		collapse_huge_page(mm, address, cc, referenced, unmapped);
 	}
 out:
 	trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
@@ -1655,8 +1657,7 @@  static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  * @mm: process address space where collapse happens
  * @file: file that collapse on
  * @start: collapse start address
- * @hpage: new allocated huge page for collapse
- * @node: appointed node the new huge page allocate from
+ * @cc: collapse context and scratchpad
  *
  * Basic scheme is simple, details are more complex:
  *  - allocate and lock a new huge page;
@@ -1674,8 +1675,8 @@  static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  *    + unlock and free huge page;
  */
 static void collapse_file(struct mm_struct *mm,
-		struct file *file, pgoff_t start,
-		struct page **hpage, int node)
+			  struct file *file, pgoff_t start,
+			  struct collapse_control *cc)
 {
 	struct address_space *mapping = file->f_mapping;
 	gfp_t gfp;
@@ -1685,15 +1686,16 @@  static void collapse_file(struct mm_struct *mm,
 	XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
 	int nr_none = 0, result = SCAN_SUCCEED;
 	bool is_shmem = shmem_file(file);
-	int nr;
+	int nr, node;
 
 	VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
 	VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
 
 	/* Only allocate from the target node */
 	gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
+	node = khugepaged_find_target_node(cc);
 
-	new_page = khugepaged_alloc_page(hpage, gfp, node);
+	new_page = cc->alloc_hpage(cc, gfp, node);
 	if (!new_page) {
 		result = SCAN_ALLOC_HUGE_PAGE_FAIL;
 		goto out;
@@ -1986,7 +1988,7 @@  static void collapse_file(struct mm_struct *mm,
 		 * Remove pte page tables, so we can re-fault the page as huge.
 		 */
 		retract_page_tables(mapping, start);
-		*hpage = NULL;
+		cc->hpage = NULL;
 
 		khugepaged_pages_collapsed++;
 	} else {
@@ -2033,14 +2035,14 @@  static void collapse_file(struct mm_struct *mm,
 	unlock_page(new_page);
 out:
 	VM_BUG_ON(!list_empty(&pagelist));
-	if (!IS_ERR_OR_NULL(*hpage))
-		mem_cgroup_uncharge(page_folio(*hpage));
+	if (!IS_ERR_OR_NULL(cc->hpage))
+		mem_cgroup_uncharge(page_folio(cc->hpage));
 	/* TODO: tracepoints */
 }
 
 static void khugepaged_scan_file(struct mm_struct *mm,
-		struct file *file, pgoff_t start, struct page **hpage,
-		struct collapse_control *cc)
+				 struct file *file, pgoff_t start,
+				 struct collapse_control *cc)
 {
 	struct page *page = NULL;
 	struct address_space *mapping = file->f_mapping;
@@ -2113,8 +2115,7 @@  static void khugepaged_scan_file(struct mm_struct *mm,
 			result = SCAN_EXCEED_NONE_PTE;
 			count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
 		} else {
-			node = khugepaged_find_target_node(cc);
-			collapse_file(mm, file, start, hpage, node);
+			collapse_file(mm, file, start, cc);
 		}
 	}
 
@@ -2122,8 +2123,8 @@  static void khugepaged_scan_file(struct mm_struct *mm,
 }
 #else
 static void khugepaged_scan_file(struct mm_struct *mm,
-		struct file *file, pgoff_t start, struct page **hpage,
-		struct collapse_control *cc)
+				 struct file *file, pgoff_t start,
+				 struct collapse_control *cc)
 {
 	BUILD_BUG();
 }
@@ -2134,7 +2135,6 @@  static void khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
 #endif
 
 static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
-					    struct page **hpage,
 					    struct collapse_control *cc)
 	__releases(&khugepaged_mm_lock)
 	__acquires(&khugepaged_mm_lock)
@@ -2211,12 +2211,11 @@  static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 
 				mmap_read_unlock(mm);
 				ret = 1;
-				khugepaged_scan_file(mm, file, pgoff, hpage, cc);
+				khugepaged_scan_file(mm, file, pgoff, cc);
 				fput(file);
 			} else {
 				ret = khugepaged_scan_pmd(mm, vma,
-						khugepaged_scan.address,
-						hpage, cc);
+						khugepaged_scan.address, cc);
 			}
 			/* move to next address */
 			khugepaged_scan.address += HPAGE_PMD_SIZE;
@@ -2274,15 +2273,15 @@  static int khugepaged_wait_event(void)
 
 static void khugepaged_do_scan(struct collapse_control *cc)
 {
-	struct page *hpage = NULL;
 	unsigned int progress = 0, pass_through_head = 0;
 	unsigned int pages = READ_ONCE(khugepaged_pages_to_scan);
 	bool wait = true;
 
+	cc->hpage = NULL;
 	lru_add_drain_all();
 
 	while (progress < pages) {
-		if (!khugepaged_prealloc_page(&hpage, &wait))
+		if (!khugepaged_prealloc_page(&cc->hpage, &wait))
 			break;
 
 		cond_resched();
@@ -2296,14 +2295,14 @@  static void khugepaged_do_scan(struct collapse_control *cc)
 		if (khugepaged_has_work() &&
 		    pass_through_head < 2)
 			progress += khugepaged_scan_mm_slot(pages - progress,
-							    &hpage, cc);
+							    cc);
 		else
 			progress = pages;
 		spin_unlock(&khugepaged_mm_lock);
 	}
 
-	if (!IS_ERR_OR_NULL(hpage))
-		put_page(hpage);
+	if (!IS_ERR_OR_NULL(cc->hpage))
+		put_page(cc->hpage);
 }
 
 static bool khugepaged_should_wakeup(void)
@@ -2337,6 +2336,7 @@  static int khugepaged(void *none)
 	struct mm_slot *mm_slot;
 	struct collapse_control cc = {
 		.last_target_node = NUMA_NO_NODE,
+		.alloc_hpage = &khugepaged_alloc_page,
 	};
 
 	set_freezable();