diff mbox series

[v6,03/15] mm/khugepaged: add struct collapse_control

Message ID 20220604004004.954674-4-zokeefe@google.com (mailing list archive)
State New
Headers show
Series mm: userspace hugepage collapse | expand

Commit Message

Zach O'Keefe June 4, 2022, 12:39 a.m. UTC
Modularize hugepage collapse by introducing struct collapse_control.
This structure describes the properties of the requested collapse and
also serves as a local scratch pad to use during the collapse
itself.

Start by moving global per-node khugepaged statistics into this
new structure, and stack allocate one for khugepaged collapse
context.

Signed-off-by: Zach O'Keefe <zokeefe@google.com>
---
 mm/khugepaged.c | 87 ++++++++++++++++++++++++++++---------------------
 1 file changed, 49 insertions(+), 38 deletions(-)

Comments

kernel test robot June 6, 2022, 2:41 a.m. UTC | #1
Hi Zach,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-1) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
        git checkout d87b6065d6050b89930cca0814921aca7c269286
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   mm/khugepaged.c: In function 'khugepaged':
>> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=]
    2284 | }
         | ^


vim +2284 mm/khugepaged.c

b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2261  
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2262  static int khugepaged(void *none)
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2263  {
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2264  	struct mm_slot *mm_slot;
d87b6065d6050b Zach O'Keefe       2022-06-03  2265  	struct collapse_control cc = {
d87b6065d6050b Zach O'Keefe       2022-06-03  2266  		.last_target_node = NUMA_NO_NODE,
d87b6065d6050b Zach O'Keefe       2022-06-03  2267  	};
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2268  
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2269  	set_freezable();
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2270  	set_user_nice(current, MAX_NICE);
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2271  
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2272  	while (!kthread_should_stop()) {
d87b6065d6050b Zach O'Keefe       2022-06-03  2273  		khugepaged_do_scan(&cc);
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2274  		khugepaged_wait_work();
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2275  	}
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2276  
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2277  	spin_lock(&khugepaged_mm_lock);
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2278  	mm_slot = khugepaged_scan.mm_slot;
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2279  	khugepaged_scan.mm_slot = NULL;
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2280  	if (mm_slot)
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2281  		collect_mm_slot(mm_slot);
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2282  	spin_unlock(&khugepaged_mm_lock);
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2283  	return 0;
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 @2284  }
b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2285
Zach O'Keefe June 6, 2022, 4:40 p.m. UTC | #2
On Sun, Jun 5, 2022 at 7:42 PM kernel test robot <lkp@intel.com> wrote:
>
> Hi Zach,
>
> Thank you for the patch! Perhaps something to improve:
>
> [auto build test WARNING on akpm-mm/mm-everything]
>
> url:    https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config)
> compiler: gcc-11 (Debian 11.3.0-1) 11.3.0
> reproduce (this is a W=1 build):
>         # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286
>         git remote add linux-review https://github.com/intel-lab-lkp/linux
>         git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
>         git checkout d87b6065d6050b89930cca0814921aca7c269286
>         # save the config file
>         mkdir build_dir && cp config build_dir/.config
>         make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
>
> If you fix the issue, kindly add following tag where applicable
> Reported-by: kernel test robot <lkp@intel.com>
>
> All warnings (new ones prefixed by >>):
>
>    mm/khugepaged.c: In function 'khugepaged':
> >> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=]
>     2284 | }
>          | ^

Thanks lkp@intel.com.

This is due to config with:

CONFIG_FRAME_WARN=2048
CONFIG_NODES_SHIFT=10

Where struct collapse_control has a member int
node_load[MAX_NUMNODES], and we stack allocate one.

Is this a configuration that needs to be supported? 1024 nodes seems
like a lot and I'm not sure if these configs are randomly generated or
are representative of real systems.

Thanks,
Zach

>
> vim +2284 mm/khugepaged.c
>
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2261
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2262  static int khugepaged(void *none)
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2263  {
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2264      struct mm_slot *mm_slot;
> d87b6065d6050b Zach O'Keefe       2022-06-03  2265      struct collapse_control cc = {
> d87b6065d6050b Zach O'Keefe       2022-06-03  2266              .last_target_node = NUMA_NO_NODE,
> d87b6065d6050b Zach O'Keefe       2022-06-03  2267      };
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2268
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2269      set_freezable();
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2270      set_user_nice(current, MAX_NICE);
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2271
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2272      while (!kthread_should_stop()) {
> d87b6065d6050b Zach O'Keefe       2022-06-03  2273              khugepaged_do_scan(&cc);
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2274              khugepaged_wait_work();
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2275      }
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2276
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2277      spin_lock(&khugepaged_mm_lock);
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2278      mm_slot = khugepaged_scan.mm_slot;
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2279      khugepaged_scan.mm_slot = NULL;
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2280      if (mm_slot)
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2281              collect_mm_slot(mm_slot);
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2282      spin_unlock(&khugepaged_mm_lock);
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2283      return 0;
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26 @2284  }
> b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2285
>
> --
> 0-DAY CI Kernel Test Service
> https://01.org/lkp
>
Yang Shi June 6, 2022, 8:20 p.m. UTC | #3
On Mon, Jun 6, 2022 at 9:40 AM Zach O'Keefe <zokeefe@google.com> wrote:
>
> On Sun, Jun 5, 2022 at 7:42 PM kernel test robot <lkp@intel.com> wrote:
> >
> > Hi Zach,
> >
> > Thank you for the patch! Perhaps something to improve:
> >
> > [auto build test WARNING on akpm-mm/mm-everything]
> >
> > url:    https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
> > base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> > config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config)
> > compiler: gcc-11 (Debian 11.3.0-1) 11.3.0
> > reproduce (this is a W=1 build):
> >         # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286
> >         git remote add linux-review https://github.com/intel-lab-lkp/linux
> >         git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
> >         git checkout d87b6065d6050b89930cca0814921aca7c269286
> >         # save the config file
> >         mkdir build_dir && cp config build_dir/.config
> >         make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
> >
> > If you fix the issue, kindly add following tag where applicable
> > Reported-by: kernel test robot <lkp@intel.com>
> >
> > All warnings (new ones prefixed by >>):
> >
> >    mm/khugepaged.c: In function 'khugepaged':
> > >> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=]
> >     2284 | }
> >          | ^
>
> Thanks lkp@intel.com.
>
> This is due to config with:
>
> CONFIG_FRAME_WARN=2048
> CONFIG_NODES_SHIFT=10
>
> Where struct collapse_control has a member int
> node_load[MAX_NUMNODES], and we stack allocate one.
>
> Is this a configuration that needs to be supported? 1024 nodes seems
> like a lot and I'm not sure if these configs are randomly generated or
> are reminiscent of real systems.

I don't have a better idea other than moving it out of the
collapse_control struct. You may consider changing node_load to two
dimensions, for example:

node_load[2][MAX_NUMNODES], then define:
enum {
    /* khugepaged */
    COLLAPSE_ASYNC,
    /* MADV_COLLAPSE */
    COLLAPSE_SYNC
}

Then khugepaged and MADV_COLLAPSE get their dedicated node_load respectively.

The more aggressive approach may be just killing node_load, but I'm
not sure what impact it may incur.

>
> Thanks,
> Zach
>
> >
> > vim +2284 mm/khugepaged.c
> >
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2261
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2262  static int khugepaged(void *none)
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2263  {
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2264      struct mm_slot *mm_slot;
> > d87b6065d6050b Zach O'Keefe       2022-06-03  2265      struct collapse_control cc = {
> > d87b6065d6050b Zach O'Keefe       2022-06-03  2266              .last_target_node = NUMA_NO_NODE,
> > d87b6065d6050b Zach O'Keefe       2022-06-03  2267      };
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2268
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2269      set_freezable();
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2270      set_user_nice(current, MAX_NICE);
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2271
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2272      while (!kthread_should_stop()) {
> > d87b6065d6050b Zach O'Keefe       2022-06-03  2273              khugepaged_do_scan(&cc);
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2274              khugepaged_wait_work();
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2275      }
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2276
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2277      spin_lock(&khugepaged_mm_lock);
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2278      mm_slot = khugepaged_scan.mm_slot;
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2279      khugepaged_scan.mm_slot = NULL;
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2280      if (mm_slot)
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2281              collect_mm_slot(mm_slot);
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2282      spin_unlock(&khugepaged_mm_lock);
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2283      return 0;
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 @2284  }
> > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2285
> >
> > --
> > 0-DAY CI Kernel Test Service
> > https://01.org/lkp
> >
Yang Shi June 6, 2022, 9:22 p.m. UTC | #4
On Mon, Jun 6, 2022 at 1:20 PM Yang Shi <shy828301@gmail.com> wrote:
>
> On Mon, Jun 6, 2022 at 9:40 AM Zach O'Keefe <zokeefe@google.com> wrote:
> >
> > On Sun, Jun 5, 2022 at 7:42 PM kernel test robot <lkp@intel.com> wrote:
> > >
> > > Hi Zach,
> > >
> > > Thank you for the patch! Perhaps something to improve:
> > >
> > > [auto build test WARNING on akpm-mm/mm-everything]
> > >
> > > url:    https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
> > > base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> > > config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config)
> > > compiler: gcc-11 (Debian 11.3.0-1) 11.3.0
> > > reproduce (this is a W=1 build):
> > >         # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286
> > >         git remote add linux-review https://github.com/intel-lab-lkp/linux
> > >         git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
> > >         git checkout d87b6065d6050b89930cca0814921aca7c269286
> > >         # save the config file
> > >         mkdir build_dir && cp config build_dir/.config
> > >         make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
> > >
> > > If you fix the issue, kindly add following tag where applicable
> > > Reported-by: kernel test robot <lkp@intel.com>
> > >
> > > All warnings (new ones prefixed by >>):
> > >
> > >    mm/khugepaged.c: In function 'khugepaged':
> > > >> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=]
> > >     2284 | }
> > >          | ^
> >
> > Thanks lkp@intel.com.
> >
> > This is due to config with:
> >
> > CONFIG_FRAME_WARN=2048
> > CONFIG_NODES_SHIFT=10
> >
> > Where struct collapse_control has a member int
> > node_load[MAX_NUMNODES], and we stack allocate one.
> >
> > Is this a configuration that needs to be supported? 1024 nodes seems
> > like a lot and I'm not sure if these configs are randomly generated or
> > are reminiscent of real systems.
>
> I don't have a better idea other than moving it out of the
> collapse_control struct. You may consider changing node_load to two
> dimensions, for example:
>
> node_load[2][MAX_NUMNODES], then define:
> enum {
>     /* khugepaged */
>     COLLAPSE_ASYNC,
>     /* MADV_COLLAPSE */
>     COLLAPSE_SYNC
> }
>
> Then khugepaged and MADV_COLLAPSE get their dedicated node_load respectively.

Sorry, I just realized this won't work for MADV_COLLAPSE since
multiple processes may call it at the same time. We may consider
allocating it dynamically. Make node_load the last element of the
collapse_control struct, then do:
for_each_node(node)
    kmalloc(sizeof(int), GFP_KERNEL);

MADV_COLLAPSE or khugepaged could just fail if this allocation fails,
since THP allocation is unlikely to succeed in that case anyway. But I'm
not sure if it is worth the complexity rather than just killing node_load.

>
> The more aggressive approach may be just killing node_load, but I'm
> not sure what impact it may incur.
>
> >
> > Thanks,
> > Zach
> >
> > >
> > > vim +2284 mm/khugepaged.c
> > >
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2261
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2262  static int khugepaged(void *none)
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2263  {
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2264      struct mm_slot *mm_slot;
> > > d87b6065d6050b Zach O'Keefe       2022-06-03  2265      struct collapse_control cc = {
> > > d87b6065d6050b Zach O'Keefe       2022-06-03  2266              .last_target_node = NUMA_NO_NODE,
> > > d87b6065d6050b Zach O'Keefe       2022-06-03  2267      };
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2268
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2269      set_freezable();
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2270      set_user_nice(current, MAX_NICE);
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2271
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2272      while (!kthread_should_stop()) {
> > > d87b6065d6050b Zach O'Keefe       2022-06-03  2273              khugepaged_do_scan(&cc);
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2274              khugepaged_wait_work();
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2275      }
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2276
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2277      spin_lock(&khugepaged_mm_lock);
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2278      mm_slot = khugepaged_scan.mm_slot;
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2279      khugepaged_scan.mm_slot = NULL;
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2280      if (mm_slot)
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2281              collect_mm_slot(mm_slot);
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2282      spin_unlock(&khugepaged_mm_lock);
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2283      return 0;
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 @2284  }
> > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26  2285
> > >
> > > --
> > > 0-DAY CI Kernel Test Service
> > > https://01.org/lkp
> > >
Andrew Morton June 6, 2022, 10:23 p.m. UTC | #5
On Mon, 6 Jun 2022 09:40:20 -0700 "Zach O'Keefe" <zokeefe@google.com> wrote:

> On Sun, Jun 5, 2022 at 7:42 PM kernel test robot <lkp@intel.com> wrote:
> >
> > Hi Zach,
> >
> > Thank you for the patch! Perhaps something to improve:
> >
> > [auto build test WARNING on akpm-mm/mm-everything]
> >
> > url:    https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
> > base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> > config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config)
> > compiler: gcc-11 (Debian 11.3.0-1) 11.3.0
> > reproduce (this is a W=1 build):
> >         # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286
> >         git remote add linux-review https://github.com/intel-lab-lkp/linux
> >         git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
> >         git checkout d87b6065d6050b89930cca0814921aca7c269286
> >         # save the config file
> >         mkdir build_dir && cp config build_dir/.config
> >         make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
> >
> > If you fix the issue, kindly add following tag where applicable
> > Reported-by: kernel test robot <lkp@intel.com>
> >
> > All warnings (new ones prefixed by >>):
> >
> >    mm/khugepaged.c: In function 'khugepaged':
> > >> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=]
> >     2284 | }
> >          | ^
> 
> Thanks lkp@intel.com.
> 
> This is due to config with:
> 
> CONFIG_FRAME_WARN=2048
> CONFIG_NODES_SHIFT=10
> 
> Where struct collapse_control has a member int
> node_load[MAX_NUMNODES], and we stack allocate one.
> 
> Is this a configuration that needs to be supported? 1024 nodes seems
> like a lot and I'm not sure if these configs are randomly generated or
> are reminiscent of real systems.

Adding 4k to the stack isn't a good thing to do.  It's trivial to
kmalloc the thing, so why not do that?

I'll await some reviewer input (hopefully positive ;)) before merging
this series.
Yang Shi June 6, 2022, 11:53 p.m. UTC | #6
On Mon, Jun 6, 2022 at 3:23 PM Andrew Morton <akpm@linux-foundation.org> wrote:
>
> On Mon, 6 Jun 2022 09:40:20 -0700 "Zach O'Keefe" <zokeefe@google.com> wrote:
>
> > On Sun, Jun 5, 2022 at 7:42 PM kernel test robot <lkp@intel.com> wrote:
> > >
> > > Hi Zach,
> > >
> > > Thank you for the patch! Perhaps something to improve:
> > >
> > > [auto build test WARNING on akpm-mm/mm-everything]
> > >
> > > url:    https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
> > > base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> > > config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config)
> > > compiler: gcc-11 (Debian 11.3.0-1) 11.3.0
> > > reproduce (this is a W=1 build):
> > >         # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286
> > >         git remote add linux-review https://github.com/intel-lab-lkp/linux
> > >         git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
> > >         git checkout d87b6065d6050b89930cca0814921aca7c269286
> > >         # save the config file
> > >         mkdir build_dir && cp config build_dir/.config
> > >         make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
> > >
> > > If you fix the issue, kindly add following tag where applicable
> > > Reported-by: kernel test robot <lkp@intel.com>
> > >
> > > All warnings (new ones prefixed by >>):
> > >
> > >    mm/khugepaged.c: In function 'khugepaged':
> > > >> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=]
> > >     2284 | }
> > >          | ^
> >
> > Thanks lkp@intel.com.
> >
> > This is due to config with:
> >
> > CONFIG_FRAME_WARN=2048
> > CONFIG_NODES_SHIFT=10
> >
> > Where struct collapse_control has a member int
> > node_load[MAX_NUMNODES], and we stack allocate one.
> >
> > Is this a configuration that needs to be supported? 1024 nodes seems
> > like a lot and I'm not sure if these configs are randomly generated or
> > are reminiscent of real systems.
>
> Adding 4k to the stack isn't a good thing to do.  It's trivial to
> kmalloc the thing, so why not do that?

Thanks, Andrew. Yeah, I just suggested that too.

>
> I'll await some reviewer input (hopefully positive ;)) before merging
> this series.
Zach O'Keefe June 8, 2022, 12:42 a.m. UTC | #7
On Mon, Jun 6, 2022 at 4:54 PM Yang Shi <shy828301@gmail.com> wrote:
>
> On Mon, Jun 6, 2022 at 3:23 PM Andrew Morton <akpm@linux-foundation.org> wrote:
> >
> > On Mon, 6 Jun 2022 09:40:20 -0700 "Zach O'Keefe" <zokeefe@google.com> wrote:
> >
> > > On Sun, Jun 5, 2022 at 7:42 PM kernel test robot <lkp@intel.com> wrote:
> > > >
> > > > Hi Zach,
> > > >
> > > > Thank you for the patch! Perhaps something to improve:
> > > >
> > > > [auto build test WARNING on akpm-mm/mm-everything]
> > > >
> > > > url:    https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
> > > > base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> > > > config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config)
> > > > compiler: gcc-11 (Debian 11.3.0-1) 11.3.0
> > > > reproduce (this is a W=1 build):
> > > >         # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286
> > > >         git remote add linux-review https://github.com/intel-lab-lkp/linux
> > > >         git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
> > > >         git checkout d87b6065d6050b89930cca0814921aca7c269286
> > > >         # save the config file
> > > >         mkdir build_dir && cp config build_dir/.config
> > > >         make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
> > > >
> > > > If you fix the issue, kindly add following tag where applicable
> > > > Reported-by: kernel test robot <lkp@intel.com>
> > > >
> > > > All warnings (new ones prefixed by >>):
> > > >
> > > >    mm/khugepaged.c: In function 'khugepaged':
> > > > >> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=]
> > > >     2284 | }
> > > >          | ^
> > >
> > > Thanks lkp@intel.com.
> > >
> > > This is due to config with:
> > >
> > > CONFIG_FRAME_WARN=2048
> > > CONFIG_NODES_SHIFT=10
> > >
> > > Where struct collapse_control has a member int
> > > node_load[MAX_NUMNODES], and we stack allocate one.
> > >
> > > Is this a configuration that needs to be supported? 1024 nodes seems
> > > like a lot and I'm not sure if these configs are randomly generated or
> > > are reminiscent of real systems.
> >
> > Adding 4k to the stack isn't a good thing to do.  It's trivial to
> > kmalloc the thing, so why not do that?
>
> Thanks, Andrew. Yeah, I just suggested that too.

Thanks Yang / Andrew for taking the time to voice your suggestions.

I'll go ahead and just kmalloc() the thing and fail if we can't.

Yang, is there a reason to kmalloc() the entire struct
collapse_control with trailing flex array vs stack allocating the
struct collapse_control + kmalloc()'ing the node_load array?


> >
> > I'll await some reviewer input (hopefully positive ;)) before merging
> > this series.
Yang Shi June 8, 2022, 1 a.m. UTC | #8
On Tue, Jun 7, 2022 at 5:43 PM Zach O'Keefe <zokeefe@google.com> wrote:
>
> On Mon, Jun 6, 2022 at 4:54 PM Yang Shi <shy828301@gmail.com> wrote:
> >
> > On Mon, Jun 6, 2022 at 3:23 PM Andrew Morton <akpm@linux-foundation.org> wrote:
> > >
> > > On Mon, 6 Jun 2022 09:40:20 -0700 "Zach O'Keefe" <zokeefe@google.com> wrote:
> > >
> > > > On Sun, Jun 5, 2022 at 7:42 PM kernel test robot <lkp@intel.com> wrote:
> > > > >
> > > > > Hi Zach,
> > > > >
> > > > > Thank you for the patch! Perhaps something to improve:
> > > > >
> > > > > [auto build test WARNING on akpm-mm/mm-everything]
> > > > >
> > > > > url:    https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
> > > > > base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> > > > > config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config)
> > > > > compiler: gcc-11 (Debian 11.3.0-1) 11.3.0
> > > > > reproduce (this is a W=1 build):
> > > > >         # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286
> > > > >         git remote add linux-review https://github.com/intel-lab-lkp/linux
> > > > >         git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
> > > > >         git checkout d87b6065d6050b89930cca0814921aca7c269286
> > > > >         # save the config file
> > > > >         mkdir build_dir && cp config build_dir/.config
> > > > >         make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
> > > > >
> > > > > If you fix the issue, kindly add following tag where applicable
> > > > > Reported-by: kernel test robot <lkp@intel.com>
> > > > >
> > > > > All warnings (new ones prefixed by >>):
> > > > >
> > > > >    mm/khugepaged.c: In function 'khugepaged':
> > > > > >> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=]
> > > > >     2284 | }
> > > > >          | ^
> > > >
> > > > Thanks lkp@intel.com.
> > > >
> > > > This is due to config with:
> > > >
> > > > CONFIG_FRAME_WARN=2048
> > > > CONFIG_NODES_SHIFT=10
> > > >
> > > > Where struct collapse_control has a member int
> > > > node_load[MAX_NUMNODES], and we stack allocate one.
> > > >
> > > > Is this a configuration that needs to be supported? 1024 nodes seems
> > > > like a lot and I'm not sure if these configs are randomly generated or
> > > > are reminiscent of real systems.
> > >
> > > Adding 4k to the stack isn't a good thing to do.  It's trivial to
> > > kmalloc the thing, so why not do that?
> >
> > Thanks, Andrew. Yeah, I just suggested that too.
>
> Thanks Yang / Andrew for taking the time to voice your suggestions.
>
> I'll go ahead and just kmalloc() the thing and fail if we can't.
>
> Yang, is there a reason to kmalloc() the entire struct
> collapse_control with trailing flex array vs stack allocating the
> struct collapse_control + kmalloc()'ing the node_load array?

I don't think there is much difference between the two, and I don't
have a strong preference personally. However, you could choose:

Define collapse_control as:
struct collapse_control {
    xxx;
    ...
    int node_load[MAX_NUMNODES];
}
Then you could kmalloc the whole struct.

Or it could be defined as:
struct collapse_control {
    xxx;
    ...
    int *node_load;
}
In this way you could allocate collapse_control on the stack or by
kmalloc, then kmalloc node_load for all possible nodes instead of
MAX_NUMNODES. This may have a better success rate since you
kmalloc much less memory (typically the number of possible nodes is
much less than MAX_NUMNODES), but it may not be worth it since the
error handling path is more complicated and it may not make too much
difference.

The first choice is definitely much simpler, you may want to try that first.

>
>
> > >
> > > I'll await some reviewer input (hopefully positive ;)) before merging
> > > this series.
Zach O'Keefe June 8, 2022, 1:06 a.m. UTC | #9
On Tue, Jun 7, 2022 at 6:00 PM Yang Shi <shy828301@gmail.com> wrote:
>
> On Tue, Jun 7, 2022 at 5:43 PM Zach O'Keefe <zokeefe@google.com> wrote:
> >
> > On Mon, Jun 6, 2022 at 4:54 PM Yang Shi <shy828301@gmail.com> wrote:
> > >
> > > On Mon, Jun 6, 2022 at 3:23 PM Andrew Morton <akpm@linux-foundation.org> wrote:
> > > >
> > > > On Mon, 6 Jun 2022 09:40:20 -0700 "Zach O'Keefe" <zokeefe@google.com> wrote:
> > > >
> > > > > On Sun, Jun 5, 2022 at 7:42 PM kernel test robot <lkp@intel.com> wrote:
> > > > > >
> > > > > > Hi Zach,
> > > > > >
> > > > > > Thank you for the patch! Perhaps something to improve:
> > > > > >
> > > > > > [auto build test WARNING on akpm-mm/mm-everything]
> > > > > >
> > > > > > url:    https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
> > > > > > base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> > > > > > config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config)
> > > > > > compiler: gcc-11 (Debian 11.3.0-1) 11.3.0
> > > > > > reproduce (this is a W=1 build):
> > > > > >         # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286
> > > > > >         git remote add linux-review https://github.com/intel-lab-lkp/linux
> > > > > >         git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
> > > > > >         git checkout d87b6065d6050b89930cca0814921aca7c269286
> > > > > >         # save the config file
> > > > > >         mkdir build_dir && cp config build_dir/.config
> > > > > >         make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
> > > > > >
> > > > > > If you fix the issue, kindly add following tag where applicable
> > > > > > Reported-by: kernel test robot <lkp@intel.com>
> > > > > >
> > > > > > All warnings (new ones prefixed by >>):
> > > > > >
> > > > > >    mm/khugepaged.c: In function 'khugepaged':
> > > > > > >> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=]
> > > > > >     2284 | }
> > > > > >          | ^
> > > > >
> > > > > Thanks lkp@intel.com.
> > > > >
> > > > > This is due to config with:
> > > > >
> > > > > CONFIG_FRAME_WARN=2048
> > > > > CONFIG_NODES_SHIFT=10
> > > > >
> > > > > Where struct collapse_control has a member int
> > > > > node_load[MAX_NUMNODES], and we stack allocate one.
> > > > >
> > > > > Is this a configuration that needs to be supported? 1024 nodes seems
> > > > > like a lot and I'm not sure if these configs are randomly generated or
> > > > > are reminiscent of real systems.
> > > >
> > > > Adding 4k to the stack isn't a good thing to do.  It's trivial to
> > > > kmalloc the thing, so why not do that?
> > >
> > > Thanks, Andrew. Yeah, I just suggested that too.
> >
> > Thanks Yang / Andrew for taking the time to voice your suggestions.
> >
> > I'll go ahead and just kmalloc() the thing and fail if we can't.
> >
> > Yang, is there a reason to kmalloc() the entire struct
> > collapse_control with trailing flex array vs stack allocating the
> > struct collapse_control + kmalloc()'ing the node_load array?
>
> I don't think those two have too much difference. I don't have a
> strong preference personally. However you could choose:
>
> Define collapse_control as:
> struct collapse_control {
>     xxx;
>     ...
>     int node_load[MAX_NUMANODES];
> }
> Then you could kmalloc the whole struct.
>
> Or it could be defined as:
> struct collapse_control {
>     xxx;
>     ...
>     int *node_load[];
> }
> In this way you could allocate collapse_control on stack or by
> kmalloc, then kmalloc node_load for all possible nodes instead of
> MAX_NUMANODES. This may have a better success rate since you do
> kmalloc much less memory (typically the number of possible nodes is
> much less than MAX_NUMANODES), but it may be not worth it since the
> error handling path is more complicated and it may not make too much
> difference.
>
> The first choice is definitely much simpler, you may want to try that first.

Thanks for the suggestion. The first approach also has the benefit of
letting us statically allocate one for khugepaged, which simplifies
the error paths there. I'll try that.

Again, thanks for taking the time to review and help out / suggest
improvements :)

Best,
Zach

> >
> >
> > > >
> > > > I'll await some reviewer input (hopefully positive ;)) before merging
> > > > this series.
diff mbox series

Patch

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 7a914ca19e96..907d0b2bd4bd 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -86,6 +86,14 @@  static struct kmem_cache *mm_slot_cache __read_mostly;
 
 #define MAX_PTE_MAPPED_THP 8
 
+struct collapse_control {
+	/* Num pages scanned per node */
+	int node_load[MAX_NUMNODES];
+
+	/* Last target selected in khugepaged_find_target_node() */
+	int last_target_node;
+};
+
 /**
  * struct mm_slot - hash lookup from mm to mm_slot
  * @hash: hash collision list
@@ -777,9 +785,7 @@  static void khugepaged_alloc_sleep(void)
 	remove_wait_queue(&khugepaged_wait, &wait);
 }
 
-static int khugepaged_node_load[MAX_NUMNODES];
-
-static bool khugepaged_scan_abort(int nid)
+static bool khugepaged_scan_abort(int nid, struct collapse_control *cc)
 {
 	int i;
 
@@ -791,11 +797,11 @@  static bool khugepaged_scan_abort(int nid)
 		return false;
 
 	/* If there is a count for this node already, it must be acceptable */
-	if (khugepaged_node_load[nid])
+	if (cc->node_load[nid])
 		return false;
 
 	for (i = 0; i < MAX_NUMNODES; i++) {
-		if (!khugepaged_node_load[i])
+		if (!cc->node_load[i])
 			continue;
 		if (node_distance(nid, i) > node_reclaim_distance)
 			return true;
@@ -810,32 +816,31 @@  static inline gfp_t alloc_hugepage_khugepaged_gfpmask(void)
 }
 
 #ifdef CONFIG_NUMA
-static int khugepaged_find_target_node(void)
+static int khugepaged_find_target_node(struct collapse_control *cc)
 {
-	static int last_khugepaged_target_node = NUMA_NO_NODE;
 	int nid, target_node = 0, max_value = 0;
 
 	/* find first node with max normal pages hit */
 	for (nid = 0; nid < MAX_NUMNODES; nid++)
-		if (khugepaged_node_load[nid] > max_value) {
-			max_value = khugepaged_node_load[nid];
+		if (cc->node_load[nid] > max_value) {
+			max_value = cc->node_load[nid];
 			target_node = nid;
 		}
 
 	/* do some balance if several nodes have the same hit record */
-	if (target_node <= last_khugepaged_target_node)
-		for (nid = last_khugepaged_target_node + 1; nid < MAX_NUMNODES;
-				nid++)
-			if (max_value == khugepaged_node_load[nid]) {
+	if (target_node <= cc->last_target_node)
+		for (nid = cc->last_target_node + 1; nid < MAX_NUMNODES;
+		     nid++)
+			if (max_value == cc->node_load[nid]) {
 				target_node = nid;
 				break;
 			}
 
-	last_khugepaged_target_node = target_node;
+	cc->last_target_node = target_node;
 	return target_node;
 }
 #else
-static int khugepaged_find_target_node(void)
+static int khugepaged_find_target_node(struct collapse_control *cc)
 {
 	return 0;
 }
@@ -1155,10 +1160,9 @@  static void collapse_huge_page(struct mm_struct *mm,
 	return;
 }
 
-static int khugepaged_scan_pmd(struct mm_struct *mm,
-			       struct vm_area_struct *vma,
-			       unsigned long address,
-			       struct page **hpage)
+static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
+			       unsigned long address, struct page **hpage,
+			       struct collapse_control *cc)
 {
 	pmd_t *pmd;
 	pte_t *pte, *_pte;
@@ -1176,7 +1180,7 @@  static int khugepaged_scan_pmd(struct mm_struct *mm,
 	if (result != SCAN_SUCCEED)
 		goto out;
 
-	memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
+	memset(cc->node_load, 0, sizeof(cc->node_load));
 	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 	for (_address = address, _pte = pte; _pte < pte+HPAGE_PMD_NR;
 	     _pte++, _address += PAGE_SIZE) {
@@ -1242,16 +1246,16 @@  static int khugepaged_scan_pmd(struct mm_struct *mm,
 
 		/*
 		 * Record which node the original page is from and save this
-		 * information to khugepaged_node_load[].
+		 * information to cc->node_load[].
 		 * Khugepaged will allocate hugepage from the node has the max
 		 * hit record.
 		 */
 		node = page_to_nid(page);
-		if (khugepaged_scan_abort(node)) {
+		if (khugepaged_scan_abort(node, cc)) {
 			result = SCAN_SCAN_ABORT;
 			goto out_unmap;
 		}
-		khugepaged_node_load[node]++;
+		cc->node_load[node]++;
 		if (!PageLRU(page)) {
 			result = SCAN_PAGE_LRU;
 			goto out_unmap;
@@ -1302,7 +1306,7 @@  static int khugepaged_scan_pmd(struct mm_struct *mm,
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
 	if (ret) {
-		node = khugepaged_find_target_node();
+		node = khugepaged_find_target_node(cc);
 		/* collapse_huge_page will return with the mmap_lock released */
 		collapse_huge_page(mm, address, hpage, node,
 				referenced, unmapped);
@@ -1958,8 +1962,9 @@  static void collapse_file(struct mm_struct *mm,
 	/* TODO: tracepoints */
 }
 
-static void khugepaged_scan_file(struct mm_struct *mm,
-		struct file *file, pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
+				 pgoff_t start, struct page **hpage,
+				 struct collapse_control *cc)
 {
 	struct page *page = NULL;
 	struct address_space *mapping = file->f_mapping;
@@ -1970,7 +1975,7 @@  static void khugepaged_scan_file(struct mm_struct *mm,
 
 	present = 0;
 	swap = 0;
-	memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
+	memset(cc->node_load, 0, sizeof(cc->node_load));
 	rcu_read_lock();
 	xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) {
 		if (xas_retry(&xas, page))
@@ -1995,11 +2000,11 @@  static void khugepaged_scan_file(struct mm_struct *mm,
 		}
 
 		node = page_to_nid(page);
-		if (khugepaged_scan_abort(node)) {
+		if (khugepaged_scan_abort(node, cc)) {
 			result = SCAN_SCAN_ABORT;
 			break;
 		}
-		khugepaged_node_load[node]++;
+		cc->node_load[node]++;
 
 		if (!PageLRU(page)) {
 			result = SCAN_PAGE_LRU;
@@ -2032,7 +2037,7 @@  static void khugepaged_scan_file(struct mm_struct *mm,
 			result = SCAN_EXCEED_NONE_PTE;
 			count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
 		} else {
-			node = khugepaged_find_target_node();
+			node = khugepaged_find_target_node(cc);
 			collapse_file(mm, file, start, hpage, node);
 		}
 	}
@@ -2040,8 +2045,9 @@  static void khugepaged_scan_file(struct mm_struct *mm,
 	/* TODO: tracepoints */
 }
 #else
-static void khugepaged_scan_file(struct mm_struct *mm,
-		struct file *file, pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
+				 pgoff_t start, struct page **hpage,
+				 struct collapse_control *cc)
 {
 	BUILD_BUG();
 }
@@ -2052,7 +2058,8 @@  static void khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
 #endif
 
 static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
-					    struct page **hpage)
+					    struct page **hpage,
+					    struct collapse_control *cc)
 	__releases(&khugepaged_mm_lock)
 	__acquires(&khugepaged_mm_lock)
 {
@@ -2133,12 +2140,13 @@  static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 
 				mmap_read_unlock(mm);
 				ret = 1;
-				khugepaged_scan_file(mm, file, pgoff, hpage);
+				khugepaged_scan_file(mm, file, pgoff, hpage,
+						     cc);
 				fput(file);
 			} else {
 				ret = khugepaged_scan_pmd(mm, vma,
 						khugepaged_scan.address,
-						hpage);
+						hpage, cc);
 			}
 			/* move to next address */
 			khugepaged_scan.address += HPAGE_PMD_SIZE;
@@ -2194,7 +2202,7 @@  static int khugepaged_wait_event(void)
 		kthread_should_stop();
 }
 
-static void khugepaged_do_scan(void)
+static void khugepaged_do_scan(struct collapse_control *cc)
 {
 	struct page *hpage = NULL;
 	unsigned int progress = 0, pass_through_head = 0;
@@ -2218,7 +2226,7 @@  static void khugepaged_do_scan(void)
 		if (khugepaged_has_work() &&
 		    pass_through_head < 2)
 			progress += khugepaged_scan_mm_slot(pages - progress,
-							    &hpage);
+							    &hpage, cc);
 		else
 			progress = pages;
 		spin_unlock(&khugepaged_mm_lock);
@@ -2254,12 +2262,15 @@  static void khugepaged_wait_work(void)
 static int khugepaged(void *none)
 {
 	struct mm_slot *mm_slot;
+	struct collapse_control cc = {
+		.last_target_node = NUMA_NO_NODE,
+	};
 
 	set_freezable();
 	set_user_nice(current, MAX_NICE);
 
 	while (!kthread_should_stop()) {
-		khugepaged_do_scan();
+		khugepaged_do_scan(&cc);
 		khugepaged_wait_work();
 	}