Message ID | 20220604004004.954674-4-zokeefe@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm: userspace hugepage collapse | expand |
Hi Zach,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on akpm-mm/mm-everything]
url: https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-1) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953
git checkout d87b6065d6050b89930cca0814921aca7c269286
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
If you fix the issue, kindly add the following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
mm/khugepaged.c: In function 'khugepaged':
>> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=]
2284 | }
| ^
vim +2284 mm/khugepaged.c
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2261
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2262 static int khugepaged(void *none)
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2263 {
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2264 struct mm_slot *mm_slot;
d87b6065d6050b Zach O'Keefe 2022-06-03 2265 struct collapse_control cc = {
d87b6065d6050b Zach O'Keefe 2022-06-03 2266 .last_target_node = NUMA_NO_NODE,
d87b6065d6050b Zach O'Keefe 2022-06-03 2267 };
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2268
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2269 set_freezable();
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2270 set_user_nice(current, MAX_NICE);
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2271
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2272 while (!kthread_should_stop()) {
d87b6065d6050b Zach O'Keefe 2022-06-03 2273 khugepaged_do_scan(&cc);
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2274 khugepaged_wait_work();
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2275 }
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2276
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2277 spin_lock(&khugepaged_mm_lock);
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2278 mm_slot = khugepaged_scan.mm_slot;
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2279 khugepaged_scan.mm_slot = NULL;
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2280 if (mm_slot)
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2281 collect_mm_slot(mm_slot);
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2282 spin_unlock(&khugepaged_mm_lock);
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2283 return 0;
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 @2284 }
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2285
On Sun, Jun 5, 2022 at 7:42 PM kernel test robot <lkp@intel.com> wrote: > > Hi Zach, > > Thank you for the patch! Perhaps something to improve: > > [auto build test WARNING on akpm-mm/mm-everything] > > url: https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953 > base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything > config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config) > compiler: gcc-11 (Debian 11.3.0-1) 11.3.0 > reproduce (this is a W=1 build): > # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286 > git remote add linux-review https://github.com/intel-lab-lkp/linux > git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953 > git checkout d87b6065d6050b89930cca0814921aca7c269286 > # save the config file > mkdir build_dir && cp config build_dir/.config > make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash > > If you fix the issue, kindly add following tag where applicable > Reported-by: kernel test robot <lkp@intel.com> > > All warnings (new ones prefixed by >>): > > mm/khugepaged.c: In function 'khugepaged': > >> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=] > 2284 | } > | ^ Thanks lkp@intel.com. This is due to config with: CONFIG_FRAME_WARN=2048 CONFIG_NODES_SHIFT=10 Where struct collapse_control has a member int node_load[MAX_NUMNODES], and we stack allocate one. Is this a configuration that needs to be supported? 1024 nodes seems like a lot and I'm not sure if these configs are randomly generated or are reminiscent of real systems. Thanks, Zach > > vim +2284 mm/khugepaged.c > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2261 > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2262 static int khugepaged(void *none) > b46e756f5e4703 Kirill A. 
Shutemov 2016-07-26 2263 { > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2264 struct mm_slot *mm_slot; > d87b6065d6050b Zach O'Keefe 2022-06-03 2265 struct collapse_control cc = { > d87b6065d6050b Zach O'Keefe 2022-06-03 2266 .last_target_node = NUMA_NO_NODE, > d87b6065d6050b Zach O'Keefe 2022-06-03 2267 }; > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2268 > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2269 set_freezable(); > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2270 set_user_nice(current, MAX_NICE); > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2271 > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2272 while (!kthread_should_stop()) { > d87b6065d6050b Zach O'Keefe 2022-06-03 2273 khugepaged_do_scan(&cc); > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2274 khugepaged_wait_work(); > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2275 } > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2276 > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2277 spin_lock(&khugepaged_mm_lock); > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2278 mm_slot = khugepaged_scan.mm_slot; > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2279 khugepaged_scan.mm_slot = NULL; > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2280 if (mm_slot) > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2281 collect_mm_slot(mm_slot); > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2282 spin_unlock(&khugepaged_mm_lock); > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2283 return 0; > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 @2284 } > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2285 > > -- > 0-DAY CI Kernel Test Service > https://01.org/lkp >
On Mon, Jun 6, 2022 at 9:40 AM Zach O'Keefe <zokeefe@google.com> wrote: > > On Sun, Jun 5, 2022 at 7:42 PM kernel test robot <lkp@intel.com> wrote: > > > > Hi Zach, > > > > Thank you for the patch! Perhaps something to improve: > > > > [auto build test WARNING on akpm-mm/mm-everything] > > > > url: https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953 > > base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything > > config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config) > > compiler: gcc-11 (Debian 11.3.0-1) 11.3.0 > > reproduce (this is a W=1 build): > > # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286 > > git remote add linux-review https://github.com/intel-lab-lkp/linux > > git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953 > > git checkout d87b6065d6050b89930cca0814921aca7c269286 > > # save the config file > > mkdir build_dir && cp config build_dir/.config > > make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash > > > > If you fix the issue, kindly add following tag where applicable > > Reported-by: kernel test robot <lkp@intel.com> > > > > All warnings (new ones prefixed by >>): > > > > mm/khugepaged.c: In function 'khugepaged': > > >> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=] > > 2284 | } > > | ^ > > Thanks lkp@intel.com. > > This is due to config with: > > CONFIG_FRAME_WARN=2048 > CONFIG_NODES_SHIFT=10 > > Where struct collapse_control has a member int > node_load[MAX_NUMNODES], and we stack allocate one. > > Is this a configuration that needs to be supported? 1024 nodes seems > like a lot and I'm not sure if these configs are randomly generated or > are reminiscent of real systems. I don't have a better idea other than moving it out of the collapse_control struct. 
You may consider changing node_load to two dimensions, for example: node_load[2][MAX_NUMNODES], then define: enum { /* khugepaged */ COLLAPSE_ASYNC, /* MADV_COLLAPSE */ COLLAPSE_SYNC } Then khugepaged and MADV_COLLAPSE get their dedicated node_load respectively. The more aggressive approach may be just killing node_load, but I'm not sure what impact it may incur. > > Thanks, > Zach > > > > > vim +2284 mm/khugepaged.c > > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2261 > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2262 static int khugepaged(void *none) > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2263 { > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2264 struct mm_slot *mm_slot; > > d87b6065d6050b Zach O'Keefe 2022-06-03 2265 struct collapse_control cc = { > > d87b6065d6050b Zach O'Keefe 2022-06-03 2266 .last_target_node = NUMA_NO_NODE, > > d87b6065d6050b Zach O'Keefe 2022-06-03 2267 }; > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2268 > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2269 set_freezable(); > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2270 set_user_nice(current, MAX_NICE); > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2271 > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2272 while (!kthread_should_stop()) { > > d87b6065d6050b Zach O'Keefe 2022-06-03 2273 khugepaged_do_scan(&cc); > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2274 khugepaged_wait_work(); > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2275 } > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2276 > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2277 spin_lock(&khugepaged_mm_lock); > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2278 mm_slot = khugepaged_scan.mm_slot; > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2279 khugepaged_scan.mm_slot = NULL; > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2280 if (mm_slot) > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2281 collect_mm_slot(mm_slot); > > b46e756f5e4703 Kirill A. 
Shutemov 2016-07-26 2282 spin_unlock(&khugepaged_mm_lock); > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2283 return 0; > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 @2284 } > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2285 > > > > -- > > 0-DAY CI Kernel Test Service > > https://01.org/lkp > >
On Mon, Jun 6, 2022 at 1:20 PM Yang Shi <shy828301@gmail.com> wrote: > > On Mon, Jun 6, 2022 at 9:40 AM Zach O'Keefe <zokeefe@google.com> wrote: > > > > On Sun, Jun 5, 2022 at 7:42 PM kernel test robot <lkp@intel.com> wrote: > > > > > > Hi Zach, > > > > > > Thank you for the patch! Perhaps something to improve: > > > > > > [auto build test WARNING on akpm-mm/mm-everything] > > > > > > url: https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953 > > > base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything > > > config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config) > > > compiler: gcc-11 (Debian 11.3.0-1) 11.3.0 > > > reproduce (this is a W=1 build): > > > # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286 > > > git remote add linux-review https://github.com/intel-lab-lkp/linux > > > git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953 > > > git checkout d87b6065d6050b89930cca0814921aca7c269286 > > > # save the config file > > > mkdir build_dir && cp config build_dir/.config > > > make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash > > > > > > If you fix the issue, kindly add following tag where applicable > > > Reported-by: kernel test robot <lkp@intel.com> > > > > > > All warnings (new ones prefixed by >>): > > > > > > mm/khugepaged.c: In function 'khugepaged': > > > >> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=] > > > 2284 | } > > > | ^ > > > > Thanks lkp@intel.com. > > > > This is due to config with: > > > > CONFIG_FRAME_WARN=2048 > > CONFIG_NODES_SHIFT=10 > > > > Where struct collapse_control has a member int > > node_load[MAX_NUMNODES], and we stack allocate one. > > > > Is this a configuration that needs to be supported? 
1024 nodes seems > > like a lot and I'm not sure if these configs are randomly generated or > > are reminiscent of real systems. > > I don't have a better idea other than moving it out of the > collapse_control struct. You may consider changing node_load to two > dimensions, for example: > > node_load[2][MAX_NUMNODES], then define: > enum { > /* khugepaged */ > COLLAPSE_ASYNC, > /* MADV_COLLAPSE */ > COLLAPSE_SYNC > } > > Then khugepaged and MADV_COLLAPSE get their dedicated node_load respectively. Sorry, I just realized this won't work for MADV_COLLAPSE since multiple processes may call it at the same time. We may consider allocating it dynamically. Have node_load the last element of collapse_control struct, then do: for_each_node(node) kmalloc(sizeof(int), GFP_KERNEL); MADV_COLLAPSE or khugepaged could just fail if it fails since THP allocation is unlikely to succeed in this case. But not sure if it is worth the complexity rather than just killing it. > > The more aggressive approach may be just killing node_load, but I'm > not sure what impact it may incur. > > > > > Thanks, > > Zach > > > > > > > > vim +2284 mm/khugepaged.c > > > > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2261 > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2262 static int khugepaged(void *none) > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2263 { > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2264 struct mm_slot *mm_slot; > > > d87b6065d6050b Zach O'Keefe 2022-06-03 2265 struct collapse_control cc = { > > > d87b6065d6050b Zach O'Keefe 2022-06-03 2266 .last_target_node = NUMA_NO_NODE, > > > d87b6065d6050b Zach O'Keefe 2022-06-03 2267 }; > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2268 > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2269 set_freezable(); > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2270 set_user_nice(current, MAX_NICE); > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2271 > > > b46e756f5e4703 Kirill A. 
Shutemov 2016-07-26 2272 while (!kthread_should_stop()) { > > > d87b6065d6050b Zach O'Keefe 2022-06-03 2273 khugepaged_do_scan(&cc); > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2274 khugepaged_wait_work(); > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2275 } > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2276 > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2277 spin_lock(&khugepaged_mm_lock); > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2278 mm_slot = khugepaged_scan.mm_slot; > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2279 khugepaged_scan.mm_slot = NULL; > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2280 if (mm_slot) > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2281 collect_mm_slot(mm_slot); > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2282 spin_unlock(&khugepaged_mm_lock); > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2283 return 0; > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 @2284 } > > > b46e756f5e4703 Kirill A. Shutemov 2016-07-26 2285 > > > > > > -- > > > 0-DAY CI Kernel Test Service > > > https://01.org/lkp > > >
On Mon, 6 Jun 2022 09:40:20 -0700 "Zach O'Keefe" <zokeefe@google.com> wrote: > On Sun, Jun 5, 2022 at 7:42 PM kernel test robot <lkp@intel.com> wrote: > > > > Hi Zach, > > > > Thank you for the patch! Perhaps something to improve: > > > > [auto build test WARNING on akpm-mm/mm-everything] > > > > url: https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953 > > base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything > > config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config) > > compiler: gcc-11 (Debian 11.3.0-1) 11.3.0 > > reproduce (this is a W=1 build): > > # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286 > > git remote add linux-review https://github.com/intel-lab-lkp/linux > > git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953 > > git checkout d87b6065d6050b89930cca0814921aca7c269286 > > # save the config file > > mkdir build_dir && cp config build_dir/.config > > make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash > > > > If you fix the issue, kindly add following tag where applicable > > Reported-by: kernel test robot <lkp@intel.com> > > > > All warnings (new ones prefixed by >>): > > > > mm/khugepaged.c: In function 'khugepaged': > > >> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=] > > 2284 | } > > | ^ > > Thanks lkp@intel.com. > > This is due to config with: > > CONFIG_FRAME_WARN=2048 > CONFIG_NODES_SHIFT=10 > > Where struct collapse_control has a member int > node_load[MAX_NUMNODES], and we stack allocate one. > > Is this a configuration that needs to be supported? 1024 nodes seems > like a lot and I'm not sure if these configs are randomly generated or > are reminiscent of real systems. Adding 4k to the stack isn't a good thing to do. 
It's trivial to kmalloc the thing, so why not do that? I'll await some reviewer input (hopefully positive ;)) before merging this series.
On Mon, Jun 6, 2022 at 3:23 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > On Mon, 6 Jun 2022 09:40:20 -0700 "Zach O'Keefe" <zokeefe@google.com> wrote: > > > On Sun, Jun 5, 2022 at 7:42 PM kernel test robot <lkp@intel.com> wrote: > > > > > > Hi Zach, > > > > > > Thank you for the patch! Perhaps something to improve: > > > > > > [auto build test WARNING on akpm-mm/mm-everything] > > > > > > url: https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953 > > > base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything > > > config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config) > > > compiler: gcc-11 (Debian 11.3.0-1) 11.3.0 > > > reproduce (this is a W=1 build): > > > # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286 > > > git remote add linux-review https://github.com/intel-lab-lkp/linux > > > git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953 > > > git checkout d87b6065d6050b89930cca0814921aca7c269286 > > > # save the config file > > > mkdir build_dir && cp config build_dir/.config > > > make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash > > > > > > If you fix the issue, kindly add following tag where applicable > > > Reported-by: kernel test robot <lkp@intel.com> > > > > > > All warnings (new ones prefixed by >>): > > > > > > mm/khugepaged.c: In function 'khugepaged': > > > >> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=] > > > 2284 | } > > > | ^ > > > > Thanks lkp@intel.com. > > > > This is due to config with: > > > > CONFIG_FRAME_WARN=2048 > > CONFIG_NODES_SHIFT=10 > > > > Where struct collapse_control has a member int > > node_load[MAX_NUMNODES], and we stack allocate one. > > > > Is this a configuration that needs to be supported? 
1024 nodes seems > > like a lot and I'm not sure if these configs are randomly generated or > > are reminiscent of real systems. > > Adding 4k to the stack isn't a good thing to do. It's trivial to > kmalloc the thing, so why not do that? Thanks, Andrew. Yeah, I just suggested that too. > > I'll await some reviewer input (hopefully positive ;)) before merging > this series.
On Mon, Jun 6, 2022 at 4:54 PM Yang Shi <shy828301@gmail.com> wrote: > > On Mon, Jun 6, 2022 at 3:23 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > > > On Mon, 6 Jun 2022 09:40:20 -0700 "Zach O'Keefe" <zokeefe@google.com> wrote: > > > > > On Sun, Jun 5, 2022 at 7:42 PM kernel test robot <lkp@intel.com> wrote: > > > > > > > > Hi Zach, > > > > > > > > Thank you for the patch! Perhaps something to improve: > > > > > > > > [auto build test WARNING on akpm-mm/mm-everything] > > > > > > > > url: https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953 > > > > base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything > > > > config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config) > > > > compiler: gcc-11 (Debian 11.3.0-1) 11.3.0 > > > > reproduce (this is a W=1 build): > > > > # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286 > > > > git remote add linux-review https://github.com/intel-lab-lkp/linux > > > > git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953 > > > > git checkout d87b6065d6050b89930cca0814921aca7c269286 > > > > # save the config file > > > > mkdir build_dir && cp config build_dir/.config > > > > make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash > > > > > > > > If you fix the issue, kindly add following tag where applicable > > > > Reported-by: kernel test robot <lkp@intel.com> > > > > > > > > All warnings (new ones prefixed by >>): > > > > > > > > mm/khugepaged.c: In function 'khugepaged': > > > > >> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=] > > > > 2284 | } > > > > | ^ > > > > > > Thanks lkp@intel.com. 
> > > > > > This is due to config with: > > > > > > CONFIG_FRAME_WARN=2048 > > > CONFIG_NODES_SHIFT=10 > > > > > > Where struct collapse_control has a member int > > > node_load[MAX_NUMNODES], and we stack allocate one. > > > > > > Is this a configuration that needs to be supported? 1024 nodes seems > > > like a lot and I'm not sure if these configs are randomly generated or > > > are reminiscent of real systems. > > > > Adding 4k to the stack isn't a good thing to do. It's trivial to > > kmalloc the thing, so why not do that? > > Thanks, Andrew. Yeah, I just suggested that too. Thanks Yang / Andrew for taking the time to voice your suggestions. I'll go ahead and just kmalloc() the thing and fail if we can't. Yang, is there a reason to kmalloc() the entire struct collapse_control with trailing flex array vs stack allocating the struct collapse_control + kmalloc()'ing the node_load array? > > > > I'll await some reviewer input (hopefully positive ;)) before merging > > this series.
On Tue, Jun 7, 2022 at 5:43 PM Zach O'Keefe <zokeefe@google.com> wrote: > > On Mon, Jun 6, 2022 at 4:54 PM Yang Shi <shy828301@gmail.com> wrote: > > > > On Mon, Jun 6, 2022 at 3:23 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > > > > > On Mon, 6 Jun 2022 09:40:20 -0700 "Zach O'Keefe" <zokeefe@google.com> wrote: > > > > > > > On Sun, Jun 5, 2022 at 7:42 PM kernel test robot <lkp@intel.com> wrote: > > > > > > > > > > Hi Zach, > > > > > > > > > > Thank you for the patch! Perhaps something to improve: > > > > > > > > > > [auto build test WARNING on akpm-mm/mm-everything] > > > > > > > > > > url: https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953 > > > > > base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything > > > > > config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config) > > > > > compiler: gcc-11 (Debian 11.3.0-1) 11.3.0 > > > > > reproduce (this is a W=1 build): > > > > > # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286 > > > > > git remote add linux-review https://github.com/intel-lab-lkp/linux > > > > > git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953 > > > > > git checkout d87b6065d6050b89930cca0814921aca7c269286 > > > > > # save the config file > > > > > mkdir build_dir && cp config build_dir/.config > > > > > make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash > > > > > > > > > > If you fix the issue, kindly add following tag where applicable > > > > > Reported-by: kernel test robot <lkp@intel.com> > > > > > > > > > > All warnings (new ones prefixed by >>): > > > > > > > > > > mm/khugepaged.c: In function 'khugepaged': > > > > > >> mm/khugepaged.c:2284:1: warning: the frame size of 4160 bytes is larger than 2048 bytes [-Wframe-larger-than=] > > > > > 2284 | } > > > > > | ^ > > > > > > > > Thanks lkp@intel.com. 
> > > > > > > > This is due to config with: > > > > > > > > CONFIG_FRAME_WARN=2048 > > > > CONFIG_NODES_SHIFT=10 > > > > > > > > Where struct collapse_control has a member int > > > > node_load[MAX_NUMNODES], and we stack allocate one. > > > > > > > > Is this a configuration that needs to be supported? 1024 nodes seems > > > > like a lot and I'm not sure if these configs are randomly generated or > > > > are reminiscent of real systems. > > > > > > Adding 4k to the stack isn't a good thing to do. It's trivial to > > > kmalloc the thing, so why not do that? > > > > Thanks, Andrew. Yeah, I just suggested that too. > > Thanks Yang / Andrew for taking the time to voice your suggestions. > > I'll go ahead and just kmalloc() the thing and fail if we can't. > > Yang, is there a reason to kmalloc() the entire struct > collapse_control with trailing flex array vs stack allocating the > struct collapse_control + kmalloc()'ing the node_load array? I don't think those two have too much difference. I don't have a strong preference personally. However you could choose: Define collapse_control as: struct collapse_control { xxx; ... int node_load[MAX_NUMNODES]; } Then you could kmalloc the whole struct. Or it could be defined as: struct collapse_control { xxx; ... int *node_load[]; } In this way you could allocate collapse_control on the stack or by kmalloc, then kmalloc node_load for all possible nodes instead of MAX_NUMNODES. This may have a better success rate since you kmalloc much less memory (typically the number of possible nodes is much less than MAX_NUMNODES), but it may not be worth it since the error handling path is more complicated and it may not make too much difference. The first choice is definitely much simpler; you may want to try that first. > > > > > > > > I'll await some reviewer input (hopefully positive ;)) before merging > > > this series.
On Tue, Jun 7, 2022 at 6:00 PM Yang Shi <shy828301@gmail.com> wrote: > > On Tue, Jun 7, 2022 at 5:43 PM Zach O'Keefe <zokeefe@google.com> wrote: > > > > On Mon, Jun 6, 2022 at 4:54 PM Yang Shi <shy828301@gmail.com> wrote: > > > > > > On Mon, Jun 6, 2022 at 3:23 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > > > > > > > On Mon, 6 Jun 2022 09:40:20 -0700 "Zach O'Keefe" <zokeefe@google.com> wrote: > > > > > > > > > On Sun, Jun 5, 2022 at 7:42 PM kernel test robot <lkp@intel.com> wrote: > > > > > > > > > > > > Hi Zach, > > > > > > > > > > > > Thank you for the patch! Perhaps something to improve: > > > > > > > > > > > > [auto build test WARNING on akpm-mm/mm-everything] > > > > > > > > > > > > url: https://github.com/intel-lab-lkp/linux/commits/Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953 > > > > > > base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything > > > > > > config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220606/202206060911.I8rRqGwC-lkp@intel.com/config) > > > > > > compiler: gcc-11 (Debian 11.3.0-1) 11.3.0 > > > > > > reproduce (this is a W=1 build): > > > > > > # https://github.com/intel-lab-lkp/linux/commit/d87b6065d6050b89930cca0814921aca7c269286 > > > > > > git remote add linux-review https://github.com/intel-lab-lkp/linux > > > > > > git fetch --no-tags linux-review Zach-O-Keefe/mm-userspace-hugepage-collapse/20220606-012953 > > > > > > git checkout d87b6065d6050b89930cca0814921aca7c269286 > > > > > > # save the config file > > > > > > mkdir build_dir && cp config build_dir/.config > > > > > > make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash > > > > > > > > > > > > If you fix the issue, kindly add following tag where applicable > > > > > > Reported-by: kernel test robot <lkp@intel.com> > > > > > > > > > > > > All warnings (new ones prefixed by >>): > > > > > > > > > > > > mm/khugepaged.c: In function 'khugepaged': > > > > > > >> mm/khugepaged.c:2284:1: warning: the frame size of 4160 
bytes is larger than 2048 bytes [-Wframe-larger-than=] > > > > > > 2284 | } > > > > > > | ^ > > > > > > > > > > Thanks lkp@intel.com. > > > > > > > > > > This is due to config with: > > > > > > > > > > CONFIG_FRAME_WARN=2048 > > > > > CONFIG_NODES_SHIFT=10 > > > > > > > > > > Where struct collapse_control has a member int > > > > > node_load[MAX_NUMNODES], and we stack allocate one. > > > > > > > > > > Is this a configuration that needs to be supported? 1024 nodes seems > > > > > like a lot and I'm not sure if these configs are randomly generated or > > > > > are reminiscent of real systems. > > > > > > > > Adding 4k to the stack isn't a good thing to do. It's trivial to > > > > kmalloc the thing, so why not do that? > > > > > > Thanks, Andrew. Yeah, I just suggested that too. > > > > Thanks Yang / Andrew for taking the time to voice your suggestions. > > > > I'll go ahead and just kmalloc() the thing and fail if we can't. > > > > Yang, is there a reason to kmalloc() the entire struct > > collapse_control with trailing flex array vs stack allocating the > > struct collapse_control + kmalloc()'ing the node_load array? > > I don't think those two have too much difference. I don't have a > strong preference personally. However you could choose: > > Define collapse_control as: > struct collapse_control { > xxx; > ... > int node_load[MAX_NUMNODES]; > } > Then you could kmalloc the whole struct. > > Or it could be defined as: > struct collapse_control { > xxx; > ... > int *node_load[]; > } > In this way you could allocate collapse_control on stack or by > kmalloc, then kmalloc node_load for all possible nodes instead of > MAX_NUMNODES. This may have a better success rate since you do > kmalloc much less memory (typically the number of possible nodes is > much less than MAX_NUMNODES), but it may be not worth it since the > error handling path is more complicated and it may not make too much > difference. 
> > The first choice is definitely much simpler, you may want to try that first. Thanks for the suggestion. The first approach also has the benefit of letting us statically allocate one for khugepaged, which simplifies the error paths there. I'll try that. Again, thanks for taking the time to review and help out / suggest improvements :) Best, Zach > > > > > > > > > > > > I'll await some reviewer input (hopefully positive ;)) before merging > > > > this series.
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 7a914ca19e96..907d0b2bd4bd 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -86,6 +86,14 @@ static struct kmem_cache *mm_slot_cache __read_mostly; #define MAX_PTE_MAPPED_THP 8 +struct collapse_control { + /* Num pages scanned per node */ + int node_load[MAX_NUMNODES]; + + /* Last target selected in khugepaged_find_target_node() */ + int last_target_node; +}; + /** * struct mm_slot - hash lookup from mm to mm_slot * @hash: hash collision list @@ -777,9 +785,7 @@ static void khugepaged_alloc_sleep(void) remove_wait_queue(&khugepaged_wait, &wait); } -static int khugepaged_node_load[MAX_NUMNODES]; - -static bool khugepaged_scan_abort(int nid) +static bool khugepaged_scan_abort(int nid, struct collapse_control *cc) { int i; @@ -791,11 +797,11 @@ static bool khugepaged_scan_abort(int nid) return false; /* If there is a count for this node already, it must be acceptable */ - if (khugepaged_node_load[nid]) + if (cc->node_load[nid]) return false; for (i = 0; i < MAX_NUMNODES; i++) { - if (!khugepaged_node_load[i]) + if (!cc->node_load[i]) continue; if (node_distance(nid, i) > node_reclaim_distance) return true; @@ -810,32 +816,31 @@ static inline gfp_t alloc_hugepage_khugepaged_gfpmask(void) } #ifdef CONFIG_NUMA -static int khugepaged_find_target_node(void) +static int khugepaged_find_target_node(struct collapse_control *cc) { - static int last_khugepaged_target_node = NUMA_NO_NODE; int nid, target_node = 0, max_value = 0; /* find first node with max normal pages hit */ for (nid = 0; nid < MAX_NUMNODES; nid++) - if (khugepaged_node_load[nid] > max_value) { - max_value = khugepaged_node_load[nid]; + if (cc->node_load[nid] > max_value) { + max_value = cc->node_load[nid]; target_node = nid; } /* do some balance if several nodes have the same hit record */ - if (target_node <= last_khugepaged_target_node) - for (nid = last_khugepaged_target_node + 1; nid < MAX_NUMNODES; - nid++) - if (max_value == 
khugepaged_node_load[nid]) { + if (target_node <= cc->last_target_node) + for (nid = cc->last_target_node + 1; nid < MAX_NUMNODES; + nid++) + if (max_value == cc->node_load[nid]) { target_node = nid; break; } - last_khugepaged_target_node = target_node; + cc->last_target_node = target_node; return target_node; } #else -static int khugepaged_find_target_node(void) +static int khugepaged_find_target_node(struct collapse_control *cc) { return 0; } @@ -1155,10 +1160,9 @@ static void collapse_huge_page(struct mm_struct *mm, return; } -static int khugepaged_scan_pmd(struct mm_struct *mm, - struct vm_area_struct *vma, - unsigned long address, - struct page **hpage) +static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, struct page **hpage, + struct collapse_control *cc) { pmd_t *pmd; pte_t *pte, *_pte; @@ -1176,7 +1180,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, if (result != SCAN_SUCCEED) goto out; - memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load)); + memset(cc->node_load, 0, sizeof(cc->node_load)); pte = pte_offset_map_lock(mm, pmd, address, &ptl); for (_address = address, _pte = pte; _pte < pte+HPAGE_PMD_NR; _pte++, _address += PAGE_SIZE) { @@ -1242,16 +1246,16 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, /* * Record which node the original page is from and save this - * information to khugepaged_node_load[]. + * information to cc->node_load[]. * Khugepaged will allocate hugepage from the node has the max * hit record. 
*/ node = page_to_nid(page); - if (khugepaged_scan_abort(node)) { + if (khugepaged_scan_abort(node, cc)) { result = SCAN_SCAN_ABORT; goto out_unmap; } - khugepaged_node_load[node]++; + cc->node_load[node]++; if (!PageLRU(page)) { result = SCAN_PAGE_LRU; goto out_unmap; @@ -1302,7 +1306,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, out_unmap: pte_unmap_unlock(pte, ptl); if (ret) { - node = khugepaged_find_target_node(); + node = khugepaged_find_target_node(cc); /* collapse_huge_page will return with the mmap_lock released */ collapse_huge_page(mm, address, hpage, node, referenced, unmapped); @@ -1958,8 +1962,9 @@ static void collapse_file(struct mm_struct *mm, /* TODO: tracepoints */ } -static void khugepaged_scan_file(struct mm_struct *mm, - struct file *file, pgoff_t start, struct page **hpage) +static void khugepaged_scan_file(struct mm_struct *mm, struct file *file, + pgoff_t start, struct page **hpage, + struct collapse_control *cc) { struct page *page = NULL; struct address_space *mapping = file->f_mapping; @@ -1970,7 +1975,7 @@ static void khugepaged_scan_file(struct mm_struct *mm, present = 0; swap = 0; - memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load)); + memset(cc->node_load, 0, sizeof(cc->node_load)); rcu_read_lock(); xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) { if (xas_retry(&xas, page)) @@ -1995,11 +2000,11 @@ static void khugepaged_scan_file(struct mm_struct *mm, } node = page_to_nid(page); - if (khugepaged_scan_abort(node)) { + if (khugepaged_scan_abort(node, cc)) { result = SCAN_SCAN_ABORT; break; } - khugepaged_node_load[node]++; + cc->node_load[node]++; if (!PageLRU(page)) { result = SCAN_PAGE_LRU; @@ -2032,7 +2037,7 @@ static void khugepaged_scan_file(struct mm_struct *mm, result = SCAN_EXCEED_NONE_PTE; count_vm_event(THP_SCAN_EXCEED_NONE_PTE); } else { - node = khugepaged_find_target_node(); + node = khugepaged_find_target_node(cc); collapse_file(mm, file, start, hpage, node); } } @@ -2040,8 +2045,9 @@ static void 
khugepaged_scan_file(struct mm_struct *mm, /* TODO: tracepoints */ } #else -static void khugepaged_scan_file(struct mm_struct *mm, - struct file *file, pgoff_t start, struct page **hpage) +static void khugepaged_scan_file(struct mm_struct *mm, struct file *file, + pgoff_t start, struct page **hpage, + struct collapse_control *cc) { BUILD_BUG(); } @@ -2052,7 +2058,8 @@ static void khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot) #endif static unsigned int khugepaged_scan_mm_slot(unsigned int pages, - struct page **hpage) + struct page **hpage, + struct collapse_control *cc) __releases(&khugepaged_mm_lock) __acquires(&khugepaged_mm_lock) { @@ -2133,12 +2140,13 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, mmap_read_unlock(mm); ret = 1; - khugepaged_scan_file(mm, file, pgoff, hpage); + khugepaged_scan_file(mm, file, pgoff, hpage, + cc); fput(file); } else { ret = khugepaged_scan_pmd(mm, vma, khugepaged_scan.address, - hpage); + hpage, cc); } /* move to next address */ khugepaged_scan.address += HPAGE_PMD_SIZE; @@ -2194,7 +2202,7 @@ static int khugepaged_wait_event(void) kthread_should_stop(); } -static void khugepaged_do_scan(void) +static void khugepaged_do_scan(struct collapse_control *cc) { struct page *hpage = NULL; unsigned int progress = 0, pass_through_head = 0; @@ -2218,7 +2226,7 @@ static void khugepaged_do_scan(void) if (khugepaged_has_work() && pass_through_head < 2) progress += khugepaged_scan_mm_slot(pages - progress, - &hpage); + &hpage, cc); else progress = pages; spin_unlock(&khugepaged_mm_lock); @@ -2254,12 +2262,15 @@ static void khugepaged_wait_work(void) static int khugepaged(void *none) { struct mm_slot *mm_slot; + struct collapse_control cc = { + .last_target_node = NUMA_NO_NODE, + }; set_freezable(); set_user_nice(current, MAX_NICE); while (!kthread_should_stop()) { - khugepaged_do_scan(); + khugepaged_do_scan(&cc); khugepaged_wait_work(); }
Modularize hugepage collapse by introducing struct collapse_control. This structure describes the properties of the requested collapse and also serves as a local scratch pad for use during the collapse itself. Start by moving the global per-node khugepaged statistics into this new structure, and stack-allocate one for the khugepaged collapse context. Signed-off-by: Zach O'Keefe <zokeefe@google.com> --- mm/khugepaged.c | 87 ++++++++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 38 deletions(-)