Message ID | 20210308202047.1903802-1-minchan@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2] mm: page_alloc: dump migrate-failed pages | expand |
Hi Minchan, I love your patch! Perhaps something to improve: [auto build test WARNING on hnaz-linux-mm/master] url: https://github.com/0day-ci/linux/commits/Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205 base: https://github.com/hnaz/linux-mm master config: m68k-allmodconfig (attached as .config) compiler: m68k-linux-gcc (GCC) 9.3.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/3c635af37b862e9c601ee8d5818f7da9cd3e2e57 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205 git checkout 3c635af37b862e9c601ee8d5818f7da9cd3e2e57 # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=m68k If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra] 169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory) | ^~ include/linux/compiler.h:78:42: note: in definition of macro 'unlikely' 78 | # define unlikely(x) __builtin_expect(!!(x), 0) | ^ include/linux/scatterlist.h:143:2: note: in expansion of macro 'BUG_ON' 143 | BUG_ON(!virt_addr_valid(buf)); | ^~~~~~ include/linux/scatterlist.h:143:10: note: in expansion of macro 'virt_addr_valid' 143 | BUG_ON(!virt_addr_valid(buf)); | ^~~~~~~~~~~~~~~ In file included from arch/m68k/include/asm/page.h:60, from arch/m68k/include/asm/thread_info.h:6, from include/linux/thread_info.h:38, from include/asm-generic/preempt.h:5, from ./arch/m68k/include/generated/asm/preempt.h:1, from include/linux/preempt.h:78, from include/linux/spinlock.h:51, from 
include/linux/mmzone.h:8, from include/linux/gfp.h:6, from include/linux/mm.h:10, from mm/page_alloc.c:19: mm/internal.h: In function 'mem_map_next': arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra] 169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory) | ^~ arch/m68k/include/asm/page_mm.h:170:25: note: in expansion of macro 'virt_addr_valid' 170 | #define pfn_valid(pfn) virt_addr_valid(pfn_to_virt(pfn)) | ^~~~~~~~~~~~~~~ mm/internal.h:456:8: note: in expansion of macro 'pfn_valid' 456 | if (!pfn_valid(pfn)) | ^~~~~~~~~ mm/page_alloc.c: In function 'reserve_bootmem_region': arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra] 169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory) | ^~ arch/m68k/include/asm/page_mm.h:170:25: note: in expansion of macro 'virt_addr_valid' 170 | #define pfn_valid(pfn) virt_addr_valid(pfn_to_virt(pfn)) | ^~~~~~~~~~~~~~~ mm/page_alloc.c:1444:7: note: in expansion of macro 'pfn_valid' 1444 | if (pfn_valid(start_pfn)) { | ^~~~~~~~~ mm/page_alloc.c: In function '__pageblock_pfn_to_page': arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra] 169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory) | ^~ arch/m68k/include/asm/page_mm.h:170:25: note: in expansion of macro 'virt_addr_valid' 170 | #define pfn_valid(pfn) virt_addr_valid(pfn_to_virt(pfn)) | ^~~~~~~~~~~~~~~ mm/page_alloc.c:1576:7: note: in expansion of macro 'pfn_valid' 1576 | if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn)) | ^~~~~~~~~ arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra] 169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory) | ^~ 
arch/m68k/include/asm/page_mm.h:170:25: note: in expansion of macro 'virt_addr_valid' 170 | #define pfn_valid(pfn) virt_addr_valid(pfn_to_virt(pfn)) | ^~~~~~~~~~~~~~~ mm/page_alloc.c:1576:32: note: in expansion of macro 'pfn_valid' 1576 | if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn)) | ^~~~~~~~~ arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra] 169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory) | ^~ arch/m68k/include/asm/page_mm.h:170:25: note: in expansion of macro 'virt_addr_valid' 170 | #define pfn_valid(pfn) virt_addr_valid(pfn_to_virt(pfn)) | ^~~~~~~~~~~~~~~ include/linux/memory_hotplug.h:242:6: note: in expansion of macro 'pfn_valid' 242 | if (pfn_valid(pfn)) \ | ^~~~~~~~~ mm/page_alloc.c:1579:15: note: in expansion of macro 'pfn_to_online_page' 1579 | start_page = pfn_to_online_page(start_pfn); | ^~~~~~~~~~~~~~~~~~ In file included from include/asm-generic/bug.h:5, from arch/m68k/include/asm/bug.h:32, from include/linux/bug.h:5, from include/linux/mmdebug.h:5, from include/linux/mm.h:9, from mm/page_alloc.c:19: mm/page_alloc.c: In function 'free_pages': arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra] 169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory) | ^~ include/linux/compiler.h:78:42: note: in definition of macro 'unlikely' 78 | # define unlikely(x) __builtin_expect(!!(x), 0) | ^ include/linux/mmdebug.h:18:25: note: in expansion of macro 'BUG_ON' 18 | #define VM_BUG_ON(cond) BUG_ON(cond) | ^~~~~~ mm/page_alloc.c:4953:3: note: in expansion of macro 'VM_BUG_ON' 4953 | VM_BUG_ON(!virt_addr_valid((void *)addr)); | ^~~~~~~~~ mm/page_alloc.c:4953:14: note: in expansion of macro 'virt_addr_valid' 4953 | VM_BUG_ON(!virt_addr_valid((void *)addr)); | ^~~~~~~~~~~~~~~ mm/page_alloc.c: At top level: mm/page_alloc.c:6122:23: warning: 
no previous prototype for 'memmap_init' [-Wmissing-prototypes] 6122 | void __meminit __weak memmap_init(unsigned long size, int nid, | ^~~~~~~~~~~ >> mm/page_alloc.c:8348:5: warning: no previous prototype for 'alloc_contig_ratelimit' [-Wmissing-prototypes] 8348 | int alloc_contig_ratelimit(void) | ^~~~~~~~~~~~~~~~~~~~~~ >> mm/page_alloc.c:8353:6: warning: no previous prototype for 'dump_migrate_failure_pages' [-Wmissing-prototypes] 8353 | void dump_migrate_failure_pages(struct list_head *page_list) | ^~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from arch/m68k/include/asm/page.h:60, from arch/m68k/include/asm/thread_info.h:6, from include/linux/thread_info.h:38, from include/asm-generic/preempt.h:5, from ./arch/m68k/include/generated/asm/preempt.h:1, from include/linux/preempt.h:78, from include/linux/spinlock.h:51, from include/linux/mmzone.h:8, from include/linux/gfp.h:6, from include/linux/mm.h:10, from mm/page_alloc.c:19: mm/page_alloc.c: In function 'pfn_range_valid_contig': arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra] 169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory) | ^~ arch/m68k/include/asm/page_mm.h:170:25: note: in expansion of macro 'virt_addr_valid' 170 | #define pfn_valid(pfn) virt_addr_valid(pfn_to_virt(pfn)) | ^~~~~~~~~~~~~~~ include/linux/memory_hotplug.h:242:6: note: in expansion of macro 'pfn_valid' 242 | if (pfn_valid(pfn)) \ | ^~~~~~~~~ mm/page_alloc.c:8592:10: note: in expansion of macro 'pfn_to_online_page' 8592 | page = pfn_to_online_page(i); | ^~~~~~~~~~~~~~~~~~ Kconfig warnings: (for reference only) WARNING: unmet direct dependencies detected for NEED_MULTIPLE_NODES Depends on DISCONTIGMEM || NUMA Selected by - SINGLE_MEMORY_CHUNK && MMU vim +/alloc_contig_ratelimit +8348 mm/page_alloc.c 8343 8344 #if defined(CONFIG_DYNAMIC_DEBUG) || \ 8345 (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) 8346 static 
DEFINE_RATELIMIT_STATE(alloc_contig_ratelimit_state, 8347 DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); > 8348 int alloc_contig_ratelimit(void) 8349 { 8350 return __ratelimit(&alloc_contig_ratelimit_state); 8351 } 8352 > 8353 void dump_migrate_failure_pages(struct list_head *page_list) 8354 { 8355 DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, 8356 "migrate failure"); 8357 if (DYNAMIC_DEBUG_BRANCH(descriptor) && 8358 alloc_contig_ratelimit()) { 8359 struct page *page; 8360 8361 WARN(1, "failed callstack"); 8362 list_for_each_entry(page, page_list, lru) 8363 dump_page(page, "migration failure"); 8364 } 8365 } 8366 #else 8367 static inline void dump_migrate_failure_pages(struct list_head *page_list) 8368 { 8369 } 8370 #endif 8371 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On Tue, Mar 09, 2021 at 05:29:30AM +0800, kernel test robot wrote: > Hi Minchan, > > I love your patch! Perhaps something to improve: > > [auto build test WARNING on hnaz-linux-mm/master] > > url: https://github.com/0day-ci/linux/commits/Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205 > base: https://github.com/hnaz/linux-mm master > config: m68k-allmodconfig (attached as .config) > compiler: m68k-linux-gcc (GCC) 9.3.0 > reproduce (this is a W=1 build): > wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross > chmod +x ~/bin/make.cross > # https://github.com/0day-ci/linux/commit/3c635af37b862e9c601ee8d5818f7da9cd3e2e57 > git remote add linux-review https://github.com/0day-ci/linux > git fetch --no-tags linux-review Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205 > git checkout 3c635af37b862e9c601ee8d5818f7da9cd3e2e57 > # save the attached .config to linux build tree > COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=m68k > > If you fix the issue, kindly add following tag as appropriate > Reported-by: kernel test robot <lkp@intel.com> > > All warnings (new ones prefixed by >>): > > arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra] > 169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory) > | ^~ > include/linux/compiler.h:78:42: note: in definition of macro 'unlikely' > 78 | # define unlikely(x) __builtin_expect(!!(x), 0) > | ^ > include/linux/scatterlist.h:143:2: note: in expansion of macro 'BUG_ON' > 143 | BUG_ON(!virt_addr_valid(buf)); > | ^~~~~~ > include/linux/scatterlist.h:143:10: note: in expansion of macro 'virt_addr_valid' > 143 | BUG_ON(!virt_addr_valid(buf)); > | ^~~~~~~~~~~~~~~ > In file included from arch/m68k/include/asm/page.h:60, > from arch/m68k/include/asm/thread_info.h:6, > from include/linux/thread_info.h:38, > from 
include/asm-generic/preempt.h:5, > from ./arch/m68k/include/generated/asm/preempt.h:1, > from include/linux/preempt.h:78, > from include/linux/spinlock.h:51, > from include/linux/mmzone.h:8, > from include/linux/gfp.h:6, > from include/linux/mm.h:10, > from mm/page_alloc.c:19: I am not sure this is triggered by the patch since I could still see the warning even after reverting the patch.
On Mon, 8 Mar 2021 12:20:47 -0800 Minchan Kim <minchan@kernel.org> wrote: > alloc_contig_range is usually used on cma area or movable zone. > It's critical if the page migration fails on those areas so > dump more debugging message. > > page refcount, mapcount with page flags on dump_page are > helpful information to deduce the culprit. Furthermore, > dump_page_owner was super helpful to find long term pinner > who initiated the page allocation. > > Admin could enable the dump like this(by default, disabled) > > echo "func dump_migrate_failure_pages +p" > control > > Admin could disable it. > > echo "func dump_migrate_failure_pages =_" > control > > ... > > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -8453,6 +8453,34 @@ static unsigned long pfn_max_align_up(unsigned long pfn) > pageblock_nr_pages)); > } > > +#if defined(CONFIG_DYNAMIC_DEBUG) || \ > + (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) > +static DEFINE_RATELIMIT_STATE(alloc_contig_ratelimit_state, > + DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); > +int alloc_contig_ratelimit(void) > +{ > + return __ratelimit(&alloc_contig_ratelimit_state); > +} Wow, that's an eyesore. We're missing helpers in the ratelimit code. Can we do something like /* description goes here */ #define RATELIMIT2(interval, burst) ({ static DEFINE_RATELIMIT_STATE(_rs, interval, burst); __ratelimit(_rs); }) #define RATELIMIT() RATELIMIT2(DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST) > +void dump_migrate_failure_pages(struct list_head *page_list) > +{ > + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, > + "migrate failure"); > + if (DYNAMIC_DEBUG_BRANCH(descriptor) && > + alloc_contig_ratelimit()) { > + struct page *page; > + > + WARN(1, "failed callstack"); > + list_for_each_entry(page, page_list, lru) > + dump_page(page, "migration failure"); > + } > +} Then we can simply do if (DYNAMIC_DEBUG_BRANCH(descriptor) && RATELIMIT())
Hi Minchan, I love your patch! Perhaps something to improve: [auto build test WARNING on hnaz-linux-mm/master] url: https://github.com/0day-ci/linux/commits/Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205 base: https://github.com/hnaz/linux-mm master config: i386-randconfig-s001-20210309 (attached as .config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 reproduce: # apt-get install sparse # sparse version: v0.6.3-262-g5e674421-dirty # https://github.com/0day-ci/linux/commit/3c635af37b862e9c601ee8d5818f7da9cd3e2e57 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205 git checkout 3c635af37b862e9c601ee8d5818f7da9cd3e2e57 # save the attached .config to linux build tree make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> "sparse warnings: (new ones prefixed by >>)" >> mm/page_alloc.c:8348:5: sparse: sparse: symbol 'alloc_contig_ratelimit' was not declared. Should it be static? >> mm/page_alloc.c:8353:6: sparse: sparse: symbol 'dump_migrate_failure_pages' was not declared. Should it be static? mm/page_alloc.c: note: in included file (through include/linux/mm.h): include/linux/gfp.h:325:27: sparse: sparse: restricted gfp_t degrades to integer include/linux/gfp.h:325:27: sparse: sparse: restricted gfp_t degrades to integer include/linux/gfp.h:325:27: sparse: sparse: restricted gfp_t degrades to integer include/linux/gfp.h:325:27: sparse: sparse: restricted gfp_t degrades to integer Please review and possibly fold the followup patch. --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On 3/9/21 6:29 AM, Minchan Kim wrote: > On Tue, Mar 09, 2021 at 05:29:30AM +0800, kernel test robot wrote: >> Hi Minchan, >> >> I love your patch! Perhaps something to improve: >> >> [auto build test WARNING on hnaz-linux-mm/master] >> >> url: https://github.com/0day-ci/linux/commits/Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205 >> base: https://github.com/hnaz/linux-mm master >> config: m68k-allmodconfig (attached as .config) >> compiler: m68k-linux-gcc (GCC) 9.3.0 >> reproduce (this is a W=1 build): >> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross >> chmod +x ~/bin/make.cross >> # https://github.com/0day-ci/linux/commit/3c635af37b862e9c601ee8d5818f7da9cd3e2e57 >> git remote add linux-review https://github.com/0day-ci/linux >> git fetch --no-tags linux-review Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205 >> git checkout 3c635af37b862e9c601ee8d5818f7da9cd3e2e57 >> # save the attached .config to linux build tree >> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=m68k >> >> If you fix the issue, kindly add following tag as appropriate >> Reported-by: kernel test robot <lkp@intel.com> >> >> All warnings (new ones prefixed by >>): >> >> arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra] >> 169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory) >> | ^~ >> include/linux/compiler.h:78:42: note: in definition of macro 'unlikely' >> 78 | # define unlikely(x) __builtin_expect(!!(x), 0) >> | ^ >> include/linux/scatterlist.h:143:2: note: in expansion of macro 'BUG_ON' >> 143 | BUG_ON(!virt_addr_valid(buf)); >> | ^~~~~~ >> include/linux/scatterlist.h:143:10: note: in expansion of macro 'virt_addr_valid' >> 143 | BUG_ON(!virt_addr_valid(buf)); >> | ^~~~~~~~~~~~~~~ >> In file included from arch/m68k/include/asm/page.h:60, >> from 
arch/m68k/include/asm/thread_info.h:6, >> from include/linux/thread_info.h:38, >> from include/asm-generic/preempt.h:5, >> from ./arch/m68k/include/generated/asm/preempt.h:1, >> from include/linux/preempt.h:78, >> from include/linux/spinlock.h:51, >> from include/linux/mmzone.h:8, >> from include/linux/gfp.h:6, >> from include/linux/mm.h:10, >> from mm/page_alloc.c:19: > I am not sure this is triggered by the patch since I could see the > warn with reverting the patch. Hi Minchan, Only the lines prefixed by ">>" are related with the patch: >> mm/page_alloc.c:8348:5: warning: no previous prototype for 'alloc_contig_ratelimit' [-Wmissing-prototypes] 8348 | int alloc_contig_ratelimit(void) | ^~~~~~~~~~~~~~~~~~~~~~ >> mm/page_alloc.c:8353:6: warning: no previous prototype for 'dump_migrate_failure_pages' [-Wmissing-prototypes] 8353 | void dump_migrate_failure_pages(struct list_head *page_list) Best Regards, Rong Chen
On Mon, Mar 08, 2021 at 04:21:28PM -0800, Andrew Morton wrote: > On Mon, 8 Mar 2021 12:20:47 -0800 Minchan Kim <minchan@kernel.org> wrote: > > > alloc_contig_range is usually used on cma area or movable zone. > > It's critical if the page migration fails on those areas so > > dump more debugging message. > > > > page refcount, mapcount with page flags on dump_page are > > helpful information to deduce the culprit. Furthermore, > > dump_page_owner was super helpful to find long term pinner > > who initiated the page allocation. > > > > Admin could enable the dump like this(by default, disabled) > > > > echo "func dump_migrate_failure_pages +p" > control > > > > Admin could disable it. > > > > echo "func dump_migrate_failure_pages =_" > control > > > > ... > > > > --- a/mm/page_alloc.c > > +++ b/mm/page_alloc.c > > @@ -8453,6 +8453,34 @@ static unsigned long pfn_max_align_up(unsigned long pfn) > > pageblock_nr_pages)); > > } > > > > +#if defined(CONFIG_DYNAMIC_DEBUG) || \ > > + (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) > > +static DEFINE_RATELIMIT_STATE(alloc_contig_ratelimit_state, > > + DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); > > +int alloc_contig_ratelimit(void) > > +{ > > + return __ratelimit(&alloc_contig_ratelimit_state); > > +} > > Wow, that's an eyesore. We're missing helpers in the ratelimit code. 
> Can we do something like > > /* description goes here */ > #define RATELIMIT2(interval, burst) > ({ > static DEFINE_RATELIMIT_STATE(_rs, interval, burst); > > __ratelimit(_rs); > }) > > #define RATELIMIT() > RATELIMIT2(DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST) > > > +void dump_migrate_failure_pages(struct list_head *page_list) > > +{ > > + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, > > + "migrate failure"); > > + if (DYNAMIC_DEBUG_BRANCH(descriptor) && > > + alloc_contig_ratelimit()) { > > + struct page *page; > > + > > + WARN(1, "failed callstack"); > > + list_for_each_entry(page, page_list, lru) > > + dump_page(page, "migration failure"); > > + } > > +} > > Then we can simply do > > if (DYNAMIC_DEBUG_BRANCH(descriptor) && RATELIMIT()) Sounds like a good idea to me. There are many places that would benefit. However, let me leave it as-is until we finish discussing this patch. We could clean them up in a follow-up patch. Thank you.
On Tue, Mar 09, 2021 at 08:41:44AM +0800, Rong Chen wrote: > > > On 3/9/21 6:29 AM, Minchan Kim wrote: > > On Tue, Mar 09, 2021 at 05:29:30AM +0800, kernel test robot wrote: > > > Hi Minchan, > > > > > > I love your patch! Perhaps something to improve: > > > > > > [auto build test WARNING on hnaz-linux-mm/master] > > > > > > url: https://github.com/0day-ci/linux/commits/Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205 > > > base: https://github.com/hnaz/linux-mm master > > > config: m68k-allmodconfig (attached as .config) > > > compiler: m68k-linux-gcc (GCC) 9.3.0 > > > reproduce (this is a W=1 build): > > > wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross > > > chmod +x ~/bin/make.cross > > > # https://github.com/0day-ci/linux/commit/3c635af37b862e9c601ee8d5818f7da9cd3e2e57 > > > git remote add linux-review https://github.com/0day-ci/linux > > > git fetch --no-tags linux-review Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205 > > > git checkout 3c635af37b862e9c601ee8d5818f7da9cd3e2e57 > > > # save the attached .config to linux build tree > > > COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=m68k > > > > > > If you fix the issue, kindly add following tag as appropriate > > > Reported-by: kernel test robot <lkp@intel.com> > > > > > > All warnings (new ones prefixed by >>): > > > > > > arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra] > > > 169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory) > > > | ^~ > > > include/linux/compiler.h:78:42: note: in definition of macro 'unlikely' > > > 78 | # define unlikely(x) __builtin_expect(!!(x), 0) > > > | ^ > > > include/linux/scatterlist.h:143:2: note: in expansion of macro 'BUG_ON' > > > 143 | BUG_ON(!virt_addr_valid(buf)); > > > | ^~~~~~ > > > include/linux/scatterlist.h:143:10: note: in expansion of macro 
'virt_addr_valid' > > > 143 | BUG_ON(!virt_addr_valid(buf)); > > > | ^~~~~~~~~~~~~~~ > > > In file included from arch/m68k/include/asm/page.h:60, > > > from arch/m68k/include/asm/thread_info.h:6, > > > from include/linux/thread_info.h:38, > > > from include/asm-generic/preempt.h:5, > > > from ./arch/m68k/include/generated/asm/preempt.h:1, > > > from include/linux/preempt.h:78, > > > from include/linux/spinlock.h:51, > > > from include/linux/mmzone.h:8, > > > from include/linux/gfp.h:6, > > > from include/linux/mm.h:10, > > > from mm/page_alloc.c:19: > > I am not sure this is triggered by the patch since I could see the > > warn with reverting the patch. > > Hi Minchan, > > Only the lines prefixed by ">>" are related with the patch: > > > > mm/page_alloc.c:8348:5: warning: no previous prototype for 'alloc_contig_ratelimit' [-Wmissing-prototypes] > > 8348 | int alloc_contig_ratelimit(void) > | ^~~~~~~~~~~~~~~~~~~~~~ > > > > mm/page_alloc.c:8353:6: warning: no previous prototype for 'dump_migrate_failure_pages' [-Wmissing-prototypes] > > 8353 | void dump_migrate_failure_pages(struct list_head *page_list) > Thanks for the clarification. Then, below should fix the warning. I will respin it at next revision once I get more feedback. +#if defined(CONFIG_DYNAMIC_DEBUG) || \ + (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) +static void dump_migrate_failure_pages(struct list_head *page_list) +{ + static DEFINE_RATELIMIT_STATE(_rs, + DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, + "migrate failure"); + if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) { + struct page *page; + + WARN(1, "failed callstack"); + list_for_each_entry(page, page_list, lru) + dump_page(page, "migration failure"); + } +} +#else +static inline void dump_migrate_failure_pages(struct list_head *page_list) +{ +} +#endif + /* [start, end) must belong to a single zone. 
*/ static int __alloc_contig_migrate_range(struct compact_control *cc, unsigned long start, unsigned long end) @@ -8496,6 +8520,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE); } if (ret < 0) { + dump_migrate_failure_pages(&cc->migratepages); putback_movable_pages(&cc->migratepages); return ret; }
On Mon 08-03-21 12:20:47, Minchan Kim wrote: > alloc_contig_range is usually used on cma area or movable zone. > It's critical if the page migration fails on those areas so > dump more debugging message. I disagree with this statement. alloc_contig_range is not a reliable allocator. Any user, be it CMA or direct users of alloc_contig_range have to deal with allocation failures. Debugging information can be still useful but considering migration failures critical is an overstatement to say the least. > page refcount, mapcount with page flags on dump_page are > helpful information to deduce the culprit. Furthermore, > dump_page_owner was super helpful to find long term pinner > who initiated the page allocation. > > Admin could enable the dump like this(by default, disabled) > > echo "func dump_migrate_failure_pages +p" > control > > Admin could disable it. > > echo "func dump_migrate_failure_pages =_" > control My original idea was to add few pr_debug and -DDYNAMIC_DEBUG_MODULE for page_alloc.c. It makes sense to enable a whole bunch at once though. The naming should better reflect this is alloc_contig_range related because the above sounds like a generic migration failure thing. Somebody more familiar with the dynamic debugging infrastructure needs to have a look but from a quick look it seems ok. Do we really need all the ugly ifdefery, though? Don't we want to have this compiled in all the time and just rely on the static branch managed by the dynamic debugging framework? [...] > +void dump_migrate_failure_pages(struct list_head *page_list) > +{ > + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, > + "migrate failure"); > + if (DYNAMIC_DEBUG_BRANCH(descriptor) && > + alloc_contig_ratelimit()) { > + struct page *page; > + > + WARN(1, "failed callstack"); > + list_for_each_entry(page, page_list, lru) > + dump_page(page, "migration failure"); > + } Apart from the above, do we have to warn for something that is a debugging aid? 
A similar concern wrt dump_page which uses pr_warn and page owner is using even pr_alert. Would it make sense to add a loglevel parameter both into __dump_page and dump_page_owner?
On Tue, Mar 09, 2021 at 10:32:51AM +0100, Michal Hocko wrote: > On Mon 08-03-21 12:20:47, Minchan Kim wrote: > > alloc_contig_range is usually used on cma area or movable zone. > > It's critical if the page migration fails on those areas so > > dump more debugging message. > > I disagree with this statement. alloc_contig_range is not a reliable > allocator. Any user, be it CMA or direct users of alloc_contig_range > have to deal with allocation failures. Debugging information can be > still useful but considering migration failures critical is > overstatement to say the least. Fair enough. Let's change it. "Currently, debugging CMA allocation failure is too hard due to lacking of page information. alloc_contig_range is proper place to dump them since it has migrate-failed page list." > > > page refcount, mapcount with page flags on dump_page are > > helpful information to deduce the culprit. Furthermore, > > dump_page_owner was super helpful to find long term pinner > > who initiated the page allocation. > > > > Admin could enable the dump like this(by default, disabled) > > > > echo "func dump_migrate_failure_pages +p" > control > > > > Admin could disable it. > > > > echo "func dump_migrate_failure_pages =_" > control > > My original idea was to add few pr_debug and -DDYNAMIC_DEBUG_MODULE for > page_alloc.c. It makes sense to enable a whole bunch at once though. > The naming should better reflect this is alloc_contig_rage related > because the above sounds like a generic migration failure thing. alloc_contig_dump_pages? > > Somebody more familiar with the dynamic debugging infrastructure needs > to have a look but from from a quick look it seems ok. > > Do we really need all the ugly ifdefery, though? Don't we want to have > this compiled in all the time and just rely on the static branch managed > by the dynamic debugging framework? I have no further idea to make it simple while we keep the flexibility for arguments and print format. 
#if defined(CONFIG_DYNAMIC_DEBUG) || \ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) static void alloc_contig_dump_pages(struct list_head *page_list) { static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, "migrate failure"); if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) { struct page *page; WARN(1, "failed callstack"); list_for_each_entry(page, page_list, lru) dump_page(page, "migration failure"); } } #else static inline void alloc_contig_dump_pages(struct list_head *page_list) { } #endif > > [...] > > +void dump_migrate_failure_pages(struct list_head *page_list) > > +{ > > + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, > > + "migrate failure"); > > + if (DYNAMIC_DEBUG_BRANCH(descriptor) && > > + alloc_contig_ratelimit()) { > > + struct page *page; > > + > > + WARN(1, "failed callstack"); > > + list_for_each_entry(page, page_list, lru) > > + dump_page(page, "migration failure"); > > + } > > Apart from the above, do we have to warn for something that is a > debugging aid? A similar concern wrt dump_page which uses pr_warn and Make sense. > page owner is using even pr_alert. > Would it make sense to add a loglevel parameter both into __dump_page > and dump_page_owner? Let me try it.
On Tue 09-03-21 08:15:41, Minchan Kim wrote: > On Tue, Mar 09, 2021 at 10:32:51AM +0100, Michal Hocko wrote: > > On Mon 08-03-21 12:20:47, Minchan Kim wrote: > > > alloc_contig_range is usually used on cma area or movable zone. > > > It's critical if the page migration fails on those areas so > > > dump more debugging message. > > > > I disagree with this statement. alloc_contig_range is not a reliable > > allocator. Any user, be it CMA or direct users of alloc_contig_range > > have to deal with allocation failures. Debugging information can be > > still useful but considering migration failures critical is > > overstatement to say the least. > > Fair enough. Let's change it. > > "Currently, debugging CMA allocation failure is too hard > due to lacking of page information. alloc_contig_range is > proper place to dump them since it has migrate-failed page > list." "Currently, debugging CMA allocation failures is quite limited. The most commong source of these failures seems to be page migration which doesn't provide any useful information on the reason of the failure by itself. alloc_contig_range can report those failures as it holds a list of migrate-failed pages." > > > page refcount, mapcount with page flags on dump_page are > > > helpful information to deduce the culprit. Furthermore, > > > dump_page_owner was super helpful to find long term pinner > > > who initiated the page allocation. > > > > > > Admin could enable the dump like this(by default, disabled) > > > > > > echo "func dump_migrate_failure_pages +p" > control > > > > > > Admin could disable it. > > > > > > echo "func dump_migrate_failure_pages =_" > control > > > > My original idea was to add few pr_debug and -DDYNAMIC_DEBUG_MODULE for > > page_alloc.c. It makes sense to enable a whole bunch at once though. > > The naming should better reflect this is alloc_contig_rage related > > because the above sounds like a generic migration failure thing. > > alloc_contig_dump_pages? Yes this is more clear. 
> > Somebody more familiar with the dynamic debugging infrastructure needs > > to have a look but from from a quick look it seems ok. > > > > Do we really need all the ugly ifdefery, though? Don't we want to have > > this compiled in all the time and just rely on the static branch managed > > by the dynamic debugging framework? > > I have no further idea to make it simple while we keep the flexibility > for arguments and print format. > > #if defined(CONFIG_DYNAMIC_DEBUG) || \ > (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) > static void alloc_contig_dump_pages(struct list_head *page_list) > { > static DEFINE_RATELIMIT_STATE(_rs, > DEFAULT_RATELIMIT_INTERVAL, > DEFAULT_RATELIMIT_BURST); > > DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, > "migrate failure"); > if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) { > struct page *page; > > WARN(1, "failed callstack"); > list_for_each_entry(page, page_list, lru) > dump_page(page, "migration failure"); > } > } > #else > static inline void alloc_contig_dump_pages(struct list_head *page_list) > { > } > #endif First, you would be much better off by dropping the rate limiting. I am not really convinced this is really necessary as this is a debugging aid enabled on request. A single list can be large enough to swamp logs so why bother? Also are all those CONFIG_DYNAMIC_DEBUG* ifdefs necessary? Can we simply enable DYNAMIC_DEBUG for page_alloc as I've suggested above?
On Tue, Mar 09, 2021 at 05:32:08PM +0100, Michal Hocko wrote: > On Tue 09-03-21 08:15:41, Minchan Kim wrote: > > On Tue, Mar 09, 2021 at 10:32:51AM +0100, Michal Hocko wrote: > > > On Mon 08-03-21 12:20:47, Minchan Kim wrote: > > > > alloc_contig_range is usually used on cma area or movable zone. > > > > It's critical if the page migration fails on those areas so > > > > dump more debugging message. > > > > > > I disagree with this statement. alloc_contig_range is not a reliable > > > allocator. Any user, be it CMA or direct users of alloc_contig_range > > > have to deal with allocation failures. Debugging information can be > > > still useful but considering migration failures critical is > > > overstatement to say the least. > > > > Fair enough. Let's change it. > > > > "Currently, debugging CMA allocation failure is too hard > > due to lacking of page information. alloc_contig_range is > > proper place to dump them since it has migrate-failed page > > list." > > "Currently, debugging CMA allocation failures is quite limited. The most > commong source of these failures seems to be page migration which > doesn't provide any useful information on the reason of the failure by > itself. alloc_contig_range can report those failures as it holds a list > of migrate-failed pages." Will take it. Thanks. < snip > > > > Somebody more familiar with the dynamic debugging infrastructure needs > > > to have a look but from from a quick look it seems ok. > > > > > > Do we really need all the ugly ifdefery, though? Don't we want to have > > > this compiled in all the time and just rely on the static branch managed > > > by the dynamic debugging framework? > > > > I have no further idea to make it simple while we keep the flexibility > > for arguments and print format. 
> > > > #if defined(CONFIG_DYNAMIC_DEBUG) || \ > > (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) > > static void alloc_contig_dump_pages(struct list_head *page_list) > > { > > static DEFINE_RATELIMIT_STATE(_rs, > > DEFAULT_RATELIMIT_INTERVAL, > > DEFAULT_RATELIMIT_BURST); > > > > DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, > > "migrate failure"); > > if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) { > > struct page *page; > > > > WARN(1, "failed callstack"); > > list_for_each_entry(page, page_list, lru) > > dump_page(page, "migration failure"); > > } > > } > > #else > > static inline void alloc_contig_dump_pages(struct list_head *page_list) > > { > > } > > #endif > > First, you would be much better off by droping the rate limitting. I am > nt really convinced this is really necessary as this is a debugging aid > enabled on request. A single list can be large enough to swamp logs so > why bother? No problem. Just added since David mentioned hugetlb pages are easily fail to mgirate at this moment. Yes, We could add the ratelimit if we get complain. > > Also are all those CONFIG_DYNAMIC_DEBUG* ifdefs necessary? Can we > simply enable DYNAMIC_DEBUG for page_alloc as I've suggested above? They are different usecases. With DYNAMIC_DEBUG_MODULE with CONFIG_DYNAMIC_DEBUG_CORE, it works for only specific compile flags as you suggested. (CONFIG_DYNAMIC_DEBUG_CORE is requirement to work DYNAMIC_DEBUG_MODULE. With CONFIG_DYNAMIC_DEBUG, user could enable/disable every dynamic debug places without needing DYNAMIC_DEBUG_MODULE flags for source files. Both usecase makes sense to me.
On Tue, Mar 09, 2021 at 08:15:41AM -0800, Minchan Kim wrote: < snip > > > [...] > > > +void dump_migrate_failure_pages(struct list_head *page_list) > > > +{ > > > + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, > > > + "migrate failure"); > > > + if (DYNAMIC_DEBUG_BRANCH(descriptor) && > > > + alloc_contig_ratelimit()) { > > > + struct page *page; > > > + > > > + WARN(1, "failed callstack"); > > > + list_for_each_entry(page, page_list, lru) > > > + dump_page(page, "migration failure"); > > > + } > > > > Apart from the above, do we have to warn for something that is a > > debugging aid? A similar concern wrt dump_page which uses pr_warn and > > Make sense. > > > page owner is using even pr_alert. > > Would it make sense to add a loglevel parameter both into __dump_page > > and dump_page_owner? > > Let me try it. I looked though them and made first draft to clean them up. It's bigger than my initial expectaion because there are many callsites to use dump_page and stack_trace_print inconsistent loglevel. Since it's not a specific problem for this work, I'd like to deal with it as separate patchset since I don't want to be stuck on here for my initial goal. FYI, Subject: [RFC 0/5] make dump_page aware of loglevel - Forked from [1] dump_page uses __dump_page and dump_page_owner internally to print various information. However, their printk loglevel are inconsistent in that __dump_page: KERN_WARNING __dump_page_owner: KERN_ALERT stack_trace_print: KERN_DEFAULT To make them consistent from dump_page, this patch introduces pr_loglevel in printk and make the utility functions aware of loglevel. Finally, last patch changes dump_page to support loglevel to make the printing level consistent. 
[1] https://lore.kernel.org/linux-mm/YEdAw6gnp9XxoWUQ@dhcp22.suse.cz/ Minchan Kim (5): mm: introduce pr_loglevel for __dump_[page]_owner stacktrace: stack_trace_print aware of loglevel mm: page_owner: dump_page_owner aware of loglevel mm: debug: __dump_page aware of loglevel mm: debug: dump_page aware of loglevel drivers/md/dm-bufio.c | 2 +- drivers/virtio/virtio_mem.c | 2 +- fs/btrfs/ref-verify.c | 2 +- fs/fuse/dev.c | 2 +- include/linux/mmdebug.h | 10 ++++++---- include/linux/page_owner.h | 8 ++++---- include/linux/printk.h | 12 +++++++++++ include/linux/stacktrace.h | 4 ++-- kernel/backtracetest.c | 2 +- kernel/dma/debug.c | 3 ++- kernel/kcsan/report.c | 7 ++++--- kernel/locking/lockdep.c | 3 ++- kernel/stacktrace.c | 5 +++-- mm/debug.c | 40 ++++++++++++++++++------------------- mm/filemap.c | 2 +- mm/gup_test.c | 4 ++-- mm/huge_memory.c | 4 ++-- mm/kasan/report.c | 4 ++-- mm/kfence/report.c | 3 ++- mm/kmemleak.c | 2 +- mm/memory.c | 2 +- mm/memory_hotplug.c | 4 ++-- mm/page_alloc.c | 4 ++-- mm/page_isolation.c | 2 +- mm/page_owner.c | 24 +++++++++++----------- 25 files changed, 88 insertions(+), 69 deletions(-)
On 10.03.21 08:42, Minchan Kim wrote: > On Tue, Mar 09, 2021 at 08:15:41AM -0800, Minchan Kim wrote: > > < snip > > >>> [...] >>>> +void dump_migrate_failure_pages(struct list_head *page_list) >>>> +{ >>>> + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, >>>> + "migrate failure"); >>>> + if (DYNAMIC_DEBUG_BRANCH(descriptor) && >>>> + alloc_contig_ratelimit()) { >>>> + struct page *page; >>>> + >>>> + WARN(1, "failed callstack"); >>>> + list_for_each_entry(page, page_list, lru) >>>> + dump_page(page, "migration failure"); >>>> + } >>> >>> Apart from the above, do we have to warn for something that is a >>> debugging aid? A similar concern wrt dump_page which uses pr_warn and >> >> Make sense. >> >>> page owner is using even pr_alert. >>> Would it make sense to add a loglevel parameter both into __dump_page >>> and dump_page_owner? >> >> Let me try it. > > I looked though them and made first draft to clean them up. > > It's bigger than my initial expectaion because there are many callsites > to use dump_page and stack_trace_print inconsistent loglevel. > Since it's not a specific problem for this work, I'd like to deal with > it as separate patchset since I don't want to be stuck on here for my > initial goal. Why the need to rush regarding your series? If it will clean up your patch significantly, then I think doing the cleanups first is the proper way to go. I really don't get why this is a real problem.
On Tue 09-03-21 09:27:51, Minchan Kim wrote: > On Tue, Mar 09, 2021 at 05:32:08PM +0100, Michal Hocko wrote: > > On Tue 09-03-21 08:15:41, Minchan Kim wrote: > > > On Tue, Mar 09, 2021 at 10:32:51AM +0100, Michal Hocko wrote: > > > > On Mon 08-03-21 12:20:47, Minchan Kim wrote: > > > > > alloc_contig_range is usually used on cma area or movable zone. > > > > > It's critical if the page migration fails on those areas so > > > > > dump more debugging message. > > > > > > > > I disagree with this statement. alloc_contig_range is not a reliable > > > > allocator. Any user, be it CMA or direct users of alloc_contig_range > > > > have to deal with allocation failures. Debugging information can be > > > > still useful but considering migration failures critical is > > > > overstatement to say the least. > > > > > > Fair enough. Let's change it. > > > > > > "Currently, debugging CMA allocation failure is too hard > > > due to lacking of page information. alloc_contig_range is > > > proper place to dump them since it has migrate-failed page > > > list." > > > > "Currently, debugging CMA allocation failures is quite limited. The most > > commong source of these failures seems to be page migration which > > doesn't provide any useful information on the reason of the failure by > > itself. alloc_contig_range can report those failures as it holds a list > > of migrate-failed pages." > > Will take it. Thanks. > > < snip > > > > > > Somebody more familiar with the dynamic debugging infrastructure needs > > > > to have a look but from from a quick look it seems ok. > > > > > > > > Do we really need all the ugly ifdefery, though? Don't we want to have > > > > this compiled in all the time and just rely on the static branch managed > > > > by the dynamic debugging framework? > > > > > > I have no further idea to make it simple while we keep the flexibility > > > for arguments and print format. 
> > > > > > #if defined(CONFIG_DYNAMIC_DEBUG) || \ > > > (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) > > > static void alloc_contig_dump_pages(struct list_head *page_list) > > > { > > > static DEFINE_RATELIMIT_STATE(_rs, > > > DEFAULT_RATELIMIT_INTERVAL, > > > DEFAULT_RATELIMIT_BURST); > > > > > > DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, > > > "migrate failure"); > > > if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) { > > > struct page *page; > > > > > > WARN(1, "failed callstack"); > > > list_for_each_entry(page, page_list, lru) > > > dump_page(page, "migration failure"); > > > } > > > } > > > #else > > > static inline void alloc_contig_dump_pages(struct list_head *page_list) > > > { > > > } > > > #endif > > > > First, you would be much better off by droping the rate limitting. I am > > nt really convinced this is really necessary as this is a debugging aid > > enabled on request. A single list can be large enough to swamp logs so > > why bother? > > No problem. Just added since David mentioned hugetlb pages are easily > fail to mgirate at this moment. Well a single hugetlb allocation can swamp the log already because the ratelimit as suggested above will allow the full list to be dumped. Moreover if there is an additional failure right afterwards then it would be allowed as well. Not to mention that rate limiting is terrible for long taking operations by design as it doesn't know when the operation ended. So above construct is merely a code complication rather than a useful tool to reduce the log flow. > Yes, We could add the ratelimit if we get complain. > > > > > Also are all those CONFIG_DYNAMIC_DEBUG* ifdefs necessary? Can we > > simply enable DYNAMIC_DEBUG for page_alloc as I've suggested above? > > They are different usecases. > > With DYNAMIC_DEBUG_MODULE with CONFIG_DYNAMIC_DEBUG_CORE, > it works for only specific compile flags as you suggested. > (CONFIG_DYNAMIC_DEBUG_CORE is requirement to work DYNAMIC_DEBUG_MODULE. 
> > With CONFIG_DYNAMIC_DEBUG, user could enable/disable every dynamic > debug places without needing DYNAMIC_DEBUG_MODULE flags for source > files. > > Both usecase makes sense to me. Well, this is more of a question for dynamic debugging maintainers. But it would be really great to reduce the ifdefery as much as possible.
On Tue 09-03-21 23:42:46, Minchan Kim wrote: > On Tue, Mar 09, 2021 at 08:15:41AM -0800, Minchan Kim wrote: > > < snip > > > > > [...] > > > > +void dump_migrate_failure_pages(struct list_head *page_list) > > > > +{ > > > > + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, > > > > + "migrate failure"); > > > > + if (DYNAMIC_DEBUG_BRANCH(descriptor) && > > > > + alloc_contig_ratelimit()) { > > > > + struct page *page; > > > > + > > > > + WARN(1, "failed callstack"); > > > > + list_for_each_entry(page, page_list, lru) > > > > + dump_page(page, "migration failure"); > > > > + } > > > > > > Apart from the above, do we have to warn for something that is a > > > debugging aid? A similar concern wrt dump_page which uses pr_warn and > > > > Make sense. > > > > > page owner is using even pr_alert. > > > Would it make sense to add a loglevel parameter both into __dump_page > > > and dump_page_owner? > > > > Let me try it. > > I looked though them and made first draft to clean them up. > > It's bigger than my initial expectaion because there are many callsites > to use dump_page and stack_trace_print inconsistent loglevel. > Since it's not a specific problem for this work, I'd like to deal with > it as separate patchset since I don't want to be stuck on here for my > initial goal. > > FYI, > > Subject: [RFC 0/5] make dump_page aware of loglevel > > - Forked from [1] > > dump_page uses __dump_page and dump_page_owner internally to > print various information. However, their printk loglevel are > inconsistent in that > > __dump_page: KERN_WARNING > __dump_page_owner: KERN_ALERT > stack_trace_print: KERN_DEFAULT > > To make them consistent from dump_page, this patch introduces > pr_loglevel in printk and make the utility functions aware of > loglevel. Finally, last patch changes dump_page to support > loglevel to make the printing level consistent. 
> > [1] https://lore.kernel.org/linux-mm/YEdAw6gnp9XxoWUQ@dhcp22.suse.cz/ > > Minchan Kim (5): > mm: introduce pr_loglevel for __dump_[page]_owner > stacktrace: stack_trace_print aware of loglevel > mm: page_owner: dump_page_owner aware of loglevel > mm: debug: __dump_page aware of loglevel > mm: debug: dump_page aware of loglevel > drivers/md/dm-bufio.c | 2 +- > drivers/virtio/virtio_mem.c | 2 +- > fs/btrfs/ref-verify.c | 2 +- > fs/fuse/dev.c | 2 +- > include/linux/mmdebug.h | 10 ++++++---- > include/linux/page_owner.h | 8 ++++---- > include/linux/printk.h | 12 +++++++++++ > include/linux/stacktrace.h | 4 ++-- > kernel/backtracetest.c | 2 +- > kernel/dma/debug.c | 3 ++- > kernel/kcsan/report.c | 7 ++++--- > kernel/locking/lockdep.c | 3 ++- > kernel/stacktrace.c | 5 +++-- > mm/debug.c | 40 ++++++++++++++++++------------------- > mm/filemap.c | 2 +- > mm/gup_test.c | 4 ++-- > mm/huge_memory.c | 4 ++-- > mm/kasan/report.c | 4 ++-- > mm/kfence/report.c | 3 ++- > mm/kmemleak.c | 2 +- > mm/memory.c | 2 +- > mm/memory_hotplug.c | 4 ++-- > mm/page_alloc.c | 4 ++-- > mm/page_isolation.c | 2 +- > mm/page_owner.c | 24 +++++++++++----------- > 25 files changed, 88 insertions(+), 69 deletions(-) The is a lot of churn indeed. Have you considered adding $FOO_lglvl variants for those so that you can use them for your particular case without affecting most of existing users? Something similar we have discussed in other email thread regarding lru_add_drain_all?
On Tue, Mar 09, 2021 at 10:32:51AM +0100, Michal Hocko wrote: > Apart from the above, do we have to warn for something that is a > debugging aid? A similar concern wrt dump_page which uses pr_warn and > page owner is using even pr_alert. > Would it make sense to add a loglevel parameter both into __dump_page > and dump_page_owner? No. What would make sense is turning __dump_page() inside-out. Something like printk("%pP\n"); In lib/vsprintf.c, there's a big switch statement in the function pointer() that handles printing things like IPv6 addresses, dentries, and function symbols. Then we can do whatever we want around the new %pP, including choosing the log level, adding additional information, choosing to dump the page to a sysfs file, etc, etc.
On Wed 10-03-21 13:26:23, Matthew Wilcox wrote: > On Tue, Mar 09, 2021 at 10:32:51AM +0100, Michal Hocko wrote: > > Apart from the above, do we have to warn for something that is a > > debugging aid? A similar concern wrt dump_page which uses pr_warn and > > page owner is using even pr_alert. > > Would it make sense to add a loglevel parameter both into __dump_page > > and dump_page_owner? > > No. What would make sense is turning __dump_page() inside-out. > Something like printk("%pP\n"); > > In lib/vsprintf.c, there's a big switch statement in the function > pointer() that handles printing things like IPv6 addresses, dentries, > and function symbols. > > Then we can do whatever we want around the new %pP, including choosing > the log level, adding additional information, choosing to dump the page > to a sysfs file, etc, etc. Hmm, __dump_page has grown quite some heavy lifting over time and I am not sure this is a good candidate to put into printk proper (e.g. is it safe/reasonable to call get_kernel_nofault from printk - aka arbitrary context)?. But you've got a point that such a printk format wouldn't need to be 1:1 with the existing __dump_page. There is quite a lot to infer from page count, map count, flags, page type already. Then a question would be what is an actual advantage of %pP over dump_page_info(loglvl, p). One I can see is that %pP would allow to dump the state into a string and so it would be more versatile but I am not aware of a usecase for that (maybe tracing?).
On Wed, Mar 10, 2021 at 09:20:40AM +0100, David Hildenbrand wrote: > On 10.03.21 08:42, Minchan Kim wrote: > > On Tue, Mar 09, 2021 at 08:15:41AM -0800, Minchan Kim wrote: > > > > < snip > > > > > > > [...] > > > > > +void dump_migrate_failure_pages(struct list_head *page_list) > > > > > +{ > > > > > + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, > > > > > + "migrate failure"); > > > > > + if (DYNAMIC_DEBUG_BRANCH(descriptor) && > > > > > + alloc_contig_ratelimit()) { > > > > > + struct page *page; > > > > > + > > > > > + WARN(1, "failed callstack"); > > > > > + list_for_each_entry(page, page_list, lru) > > > > > + dump_page(page, "migration failure"); > > > > > + } > > > > > > > > Apart from the above, do we have to warn for something that is a > > > > debugging aid? A similar concern wrt dump_page which uses pr_warn and > > > > > > Make sense. > > > > > > > page owner is using even pr_alert. > > > > Would it make sense to add a loglevel parameter both into __dump_page > > > > and dump_page_owner? > > > > > > Let me try it. > > > > I looked though them and made first draft to clean them up. > > > > It's bigger than my initial expectaion because there are many callsites > > to use dump_page and stack_trace_print inconsistent loglevel. > > Since it's not a specific problem for this work, I'd like to deal with > > it as separate patchset since I don't want to be stuck on here for my > > initial goal. > > Why the need to rush regarding your series? > > If it will clean up your patch significantly, then I think doing the > cleanups first is the proper way to go. It doesn't clean up my patch at all. dump_page and internal functions are already broken in several places from print level point of view. I agreed that it's good to fix but it shouldn't be a block for the work since it's not a new particular problem this patch introduce. > > I really don't get why this is a real problem. That's because it's not my top priority.
On Wed, Mar 10, 2021 at 02:04:55PM +0100, Michal Hocko wrote: < snip > > > > Also are all those CONFIG_DYNAMIC_DEBUG* ifdefs necessary? Can we > > > simply enable DYNAMIC_DEBUG for page_alloc as I've suggested above? > > > > They are different usecases. > > > > With DYNAMIC_DEBUG_MODULE with CONFIG_DYNAMIC_DEBUG_CORE, > > it works for only specific compile flags as you suggested. > > (CONFIG_DYNAMIC_DEBUG_CORE is requirement to work DYNAMIC_DEBUG_MODULE. > > > > With CONFIG_DYNAMIC_DEBUG, user could enable/disable every dynamic > > debug places without needing DYNAMIC_DEBUG_MODULE flags for source > > files. > > > > Both usecase makes sense to me. > > Well, this is more of a question for dynamic debugging maintainers. But > it would be really great to reduce the ifdefery as much as possible. I don't understand why this is something particular case which is okay to lose either way to make dyndbg dynamic.
On Wed, Mar 10, 2021 at 02:07:05PM +0100, Michal Hocko wrote: > On Tue 09-03-21 23:42:46, Minchan Kim wrote: > > On Tue, Mar 09, 2021 at 08:15:41AM -0800, Minchan Kim wrote: > > > > < snip > > > > > > > [...] > > > > > +void dump_migrate_failure_pages(struct list_head *page_list) > > > > > +{ > > > > > + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, > > > > > + "migrate failure"); > > > > > + if (DYNAMIC_DEBUG_BRANCH(descriptor) && > > > > > + alloc_contig_ratelimit()) { > > > > > + struct page *page; > > > > > + > > > > > + WARN(1, "failed callstack"); > > > > > + list_for_each_entry(page, page_list, lru) > > > > > + dump_page(page, "migration failure"); > > > > > + } > > > > > > > > Apart from the above, do we have to warn for something that is a > > > > debugging aid? A similar concern wrt dump_page which uses pr_warn and > > > > > > Make sense. > > > > > > > page owner is using even pr_alert. > > > > Would it make sense to add a loglevel parameter both into __dump_page > > > > and dump_page_owner? > > > > > > Let me try it. > > > > I looked though them and made first draft to clean them up. > > > > It's bigger than my initial expectaion because there are many callsites > > to use dump_page and stack_trace_print inconsistent loglevel. > > Since it's not a specific problem for this work, I'd like to deal with > > it as separate patchset since I don't want to be stuck on here for my > > initial goal. > > > > FYI, > > > > Subject: [RFC 0/5] make dump_page aware of loglevel > > > > - Forked from [1] > > > > dump_page uses __dump_page and dump_page_owner internally to > > print various information. However, their printk loglevel are > > inconsistent in that > > > > __dump_page: KERN_WARNING > > __dump_page_owner: KERN_ALERT > > stack_trace_print: KERN_DEFAULT > > > > To make them consistent from dump_page, this patch introduces > > pr_loglevel in printk and make the utility functions aware of > > loglevel. 
Finally, last patch changes dump_page to support > > loglevel to make the printing level consistent. > > > > [1] https://lore.kernel.org/linux-mm/YEdAw6gnp9XxoWUQ@dhcp22.suse.cz/ > > > > Minchan Kim (5): > > mm: introduce pr_loglevel for __dump_[page]_owner > > stacktrace: stack_trace_print aware of loglevel > > mm: page_owner: dump_page_owner aware of loglevel > > mm: debug: __dump_page aware of loglevel > > mm: debug: dump_page aware of loglevel > > drivers/md/dm-bufio.c | 2 +- > > drivers/virtio/virtio_mem.c | 2 +- > > fs/btrfs/ref-verify.c | 2 +- > > fs/fuse/dev.c | 2 +- > > include/linux/mmdebug.h | 10 ++++++---- > > include/linux/page_owner.h | 8 ++++---- > > include/linux/printk.h | 12 +++++++++++ > > include/linux/stacktrace.h | 4 ++-- > > kernel/backtracetest.c | 2 +- > > kernel/dma/debug.c | 3 ++- > > kernel/kcsan/report.c | 7 ++++--- > > kernel/locking/lockdep.c | 3 ++- > > kernel/stacktrace.c | 5 +++-- > > mm/debug.c | 40 ++++++++++++++++++------------------- > > mm/filemap.c | 2 +- > > mm/gup_test.c | 4 ++-- > > mm/huge_memory.c | 4 ++-- > > mm/kasan/report.c | 4 ++-- > > mm/kfence/report.c | 3 ++- > > mm/kmemleak.c | 2 +- > > mm/memory.c | 2 +- > > mm/memory_hotplug.c | 4 ++-- > > mm/page_alloc.c | 4 ++-- > > mm/page_isolation.c | 2 +- > > mm/page_owner.c | 24 +++++++++++----------- > > 25 files changed, 88 insertions(+), 69 deletions(-) > > The is a lot of churn indeed. Have you considered adding $FOO_lglvl > variants for those so that you can use them for your particular case > without affecting most of existing users? Something similar we have > discussed in other email thread regarding lru_add_drain_all? I thought that way but didn't try since it couldn't make them atomic(For example, other printk place in other context will affect by the $FOO_lglvl).
On Wed 10-03-21 08:05:36, Minchan Kim wrote: > On Wed, Mar 10, 2021 at 02:07:05PM +0100, Michal Hocko wrote: [...] > > The is a lot of churn indeed. Have you considered adding $FOO_lglvl > > variants for those so that you can use them for your particular case > > without affecting most of existing users? Something similar we have > > discussed in other email thread regarding lru_add_drain_all? > > I thought that way but didn't try since it couldn't make them > atomic(For example, other printk place in other context will > affect by the $FOO_lglvl). I do not follow. I meant something like the following (likely incomplete but you should get an idea). diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 3468794f83d2..71b402eb8f78 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -14,7 +14,7 @@ extern void __set_page_owner(struct page *page, extern void __split_page_owner(struct page *page, unsigned int nr); extern void __copy_page_owner(struct page *oldpage, struct page *newpage); extern void __set_page_owner_migrate_reason(struct page *page, int reason); -extern void __dump_page_owner(struct page *page); +extern void __dump_page_owner(struct page *page, const char *loglvl); extern void pagetypeinfo_showmixedcount_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone); @@ -46,10 +46,10 @@ static inline void set_page_owner_migrate_reason(struct page *page, int reason) if (static_branch_unlikely(&page_owner_inited)) __set_page_owner_migrate_reason(page, reason); } -static inline void dump_page_owner(struct page *page) +static inline void dump_page_owner(struct page *page, const char *loglvl) { if (static_branch_unlikely(&page_owner_inited)) - __dump_page_owner(page); + __dump_page_owner(page, loglvl); } #else static inline void reset_page_owner(struct page *page, unsigned int order) @@ -69,7 +69,7 @@ static inline void copy_page_owner(struct page *oldpage, struct page *newpage) static inline void 
set_page_owner_migrate_reason(struct page *page, int reason) { } -static inline void dump_page_owner(struct page *page) +static inline void dump_page_owner(struct page *page, const char *loglvl) { } #endif /* CONFIG_PAGE_OWNER */ diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index 9f8117c7cfdd..1b13135d9916 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -14,6 +14,18 @@ #include <linux/kallsyms.h> #include <linux/stacktrace.h> +void __stack_trace_print(const unsigned long *entries, unsigned int nr_entries, + int spacesconst, char *loglvl) +{ + unsigned int i; + + if (WARN_ON(!entries)) + return; + + for (i = 0; i < nr_entries; i++) + printk("%s%*c%pS\n", loglvl, 1 + spaces, ' ', (void *)entries[i]); +} + /** * stack_trace_print - Print the entries in the stack trace * @entries: Pointer to storage array @@ -23,13 +35,7 @@ void stack_trace_print(const unsigned long *entries, unsigned int nr_entries, int spaces) { - unsigned int i; - - if (WARN_ON(!entries)) - return; - - for (i = 0; i < nr_entries; i++) - printk("%*c%pS\n", 1 + spaces, ' ', (void *)entries[i]); + __stack_trace_print(entries, nr_entries, spaces, KERN_DEFAULT); } EXPORT_SYMBOL_GPL(stack_trace_print); diff --git a/mm/debug.c b/mm/debug.c index 8a40b3fefbeb..b989ee2ffa89 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -42,7 +42,7 @@ const struct trace_print_flags vmaflag_names[] = { {0, NULL} }; -void __dump_page(struct page *page, const char *reason) +void __dump_page(struct page *page, const char *reason, const char *loglvl) { struct page *head = compound_head(page); struct address_space *mapping; @@ -64,7 +64,7 @@ void __dump_page(struct page *page, const char *reason) * dump_page() when detected. */ if (page_poisoned) { - pr_warn("page:%px is uninitialized and poisoned", page); + printk("%spage:%px is uninitialized and poisoned", loglvl, page); goto hex_only; } @@ -95,17 +95,17 @@ void __dump_page(struct page *page, const char *reason) */ mapcount = PageSlab(head) ? 
0 : page_mapcount(page); - pr_warn("page:%p refcount:%d mapcount:%d mapping:%p index:%#lx pfn:%#lx\n", + printk("%spage:%p refcount:%d mapcount:%d mapping:%p index:%#lx pfn:%#lx\n", loglvl, page, page_ref_count(head), mapcount, mapping, page_to_pgoff(page), page_to_pfn(page)); if (compound) { if (hpage_pincount_available(page)) { - pr_warn("head:%p order:%u compound_mapcount:%d compound_pincount:%d\n", + printk("%shead:%p order:%u compound_mapcount:%d compound_pincount:%d\n", loglvl, head, compound_order(head), head_compound_mapcount(head), head_compound_pincount(head)); } else { - pr_warn("head:%p order:%u compound_mapcount:%d\n", + printk("%shead:%p order:%u compound_mapcount:%d\n", loglvl, head, compound_order(head), head_compound_mapcount(head)); } @@ -128,30 +128,31 @@ void __dump_page(struct page *page, const char *reason) */ if (get_kernel_nofault(host, &mapping->host) || get_kernel_nofault(a_ops, &mapping->a_ops)) { - pr_warn("failed to read mapping contents, not a valid kernel address?\n"); + printk("%sfailed to read mapping contents, not a valid kernel address?\n", + loglvl); goto out_mapping; } if (!host) { - pr_warn("aops:%ps\n", a_ops); + printk("%saops:%ps\n", loglvl, a_ops); goto out_mapping; } if (get_kernel_nofault(dentry_first, &host->i_dentry.first) || get_kernel_nofault(ino, &host->i_ino)) { - pr_warn("aops:%ps with invalid host inode %px\n", + printk("%saops:%ps with invalid host inode %px\n", loglvl, a_ops, host); goto out_mapping; } if (!dentry_first) { - pr_warn("aops:%ps ino:%lx\n", a_ops, ino); + printk("%saops:%ps ino:%lx\n", loglvl, a_ops, ino); goto out_mapping; } dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias); if (get_kernel_nofault(dentry, dentry_ptr)) { - pr_warn("aops:%ps ino:%lx with invalid dentry %px\n", + printk("%saops:%ps ino:%lx with invalid dentry %px\n", loglvl, a_ops, ino, dentry_ptr); } else { /* @@ -159,38 +160,38 @@ void __dump_page(struct page *page, const char *reason) * crash, but it's unlikely 
that we reach here with a * corrupted struct page */ - pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", + printk("%saops:%ps ino:%lx dentry name:\"%pd\"\n", loglvl, a_ops, ino, &dentry); } } out_mapping: BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1); - pr_warn("%sflags: %#lx(%pGp)%s\n", type, head->flags, &head->flags, + printk("%s%sflags: %#lx(%pGp)%s\n", loglvl, type, head->flags, &head->flags, page_cma ? " CMA" : ""); hex_only: - print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32, + print_hex_dump(loglvl, "raw: ", DUMP_PREFIX_NONE, 32, sizeof(unsigned long), page, sizeof(struct page), false); if (head != page) - print_hex_dump(KERN_WARNING, "head: ", DUMP_PREFIX_NONE, 32, + print_hex_dump(loglvl, "head: ", DUMP_PREFIX_NONE, 32, sizeof(unsigned long), head, sizeof(struct page), false); if (reason) - pr_warn("page dumped because: %s\n", reason); + printk("%spage dumped because: %s\n", loglvl, reason); #ifdef CONFIG_MEMCG if (!page_poisoned && page->memcg_data) - pr_warn("pages's memcg:%lx\n", page->memcg_data); + printk("%spages's memcg:%lx\n", loglvl, page->memcg_data); #endif } void dump_page(struct page *page, const char *reason) { - __dump_page(page, reason); - dump_page_owner(page); + __dump_page(page, reason, KERN_WARNING); + dump_page_owner(page, KERN_ALERT); } EXPORT_SYMBOL(dump_page); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 519a60d5b6f7..20b500f76667 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -637,7 +637,7 @@ static void bad_page(struct page *page, const char *reason) pr_alert("BUG: Bad page state in process %s pfn:%05lx\n", current->comm, page_to_pfn(page)); __dump_page(page, reason); - dump_page_owner(page); + dump_page_owner(page, KERN_ALERT); print_modules(); dump_stack(); diff --git a/mm/page_owner.c b/mm/page_owner.c index af464bb7fbe7..ff5908605925 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -406,7 +406,7 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, return 
-ENOMEM; } -void __dump_page_owner(struct page *page) +void __dump_page_owner(struct page *page, const char *loglvl) { struct page_ext *page_ext = lookup_page_ext(page); struct page_owner *page_owner; @@ -417,7 +417,7 @@ void __dump_page_owner(struct page *page) int mt; if (unlikely(!page_ext)) { - pr_alert("There is not page extension available.\n"); + printk("%sThere is not page extension available.\n", loglvl); return; } @@ -426,38 +426,38 @@ void __dump_page_owner(struct page *page) mt = gfp_migratetype(gfp_mask); if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) { - pr_alert("page_owner info is not present (never set?)\n"); + printk("%spage_owner info is not present (never set?)\n", loglvl); return; } if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags)) - pr_alert("page_owner tracks the page as allocated\n"); + printk("%spage_owner tracks the page as allocated\n", loglvl); else - pr_alert("page_owner tracks the page as freed\n"); + printk("%spage_owner tracks the page as freed\n", loglvl); - pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, ts %llu\n", + printk("%spage last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, ts %llu\n", loglvl, page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask, page_owner->pid, page_owner->ts_nsec); handle = READ_ONCE(page_owner->handle); if (!handle) { - pr_alert("page_owner allocation stack trace missing\n"); + printk("%spage_owner allocation stack trace missing\n", loglvl); } else { nr_entries = stack_depot_fetch(handle, &entries); - stack_trace_print(entries, nr_entries, 0); + __stack_trace_print(entries, nr_entries, 0, loglvl); } handle = READ_ONCE(page_owner->free_handle); if (!handle) { - pr_alert("page_owner free stack trace missing\n"); + printk("%spage_owner free stack trace missing\n", loglvl); } else { nr_entries = stack_depot_fetch(handle, &entries); - pr_alert("page last free stack trace:\n"); - stack_trace_print(entries, nr_entries, 0); + 
printk("%spage last free stack trace:\n", loglvl); + __stack_trace_print(entries, nr_entries, 0, loglvl); } if (page_owner->last_migrate_reason != -1) - pr_alert("page has been migrated, last migrate reason: %s\n", + printk("%spage has been migrated, last migrate reason: %s\n", loglvl, migrate_reason_names[page_owner->last_migrate_reason]); }
On Wed, Mar 10, 2021 at 05:46:56PM +0100, Michal Hocko wrote: > On Wed 10-03-21 08:05:36, Minchan Kim wrote: > > On Wed, Mar 10, 2021 at 02:07:05PM +0100, Michal Hocko wrote: > [...] > > > The is a lot of churn indeed. Have you considered adding $FOO_lglvl > > > variants for those so that you can use them for your particular case > > > without affecting most of existing users? Something similar we have > > > discussed in other email thread regarding lru_add_drain_all? > > > > I thought that way but didn't try since it couldn't make them > > atomic(For example, other printk place in other context will > > affect by the $FOO_lglvl). > > I do not follow. I meant something like the following (likely incomplete > but you should get an idea). Oh, I thought you wanted to override loglevel temporally. old_lvl = save_printk_lvl(new level); dump_page(); restore_printk_lvl(old_lvl); > > diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h > index 3468794f83d2..71b402eb8f78 100644 > --- a/include/linux/page_owner.h > +++ b/include/linux/page_owner.h > @@ -14,7 +14,7 @@ extern void __set_page_owner(struct page *page, > extern void __split_page_owner(struct page *page, unsigned int nr); > extern void __copy_page_owner(struct page *oldpage, struct page *newpage); > extern void __set_page_owner_migrate_reason(struct page *page, int reason); > -extern void __dump_page_owner(struct page *page); > +extern void __dump_page_owner(struct page *page, const char *loglvl); > extern void pagetypeinfo_showmixedcount_print(struct seq_file *m, > pg_data_t *pgdat, struct zone *zone); > > @@ -46,10 +46,10 @@ static inline void set_page_owner_migrate_reason(struct page *page, int reason) > if (static_branch_unlikely(&page_owner_inited)) > __set_page_owner_migrate_reason(page, reason); > } > -static inline void dump_page_owner(struct page *page) > +static inline void dump_page_owner(struct page *page, const char *loglvl) > { > if (static_branch_unlikely(&page_owner_inited)) > - 
__dump_page_owner(page); > + __dump_page_owner(page, loglvl); > } > #else > static inline void reset_page_owner(struct page *page, unsigned int order) > @@ -69,7 +69,7 @@ static inline void copy_page_owner(struct page *oldpage, struct page *newpage) > static inline void set_page_owner_migrate_reason(struct page *page, int reason) > { > } > -static inline void dump_page_owner(struct page *page) > +static inline void dump_page_owner(struct page *page, const char *loglvl) > { > } > #endif /* CONFIG_PAGE_OWNER */ > diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c > index 9f8117c7cfdd..1b13135d9916 100644 > --- a/kernel/stacktrace.c > +++ b/kernel/stacktrace.c > @@ -14,6 +14,18 @@ > #include <linux/kallsyms.h> > #include <linux/stacktrace.h> > > +void __stack_trace_print(const unsigned long *entries, unsigned int nr_entries, > + int spacesconst, char *loglvl) > +{ > + unsigned int i; > + > + if (WARN_ON(!entries)) > + return; > + > + for (i = 0; i < nr_entries; i++) > + printk("%s%*c%pS\n", loglvl, 1 + spaces, ' ', (void *)entries[i]); > +} That's exactly I did with introducing pr_loglevel. I wanted to address *all places* to use dump_page and stack_trace_print since some folks might ask me to fix all the broken place all at once. I'm getting tired with such hassle. void dump_page(const char *log_lvl, struct page *page, const char *reason) { __dump_page(log_lvl, page, reason); dump_page_owner(log_lvl, page); } EXPORT_SYMBOL(dump_page); /** * pr_loglevel - Print an loglevel message * @level: loglevel * @fmt: format string * @...: arguments for the format string * * This macro expands to a printk with @loglevel. It uses pr_fmt() to * generate the format string. */ #define pr_loglevel(level, fmt, ...) \ printk("%s" pr_fmt(fmt), level, ##__VA_ARGS__) void __dump_page(const char *log_lvl, struct page *page, const char *reason) { .. .. if (page_poisoned) { pr_loglevel(log_lvl, "page:%px is uninitialized and poisoned", page); goto hex_only; } .. 
} static inline void dump_page_owner(const char *log_lvl, struct page *page) { if (static_branch_unlikely(&page_owner_inited)) __dump_page_owner(log_lvl, page); } void stack_trace_print(const char *log_lvl, const unsigned long *entries, unsigned int nr_entries, int spaces) { unsigned int i; if (WARN_ON(!entries)) return; for (i = 0; i < nr_entries; i++) pr_loglevel(log_lvl, "%*c%pS\n", 1 + spaces, ' ', (void *)entries[i]); } EXPORT_SYMBOL_GPL(stack_trace_print);
On Wed, Mar 10, 2021 at 09:06:48AM -0800, Minchan Kim wrote: > On Wed, Mar 10, 2021 at 05:46:56PM +0100, Michal Hocko wrote: > > On Wed 10-03-21 08:05:36, Minchan Kim wrote: > > > On Wed, Mar 10, 2021 at 02:07:05PM +0100, Michal Hocko wrote: > > [...] > > > > The is a lot of churn indeed. Have you considered adding $FOO_lglvl > > > > variants for those so that you can use them for your particular case > > > > without affecting most of existing users? Something similar we have > > > > discussed in other email thread regarding lru_add_drain_all? > > > > > > I thought that way but didn't try since it couldn't make them > > > atomic(For example, other printk place in other context will > > > affect by the $FOO_lglvl). > > > > I do not follow. I meant something like the following (likely incomplete > > but you should get an idea). > > Oh, I thought you wanted to override loglevel temporally. > > old_lvl = save_printk_lvl(new level); > dump_page(); > restore_printk_lvl(old_lvl); > > > > > diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h > > index 3468794f83d2..71b402eb8f78 100644 > > --- a/include/linux/page_owner.h > > +++ b/include/linux/page_owner.h > > @@ -14,7 +14,7 @@ extern void __set_page_owner(struct page *page, > > extern void __split_page_owner(struct page *page, unsigned int nr); > > extern void __copy_page_owner(struct page *oldpage, struct page *newpage); > > extern void __set_page_owner_migrate_reason(struct page *page, int reason); > > -extern void __dump_page_owner(struct page *page); > > +extern void __dump_page_owner(struct page *page, const char *loglvl); > > extern void pagetypeinfo_showmixedcount_print(struct seq_file *m, > > pg_data_t *pgdat, struct zone *zone); > > > > @@ -46,10 +46,10 @@ static inline void set_page_owner_migrate_reason(struct page *page, int reason) > > if (static_branch_unlikely(&page_owner_inited)) > > __set_page_owner_migrate_reason(page, reason); > > } > > -static inline void dump_page_owner(struct page 
*page) > > +static inline void dump_page_owner(struct page *page, const char *loglvl) > > { > > if (static_branch_unlikely(&page_owner_inited)) > > - __dump_page_owner(page); > > + __dump_page_owner(page, loglvl); > > } > > #else > > static inline void reset_page_owner(struct page *page, unsigned int order) > > @@ -69,7 +69,7 @@ static inline void copy_page_owner(struct page *oldpage, struct page *newpage) > > static inline void set_page_owner_migrate_reason(struct page *page, int reason) > > { > > } > > -static inline void dump_page_owner(struct page *page) > > +static inline void dump_page_owner(struct page *page, const char *loglvl) > > { > > } > > #endif /* CONFIG_PAGE_OWNER */ > > diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c > > index 9f8117c7cfdd..1b13135d9916 100644 > > --- a/kernel/stacktrace.c > > +++ b/kernel/stacktrace.c > > @@ -14,6 +14,18 @@ > > #include <linux/kallsyms.h> > > #include <linux/stacktrace.h> > > > > +void __stack_trace_print(const unsigned long *entries, unsigned int nr_entries, > > + int spacesconst, char *loglvl) > > +{ > > + unsigned int i; > > + > > + if (WARN_ON(!entries)) > > + return; > > + > > + for (i = 0; i < nr_entries; i++) > > + printk("%s%*c%pS\n", loglvl, 1 + spaces, ' ', (void *)entries[i]); > > +} > > That's exactly I did with introducing pr_loglevel. I wanted to address > *all places* to use dump_page and stack_trace_print since some folks > might ask me to fix all the broken place all at once. I'm getting tired > with such hassle. > > void dump_page(const char *log_lvl, struct page *page, const char *reason) > { > __dump_page(log_lvl, page, reason); > dump_page_owner(log_lvl, page); > } > EXPORT_SYMBOL(dump_page); Since it's good to have regardless of the patch, I posted next revision with removeing ratelimit and put something more in description to proceed the work. Posted v3 - https://lore.kernel.org/linux-mm/20210310180104.517886-1-minchan@kernel.org/
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3e4b29ee2b1e..bb0aeca2069c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8453,6 +8453,34 @@ static unsigned long pfn_max_align_up(unsigned long pfn) pageblock_nr_pages)); } +#if defined(CONFIG_DYNAMIC_DEBUG) || \ + (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) +static DEFINE_RATELIMIT_STATE(alloc_contig_ratelimit_state, + DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); +int alloc_contig_ratelimit(void) +{ + return __ratelimit(&alloc_contig_ratelimit_state); +} + +void dump_migrate_failure_pages(struct list_head *page_list) +{ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, + "migrate failure"); + if (DYNAMIC_DEBUG_BRANCH(descriptor) && + alloc_contig_ratelimit()) { + struct page *page; + + WARN(1, "failed callstack"); + list_for_each_entry(page, page_list, lru) + dump_page(page, "migration failure"); + } +} +#else +static inline void dump_migrate_failure_pages(struct list_head *page_list) +{ +} +#endif + /* [start, end) must belong to a single zone. */ static int __alloc_contig_migrate_range(struct compact_control *cc, unsigned long start, unsigned long end) @@ -8496,6 +8524,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE); } if (ret < 0) { + dump_migrate_failure_pages(&cc->migratepages); putback_movable_pages(&cc->migratepages); return ret; }
alloc_contig_range is usually used on cma area or movable zone. It's critical if the page migration fails on those areas so dump more debugging message. page refcount, mapcount with page flags on dump_page are helpful information to deduce the culprit. Furthermore, dump_page_owner was super helpful to find long term pinner who initiated the page allocation. Admin could enable the dump like this(by default, disabled) echo "func dump_migrate_failure_pages +p" > control Admin could disable it. echo "func dump_migrate_failure_pages =_" > control Signed-off-by: Minchan Kim <minchan@kernel.org> --- * from v1 - https://lore.kernel.org/linux-mm/20210217163603.429062-1-minchan@kernel.org/ * use dynamic debugging with system wide instead of per-call site - mhocko mm/page_alloc.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+)