diff mbox series

[v2] mm: page_alloc: dump migrate-failed pages

Message ID 20210308202047.1903802-1-minchan@kernel.org (mailing list archive)
State New, archived
Headers show
Series [v2] mm: page_alloc: dump migrate-failed pages | expand

Commit Message

Minchan Kim March 8, 2021, 8:20 p.m. UTC
alloc_contig_range is usually used on a cma area or the movable zone.
It's critical if the page migration fails on those areas, so
dump more debugging messages.

page refcount, mapcount with page flags on dump_page are
helpful information to deduce the culprit. Furthermore,
dump_page_owner was super helpful to find long term pinner
who initiated the page allocation.

Admin could enable the dump like this (by default, disabled)

	echo "func dump_migrate_failure_pages +p" > control

Admin could disable it.

	echo "func dump_migrate_failure_pages =_" > control

Signed-off-by: Minchan Kim <minchan@kernel.org>
---
* from v1 - https://lore.kernel.org/linux-mm/20210217163603.429062-1-minchan@kernel.org/
  * use dynamic debugging with system wide instead of per-call site - mhocko

 mm/page_alloc.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

Comments

kernel test robot March 8, 2021, 9:29 p.m. UTC | #1
Hi Minchan,

I love your patch! Perhaps something to improve:

[auto build test WARNING on hnaz-linux-mm/master]

url:    https://github.com/0day-ci/linux/commits/Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205
base:   https://github.com/hnaz/linux-mm master
config: m68k-allmodconfig (attached as .config)
compiler: m68k-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/3c635af37b862e9c601ee8d5818f7da9cd3e2e57
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205
        git checkout 3c635af37b862e9c601ee8d5818f7da9cd3e2e57
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=m68k 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra]
     169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory)
         |                                                 ^~
   include/linux/compiler.h:78:42: note: in definition of macro 'unlikely'
      78 | # define unlikely(x) __builtin_expect(!!(x), 0)
         |                                          ^
   include/linux/scatterlist.h:143:2: note: in expansion of macro 'BUG_ON'
     143 |  BUG_ON(!virt_addr_valid(buf));
         |  ^~~~~~
   include/linux/scatterlist.h:143:10: note: in expansion of macro 'virt_addr_valid'
     143 |  BUG_ON(!virt_addr_valid(buf));
         |          ^~~~~~~~~~~~~~~
   In file included from arch/m68k/include/asm/page.h:60,
                    from arch/m68k/include/asm/thread_info.h:6,
                    from include/linux/thread_info.h:38,
                    from include/asm-generic/preempt.h:5,
                    from ./arch/m68k/include/generated/asm/preempt.h:1,
                    from include/linux/preempt.h:78,
                    from include/linux/spinlock.h:51,
                    from include/linux/mmzone.h:8,
                    from include/linux/gfp.h:6,
                    from include/linux/mm.h:10,
                    from mm/page_alloc.c:19:
   mm/internal.h: In function 'mem_map_next':
   arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra]
     169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory)
         |                                                 ^~
   arch/m68k/include/asm/page_mm.h:170:25: note: in expansion of macro 'virt_addr_valid'
     170 | #define pfn_valid(pfn)  virt_addr_valid(pfn_to_virt(pfn))
         |                         ^~~~~~~~~~~~~~~
   mm/internal.h:456:8: note: in expansion of macro 'pfn_valid'
     456 |   if (!pfn_valid(pfn))
         |        ^~~~~~~~~
   mm/page_alloc.c: In function 'reserve_bootmem_region':
   arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra]
     169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory)
         |                                                 ^~
   arch/m68k/include/asm/page_mm.h:170:25: note: in expansion of macro 'virt_addr_valid'
     170 | #define pfn_valid(pfn)  virt_addr_valid(pfn_to_virt(pfn))
         |                         ^~~~~~~~~~~~~~~
   mm/page_alloc.c:1444:7: note: in expansion of macro 'pfn_valid'
    1444 |   if (pfn_valid(start_pfn)) {
         |       ^~~~~~~~~
   mm/page_alloc.c: In function '__pageblock_pfn_to_page':
   arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra]
     169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory)
         |                                                 ^~
   arch/m68k/include/asm/page_mm.h:170:25: note: in expansion of macro 'virt_addr_valid'
     170 | #define pfn_valid(pfn)  virt_addr_valid(pfn_to_virt(pfn))
         |                         ^~~~~~~~~~~~~~~
   mm/page_alloc.c:1576:7: note: in expansion of macro 'pfn_valid'
    1576 |  if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
         |       ^~~~~~~~~
   arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra]
     169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory)
         |                                                 ^~
   arch/m68k/include/asm/page_mm.h:170:25: note: in expansion of macro 'virt_addr_valid'
     170 | #define pfn_valid(pfn)  virt_addr_valid(pfn_to_virt(pfn))
         |                         ^~~~~~~~~~~~~~~
   mm/page_alloc.c:1576:32: note: in expansion of macro 'pfn_valid'
    1576 |  if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
         |                                ^~~~~~~~~
   arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra]
     169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory)
         |                                                 ^~
   arch/m68k/include/asm/page_mm.h:170:25: note: in expansion of macro 'virt_addr_valid'
     170 | #define pfn_valid(pfn)  virt_addr_valid(pfn_to_virt(pfn))
         |                         ^~~~~~~~~~~~~~~
   include/linux/memory_hotplug.h:242:6: note: in expansion of macro 'pfn_valid'
     242 |  if (pfn_valid(pfn))   \
         |      ^~~~~~~~~
   mm/page_alloc.c:1579:15: note: in expansion of macro 'pfn_to_online_page'
    1579 |  start_page = pfn_to_online_page(start_pfn);
         |               ^~~~~~~~~~~~~~~~~~
   In file included from include/asm-generic/bug.h:5,
                    from arch/m68k/include/asm/bug.h:32,
                    from include/linux/bug.h:5,
                    from include/linux/mmdebug.h:5,
                    from include/linux/mm.h:9,
                    from mm/page_alloc.c:19:
   mm/page_alloc.c: In function 'free_pages':
   arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra]
     169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory)
         |                                                 ^~
   include/linux/compiler.h:78:42: note: in definition of macro 'unlikely'
      78 | # define unlikely(x) __builtin_expect(!!(x), 0)
         |                                          ^
   include/linux/mmdebug.h:18:25: note: in expansion of macro 'BUG_ON'
      18 | #define VM_BUG_ON(cond) BUG_ON(cond)
         |                         ^~~~~~
   mm/page_alloc.c:4953:3: note: in expansion of macro 'VM_BUG_ON'
    4953 |   VM_BUG_ON(!virt_addr_valid((void *)addr));
         |   ^~~~~~~~~
   mm/page_alloc.c:4953:14: note: in expansion of macro 'virt_addr_valid'
    4953 |   VM_BUG_ON(!virt_addr_valid((void *)addr));
         |              ^~~~~~~~~~~~~~~
   mm/page_alloc.c: At top level:
   mm/page_alloc.c:6122:23: warning: no previous prototype for 'memmap_init' [-Wmissing-prototypes]
    6122 | void __meminit __weak memmap_init(unsigned long size, int nid,
         |                       ^~~~~~~~~~~
>> mm/page_alloc.c:8348:5: warning: no previous prototype for 'alloc_contig_ratelimit' [-Wmissing-prototypes]
    8348 | int alloc_contig_ratelimit(void)
         |     ^~~~~~~~~~~~~~~~~~~~~~
>> mm/page_alloc.c:8353:6: warning: no previous prototype for 'dump_migrate_failure_pages' [-Wmissing-prototypes]
    8353 | void dump_migrate_failure_pages(struct list_head *page_list)
         |      ^~~~~~~~~~~~~~~~~~~~~~~~~~
   In file included from arch/m68k/include/asm/page.h:60,
                    from arch/m68k/include/asm/thread_info.h:6,
                    from include/linux/thread_info.h:38,
                    from include/asm-generic/preempt.h:5,
                    from ./arch/m68k/include/generated/asm/preempt.h:1,
                    from include/linux/preempt.h:78,
                    from include/linux/spinlock.h:51,
                    from include/linux/mmzone.h:8,
                    from include/linux/gfp.h:6,
                    from include/linux/mm.h:10,
                    from mm/page_alloc.c:19:
   mm/page_alloc.c: In function 'pfn_range_valid_contig':
   arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra]
     169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory)
         |                                                 ^~
   arch/m68k/include/asm/page_mm.h:170:25: note: in expansion of macro 'virt_addr_valid'
     170 | #define pfn_valid(pfn)  virt_addr_valid(pfn_to_virt(pfn))
         |                         ^~~~~~~~~~~~~~~
   include/linux/memory_hotplug.h:242:6: note: in expansion of macro 'pfn_valid'
     242 |  if (pfn_valid(pfn))   \
         |      ^~~~~~~~~
   mm/page_alloc.c:8592:10: note: in expansion of macro 'pfn_to_online_page'
    8592 |   page = pfn_to_online_page(i);
         |          ^~~~~~~~~~~~~~~~~~

Kconfig warnings: (for reference only)
   WARNING: unmet direct dependencies detected for NEED_MULTIPLE_NODES
   Depends on DISCONTIGMEM || NUMA
   Selected by
   - SINGLE_MEMORY_CHUNK && MMU


vim +/alloc_contig_ratelimit +8348 mm/page_alloc.c

  8343	
  8344	#if defined(CONFIG_DYNAMIC_DEBUG) || \
  8345		(defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
  8346	static DEFINE_RATELIMIT_STATE(alloc_contig_ratelimit_state,
  8347			DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
> 8348	int alloc_contig_ratelimit(void)
  8349	{
  8350		return __ratelimit(&alloc_contig_ratelimit_state);
  8351	}
  8352	
> 8353	void dump_migrate_failure_pages(struct list_head *page_list)
  8354	{
  8355		DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,
  8356				"migrate failure");
  8357		if (DYNAMIC_DEBUG_BRANCH(descriptor) &&
  8358				alloc_contig_ratelimit()) {
  8359			struct page *page;
  8360	
  8361			WARN(1, "failed callstack");
  8362			list_for_each_entry(page, page_list, lru)
  8363				dump_page(page, "migration failure");
  8364		}
  8365	}
  8366	#else
  8367	static inline void dump_migrate_failure_pages(struct list_head *page_list)
  8368	{
  8369	}
  8370	#endif
  8371	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Minchan Kim March 8, 2021, 10:29 p.m. UTC | #2
On Tue, Mar 09, 2021 at 05:29:30AM +0800, kernel test robot wrote:
> Hi Minchan,
> 
> I love your patch! Perhaps something to improve:
> 
> [auto build test WARNING on hnaz-linux-mm/master]
> 
> url:    https://github.com/0day-ci/linux/commits/Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205
> base:   https://github.com/hnaz/linux-mm master
> config: m68k-allmodconfig (attached as .config)
> compiler: m68k-linux-gcc (GCC) 9.3.0
> reproduce (this is a W=1 build):
>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # https://github.com/0day-ci/linux/commit/3c635af37b862e9c601ee8d5818f7da9cd3e2e57
>         git remote add linux-review https://github.com/0day-ci/linux
>         git fetch --no-tags linux-review Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205
>         git checkout 3c635af37b862e9c601ee8d5818f7da9cd3e2e57
>         # save the attached .config to linux build tree
>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=m68k 
> 
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
> 
> All warnings (new ones prefixed by >>):
> 
>    arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra]
>      169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory)
>          |                                                 ^~
>    include/linux/compiler.h:78:42: note: in definition of macro 'unlikely'
>       78 | # define unlikely(x) __builtin_expect(!!(x), 0)
>          |                                          ^
>    include/linux/scatterlist.h:143:2: note: in expansion of macro 'BUG_ON'
>      143 |  BUG_ON(!virt_addr_valid(buf));
>          |  ^~~~~~
>    include/linux/scatterlist.h:143:10: note: in expansion of macro 'virt_addr_valid'
>      143 |  BUG_ON(!virt_addr_valid(buf));
>          |          ^~~~~~~~~~~~~~~
>    In file included from arch/m68k/include/asm/page.h:60,
>                     from arch/m68k/include/asm/thread_info.h:6,
>                     from include/linux/thread_info.h:38,
>                     from include/asm-generic/preempt.h:5,
>                     from ./arch/m68k/include/generated/asm/preempt.h:1,
>                     from include/linux/preempt.h:78,
>                     from include/linux/spinlock.h:51,
>                     from include/linux/mmzone.h:8,
>                     from include/linux/gfp.h:6,
>                     from include/linux/mm.h:10,
>                     from mm/page_alloc.c:19:

I am not sure this is triggered by the patch since I could see the
warn with reverting the patch.
Andrew Morton March 9, 2021, 12:21 a.m. UTC | #3
On Mon,  8 Mar 2021 12:20:47 -0800 Minchan Kim <minchan@kernel.org> wrote:

> alloc_contig_range is usually used on cma area or movable zone.
> It's critical if the page migration fails on those areas so
> dump more debugging message.
> 
> page refcount, mapcount with page flags on dump_page are
> helpful information to deduce the culprit. Furthermore,
> dump_page_owner was super helpful to find long term pinner
> who initiated the page allocation.
> 
> Admin could enable the dump like this(by default, disabled)
> 
> 	echo "func dump_migrate_failure_pages +p" > control
> 
> Admin could disable it.
> 
> 	echo "func dump_migrate_failure_pages =_" > control
> 
> ...
>
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -8453,6 +8453,34 @@ static unsigned long pfn_max_align_up(unsigned long pfn)
>  				pageblock_nr_pages));
>  }
>  
> +#if defined(CONFIG_DYNAMIC_DEBUG) || \
> +	(defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
> +static DEFINE_RATELIMIT_STATE(alloc_contig_ratelimit_state,
> +		DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
> +int alloc_contig_ratelimit(void)
> +{
> +	return __ratelimit(&alloc_contig_ratelimit_state);
> +}

Wow, that's an eyesore.  We're missing helpers in the ratelimit code. 
Can we do something like

/* description goes here */
#define RATELIMIT2(interval, burst)
({
	static DEFINE_RATELIMIT_STATE(_rs, interval, burst);

	__ratelimit(_rs);
})

#define RATELIMIT()
	RATELIMIT2(DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST)

> +void dump_migrate_failure_pages(struct list_head *page_list)
> +{
> +	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,
> +			"migrate failure");
> +	if (DYNAMIC_DEBUG_BRANCH(descriptor) &&
> +			alloc_contig_ratelimit()) {
> +		struct page *page;
> +
> +		WARN(1, "failed callstack");
> +		list_for_each_entry(page, page_list, lru)
> +			dump_page(page, "migration failure");
> +	}
> +}

Then we can simply do

	if (DYNAMIC_DEBUG_BRANCH(descriptor) && RATELIMIT())
kernel test robot March 9, 2021, 12:30 a.m. UTC | #4
Hi Minchan,

I love your patch! Perhaps something to improve:

[auto build test WARNING on hnaz-linux-mm/master]

url:    https://github.com/0day-ci/linux/commits/Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205
base:   https://github.com/hnaz/linux-mm master
config: i386-randconfig-s001-20210309 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.3-262-g5e674421-dirty
        # https://github.com/0day-ci/linux/commit/3c635af37b862e9c601ee8d5818f7da9cd3e2e57
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205
        git checkout 3c635af37b862e9c601ee8d5818f7da9cd3e2e57
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


"sparse warnings: (new ones prefixed by >>)"
>> mm/page_alloc.c:8348:5: sparse: sparse: symbol 'alloc_contig_ratelimit' was not declared. Should it be static?
>> mm/page_alloc.c:8353:6: sparse: sparse: symbol 'dump_migrate_failure_pages' was not declared. Should it be static?
   mm/page_alloc.c: note: in included file (through include/linux/mm.h):
   include/linux/gfp.h:325:27: sparse: sparse: restricted gfp_t degrades to integer
   include/linux/gfp.h:325:27: sparse: sparse: restricted gfp_t degrades to integer
   include/linux/gfp.h:325:27: sparse: sparse: restricted gfp_t degrades to integer
   include/linux/gfp.h:325:27: sparse: sparse: restricted gfp_t degrades to integer

Please review and possibly fold the followup patch.

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Chen, Rong A March 9, 2021, 12:41 a.m. UTC | #5
On 3/9/21 6:29 AM, Minchan Kim wrote:
> On Tue, Mar 09, 2021 at 05:29:30AM +0800, kernel test robot wrote:
>> Hi Minchan,
>>
>> I love your patch! Perhaps something to improve:
>>
>> [auto build test WARNING on hnaz-linux-mm/master]
>>
>> url:    https://github.com/0day-ci/linux/commits/Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205
>> base:   https://github.com/hnaz/linux-mm master
>> config: m68k-allmodconfig (attached as .config)
>> compiler: m68k-linux-gcc (GCC) 9.3.0
>> reproduce (this is a W=1 build):
>>          wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>>          chmod +x ~/bin/make.cross
>>          # https://github.com/0day-ci/linux/commit/3c635af37b862e9c601ee8d5818f7da9cd3e2e57
>>          git remote add linux-review https://github.com/0day-ci/linux
>>          git fetch --no-tags linux-review Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205
>>          git checkout 3c635af37b862e9c601ee8d5818f7da9cd3e2e57
>>          # save the attached .config to linux build tree
>>          COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=m68k
>>
>> If you fix the issue, kindly add following tag as appropriate
>> Reported-by: kernel test robot <lkp@intel.com>
>>
>> All warnings (new ones prefixed by >>):
>>
>>     arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra]
>>       169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory)
>>           |                                                 ^~
>>     include/linux/compiler.h:78:42: note: in definition of macro 'unlikely'
>>        78 | # define unlikely(x) __builtin_expect(!!(x), 0)
>>           |                                          ^
>>     include/linux/scatterlist.h:143:2: note: in expansion of macro 'BUG_ON'
>>       143 |  BUG_ON(!virt_addr_valid(buf));
>>           |  ^~~~~~
>>     include/linux/scatterlist.h:143:10: note: in expansion of macro 'virt_addr_valid'
>>       143 |  BUG_ON(!virt_addr_valid(buf));
>>           |          ^~~~~~~~~~~~~~~
>>     In file included from arch/m68k/include/asm/page.h:60,
>>                      from arch/m68k/include/asm/thread_info.h:6,
>>                      from include/linux/thread_info.h:38,
>>                      from include/asm-generic/preempt.h:5,
>>                      from ./arch/m68k/include/generated/asm/preempt.h:1,
>>                      from include/linux/preempt.h:78,
>>                      from include/linux/spinlock.h:51,
>>                      from include/linux/mmzone.h:8,
>>                      from include/linux/gfp.h:6,
>>                      from include/linux/mm.h:10,
>>                      from mm/page_alloc.c:19:
> I am not sure this is triggered by the patch since I could see the
> warn with reverting the patch.

Hi Minchan,

Only the lines prefixed by ">>" are related with the patch:

>> mm/page_alloc.c:8348:5: warning: no previous prototype for 'alloc_contig_ratelimit' [-Wmissing-prototypes]

     8348 | int alloc_contig_ratelimit(void)
          |     ^~~~~~~~~~~~~~~~~~~~~~

>> mm/page_alloc.c:8353:6: warning: no previous prototype for 'dump_migrate_failure_pages' [-Wmissing-prototypes]

     8353 | void dump_migrate_failure_pages(struct list_head *page_list)



Best Regards,
Rong Chen
Minchan Kim March 9, 2021, 2:21 a.m. UTC | #6
On Mon, Mar 08, 2021 at 04:21:28PM -0800, Andrew Morton wrote:
> On Mon,  8 Mar 2021 12:20:47 -0800 Minchan Kim <minchan@kernel.org> wrote:
> 
> > alloc_contig_range is usually used on cma area or movable zone.
> > It's critical if the page migration fails on those areas so
> > dump more debugging message.
> > 
> > page refcount, mapcount with page flags on dump_page are
> > helpful information to deduce the culprit. Furthermore,
> > dump_page_owner was super helpful to find long term pinner
> > who initiated the page allocation.
> > 
> > Admin could enable the dump like this(by default, disabled)
> > 
> > 	echo "func dump_migrate_failure_pages +p" > control
> > 
> > Admin could disable it.
> > 
> > 	echo "func dump_migrate_failure_pages =_" > control
> > 
> > ...
> >
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -8453,6 +8453,34 @@ static unsigned long pfn_max_align_up(unsigned long pfn)
> >  				pageblock_nr_pages));
> >  }
> >  
> > +#if defined(CONFIG_DYNAMIC_DEBUG) || \
> > +	(defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
> > +static DEFINE_RATELIMIT_STATE(alloc_contig_ratelimit_state,
> > +		DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
> > +int alloc_contig_ratelimit(void)
> > +{
> > +	return __ratelimit(&alloc_contig_ratelimit_state);
> > +}
> 
> Wow, that's an eyesore.  We're missing helpers in the ratelimit code. 
> Can we do something like
> 
> /* description goes here */
> #define RATELIMIT2(interval, burst)
> ({
> 	static DEFINE_RATELIMIT_STATE(_rs, interval, burst);
> 
> 	__ratelimit(_rs);
> })
> 
> #define RATELIMIT()
> 	RATELIMIT2(DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST)
> 
> > +void dump_migrate_failure_pages(struct list_head *page_list)
> > +{
> > +	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,
> > +			"migrate failure");
> > +	if (DYNAMIC_DEBUG_BRANCH(descriptor) &&
> > +			alloc_contig_ratelimit()) {
> > +		struct page *page;
> > +
> > +		WARN(1, "failed callstack");
> > +		list_for_each_entry(page, page_list, lru)
> > +			dump_page(page, "migration failure");
> > +	}
> > +}
> 
> Then we can simply do
> 
> 	if (DYNAMIC_DEBUG_BRANCH(descriptor) && RATELIMIT())

Sounds good idea to me. There are many places to take the
benefit. However, let me leave it until we could discuss
this patch. We could clean them up as follow patch.

Thank you.
Minchan Kim March 9, 2021, 2:23 a.m. UTC | #7
On Tue, Mar 09, 2021 at 08:41:44AM +0800, Rong Chen wrote:
> 
> 
> On 3/9/21 6:29 AM, Minchan Kim wrote:
> > On Tue, Mar 09, 2021 at 05:29:30AM +0800, kernel test robot wrote:
> > > Hi Minchan,
> > > 
> > > I love your patch! Perhaps something to improve:
> > > 
> > > [auto build test WARNING on hnaz-linux-mm/master]
> > > 
> > > url:    https://github.com/0day-ci/linux/commits/Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205
> > > base:   https://github.com/hnaz/linux-mm master
> > > config: m68k-allmodconfig (attached as .config)
> > > compiler: m68k-linux-gcc (GCC) 9.3.0
> > > reproduce (this is a W=1 build):
> > >          wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> > >          chmod +x ~/bin/make.cross
> > >          # https://github.com/0day-ci/linux/commit/3c635af37b862e9c601ee8d5818f7da9cd3e2e57
> > >          git remote add linux-review https://github.com/0day-ci/linux
> > >          git fetch --no-tags linux-review Minchan-Kim/mm-page_alloc-dump-migrate-failed-pages/20210309-042205
> > >          git checkout 3c635af37b862e9c601ee8d5818f7da9cd3e2e57
> > >          # save the attached .config to linux build tree
> > >          COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=m68k
> > > 
> > > If you fix the issue, kindly add following tag as appropriate
> > > Reported-by: kernel test robot <lkp@intel.com>
> > > 
> > > All warnings (new ones prefixed by >>):
> > > 
> > >     arch/m68k/include/asm/page_mm.h:169:49: warning: ordered comparison of pointer with null pointer [-Wextra]
> > >       169 | #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory)
> > >           |                                                 ^~
> > >     include/linux/compiler.h:78:42: note: in definition of macro 'unlikely'
> > >        78 | # define unlikely(x) __builtin_expect(!!(x), 0)
> > >           |                                          ^
> > >     include/linux/scatterlist.h:143:2: note: in expansion of macro 'BUG_ON'
> > >       143 |  BUG_ON(!virt_addr_valid(buf));
> > >           |  ^~~~~~
> > >     include/linux/scatterlist.h:143:10: note: in expansion of macro 'virt_addr_valid'
> > >       143 |  BUG_ON(!virt_addr_valid(buf));
> > >           |          ^~~~~~~~~~~~~~~
> > >     In file included from arch/m68k/include/asm/page.h:60,
> > >                      from arch/m68k/include/asm/thread_info.h:6,
> > >                      from include/linux/thread_info.h:38,
> > >                      from include/asm-generic/preempt.h:5,
> > >                      from ./arch/m68k/include/generated/asm/preempt.h:1,
> > >                      from include/linux/preempt.h:78,
> > >                      from include/linux/spinlock.h:51,
> > >                      from include/linux/mmzone.h:8,
> > >                      from include/linux/gfp.h:6,
> > >                      from include/linux/mm.h:10,
> > >                      from mm/page_alloc.c:19:
> > I am not sure this is triggered by the patch since I could see the
> > warn with reverting the patch.
> 
> Hi Minchan,
> 
> Only the lines prefixed by ">>" are related with the patch:
> 
> > > mm/page_alloc.c:8348:5: warning: no previous prototype for 'alloc_contig_ratelimit' [-Wmissing-prototypes]
> 
>     8348 | int alloc_contig_ratelimit(void)
>          |     ^~~~~~~~~~~~~~~~~~~~~~
> 
> > > mm/page_alloc.c:8353:6: warning: no previous prototype for 'dump_migrate_failure_pages' [-Wmissing-prototypes]
> 
>     8353 | void dump_migrate_failure_pages(struct list_head *page_list)
> 

Thanks for the clarification.

Then, below should fix the warning.
I will respin it at next revision once I get more feedback.

+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+       (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
+static void dump_migrate_failure_pages(struct list_head *page_list)
+{
+       static DEFINE_RATELIMIT_STATE(_rs,
+                                       DEFAULT_RATELIMIT_INTERVAL,
+                                       DEFAULT_RATELIMIT_BURST);
+
+       DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,
+                       "migrate failure");
+       if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) {
+               struct page *page;
+
+               WARN(1, "failed callstack");
+               list_for_each_entry(page, page_list, lru)
+                       dump_page(page, "migration failure");
+       }
+}
+#else
+static inline void dump_migrate_failure_pages(struct list_head *page_list)
+{
+}
+#endif
+
 /* [start, end) must belong to a single zone. */
 static int __alloc_contig_migrate_range(struct compact_control *cc,
                                        unsigned long start, unsigned long end)
@@ -8496,6 +8520,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
                                NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE);
        }
        if (ret < 0) {
+               dump_migrate_failure_pages(&cc->migratepages);
                putback_movable_pages(&cc->migratepages);
                return ret;
        }
Michal Hocko March 9, 2021, 9:32 a.m. UTC | #8
On Mon 08-03-21 12:20:47, Minchan Kim wrote:
> alloc_contig_range is usually used on cma area or movable zone.
> It's critical if the page migration fails on those areas so
> dump more debugging message.

I disagree with this statement. alloc_contig_range is not a reliable
allocator. Any user, be it CMA or direct users of alloc_contig_range
have to deal with allocation failures. Debugging information can be
still useful but considering migration failures critical is
overstatement to say the least.

> page refcount, mapcount with page flags on dump_page are
> helpful information to deduce the culprit. Furthermore,
> dump_page_owner was super helpful to find long term pinner
> who initiated the page allocation.
> 
> Admin could enable the dump like this(by default, disabled)
> 
> 	echo "func dump_migrate_failure_pages +p" > control
> 
> Admin could disable it.
> 
> 	echo "func dump_migrate_failure_pages =_" > control

My original idea was to add few pr_debug and -DDYNAMIC_DEBUG_MODULE for
page_alloc.c. It makes sense to enable a whole bunch at once though.
The naming should better reflect this is alloc_contig_range related
because the above sounds like a generic migration failure thing.

Somebody more familiar with the dynamic debugging infrastructure needs
to have a look but from a quick look it seems ok.

Do we really need all the ugly ifdefery, though? Don't we want to have
this compiled in all the time and just rely on the static branch managed
by the dynamic debugging framework?
 
[...]
> +void dump_migrate_failure_pages(struct list_head *page_list)
> +{
> +	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,
> +			"migrate failure");
> +	if (DYNAMIC_DEBUG_BRANCH(descriptor) &&
> +			alloc_contig_ratelimit()) {
> +		struct page *page;
> +
> +		WARN(1, "failed callstack");
> +		list_for_each_entry(page, page_list, lru)
> +			dump_page(page, "migration failure");
> +	}

Apart from the above, do we have to warn for something that is a
debugging aid? A similar concern wrt dump_page which uses pr_warn and
page owner is using even pr_alert.
Would it make sense to add a loglevel parameter both into __dump_page
and dump_page_owner?
Minchan Kim March 9, 2021, 4:15 p.m. UTC | #9
On Tue, Mar 09, 2021 at 10:32:51AM +0100, Michal Hocko wrote:
> On Mon 08-03-21 12:20:47, Minchan Kim wrote:
> > alloc_contig_range is usually used on cma area or movable zone.
> > It's critical if the page migration fails on those areas so
> > dump more debugging message.
> 
> I disagree with this statement. alloc_contig_range is not a reliable
> allocator. Any user, be it CMA or direct users of alloc_contig_range
> have to deal with allocation failures. Debugging information can be
> still useful but considering migration failures critical is
> overstatement to say the least.

Fair enough. Let's change it.

"Currently, debugging CMA allocation failure is too hard
due to lacking of page information. alloc_contig_range is
proper place to dump them since it has migrate-failed page
list."


> 
> > page refcount, mapcount with page flags on dump_page are
> > helpful information to deduce the culprit. Furthermore,
> > dump_page_owner was super helpful to find long term pinner
> > who initiated the page allocation.
> > 
> > Admin could enable the dump like this(by default, disabled)
> > 
> > 	echo "func dump_migrate_failure_pages +p" > control
> > 
> > Admin could disable it.
> > 
> > 	echo "func dump_migrate_failure_pages =_" > control
> 
> My original idea was to add few pr_debug and -DDYNAMIC_DEBUG_MODULE for
> page_alloc.c. It makes sense to enable a whole bunch at once though.
> The naming should better reflect this is alloc_contig_rage related
> because the above sounds like a generic migration failure thing.

alloc_contig_dump_pages?

> 
> Somebody more familiar with the dynamic debugging infrastructure needs
> to have a look but from from a quick look it seems ok.
> 
> Do we really need all the ugly ifdefery, though? Don't we want to have
> this compiled in all the time and just rely on the static branch managed
> by the dynamic debugging framework?

I have no further idea to make it simple while we keep the flexibility
for arguments and print format.

#if defined(CONFIG_DYNAMIC_DEBUG) || \
        (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
static void alloc_contig_dump_pages(struct list_head *page_list)
{
        static DEFINE_RATELIMIT_STATE(_rs,
                                        DEFAULT_RATELIMIT_INTERVAL,
                                        DEFAULT_RATELIMIT_BURST);

        DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,
                        "migrate failure");
        if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) {
                struct page *page;

                WARN(1, "failed callstack");
                list_for_each_entry(page, page_list, lru)
                        dump_page(page, "migration failure");
        }
}
#else
static inline void alloc_contig_dump_pages(struct list_head *page_list)
{
}
#endif

>  
> [...]
> > +void dump_migrate_failure_pages(struct list_head *page_list)
> > +{
> > +	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,
> > +			"migrate failure");
> > +	if (DYNAMIC_DEBUG_BRANCH(descriptor) &&
> > +			alloc_contig_ratelimit()) {
> > +		struct page *page;
> > +
> > +		WARN(1, "failed callstack");
> > +		list_for_each_entry(page, page_list, lru)
> > +			dump_page(page, "migration failure");
> > +	}
> 
> Apart from the above, do we have to warn for something that is a
> debugging aid? A similar concern wrt dump_page which uses pr_warn and

Make sense.

> page owner is using even pr_alert.
> Would it make sense to add a loglevel parameter both into __dump_page
> and dump_page_owner?

Let me try it.
Michal Hocko March 9, 2021, 4:32 p.m. UTC | #10
On Tue 09-03-21 08:15:41, Minchan Kim wrote:
> On Tue, Mar 09, 2021 at 10:32:51AM +0100, Michal Hocko wrote:
> > On Mon 08-03-21 12:20:47, Minchan Kim wrote:
> > > alloc_contig_range is usually used on cma area or movable zone.
> > > It's critical if the page migration fails on those areas so
> > > dump more debugging message.
> > 
> > I disagree with this statement. alloc_contig_range is not a reliable
> > allocator. Any user, be it CMA or direct users of alloc_contig_range
> > have to deal with allocation failures. Debugging information can be
> > still useful but considering migration failures critical is
> > overstatement to say the least.
> 
> Fair enough. Let's change it.
> 
> "Currently, debugging CMA allocation failure is too hard
> due to lacking of page information. alloc_contig_range is
> proper place to dump them since it has migrate-failed page
> list."

"Currently, debugging CMA allocation failures is quite limited. The most
common source of these failures seems to be page migration which
doesn't provide any useful information on the reason of the failure by
itself. alloc_contig_range can report those failures as it holds a list
of migrate-failed pages."
 
> > > page refcount, mapcount with page flags on dump_page are
> > > helpful information to deduce the culprit. Furthermore,
> > > dump_page_owner was super helpful to find long term pinner
> > > who initiated the page allocation.
> > > 
> > > Admin could enable the dump like this(by default, disabled)
> > > 
> > > 	echo "func dump_migrate_failure_pages +p" > control
> > > 
> > > Admin could disable it.
> > > 
> > > 	echo "func dump_migrate_failure_pages =_" > control
> > 
> > My original idea was to add few pr_debug and -DDYNAMIC_DEBUG_MODULE for
> > page_alloc.c. It makes sense to enable a whole bunch at once though.
> > The naming should better reflect this is alloc_contig_rage related
> > because the above sounds like a generic migration failure thing.
> 
> alloc_contig_dump_pages?

Yes this is more clear.

> > Somebody more familiar with the dynamic debugging infrastructure needs
> > to have a look but from from a quick look it seems ok.
> > 
> > Do we really need all the ugly ifdefery, though? Don't we want to have
> > this compiled in all the time and just rely on the static branch managed
> > by the dynamic debugging framework?
> 
> I have no further idea to make it simple while we keep the flexibility
> for arguments and print format.
> 
> #if defined(CONFIG_DYNAMIC_DEBUG) || \
>         (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
> static void alloc_contig_dump_pages(struct list_head *page_list)
> {
>         static DEFINE_RATELIMIT_STATE(_rs,
>                                         DEFAULT_RATELIMIT_INTERVAL,
>                                         DEFAULT_RATELIMIT_BURST);
> 
>         DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,
>                         "migrate failure");
>         if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) {
>                 struct page *page;
> 
>                 WARN(1, "failed callstack");
>                 list_for_each_entry(page, page_list, lru)
>                         dump_page(page, "migration failure");
>         }
> }
> #else
> static inline void alloc_contig_dump_pages(struct list_head *page_list)
> {
> }
> #endif

First, you would be much better off by dropping the rate limiting. I am
not really convinced this is really necessary as this is a debugging aid
enabled on request. A single list can be large enough to swamp logs so
why bother?

Also are all those CONFIG_DYNAMIC_DEBUG* ifdefs necessary?  Can we
simply enable DYNAMIC_DEBUG for page_alloc as I've suggested above?
Minchan Kim March 9, 2021, 5:27 p.m. UTC | #11
On Tue, Mar 09, 2021 at 05:32:08PM +0100, Michal Hocko wrote:
> On Tue 09-03-21 08:15:41, Minchan Kim wrote:
> > On Tue, Mar 09, 2021 at 10:32:51AM +0100, Michal Hocko wrote:
> > > On Mon 08-03-21 12:20:47, Minchan Kim wrote:
> > > > alloc_contig_range is usually used on cma area or movable zone.
> > > > It's critical if the page migration fails on those areas so
> > > > dump more debugging message.
> > > 
> > > I disagree with this statement. alloc_contig_range is not a reliable
> > > allocator. Any user, be it CMA or direct users of alloc_contig_range
> > > have to deal with allocation failures. Debugging information can be
> > > still useful but considering migration failures critical is
> > > overstatement to say the least.
> > 
> > Fair enough. Let's change it.
> > 
> > "Currently, debugging CMA allocation failure is too hard
> > due to lacking of page information. alloc_contig_range is
> > proper place to dump them since it has migrate-failed page
> > list."
> 
> "Currently, debugging CMA allocation failures is quite limited. The most
> commong source of these failures seems to be page migration which
> doesn't provide any useful information on the reason of the failure by
> itself. alloc_contig_range can report those failures as it holds a list
> of migrate-failed pages."

Will take it. Thanks.

< snip >

> > > Somebody more familiar with the dynamic debugging infrastructure needs
> > > to have a look but from from a quick look it seems ok.
> > > 
> > > Do we really need all the ugly ifdefery, though? Don't we want to have
> > > this compiled in all the time and just rely on the static branch managed
> > > by the dynamic debugging framework?
> > 
> > I have no further idea to make it simple while we keep the flexibility
> > for arguments and print format.
> > 
> > #if defined(CONFIG_DYNAMIC_DEBUG) || \
> >         (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
> > static void alloc_contig_dump_pages(struct list_head *page_list)
> > {
> >         static DEFINE_RATELIMIT_STATE(_rs,
> >                                         DEFAULT_RATELIMIT_INTERVAL,
> >                                         DEFAULT_RATELIMIT_BURST);
> > 
> >         DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,
> >                         "migrate failure");
> >         if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) {
> >                 struct page *page;
> > 
> >                 WARN(1, "failed callstack");
> >                 list_for_each_entry(page, page_list, lru)
> >                         dump_page(page, "migration failure");
> >         }
> > }
> > #else
> > static inline void alloc_contig_dump_pages(struct list_head *page_list)
> > {
> > }
> > #endif
> 
> First, you would be much better off by droping the rate limitting. I am
> nt really convinced this is really necessary as this is a debugging aid
> enabled on request. A single list can be large enough to swamp logs so
> why bother?

No problem. I just added it since David mentioned hugetlb pages easily
fail to migrate at the moment.
Yes, we could add the ratelimit if we get complaints.

> 
> Also are all those CONFIG_DYNAMIC_DEBUG* ifdefs necessary?  Can we
> simply enable DYNAMIC_DEBUG for page_alloc as I've suggested above?

They are different usecases.

With DYNAMIC_DEBUG_MODULE with CONFIG_DYNAMIC_DEBUG_CORE,
it works for only specific compile flags as you suggested.
(CONFIG_DYNAMIC_DEBUG_CORE is requirement to work DYNAMIC_DEBUG_MODULE.

With CONFIG_DYNAMIC_DEBUG, user could enable/disable every dynamic
debug places without needing DYNAMIC_DEBUG_MODULE flags for source
files.

Both usecase makes sense to me.
Minchan Kim March 10, 2021, 7:42 a.m. UTC | #12
On Tue, Mar 09, 2021 at 08:15:41AM -0800, Minchan Kim wrote:

< snip >

> > [...]
> > > +void dump_migrate_failure_pages(struct list_head *page_list)
> > > +{
> > > +	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,
> > > +			"migrate failure");
> > > +	if (DYNAMIC_DEBUG_BRANCH(descriptor) &&
> > > +			alloc_contig_ratelimit()) {
> > > +		struct page *page;
> > > +
> > > +		WARN(1, "failed callstack");
> > > +		list_for_each_entry(page, page_list, lru)
> > > +			dump_page(page, "migration failure");
> > > +	}
> > 
> > Apart from the above, do we have to warn for something that is a
> > debugging aid? A similar concern wrt dump_page which uses pr_warn and
> 
> Make sense.
> 
> > page owner is using even pr_alert.
> > Would it make sense to add a loglevel parameter both into __dump_page
> > and dump_page_owner?
> 
> Let me try it.

I looked through them and made a first draft to clean them up.

It's bigger than my initial expectation because there are many callsites
that use dump_page and stack_trace_print with inconsistent loglevels.
Since it's not a problem specific to this work, I'd like to deal with
it as a separate patchset, since I don't want to get stuck here instead
of pursuing my initial goal.

FYI,

Subject: [RFC 0/5] make dump_page aware of loglevel

- Forked from [1]

dump_page uses __dump_page and dump_page_owner internally to
print various information. However, their printk loglevel are
inconsistent in that

__dump_page: KERN_WARNING
__dump_page_owner: KERN_ALERT
        stack_trace_print: KERN_DEFAULT

To make them consistent from dump_page, this patch introduces
pr_loglevel in printk and make the utility functions aware of
loglevel. Finally, last patch changes dump_page to support
loglevel to make the printing level consistent.

[1] https://lore.kernel.org/linux-mm/YEdAw6gnp9XxoWUQ@dhcp22.suse.cz/

Minchan Kim (5):
  mm: introduce pr_loglevel for __dump_[page]_owner
  stacktrace: stack_trace_print aware of loglevel
  mm: page_owner: dump_page_owner aware of loglevel
  mm: debug: __dump_page aware of loglevel
  mm: debug: dump_page aware of loglevel
  drivers/md/dm-bufio.c       |  2 +-
 drivers/virtio/virtio_mem.c |  2 +-
 fs/btrfs/ref-verify.c       |  2 +-
 fs/fuse/dev.c               |  2 +-
 include/linux/mmdebug.h     | 10 ++++++----
 include/linux/page_owner.h  |  8 ++++----
 include/linux/printk.h      | 12 +++++++++++
 include/linux/stacktrace.h  |  4 ++--
 kernel/backtracetest.c      |  2 +-
 kernel/dma/debug.c          |  3 ++-
 kernel/kcsan/report.c       |  7 ++++---
 kernel/locking/lockdep.c    |  3 ++-
 kernel/stacktrace.c         |  5 +++--
 mm/debug.c                  | 40 ++++++++++++++++++-------------------
 mm/filemap.c                |  2 +-
 mm/gup_test.c               |  4 ++--
 mm/huge_memory.c            |  4 ++--
 mm/kasan/report.c           |  4 ++--
 mm/kfence/report.c          |  3 ++-
 mm/kmemleak.c               |  2 +-
 mm/memory.c                 |  2 +-
 mm/memory_hotplug.c         |  4 ++--
 mm/page_alloc.c             |  4 ++--
 mm/page_isolation.c         |  2 +-
 mm/page_owner.c             | 24 +++++++++++-----------
 25 files changed, 88 insertions(+), 69 deletions(-)
David Hildenbrand March 10, 2021, 8:20 a.m. UTC | #13
On 10.03.21 08:42, Minchan Kim wrote:
> On Tue, Mar 09, 2021 at 08:15:41AM -0800, Minchan Kim wrote:
> 
> < snip >
> 
>>> [...]
>>>> +void dump_migrate_failure_pages(struct list_head *page_list)
>>>> +{
>>>> +	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,
>>>> +			"migrate failure");
>>>> +	if (DYNAMIC_DEBUG_BRANCH(descriptor) &&
>>>> +			alloc_contig_ratelimit()) {
>>>> +		struct page *page;
>>>> +
>>>> +		WARN(1, "failed callstack");
>>>> +		list_for_each_entry(page, page_list, lru)
>>>> +			dump_page(page, "migration failure");
>>>> +	}
>>>
>>> Apart from the above, do we have to warn for something that is a
>>> debugging aid? A similar concern wrt dump_page which uses pr_warn and
>>
>> Make sense.
>>
>>> page owner is using even pr_alert.
>>> Would it make sense to add a loglevel parameter both into __dump_page
>>> and dump_page_owner?
>>
>> Let me try it.
> 
> I looked though them and made first draft to clean them up.
> 
> It's bigger than my initial expectaion because there are many callsites
> to use dump_page and stack_trace_print inconsistent loglevel.
> Since it's not a specific problem for this work, I'd like to deal with
> it as separate patchset since I don't want to be stuck on here for my
> initial goal.

Why the need to rush regarding your series?

If it will clean up your patch significantly, then I think doing the 
cleanups first is the proper way to go.

I really don't get why this is a real problem.
Michal Hocko March 10, 2021, 1:04 p.m. UTC | #14
On Tue 09-03-21 09:27:51, Minchan Kim wrote:
> On Tue, Mar 09, 2021 at 05:32:08PM +0100, Michal Hocko wrote:
> > On Tue 09-03-21 08:15:41, Minchan Kim wrote:
> > > On Tue, Mar 09, 2021 at 10:32:51AM +0100, Michal Hocko wrote:
> > > > On Mon 08-03-21 12:20:47, Minchan Kim wrote:
> > > > > alloc_contig_range is usually used on cma area or movable zone.
> > > > > It's critical if the page migration fails on those areas so
> > > > > dump more debugging message.
> > > > 
> > > > I disagree with this statement. alloc_contig_range is not a reliable
> > > > allocator. Any user, be it CMA or direct users of alloc_contig_range
> > > > have to deal with allocation failures. Debugging information can be
> > > > still useful but considering migration failures critical is
> > > > overstatement to say the least.
> > > 
> > > Fair enough. Let's change it.
> > > 
> > > "Currently, debugging CMA allocation failure is too hard
> > > due to lacking of page information. alloc_contig_range is
> > > proper place to dump them since it has migrate-failed page
> > > list."
> > 
> > "Currently, debugging CMA allocation failures is quite limited. The most
> > commong source of these failures seems to be page migration which
> > doesn't provide any useful information on the reason of the failure by
> > itself. alloc_contig_range can report those failures as it holds a list
> > of migrate-failed pages."
> 
> Will take it. Thanks.
> 
> < snip >
> 
> > > > Somebody more familiar with the dynamic debugging infrastructure needs
> > > > to have a look but from from a quick look it seems ok.
> > > > 
> > > > Do we really need all the ugly ifdefery, though? Don't we want to have
> > > > this compiled in all the time and just rely on the static branch managed
> > > > by the dynamic debugging framework?
> > > 
> > > I have no further idea to make it simple while we keep the flexibility
> > > for arguments and print format.
> > > 
> > > #if defined(CONFIG_DYNAMIC_DEBUG) || \
> > >         (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
> > > static void alloc_contig_dump_pages(struct list_head *page_list)
> > > {
> > >         static DEFINE_RATELIMIT_STATE(_rs,
> > >                                         DEFAULT_RATELIMIT_INTERVAL,
> > >                                         DEFAULT_RATELIMIT_BURST);
> > > 
> > >         DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,
> > >                         "migrate failure");
> > >         if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) {
> > >                 struct page *page;
> > > 
> > >                 WARN(1, "failed callstack");
> > >                 list_for_each_entry(page, page_list, lru)
> > >                         dump_page(page, "migration failure");
> > >         }
> > > }
> > > #else
> > > static inline void alloc_contig_dump_pages(struct list_head *page_list)
> > > {
> > > }
> > > #endif
> > 
> > First, you would be much better off by droping the rate limitting. I am
> > nt really convinced this is really necessary as this is a debugging aid
> > enabled on request. A single list can be large enough to swamp logs so
> > why bother?
> 
> No problem. Just added since David mentioned hugetlb pages are easily
> fail to mgirate at this moment.

Well a single hugetlb allocation can swamp the log already because the
ratelimit as suggested above will allow the full list to be dumped.
Moreover if there is an additional failure right afterwards then it
would be allowed as well. Not to mention that rate limiting is terrible
for long taking operations by design as it doesn't know when the
operation ended. So above construct is merely a code complication rather
than a useful tool to reduce the log flow.

> Yes, We could add the ratelimit if we get complain.
> 
> > 
> > Also are all those CONFIG_DYNAMIC_DEBUG* ifdefs necessary?  Can we
> > simply enable DYNAMIC_DEBUG for page_alloc as I've suggested above?
> 
> They are different usecases.
> 
> With DYNAMIC_DEBUG_MODULE with CONFIG_DYNAMIC_DEBUG_CORE,
> it works for only specific compile flags as you suggested.
> (CONFIG_DYNAMIC_DEBUG_CORE is requirement to work DYNAMIC_DEBUG_MODULE.
> 
> With CONFIG_DYNAMIC_DEBUG, user could enable/disable every dynamic
> debug places without needing DYNAMIC_DEBUG_MODULE flags for source
> files.
> 
> Both usecase makes sense to me.

Well, this is more of a question for dynamic debugging maintainers. But
it would be really great to reduce the ifdefery as much as possible.
Michal Hocko March 10, 2021, 1:07 p.m. UTC | #15
On Tue 09-03-21 23:42:46, Minchan Kim wrote:
> On Tue, Mar 09, 2021 at 08:15:41AM -0800, Minchan Kim wrote:
> 
> < snip >
> 
> > > [...]
> > > > +void dump_migrate_failure_pages(struct list_head *page_list)
> > > > +{
> > > > +	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,
> > > > +			"migrate failure");
> > > > +	if (DYNAMIC_DEBUG_BRANCH(descriptor) &&
> > > > +			alloc_contig_ratelimit()) {
> > > > +		struct page *page;
> > > > +
> > > > +		WARN(1, "failed callstack");
> > > > +		list_for_each_entry(page, page_list, lru)
> > > > +			dump_page(page, "migration failure");
> > > > +	}
> > > 
> > > Apart from the above, do we have to warn for something that is a
> > > debugging aid? A similar concern wrt dump_page which uses pr_warn and
> > 
> > Make sense.
> > 
> > > page owner is using even pr_alert.
> > > Would it make sense to add a loglevel parameter both into __dump_page
> > > and dump_page_owner?
> > 
> > Let me try it.
> 
> I looked though them and made first draft to clean them up.
> 
> It's bigger than my initial expectaion because there are many callsites
> to use dump_page and stack_trace_print inconsistent loglevel. 
> Since it's not a specific problem for this work, I'd like to deal with
> it as separate patchset since I don't want to be stuck on here for my
> initial goal.
> 
> FYI,
> 
> Subject: [RFC 0/5] make dump_page aware of loglevel
> 
> - Forked from [1]
> 
> dump_page uses __dump_page and dump_page_owner internally to
> print various information. However, their printk loglevel are
> inconsistent in that
> 
> __dump_page: KERN_WARNING
> __dump_page_owner: KERN_ALERT
>         stack_trace_print: KERN_DEFAULT
> 
> To make them consistent from dump_page, this patch introduces
> pr_loglevel in printk and make the utility functions aware of
> loglevel. Finally, last patch changes dump_page to support
> loglevel to make the printing level consistent.
> 
> [1] https://lore.kernel.org/linux-mm/YEdAw6gnp9XxoWUQ@dhcp22.suse.cz/
> 
> Minchan Kim (5):
>   mm: introduce pr_loglevel for __dump_[page]_owner
>   stacktrace: stack_trace_print aware of loglevel
>   mm: page_owner: dump_page_owner aware of loglevel
>   mm: debug: __dump_page aware of loglevel
>   mm: debug: dump_page aware of loglevel
>   drivers/md/dm-bufio.c       |  2 +-
>  drivers/virtio/virtio_mem.c |  2 +-
>  fs/btrfs/ref-verify.c       |  2 +-
>  fs/fuse/dev.c               |  2 +-
>  include/linux/mmdebug.h     | 10 ++++++----
>  include/linux/page_owner.h  |  8 ++++----
>  include/linux/printk.h      | 12 +++++++++++
>  include/linux/stacktrace.h  |  4 ++--
>  kernel/backtracetest.c      |  2 +-
>  kernel/dma/debug.c          |  3 ++-
>  kernel/kcsan/report.c       |  7 ++++---
>  kernel/locking/lockdep.c    |  3 ++-
>  kernel/stacktrace.c         |  5 +++--
>  mm/debug.c                  | 40 ++++++++++++++++++-------------------
>  mm/filemap.c                |  2 +-
>  mm/gup_test.c               |  4 ++--
>  mm/huge_memory.c            |  4 ++--
>  mm/kasan/report.c           |  4 ++--
>  mm/kfence/report.c          |  3 ++-
>  mm/kmemleak.c               |  2 +-
>  mm/memory.c                 |  2 +-
>  mm/memory_hotplug.c         |  4 ++--
>  mm/page_alloc.c             |  4 ++--
>  mm/page_isolation.c         |  2 +-
>  mm/page_owner.c             | 24 +++++++++++-----------
>  25 files changed, 88 insertions(+), 69 deletions(-)

That is a lot of churn indeed. Have you considered adding $FOO_lglvl
variants for those so that you can use them for your particular case
without affecting most existing users? We have discussed something
similar in another email thread regarding lru_add_drain_all.
Matthew Wilcox March 10, 2021, 1:26 p.m. UTC | #16
On Tue, Mar 09, 2021 at 10:32:51AM +0100, Michal Hocko wrote:
> Apart from the above, do we have to warn for something that is a
> debugging aid? A similar concern wrt dump_page which uses pr_warn and
> page owner is using even pr_alert.
> Would it make sense to add a loglevel parameter both into __dump_page
> and dump_page_owner?

No.  What would make sense is turning __dump_page() inside-out.
Something like printk("%pP\n");

In lib/vsprintf.c, there's a big switch statement in the function
pointer() that handles printing things like IPv6 addresses, dentries,
and function symbols.

Then we can do whatever we want around the new %pP, including choosing
the log level, adding additional information, choosing to dump the page
to a sysfs file, etc, etc.
Michal Hocko March 10, 2021, 1:54 p.m. UTC | #17
On Wed 10-03-21 13:26:23, Matthew Wilcox wrote:
> On Tue, Mar 09, 2021 at 10:32:51AM +0100, Michal Hocko wrote:
> > Apart from the above, do we have to warn for something that is a
> > debugging aid? A similar concern wrt dump_page which uses pr_warn and
> > page owner is using even pr_alert.
> > Would it make sense to add a loglevel parameter both into __dump_page
> > and dump_page_owner?
> 
> No.  What would make sense is turning __dump_page() inside-out.
> Something like printk("%pP\n");
> 
> In lib/vsprintf.c, there's a big switch statement in the function
> pointer() that handles printing things like IPv6 addresses, dentries,
> and function symbols.
> 
> Then we can do whatever we want around the new %pP, including choosing
> the log level, adding additional information, choosing to dump the page
> to a sysfs file, etc, etc.

Hmm, __dump_page has grown quite some heavy lifting over time and I am
not sure this is a good candidate to put into printk proper (e.g. is it
safe/reasonable to call get_kernel_nofault from printk - aka arbitrary
context)?.

But you've got a point that such a printk format wouldn't need to be 1:1
with the existing __dump_page. There is quite a lot to infer from page
count, map count, flags, page type already. Then a question would be
what is an actual advantage of %pP over dump_page_info(loglvl, p). One I
can see is that %pP would allow to dump the state into a string and so
it would be more versatile but I am not aware of a usecase for that
(maybe tracing?).
Minchan Kim March 10, 2021, 3:45 p.m. UTC | #18
On Wed, Mar 10, 2021 at 09:20:40AM +0100, David Hildenbrand wrote:
> On 10.03.21 08:42, Minchan Kim wrote:
> > On Tue, Mar 09, 2021 at 08:15:41AM -0800, Minchan Kim wrote:
> > 
> > < snip >
> > 
> > > > [...]
> > > > > +void dump_migrate_failure_pages(struct list_head *page_list)
> > > > > +{
> > > > > +	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,
> > > > > +			"migrate failure");
> > > > > +	if (DYNAMIC_DEBUG_BRANCH(descriptor) &&
> > > > > +			alloc_contig_ratelimit()) {
> > > > > +		struct page *page;
> > > > > +
> > > > > +		WARN(1, "failed callstack");
> > > > > +		list_for_each_entry(page, page_list, lru)
> > > > > +			dump_page(page, "migration failure");
> > > > > +	}
> > > > 
> > > > Apart from the above, do we have to warn for something that is a
> > > > debugging aid? A similar concern wrt dump_page which uses pr_warn and
> > > 
> > > Make sense.
> > > 
> > > > page owner is using even pr_alert.
> > > > Would it make sense to add a loglevel parameter both into __dump_page
> > > > and dump_page_owner?
> > > 
> > > Let me try it.
> > 
> > I looked though them and made first draft to clean them up.
> > 
> > It's bigger than my initial expectaion because there are many callsites
> > to use dump_page and stack_trace_print inconsistent loglevel.
> > Since it's not a specific problem for this work, I'd like to deal with
> > it as separate patchset since I don't want to be stuck on here for my
> > initial goal.
> 
> Why the need to rush regarding your series?
> 
> If it will clean up your patch significantly, then I think doing the
> cleanups first is the proper way to go.

It doesn't clean up my patch at all. dump_page and internal functions
are already broken in several places from print level point of view.

I agree that it's good to fix, but it shouldn't be a blocker for this
work since it's not a new problem that this patch introduces.

> 
> I really don't get why this is a real problem.

That's because it's not my top priority.
Minchan Kim March 10, 2021, 3:59 p.m. UTC | #19
On Wed, Mar 10, 2021 at 02:04:55PM +0100, Michal Hocko wrote:

< snip >

> > > Also are all those CONFIG_DYNAMIC_DEBUG* ifdefs necessary?  Can we
> > > simply enable DYNAMIC_DEBUG for page_alloc as I've suggested above?
> > 
> > They are different usecases.
> > 
> > With DYNAMIC_DEBUG_MODULE with CONFIG_DYNAMIC_DEBUG_CORE,
> > it works for only specific compile flags as you suggested.
> > (CONFIG_DYNAMIC_DEBUG_CORE is requirement to work DYNAMIC_DEBUG_MODULE.
> > 
> > With CONFIG_DYNAMIC_DEBUG, user could enable/disable every dynamic
> > debug places without needing DYNAMIC_DEBUG_MODULE flags for source
> > files.
> > 
> > Both usecase makes sense to me.
> 
> Well, this is more of a question for dynamic debugging maintainers. But
> it would be really great to reduce the ifdefery as much as possible.

I don't understand why this is something particular case which is okay
to lose either way to make dyndbg dynamic.
Minchan Kim March 10, 2021, 4:05 p.m. UTC | #20
On Wed, Mar 10, 2021 at 02:07:05PM +0100, Michal Hocko wrote:
> On Tue 09-03-21 23:42:46, Minchan Kim wrote:
> > On Tue, Mar 09, 2021 at 08:15:41AM -0800, Minchan Kim wrote:
> > 
> > < snip >
> > 
> > > > [...]
> > > > > +void dump_migrate_failure_pages(struct list_head *page_list)
> > > > > +{
> > > > > +	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,
> > > > > +			"migrate failure");
> > > > > +	if (DYNAMIC_DEBUG_BRANCH(descriptor) &&
> > > > > +			alloc_contig_ratelimit()) {
> > > > > +		struct page *page;
> > > > > +
> > > > > +		WARN(1, "failed callstack");
> > > > > +		list_for_each_entry(page, page_list, lru)
> > > > > +			dump_page(page, "migration failure");
> > > > > +	}
> > > > 
> > > > Apart from the above, do we have to warn for something that is a
> > > > debugging aid? A similar concern wrt dump_page which uses pr_warn and
> > > 
> > > Make sense.
> > > 
> > > > page owner is using even pr_alert.
> > > > Would it make sense to add a loglevel parameter both into __dump_page
> > > > and dump_page_owner?
> > > 
> > > Let me try it.
> > 
> > I looked though them and made first draft to clean them up.
> > 
> > It's bigger than my initial expectaion because there are many callsites
> > to use dump_page and stack_trace_print inconsistent loglevel. 
> > Since it's not a specific problem for this work, I'd like to deal with
> > it as separate patchset since I don't want to be stuck on here for my
> > initial goal.
> > 
> > FYI,
> > 
> > Subject: [RFC 0/5] make dump_page aware of loglevel
> > 
> > - Forked from [1]
> > 
> > dump_page uses __dump_page and dump_page_owner internally to
> > print various information. However, their printk loglevel are
> > inconsistent in that
> > 
> > __dump_page: KERN_WARNING
> > __dump_page_owner: KERN_ALERT
> >         stack_trace_print: KERN_DEFAULT
> > 
> > To make them consistent from dump_page, this patch introduces
> > pr_loglevel in printk and make the utility functions aware of
> > loglevel. Finally, last patch changes dump_page to support
> > loglevel to make the printing level consistent.
> > 
> > [1] https://lore.kernel.org/linux-mm/YEdAw6gnp9XxoWUQ@dhcp22.suse.cz/
> > 
> > Minchan Kim (5):
> >   mm: introduce pr_loglevel for __dump_[page]_owner
> >   stacktrace: stack_trace_print aware of loglevel
> >   mm: page_owner: dump_page_owner aware of loglevel
> >   mm: debug: __dump_page aware of loglevel
> >   mm: debug: dump_page aware of loglevel
> >   drivers/md/dm-bufio.c       |  2 +-
> >  drivers/virtio/virtio_mem.c |  2 +-
> >  fs/btrfs/ref-verify.c       |  2 +-
> >  fs/fuse/dev.c               |  2 +-
> >  include/linux/mmdebug.h     | 10 ++++++----
> >  include/linux/page_owner.h  |  8 ++++----
> >  include/linux/printk.h      | 12 +++++++++++
> >  include/linux/stacktrace.h  |  4 ++--
> >  kernel/backtracetest.c      |  2 +-
> >  kernel/dma/debug.c          |  3 ++-
> >  kernel/kcsan/report.c       |  7 ++++---
> >  kernel/locking/lockdep.c    |  3 ++-
> >  kernel/stacktrace.c         |  5 +++--
> >  mm/debug.c                  | 40 ++++++++++++++++++-------------------
> >  mm/filemap.c                |  2 +-
> >  mm/gup_test.c               |  4 ++--
> >  mm/huge_memory.c            |  4 ++--
> >  mm/kasan/report.c           |  4 ++--
> >  mm/kfence/report.c          |  3 ++-
> >  mm/kmemleak.c               |  2 +-
> >  mm/memory.c                 |  2 +-
> >  mm/memory_hotplug.c         |  4 ++--
> >  mm/page_alloc.c             |  4 ++--
> >  mm/page_isolation.c         |  2 +-
> >  mm/page_owner.c             | 24 +++++++++++-----------
> >  25 files changed, 88 insertions(+), 69 deletions(-)
> 
> The is a lot of churn indeed. Have you considered adding $FOO_lglvl
> variants for those so that you can use them for your particular case
> without affecting most of existing users? Something similar we have
> discussed in other email thread regarding lru_add_drain_all?

I thought about that but didn't try it, since it couldn't make them
atomic (for example, other printk calls in other contexts would be
affected by the $FOO_lglvl).
Michal Hocko March 10, 2021, 4:46 p.m. UTC | #21
On Wed 10-03-21 08:05:36, Minchan Kim wrote:
> On Wed, Mar 10, 2021 at 02:07:05PM +0100, Michal Hocko wrote:
[...]
> > The is a lot of churn indeed. Have you considered adding $FOO_lglvl
> > variants for those so that you can use them for your particular case
> > without affecting most of existing users? Something similar we have
> > discussed in other email thread regarding lru_add_drain_all?
> 
> I thought that way but didn't try since it couldn't make them
> atomic(For example, other printk place in other context will
> affect by the $FOO_lglvl).

I do not follow. I meant something like the following (likely incomplete
but you should get an idea).

diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
index 3468794f83d2..71b402eb8f78 100644
--- a/include/linux/page_owner.h
+++ b/include/linux/page_owner.h
@@ -14,7 +14,7 @@ extern void __set_page_owner(struct page *page,
 extern void __split_page_owner(struct page *page, unsigned int nr);
 extern void __copy_page_owner(struct page *oldpage, struct page *newpage);
 extern void __set_page_owner_migrate_reason(struct page *page, int reason);
-extern void __dump_page_owner(struct page *page);
+extern void __dump_page_owner(struct page *page, const char *loglvl);
 extern void pagetypeinfo_showmixedcount_print(struct seq_file *m,
 					pg_data_t *pgdat, struct zone *zone);
 
@@ -46,10 +46,10 @@ static inline void set_page_owner_migrate_reason(struct page *page, int reason)
 	if (static_branch_unlikely(&page_owner_inited))
 		__set_page_owner_migrate_reason(page, reason);
 }
-static inline void dump_page_owner(struct page *page)
+static inline void dump_page_owner(struct page *page, const char *loglvl)
 {
 	if (static_branch_unlikely(&page_owner_inited))
-		__dump_page_owner(page);
+		__dump_page_owner(page, loglvl);
 }
 #else
 static inline void reset_page_owner(struct page *page, unsigned int order)
@@ -69,7 +69,7 @@ static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
 static inline void set_page_owner_migrate_reason(struct page *page, int reason)
 {
 }
-static inline void dump_page_owner(struct page *page)
+static inline void dump_page_owner(struct page *page, const char *loglvl)
 {
 }
 #endif /* CONFIG_PAGE_OWNER */
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index 9f8117c7cfdd..1b13135d9916 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -14,6 +14,18 @@
 #include <linux/kallsyms.h>
 #include <linux/stacktrace.h>
 
+void __stack_trace_print(const unsigned long *entries, unsigned int nr_entries,
+		       int spacesconst, char *loglvl)
+{
+	unsigned int i;
+
+	if (WARN_ON(!entries))
+		return;
+
+	for (i = 0; i < nr_entries; i++)
+		printk("%s%*c%pS\n", loglvl, 1 + spaces, ' ', (void *)entries[i]);
+}
+
 /**
  * stack_trace_print - Print the entries in the stack trace
  * @entries:	Pointer to storage array
@@ -23,13 +35,7 @@
 void stack_trace_print(const unsigned long *entries, unsigned int nr_entries,
 		       int spaces)
 {
-	unsigned int i;
-
-	if (WARN_ON(!entries))
-		return;
-
-	for (i = 0; i < nr_entries; i++)
-		printk("%*c%pS\n", 1 + spaces, ' ', (void *)entries[i]);
+	__stack_trace_print(entries, nr_entries, spaces, KERN_DEFAULT);
 }
 EXPORT_SYMBOL_GPL(stack_trace_print);
 
diff --git a/mm/debug.c b/mm/debug.c
index 8a40b3fefbeb..b989ee2ffa89 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -42,7 +42,7 @@ const struct trace_print_flags vmaflag_names[] = {
 	{0, NULL}
 };
 
-void __dump_page(struct page *page, const char *reason)
+void __dump_page(struct page *page, const char *reason, const char *loglvl)
 {
 	struct page *head = compound_head(page);
 	struct address_space *mapping;
@@ -64,7 +64,7 @@ void __dump_page(struct page *page, const char *reason)
 	 * dump_page() when detected.
 	 */
 	if (page_poisoned) {
-		pr_warn("page:%px is uninitialized and poisoned", page);
+		printk("%spage:%px is uninitialized and poisoned", loglvl, page);
 		goto hex_only;
 	}
 
@@ -95,17 +95,17 @@ void __dump_page(struct page *page, const char *reason)
 	 */
 	mapcount = PageSlab(head) ? 0 : page_mapcount(page);
 
-	pr_warn("page:%p refcount:%d mapcount:%d mapping:%p index:%#lx pfn:%#lx\n",
+	printk("%spage:%p refcount:%d mapcount:%d mapping:%p index:%#lx pfn:%#lx\n", loglvl,
 			page, page_ref_count(head), mapcount, mapping,
 			page_to_pgoff(page), page_to_pfn(page));
 	if (compound) {
 		if (hpage_pincount_available(page)) {
-			pr_warn("head:%p order:%u compound_mapcount:%d compound_pincount:%d\n",
+			printk("%shead:%p order:%u compound_mapcount:%d compound_pincount:%d\n", loglvl,
 					head, compound_order(head),
 					head_compound_mapcount(head),
 					head_compound_pincount(head));
 		} else {
-			pr_warn("head:%p order:%u compound_mapcount:%d\n",
+			printk("%shead:%p order:%u compound_mapcount:%d\n", loglvl,
 					head, compound_order(head),
 					head_compound_mapcount(head));
 		}
@@ -128,30 +128,31 @@ void __dump_page(struct page *page, const char *reason)
 		 */
 		if (get_kernel_nofault(host, &mapping->host) ||
 		    get_kernel_nofault(a_ops, &mapping->a_ops)) {
-			pr_warn("failed to read mapping contents, not a valid kernel address?\n");
+			prrintk("%sfailed to read mapping contents, not a valid kernel address?\n",
+					loglvl);
 			goto out_mapping;
 		}
 
 		if (!host) {
-			pr_warn("aops:%ps\n", a_ops);
+			printk("%saops:%ps\n", loglvl, a_ops);
 			goto out_mapping;
 		}
 
 		if (get_kernel_nofault(dentry_first, &host->i_dentry.first) ||
 		    get_kernel_nofault(ino, &host->i_ino)) {
-			pr_warn("aops:%ps with invalid host inode %px\n",
+			printk("%saops:%ps with invalid host inode %px\n", loglvl,
 					a_ops, host);
 			goto out_mapping;
 		}
 
 		if (!dentry_first) {
-			pr_warn("aops:%ps ino:%lx\n", a_ops, ino);
+			printk("%saops:%ps ino:%lx\n", loglvl, a_ops, ino);
 			goto out_mapping;
 		}
 
 		dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
 		if (get_kernel_nofault(dentry, dentry_ptr)) {
-			pr_warn("aops:%ps ino:%lx with invalid dentry %px\n",
+			printk("%saops:%ps ino:%lx with invalid dentry %px\n", loglvl,
 					a_ops, ino, dentry_ptr);
 		} else {
 			/*
@@ -159,38 +160,38 @@ void __dump_page(struct page *page, const char *reason)
 			 * crash, but it's unlikely that we reach here with a
 			 * corrupted struct page
 			 */
-			pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n",
+			printk("%saops:%ps ino:%lx dentry name:\"%pd\"\n", loglvl,
 					a_ops, ino, &dentry);
 		}
 	}
 out_mapping:
 	BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1);
 
-	pr_warn("%sflags: %#lx(%pGp)%s\n", type, head->flags, &head->flags,
+	printk("%s%sflags: %#lx(%pGp)%s\n", loglvl, type, head->flags, &head->flags,
 		page_cma ? " CMA" : "");
 
 hex_only:
-	print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32,
+	print_hex_dump(loglvl, "%sraw: ", DUMP_PREFIX_NONE, 32,
 			sizeof(unsigned long), page,
 			sizeof(struct page), false);
 	if (head != page)
-		print_hex_dump(KERN_WARNING, "head: ", DUMP_PREFIX_NONE, 32,
+		print_hex_dump(loglvl, "head: ", DUMP_PREFIX_NONE, 32,
 			sizeof(unsigned long), head,
 			sizeof(struct page), false);
 
 	if (reason)
-		pr_warn("page dumped because: %s\n", reason);
+		printk("%spage dumped because: %s\n", loglvl, reason);
 
 #ifdef CONFIG_MEMCG
 	if (!page_poisoned && page->memcg_data)
-		pr_warn("pages's memcg:%lx\n", page->memcg_data);
+		printk("%spages's memcg:%lx\n", loglvl, page->memcg_data);
 #endif
 }
 
 void dump_page(struct page *page, const char *reason)
 {
-	__dump_page(page, reason);
-	dump_page_owner(page);
+	__dump_page(page, reason, KERN_WARNING);
+	dump_page_owner(page, KERN_ALERT);
 }
 EXPORT_SYMBOL(dump_page);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 519a60d5b6f7..20b500f76667 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -637,7 +637,7 @@ static void bad_page(struct page *page, const char *reason)
 	pr_alert("BUG: Bad page state in process %s  pfn:%05lx\n",
 		current->comm, page_to_pfn(page));
 	__dump_page(page, reason);
-	dump_page_owner(page);
+	dump_page_owner(page, KERN_ALERT);
 
 	print_modules();
 	dump_stack();
diff --git a/mm/page_owner.c b/mm/page_owner.c
index af464bb7fbe7..ff5908605925 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -406,7 +406,7 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
 	return -ENOMEM;
 }
 
-void __dump_page_owner(struct page *page)
+void __dump_page_owner(struct page *page, const char *loglvl)
 {
 	struct page_ext *page_ext = lookup_page_ext(page);
 	struct page_owner *page_owner;
@@ -417,7 +417,7 @@ void __dump_page_owner(struct page *page)
 	int mt;
 
 	if (unlikely(!page_ext)) {
-		pr_alert("There is not page extension available.\n");
+		printk("There is not page extension available.\n", loglvl);
 		return;
 	}
 
@@ -426,38 +426,38 @@ void __dump_page_owner(struct page *page)
 	mt = gfp_migratetype(gfp_mask);
 
 	if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
-		pr_alert("page_owner info is not present (never set?)\n");
+		printk("%spage_owner info is not present (never set?)\n", loglvl);
 		return;
 	}
 
 	if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
-		pr_alert("page_owner tracks the page as allocated\n");
+		printk("%spage_owner tracks the page as allocated\n", loglvl);
 	else
-		pr_alert("page_owner tracks the page as freed\n");
+		printk("page_owner tracks the page as freed\n", loglvl);
 
-	pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, ts %llu\n",
+	printk("%spage last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, ts %llu\n", loglvl,
 		 page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask,
 		 page_owner->pid, page_owner->ts_nsec);
 
 	handle = READ_ONCE(page_owner->handle);
 	if (!handle) {
-		pr_alert("page_owner allocation stack trace missing\n");
+		printk("%spage_owner allocation stack trace missing\n", loglvl);
 	} else {
 		nr_entries = stack_depot_fetch(handle, &entries);
-		stack_trace_print(entries, nr_entries, 0);
+		__stack_trace_print(entries, nr_entries, 0, loglvl);
 	}
 
 	handle = READ_ONCE(page_owner->free_handle);
 	if (!handle) {
-		pr_alert("page_owner free stack trace missing\n");
+		printk("%spage_owner free stack trace missing\n", loglvl);
 	} else {
 		nr_entries = stack_depot_fetch(handle, &entries);
-		pr_alert("page last free stack trace:\n");
-		stack_trace_print(entries, nr_entries, 0);
+		printk("page last free stack trace:\n", loglvl);
+		__stack_trace_print(entries, nr_entries, 0, loglvl);
 	}
 
 	if (page_owner->last_migrate_reason != -1)
-		pr_alert("page has been migrated, last migrate reason: %s\n",
+		printk("%spage has been migrated, last migrate reason: %s\n", loglvl,
 			migrate_reason_names[page_owner->last_migrate_reason]);
 }
Minchan Kim March 10, 2021, 5:06 p.m. UTC | #22
On Wed, Mar 10, 2021 at 05:46:56PM +0100, Michal Hocko wrote:
> On Wed 10-03-21 08:05:36, Minchan Kim wrote:
> > On Wed, Mar 10, 2021 at 02:07:05PM +0100, Michal Hocko wrote:
> [...]
> > > There is a lot of churn indeed. Have you considered adding $FOO_lglvl
> > > variants for those so that you can use them for your particular case
> > > without affecting most of existing users? Something similar we have
> > > discussed in other email thread regarding lru_add_drain_all?
> > 
> > I thought that way but didn't try since it couldn't make them
> > atomic(For example, other printk place in other context will
> > affect by the $FOO_lglvl).
> 
> I do not follow. I meant something like the following (likely incomplete
> but you should get an idea).

Oh, I thought you wanted to override the loglevel temporarily.

old_lvl = save_printk_lvl(new level);
dump_page();
restore_printk_lvl(old_lvl);

> 
> diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
> index 3468794f83d2..71b402eb8f78 100644
> --- a/include/linux/page_owner.h
> +++ b/include/linux/page_owner.h
> @@ -14,7 +14,7 @@ extern void __set_page_owner(struct page *page,
>  extern void __split_page_owner(struct page *page, unsigned int nr);
>  extern void __copy_page_owner(struct page *oldpage, struct page *newpage);
>  extern void __set_page_owner_migrate_reason(struct page *page, int reason);
> -extern void __dump_page_owner(struct page *page);
> +extern void __dump_page_owner(struct page *page, const char *loglvl);
>  extern void pagetypeinfo_showmixedcount_print(struct seq_file *m,
>  					pg_data_t *pgdat, struct zone *zone);
>  
> @@ -46,10 +46,10 @@ static inline void set_page_owner_migrate_reason(struct page *page, int reason)
>  	if (static_branch_unlikely(&page_owner_inited))
>  		__set_page_owner_migrate_reason(page, reason);
>  }
> -static inline void dump_page_owner(struct page *page)
> +static inline void dump_page_owner(struct page *page, const char *loglvl)
>  {
>  	if (static_branch_unlikely(&page_owner_inited))
> -		__dump_page_owner(page);
> +		__dump_page_owner(page, loglvl);
>  }
>  #else
>  static inline void reset_page_owner(struct page *page, unsigned int order)
> @@ -69,7 +69,7 @@ static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
>  static inline void set_page_owner_migrate_reason(struct page *page, int reason)
>  {
>  }
> -static inline void dump_page_owner(struct page *page)
> +static inline void dump_page_owner(struct page *page, const char *loglvl)
>  {
>  }
>  #endif /* CONFIG_PAGE_OWNER */
> diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
> index 9f8117c7cfdd..1b13135d9916 100644
> --- a/kernel/stacktrace.c
> +++ b/kernel/stacktrace.c
> @@ -14,6 +14,18 @@
>  #include <linux/kallsyms.h>
>  #include <linux/stacktrace.h>
>  
> +void __stack_trace_print(const unsigned long *entries, unsigned int nr_entries,
> +		       int spacesconst, char *loglvl)
> +{
> +	unsigned int i;
> +
> +	if (WARN_ON(!entries))
> +		return;
> +
> +	for (i = 0; i < nr_entries; i++)
> +		printk("%s%*c%pS\n", loglvl, 1 + spaces, ' ', (void *)entries[i]);
> +}

That's exactly what I did by introducing pr_loglevel. I wanted to address
*all places* that use dump_page and stack_trace_print, since some folks
might ask me to fix all the broken places at once. I'm getting tired
of such hassle.

void dump_page(const char *log_lvl, struct page *page, const char *reason)
{
        __dump_page(log_lvl, page, reason);
        dump_page_owner(log_lvl, page);
}
EXPORT_SYMBOL(dump_page);

/**
 * pr_loglevel - Print an loglevel message
 * @level: loglevel
 * @fmt: format string
 * @...: arguments for the format string
 *
 * This macro expands to a printk with @loglevel. It uses pr_fmt() to
 * generate the format string.
 */
#define pr_loglevel(level, fmt, ...) \
        printk("%s" pr_fmt(fmt), level, ##__VA_ARGS__)

void __dump_page(const char *log_lvl, struct page *page, const char *reason)
{
..
..
        if (page_poisoned) {
                pr_loglevel(log_lvl, "page:%px is uninitialized and poisoned", page);
                goto hex_only;
        }
..
}

static inline void dump_page_owner(const char *log_lvl, struct page *page)
{
        if (static_branch_unlikely(&page_owner_inited))
                __dump_page_owner(log_lvl, page);
}

void stack_trace_print(const char *log_lvl, const unsigned long *entries,
                unsigned int nr_entries, int spaces)
{
        unsigned int i;

        if (WARN_ON(!entries))
                return;

        for (i = 0; i < nr_entries; i++)
                pr_loglevel(log_lvl, "%*c%pS\n", 1 + spaces, ' ', (void *)entries[i]);
}
EXPORT_SYMBOL_GPL(stack_trace_print);
Minchan Kim March 10, 2021, 6:07 p.m. UTC | #23
On Wed, Mar 10, 2021 at 09:06:48AM -0800, Minchan Kim wrote:
> On Wed, Mar 10, 2021 at 05:46:56PM +0100, Michal Hocko wrote:
> > On Wed 10-03-21 08:05:36, Minchan Kim wrote:
> > > On Wed, Mar 10, 2021 at 02:07:05PM +0100, Michal Hocko wrote:
> > [...]
> > > > There is a lot of churn indeed. Have you considered adding $FOO_lglvl
> > > > variants for those so that you can use them for your particular case
> > > > without affecting most of existing users? Something similar we have
> > > > discussed in other email thread regarding lru_add_drain_all?
> > > 
> > > I thought that way but didn't try since it couldn't make them
> > > atomic(For example, other printk place in other context will
> > > affect by the $FOO_lglvl).
> > 
> > I do not follow. I meant something like the following (likely incomplete
> > but you should get an idea).
> 
> Oh, I thought you wanted to override the loglevel temporarily.
> 
> old_lvl = save_printk_lvl(new level);
> dump_page();
> restore_printk_lvl(old_lvl);
> 
> > 
> > diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
> > index 3468794f83d2..71b402eb8f78 100644
> > --- a/include/linux/page_owner.h
> > +++ b/include/linux/page_owner.h
> > @@ -14,7 +14,7 @@ extern void __set_page_owner(struct page *page,
> >  extern void __split_page_owner(struct page *page, unsigned int nr);
> >  extern void __copy_page_owner(struct page *oldpage, struct page *newpage);
> >  extern void __set_page_owner_migrate_reason(struct page *page, int reason);
> > -extern void __dump_page_owner(struct page *page);
> > +extern void __dump_page_owner(struct page *page, const char *loglvl);
> >  extern void pagetypeinfo_showmixedcount_print(struct seq_file *m,
> >  					pg_data_t *pgdat, struct zone *zone);
> >  
> > @@ -46,10 +46,10 @@ static inline void set_page_owner_migrate_reason(struct page *page, int reason)
> >  	if (static_branch_unlikely(&page_owner_inited))
> >  		__set_page_owner_migrate_reason(page, reason);
> >  }
> > -static inline void dump_page_owner(struct page *page)
> > +static inline void dump_page_owner(struct page *page, const char *loglvl)
> >  {
> >  	if (static_branch_unlikely(&page_owner_inited))
> > -		__dump_page_owner(page);
> > +		__dump_page_owner(page, loglvl);
> >  }
> >  #else
> >  static inline void reset_page_owner(struct page *page, unsigned int order)
> > @@ -69,7 +69,7 @@ static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
> >  static inline void set_page_owner_migrate_reason(struct page *page, int reason)
> >  {
> >  }
> > -static inline void dump_page_owner(struct page *page)
> > +static inline void dump_page_owner(struct page *page, const char *loglvl)
> >  {
> >  }
> >  #endif /* CONFIG_PAGE_OWNER */
> > diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
> > index 9f8117c7cfdd..1b13135d9916 100644
> > --- a/kernel/stacktrace.c
> > +++ b/kernel/stacktrace.c
> > @@ -14,6 +14,18 @@
> >  #include <linux/kallsyms.h>
> >  #include <linux/stacktrace.h>
> >  
> > +void __stack_trace_print(const unsigned long *entries, unsigned int nr_entries,
> > +		       int spacesconst, char *loglvl)
> > +{
> > +	unsigned int i;
> > +
> > +	if (WARN_ON(!entries))
> > +		return;
> > +
> > +	for (i = 0; i < nr_entries; i++)
> > +		printk("%s%*c%pS\n", loglvl, 1 + spaces, ' ', (void *)entries[i]);
> > +}
> 
> That's exactly what I did by introducing pr_loglevel. I wanted to address
> *all places* that use dump_page and stack_trace_print, since some folks
> might ask me to fix all the broken places at once. I'm getting tired
> of such hassle.
> 
> void dump_page(const char *log_lvl, struct page *page, const char *reason)
> {
>         __dump_page(log_lvl, page, reason);
>         dump_page_owner(log_lvl, page);
> }
> EXPORT_SYMBOL(dump_page);

Since it's good to have regardless of the patch, I posted the next
revision with the ratelimit removed and added more to the description
to move the work forward.

Posted v3 - https://lore.kernel.org/linux-mm/20210310180104.517886-1-minchan@kernel.org/
diff mbox series

Patch

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3e4b29ee2b1e..bb0aeca2069c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8453,6 +8453,34 @@  static unsigned long pfn_max_align_up(unsigned long pfn)
 				pageblock_nr_pages));
 }
 
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+	(defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
+static DEFINE_RATELIMIT_STATE(alloc_contig_ratelimit_state,
+		DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
+int alloc_contig_ratelimit(void)
+{
+	return __ratelimit(&alloc_contig_ratelimit_state);
+}
+
+void dump_migrate_failure_pages(struct list_head *page_list)
+{
+	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,
+			"migrate failure");
+	if (DYNAMIC_DEBUG_BRANCH(descriptor) &&
+			alloc_contig_ratelimit()) {
+		struct page *page;
+
+		WARN(1, "failed callstack");
+		list_for_each_entry(page, page_list, lru)
+			dump_page(page, "migration failure");
+	}
+}
+#else
+static inline void dump_migrate_failure_pages(struct list_head *page_list)
+{
+}
+#endif
+
 /* [start, end) must belong to a single zone. */
 static int __alloc_contig_migrate_range(struct compact_control *cc,
 					unsigned long start, unsigned long end)
@@ -8496,6 +8524,7 @@  static int __alloc_contig_migrate_range(struct compact_control *cc,
 				NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE);
 	}
 	if (ret < 0) {
+		dump_migrate_failure_pages(&cc->migratepages);
 		putback_movable_pages(&cc->migratepages);
 		return ret;
 	}