diff mbox series

[v4,1/2] panic: add option to dump task maps info in panic_print

Message ID 20240924074341.37272-1-qiwu.chen@transsion.com (mailing list archive)
State New
Headers show
Series [v4,1/2] panic: add option to dump task maps info in panic_print | expand

Commit Message

chenqiwu Sept. 24, 2024, 7:43 a.m. UTC
Currently, it's hard to debug panic issues caused by kill init,
since there is no debug info from user mode in current panic msg
such as the user_regs and maps info.

This patch adds an option to dump task maps info in panic_print.

- changes history:
v3:
https://lore.kernel.org/all/20240922095504.7182-1-qiwu.chen@transsion.com/
https://lore.kernel.org/all/20240922095504.7182-2-qiwu.chen@transsion.com/
v2: https://lore.kernel.org/all/20231110031553.33186-1-qiwu.chen@transsion.com/
v1: https://lore.kernel.org/all/20231110022720.GA3087@rlk/

Signed-off-by: qiwu.chen <qiwu.chen@transsion.com>
---
 .../admin-guide/kernel-parameters.txt         |  1 +
 Documentation/admin-guide/sysctl/kernel.rst   |  1 +
 fs/proc/task_mmu.c                            |  3 +-
 include/linux/mm.h                            |  4 ++
 kernel/panic.c                                | 52 +++++++++++++++++++
 5 files changed, 60 insertions(+), 1 deletion(-)

Comments

Oleg Nesterov Sept. 24, 2024, 11:33 a.m. UTC | #1
On 09/24, qiwu.chen wrote:
>
> +EXPORT_SYMBOL(get_vma_name);

Why?

> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3566,6 +3566,10 @@ static inline bool range_in_vma(struct vm_area_struct *vma,
>  #ifdef CONFIG_MMU
>  pgprot_t vm_get_page_prot(unsigned long vm_flags);
>  void vma_set_page_prot(struct vm_area_struct *vma);
> +void get_vma_name(struct vm_area_struct *vma,
> +			 const struct path **path,
> +			 const char **name,
> +			 const char **name_fmt);
>  #else

You didn't move get_vma_name() from fs/proc/task_mmu.c, so it also depends
on CONFIG_PROC_FS.

> +/*
> + * This function is called in panic proccess if the PANIC_PRINT_TASK_MAPS_INFO
> + * flag is specified in panic_print, which is helpful to debug panic issues due
> + * to an unhandled falut in user mode such as kill init.
> + */
> +static void dump_task_maps_info(struct task_struct *tsk)
> +{
> +	struct pt_regs *user_ret = task_pt_regs(tsk);
> +	struct mm_struct *mm = tsk->mm;
> +	struct vm_area_struct *vma;
> +
> +	if (!mm || !user_mode(user_ret))
> +		return;
> +
> +	pr_info("Dump task %s:%d maps start\n", tsk->comm, task_pid_nr(tsk));
> +	mmap_read_lock(mm);
> +	VMA_ITERATOR(vmi, mm, 0);
> +	for_each_vma(vmi, vma) {
> +		int flags = vma->vm_flags;
> +		unsigned long long pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
> +		const struct path *path;
> +		const char *name_fmt, *name;
> +		char name_buf[SZ_256];
> +
> +		get_vma_name(vma, &path, &name, &name_fmt);

So this code won't compile if CONFIG_MMU=n ?

> +		if (path) {
> +			name = d_path(path, name_buf, sizeof(name_buf));
> +			name = IS_ERR(name) ? "?" : name;

perhaps this needs mangle_path() ...

> +		} else if (name || name_fmt) {
> +			snprintf(name_buf, sizeof(name_buf), name_fmt ?: "%s", name);
> +			name = name_buf;
> +		}

Why not

		} else if (name_fmt) {
			snprintf(name_buf, sizeof(name_buf), name_fmt, name);
			name = name_buf;
		}
?

> +		if (name)
> +			pr_info("%08lx-%08lx %c%c%c%c %08llx %s\n",
> +				vma->vm_start, vma->vm_end,
> +				flags & VM_READ ? 'r' : '-',
> +				flags & VM_WRITE ? 'w' : '-',
> +				flags & VM_EXEC ? 'x' : '-',
> +				flags & VM_MAYSHARE ? 's' : 'p',
> +				pgoff, name);

I don't really understand why you skip vma if !name...

Oleg.
kernel test robot Sept. 24, 2024, 3:06 p.m. UTC | #2
Hi qiwu.chen,

kernel test robot noticed the following build errors:

[auto build test ERROR on arm64/for-next/core]
[also build test ERROR on brauner-vfs/vfs.all akpm-mm/mm-everything linus/master v6.11 next-20240924]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/qiwu-chen/arm64-show-signal-info-for-global-init/20240924-154508
base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
patch link:    https://lore.kernel.org/r/20240924074341.37272-1-qiwu.chen%40transsion.com
patch subject: [PATCH v4 1/2] panic: add option to dump task maps info in panic_print
config: arm-stm32_defconfig (https://download.01.org/0day-ci/archive/20240924/202409242252.cefLq5jM-lkp@intel.com/config)
compiler: arm-linux-gnueabi-gcc (GCC) 14.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240924/202409242252.cefLq5jM-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202409242252.cefLq5jM-lkp@intel.com/

All errors (new ones prefixed by >>):

   kernel/panic.c: In function 'dump_task_maps_info':
>> kernel/panic.c:235:17: error: implicit declaration of function 'get_vma_name'; did you mean 'arch_vma_name'? [-Wimplicit-function-declaration]
     235 |                 get_vma_name(vma, &path, &name, &name_fmt);
         |                 ^~~~~~~~~~~~
         |                 arch_vma_name


vim +235 kernel/panic.c

   210	
   211	/*
   212	 * This function is called in panic proccess if the PANIC_PRINT_TASK_MAPS_INFO
   213	 * flag is specified in panic_print, which is helpful to debug panic issues due
   214	 * to an unhandled falut in user mode such as kill init.
   215	 */
   216	static void dump_task_maps_info(struct task_struct *tsk)
   217	{
   218		struct pt_regs *user_ret = task_pt_regs(tsk);
   219		struct mm_struct *mm = tsk->mm;
   220		struct vm_area_struct *vma;
   221	
   222		if (!mm || !user_mode(user_ret))
   223			return;
   224	
   225		pr_info("Dump task %s:%d maps start\n", tsk->comm, task_pid_nr(tsk));
   226		mmap_read_lock(mm);
   227		VMA_ITERATOR(vmi, mm, 0);
   228		for_each_vma(vmi, vma) {
   229			int flags = vma->vm_flags;
   230			unsigned long long pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
   231			const struct path *path;
   232			const char *name_fmt, *name;
   233			char name_buf[SZ_256];
   234	
 > 235			get_vma_name(vma, &path, &name, &name_fmt);
   236			if (path) {
   237				name = d_path(path, name_buf, sizeof(name_buf));
   238				name = IS_ERR(name) ? "?" : name;
   239			} else if (name || name_fmt) {
   240				snprintf(name_buf, sizeof(name_buf), name_fmt ?: "%s", name);
   241				name = name_buf;
   242			}
   243	
   244			if (name)
   245				pr_info("%08lx-%08lx %c%c%c%c %08llx %s\n",
   246					vma->vm_start, vma->vm_end,
   247					flags & VM_READ ? 'r' : '-',
   248					flags & VM_WRITE ? 'w' : '-',
   249					flags & VM_EXEC ? 'x' : '-',
   250					flags & VM_MAYSHARE ? 's' : 'p',
   251					pgoff, name);
   252	
   253		}
   254		mmap_read_unlock(mm);
   255		pr_info("Dump task %s:%d maps end\n", tsk->comm, task_pid_nr(tsk));
   256	}
   257
kernel test robot Sept. 24, 2024, 3:06 p.m. UTC | #3
Hi qiwu.chen,

kernel test robot noticed the following build errors:

[auto build test ERROR on arm64/for-next/core]
[also build test ERROR on brauner-vfs/vfs.all akpm-mm/mm-everything linus/master v6.11 next-20240924]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/qiwu-chen/arm64-show-signal-info-for-global-init/20240924-154508
base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
patch link:    https://lore.kernel.org/r/20240924074341.37272-1-qiwu.chen%40transsion.com
patch subject: [PATCH v4 1/2] panic: add option to dump task maps info in panic_print
config: arm-allnoconfig (https://download.01.org/0day-ci/archive/20240924/202409242208.SqjERCDw-lkp@intel.com/config)
compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 8663a75fa2f31299ab8d1d90288d9df92aadee88)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240924/202409242208.SqjERCDw-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202409242208.SqjERCDw-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from kernel/panic.c:15:
   In file included from include/linux/kgdb.h:19:
   In file included from include/linux/kprobes.h:28:
   In file included from include/linux/ftrace.h:13:
   In file included from include/linux/kallsyms.h:13:
   In file included from include/linux/mm.h:2232:
   include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
>> kernel/panic.c:235:3: error: call to undeclared function 'get_vma_name'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     235 |                 get_vma_name(vma, &path, &name, &name_fmt);
         |                 ^
   1 warning and 1 error generated.


vim +/get_vma_name +235 kernel/panic.c

   210	
   211	/*
   212	 * This function is called in panic proccess if the PANIC_PRINT_TASK_MAPS_INFO
   213	 * flag is specified in panic_print, which is helpful to debug panic issues due
   214	 * to an unhandled falut in user mode such as kill init.
   215	 */
   216	static void dump_task_maps_info(struct task_struct *tsk)
   217	{
   218		struct pt_regs *user_ret = task_pt_regs(tsk);
   219		struct mm_struct *mm = tsk->mm;
   220		struct vm_area_struct *vma;
   221	
   222		if (!mm || !user_mode(user_ret))
   223			return;
   224	
   225		pr_info("Dump task %s:%d maps start\n", tsk->comm, task_pid_nr(tsk));
   226		mmap_read_lock(mm);
   227		VMA_ITERATOR(vmi, mm, 0);
   228		for_each_vma(vmi, vma) {
   229			int flags = vma->vm_flags;
   230			unsigned long long pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
   231			const struct path *path;
   232			const char *name_fmt, *name;
   233			char name_buf[SZ_256];
   234	
 > 235			get_vma_name(vma, &path, &name, &name_fmt);
   236			if (path) {
   237				name = d_path(path, name_buf, sizeof(name_buf));
   238				name = IS_ERR(name) ? "?" : name;
   239			} else if (name || name_fmt) {
   240				snprintf(name_buf, sizeof(name_buf), name_fmt ?: "%s", name);
   241				name = name_buf;
   242			}
   243	
   244			if (name)
   245				pr_info("%08lx-%08lx %c%c%c%c %08llx %s\n",
   246					vma->vm_start, vma->vm_end,
   247					flags & VM_READ ? 'r' : '-',
   248					flags & VM_WRITE ? 'w' : '-',
   249					flags & VM_EXEC ? 'x' : '-',
   250					flags & VM_MAYSHARE ? 's' : 'p',
   251					pgoff, name);
   252	
   253		}
   254		mmap_read_unlock(mm);
   255		pr_info("Dump task %s:%d maps end\n", tsk->comm, task_pid_nr(tsk));
   256	}
   257
chenqiwu Sept. 25, 2024, 8:27 a.m. UTC | #4
On Tue, Sep 24, 2024 at 01:33:54PM +0200, Oleg Nesterov wrote:
> 
> You didn't move get_vma_name() from fs/proc/task_mmu.c, so it also depends
> on CONFIG_PROC_FS.
>
Sure, thanks for your advice. it should be moved to mm tree without CONFIG_PROC_FS
dependence.

> > +		if (path) {
> > +			name = d_path(path, name_buf, sizeof(name_buf));
> > +			name = IS_ERR(name) ? "?" : name;
> 
I think this is an easier way to get file path name which deals with IS_ERR(name) case.
> perhaps this needs mangle_path() ...
>

> > +		} else if (name || name_fmt) {
> > +			snprintf(name_buf, sizeof(name_buf), name_fmt ?: "%s", name);
> > +			name = name_buf;
> > +		}
> 
This refers to the code section of do_procmap_query().

> Why not
> 
> 		} else if (name_fmt) {
> 			snprintf(name_buf, sizeof(name_buf), name_fmt, name);
> 			name = name_buf;
> 		}
> ?
>
Sure, this is a better way to deal with name_fmt case.

> > +		if (name)
> > +			pr_info("%08lx-%08lx %c%c%c%c %08llx %s\n",
> > +				vma->vm_start, vma->vm_end,
> > +				flags & VM_READ ? 'r' : '-',
> > +				flags & VM_WRITE ? 'w' : '-',
> > +				flags & VM_EXEC ? 'x' : '-',
> > +				flags & VM_MAYSHARE ? 's' : 'p',
> > +				pgoff, name);
> 
> I don't really understand why you skip vma if !name...
> 
Well, the vma without name seems no sense for debugging, skipping it can reduce the
maps space. But the disadvantage is debuggers cannot get full maps of task,
perhaps we shouldn't skip it.

Thanks
Qiwu
Oleg Nesterov Sept. 25, 2024, 12:19 p.m. UTC | #5
On 09/25, chenqiwu wrote:
>
> On Tue, Sep 24, 2024 at 01:33:54PM +0200, Oleg Nesterov wrote:
> >
> > > +		if (path) {
> > > +			name = d_path(path, name_buf, sizeof(name_buf));
> > > +			name = IS_ERR(name) ? "?" : name;
> >
> I think this is an easier way to get file path name which deals with IS_ERR(name) case.
> > perhaps this needs mangle_path() ...

Sorry, I don't understand your reply...

Oleg.
chenqiwu Sept. 26, 2024, 3:30 a.m. UTC | #6
On Wed, Sep 25, 2024 at 02:19:58PM +0200, Oleg Nesterov wrote:
> On 09/25, chenqiwu wrote:
> >
> > On Tue, Sep 24, 2024 at 01:33:54PM +0200, Oleg Nesterov wrote:
> > >
> > > > +		if (path) {
> > > > +			name = d_path(path, name_buf, sizeof(name_buf));
> > > > +			name = IS_ERR(name) ? "?" : name;
> > >
> > I think this is an easier way to get file path name which deals with IS_ERR(name) case.
> > > perhaps this needs mangle_path() ...
> 
> Sorry, I don't understand your reply...
> 
I means we can dump the file path name by d_path() directly without escaping "\n"
by mangle_path(). I try to test the patch in arm64 qemu, it can show the same file
path name as /proc/$pid/maps.

Thanks
Qiwu
Lorenzo Stoakes Oct. 1, 2024, 11:23 a.m. UTC | #7
+ Liam, Vlastimil

Sigh I hate to do it (honestly!), but NACK.

But, wait, I'll explain :)


I don't know what's going on with this thread, you have a 4th iteration of
a series I've never seen before that plays with VMAs, no cover letter, only
this one cc's linux-mm.

This is making my life really hard to review your work, you're playing with
VMAs but only partly cc'ing...

If you go for a v5, PLEASE create a cover letter and reply patches to
that. And cc everyone involve on all patches.

Something like

$ git format-patch -s --cover-letter --thread ...

Would work great for this, or even better b4 :)


On Tue, Sep 24, 2024 at 03:43:40PM GMT, qiwu.chen wrote:
> Currently, it's hard to debug panic issues caused by kill init,
> since there is no debug info from user mode in current panic msg
> such as the user_regs and maps info.

You haven't explained why printing all the VMAs for a specific process
helps?

We have dump_mm() that provides a sensible blob of mm information in such
situations already. Yes it's behind CONFIG_DEBUG_VM, that's on purpose.

You're STILL failing to provide a user who you are exporting symbols to.

>
> This patch adds an option to dump task maps info in panic_print.
>
> - changes history:
> v3:
> https://lore.kernel.org/all/20240922095504.7182-1-qiwu.chen@transsion.com/
> https://lore.kernel.org/all/20240922095504.7182-2-qiwu.chen@transsion.com/
> v2: https://lore.kernel.org/all/20231110031553.33186-1-qiwu.chen@transsion.com/
> v1: https://lore.kernel.org/all/20231110022720.GA3087@rlk/
>
> Signed-off-by: qiwu.chen <qiwu.chen@transsion.com>
> ---
>  .../admin-guide/kernel-parameters.txt         |  1 +
>  Documentation/admin-guide/sysctl/kernel.rst   |  1 +
>  fs/proc/task_mmu.c                            |  3 +-
>  include/linux/mm.h                            |  4 ++
>  kernel/panic.c                                | 52 +++++++++++++++++++
>  5 files changed, 60 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 8337d0fed311..f76709deef6c 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -4253,6 +4253,7 @@
>  			bit 5: print all printk messages in buffer
>  			bit 6: print all CPUs backtrace (if available in the arch)
>  			bit 7: print only tasks in uninterruptible (blocked) state
> +			bit 8: print task maps info

Woefully, woefully inadequate documentation.

>  			*Be aware* that this option may print a _lot_ of lines,
>  			so there are risks of losing older messages in the log.
>  			Use this option carefully, maybe worth to setup a
> diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
> index f8bc1630eba0..558e365b76a9 100644
> --- a/Documentation/admin-guide/sysctl/kernel.rst
> +++ b/Documentation/admin-guide/sysctl/kernel.rst
> @@ -872,6 +872,7 @@ bit 4  print ftrace buffer
>  bit 5  print all printk messages in buffer
>  bit 6  print all CPUs backtrace (if available in the arch)
>  bit 7  print only tasks in uninterruptible (blocked) state
> +bit 8  print task maps info

Equally woefully inadequate.

>  =====  ============================================
>
>  So for example to print tasks and memory info on panic, user can::
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index ade74a396968..37169ae36542 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -240,7 +240,7 @@ static int do_maps_open(struct inode *inode, struct file *file,
>  				sizeof(struct proc_maps_private));
>  }
>
> -static void get_vma_name(struct vm_area_struct *vma,
> +void get_vma_name(struct vm_area_struct *vma,
>  			 const struct path **path,
>  			 const char **name,
>  			 const char **name_fmt)
> @@ -300,6 +300,7 @@ static void get_vma_name(struct vm_area_struct *vma,
>  		return;
>  	}
>  }
> +EXPORT_SYMBOL(get_vma_name);

Please stop exporting symbols arbitrarily. Oleg already raised this.

I aleady explained at [0] that I am not in favour of you exporting this and
you have failed to give a justification.

So NACK on this alone.

[0]: https://lore.kernel.org/linux-mm/20240929093212.40449-1-qiwu.chen@transsion.com/

>
>  static void show_vma_header_prefix(struct seq_file *m,
>  				   unsigned long start, unsigned long end,
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 13bff7cf03b7..2fa403aae1de 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3566,6 +3566,10 @@ static inline bool range_in_vma(struct vm_area_struct *vma,
>  #ifdef CONFIG_MMU
>  pgprot_t vm_get_page_prot(unsigned long vm_flags);
>  void vma_set_page_prot(struct vm_area_struct *vma);
> +void get_vma_name(struct vm_area_struct *vma,
> +			 const struct path **path,
> +			 const char **name,
> +			 const char **name_fmt);
>  #else
>  static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
>  {
> diff --git a/kernel/panic.c b/kernel/panic.c
> index 753d12f4dc8f..2217e1d0ad44 100644
> --- a/kernel/panic.c
> +++ b/kernel/panic.c
> @@ -77,6 +77,8 @@ EXPORT_SYMBOL_GPL(panic_timeout);
>  #define PANIC_PRINT_ALL_PRINTK_MSG	0x00000020
>  #define PANIC_PRINT_ALL_CPU_BT		0x00000040
>  #define PANIC_PRINT_BLOCKED_TASKS	0x00000080
> +#define PANIC_PRINT_TASK_MAPS_INFO	0x00000100
> +
>  unsigned long panic_print;
>
>  ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
> @@ -208,6 +210,53 @@ void nmi_panic(struct pt_regs *regs, const char *msg)
>  }
>  EXPORT_SYMBOL(nmi_panic);
>
> +/*
> + * This function is called in panic proccess if the PANIC_PRINT_TASK_MAPS_INFO
> + * flag is specified in panic_print, which is helpful to debug panic issues due
> + * to an unhandled falut in user mode such as kill init.
> + */
> +static void dump_task_maps_info(struct task_struct *tsk)
> +{
> +	struct pt_regs *user_ret = task_pt_regs(tsk);
> +	struct mm_struct *mm = tsk->mm;
> +	struct vm_area_struct *vma;
> +
> +	if (!mm || !user_mode(user_ret))
> +		return;
> +
> +	pr_info("Dump task %s:%d maps start\n", tsk->comm, task_pid_nr(tsk));
> +	mmap_read_lock(mm);
> +	VMA_ITERATOR(vmi, mm, 0);
> +	for_each_vma(vmi, vma) {
> +		int flags = vma->vm_flags;
> +		unsigned long long pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
> +		const struct path *path;
> +		const char *name_fmt, *name;
> +		char name_buf[SZ_256];
> +
> +		get_vma_name(vma, &path, &name, &name_fmt);
> +		if (path) {
> +			name = d_path(path, name_buf, sizeof(name_buf));
> +			name = IS_ERR(name) ? "?" : name;
> +		} else if (name || name_fmt) {
> +			snprintf(name_buf, sizeof(name_buf), name_fmt ?: "%s", name);
> +			name = name_buf;
> +		}
> +
> +		if (name)
> +			pr_info("%08lx-%08lx %c%c%c%c %08llx %s\n",
> +				vma->vm_start, vma->vm_end,
> +				flags & VM_READ ? 'r' : '-',
> +				flags & VM_WRITE ? 'w' : '-',
> +				flags & VM_EXEC ? 'x' : '-',
> +				flags & VM_MAYSHARE ? 's' : 'p',
> +				pgoff, name);
> +
> +	}
> +	mmap_read_unlock(mm);
> +	pr_info("Dump task %s:%d maps end\n", tsk->comm, task_pid_nr(tsk));
> +}

Surely taking the mm sem on panic (!) is a really bad idea? Not sure if we
do this elsewhere but the lock might deadlock, you're not using a 'try'
variant or a killable variant, you're just saying 'hey wait forever if we
can't get this even though I know the kernel is inconsistent'. That's just
crazy to me?

Also PLEASE PLEASE NO. Do not put arbitrary mm, VMA code in a random file,
this is just not acceptable.

All mm code needs to live in mm files. As I said in the other thread, we
are planning to change how we lock things around /proc/$pid/maps
iterations, and sorry but having a random other place do this with whatever
locking is applied is just a big, big no.

> +
>  static void panic_print_sys_info(bool console_flush)
>  {
>  	if (console_flush) {
> @@ -233,6 +282,9 @@ static void panic_print_sys_info(bool console_flush)
>
>  	if (panic_print & PANIC_PRINT_BLOCKED_TASKS)
>  		show_state_filter(TASK_UNINTERRUPTIBLE);
> +
> +	if (panic_print & PANIC_PRINT_TASK_MAPS_INFO)
> +		dump_task_maps_info(current);
>  }
>
>  void check_panic_on_warn(const char *origin)
> --
> 2.25.1
>
>
diff mbox series

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 8337d0fed311..f76709deef6c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4253,6 +4253,7 @@ 
 			bit 5: print all printk messages in buffer
 			bit 6: print all CPUs backtrace (if available in the arch)
 			bit 7: print only tasks in uninterruptible (blocked) state
+			bit 8: print task maps info
 			*Be aware* that this option may print a _lot_ of lines,
 			so there are risks of losing older messages in the log.
 			Use this option carefully, maybe worth to setup a
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index f8bc1630eba0..558e365b76a9 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -872,6 +872,7 @@  bit 4  print ftrace buffer
 bit 5  print all printk messages in buffer
 bit 6  print all CPUs backtrace (if available in the arch)
 bit 7  print only tasks in uninterruptible (blocked) state
+bit 8  print task maps info
 =====  ============================================
 
 So for example to print tasks and memory info on panic, user can::
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ade74a396968..37169ae36542 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -240,7 +240,7 @@  static int do_maps_open(struct inode *inode, struct file *file,
 				sizeof(struct proc_maps_private));
 }
 
-static void get_vma_name(struct vm_area_struct *vma,
+void get_vma_name(struct vm_area_struct *vma,
 			 const struct path **path,
 			 const char **name,
 			 const char **name_fmt)
@@ -300,6 +300,7 @@  static void get_vma_name(struct vm_area_struct *vma,
 		return;
 	}
 }
+EXPORT_SYMBOL(get_vma_name);
 
 static void show_vma_header_prefix(struct seq_file *m,
 				   unsigned long start, unsigned long end,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 13bff7cf03b7..2fa403aae1de 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3566,6 +3566,10 @@  static inline bool range_in_vma(struct vm_area_struct *vma,
 #ifdef CONFIG_MMU
 pgprot_t vm_get_page_prot(unsigned long vm_flags);
 void vma_set_page_prot(struct vm_area_struct *vma);
+void get_vma_name(struct vm_area_struct *vma,
+			 const struct path **path,
+			 const char **name,
+			 const char **name_fmt);
 #else
 static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
 {
diff --git a/kernel/panic.c b/kernel/panic.c
index 753d12f4dc8f..2217e1d0ad44 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -77,6 +77,8 @@  EXPORT_SYMBOL_GPL(panic_timeout);
 #define PANIC_PRINT_ALL_PRINTK_MSG	0x00000020
 #define PANIC_PRINT_ALL_CPU_BT		0x00000040
 #define PANIC_PRINT_BLOCKED_TASKS	0x00000080
+#define PANIC_PRINT_TASK_MAPS_INFO	0x00000100
+
 unsigned long panic_print;
 
 ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
@@ -208,6 +210,53 @@  void nmi_panic(struct pt_regs *regs, const char *msg)
 }
 EXPORT_SYMBOL(nmi_panic);
 
+/*
+ * This function is called in panic proccess if the PANIC_PRINT_TASK_MAPS_INFO
+ * flag is specified in panic_print, which is helpful to debug panic issues due
+ * to an unhandled falut in user mode such as kill init.
+ */
+static void dump_task_maps_info(struct task_struct *tsk)
+{
+	struct pt_regs *user_ret = task_pt_regs(tsk);
+	struct mm_struct *mm = tsk->mm;
+	struct vm_area_struct *vma;
+
+	if (!mm || !user_mode(user_ret))
+		return;
+
+	pr_info("Dump task %s:%d maps start\n", tsk->comm, task_pid_nr(tsk));
+	mmap_read_lock(mm);
+	VMA_ITERATOR(vmi, mm, 0);
+	for_each_vma(vmi, vma) {
+		int flags = vma->vm_flags;
+		unsigned long long pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
+		const struct path *path;
+		const char *name_fmt, *name;
+		char name_buf[SZ_256];
+
+		get_vma_name(vma, &path, &name, &name_fmt);
+		if (path) {
+			name = d_path(path, name_buf, sizeof(name_buf));
+			name = IS_ERR(name) ? "?" : name;
+		} else if (name || name_fmt) {
+			snprintf(name_buf, sizeof(name_buf), name_fmt ?: "%s", name);
+			name = name_buf;
+		}
+
+		if (name)
+			pr_info("%08lx-%08lx %c%c%c%c %08llx %s\n",
+				vma->vm_start, vma->vm_end,
+				flags & VM_READ ? 'r' : '-',
+				flags & VM_WRITE ? 'w' : '-',
+				flags & VM_EXEC ? 'x' : '-',
+				flags & VM_MAYSHARE ? 's' : 'p',
+				pgoff, name);
+
+	}
+	mmap_read_unlock(mm);
+	pr_info("Dump task %s:%d maps end\n", tsk->comm, task_pid_nr(tsk));
+}
+
 static void panic_print_sys_info(bool console_flush)
 {
 	if (console_flush) {
@@ -233,6 +282,9 @@  static void panic_print_sys_info(bool console_flush)
 
 	if (panic_print & PANIC_PRINT_BLOCKED_TASKS)
 		show_state_filter(TASK_UNINTERRUPTIBLE);
+
+	if (panic_print & PANIC_PRINT_TASK_MAPS_INFO)
+		dump_task_maps_info(current);
 }
 
 void check_panic_on_warn(const char *origin)