diff mbox series

[1/2] ARC: show_regs: avoid page allocator

Message ID 1545159239-30628-2-git-send-email-vgupta@synopsys.com (mailing list archive)
State New, archived
Headers show
Series ARC show_regs fixes | expand

Commit Message

Vineet Gupta Dec. 18, 2018, 6:53 p.m. UTC
Use on-stack smaller buffers instead of dynamic pages.

The motivation for this change was to address lockdep splat when
signal handling code calls show_regs (with preemption disabled) and
ARC show_regs calls into sleepable page allocator.

| potentially unexpected fatal signal 11.
| BUG: sleeping function called from invalid context at ../mm/page_alloc.c:4317
| in_atomic(): 1, irqs_disabled(): 0, pid: 57, name: segv
| no locks held by segv/57.
| Preemption disabled at:
| [<8182f17e>] get_signal+0x4a6/0x7c4
| CPU: 0 PID: 57 Comm: segv Not tainted 4.17.0+ #23
|
| Stack Trace:
|  arc_unwind_core.constprop.1+0xd0/0xf4
|  __might_sleep+0x1f6/0x234
|  __get_free_pages+0x174/0xca0
|  show_regs+0x22/0x330
|  get_signal+0x4ac/0x7c4     # print_fatal_signals() -> preempt_disable()
|  do_signal+0x30/0x224
|  resume_user_mode_begin+0x90/0xd8

Despite this, lockdep still barfs (see next change), but this patch
still has merit, as we now use smaller/localized buffers and there's
less instruction trace to sift through when debugging pesky issues.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/troubleshoot.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

Comments

Eugeniy Paltsev Dec. 19, 2018, 5:04 p.m. UTC | #1
Hi Vineet,

Just curious: isn't it enough to use GFP_NOWAIT instead
of GFP_KERNEL when we allocate the page in show_regs()?

As far as I can see, x86 uses print_vma_addr() in its show_signal_msg()
function, which allocates a page with __get_free_page(GFP_NOWAIT);

On Tue, 2018-12-18 at 10:53 -0800, Vineet Gupta wrote:
> Use on-stack smaller buffers instead of dynamic pages.
> 
> The motivation for this change was to address lockdep splat when
> signal handling code calls show_regs (with preemption disabled) and
> ARC show_regs calls into sleepable page allocator.
> 
> > potentially unexpected fatal signal 11.
> > BUG: sleeping function called from invalid context at ../mm/page_alloc.c:4317
> > in_atomic(): 1, irqs_disabled(): 0, pid: 57, name: segv
> > no locks held by segv/57.
> > Preemption disabled at:
> > [<8182f17e>] get_signal+0x4a6/0x7c4
> > CPU: 0 PID: 57 Comm: segv Not tainted 4.17.0+ #23
> > 
> > Stack Trace:
> >  arc_unwind_core.constprop.1+0xd0/0xf4
> >  __might_sleep+0x1f6/0x234
> >  __get_free_pages+0x174/0xca0
> >  show_regs+0x22/0x330
> >  get_signal+0x4ac/0x7c4     # print_fatal_signals() -> preempt_disable()
> >  do_signal+0x30/0x224
> >  resume_user_mode_begin+0x90/0xd8
> 
> Despite this, lockdep still barfs (see next change), but this patch
> still has merit as in we use smaller/localized buffers now and there's
> less instruction trace to sift thru when debugging pesky issues.
> 
> Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
> ---
>  arch/arc/kernel/troubleshoot.c | 22 +++++++++-------------
>  1 file changed, 9 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
> index e8d9fb452346..2885bec71fb8 100644
> --- a/arch/arc/kernel/troubleshoot.c
> +++ b/arch/arc/kernel/troubleshoot.c
> @@ -58,11 +58,12 @@ static void show_callee_regs(struct callee_regs *cregs)
>  	print_reg_file(&(cregs->r13), 13);
>  }
>  
> -static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
> +static void print_task_path_n_nm(struct task_struct *tsk)
>  {
>  	char *path_nm = NULL;
>  	struct mm_struct *mm;
>  	struct file *exe_file;
> +	char buf[256];
>  
>  	mm = get_task_mm(tsk);
>  	if (!mm)
> @@ -80,10 +81,9 @@ static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
>  	pr_info("Path: %s\n", !IS_ERR(path_nm) ? path_nm : "?");
>  }
>  
> -static void show_faulting_vma(unsigned long address, char *buf)
> +static void show_faulting_vma(unsigned long address)
>  {
>  	struct vm_area_struct *vma;
> -	char *nm = buf;
>  	struct mm_struct *active_mm = current->active_mm;
>  
>  	/* can't use print_vma_addr() yet as it doesn't check for
> @@ -96,8 +96,11 @@ static void show_faulting_vma(unsigned long address, char *buf)
>  	 * if the container VMA is not found
>  	 */
>  	if (vma && (vma->vm_start <= address)) {
> +		char buf[256];
> +		char *nm = "?";
> +
>  		if (vma->vm_file) {
> -			nm = file_path(vma->vm_file, buf, PAGE_SIZE - 1);
> +			nm = file_path(vma->vm_file, buf, 256-1);
>  			if (IS_ERR(nm))
>  				nm = "?";
>  		}
> @@ -173,13 +176,8 @@ void show_regs(struct pt_regs *regs)
>  {
>  	struct task_struct *tsk = current;
>  	struct callee_regs *cregs;
> -	char *buf;
> -
> -	buf = (char *)__get_free_page(GFP_KERNEL);
> -	if (!buf)
> -		return;
>  
> -	print_task_path_n_nm(tsk, buf);
> +	print_task_path_n_nm(tsk);
>  	show_regs_print_info(KERN_INFO);
>  
>  	show_ecr_verbose(regs);
> @@ -189,7 +187,7 @@ void show_regs(struct pt_regs *regs)
>  		(void *)regs->blink, (void *)regs->ret);
>  
>  	if (user_mode(regs))
> -		show_faulting_vma(regs->ret, buf); /* faulting code, not data */
> +		show_faulting_vma(regs->ret); /* faulting code, not data */
>  
>  	pr_info("[STAT32]: 0x%08lx", regs->status32);
>  
> @@ -221,8 +219,6 @@ void show_regs(struct pt_regs *regs)
>  	cregs = (struct callee_regs *)current->thread.callee_reg;
>  	if (cregs)
>  		show_callee_regs(cregs);
> -
> -	free_page((unsigned long)buf);
>  }
>  
>  void show_kernel_fault_diag(const char *str, struct pt_regs *regs,
Vineet Gupta Dec. 19, 2018, 5:36 p.m. UTC | #2
On 12/19/18 9:04 AM, Eugeniy Paltsev wrote:
> Just curious: isn't that enough to use GFP_NOWAIT instead
> of GFP_KERNEL when we allocate page in show_regs()?
>
> As I can see x86 use print_vma_addr() in their show_signal_msg()
> function which allocate page with __get_free_page(GFP_NOWAIT);

I'm not sure if lockdep will be happy with it still.

At any rate, as explained in the changelog, this still has merit, since the buffer is
only needed for nested d_path calls, which are better served with a smaller
on-stack buffer. For cases such as a kernel crash, we want less code and fewer traces
in the fault path to sift through!

-Vineet
William Kucharski Dec. 19, 2018, 8:46 p.m. UTC | #3
> On Dec 18, 2018, at 11:53 AM, Vineet Gupta <Vineet.Gupta1@synopsys.com> wrote:
> 
> Use on-stack smaller buffers instead of dynamic pages.
> 
> The motivation for this change was to address lockdep splat when
> signal handling code calls show_regs (with preemption disabled) and
> ARC show_regs calls into sleepable page allocator.
> 
> | potentially unexpected fatal signal 11.
> | BUG: sleeping function called from invalid context at ../mm/page_alloc.c:4317
> | in_atomic(): 1, irqs_disabled(): 0, pid: 57, name: segv
> | no locks held by segv/57.
> | Preemption disabled at:
> | [<8182f17e>] get_signal+0x4a6/0x7c4
> | CPU: 0 PID: 57 Comm: segv Not tainted 4.17.0+ #23
> |
> | Stack Trace:
> |  arc_unwind_core.constprop.1+0xd0/0xf4
> |  __might_sleep+0x1f6/0x234
> |  __get_free_pages+0x174/0xca0
> |  show_regs+0x22/0x330
> |  get_signal+0x4ac/0x7c4     # print_fatal_signals() -> preempt_disable()
> |  do_signal+0x30/0x224
> |  resume_user_mode_begin+0x90/0xd8
> 
> Despite this, lockdep still barfs (see next change), but this patch
> still has merit as in we use smaller/localized buffers now and there's
> less instruction trace to sift thru when debugging pesky issues.
> 
> Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

I would rather see 256 as a #define somewhere rather than a magic number sprinkled
around arch/arc/kernel/troubleshoot.c.

Still, that's what the existing code does, so I suppose it's OK.

Otherwise the change looks good.

Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Vineet Gupta Dec. 19, 2018, 9:36 p.m. UTC | #4
On 12/19/18 12:46 PM, William Kucharski wrote:
> I would rather see 256 as a #define somewhere rather than a magic number sprinkled
> around arch/arc/kernel/troubleshoot.c.

That bothered me as well, but I was too lazy to define one and the existing ones
don't apply. PATH_MAX is 4K which will blow up the stack usage.
> 
> Still, that's what the existing code does, so I suppose it's OK.

I'll define one locally.

> Otherwise the change looks good.

Thx for taking a look.

> Reviewed-by: William Kucharski <william.kucharski@oracle.com>

I'll add this to the patch.

Thx,
-Vineet
Vineet Gupta Dec. 20, 2018, 1:16 a.m. UTC | #5
On 12/19/18 9:04 AM, Eugeniy Paltsev wrote:
> As I can see x86 use print_vma_addr() in their show_signal_msg()
> function which allocate page with __get_free_page(GFP_NOWAIT);

Indeed with that the __get_free_page() lockdep splat is gone.

There's a different one now hence my other patch.

| [ARCLinux]# ./segv-null-ptr
| potentially unexpected fatal signal 11.
| BUG: sleeping function called from invalid context at kernel/fork.c:1011
| in_atomic(): 1, irqs_disabled(): 0, pid: 70, name: segv-null-ptr
| no locks held by segv-null-ptr/70.
| CPU: 0 PID: 70 Comm: segv-null-ptr Not tainted 4.18.0+ #69
|
| Stack Trace:
|  arc_unwind_core+0xcc/0x100
|  ___might_sleep+0x17a/0x190
|  mmput+0x16/0xb8
|  show_regs+0x52/0x310
|  get_signal+0x5ee/0x610
|  do_signal+0x2c/0x218
|  resume_user_mode_begin+0x90/0xd8
Michal Hocko Dec. 20, 2018, 12:57 p.m. UTC | #6
On Tue 18-12-18 10:53:58, Vineet Gupta wrote:
> Use on-stack smaller buffers instead of dynamic pages.
> 
> The motivation for this change was to address lockdep splat when
> signal handling code calls show_regs (with preemption disabled) and
> ARC show_regs calls into sleepable page allocator.
> 
> | potentially unexpected fatal signal 11.
> | BUG: sleeping function called from invalid context at ../mm/page_alloc.c:4317
> | in_atomic(): 1, irqs_disabled(): 0, pid: 57, name: segv
> | no locks held by segv/57.
> | Preemption disabled at:
> | [<8182f17e>] get_signal+0x4a6/0x7c4
> | CPU: 0 PID: 57 Comm: segv Not tainted 4.17.0+ #23
> |
> | Stack Trace:
> |  arc_unwind_core.constprop.1+0xd0/0xf4
> |  __might_sleep+0x1f6/0x234
> |  __get_free_pages+0x174/0xca0
> |  show_regs+0x22/0x330
> |  get_signal+0x4ac/0x7c4     # print_fatal_signals() -> preempt_disable()
> |  do_signal+0x30/0x224
> |  resume_user_mode_begin+0x90/0xd8
> 
> Despite this, lockdep still barfs (see next change), but this patch
> still has merit as in we use smaller/localized buffers now and there's
> less instruction trace to sift thru when debugging pesky issues.

But show_regs is called from contexts which might be reached via deep
call chains (e.g. WARN). Is it safe to allocate such a large on-stack buffer there?
Tetsuo Handa Dec. 20, 2018, 1:30 p.m. UTC | #7
On 2018/12/20 10:16, Vineet Gupta wrote:
> On 12/19/18 9:04 AM, Eugeniy Paltsev wrote:
>> As I can see x86 use print_vma_addr() in their show_signal_msg()
>> function which allocate page with __get_free_page(GFP_NOWAIT);
> 
> Indeed with that the __get_free_page() lockdep splat is gone.
> 
> There's a different one now hence my other patch.
> 
> | [ARCLinux]# ./segv-null-ptr
> | potentially unexpected fatal signal 11.
> | BUG: sleeping function called from invalid context at kernel/fork.c:1011
> | in_atomic(): 1, irqs_disabled(): 0, pid: 70, name: segv-null-ptr
> | no locks held by segv-null-ptr/70.
> | CPU: 0 PID: 70 Comm: segv-null-ptr Not tainted 4.18.0+ #69
> |
> | Stack Trace:
> |  arc_unwind_core+0xcc/0x100
> |  ___might_sleep+0x17a/0x190
> |  mmput+0x16/0xb8

Then, does mmput_async() help?

> |  show_regs+0x52/0x310
> |  get_signal+0x5ee/0x610
> |  do_signal+0x2c/0x218
> |  resume_user_mode_begin+0x90/0xd8
Vineet Gupta Dec. 20, 2018, 6:36 p.m. UTC | #8
On 12/20/18 5:30 AM, Tetsuo Handa wrote:
>> |  mmput+0x16/0xb8
> Then, does mmput_async() help?

Probably, I can try.
Vineet Gupta Dec. 20, 2018, 6:38 p.m. UTC | #9
On 12/20/18 4:57 AM, Michal Hocko wrote:
>> Despite this, lockdep still barfs (see next change), but this patch
>> still has merit as in we use smaller/localized buffers now and there's
>> less instruction trace to sift thru when debugging pesky issues.
> But show_regs is called from contexts which might be called from deep
> call chains (e.g WARN). Is it safe to allocate such a large stack there?

ARC has 8K pages and 256 additional bytes of stack usage doesn't seem absurdly
high to me !

-Vineet
Vineet Gupta Dec. 20, 2018, 6:43 p.m. UTC | #10
On 12/20/18 5:30 AM, Tetsuo Handa wrote:
>> | Stack Trace:
>> |  arc_unwind_core+0xcc/0x100
>> |  ___might_sleep+0x17a/0x190
>> |  mmput+0x16/0xb8
> Then, does mmput_async() help?
>

It helps, but then we get the next one (w/o my patch 2/2)

BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:23
in_atomic(): 1, irqs_disabled(): 0, pid: 69, name: segv-null-ptr
no locks held by segv-null-ptr/69.
CPU: 0 PID: 69 Comm: segv-null-ptr Not tainted 4.18.0+ #72

Stack Trace:
  arc_unwind_core+0xcc/0x100
  ___might_sleep+0x17a/0x190
  down_read+0x18/0x38
  show_regs+0x102/0x310
  get_signal+0x5ee/0x610
  do_signal+0x2c/0x218
  resume_user_mode_begin+0x90/0xd8
    @off 0x103d4 in [/segv-null-pt
diff mbox series

Patch

diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index e8d9fb452346..2885bec71fb8 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -58,11 +58,12 @@  static void show_callee_regs(struct callee_regs *cregs)
 	print_reg_file(&(cregs->r13), 13);
 }
 
-static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
+static void print_task_path_n_nm(struct task_struct *tsk)
 {
 	char *path_nm = NULL;
 	struct mm_struct *mm;
 	struct file *exe_file;
+	char buf[256];
 
 	mm = get_task_mm(tsk);
 	if (!mm)
@@ -80,10 +81,9 @@  static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
 	pr_info("Path: %s\n", !IS_ERR(path_nm) ? path_nm : "?");
 }
 
-static void show_faulting_vma(unsigned long address, char *buf)
+static void show_faulting_vma(unsigned long address)
 {
 	struct vm_area_struct *vma;
-	char *nm = buf;
 	struct mm_struct *active_mm = current->active_mm;
 
 	/* can't use print_vma_addr() yet as it doesn't check for
@@ -96,8 +96,11 @@  static void show_faulting_vma(unsigned long address, char *buf)
 	 * if the container VMA is not found
 	 */
 	if (vma && (vma->vm_start <= address)) {
+		char buf[256];
+		char *nm = "?";
+
 		if (vma->vm_file) {
-			nm = file_path(vma->vm_file, buf, PAGE_SIZE - 1);
+			nm = file_path(vma->vm_file, buf, 256-1);
 			if (IS_ERR(nm))
 				nm = "?";
 		}
@@ -173,13 +176,8 @@  void show_regs(struct pt_regs *regs)
 {
 	struct task_struct *tsk = current;
 	struct callee_regs *cregs;
-	char *buf;
-
-	buf = (char *)__get_free_page(GFP_KERNEL);
-	if (!buf)
-		return;
 
-	print_task_path_n_nm(tsk, buf);
+	print_task_path_n_nm(tsk);
 	show_regs_print_info(KERN_INFO);
 
 	show_ecr_verbose(regs);
@@ -189,7 +187,7 @@  void show_regs(struct pt_regs *regs)
 		(void *)regs->blink, (void *)regs->ret);
 
 	if (user_mode(regs))
-		show_faulting_vma(regs->ret, buf); /* faulting code, not data */
+		show_faulting_vma(regs->ret); /* faulting code, not data */
 
 	pr_info("[STAT32]: 0x%08lx", regs->status32);
 
@@ -221,8 +219,6 @@  void show_regs(struct pt_regs *regs)
 	cregs = (struct callee_regs *)current->thread.callee_reg;
 	if (cregs)
 		show_callee_regs(cregs);
-
-	free_page((unsigned long)buf);
 }
 
 void show_kernel_fault_diag(const char *str, struct pt_regs *regs,