diff mbox series

[2/9] ARM: traps: use get_kernel_nofault instead of set_fs()

Message ID 20200907153701.2981205-3-arnd@arndb.de (mailing list archive)
State New, archived
Headers show
Series ARM: remove set_fs callers and implementation | expand

Commit Message

Arnd Bergmann Sept. 7, 2020, 3:36 p.m. UTC
The stack dumping code needs to work for both kernel and user mode,
and currently this works by using set_fs() and then calling get_user()
to carefully access a potentially invalid pointer.

Change both locations to handle user and kernel mode differently, using
get_kernel_nofault() in case of kernel pointers.

I change __get_user() to get_user() here for consistency, as user space
stacks should not point into kernel memory.

In dump_backtrace_entry() I assume that dump_mem() can only operate on
kernel pointers when in_entry_text(from) is true, rather than checking
the mode register.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/kernel/traps.c | 69 ++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 38 deletions(-)

Comments

Christoph Hellwig Sept. 8, 2020, 6:15 a.m. UTC | #1
> +static void dump_mem(const char *, const char *, unsigned long, unsigned long, bool kernel_mode);

This adds a pointlessly long line.  

And looking at the code I don't see why the argument is even needed.

dump_mem() currently does an unconditional set_fs(KERNEL_DS), so it
should always use get_kernel_nofault.

> +static void dump_instr(const char *lvl, struct pt_regs *regs)
>  {
>  	unsigned long addr = instruction_pointer(regs);
>  	const int thumb = thumb_mode(regs);
> @@ -173,10 +169,20 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs)
>  	for (i = -4; i < 1 + !!thumb; i++) {
>  		unsigned int val, bad;
>  
> -		if (thumb)
> -			bad = get_user(val, &((u16 *)addr)[i]);
> -		else
> -			bad = get_user(val, &((u32 *)addr)[i]);
> +		if (!user_mode(regs)) {
> +			if (thumb) {
> +				u16 val16;
> +				bad = get_kernel_nofault(val16, &((u16 *)addr)[i]);
> +				val = val16;
> +			} else {
> +				bad = get_kernel_nofault(val, &((u32 *)addr)[i]);
> +			}
> +		} else {
> +			if (thumb)
> +				bad = get_user(val, &((u16 *)addr)[i]);
> +			else
> +				bad = get_user(val, &((u32 *)addr)[i]);
> +		}

When I looked at this earlier I just added a little helper to make
this a little easier to read.   Here is my patch from an old tree:

http://git.infradead.org/users/hch/misc.git/commitdiff/67413030ccb7a64a7eb828e13ff0795f4eadfeb7
Arnd Bergmann Sept. 17, 2020, 5:29 p.m. UTC | #2
On Tue, Sep 8, 2020 at 8:15 AM Christoph Hellwig <hch@lst.de> wrote:
>
> > +static void dump_mem(const char *, const char *, unsigned long, unsigned long, bool kernel_mode);
>
> This adds a pointlessly long line.

Fixed.

> And looking at the code I don't see why the argument is even needed.
>
> dump_mem() currently does an unconditional set_fs(KERNEL_DS), so it
> should always use get_kernel_nofault.

I had looked at

        if (!user_mode(regs) || in_interrupt()) {
                dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
                         THREAD_SIZE + (unsigned
long)task_stack_page(tsk), kernel_mode);

which told me that there should be at least some code path ending up in
__die() that has in_interrupt() set but comes from user mode.

In this case, the memory to print would be a user pointer and cannot be
accessed by get_kernel_nofault() (but could be accessed with
"set_fs(KERNEL_DS); __get_user()").

I looked through the history now and the only code path I could
find that would arrive here this way is from bad_mode(), indicating
that there is probably a hardware bug or the contents of *regs are
corrupted.

Russell might have a better explanation for this, but I would assume
now that you are right in that we don't ever need to care about
dumping user space addresses here.

> > +static void dump_instr(const char *lvl, struct pt_regs *regs)
> >  {
> >       unsigned long addr = instruction_pointer(regs);
> >       const int thumb = thumb_mode(regs);
> > @@ -173,10 +169,20 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs)
> >       for (i = -4; i < 1 + !!thumb; i++) {
> >               unsigned int val, bad;
> >
> > -             if (thumb)
> > -                     bad = get_user(val, &((u16 *)addr)[i]);
> > -             else
> > -                     bad = get_user(val, &((u32 *)addr)[i]);
> > +             if (!user_mode(regs)) {
> > +                     if (thumb) {
> > +                             u16 val16;
> > +                             bad = get_kernel_nofault(val16, &((u16 *)addr)[i]);
> > +                             val = val16;
> > +                     } else {
> > +                             bad = get_kernel_nofault(val, &((u32 *)addr)[i]);
> > +                     }
> > +             } else {
> > +                     if (thumb)
> > +                             bad = get_user(val, &((u16 *)addr)[i]);
> > +                     else
> > +                             bad = get_user(val, &((u32 *)addr)[i]);
> > +             }
>
> When I looked at this earlier I just added a little helper to make
> this a little easier to read.   Here is my patch from an old tree:
>
> http://git.infradead.org/users/hch/misc.git/commitdiff/67413030ccb7a64a7eb828e13ff0795f4eadfeb7

I think your version was broken for the in-kernel thumb version
because get_kernel_nofault does not widen the result
from 16 to 32 bits. I tried fixing this in your version, but the
result ended up more complex than my version here, so I
decided to keep what I had.

       Arnd
Russell King (Oracle) Sept. 18, 2020, 7:42 a.m. UTC | #3
On Thu, Sep 17, 2020 at 07:29:37PM +0200, Arnd Bergmann wrote:
> On Tue, Sep 8, 2020 at 8:15 AM Christoph Hellwig <hch@lst.de> wrote:
> >
> > > +static void dump_mem(const char *, const char *, unsigned long, unsigned long, bool kernel_mode);
> >
> > This adds a pointlessly long line.
> 
> Fixed.
> 
> > And looking at the code I don't see why the argument is even needed.
> >
> > dump_mem() currently does an unconditional set_fs(KERNEL_DS), so it
> > should always use get_kernel_nofault.
> 
> I had looked at
> 
>         if (!user_mode(regs) || in_interrupt()) {
>                 dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
>                          THREAD_SIZE + (unsigned
> long)task_stack_page(tsk), kernel_mode);
> 
> which told me that there should be at least some code path ending up in
> __die() that has in_interrupt() set but comes from user mode.
> 
> In this case, the memory to print would be a user pointer and cannot be
> accessed by get_kernel_nofault() (but could be accessed with
> "set_fs(KERNEL_DS); __get_user()").
> 
> I looked through the history now and the only code path I could
> find that would arrive here this way is from bad_mode(), indicating
> that there is probably a hardware bug or the contents of *regs are
> corrupted.

Yes, that's correct.  It isn't something entirely theoretical, although
we never see it now, it used to happen in the distant past due to saved
regs corruption.  If bad_mode() ever gets called, all bets are off and
we're irrecoverably crashing.

Note that in that case, while user_mode(regs) may return true or false,
regs->ARM_sp and regs->ARM_lr are always the SVC mode stack and return
address after regs has been stacked, and not the expected values for
the parent context (which we have most likely long since destroyed.)
Arnd Bergmann Sept. 18, 2020, 12:36 p.m. UTC | #4
On Fri, Sep 18, 2020 at 9:42 AM Russell King - ARM Linux admin
<linux@armlinux.org.uk> wrote:
>
> On Thu, Sep 17, 2020 at 07:29:37PM +0200, Arnd Bergmann wrote:
> > On Tue, Sep 8, 2020 at 8:15 AM Christoph Hellwig <hch@lst.de> wrote:
> >
> > I looked through the history now and the only code path I could
> > find that would arrive here this way is from bad_mode(), indicating
> > that there is probably a hardware bug or the contents of *regs are
> > corrupted.
>
> Yes, that's correct.  It isn't something entirely theoretical, although
> we never see it now, it used to happen in the distant past due to saved
> regs corruption.  If bad_mode() ever gets called, all bets are off and
> we're irrecoverably crashing.
>
> Note that in that case, while user_mode(regs) may return true or false,
> regs->ARM_sp and regs->ARM_lr are always the SVC mode stack and return
> address after regs has been stacked, and not the expected values for
> the parent context (which we have most likely long since destroyed.)

Ok, I have rewritten the patch and my changelog text accordingly, sending
an updated version now.

Thanks,

      Arnd
diff mbox series

Patch

diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 17d5a785df28..ebed261b356f 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -60,7 +60,7 @@  static int __init user_debug_setup(char *str)
 __setup("user_debug=", user_debug_setup);
 #endif
 
-static void dump_mem(const char *, const char *, unsigned long, unsigned long);
+static void dump_mem(const char *, const char *, unsigned long, unsigned long, bool kernel_mode);
 
 void dump_backtrace_entry(unsigned long where, unsigned long from,
 			  unsigned long frame, const char *loglvl)
@@ -76,7 +76,7 @@  void dump_backtrace_entry(unsigned long where, unsigned long from,
 #endif
 
 	if (in_entry_text(from) && end <= ALIGN(frame, THREAD_SIZE))
-		dump_mem(loglvl, "Exception stack", frame + 4, end);
+		dump_mem(loglvl, "Exception stack", frame + 4, end, true);
 }
 
 void dump_backtrace_stm(u32 *stack, u32 instruction, const char *loglvl)
@@ -119,20 +119,11 @@  static int verify_stack(unsigned long sp)
  * Dump out the contents of some memory nicely...
  */
 static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
-		     unsigned long top)
+		     unsigned long top, bool kernel_mode)
 {
 	unsigned long first;
-	mm_segment_t fs;
 	int i;
 
-	/*
-	 * We need to switch to kernel mode so that we can use __get_user
-	 * to safely read from kernel space.  Note that we now dump the
-	 * code first, just in case the backtrace kills us.
-	 */
-	fs = get_fs();
-	set_fs(KERNEL_DS);
-
 	printk("%s%s(0x%08lx to 0x%08lx)\n", lvl, str, bottom, top);
 
 	for (first = bottom & ~31; first < top; first += 32) {
@@ -144,20 +135,25 @@  static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
 
 		for (p = first, i = 0; i < 8 && p < top; i++, p += 4) {
 			if (p >= bottom && p < top) {
-				unsigned long val;
-				if (__get_user(val, (unsigned long *)p) == 0)
-					sprintf(str + i * 9, " %08lx", val);
+				u32 val;
+				int err;
+
+				if (kernel_mode)
+					err = get_kernel_nofault(val, (u32 *)p);
+				else
+					err = get_user(val, (u32 *)p);
+
+				if (!err)
+					sprintf(str + i * 9, " %08x", val);
 				else
 					sprintf(str + i * 9, " ????????");
 			}
 		}
 		printk("%s%04lx:%s\n", lvl, first & 0xffff, str);
 	}
-
-	set_fs(fs);
 }
 
-static void __dump_instr(const char *lvl, struct pt_regs *regs)
+static void dump_instr(const char *lvl, struct pt_regs *regs)
 {
 	unsigned long addr = instruction_pointer(regs);
 	const int thumb = thumb_mode(regs);
@@ -173,10 +169,20 @@  static void __dump_instr(const char *lvl, struct pt_regs *regs)
 	for (i = -4; i < 1 + !!thumb; i++) {
 		unsigned int val, bad;
 
-		if (thumb)
-			bad = get_user(val, &((u16 *)addr)[i]);
-		else
-			bad = get_user(val, &((u32 *)addr)[i]);
+		if (!user_mode(regs)) {
+			if (thumb) {
+				u16 val16;
+				bad = get_kernel_nofault(val16, &((u16 *)addr)[i]);
+				val = val16;
+			} else {
+				bad = get_kernel_nofault(val, &((u32 *)addr)[i]);
+			}
+		} else {
+			if (thumb)
+				bad = get_user(val, &((u16 *)addr)[i]);
+			else
+				bad = get_user(val, &((u32 *)addr)[i]);
+		}
 
 		if (!bad)
 			p += sprintf(p, i == 0 ? "(%0*x) " : "%0*x ",
@@ -189,20 +195,6 @@  static void __dump_instr(const char *lvl, struct pt_regs *regs)
 	printk("%sCode: %s\n", lvl, str);
 }
 
-static void dump_instr(const char *lvl, struct pt_regs *regs)
-{
-	mm_segment_t fs;
-
-	if (!user_mode(regs)) {
-		fs = get_fs();
-		set_fs(KERNEL_DS);
-		__dump_instr(lvl, regs);
-		set_fs(fs);
-	} else {
-		__dump_instr(lvl, regs);
-	}
-}
-
 #ifdef CONFIG_ARM_UNWIND
 static inline void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
 				  const char *loglvl)
@@ -276,6 +268,7 @@  static int __die(const char *str, int err, struct pt_regs *regs)
 	struct task_struct *tsk = current;
 	static int die_counter;
 	int ret;
+	bool kernel_mode = !user_mode(regs);
 
 	pr_emerg("Internal error: %s: %x [#%d]" S_PREEMPT S_SMP S_ISA "\n",
 	         str, err, ++die_counter);
@@ -290,9 +283,9 @@  static int __die(const char *str, int err, struct pt_regs *regs)
 	pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
 		 TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk));
 
-	if (!user_mode(regs) || in_interrupt()) {
+	if (kernel_mode || in_interrupt()) {
 		dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
-			 THREAD_SIZE + (unsigned long)task_stack_page(tsk));
+			 THREAD_SIZE + (unsigned long)task_stack_page(tsk), kernel_mode);
 		dump_backtrace(regs, tsk, KERN_EMERG);
 		dump_instr(KERN_EMERG, regs);
 	}