diff mbox

arm64: traps: Mark __le16, __le32, __user variables properly

Message ID 20170217165112.17512-1-stephen.boyd@linaro.org (mailing list archive)
State New, archived
Headers show

Commit Message

Stephen Boyd Feb. 17, 2017, 4:51 p.m. UTC
Sparse complains a bit on this file about endian issues and
__user casting:

arch/arm64/kernel/traps.c:87:37: warning: incorrect type in argument 1 (different address spaces)
arch/arm64/kernel/traps.c:87:37:    expected void const volatile [noderef] <asn:1>*<noident>
arch/arm64/kernel/traps.c:87:37:    got unsigned long *<noident>
arch/arm64/kernel/traps.c:116:23: warning: incorrect type in argument 1 (different address spaces)
arch/arm64/kernel/traps.c:116:23:    expected void const volatile [noderef] <asn:1>*<noident>
arch/arm64/kernel/traps.c:116:23:    got unsigned int [usertype] *
arch/arm64/kernel/traps.c:346:25: warning: cast to restricted __le16
arch/arm64/kernel/traps.c:352:34: warning: cast to restricted __le16
arch/arm64/kernel/traps.c:359:25: warning: cast to restricted __le32

Mark the types appropriately, and force the cast in get_user()
when assigning to 0 so sparse doesn't complain. The resulting
object code is the same before and after this commit.

Cc: Punit Agrawal <punit.agrawal@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Stephen Boyd <stephen.boyd@linaro.org>
---

Noticed while making other changes to this file. There are other issues still
about marking symbols static, but I'm not sure we want to introduce another
header file for the asmlinkage functions?

arch/arm64/kernel/traps.c:429:29: warning: symbol 'do_undefinstr' was not declared. Should it be static?
arch/arm64/kernel/traps.c:529:29: warning: symbol 'do_sysinstr' was not declared. Should it be static?
arch/arm64/kernel/traps.c:544:17: warning: symbol 'do_ni_syscall' was not declared. Should it be static?
arch/arm64/kernel/traps.c:615:17: warning: symbol 'bad_mode' was not declared. Should it be static?
arch/arm64/kernel/traps.c:632:17: warning: symbol 'bad_el0_sync' was not declared. Should it be static?
arch/arm64/kernel/traps.c:722:12: warning: symbol 'early_brk64' was not declared. Should it be static?
arch/arm64/kernel/traps.c:567:10: warning: Initializer entry defined twice
arch/arm64/kernel/traps.c:568:10:   also defined here

 arch/arm64/include/asm/uaccess.h |  2 +-
 arch/arm64/kernel/traps.c        | 23 ++++++++++++++---------
 2 files changed, 15 insertions(+), 10 deletions(-)

Comments

Russell King (Oracle) Feb. 17, 2017, 5:31 p.m. UTC | #1
On Fri, Feb 17, 2017 at 08:51:12AM -0800, Stephen Boyd wrote:
> diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
> index 659b2e6b6cf7..23959cb70ded 100644
> --- a/arch/arm64/kernel/traps.c
> +++ b/arch/arm64/kernel/traps.c
> @@ -84,7 +84,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
>  			if (p >= bottom && p < top) {
>  				unsigned long val;
>  
> -				if (__get_user(val, (unsigned long *)p) == 0)
> +				if (__get_user(val, (unsigned long __user *)p) == 0)
>  					sprintf(str + i * 17, " %016lx", val);
>  				else
>  					sprintf(str + i * 17, " ????????????????");
> @@ -113,7 +113,7 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs)
>  	for (i = -4; i < 1; i++) {
>  		unsigned int val, bad;
>  
> -		bad = __get_user(val, &((u32 *)addr)[i]);
> +		bad = __get_user(val, &((u32 __user *)addr)[i]);
>  
>  		if (!bad)
>  			p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
> @@ -340,23 +340,28 @@ static int call_undef_hook(struct pt_regs *regs)
>  		return 1;
>  
>  	if (compat_thumb_mode(regs)) {
> +		__le16 tinst;
> +
>  		/* 16-bit Thumb instruction */
> -		if (get_user(instr, (u16 __user *)pc))
> +		if (get_user(tinst, (__le16 __user *)pc))
>  			goto exit;
> -		instr = le16_to_cpu(instr);
> +		instr = le16_to_cpu(tinst);
>  		if (aarch32_insn_is_wide(instr)) {
> -			u32 instr2;
> +			__le16 tinstr2;
> +			u16 instr2;
>  
> -			if (get_user(instr2, (u16 __user *)(pc + 2)))
> +			if (get_user(tinstr2, (__le16 __user *)(pc + 2)))
>  				goto exit;
> -			instr2 = le16_to_cpu(instr2);
> +			instr2 = le16_to_cpu(tinstr2);
>  			instr = (instr << 16) | instr2;
>  		}
>  	} else {
> +		__le32 ainst;
> +
>  		/* 32-bit ARM instruction */
> -		if (get_user(instr, (u32 __user *)pc))
> +		if (get_user(ainst, (__le32 __user *)pc))
>  			goto exit;
> -		instr = le32_to_cpu(instr);
> +		instr = le32_to_cpu(ainst);

For the majority of causes, these are _not_ user addresses, they are
kernel addresses.  The use of get_user() at these locations is a way
to safely access these kernel addresses when something has gone wrong
without causing a further oops.

Annotating them with __user to shut up sparse is incorrect.  The point
with sparse is _not_ to end up with a warning free kernel, but for it
to warn where things are not quite right in terms of semantics.  These
warnings should stay.

So, the warnings about lack of __user should stay.
Stephen Boyd Feb. 17, 2017, 5:46 p.m. UTC | #2
Quoting Russell King - ARM Linux (2017-02-17 09:31:01)
> On Fri, Feb 17, 2017 at 08:51:12AM -0800, Stephen Boyd wrote:
> > diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
> > index 659b2e6b6cf7..23959cb70ded 100644
> > --- a/arch/arm64/kernel/traps.c
> > +++ b/arch/arm64/kernel/traps.c
> > @@ -84,7 +84,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
> >                       if (p >= bottom && p < top) {
> >                               unsigned long val;
> >  
> > -                             if (__get_user(val, (unsigned long *)p) == 0)
> > +                             if (__get_user(val, (unsigned long __user *)p) == 0)
> >                                       sprintf(str + i * 17, " %016lx", val);
> >                               else
> >                                       sprintf(str + i * 17, " ????????????????");
> > @@ -113,7 +113,7 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs)
> >       for (i = -4; i < 1; i++) {
> >               unsigned int val, bad;
> >  
> > -             bad = __get_user(val, &((u32 *)addr)[i]);
> > +             bad = __get_user(val, &((u32 __user *)addr)[i]);
> >  
> >               if (!bad)
> >                       p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
> 
> For the majority of causes, these are _not_ user addresses, they are
> kernel addresses.  The use of get_user() at these locations is a way
> to safely access these kernel addresses when something has gone wrong
> without causing a further oops.

Understood. It looks like a custom version of probe_kernel_read() (where
we force the pointer to __user BTW). Is that to only set_fs() once
instead of many times? Maybe we need a ____probe_kernel_read() that
forces it to __user under the covers and doesn't do any set_fs() and
also indicates we know what we're doing?

> 
> Annotating them with __user to shut up sparse is incorrect.  The point
> with sparse is _not_ to end up with a warning free kernel, but for it
> to warn where things are not quite right in terms of semantics.  These
> warnings should stay.
> 
> So, the warnings about lack of __user should stay.
> 

Ok. I usually compile things with 'make -s' and C=2 and then if sparse
complains I want to keep it quiet so I don't have to worry about looking
through the warnings. So in my workflow I _do_ want a warning free
kernel.
Luc Van Oostenryck Feb. 19, 2017, 1:58 a.m. UTC | #3
On Fri, Feb 17, 2017 at 08:51:12AM -0800, Stephen Boyd wrote:
> Sparse complains a bit on this file about endian issues and
> __user casting:
> 
> arch/arm64/kernel/traps.c:87:37: warning: incorrect type in argument 1 (different address spaces)
> arch/arm64/kernel/traps.c:87:37:    expected void const volatile [noderef] <asn:1>*<noident>
> arch/arm64/kernel/traps.c:87:37:    got unsigned long *<noident>
> arch/arm64/kernel/traps.c:116:23: warning: incorrect type in argument 1 (different address spaces)
> arch/arm64/kernel/traps.c:116:23:    expected void const volatile [noderef] <asn:1>*<noident>
> arch/arm64/kernel/traps.c:116:23:    got unsigned int [usertype] *

The fact that __get_user() can and is used for both __kernel & __user pointers
defeat any sensible annotation. The proper way would be to have a special
version of __get_user() which would ignore the __user part of the pointer,
something like "__get_user_but_accept_any_pointer()" ...


> arch/arm64/kernel/traps.c:346:25: warning: cast to restricted __le16
> arch/arm64/kernel/traps.c:352:34: warning: cast to restricted __le16
> arch/arm64/kernel/traps.c:359:25: warning: cast to restricted __le32

Your patch looked correct to me for those.
 
> Mark the types appropriately, and force the cast in get_user()
> when assigning to 0 so sparse doesn't complain.
I didn't looked deeply at this one but I don't think it is needed.
Care to give more details?


> Noticed while making other changes to this file. There are other issues still
> about marking symbols static, but I'm not sure we want to introduce another
> header file for the asmlinkage functions?
Probably not, indeed.

> arch/arm64/kernel/traps.c:567:10: warning: Initializer entry defined twice
> arch/arm64/kernel/traps.c:568:10:   also defined here
This one I find strange. Can you tell which are those two entries?

 
> diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
> index 46da3ea638bb..2f5b4ae98ee0 100644
> --- a/arch/arm64/include/asm/uaccess.h
> +++ b/arch/arm64/include/asm/uaccess.h
> @@ -287,7 +287,7 @@ do {									\
>  	might_fault();							\
>  	access_ok(VERIFY_READ, __p, sizeof(*__p)) ?			\
>  		__get_user((x), __p) :					\
> -		((x) = 0, -EFAULT);					\
> +		((x) = (__force __typeof__(*(ptr)))0, -EFAULT);		\
>  })

As said above, this one is dubious.


Luc Van Oostenryck
Stephen Boyd Feb. 20, 2017, 8:47 a.m. UTC | #4
Quoting Luc Van Oostenryck (2017-02-18 17:58:09)
> On Fri, Feb 17, 2017 at 08:51:12AM -0800, Stephen Boyd wrote:
>  
> > Mark the types appropriately, and force the cast in get_user()
> > when assigning to 0 so sparse doesn't complain.
> I didn't looked deeply at this one but I don't think it is needed.
> Care to give more details?

I think my sparse is old. I was using v0.5.0 from my distro, but using
sparse-next branch from git.kernel.org doesn't show the problem anymore.
I suppose I'll upgrade this machine's sparse version manually to avoid
this problem in the future and withdraw my other patch to asm-generic.

> 
> 
> > Noticed while making other changes to this file. There are other issues still
> > about marking symbols static, but I'm not sure we want to introduce another
> > header file for the asmlinkage functions?
> Probably not, indeed.
> 
> > arch/arm64/kernel/traps.c:567:10: warning: Initializer entry defined twice
> > arch/arm64/kernel/traps.c:568:10:   also defined here
> This one I find strange. Can you tell which are those two entries?
> 

This is:

 static const char *esr_class_str[] = {
         [0 ... ESR_ELx_EC_MAX]          = "UNRECOGNIZED EC",
         [ESR_ELx_EC_UNKNOWN]            = "Unknown/Uncategorized",

where we initialize the entire array to an "unknown" value once, and
then fill in the known values after that. This isn't a very common
pattern, but it is used from time to time to avoid having lots of lines
to do the same thing.
Luc Van Oostenryck Feb. 20, 2017, 10:04 a.m. UTC | #5
On Mon, Feb 20, 2017 at 9:47 AM, Stephen Boyd <stephen.boyd@linaro.org> wrote:
> Quoting Luc Van Oostenryck (2017-02-18 17:58:09)
>> On Fri, Feb 17, 2017 at 08:51:12AM -0800, Stephen Boyd wrote:
>> > arch/arm64/kernel/traps.c:567:10: warning: Initializer entry defined twice
>> > arch/arm64/kernel/traps.c:568:10:   also defined here
>> This one I find strange. Can you tell which are those two entries?
>>
>
> This is:
>
>  static const char *esr_class_str[] = {
>          [0 ... ESR_ELx_EC_MAX]          = "UNRECOGNIZED EC",
>          [ESR_ELx_EC_UNKNOWN]            = "Unknown/Uncategorized",
>
> where we initialize the entire array to an "unknown" value once, and
> then fill in the known values after that. This isn't a very common
> pattern, but it is used from time to time to avoid having lots of lines
> to do the same thing.

I see, yes can be handy but indeed spaese doesn't know about it yet.

Luc
Mark Rutland Feb. 20, 2017, 10:49 a.m. UTC | #6
On Mon, Feb 20, 2017 at 12:47:57AM -0800, Stephen Boyd wrote:
> Quoting Luc Van Oostenryck (2017-02-18 17:58:09)
> > On Fri, Feb 17, 2017 at 08:51:12AM -0800, Stephen Boyd wrote:
> >  
> > > arch/arm64/kernel/traps.c:567:10: warning: Initializer entry defined twice
> > > arch/arm64/kernel/traps.c:568:10:   also defined here
> > This one I find strange. Can you tell which are those two entries?
> 
> This is:
> 
>  static const char *esr_class_str[] = {
>          [0 ... ESR_ELx_EC_MAX]          = "UNRECOGNIZED EC",
>          [ESR_ELx_EC_UNKNOWN]            = "Unknown/Uncategorized",
> 
> where we initialize the entire array to an "unknown" value once, and
> then fill in the known values after that. This isn't a very common
> pattern, but it is used from time to time to avoid having lots of lines
> to do the same thing.

FWIW, it's a fairly common trick for syscall tables, which is where I
copied it from for the above. Certainly it's not that common elsewhere.

[mark@leverpostej:~/src/linux]% tail -n 11  arch/arm64/kernel/sys.c
#undef __SYSCALL
#define __SYSCALL(nr, sym)      [nr] = sym,

/*
 * The sys_call_table array must be 4K aligned to be accessible from
 * kernel/entry.S.
 */
void * const sys_call_table[__NR_syscalls] __aligned(4096) = {
        [0 ... __NR_syscalls - 1] = sys_ni_syscall,
#include <asm/unistd.h>
};

[mark@leverpostej:~/src/linux]% git grep '\[0 \.\.\. ' | grep sys
arch/arc/kernel/sys.c:  [0 ... NR_syscalls-1] = sys_ni_syscall,
arch/arm64/kernel/sys.c:        [0 ... __NR_syscalls - 1] = sys_ni_syscall,
arch/arm64/kernel/sys32.c:      [0 ... __NR_compat_syscalls - 1] = sys_ni_syscall,
arch/c6x/kernel/sys_c6x.c:      [0 ... __NR_syscalls-1] = sys_ni_syscall,
arch/metag/kernel/sys_metag.c:  [0 ... __NR_syscalls-1] = sys_ni_syscall,
arch/tile/kernel/compat.c:      [0 ... __NR_syscalls-1] = sys_ni_syscall,
arch/tile/kernel/sys.c: [0 ... __NR_syscalls-1] = sys_ni_syscall,
arch/unicore32/kernel/sys.c:    [0 ... __NR_syscalls-1] = sys_ni_syscall,
arch/x86/entry/syscall_32.c:    [0 ... __NR_syscall_compat_max] = &sys_ni_syscall,
arch/x86/entry/syscall_64.c:    [0 ... __NR_syscall_max] = &sys_ni_syscall,
arch/x86/um/sys_call_table_32.c:        [0 ... __NR_syscall_max] = &sys_ni_syscall,
arch/x86/um/sys_call_table_64.c:        [0 ... __NR_syscall_max] = &sys_ni_syscall,
arch/xtensa/kernel/syscall.c:   [0 ... __NR_syscall_count - 1] = (syscall_t)&sys_ni_syscall,

It would be nice to make sparse aware of this somehow, even if it
requires some annotation.

Thanks,
Mark.
Luc Van Oostenryck Feb. 20, 2017, 9:33 p.m. UTC | #7
On Mon, Feb 20, 2017 at 10:49:47AM +0000, Mark Rutland wrote:
> On Mon, Feb 20, 2017 at 12:47:57AM -0800, Stephen Boyd wrote:
> > Quoting Luc Van Oostenryck (2017-02-18 17:58:09)
> > > On Fri, Feb 17, 2017 at 08:51:12AM -0800, Stephen Boyd wrote:
> > >  
> > > > arch/arm64/kernel/traps.c:567:10: warning: Initializer entry defined twice
> > > > arch/arm64/kernel/traps.c:568:10:   also defined here
> > > This one I find strange. Can you tell which are those two entries?
> > 
> > This is:
> > 
> >  static const char *esr_class_str[] = {
> >          [0 ... ESR_ELx_EC_MAX]          = "UNRECOGNIZED EC",
> >          [ESR_ELx_EC_UNKNOWN]            = "Unknown/Uncategorized",
> > 
> > where we initialize the entire array to an "unknown" value once, and
> > then fill in the known values after that. This isn't a very common
> > pattern, but it is used from time to time to avoid having lots of lines
> > to do the same thing.
> 
> FWIW, it's a fairly common trick for syscall tables, which is where I
> copied it from for the above. Certainly it's not that common elsewhere.
> 
> ...
> 
> It would be nice to make sparse aware of this somehow, even if it
> requires some annotation.
> 
> Thanks,
> Mark.

I just checked this and I'm not very sure what's best.
Sparse is very well aware of the '...' to specify a range
in an array initializer or in switch-case. The warning
is there only because those entries are later overridden
with some value.
When checking what GCC is doing in this situation is saw
that by default even in cases like:
	static in ref[] = {
		[1] = 1,
		[2] = 2,
	};
GCC doesn't issue a warning. You need to use the flag
-Woverride-init for that. But then you also have a warning
for our current case:
	static in foo[] = {
		[0 ... 3] = 1,
		[0] = 2,
	};

It's easy enough to patch sparse to not issue a warning when the
override concerns a range (which would be perfect for the situation here),
Controlled or not by a new warning flag. But I'm far from convinced
that all uses of such "ranged-initialization" is used for default values
that may be later overridden.

I'm also reluctant to introduce yet another special annotation.

Any thoughts anyone about what we woudl like?


Note: sparse is quite incomplete regarding these overridden entries
  since it issues a warning only when the override is on the first
  element of the range. In others words, the range itself is not
  taken in account. So, no warnigs for code like:
	static in foo[] = {
		[0 ... 3] = 1,
		[1] = 2,
	};


-- Luc Van Oostenryck
Will Deacon Feb. 21, 2017, 11:03 a.m. UTC | #8
On Mon, Feb 20, 2017 at 10:33:45PM +0100, Luc Van Oostenryck wrote:
> I just checked this and I'm not very sure what's best.
> Sparse is very well aware of the '...' to specify a range
> in an array initializer or in switch-case. The warning
> is there only because those entries are later overridden
> with some value.
> When checking what GCC is doing in this situation is saw
> that by default even in cases like:
> 	static in ref[] = {
> 		[1] = 1,
> 		[2] = 2,
> 	};
> GCC doesn't issue a warning. You need to use the flag
> -Woverride-init for that. But then you also have a warning
> for our current case:
> 	static in foo[] = {
> 		[0 ... 3] = 1,
> 		[0] = 2,
> 	};
> 
> It's easy enough to patch sparse to not issue a warning when the
> override concerns a range (which would be perfect for the situation here),
> Controlled or not by a new warning flag. But I'm far from convinced
> that all uses of such "ranged-initialization" is used for default values
> that may be later overridden.

How about not warning only when the overridden range covers the entire
length of the array? The only broken case I can think of that slips
through the cracks then is if somebody typoed the range so that it
accidentally covered the whole array and therefore suppressed the override
warning.

Will
Luc Van Oostenryck Feb. 22, 2017, 1 p.m. UTC | #9
On Tue, Feb 21, 2017 at 11:03:56AM +0000, Will Deacon wrote:
> On Mon, Feb 20, 2017 at 10:33:45PM +0100, Luc Van Oostenryck wrote:
> > It's easy enough to patch sparse to not issue a warning when the
> > override concerns a range (which would be perfect for the situation here),
> > Controlled or not by a new warning flag. But I'm far from convinced
> > that all uses of such "ranged-initialization" is used for default values
> > that may be later overridden.
> 
> How about not warning only when the overridden range covers the entire
> length of the array? The only broken case I can think of that slips
> through the cracks then is if somebody typoed the range so that it
> accidentally covered the whole array and therefore suppressed the override
> warning.
> 
> Will

I like it. Patch is coming.

Luc
diff mbox

Patch

diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 46da3ea638bb..2f5b4ae98ee0 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -287,7 +287,7 @@  do {									\
 	might_fault();							\
 	access_ok(VERIFY_READ, __p, sizeof(*__p)) ?			\
 		__get_user((x), __p) :					\
-		((x) = 0, -EFAULT);					\
+		((x) = (__force __typeof__(*(ptr)))0, -EFAULT);		\
 })
 
 #define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature)	\
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 659b2e6b6cf7..23959cb70ded 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -84,7 +84,7 @@  static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
 			if (p >= bottom && p < top) {
 				unsigned long val;
 
-				if (__get_user(val, (unsigned long *)p) == 0)
+				if (__get_user(val, (unsigned long __user *)p) == 0)
 					sprintf(str + i * 17, " %016lx", val);
 				else
 					sprintf(str + i * 17, " ????????????????");
@@ -113,7 +113,7 @@  static void __dump_instr(const char *lvl, struct pt_regs *regs)
 	for (i = -4; i < 1; i++) {
 		unsigned int val, bad;
 
-		bad = __get_user(val, &((u32 *)addr)[i]);
+		bad = __get_user(val, &((u32 __user *)addr)[i]);
 
 		if (!bad)
 			p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
@@ -340,23 +340,28 @@  static int call_undef_hook(struct pt_regs *regs)
 		return 1;
 
 	if (compat_thumb_mode(regs)) {
+		__le16 tinst;
+
 		/* 16-bit Thumb instruction */
-		if (get_user(instr, (u16 __user *)pc))
+		if (get_user(tinst, (__le16 __user *)pc))
 			goto exit;
-		instr = le16_to_cpu(instr);
+		instr = le16_to_cpu(tinst);
 		if (aarch32_insn_is_wide(instr)) {
-			u32 instr2;
+			__le16 tinstr2;
+			u16 instr2;
 
-			if (get_user(instr2, (u16 __user *)(pc + 2)))
+			if (get_user(tinstr2, (__le16 __user *)(pc + 2)))
 				goto exit;
-			instr2 = le16_to_cpu(instr2);
+			instr2 = le16_to_cpu(tinstr2);
 			instr = (instr << 16) | instr2;
 		}
 	} else {
+		__le32 ainst;
+
 		/* 32-bit ARM instruction */
-		if (get_user(instr, (u32 __user *)pc))
+		if (get_user(ainst, (__le32 __user *)pc))
 			goto exit;
-		instr = le32_to_cpu(instr);
+		instr = le32_to_cpu(ainst);
 	}
 
 	raw_spin_lock_irqsave(&undef_lock, flags);