Message ID | 20170426183425.32158-1-thgarnie@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
* Thomas Garnier <thgarnie@google.com> wrote: > + > +/* > + * Called before coming back to user-mode. Returning to user-mode with an > + * address limit different than USER_DS can allow to overwrite kernel memory. > + */ > +static inline void addr_limit_check_syscall(void) > +{ > + BUG_ON(!segment_eq(get_fs(), USER_DS)); > +} > + > +#ifndef CONFIG_ADDR_LIMIT_CHECK > +#define __CHECK_USERMODE_SYSCALL() \ > + bool user_caller = segment_eq(get_fs(), USER_DS) > +#define __VERIFY_ADDR_LIMIT() \ > + if (user_caller) addr_limit_check_syscall() > +#else > +#define __CHECK_USERMODE_SYSCALL() > +#define __VERIFY_ADDR_LIMIT() > +asmlinkage void addr_limit_check_failed(void) __noreturn; > +#endif _Please_ harmonize all the externally exposed names and symbols. There's no reason for this mismash of names: CONFIG_ADDR_LIMIT_CHECK __CHECK_USERMODE_SYSCALL __VERIFY_ADDR_LIMIT When we could just as easily name them consistently, along the existing pattern: CONFIG_ADDR_LIMIT_CHECK __SYSCALL_ADDR_LIMIT_CHECK __ADDR_LIMIT_CHECK which should fit into existing nomenclature: > #define __SYSCALL_DEFINEx(x, name, ...) \ But even with that fixed, the whole construct still looks pretty weird: > { \ > - long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ > + long ret; \ > + __CHECK_USERMODE_SYSCALL(); \ > + ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ > + __ADDR_LIMIT_CHECK(); \ > __MAP(x,__SC_TEST,__VA_ARGS__); \ > __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ > return ret; \ I think something like this would be more natural to read: > + ADDR_LIMIT_CHECK_PRE(); \ > + ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ > + ADDR_LIMIT_CHECK_POST(); \ it's a clear pre/post construct. Also note the lack of double underscores. BTW., a further simplification would be: #ifndef ADDR_LIMIT_CHECK_PRE # define ADDR_LIMIT_CHECK_PRE ... #endif This way architectures could override this generic functionality simply by defining the helpers. Architectures that don't do that get the generic version. Thanks, Ingo
On Wed, Apr 26, 2017 at 11:49 PM, Ingo Molnar <mingo@kernel.org> wrote: > > * Thomas Garnier <thgarnie@google.com> wrote: > >> + >> +/* >> + * Called before coming back to user-mode. Returning to user-mode with an >> + * address limit different than USER_DS can allow to overwrite kernel memory. >> + */ >> +static inline void addr_limit_check_syscall(void) >> +{ >> + BUG_ON(!segment_eq(get_fs(), USER_DS)); >> +} >> + >> +#ifndef CONFIG_ADDR_LIMIT_CHECK >> +#define __CHECK_USERMODE_SYSCALL() \ >> + bool user_caller = segment_eq(get_fs(), USER_DS) >> +#define __VERIFY_ADDR_LIMIT() \ >> + if (user_caller) addr_limit_check_syscall() >> +#else >> +#define __CHECK_USERMODE_SYSCALL() >> +#define __VERIFY_ADDR_LIMIT() >> +asmlinkage void addr_limit_check_failed(void) __noreturn; >> +#endif > > _Please_ harmonize all the externally exposed names and symbols. > > There's no reason for this mismash of names: > > CONFIG_ADDR_LIMIT_CHECK > > __CHECK_USERMODE_SYSCALL > __VERIFY_ADDR_LIMIT > > When we could just as easily name them consistently, along the existing pattern: > > CONFIG_ADDR_LIMIT_CHECK > > __SYSCALL_ADDR_LIMIT_CHECK > __ADDR_LIMIT_CHECK > > which should fit into existing nomenclature: > >> #define __SYSCALL_DEFINEx(x, name, ...) \ > > But even with that fixed, the whole construct still looks pretty weird: > >> { \ >> - long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ >> + long ret; \ >> + __CHECK_USERMODE_SYSCALL(); \ >> + ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ >> + __ADDR_LIMIT_CHECK(); \ >> __MAP(x,__SC_TEST,__VA_ARGS__); \ >> __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ >> return ret; \ > > I think something like this would be more natural to read: > >> + ADDR_LIMIT_CHECK_PRE(); \ >> + ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ >> + ADDR_LIMIT_CHECK_POST(); \ > > it's a clear pre/post construct. Also note the lack of double underscores. I think this construct makes more sense because the first macro check if the syscall was called by user-mode. I will send an update for this on this thread. > > BTW., a further simplification would be: > > #ifndef ADDR_LIMIT_CHECK_PRE > # define ADDR_LIMIT_CHECK_PRE ... > #endif > > This way architectures could override this generic functionality simply by > defining the helpers. Architectures that don't do that get the generic version. I don't think architectures need to do that. The optimizations are embedding the checks on their architecture-specific code to make it faster and remove the size impact. The pre/post is fine for the rest. > > Thanks, > > Ingo
* Thomas Garnier <thgarnie@google.com> wrote: > > BTW., a further simplification would be: > > > > #ifndef ADDR_LIMIT_CHECK_PRE > > # define ADDR_LIMIT_CHECK_PRE ... > > #endif > > > > This way architectures could override this generic functionality simply by > > defining the helpers. Architectures that don't do that get the generic version. > > I don't think architectures need to do that. The optimizations are > embedding the checks on their architecture-specific code to make it > faster and remove the size impact. The pre/post is fine for the rest. Indeed, only the generic code needs to turn off that code - architectures will place these callbacks elsewhere. Thanks, Ingo
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index d25435d94b6e..164de1d24e92 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -103,6 +103,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_BH select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + select ADDR_LIMIT_CHECK select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 980c3c9b06f8..ebde64f1622c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -191,6 +191,28 @@ extern struct trace_event_functions exit_syscall_print_funcs; SYSCALL_METADATA(sname, x, __VA_ARGS__) \ __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) + +/* + * Called before coming back to user-mode. Returning to user-mode with an + * address limit different than USER_DS can allow to overwrite kernel memory. + */ +static inline void addr_limit_check_syscall(void) +{ + BUG_ON(!segment_eq(get_fs(), USER_DS)); +} + +#ifndef CONFIG_ADDR_LIMIT_CHECK +#define __CHECK_USERMODE_SYSCALL() \ + bool user_caller = segment_eq(get_fs(), USER_DS) +#define __VERIFY_ADDR_LIMIT() \ + if (user_caller) addr_limit_check_syscall() +#else +#define __CHECK_USERMODE_SYSCALL() +#define __VERIFY_ADDR_LIMIT() +asmlinkage void addr_limit_check_failed(void) __noreturn; +#endif + + #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__) #define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ @@ -199,7 +221,10 @@ extern struct trace_event_functions exit_syscall_print_funcs; asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ - long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ + long ret; \ + __CHECK_USERMODE_SYSCALL(); \ + ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ + __VERIFY_ADDR_LIMIT(); \ __MAP(x,__SC_TEST,__VA_ARGS__); \ __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ return ret; \ diff --git a/init/Kconfig b/init/Kconfig index 42a346b0df43..599d9fe30703 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1961,6 +1961,12 @@ config PROFILING config TRACEPOINTS bool +config ADDR_LIMIT_CHECK + bool + help + Disable the generic address limit check. Allow each architecture to + optimize how and when the verification is done. + source "arch/Kconfig" endmenu # General setup diff --git a/kernel/sys.c b/kernel/sys.c index 8a94b4eabcaa..a1cbcd715d62 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2458,3 +2458,16 @@ COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info) return 0; } #endif /* CONFIG_COMPAT */ + +#ifdef CONFIG_ADDR_LIMIT_CHECK +/* + * Used when an architecture specific implementation detects an invalid address + * limit. This function does not return. + */ +asmlinkage void addr_limit_check_failed(void) +{ + /* Try to fail on the generic address limit check */ + addr_limit_check_syscall(); + panic("Invalid address limit before returning to user-mode"); +} +#endif