diff mbox series

[v5,33/37] s390/uaccess: Add KMSAN support to put_user() and get_user()

Message ID 20240619154530.163232-34-iii@linux.ibm.com (mailing list archive)
State Superseded
Headers show
Series kmsan: Enable on s390 | expand

Commit Message

Ilya Leoshkevich June 19, 2024, 3:44 p.m. UTC
put_user() uses inline assembly with precise constraints, so Clang is
in principle capable of instrumenting it automatically. Unfortunately,
one of the constraints contains a dereferenced user pointer, and Clang
does not currently distinguish user and kernel pointers. Therefore
KMSAN attempts to access shadow for user pointers, which is not a right
thing to do.

An obvious fix to add __no_sanitize_memory to __put_user_fn() does not
work, since it's __always_inline. And __always_inline cannot be removed
due to the __put_user_bad() trick.

A different obvious fix of using the "a" instead of the "+Q" constraint
degrades the code quality, which is very important here, since it's a
hot path.

Instead, repurpose the __put_user_asm() macro to define
__put_user_{char,short,int,long}_noinstr() functions and mark them with
__no_sanitize_memory. For the non-KMSAN builds make them
__always_inline in order to keep the generated code quality. Also
define __put_user_{char,short,int,long}() functions, which call the
aforementioned ones and which *are* instrumented, because they call
KMSAN hooks, which may be implemented as macros.

The same applies to get_user() as well.

Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
 arch/s390/include/asm/uaccess.h | 111 +++++++++++++++++++++++---------
 1 file changed, 79 insertions(+), 32 deletions(-)

Comments

Alexander Potapenko June 20, 2024, 8:36 a.m. UTC | #1
On Wed, Jun 19, 2024 at 5:45 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> put_user() uses inline assembly with precise constraints, so Clang is
> in principle capable of instrumenting it automatically. Unfortunately,
> one of the constraints contains a dereferenced user pointer, and Clang
> does not currently distinguish user and kernel pointers. Therefore
> KMSAN attempts to access shadow for user pointers, which is not a right
> thing to do.

By the way, how does this problem manifest?
I was expecting KMSAN to generate dummy shadow accesses in this case,
and reading/writing 1-8 bytes from dummy shadow shouldn't be a
problem.

(On the other hand, not inlining the get_user/put_user functions is
probably still faster than retrieving the dummy shadow, so I'm fine
either way)

>
> An obvious fix to add __no_sanitize_memory to __put_user_fn() does not
> work, since it's __always_inline. And __always_inline cannot be removed
> due to the __put_user_bad() trick.
>
> A different obvious fix of using the "a" instead of the "+Q" constraint
> degrades the code quality, which is very important here, since it's a
> hot path.
>
> Instead, repurpose the __put_user_asm() macro to define
> __put_user_{char,short,int,long}_noinstr() functions and mark them with
> __no_sanitize_memory. For the non-KMSAN builds make them
> __always_inline in order to keep the generated code quality. Also
> define __put_user_{char,short,int,long}() functions, which call the
> aforementioned ones and which *are* instrumented, because they call
> KMSAN hooks, which may be implemented as macros.
>
> The same applies to get_user() as well.
>
> Acked-by: Heiko Carstens <hca@linux.ibm.com>
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> ---
>  arch/s390/include/asm/uaccess.h | 111 +++++++++++++++++++++++---------
>  1 file changed, 79 insertions(+), 32 deletions(-)
>
> diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
> index 81ae8a98e7ec..70f0edc00c2a 100644
> --- a/arch/s390/include/asm/uaccess.h
> +++ b/arch/s390/include/asm/uaccess.h
> @@ -78,13 +78,24 @@ union oac {
>
>  int __noreturn __put_user_bad(void);
>
> -#define __put_user_asm(to, from, size)                                 \
> -({                                                                     \
> +#ifdef CONFIG_KMSAN
> +#define get_put_user_noinstr_attributes \
> +       noinline __maybe_unused __no_sanitize_memory
> +#else
> +#define get_put_user_noinstr_attributes __always_inline
> +#endif
> +
> +#define DEFINE_PUT_USER(type)                                          \
> +static get_put_user_noinstr_attributes int                             \
> +__put_user_##type##_noinstr(unsigned type __user *to,                  \
> +                           unsigned type *from,                        \
> +                           unsigned long size)                         \
> +{                                                                      \
>         union oac __oac_spec = {                                        \
>                 .oac1.as = PSW_BITS_AS_SECONDARY,                       \
>                 .oac1.a = 1,                                            \
>         };                                                              \
> -       int __rc;                                                       \
> +       int rc;                                                         \
>                                                                         \
>         asm volatile(                                                   \
>                 "       lr      0,%[spec]\n"                            \
> @@ -93,12 +104,28 @@ int __noreturn __put_user_bad(void);
>                 "2:\n"                                                  \
>                 EX_TABLE_UA_STORE(0b, 2b, %[rc])                        \
>                 EX_TABLE_UA_STORE(1b, 2b, %[rc])                        \
> -               : [rc] "=&d" (__rc), [_to] "+Q" (*(to))                 \
> +               : [rc] "=&d" (rc), [_to] "+Q" (*(to))                   \
>                 : [_size] "d" (size), [_from] "Q" (*(from)),            \
>                   [spec] "d" (__oac_spec.val)                           \
>                 : "cc", "0");                                           \
> -       __rc;                                                           \
> -})
> +       return rc;                                                      \
> +}                                                                      \
> +                                                                       \
> +static __always_inline int                                             \
> +__put_user_##type(unsigned type __user *to, unsigned type *from,       \
> +                 unsigned long size)                                   \
> +{                                                                      \
> +       int rc;                                                         \
> +                                                                       \
> +       rc = __put_user_##type##_noinstr(to, from, size);               \
> +       instrument_put_user(*from, to, size);                           \
> +       return rc;                                                      \
> +}
> +
> +DEFINE_PUT_USER(char);
> +DEFINE_PUT_USER(short);
> +DEFINE_PUT_USER(int);
> +DEFINE_PUT_USER(long);
>
>  static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
>  {
> @@ -106,24 +133,24 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon
>
>         switch (size) {
>         case 1:
> -               rc = __put_user_asm((unsigned char __user *)ptr,
> -                                   (unsigned char *)x,
> -                                   size);
> +               rc = __put_user_char((unsigned char __user *)ptr,
> +                                    (unsigned char *)x,
> +                                    size);
>                 break;
>         case 2:
> -               rc = __put_user_asm((unsigned short __user *)ptr,
> -                                   (unsigned short *)x,
> -                                   size);
> +               rc = __put_user_short((unsigned short __user *)ptr,
> +                                     (unsigned short *)x,
> +                                     size);
>                 break;
>         case 4:
> -               rc = __put_user_asm((unsigned int __user *)ptr,
> +               rc = __put_user_int((unsigned int __user *)ptr,
>                                     (unsigned int *)x,
>                                     size);
>                 break;
>         case 8:
> -               rc = __put_user_asm((unsigned long __user *)ptr,
> -                                   (unsigned long *)x,
> -                                   size);
> +               rc = __put_user_long((unsigned long __user *)ptr,
> +                                    (unsigned long *)x,
> +                                    size);
>                 break;
>         default:
>                 __put_user_bad();
> @@ -134,13 +161,17 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon
>
>  int __noreturn __get_user_bad(void);
>
> -#define __get_user_asm(to, from, size)                                 \
> -({                                                                     \
> +#define DEFINE_GET_USER(type)                                          \
> +static get_put_user_noinstr_attributes int                             \
> +__get_user_##type##_noinstr(unsigned type *to,                         \
> +                           unsigned type __user *from,                 \
> +                           unsigned long size)                         \
> +{                                                                      \
>         union oac __oac_spec = {                                        \
>                 .oac2.as = PSW_BITS_AS_SECONDARY,                       \
>                 .oac2.a = 1,                                            \
>         };                                                              \
> -       int __rc;                                                       \
> +       int rc;                                                         \
>                                                                         \
>         asm volatile(                                                   \
>                 "       lr      0,%[spec]\n"                            \
> @@ -149,13 +180,29 @@ int __noreturn __get_user_bad(void);
>                 "2:\n"                                                  \
>                 EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize])  \
>                 EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize])  \
> -               : [rc] "=&d" (__rc), "=Q" (*(to))                       \
> +               : [rc] "=&d" (rc), "=Q" (*(to))                         \
>                 : [_size] "d" (size), [_from] "Q" (*(from)),            \
>                   [spec] "d" (__oac_spec.val), [_to] "a" (to),          \
>                   [_ksize] "K" (size)                                   \
>                 : "cc", "0");                                           \
> -       __rc;                                                           \
> -})
> +       return rc;                                                      \
> +}                                                                      \
> +                                                                       \
> +static __always_inline int                                             \
> +__get_user_##type(unsigned type *to, unsigned type __user *from,       \
> +                 unsigned long size)                                   \
> +{                                                                      \
> +       int rc;                                                         \
> +                                                                       \
> +       rc = __get_user_##type##_noinstr(to, from, size);               \
> +       instrument_get_user(*to);                                       \
> +       return rc;                                                      \
> +}
> +
> +DEFINE_GET_USER(char);
> +DEFINE_GET_USER(short);
> +DEFINE_GET_USER(int);
> +DEFINE_GET_USER(long);
>
>  static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
>  {
> @@ -163,24 +210,24 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign
>
>         switch (size) {
>         case 1:
> -               rc = __get_user_asm((unsigned char *)x,
> -                                   (unsigned char __user *)ptr,
> -                                   size);
> +               rc = __get_user_char((unsigned char *)x,
> +                                    (unsigned char __user *)ptr,
> +                                    size);
>                 break;
>         case 2:
> -               rc = __get_user_asm((unsigned short *)x,
> -                                   (unsigned short __user *)ptr,
> -                                   size);
> +               rc = __get_user_short((unsigned short *)x,
> +                                     (unsigned short __user *)ptr,
> +                                     size);
>                 break;
>         case 4:
> -               rc = __get_user_asm((unsigned int *)x,
> +               rc = __get_user_int((unsigned int *)x,
>                                     (unsigned int __user *)ptr,
>                                     size);
>                 break;
>         case 8:
> -               rc = __get_user_asm((unsigned long *)x,
> -                                   (unsigned long __user *)ptr,
> -                                   size);
> +               rc = __get_user_long((unsigned long *)x,
> +                                    (unsigned long __user *)ptr,
> +                                    size);
>                 break;
>         default:
>                 __get_user_bad();
> --
> 2.45.1
>
> --
> You received this message because you are subscribed to the Google Groups "kasan-dev" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to kasan-dev+unsubscribe@googlegroups.com.
> To view this discussion on the web visit https://groups.google.com/d/msgid/kasan-dev/20240619154530.163232-34-iii%40linux.ibm.com.
Ilya Leoshkevich June 20, 2024, 11:19 a.m. UTC | #2
On Thu, 2024-06-20 at 10:36 +0200, Alexander Potapenko wrote:
> On Wed, Jun 19, 2024 at 5:45 PM Ilya Leoshkevich <iii@linux.ibm.com>
> wrote:
> > 
> > put_user() uses inline assembly with precise constraints, so Clang
> > is
> > in principle capable of instrumenting it automatically.
> > Unfortunately,
> > one of the constraints contains a dereferenced user pointer, and
> > Clang
> > does not currently distinguish user and kernel pointers. Therefore
> > KMSAN attempts to access shadow for user pointers, which is not a
> > right
> > thing to do.
> 
> By the way, how does this problem manifest?
> I was expecting KMSAN to generate dummy shadow accesses in this case,
> and reading/writing 1-8 bytes from dummy shadow shouldn't be a
> problem.
> 
> (On the other hand, not inlining the get_user/put_user functions is
> probably still faster than retrieving the dummy shadow, so I'm fine
> either way)

We have two problems here: not only clang can't distinguish user and
kernel pointers, the KMSAN runtime - which is supposed to clean that
up - can't do that either due to overlapping kernel and user address
spaces on s390. So the instrumentation ultimately tries to access the
real shadow.

I forgot what the consequences of that were exactly, so I reverted the
patch and now I get:

Unable to handle kernel pointer dereference in virtual kernel address
space
Failing address: 000003fed25fa000 TEID: 000003fed25fa403
Fault in home space mode while using kernel ASCE.
AS:0000000005a70007 R3:00000000824d8007 S:0000000000000020 
Oops: 0010 ilc:2 [#1] SMP 
Modules linked in:
CPU: 3 PID: 1 Comm: init Tainted: G    B            N 6.10.0-rc4-
g8aadb00f495e #11
Hardware name: IBM 3931 A01 704 (KVM/Linux)
Krnl PSW : 0704c00180000000 000003ffe288975a (memset+0x3a/0xa0)
           R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3
Krnl GPRS: 0000000000000000 000003fed25fa180 000003fed25fa180
000003ffe28897a6
           0000000000000007 000003ffe0000000 0000000000000000
000002ee06e68190
           000002ee06f19000 000003fed25fa180 000003ffd25fa180
000003ffd25fa180
           0000000000000008 0000000000000000 000003ffe17262e0
0000037ee000f730
Krnl Code: 000003ffe288974c: 41101100           la      %r1,256(%r1)
           000003ffe2889750: a737fffb           brctg  
%r3,000003ffe2889746
          #000003ffe2889754: c03000000029       larl   
%r3,000003ffe28897a6
          >000003ffe288975a: 44403000           ex      %r4,0(%r3)
           000003ffe288975e: 07fe               bcr     15,%r14
           000003ffe2889760: a74f0001           cghi    %r4,1
           000003ffe2889764: b9040012           lgr     %r1,%r2
           000003ffe2889768: a784001c           brc    
8,000003ffe28897a0
Call Trace:
 [<000003ffe288975a>] memset+0x3a/0xa0 
([<000003ffe17262bc>] kmsan_internal_set_shadow_origin+0x21c/0x3a0)
 [<000003ffe1725fb6>] kmsan_internal_unpoison_memory+0x26/0x30 
 [<000003ffe1c1c646>] create_elf_tables+0x13c6/0x2620 
 [<000003ffe1c0ebaa>] load_elf_binary+0x50da/0x68f0  
 [<000003ffe18c41fc>] bprm_execve+0x201c/0x2f40 
 [<000003ffe18bff9a>] kernel_execve+0x2cda/0x2d00 
 [<000003ffe49b745a>] kernel_init+0x9ba/0x1630 
 [<000003ffe000cd5c>] __ret_from_fork+0xbc/0x180 
 [<000003ffe4a1907a>] ret_from_fork+0xa/0x30 
Last Breaking-Event-Address:
 [<000003ffe2889742>] memset+0x22/0xa0
Kernel panic - not syncing: Fatal exception: panic_on_oops

So is_bad_asm_addr() returned false for a userspace address.
Why? Because it happened to collide with the kernel modules area:
precisely the effect of overlapping.

VMALLOC_START: 0x37ee0000000
VMALLOC_END:   0x3a960000000
MODULES_VADDR: 0x3ff60000000
Address:       0x3ffd157a580
MODULES_END:   0x3ffe0000000

Now the question is, why do we crash when accessing shadow for modules?
I'll need to investigate, this does not look normal. But even if that
worked, we clearly wouldn't want userspace accesses to pollute module
shadow, so I think we need this patch in its current form.

[...]
Alexander Potapenko June 20, 2024, 11:46 a.m. UTC | #3
On Thu, Jun 20, 2024 at 1:19 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> On Thu, 2024-06-20 at 10:36 +0200, Alexander Potapenko wrote:
> > On Wed, Jun 19, 2024 at 5:45 PM Ilya Leoshkevich <iii@linux.ibm.com>
> > wrote:
> > >
> > > put_user() uses inline assembly with precise constraints, so Clang
> > > is
> > > in principle capable of instrumenting it automatically.
> > > Unfortunately,
> > > one of the constraints contains a dereferenced user pointer, and
> > > Clang
> > > does not currently distinguish user and kernel pointers. Therefore
> > > KMSAN attempts to access shadow for user pointers, which is not a
> > > right
> > > thing to do.
> >
> > By the way, how does this problem manifest?
> > I was expecting KMSAN to generate dummy shadow accesses in this case,
> > and reading/writing 1-8 bytes from dummy shadow shouldn't be a
> > problem.
> >
> > (On the other hand, not inlining the get_user/put_user functions is
> > probably still faster than retrieving the dummy shadow, so I'm fine
> > either way)
>
> We have two problems here: not only clang can't distinguish user and
> kernel pointers, the KMSAN runtime - which is supposed to clean that
> up - can't do that either due to overlapping kernel and user address
> spaces on s390. So the instrumentation ultimately tries to access the
> real shadow.
>
> I forgot what the consequences of that were exactly, so I reverted the
> patch and now I get:
>
> Unable to handle kernel pointer dereference in virtual kernel address
> space
> Failing address: 000003fed25fa000 TEID: 000003fed25fa403
> Fault in home space mode while using kernel ASCE.
> AS:0000000005a70007 R3:00000000824d8007 S:0000000000000020
> Oops: 0010 ilc:2 [#1] SMP
> Modules linked in:
> CPU: 3 PID: 1 Comm: init Tainted: G    B            N 6.10.0-rc4-
> g8aadb00f495e #11
> Hardware name: IBM 3931 A01 704 (KVM/Linux)
> Krnl PSW : 0704c00180000000 000003ffe288975a (memset+0x3a/0xa0)
>            R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3
> Krnl GPRS: 0000000000000000 000003fed25fa180 000003fed25fa180
> 000003ffe28897a6
>            0000000000000007 000003ffe0000000 0000000000000000
> 000002ee06e68190
>            000002ee06f19000 000003fed25fa180 000003ffd25fa180
> 000003ffd25fa180
>            0000000000000008 0000000000000000 000003ffe17262e0
> 0000037ee000f730
> Krnl Code: 000003ffe288974c: 41101100           la      %r1,256(%r1)
>            000003ffe2889750: a737fffb           brctg
> %r3,000003ffe2889746
>           #000003ffe2889754: c03000000029       larl
> %r3,000003ffe28897a6
>           >000003ffe288975a: 44403000           ex      %r4,0(%r3)
>            000003ffe288975e: 07fe               bcr     15,%r14
>            000003ffe2889760: a74f0001           cghi    %r4,1
>            000003ffe2889764: b9040012           lgr     %r1,%r2
>            000003ffe2889768: a784001c           brc
> 8,000003ffe28897a0
> Call Trace:
>  [<000003ffe288975a>] memset+0x3a/0xa0
> ([<000003ffe17262bc>] kmsan_internal_set_shadow_origin+0x21c/0x3a0)
>  [<000003ffe1725fb6>] kmsan_internal_unpoison_memory+0x26/0x30
>  [<000003ffe1c1c646>] create_elf_tables+0x13c6/0x2620
>  [<000003ffe1c0ebaa>] load_elf_binary+0x50da/0x68f0
>  [<000003ffe18c41fc>] bprm_execve+0x201c/0x2f40
>  [<000003ffe18bff9a>] kernel_execve+0x2cda/0x2d00
>  [<000003ffe49b745a>] kernel_init+0x9ba/0x1630
>  [<000003ffe000cd5c>] __ret_from_fork+0xbc/0x180
>  [<000003ffe4a1907a>] ret_from_fork+0xa/0x30
> Last Breaking-Event-Address:
>  [<000003ffe2889742>] memset+0x22/0xa0
> Kernel panic - not syncing: Fatal exception: panic_on_oops
>
> So is_bad_asm_addr() returned false for a userspace address.
> Why? Because it happened to collide with the kernel modules area:
> precisely the effect of overlapping.
>
> VMALLOC_START: 0x37ee0000000
> VMALLOC_END:   0x3a960000000
> MODULES_VADDR: 0x3ff60000000
> Address:       0x3ffd157a580
> MODULES_END:   0x3ffe0000000

I see, thanks for the clarification!

> Now the question is, why do we crash when accessing shadow for modules?
> I'll need to investigate, this does not look normal. But even if that
> worked, we clearly wouldn't want userspace accesses to pollute module
> shadow, so I think we need this patch in its current form.

Ok, it indeed makes sense.

Reviewed-by: Alexander Potapenko <glider@google.com>
Ilya Leoshkevich June 20, 2024, 5:05 p.m. UTC | #4
On Thu, 2024-06-20 at 13:19 +0200, Ilya Leoshkevich wrote:
> On Thu, 2024-06-20 at 10:36 +0200, Alexander Potapenko wrote:
> > On Wed, Jun 19, 2024 at 5:45 PM Ilya Leoshkevich
> > <iii@linux.ibm.com>
> > wrote:
> > > 
> > > put_user() uses inline assembly with precise constraints, so
> > > Clang
> > > is
> > > in principle capable of instrumenting it automatically.
> > > Unfortunately,
> > > one of the constraints contains a dereferenced user pointer, and
> > > Clang
> > > does not currently distinguish user and kernel pointers.
> > > Therefore
> > > KMSAN attempts to access shadow for user pointers, which is not a
> > > right
> > > thing to do.
> > 
> > By the way, how does this problem manifest?
> > I was expecting KMSAN to generate dummy shadow accesses in this
> > case,
> > and reading/writing 1-8 bytes from dummy shadow shouldn't be a
> > problem.
> > 
> > (On the other hand, not inlining the get_user/put_user functions is
> > probably still faster than retrieving the dummy shadow, so I'm fine
> > either way)
> 
> We have two problems here: not only clang can't distinguish user and
> kernel pointers, the KMSAN runtime - which is supposed to clean that
> up - can't do that either due to overlapping kernel and user address
> spaces on s390. So the instrumentation ultimately tries to access the
> real shadow.
> 
> I forgot what the consequences of that were exactly, so I reverted
> the
> patch and now I get:
> 
> Unable to handle kernel pointer dereference in virtual kernel address
> space
> Failing address: 000003fed25fa000 TEID: 000003fed25fa403
> Fault in home space mode while using kernel ASCE.
> AS:0000000005a70007 R3:00000000824d8007 S:0000000000000020 
> Oops: 0010 ilc:2 [#1] SMP 
> Modules linked in:
> CPU: 3 PID: 1 Comm: init Tainted: G    B            N 6.10.0-rc4-
> g8aadb00f495e #11
> Hardware name: IBM 3931 A01 704 (KVM/Linux)
> Krnl PSW : 0704c00180000000 000003ffe288975a (memset+0x3a/0xa0)
>            R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0
> EA:3
> Krnl GPRS: 0000000000000000 000003fed25fa180 000003fed25fa180
> 000003ffe28897a6
>            0000000000000007 000003ffe0000000 0000000000000000
> 000002ee06e68190
>            000002ee06f19000 000003fed25fa180 000003ffd25fa180
> 000003ffd25fa180
>            0000000000000008 0000000000000000 000003ffe17262e0
> 0000037ee000f730
> Krnl Code: 000003ffe288974c: 41101100           la      %r1,256(%r1)
>            000003ffe2889750: a737fffb           brctg  
> %r3,000003ffe2889746
>           #000003ffe2889754: c03000000029       larl   
> %r3,000003ffe28897a6
>           >000003ffe288975a: 44403000           ex      %r4,0(%r3)
>            000003ffe288975e: 07fe               bcr     15,%r14
>            000003ffe2889760: a74f0001           cghi    %r4,1
>            000003ffe2889764: b9040012           lgr     %r1,%r2
>            000003ffe2889768: a784001c           brc    
> 8,000003ffe28897a0
> Call Trace:
>  [<000003ffe288975a>] memset+0x3a/0xa0 
> ([<000003ffe17262bc>] kmsan_internal_set_shadow_origin+0x21c/0x3a0)
>  [<000003ffe1725fb6>] kmsan_internal_unpoison_memory+0x26/0x30 
>  [<000003ffe1c1c646>] create_elf_tables+0x13c6/0x2620 
>  [<000003ffe1c0ebaa>] load_elf_binary+0x50da/0x68f0  
>  [<000003ffe18c41fc>] bprm_execve+0x201c/0x2f40 
>  [<000003ffe18bff9a>] kernel_execve+0x2cda/0x2d00 
>  [<000003ffe49b745a>] kernel_init+0x9ba/0x1630 
>  [<000003ffe000cd5c>] __ret_from_fork+0xbc/0x180 
>  [<000003ffe4a1907a>] ret_from_fork+0xa/0x30 
> Last Breaking-Event-Address:
>  [<000003ffe2889742>] memset+0x22/0xa0
> Kernel panic - not syncing: Fatal exception: panic_on_oops
> 
> So is_bad_asm_addr() returned false for a userspace address.
> Why? Because it happened to collide with the kernel modules area:
> precisely the effect of overlapping.
> 
> VMALLOC_START: 0x37ee0000000
> VMALLOC_END:   0x3a960000000
> MODULES_VADDR: 0x3ff60000000
> Address:       0x3ffd157a580
> MODULES_END:   0x3ffe0000000
> 
> Now the question is, why do we crash when accessing shadow for
> modules?

So, Alexander G. and I have figured it out. KMSAN maps vmalloc/modules
metadata lazily - when the corresponding memory is allocated. Here we
have a completely random address that did not come from a prior
vmalloc()/execmem_alloc(), so the corresponding metadata pages are
missing.

We could probably detect this situation and perform the lazy
initialization in this case as well, but I don't know if it's worth the
effort.

> I'll need to investigate, this does not look normal. But even if that
> worked, we clearly wouldn't want userspace accesses to pollute module
> shadow, so I think we need this patch in its current form.
> 
> [...]
diff mbox series

Patch

diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 81ae8a98e7ec..70f0edc00c2a 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -78,13 +78,24 @@  union oac {
 
 int __noreturn __put_user_bad(void);
 
-#define __put_user_asm(to, from, size)					\
-({									\
+#ifdef CONFIG_KMSAN
+#define get_put_user_noinstr_attributes \
+	noinline __maybe_unused __no_sanitize_memory
+#else
+#define get_put_user_noinstr_attributes __always_inline
+#endif
+
+#define DEFINE_PUT_USER(type)						\
+static get_put_user_noinstr_attributes int				\
+__put_user_##type##_noinstr(unsigned type __user *to,			\
+			    unsigned type *from,			\
+			    unsigned long size)				\
+{									\
 	union oac __oac_spec = {					\
 		.oac1.as = PSW_BITS_AS_SECONDARY,			\
 		.oac1.a = 1,						\
 	};								\
-	int __rc;							\
+	int rc;								\
 									\
 	asm volatile(							\
 		"	lr	0,%[spec]\n"				\
@@ -93,12 +104,28 @@  int __noreturn __put_user_bad(void);
 		"2:\n"							\
 		EX_TABLE_UA_STORE(0b, 2b, %[rc])			\
 		EX_TABLE_UA_STORE(1b, 2b, %[rc])			\
-		: [rc] "=&d" (__rc), [_to] "+Q" (*(to))			\
+		: [rc] "=&d" (rc), [_to] "+Q" (*(to))			\
 		: [_size] "d" (size), [_from] "Q" (*(from)),		\
 		  [spec] "d" (__oac_spec.val)				\
 		: "cc", "0");						\
-	__rc;								\
-})
+	return rc;							\
+}									\
+									\
+static __always_inline int						\
+__put_user_##type(unsigned type __user *to, unsigned type *from,	\
+		  unsigned long size)					\
+{									\
+	int rc;								\
+									\
+	rc = __put_user_##type##_noinstr(to, from, size);		\
+	instrument_put_user(*from, to, size);				\
+	return rc;							\
+}
+
+DEFINE_PUT_USER(char);
+DEFINE_PUT_USER(short);
+DEFINE_PUT_USER(int);
+DEFINE_PUT_USER(long);
 
 static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
 {
@@ -106,24 +133,24 @@  static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon
 
 	switch (size) {
 	case 1:
-		rc = __put_user_asm((unsigned char __user *)ptr,
-				    (unsigned char *)x,
-				    size);
+		rc = __put_user_char((unsigned char __user *)ptr,
+				     (unsigned char *)x,
+				     size);
 		break;
 	case 2:
-		rc = __put_user_asm((unsigned short __user *)ptr,
-				    (unsigned short *)x,
-				    size);
+		rc = __put_user_short((unsigned short __user *)ptr,
+				      (unsigned short *)x,
+				      size);
 		break;
 	case 4:
-		rc = __put_user_asm((unsigned int __user *)ptr,
+		rc = __put_user_int((unsigned int __user *)ptr,
 				    (unsigned int *)x,
 				    size);
 		break;
 	case 8:
-		rc = __put_user_asm((unsigned long __user *)ptr,
-				    (unsigned long *)x,
-				    size);
+		rc = __put_user_long((unsigned long __user *)ptr,
+				     (unsigned long *)x,
+				     size);
 		break;
 	default:
 		__put_user_bad();
@@ -134,13 +161,17 @@  static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon
 
 int __noreturn __get_user_bad(void);
 
-#define __get_user_asm(to, from, size)					\
-({									\
+#define DEFINE_GET_USER(type)						\
+static get_put_user_noinstr_attributes int				\
+__get_user_##type##_noinstr(unsigned type *to,				\
+			    unsigned type __user *from,			\
+			    unsigned long size)				\
+{									\
 	union oac __oac_spec = {					\
 		.oac2.as = PSW_BITS_AS_SECONDARY,			\
 		.oac2.a = 1,						\
 	};								\
-	int __rc;							\
+	int rc;								\
 									\
 	asm volatile(							\
 		"	lr	0,%[spec]\n"				\
@@ -149,13 +180,29 @@  int __noreturn __get_user_bad(void);
 		"2:\n"							\
 		EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize])	\
 		EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize])	\
-		: [rc] "=&d" (__rc), "=Q" (*(to))			\
+		: [rc] "=&d" (rc), "=Q" (*(to))				\
 		: [_size] "d" (size), [_from] "Q" (*(from)),		\
 		  [spec] "d" (__oac_spec.val), [_to] "a" (to),		\
 		  [_ksize] "K" (size)					\
 		: "cc", "0");						\
-	__rc;								\
-})
+	return rc;							\
+}									\
+									\
+static __always_inline int						\
+__get_user_##type(unsigned type *to, unsigned type __user *from,	\
+		  unsigned long size)					\
+{									\
+	int rc;								\
+									\
+	rc = __get_user_##type##_noinstr(to, from, size);		\
+	instrument_get_user(*to);					\
+	return rc;							\
+}
+
+DEFINE_GET_USER(char);
+DEFINE_GET_USER(short);
+DEFINE_GET_USER(int);
+DEFINE_GET_USER(long);
 
 static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
 {
@@ -163,24 +210,24 @@  static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign
 
 	switch (size) {
 	case 1:
-		rc = __get_user_asm((unsigned char *)x,
-				    (unsigned char __user *)ptr,
-				    size);
+		rc = __get_user_char((unsigned char *)x,
+				     (unsigned char __user *)ptr,
+				     size);
 		break;
 	case 2:
-		rc = __get_user_asm((unsigned short *)x,
-				    (unsigned short __user *)ptr,
-				    size);
+		rc = __get_user_short((unsigned short *)x,
+				      (unsigned short __user *)ptr,
+				      size);
 		break;
 	case 4:
-		rc = __get_user_asm((unsigned int *)x,
+		rc = __get_user_int((unsigned int *)x,
 				    (unsigned int __user *)ptr,
 				    size);
 		break;
 	case 8:
-		rc = __get_user_asm((unsigned long *)x,
-				    (unsigned long __user *)ptr,
-				    size);
+		rc = __get_user_long((unsigned long *)x,
+				     (unsigned long __user *)ptr,
+				     size);
 		break;
 	default:
 		__get_user_bad();