diff mbox

[9/9] ARM: software-based priviledged-no-access support

Message ID E1ZSmQG-0002za-E3@rmk-PC.arm.linux.org.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Russell King Aug. 21, 2015, 1:31 p.m. UTC
Provide a software-based implementation of the privileged no access
support found in ARMv8.1.

Userspace pages are mapped using a different domain number from the
kernel and IO mappings.  If we switch the user domain to "no access"
when we enter the kernel, we can prevent the kernel from touching
userspace.

However, the kernel needs to be able to access userspace via the
various user accessor functions.  With the wrapping in the previous
patch, we can temporarily enable access when the kernel needs user
access, and re-disable it afterwards.

This allows us to trap unintended accesses to userspace, e.g. those caused
by an inadvertent dereference of the LIST_POISON* values, which, with
appropriate user mappings set up, can be made to succeed.  This in turn
can allow use-after-free bugs to be further exploited than would
otherwise be possible.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Kconfig               | 15 +++++++++++++++
 arch/arm/include/asm/domain.h  | 15 ++++++++++++---
 arch/arm/include/asm/uaccess.h | 14 ++++++++++++++
 arch/arm/kernel/entry-header.S | 25 +++++++++++++++++++++++++
 arch/arm/kernel/process.c      | 24 ++++++++++++++++++------
 5 files changed, 84 insertions(+), 9 deletions(-)

Comments

Geert Uytterhoeven Aug. 25, 2015, 10:32 a.m. UTC | #1
Hi Russell,

On Fri, Aug 21, 2015 at 3:31 PM, Russell King
<rmk+kernel@arm.linux.org.uk> wrote:
> Provide a software-based implementation of the priviledged no access
> support found in ARMv8.1.
>
> Userspace pages are mapped using a different domain number from the
> kernel and IO mappings.  If we switch the user domain to "no access"
> when we enter the kernel, we can prevent the kernel from touching
> userspace.
>
> However, the kernel needs to be able to access userspace via the
> various user accessor functions.  With the wrapping in the previous
> patch, we can temporarily enable access when the kernel needs user
> access, and re-disable it afterwards.
>
> This allows us to trap non-intended accesses to userspace, eg, caused
> by an inadvertent dereference of the LIST_POISON* values, which, with
> appropriate user mappings setup, can be made to succeed.  This in turn
> can allow use-after-free bugs to be further exploited than would
> otherwise be possible.
>
> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

This patch, which is now in arm-soc/for-next, breaks shmobile_defconfig
on r8a7791/koelsch, which has a dual core CA15:

    [ ok ] Configuring network interfaces...done.
    Unhandled fault: page domain fault (0x01b) at 0xbe8e6120
    pgd = edbb0000
    [be8e6120] *pgd=6da77831, *pte=bf4d075f, *ppte=bf4d0c7f
    Internal error: : 1b [#1] SMP ARM
    CPU: 1 PID: 1629 Comm: ntpdate Not tainted 4.2.0-rc8-06444-g3c24fd89c9421db1 #319
    Hardware name: Generic R8A7791 (Flattened Device Tree)
    task: ed883a80 ti: ed41c000 task.ti: ed41c000
    PC is at csum_partial_copy_from_user+0x28/0x3d8
    LR is at csum_and_copy_from_iter+0x334/0x4c0
    pc : [<c04ba510>]    lr : [<c01c82e8>]    psr: 000f0013
    sp : ed41db00  ip : 00000020  fp : ed41db6c
    r10: ed41ddc0  r9 : 00000027  r8 : ed41dc20
    r7 : 00000027  r6 : eda52653  r5 : ed41dec8  r4 : 00000000
    r3 : 00000000  r2 : 00000027  r1 : eda5262c  r0 : be8e6120
    Flags: nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
    Control: 10c5307d  Table: 6dbb006a  DAC: 00000051
    Process ntpdate (pid: 1629, stack limit = 0xed41c210)
    Stack: (0xed41db00 to 0xed41e000)
    db00: eda5262c 00000027 00000000 ed41dec8 eda52653 00000027
ed41dc20 c01c82e8
    db20: ed41db3c c03d7d44 000000d0 c00a85a0 ed41db74 00000000
ed41dba4 00000000
    db40: 00000000 00000027 edb36940 ed9b9380 00000000 ed41dc20
0000002f ed41dc30
    db60: ed41db8c ed41db70 c040dd5c c01c7fc0 00000000 00000000
00000027 edb36940
    db80: ed41dc04 ed41db90 c040c454 c040dd04 00000000 edb36940
ed41dbc4 00000043
    dba0: 000005c8 000005c8 0000002f 00000000 00000000 00000010
000005dc ee3c7280
    dba0: 000005c8 000005c8 0000002f 00000000 00000000 00000010
000005dc ee3c7280
    dbc0: 00000000 000005dc 00000000 00000014 ed41dc04 ffffff97
c040bde4 00004040
    dbe0: ed41dc20 ed9b95a8 ed9b9380 ed41dec0 c040dcf8 00003500
ed41dc74 ed41dc08
    dc00: c040e7f4 c040be8c ed883e5c c040dcf8 ed41dec0 0000002f
00000008 00004040
    dc20: ed41dc20 ed41dc20 00000000 c067bc40 00000000 00000000
00000000 000005dc
    dc40: 0000002f ee3c7280 ffff0000 ed41dc00 ed9b9380 ed9b95a8
ed41dec0 fe61a8c0
    dc60: 00000000 fe61a8c0 ed41dd64 ed41dc78 c0432118 c040e758
0000002f 00000008
    dc80: ed41dcb4 ed41dcb0 00004040 ffffffff 00000000 00000000
ed9b95a8 00000000
    dca0: c040dcf8 1c61a8c0 00000000 00000027 00000000 fe61a8c0
00000000 00000000
    dcc0: ffff0000 00000000 01ffffff b6d21000 edbb2db0 edb81580
ed41dd74 ed41dce8
    dce0: c0098d60 c00985d0 c04c27f8 ed41ddc0 00000001 be8e6068
00000051 ed41ddc0
    dd00: 00000008 00000000 00000008 c00cc668 00000008 ed41dec8
ed41dd9c 00000001
    dd20: 00000001 00000001 ed41dd64 ed41dd38 c01c8c7c c01c62f0
00000027 ed9b9380
    dd40: ed41dec0 00000027 ed41dda0 edc78c80 ed41deec 00004040
ed41dd84 ed41dd68
    dd60: c043b224 c0431c30 c043b198 ed41dec0 be8e6078 00000000
ed41dd94 ed41dd88
    dd80: c03cbaf0 c043b1a4 ed41deac ed41dd98 c03cbd3c c03cbae0
6f7f979f 00000000
    dda0: eedaf25c b6d21000 edb12484 edbb2db0 ed41de24 ed41ddc0
c00b1898 c00b02d8
    ddc0: be8e6120 00000027 00000001 000000fe 00000001 ee36d740
ed41ddf4 ed9b95a8
    dde0: c06a5b80 00000000 00000000 ed9b95a8 ed9b95a8 ee25f580
ed41de64 ed41de08
    de00: c0407274 00000000 c06a5b80 00000000 ee3c7280 00000006
c06a5b80 ee3c7280
    de20: c06a5b80 c06a5b80 ed9b9380 ed8736f0 ed41de4c ed41de40
ed41de94 ed41de48
    de40: c042e1c8 c04049b8 c0432688 c04c5a44 ed9b9380 ed9b944c
ee3c7280 ed41df08
    de60: ed9b95a8 00000000 ed41de8c ed41de78 ed9b9380 00000000
ed41de94 ed41de88
    de80: c00e5c08 00000000 be8e6078 edc78c80 00000002 00004000
ed41c030 00000000
    dea0: ed41df94 ed41deb0 c03ccfe8 c03cbbc0 ed41deec ed41dec0
00000000 00000000
    dec0: 00000000 00000000 00000001 00000000 00000027 ed41ddc0
00000001 00000000
    dee0: 00000000 00004040 00000000 c037ff04 ed41df44 ed41df00
c007181c c03801b0
    df00: 08cc6da6 00000000 00000000 002aea54 ffffffff 00ffffff
ed41df44 ed41df80
    df20: be8e5f88 00000005 0000004e c000fea4 ed41c000 00000000
ed41df54 ed41df48
    df40: c0071918 c00717dc ed41df7c ed41df58 c0071f04 00000000
00000001 be8e6060
    df60: 00000000 c000fea4 ed41c000 ffffffff 00000000 00004000
00000002 00000176
    df80: c000fea4 ed41c000 ed41dfa4 ed41df98 c03cd080 c03ccf80
00000000 ed41dfa8
    dfa0: c000fce0 c03cd07c 00000000 00004000 00000003 be8e6078
00000002 00004000
    dfc0: 00000000 00004000 00000002 00000176 00000003 00000005
b6e4ec14 2af73cb0
    dfe0: 00000176 be8e5f70 b6df6191 b6d798e6 800f0030 00000003
00000000 00000000
    Backtrace:
    [<c01c7fb4>] (csum_and_copy_from_iter) from [<c040dd5c>]
(ip_generic_getfrag+0x64/0xb4)
     r10:ed41dc30 r9:0000002f r8:ed41dc20 r7:00000000 r6:ed9b9380 r5:edb36940
     r4:00000027
    [<c040dcf8>] (ip_generic_getfrag) from [<c040c454>]
(__ip_append_data.isra.37+0x5d4/0x9b0)
     r5:edb36940 r4:00000027
    [<c040be80>] (__ip_append_data.isra.37) from [<c040e7f4>]
(ip_make_skb+0xa8/0xe0)
     r10:00003500 r9:c040dcf8 r8:ed41dec0 r7:ed9b9380 r6:ed9b95a8 r5:ed41dc20
     r4:00004040
    [<c040e74c>] (ip_make_skb) from [<c0432118>] (udp_sendmsg+0x4f4/0x6d8)
     r9:fe61a8c0 r8:00000000 r7:fe61a8c0 r6:ed41dec0 r5:ed9b95a8 r4:ed9b9380
    [<c0431c24>] (udp_sendmsg) from [<c043b224>] (inet_sendmsg+0x8c/0xc0)
     r10:00004040 r9:ed41deec r8:edc78c80 r7:ed41dda0 r6:00000027 r5:ed41dec0
     r4:ed9b9380
    [<c043b198>] (inet_sendmsg) from [<c03cbaf0>] (sock_sendmsg+0x1c/0x2c)
     r6:00000000 r5:be8e6078 r4:ed41dec0 r3:c043b198
    [<c03cbad4>] (sock_sendmsg) from [<c03cbd3c>] (___sys_sendmsg+0x188/0x1f8)
    [<c03cbbb4>] (___sys_sendmsg) from [<c03ccfe8>] (__sys_sendmmsg+0x74/0xfc)
     r10:00000000 r9:ed41c030 r8:00004000 r7:00000002 r6:edc78c80 r5:be8e6078
     r4:00000000
    [<c03ccf74>] (__sys_sendmmsg) from [<c03cd080>] (SyS_sendmmsg+0x10/0x14)
     r9:ed41c000 r8:c000fea4 r7:00000176 r6:00000002 r5:00004000 r4:00000000
    [<c03cd070>] (SyS_sendmmsg) from [<c000fce0>] (ret_fast_syscall+0x0/0x3c)
    Code: e3100003 1a00002f e3d2c00f 0a00000b (e4904004)
    ---[ end trace 21df281cc5d080da ]---

There are a few more networking-related backtraces during further booting
of userspace.

After disabling CONFIG_CPU_SW_DOMAIN_PAN it fails differently:

    VFS: Mounted root (nfs filesystem) readonly on device 0:13.
    devtmpfs: mounted
    Freeing unused kernel memory: 300K (c0629000 - c0674000)
    Unhandled fault: page domain fault (0x81b) at 0x000263e0
    pgd = ed908000
    [000263e0] *pgd=6e299831, *pte=bf81d75f, *ppte=bf81dc7f
    Internal error: : 81b [#1] SMP ARM
    CPU: 1 PID: 1 Comm: init Not tainted 4.2.0-rc8-06444-g3c24fd89c9421db1 #332
    Hardware name: Generic R8A7791 (Flattened Device Tree)
    task: ee0319c0 ti: ee04e000 task.ti: ee04e000
    PC is at __clear_user_std+0x34/0x68
    LR is at padzero+0x4c/0x60
    pc : [<c01b2bd8>]    lr : [<c010a470>]    psr: 20000113
    sp : ee04fe40  ip : 00000000  fp : ee04fe54
    r10: ee0f5300  r9 : ee316120  r8 : 00000000
    r7 : 000265fc  r6 : 000263e0  r5 : ee314400  r4 : ee290e00
    r3 : 00000000  r2 : 00000000  r1 : 00000c18  r0 : 000263e0
    Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
    Control: 10c5307d  Table: 6d90806a  DAC: 00000051
    Process init (pid: 1, stack limit = 0xee04e210)
    Stack: (0xee04fe40 to 0xee050000)
    fe40: 00000c20 c010a470 ee04fed4 ee04fe58 c010ae78 c010a430
00001812 00000000
    fe60: ee04fe94 ee04fe58 ee04e018 00025ef4 00015ad8 00010000
00000009 00010000
    fe80: 00000001 ee316000 ee31b300 000263e0 ee3d3600 00000000
ef7e93c0 00000000
    fea0: ee04febc ee04feb0 c001dde4 fffffff8 ee0f5300 c06c3ccc
c06c3ccc c067ff0c
    fec0: c0680374 c06c3ccc ee04ff04 ee04fed8 c00cf0b8 c010a7cc
c067c8b8 ee0f5300
    fee0: 00000000 ee13a000 00000001 00000000 ed9d5040 c0679318
ee04ff4c ee04ff08
    ff00: c00cf5a4 c00cf038 c05d6ab9 ed9d5078 c0679290 00000000
00000000 ee031c18
    ff20: ee04ff44 c0679318 c0679290 00000000 00000000 00000000
00000000 00000000
    ff40: ee04ff64 ee04ff50 c00cf784 c00cf198 00000000 00000000
ee04ff7c ee04ff68
    ff60: c000a5c8 c00cf75c c06a6000 c05ca7cd ee04ff94 ee04ff80
c000a5e4 c000a5ac
    ff80: c06a6000 c04b54c4 ee04ffac ee04ff98 c04b5544 c000a5dc
ee04e000 00000000
    ffa0: 00000000 ee04ffb0 c000fc88 c04b54d0 00000000 00000000
00000000 00000000
    ffc0: 00000000 00000000 00000000 00000000 00000000 00000000
00000000 00000000
    ffe0: 00000000 00000000 00000000 00000000 00000013 00000000
00000000 00000000
    Backtrace:
    [<c010a424>] (padzero) from [<c010ae78>] (load_elf_binary+0x6b8/0xfbc)
    [<c010a7c0>] (load_elf_binary) from [<c00cf0b8>]
(search_binary_handler+0x8c/0x160)
     r10:c06c3ccc r9:c0680374 r8:c067ff0c r7:c06c3ccc r6:c06c3ccc r5:ee0f5300
     r4:fffffff8
    [<c00cf02c>] (search_binary_handler) from [<c00cf5a4>]
(do_execveat_common+0x418/0x5c4)
     r10:c0679318 r9:ed9d5040 r8:00000000 r7:00000001 r6:ee13a000 r5:00000000
     r4:ee0f5300 r3:c067c8b8
    [<c00cf18c>] (do_execveat_common) from [<c00cf784>] (do_execve+0x34/0x3c)
     r10:00000000 r9:00000000 r8:00000000 r7:00000000 r6:00000000 r5:c0679290
     r4:c0679318
    [<c00cf750>] (do_execve) from [<c000a5c8>] (run_init_process+0x28/0x30)
    [<c000a5a0>] (run_init_process) from [<c000a5e4>]
(try_to_run_init_process+0x14/0x40)
     r5:c05ca7cd r4:c06a6000
    [<c000a5d0>] (try_to_run_init_process) from [<c04b5544>]
(kernel_init+0x80/0xec)
     r5:c04b54c4 r4:c06a6000
    [<c04b54c4>] (kernel_init) from [<c000fc88>] (ret_from_fork+0x14/0x2c)
     r4:00000000 r3:ee04e000
    Code: b4c02001 e26cc004 e041100c e2511008 (54802004)
    ---[ end trace 807fed3702987ba4 ]---
    Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b

Reverting commit 0db805aa8c96f0ea ("ARM: software-based priviledged-no-access
support") fixes it.

Another board-specific config that has CONFIG_ARM_LPAE=y runs fine on the
same hardware. Disabling CONFIG_ARM_LPAE breaks it.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Russell King - ARM Linux Aug. 25, 2015, 10:44 a.m. UTC | #2
On Tue, Aug 25, 2015 at 12:32:51PM +0200, Geert Uytterhoeven wrote:
> This patch, which is now in arm-soc/for-next, breaks shmobile_defconfig
> on r8a7791/koelsch, which has a dual core CA15:
> 
>     [ ok ] Configuring network interfaces...done.
>     Unhandled fault: page domain fault (0x01b) at 0xbe8e6120
>     pgd = edbb0000
>     [be8e6120] *pgd=6da77831, *pte=bf4d075f, *ppte=bf4d0c7f
>     Internal error: : 1b [#1] SMP ARM
>     CPU: 1 PID: 1629 Comm: ntpdate Not tainted
> 4.2.0-rc8-06444-g3c24fd89c9421db1 #31
>     9
>     Hardware name: Generic R8A7791 (Flattened Device Tree)
>     task: ed883a80 ti: ed41c000 task.ti: ed41c000
>     PC is at csum_partial_copy_from_user+0x28/0x3d8
>     LR is at csum_and_copy_from_iter+0x334/0x4c0
>     pc : [<c04ba510>]    lr : [<c01c82e8>]    psr: 000f0013
>     sp : ed41db00  ip : 00000020  fp : ed41db6c
>     r10: ed41ddc0  r9 : 00000027  r8 : ed41dc20
>     r7 : 00000027  r6 : eda52653  r5 : ed41dec8  r4 : 00000000
>     r3 : 00000000  r2 : 00000027  r1 : eda5262c  r0 : be8e6120
>     Flags: nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
>     Control: 10c5307d  Table: 6dbb006a  DAC: 00000051
>     Process ntpdate (pid: 1629, stack limit = 0xed41c210)

Thanks.  I wonder what's different about your ntpdate that triggers
this, and why all my iMX6 behave fine, which have desktop-like ubuntu
installs on (of two different versions.)

What it's basically showing is that (unsurprisingly)
csum_partial_copy_from_user is trying to access userspace.  I'll see
about fixing that today, or pulling the patch from -next if I can't.

I've also noticed that on rpc_defconfig, the 0-day builder shows that
this triggers an ICE as the compiler appears to think it's run out of
registers.

> After disabling CONFIG_CPU_SW_DOMAIN_PAN it  fails differently:
> 
>     VFS: Mounted root (nfs filesystem) readonly on device 0:13.
>     devtmpfs: mounted
>     Freeing unused kernel memory: 300K (c0629000 - c0674000)
>     Unhandled fault: page domain fault (0x81b) at 0x000263e0
>     pgd = ed908000
>     [000263e0] *pgd=6e299831, *pte=bf81d75f, *ppte=bf81dc7f

Yes, this one is because I forgot to provide the non-protected default
for bootup, which I've already merged a fix for.
Geert Uytterhoeven Aug. 25, 2015, 11:21 a.m. UTC | #3
Hi Russell,

On Tue, Aug 25, 2015 at 12:44 PM, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:
> On Tue, Aug 25, 2015 at 12:32:51PM +0200, Geert Uytterhoeven wrote:
>> This patch, which is now in arm-soc/for-next, breaks shmobile_defconfig
>> on r8a7791/koelsch, which has a dual core CA15:
>>
>>     [ ok ] Configuring network interfaces...done.
>>     Unhandled fault: page domain fault (0x01b) at 0xbe8e6120
>>     pgd = edbb0000
>>     [be8e6120] *pgd=6da77831, *pte=bf4d075f, *ppte=bf4d0c7f
>>     Internal error: : 1b [#1] SMP ARM
>>     CPU: 1 PID: 1629 Comm: ntpdate Not tainted
>> 4.2.0-rc8-06444-g3c24fd89c9421db1 #31
>>     9
>>     Hardware name: Generic R8A7791 (Flattened Device Tree)
>>     task: ed883a80 ti: ed41c000 task.ti: ed41c000
>>     PC is at csum_partial_copy_from_user+0x28/0x3d8
>>     LR is at csum_and_copy_from_iter+0x334/0x4c0
>>     pc : [<c04ba510>]    lr : [<c01c82e8>]    psr: 000f0013
>>     sp : ed41db00  ip : 00000020  fp : ed41db6c
>>     r10: ed41ddc0  r9 : 00000027  r8 : ed41dc20
>>     r7 : 00000027  r6 : eda52653  r5 : ed41dec8  r4 : 00000000
>>     r3 : 00000000  r2 : 00000027  r1 : eda5262c  r0 : be8e6120
>>     Flags: nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
>>     Control: 10c5307d  Table: 6dbb006a  DAC: 00000051
>>     Process ntpdate (pid: 1629, stack limit = 0xed41c210)
>
> Thanks.  I wonder what's different about your ntpdate that triggers
> this, and why all my iMX6 behave fine, which have desktop-like ubuntu
> installs on (of two different versions.)

It's ntpdate 1:4.2.6.p5+dfsg-7 from desktop-like Debian jessie.

But I get similar dumps during boot up from rpc.idmapd (SyS_send),
rsyslogd (SyS_send), and from sshd (SyS_write) when trying to log in.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Russell King - ARM Linux Aug. 25, 2015, 12:38 p.m. UTC | #4
On Tue, Aug 25, 2015 at 01:21:04PM +0200, Geert Uytterhoeven wrote:
> Hi Russell,
> 
> On Tue, Aug 25, 2015 at 12:44 PM, Russell King - ARM Linux
> <linux@arm.linux.org.uk> wrote:
> > On Tue, Aug 25, 2015 at 12:32:51PM +0200, Geert Uytterhoeven wrote:
> >> This patch, which is now in arm-soc/for-next, breaks shmobile_defconfig
> >> on r8a7791/koelsch, which has a dual core CA15:
> >>
> >>     [ ok ] Configuring network interfaces...done.
> >>     Unhandled fault: page domain fault (0x01b) at 0xbe8e6120
> >>     pgd = edbb0000
> >>     [be8e6120] *pgd=6da77831, *pte=bf4d075f, *ppte=bf4d0c7f
> >>     Internal error: : 1b [#1] SMP ARM
> >>     CPU: 1 PID: 1629 Comm: ntpdate Not tainted
> >> 4.2.0-rc8-06444-g3c24fd89c9421db1 #31
> >>     9
> >>     Hardware name: Generic R8A7791 (Flattened Device Tree)
> >>     task: ed883a80 ti: ed41c000 task.ti: ed41c000
> >>     PC is at csum_partial_copy_from_user+0x28/0x3d8
> >>     LR is at csum_and_copy_from_iter+0x334/0x4c0
> >>     pc : [<c04ba510>]    lr : [<c01c82e8>]    psr: 000f0013
> >>     sp : ed41db00  ip : 00000020  fp : ed41db6c
> >>     r10: ed41ddc0  r9 : 00000027  r8 : ed41dc20
> >>     r7 : 00000027  r6 : eda52653  r5 : ed41dec8  r4 : 00000000
> >>     r3 : 00000000  r2 : 00000027  r1 : eda5262c  r0 : be8e6120
> >>     Flags: nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> >>     Control: 10c5307d  Table: 6dbb006a  DAC: 00000051
> >>     Process ntpdate (pid: 1629, stack limit = 0xed41c210)
> >
> > Thanks.  I wonder what's different about your ntpdate that triggers
> > this, and why all my iMX6 behave fine, which have desktop-like ubuntu
> > installs on (of two different versions.)
> 
> It's ntpdate 1:4.2.6.p5+dfsg-7 from desktop-like Debian jessie.

Hmm, I think I tried at one time to install Debian on an iMX6 platform
and gave up with it after spending 50 minutes with the installer getting
so far, and then killing the network - it was very repeatable, and always
happened at the same point in the installation.  I gave up with Debian
at that point, as I didn't have lots of 50 minutes to babysit the silly
installer (which can't ask the questions up-front) nor did I want to
waste my monthly internet allowance on multiple failed install attempts.

The reports I was getting from other iMX6 users was that Debian Jessie
had lots of problems at that time.

> But I get similar dumps during boot up from rpc.idmapd (SyS_send),
> rsyslogd (SyS_send), and from sshd (SyS_write) when trying to log in.

Hmm.

root       693  0.0  0.1   4944  3196 ?        Ss   01:22   0:00 /usr/sbin/sshd -D
syslog     720  0.2  0.0  30404  2032 ?        Sl   01:23   1:19 rsyslogd -c5
root       722  0.0  0.0   2392  1340 ?        Ss   01:23   0:00 rpc.idmapd

So, the question I need to find an answer to is... why hasn't this path
been exercised on my platforms during my testing.  It's certainly
compiled into the kernel...

Anyway, I've now (hopefully) fixed the bug, but I've nobbled
csum_partial_copy_from_user to ensure that it will always oops the kernel
if called:

000000b4 <csum_partial_copy_from_user>:
  b4:   ee133f10        mrc     15, 0, r3, cr3, cr0, {0}
  b8:   e92d41fe        push    {r1, r2, r3, r4, r5, r6, r7, r8, lr}
  bc:   e3a03055        mov     r3, #85 ; 0x55
  c0:   ee033f10        mcr     15, 0, r3, cr3, cr0, {0}
  c4:   e7033003        str     r3, [r3, -r3]

and... it doesn't trigger.  I can only assume that this is because the
iMX6 ethernet interface uses TSO (which implies checksum offload), so there's
no need to use these csum functions - and that would explain why it never
came up in my local testing.
Geert Uytterhoeven Aug. 25, 2015, 12:47 p.m. UTC | #5
On Tue, Aug 25, 2015 at 2:38 PM, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:
>> It's ntpdate 1:4.2.6.p5+dfsg-7 from desktop-like Debian jessie.
>
> Hmm, I think I tried at one time to install Debian on an iMX6 platform
> and gave up with it after spending 50 minutes with the installer getting
> so far, and then killing the network - it was very repeatable, and always
> happened at the same point in the installation.  I gave up with Debian
> at that point, as I didn't have lots of 50 minutes to babysit the silly
> installer (which can't ask the questions up-front) nor did I want to
> waste my monthly internet allowance on multiple failed install attempts.

debootstrap is your friend.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Nicolas Schichan Aug. 25, 2015, 1:55 p.m. UTC | #6
On 08/25/2015 02:38 PM, Russell King - ARM Linux wrote:
> On Tue, Aug 25, 2015 at 01:21:04PM +0200, Geert Uytterhoeven wrote:
>> Hi Russell,
>>
>> On Tue, Aug 25, 2015 at 12:44 PM, Russell King - ARM Linux
>> <linux@arm.linux.org.uk> wrote:
>>> On Tue, Aug 25, 2015 at 12:32:51PM +0200, Geert Uytterhoeven wrote:
>>>> This patch, which is now in arm-soc/for-next, breaks shmobile_defconfig
>>>> on r8a7791/koelsch, which has a dual core CA15:
>>>>
>>>>     [ ok ] Configuring network interfaces...done.
>>>>     Unhandled fault: page domain fault (0x01b) at 0xbe8e6120
>>>>     pgd = edbb0000
>>>>     [be8e6120] *pgd=6da77831, *pte=bf4d075f, *ppte=bf4d0c7f
>>>>     Internal error: : 1b [#1] SMP ARM
>>>>     CPU: 1 PID: 1629 Comm: ntpdate Not tainted
>>>> 4.2.0-rc8-06444-g3c24fd89c9421db1 #31
>>>>     9
>>>>     Hardware name: Generic R8A7791 (Flattened Device Tree)
>>>>     task: ed883a80 ti: ed41c000 task.ti: ed41c000
>>>>     PC is at csum_partial_copy_from_user+0x28/0x3d8
>>>>     LR is at csum_and_copy_from_iter+0x334/0x4c0
>>>>     pc : [<c04ba510>]    lr : [<c01c82e8>]    psr: 000f0013
>>>>     sp : ed41db00  ip : 00000020  fp : ed41db6c
>>>>     r10: ed41ddc0  r9 : 00000027  r8 : ed41dc20
>>>>     r7 : 00000027  r6 : eda52653  r5 : ed41dec8  r4 : 00000000
>>>>     r3 : 00000000  r2 : 00000027  r1 : eda5262c  r0 : be8e6120
>>>>     Flags: nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
>>>>     Control: 10c5307d  Table: 6dbb006a  DAC: 00000051
>>>>     Process ntpdate (pid: 1629, stack limit = 0xed41c210)
>>>
>>> Thanks.  I wonder what's different about your ntpdate that triggers
>>> this, and why all my iMX6 behave fine, which have desktop-like ubuntu
>>> installs on (of two different versions.)
>>
>> It's ntpdate 1:4.2.6.p5+dfsg-7 from desktop-like Debian jessie.
> 
> Hmm, I think I tried at one time to install Debian on an iMX6 platform
> and gave up with it after spending 50 minutes with the installer getting
> so far, and then killing the network - it was very repeatable, and always
> happened at the same point in the installation.  I gave up with Debian
> at that point, as I didn't have lots of 50 minutes to babysit the silly
> installer (which can't ask the questions up-front) nor did I want to
> waste my monthly internet allowance on multiple failed install attempts.
> 
> The reports I was getting from other iMX6 users was that Debian Jessie
> had lots of problems at that time.
> 
>> But I get similar dumps during boot up from rpc.idmapd (SyS_send),
>> rsyslogd (SyS_send), and from sshd (SyS_write) when trying to log in.
> 
> Hmm.
> 
> root       693  0.0  0.1   4944  3196 ?        Ss   01:22   0:00 /usr/sbin/sshd -D
> syslog     720  0.2  0.0  30404  2032 ?        Sl   01:23   1:19 rsyslogd -c5
> root       722  0.0  0.0   2392  1340 ?        Ss   01:23   0:00 rpc.idmapd
> 
> So, the question I need to find an answer to is... why hasn't this path
> been exercised on my platforms during my testing.  It's certainly
> compiled into the kernel...
> 
> Anyway, I've now (hopefully) fixed the bug, but I've nobbled
> csum_partial_copy_from_user to ensure that it will always oops the kernel
> if called:
> 
> 000000b4 <csum_partial_copy_from_user>:
>   b4:   ee133f10        mrc     15, 0, r3, cr3, cr0, {0}
>   b8:   e92d41fe        push    {r1, r2, r3, r4, r5, r6, r7, r8, lr}
>   bc:   e3a03055        mov     r3, #85 ; 0x55
>   c0:   ee033f10        mcr     15, 0, r3, cr3, cr0, {0}
>   c4:   e7033003        str     r3, [r3, -r3]
> 
> and... it doesn't trigger.  I can only assume that this is because the
> iMX6 ethernet interface uses TSO (which implies checksum offload), there's
> no need to use these csum functions - and that would explain why it never
> came up in my local testing.

[resent with the list and other original recipients this time]

I have the csum_partial_copy_from_user issue too, but with radvd (which sends
ipv6 packets). ipv4 networking is fine on the other hand. The kirkwood
platform I use does have checksum offload for ipv4 only and not ipv6 so the
csum functions will get called in the ipv6 case.
Will Deacon Aug. 25, 2015, 2:05 p.m. UTC | #7
On Fri, Aug 21, 2015 at 02:31:56PM +0100, Russell King wrote:
> Provide a software-based implementation of the priviledged no access
> support found in ARMv8.1.
> 
> Userspace pages are mapped using a different domain number from the
> kernel and IO mappings.  If we switch the user domain to "no access"
> when we enter the kernel, we can prevent the kernel from touching
> userspace.
> 
> However, the kernel needs to be able to access userspace via the
> various user accessor functions.  With the wrapping in the previous
> patch, we can temporarily enable access when the kernel needs user
> access, and re-disable it afterwards.
> 
> This allows us to trap non-intended accesses to userspace, eg, caused
> by an inadvertent dereference of the LIST_POISON* values, which, with
> appropriate user mappings setup, can be made to succeed.  This in turn
> can allow use-after-free bugs to be further exploited than would
> otherwise be possible.
> 
> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
> ---
>  arch/arm/Kconfig               | 15 +++++++++++++++
>  arch/arm/include/asm/domain.h  | 15 ++++++++++++---
>  arch/arm/include/asm/uaccess.h | 14 ++++++++++++++
>  arch/arm/kernel/entry-header.S | 25 +++++++++++++++++++++++++
>  arch/arm/kernel/process.c      | 24 ++++++++++++++++++------
>  5 files changed, 84 insertions(+), 9 deletions(-)

[...]

> diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
> index 3aa6c3742182..bec7ee0764e1 100644
> --- a/arch/arm/kernel/entry-header.S
> +++ b/arch/arm/kernel/entry-header.S
> @@ -54,15 +54,40 @@
>  	.endm
>  
>  	.macro	uaccess_disable, tmp
> +#ifdef CONFIG_CPU_SW_DOMAIN_PAN
> +	/*
> +	 * Whenever we re-enter userspace, the domains should always be
> +	 * set appropriately.
> +	 */
> +	mov	\tmp, #DACR_UACCESS_DISABLE
> +	mcr	p15, 0, \tmp, c3, c0, 0		@ Set domain register
> +#endif

Missing ISB?

>  	.endm
>  
>  	.macro	uaccess_enable, tmp
> +#ifdef CONFIG_CPU_SW_DOMAIN_PAN
> +	/*
> +	 * Whenever we re-enter userspace, the domains should always be
> +	 * set appropriately.
> +	 */
> +	mov	\tmp, #DACR_UACCESS_ENABLE
> +	mcr	p15, 0, \tmp, c3, c0, 0
> +#endif
>  	.endm
>  
>  	.macro	uaccess_save_and_disable, tmp
> +#ifdef CONFIG_CPU_SW_DOMAIN_PAN
> +	mrc	p15, 0, \tmp, c3, c0, 0
> +	str	\tmp, [sp, #S_FRAME_SIZE]
> +#endif
> +	uaccess_disable \tmp
>  	.endm

Same here. For the enable/restore cases, the exception return will
synchronise the DACR for us, but I think we need the ISB to be sure that
the change has taken effect on the exception entry paths.

Will
diff mbox

Patch

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a750c1425c3a..a898eb72da51 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1694,6 +1694,21 @@  config HIGHPTE
 	bool "Allocate 2nd-level pagetables from highmem"
 	depends on HIGHMEM
 
+config CPU_SW_DOMAIN_PAN
+	bool "Enable use of CPU domains to implement priviledged no-access"
+	depends on MMU && !ARM_LPAE
+	default y
+	help
+	  Increase kernel security by ensuring that normal kernel accesses
+	  are unable to access userspace addresses.  This can help prevent
+	  use-after-free bugs becoming an exploitable privilege escalation
+	  by ensuring that magic values (such as LIST_POISON) will always
+	  fault when dereferenced.
+
+	  CPUs with low-vector mappings use a best-efforts implementation.
+	  Their lower 1MB needs to remain accessible for the vectors, but
+	  the remainder of userspace will become appropriately inaccessible.
+
 config HW_PERF_EVENTS
 	bool "Enable hardware performance counter support for perf events"
 	depends on PERF_EVENTS
diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h
index 2be929549938..0c373979af00 100644
--- a/arch/arm/include/asm/domain.h
+++ b/arch/arm/include/asm/domain.h
@@ -58,11 +58,21 @@ 
 #define domain_val(dom,type)	((type) << (2 * (dom)))
 
 #define DACR_INIT \
-	(domain_val(DOMAIN_USER, DOMAIN_CLIENT) | \
+	(domain_val(DOMAIN_USER, DOMAIN_NOACCESS) | \
 	 domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
 	 domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \
 	 domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT))
 
+#define __DACR_DEFAULT \
+	domain_val(DOMAIN_KERNEL, DOMAIN_CLIENT) | \
+	domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \
+	domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT)
+
+#define DACR_UACCESS_DISABLE	\
+	(__DACR_DEFAULT | domain_val(DOMAIN_USER, DOMAIN_NOACCESS))
+#define DACR_UACCESS_ENABLE	\
+	(__DACR_DEFAULT | domain_val(DOMAIN_USER, DOMAIN_CLIENT))
+
 #ifndef __ASSEMBLY__
 
 static inline unsigned int get_domain(void)
@@ -76,7 +86,6 @@  static inline unsigned int get_domain(void)
 	return domain;
 }
 
-#ifdef CONFIG_CPU_USE_DOMAINS
 static inline void set_domain(unsigned val)
 {
 	asm volatile(
@@ -85,6 +94,7 @@  static inline void set_domain(unsigned val)
 	isb();
 }
 
+#ifdef CONFIG_CPU_USE_DOMAINS
 #define modify_domain(dom,type)					\
 	do {							\
 		unsigned int domain = get_domain();		\
@@ -94,7 +104,6 @@  static inline void set_domain(unsigned val)
 	} while (0)
 
 #else
-static inline void set_domain(unsigned val) { }
 static inline void modify_domain(unsigned dom, unsigned type)	{ }
 #endif
 
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 4ae10967a8ba..cb802870ffb9 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -101,11 +101,25 @@  static inline void set_fs(mm_segment_t fs)
  */
 static inline unsigned int uaccess_save_and_enable(void)
 {
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+	unsigned int old_domain = get_domain();
+
+	/* Set the current domain access to permit user accesses */
+	set_domain((old_domain & ~domain_mask(DOMAIN_USER)) |
+		   domain_val(DOMAIN_USER, DOMAIN_CLIENT));
+
+	return old_domain;
+#else
 	return 0;
+#endif
 }
 
 static inline void uaccess_restore(unsigned int flags)
 {
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+	/* Restore the user access mask */
+	set_domain(flags);
+#endif
 }
 
 /*
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 3aa6c3742182..bec7ee0764e1 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -54,15 +54,40 @@ 
 	.endm
 
 	.macro	uaccess_disable, tmp
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+	/*
+	 * Whenever we re-enter userspace, the domains should always be
+	 * set appropriately.
+	 */
+	mov	\tmp, #DACR_UACCESS_DISABLE
+	mcr	p15, 0, \tmp, c3, c0, 0		@ Set domain register
+#endif
 	.endm
 
 	.macro	uaccess_enable, tmp
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+	/*
+	 * Whenever we re-enter userspace, the domains should always be
+	 * set appropriately.
+	 */
+	mov	\tmp, #DACR_UACCESS_ENABLE
+	mcr	p15, 0, \tmp, c3, c0, 0
+#endif
 	.endm
 
 	.macro	uaccess_save_and_disable, tmp
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+	mrc	p15, 0, \tmp, c3, c0, 0
+	str	\tmp, [sp, #S_FRAME_SIZE]
+#endif
+	uaccess_disable \tmp
 	.endm
 
 	.macro	uaccess_restore
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+	ldr	r0, [sp, #S_FRAME_SIZE]
+	mcr	p15, 0, r0, c3, c0, 0
+#endif
 	.endm
 
 #ifdef CONFIG_CPU_V7M
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index e722f9b3c9b1..b407cc7a7b55 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -129,12 +129,24 @@  void __show_regs(struct pt_regs *regs)
 	buf[4] = '\0';
 
 #ifndef CONFIG_CPU_V7M
-	printk("Flags: %s  IRQs o%s  FIQs o%s  Mode %s  ISA %s  Segment %s\n",
-		buf, interrupts_enabled(regs) ? "n" : "ff",
-		fast_interrupts_enabled(regs) ? "n" : "ff",
-		processor_modes[processor_mode(regs)],
-		isa_modes[isa_mode(regs)],
-		get_fs() == get_ds() ? "kernel" : "user");
+	{
+		unsigned int domain = get_domain();
+		const char *segment;
+
+		if ((domain & domain_mask(DOMAIN_USER)) ==
+		    domain_val(DOMAIN_USER, DOMAIN_NOACCESS))
+			segment = "none";
+		else if (get_fs() == get_ds())
+			segment = "kernel";
+		else
+			segment = "user";
+
+		printk("Flags: %s  IRQs o%s  FIQs o%s  Mode %s  ISA %s  Segment %s\n",
+			buf, interrupts_enabled(regs) ? "n" : "ff",
+			fast_interrupts_enabled(regs) ? "n" : "ff",
+			processor_modes[processor_mode(regs)],
+			isa_modes[isa_mode(regs)], segment);
+	}
 #else
 	printk("xPSR: %08lx\n", regs->ARM_cpsr);
 #endif