diff mbox series

[v2] x86/mm: Refuse W^X violations

Message ID YwySW3ROc21hN7g9@hirez.programming.kicks-ass.net (mailing list archive)
State Mainlined
Commit 652c5bf380ad018e15006a7f8349800245ddbbad
Headers show
Series [v2] x86/mm: Refuse W^X violations | expand

Commit Message

Peter Zijlstra Aug. 29, 2022, 10:18 a.m. UTC
x86 has STRICT_*_RWX, but not even a warning when someone violates it.

Add this warning and fully refuse the transition.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/mm/pat/set_memory.c |   32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

Comments

Kees Cook Aug. 29, 2022, 7:08 p.m. UTC | #1
On Mon, Aug 29, 2022 at 12:18:03PM +0200, Peter Zijlstra wrote:
> 
> x86 has STRICT_*_RWX, but not even a warning when someone violates it.
> 
> Add this warning and fully refuse the transition.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Reviewed-by: Kees Cook <keescook@chromium.org>
Guenter Roeck Sept. 21, 2022, 8:07 p.m. UTC | #2
Hi,

On Mon, Aug 29, 2022 at 12:18:03PM +0200, Peter Zijlstra wrote:
> 
> x86 has STRICT_*_RWX, but not even a warning when someone violates it.
> 
> Add this warning and fully refuse the transition.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

I see the following crash when trying to boot qemu using images with
PAE enabled. I checked again after applying "x86/mm/32: Fix W^X detection
when page tables do not support NX", but that did not fix the problem.

Guenter

---
[    2.042861] CPA refuse W^X violation: 8000000000000063 -> 0000000000000063 range: 0x00000000c00a0000 - 0x00000000c00a0fff PFN a0
ILLOPC: cbc65efa: 0f 0b
[    2.043267] WARNING: CPU: 0 PID: 1 at arch/x86/mm/pat/set_memory.c:600 __change_page_attr_set_clr+0xdca/0xdd0
[    2.043743] Modules linked in:
[    2.043978] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.0.0-rc6-next-20220921 #1
[    2.044277] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
[    2.044572] EIP: __change_page_attr_set_clr+0xdca/0xdd0
[    2.044751] Code: 10 8b 45 ac 89 7c 24 04 89 74 24 14 89 4c 24 1c 8d 8e ff 0f 00 00 89 4c 24 18 89 44 24 08 c7 04 24 44 67 08 cd e8 56 38 fb 00 <0f> 0b eb 83 66 90 55 89 e5 57 56 89 d6 53 89 c3 83 ec 58 31 d2 8b
[    2.045179] EAX: 00000074 EBX: 000a0063 ECX: 00000000 EDX: 00000002
[    2.045315] ESI: c00a0000 EDI: 00000063 EBP: c115fe4c ESP: c115fd34
[    2.045445] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00000282
[    2.045585] CR0: 80050033 CR2: ffbff000 CR3: 0d57c000 CR4: 000006f0
[    2.046170] Call Trace:
[    2.046631]  ? __purge_vmap_area_lazy+0x6c/0x640
[    2.046768]  ? _vm_unmap_aliases.part.0+0x1d8/0x1f0
[    2.046923]  ? __mutex_unlock_slowpath+0x2b/0x2b0
[    2.047035]  ? purge_fragmented_blocks_allcpus+0x64/0x2c0
[    2.047199]  ? _vm_unmap_aliases.part.0+0x1d8/0x1f0
[    2.047315]  ? _vm_unmap_aliases.part.0+0x54/0x1f0
[    2.047496]  change_page_attr_set_clr+0x11d/0x2d0
[    2.047738]  set_memory_x+0x56/0x60
[    2.047863]  pci_pcbios_init+0xc8/0x28c
[    2.047981]  ? pcibios_resource_survey+0x63/0x63
[    2.048152]  pci_arch_init+0x3c/0x73
[    2.048242]  ? pcibios_resource_survey+0x63/0x63
[    2.048340]  do_one_initcall+0x4f/0x2e0
[    2.048442]  ? __this_cpu_preempt_check+0xf/0x11
[    2.048578]  ? rcu_read_lock_sched_held+0x41/0x70
[    2.048684]  ? trace_initcall_level+0x65/0xa6
[    2.048805]  kernel_init_freeable+0x210/0x264
[    2.048908]  ? rest_init+0x140/0x140
[    2.049002]  kernel_init+0x15/0x110
[    2.049211]  ? schedule_tail_wrapper+0x9/0xc
[    2.049312]  ret_from_fork+0x1c/0x28
[    2.049547] irq event stamp: 7715
[    2.049633] hardirqs last  enabled at (7723): [<cbce7119>] __up_console_sem+0x69/0x80
[    2.049822] hardirqs last disabled at (7730): [<cbce70fd>] __up_console_sem+0x4d/0x80
[    2.049972] softirqs last  enabled at (7176): [<cbc29ac7>] call_on_stack+0x47/0x60
[    2.050153] softirqs last disabled at (7167): [<cbc29ac7>] call_on_stack+0x47/0x60
[    2.050307] ---[ end trace 0000000000000000 ]---
[    2.050762] PCI: PCI BIOS area is rw and x. Use pci=nobios if you want it NX.
[    2.051115] kernel tried to execute NX-protected page - exploit attempt? (uid: 0)
[    2.051115] BUG: unable to handle page fault for address: c00fd2bf
[    2.051115] #PF: supervisor instruction fetch in kernel mode
[    2.051115] #PF: error_code(0x0011) - permissions violation
[    2.051115] *pdpt = 000000000d578001 *pde = 000000000dc18063 *pte = 80000000000fd063
[    2.051115] Oops: 0011 [#1] PREEMPT SMP PTI
[    2.051115] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G        W          6.0.0-rc6-next-20220921 #1
[    2.051115] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
[    2.051115] EIP: 0xc00fd2bf
[    2.051115] Code: 06 1e 8c d0 8e d8 66 89 e3 66 0f b7 e4 66 89 e0 66 e8 43 e8 ff ff 66 89 dc 1f 07 66 5f 66 5e 66 5d 66 5b 66 5a 66 59 66 58 cf <9c> 3d 24 50 43 49 75 13 bb 00 00 0f 00 b9 00 00 01 00 ba 1d d2 00
[    2.051115] EAX: 49435024 EBX: 00000000 ECX: 00000000 EDX: cd1a027f
[    2.051115] ESI: 00000200 EDI: cd50e7f4 EBP: c115ff08 ESP: c115fee0
[    2.051115] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00000046
[    2.051115] CR0: 80050033 CR2: c00fd2bf CR3: 0d57c000 CR4: 000006f0
[    2.051115] Call Trace:
[    2.051115]  ? pci_pcbios_init+0xfa/0x28c
[    2.051115]  ? pcibios_resource_survey+0x63/0x63
[    2.051115]  pci_arch_init+0x3c/0x73
[    2.051115]  ? pcibios_resource_survey+0x63/0x63
[    2.051115]  do_one_initcall+0x4f/0x2e0
[    2.051115]  ? __this_cpu_preempt_check+0xf/0x11
[    2.051115]  ? rcu_read_lock_sched_held+0x41/0x70
[    2.051115]  ? trace_initcall_level+0x65/0xa6
[    2.051115]  kernel_init_freeable+0x210/0x264
[    2.051115]  ? rest_init+0x140/0x140
[    2.051115]  kernel_init+0x15/0x110
[    2.051115]  ? schedule_tail_wrapper+0x9/0xc
[    2.051115]  ret_from_fork+0x1c/0x28
[    2.051115] Modules linked in:
[    2.051115] CR2: 00000000c00fd2bf
[    2.051115] ---[ end trace 0000000000000000 ]---
[    2.051115] EIP: 0xc00fd2bf
[    2.051115] Code: 06 1e 8c d0 8e d8 66 89 e3 66 0f b7 e4 66 89 e0 66 e8 43 e8 ff ff 66 89 dc 1f 07 66 5f 66 5e 66 5d 66 5b 66 5a 66 59 66 58 cf <9c> 3d 24 50 43 49 75 13 bb 00 00 0f 00 b9 00 00 01 00 ba 1d d2 00
[    2.051115] EAX: 49435024 EBX: 00000000 ECX: 00000000 EDX: cd1a027f
[    2.051115] ESI: 00000200 EDI: cd50e7f4 EBP: c115ff08 ESP: c115fee0
[    2.051115] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00000046
[    2.051115] CR0: 80050033 CR2: c00fd2bf CR3: 0d57c000 CR4: 000006f0
[    2.051426] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000009

---
# bad: [ef08d387bbbc20df740ced8caee0ffac835869ac] Add linux-next specific files for 20220920
# good: [521a547ced6477c54b4b0cc206000406c221b4d6] Linux 6.0-rc6
git bisect start 'HEAD' 'v6.0-rc6'
# good: [df970c033333b10c728198606fe787535e08ab8a] Merge branch 'drm-next' of git://git.freedesktop.org/git/drm/drm.git
git bisect good df970c033333b10c728198606fe787535e08ab8a
# bad: [c46ae7d9b6ad0283ffd7b40117b52444d68e083e] Merge branch 'usb-next' of git://git.kernel.org/pub/scm/linux/kernel/git/johan/usb-serial.git
git bisect bad c46ae7d9b6ad0283ffd7b40117b52444d68e083e
# good: [6a21588fd7f579342d71f2c543d7dca6fd44ff8a] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git
git bisect good 6a21588fd7f579342d71f2c543d7dca6fd44ff8a
# bad: [9b5a7d7a43dc87c6326a23394f37d0786dc9e712] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
git bisect bad 9b5a7d7a43dc87c6326a23394f37d0786dc9e712
# good: [00a0886a99d2aba28e8c9f1c124d9cbbaadab693] Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git
git bisect good 00a0886a99d2aba28e8c9f1c124d9cbbaadab693
# good: [57b16b0bfae3a029815b845e8e623fb02d255d68] Merge branch into tip/master: 'x86/cache'
git bisect good 57b16b0bfae3a029815b845e8e623fb02d255d68
# good: [2632186d3de796a47b2dc00ac9dc9bbe6e70796b] Merge remote-tracking branch 'spi/for-6.1' into spi-next
git bisect good 2632186d3de796a47b2dc00ac9dc9bbe6e70796b
# good: [65c4764941bb230ef00164771fba0cdad0bfd3e4] dt-bindings: phy: hisilicon,hi3670-usb3: simplify example
git bisect good 65c4764941bb230ef00164771fba0cdad0bfd3e4
# bad: [32aefecc271aa1ca4431e0f9094e5a578922527b] Merge branch into tip/master: 'x86/mm'
git bisect bad 32aefecc271aa1ca4431e0f9094e5a578922527b
# good: [16ac81825892970fbe5f32fb379466d19d3d3134] Merge branch into tip/master: 'x86/cpu'
git bisect good 16ac81825892970fbe5f32fb379466d19d3d3134
# good: [77614503f9f135323315a53d60dc001f1a429f7c] Merge branch into tip/master: 'x86/misc'
git bisect good 77614503f9f135323315a53d60dc001f1a429f7c
# bad: [1043897681808118c0f7e70b210774000fe06621] Merge branch 'linus' into x86/mm, to refresh the branch
git bisect bad 1043897681808118c0f7e70b210774000fe06621
# bad: [652c5bf380ad018e15006a7f8349800245ddbbad] x86/mm: Refuse W^X violations
git bisect bad 652c5bf380ad018e15006a7f8349800245ddbbad
# good: [86af8230ce138e0423f43f6b104f3fa050aced6d] x86/mm: Rename set_memory_present() to set_memory_p()
git bisect good 86af8230ce138e0423f43f6b104f3fa050aced6d
# first bad commit: [652c5bf380ad018e15006a7f8349800245ddbbad] x86/mm: Refuse W^X violations
Dave Hansen Sept. 21, 2022, 8:59 p.m. UTC | #3
On 9/21/22 13:07, Guenter Roeck wrote:
> [    2.042861] CPA refuse W^X violation: 8000000000000063 -> 0000000000000063 range: 0x00000000c00a0000 - 0x00000000c00a0fff PFN a0
> ILLOPC: cbc65efa: 0f 0b
> [    2.043267] WARNING: CPU: 0 PID: 1 at arch/x86/mm/pat/set_memory.c:600 __change_page_attr_set_clr+0xdca/0xdd0
...
> [    2.050307] ---[ end trace 0000000000000000 ]---
> [    2.050762] PCI: PCI BIOS area is rw and x. Use pci=nobios if you want it NX.
> [    2.051115] kernel tried to execute NX-protected page - exploit attempt? (uid: 0)
> [    2.051115] BUG: unable to handle page fault for address: c00fd2bf

This _looks_ like it is working as intended.  The PCI BIOS code tried to
make a RWX page.  The CPA code refused to do it and presumably returned
an error, leaving a RW, non-executable page.  The PCI code didn't
check the set_memory_x() return code and tried to go execute anyway.
That resulted in the oops.

I was able to reproduce this pretty easily.  The workaround from dmesg
is pci=nobios.  That seems to do the trick for me, although that advice
was sandwiched between a warning and an oops, so not the easiest to find.

I'm a bit torn what to do on this one.  Breaking the boot is bad, but so
is leaving RWX memory around.

Thoughts?
Guenter Roeck Sept. 21, 2022, 10:59 p.m. UTC | #4
On 9/21/22 13:59, Dave Hansen wrote:
> On 9/21/22 13:07, Guenter Roeck wrote:
>> [    2.042861] CPA refuse W^X violation: 8000000000000063 -> 0000000000000063 range: 0x00000000c00a0000 - 0x00000000c00a0fff PFN a0
>> ILLOPC: cbc65efa: 0f 0b
>> [    2.043267] WARNING: CPU: 0 PID: 1 at arch/x86/mm/pat/set_memory.c:600 __change_page_attr_set_clr+0xdca/0xdd0
> ...
>> [    2.050307] ---[ end trace 0000000000000000 ]---
>> [    2.050762] PCI: PCI BIOS area is rw and x. Use pci=nobios if you want it NX.
>> [    2.051115] kernel tried to execute NX-protected page - exploit attempt? (uid: 0)
>> [    2.051115] BUG: unable to handle page fault for address: c00fd2bf
> 
> This _looks_ like it is working as intended.  The PCI BIOS code tried to
> make a RWX page.  The CPA code refused to do it and presumably returned
> an error, leaving a RW, non-executable page.  The PCI code didn't
> check the set_memory_x() return code and tried to go execute anyway.
> That resulted in the oops.
> 
> I was able to reproduce this pretty easily.  The workaround from dmesg
> is pci=nobios.  That seems to do the trick for me, although that advice
> was sandwiched between a warning and an oops, so not the easiest to find.
> 
> I'm a bit torn what to do on this one.  Breaking the boot is bad, but so
> is leaving RWX memory around.
> 
> Thoughts?

For my part I'll do what the above suggests, ie run tests with PAE enabled
with pci=nobios command line option. AFAICS that hides the problem in my tests.
I am just not sure if that is really appropriate.

Guenter
Guenter Roeck Sept. 22, 2022, 3:09 a.m. UTC | #5
On 9/21/22 15:59, Guenter Roeck wrote:
> On 9/21/22 13:59, Dave Hansen wrote:
>> On 9/21/22 13:07, Guenter Roeck wrote:
>>> [    2.042861] CPA refuse W^X violation: 8000000000000063 -> 0000000000000063 range: 0x00000000c00a0000 - 0x00000000c00a0fff PFN a0
>>> ILLOPC: cbc65efa: 0f 0b
>>> [    2.043267] WARNING: CPU: 0 PID: 1 at arch/x86/mm/pat/set_memory.c:600 __change_page_attr_set_clr+0xdca/0xdd0
>> ...
>>> [    2.050307] ---[ end trace 0000000000000000 ]---
>>> [    2.050762] PCI: PCI BIOS area is rw and x. Use pci=nobios if you want it NX.
>>> [    2.051115] kernel tried to execute NX-protected page - exploit attempt? (uid: 0)
>>> [    2.051115] BUG: unable to handle page fault for address: c00fd2bf
>>
>> This _looks_ like it is working as intended.  The PCI BIOS code tried to
>> make a RWX page.  The CPA code refused to do it and presumably returned
>> an error, leaving a RW, non-executable page.  The PCI code didn't
>> check the set_memory_x() return code and tried to go execute anyway.
>> That resulted in the oops.
>>
>> I was able to reproduce this pretty easily.  The workaround from dmesg
>> is pci=nobios.  That seems to do the trick for me, although that advice
>> was sandwiched between a warning and an oops, so not the easiest to find.
>>
>> I'm a bit torn what to do on this one.  Breaking the boot is bad, but so
>> is leaving RWX memory around.
>>
>> Thoughts?
> 
> For my part I'll do what the above suggests, ie run tests with PAE enabled
> with pci=nobios command line option. AFAICS that hides the problem in my tests.
> I am just not sure if that is really appropriate.
> 

Oh well, that "helped" to hide one of the crashes. Here is another one.
This is with PAE enabled and booting through efi32.

Guenter

---
[    1.080779] ------------[ cut here ]------------
[    1.080959] CPA refuse W^X violation: 8000000000000063 -> 0000000000000063 range: 0x00000000d0770000 - 0x00000000d0770fff PFN edcd
ILLOPC: c7465efa: 0f 0b
[    1.081467] WARNING: CPU: 0 PID: 0 at arch/x86/mm/pat/set_memory.c:600 __change_page_attr_set_clr+0xdca/0xdd0
[    1.082120] Modules linked in:
[    1.082476] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.0.0-rc6-next-20220921 #1
[    1.082706] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
[    1.082988] EIP: __change_page_attr_set_clr+0xdca/0xdd0
[    1.083187] Code: 10 8b 45 ac 89 7c 24 04 89 74 24 14 89 4c 24 1c 8d 8e ff 0f 00 00 89 4c 24 18 89 44 24 08 c7 04 24 38 67 88 c8 e8 56 38 fb 00 <0f> 0b eb 83 66 90 55 89 e5 57 56 89 d6 53 89 c3 83 ec 58 31 d2 8b
[    1.083672] EAX: 00000076 EBX: 0edcd063 ECX: 00000000 EDX: 00000003
[    1.083830] ESI: d0770000 EDI: 00000063 EBP: c8a3dea8 ESP: c8a3dd90
[    1.083984] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00200296
[    1.084286] CR0: 80050033 CR2: ffbff000 CR3: 08d7c000 CR4: 000006b0
[    1.084501] Call Trace:
[    1.084849]  ? __this_cpu_preempt_check+0xf/0x11
[    1.085053]  ? __purge_vmap_area_lazy+0x6c/0x640
[    1.085269]  ? _vm_unmap_aliases.part.0+0x1d8/0x1f0
[    1.085415]  ? __mutex_unlock_slowpath+0x2b/0x2b0
[    1.085536]  ? purge_fragmented_blocks_allcpus+0x64/0x2c0
[    1.085696]  ? _vm_unmap_aliases.part.0+0x1d8/0x1f0
[    1.085820]  ? _vm_unmap_aliases.part.0+0x54/0x1f0
[    1.086004]  change_page_attr_set_clr+0x11d/0x2d0
[    1.086313]  ? __efi_memmap_init+0x70/0xd3
[    1.086475]  set_memory_x+0x56/0x60
[    1.086592]  efi_runtime_update_mappings+0x36/0x42
[    1.086717]  efi_enter_virtual_mode+0x351/0x36e
[    1.086860]  start_kernel+0x57d/0x60f
[    1.086956]  ? set_intr_gate+0x42/0x55
[    1.087079]  i386_start_kernel+0x43/0x45
[    1.087272]  startup_32_smp+0x161/0x164
[    1.087491] irq event stamp: 6582
[    1.087593] hardirqs last  enabled at (6590): [<c74e7119>] __up_console_sem+0x69/0x80
[    1.087824] hardirqs last disabled at (6597): [<c74e70fd>] __up_console_sem+0x4d/0x80
[    1.088010] softirqs last  enabled at (6571): [<c7429a94>] call_on_stack+0x14/0x60
[    1.088278] softirqs last disabled at (6614): [<c7429a94>] call_on_stack+0x14/0x60
[    1.088466] ---[ end trace 0000000000000000 ]---
[    1.089237] kernel tried to execute NX-protected page - exploit attempt? (uid: 0)
[    1.089237] BUG: unable to handle page fault for address: d0810e2a
[    1.089237] #PF: supervisor instruction fetch in kernel mode
[    1.089237] #PF: error_code(0x0011) - permissions violation
[    1.089237] *pdpt = 0000000008d78001 *pde = 000000000eec6067 *pte = 800000000fe98063
[    1.089237] Oops: 0011 [#1] PREEMPT SMP PTI
[    1.089237] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G        W          6.0.0-rc6-next-20220921 #1
[    1.089237] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
[    1.089237] EIP: 0xd0810e2a
[    1.089237] Code: 75 0c ff 75 08 68 c1 45 81 d0 6a 40 e8 ef ce ff ff 83 c4 20 83 ec 0c 53 e8 d4 cf ff ff 83 c4 10 31 c0 8d 65 f4 5b 5e 5f 5d c3 <55> 89 e5 57 56 53 bb 02 00 00 80 83 ec 5c 8b 7d 08 85 ff 0f 84 ed
[    1.089237] EAX: d0810e2a EBX: 00200202 ECX: 00000049 EDX: 00000000
[    1.089237] ESI: c8a3df30 EDI: c84c5000 EBP: c8a3df20 ESP: c8a3def8
[    1.089237] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00200202
[    1.089237] CR0: 80050033 CR2: d0810e2a CR3: 08d7c000 CR4: 000006b0
[    1.089237] Call Trace:
[    1.089237]  ? virt_efi_set_variable_nonblocking+0x80/0xf0
[    1.089237]  ? virt_efi_reset_system+0xe0/0xe0
[    1.089237]  efi_delete_dummy_variable+0x55/0x70
[    1.089237]  efi_enter_virtual_mode+0x356/0x36e
[    1.089237]  start_kernel+0x57d/0x60f
[    1.089237]  ? set_intr_gate+0x42/0x55
[    1.089237]  i386_start_kernel+0x43/0x45
[    1.089237]  startup_32_smp+0x161/0x164
[    1.089237] Modules linked in:
[    1.089237] CR2: 00000000d0810e2a
[    1.089237] ---[ end trace 0000000000000000 ]---
[    1.089237] EIP: 0xd0810e2a
Peter Zijlstra Sept. 22, 2022, 7:46 a.m. UTC | #6
On Wed, Sep 21, 2022 at 08:09:13PM -0700, Guenter Roeck wrote:

> Oh well, that "helped" to hide one of the crashes. Here is another one.
> This is with PAE enabled and booting through efi32.

> [    1.086592]  efi_runtime_update_mappings+0x36/0x42
> [    1.086717]  efi_enter_virtual_mode+0x351/0x36e
> [    1.086860]  start_kernel+0x57d/0x60f
> [    1.086956]  ? set_intr_gate+0x42/0x55
> [    1.087079]  i386_start_kernel+0x43/0x45
> [    1.087272]  startup_32_smp+0x161/0x164

Does this help? Dave; perhaps we should just let i386 be i386 and let it
bitrot :/

diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index e06a199423c0..d81e379fcd43 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -136,6 +136,7 @@ void __init efi_runtime_update_mappings(void)
 			if (md->type != EFI_RUNTIME_SERVICES_CODE)
 				continue;
 
+			set_memory_ro(md->virt_addr, md->num_pages);
 			set_memory_x(md->virt_addr, md->num_pages);
 		}
 	}
Dave Hansen Sept. 22, 2022, 3 p.m. UTC | #7
On 9/22/22 00:46, Peter Zijlstra wrote:
> On Wed, Sep 21, 2022 at 08:09:13PM -0700, Guenter Roeck wrote:
> 
>> Oh well, that "helped" to hide one of the crashes. Here is another one.
>> This is with PAE enabled and booting through efi32.
> 
>> [    1.086592]  efi_runtime_update_mappings+0x36/0x42
>> [    1.086717]  efi_enter_virtual_mode+0x351/0x36e
>> [    1.086860]  start_kernel+0x57d/0x60f
>> [    1.086956]  ? set_intr_gate+0x42/0x55
>> [    1.087079]  i386_start_kernel+0x43/0x45
>> [    1.087272]  startup_32_smp+0x161/0x164
> 
> Does this help? Dave; perhaps we should just let i386 be i386 and let it
> bitrot :/

How about we just turn off enforcement for now so that the poor i386
folks can at least boot?  I have the feeling we're going to get bored
with even the warnings if they persist for too long, though.

Untested patch to make i386 violations harmless is attached.
Guenter Roeck Sept. 22, 2022, 4:29 p.m. UTC | #8
On 9/22/22 00:46, Peter Zijlstra wrote:
> On Wed, Sep 21, 2022 at 08:09:13PM -0700, Guenter Roeck wrote:
> 
>> Oh well, that "helped" to hide one of the crashes. Here is another one.
>> This is with PAE enabled and booting through efi32.
> 
>> [    1.086592]  efi_runtime_update_mappings+0x36/0x42
>> [    1.086717]  efi_enter_virtual_mode+0x351/0x36e
>> [    1.086860]  start_kernel+0x57d/0x60f
>> [    1.086956]  ? set_intr_gate+0x42/0x55
>> [    1.087079]  i386_start_kernel+0x43/0x45
>> [    1.087272]  startup_32_smp+0x161/0x164
> 
> Does this help? Dave; perhaps we should just let i386 be i386 and let it
> bitrot :/
> 
> diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
> index e06a199423c0..d81e379fcd43 100644
> --- a/arch/x86/platform/efi/efi_32.c
> +++ b/arch/x86/platform/efi/efi_32.c
> @@ -136,6 +136,7 @@ void __init efi_runtime_update_mappings(void)
>   			if (md->type != EFI_RUNTIME_SERVICES_CODE)
>   				continue;
>   
> +			set_memory_ro(md->virt_addr, md->num_pages);
>   			set_memory_x(md->virt_addr, md->num_pages);
>   		}
>   	}

Yes, it does.

Tested-by: Guenter Roeck <linux@roeck-us.net>

Guenter
Guenter Roeck Sept. 22, 2022, 4:38 p.m. UTC | #9
On 9/22/22 08:00, Dave Hansen wrote:
> On 9/22/22 00:46, Peter Zijlstra wrote:
>> On Wed, Sep 21, 2022 at 08:09:13PM -0700, Guenter Roeck wrote:
>>
>>> Oh well, that "helped" to hide one of the crashes. Here is another one.
>>> This is with PAE enabled and booting through efi32.
>>
>>> [    1.086592]  efi_runtime_update_mappings+0x36/0x42
>>> [    1.086717]  efi_enter_virtual_mode+0x351/0x36e
>>> [    1.086860]  start_kernel+0x57d/0x60f
>>> [    1.086956]  ? set_intr_gate+0x42/0x55
>>> [    1.087079]  i386_start_kernel+0x43/0x45
>>> [    1.087272]  startup_32_smp+0x161/0x164
>>
>> Does this help? Dave; perhaps we should just let i386 be i386 and let it
>> bitrot :/
> 
> How about we just turn off enforcement for now so that the poor i386
> folks can at least boot?  I have the feeling we're going to get bored
> with even the warnings if they persist for too long, though.
> 

Problem with unfixed warnings is that they hide other problems if persistent,
and they result in warnings being seen as just useless noise.

Case in point: In ChromeOS, we get literally hundreds of thousands of warning
reports each day (most from drm and wireless drivers). Those originate from
upstream code. No one really cares, and none ever get fixed. Please don't add
more if you don't plan to fix them.

Thanks,
Guenter
Pavel Machek Oct. 2, 2022, 10:33 a.m. UTC | #10
On Wed 2022-09-21 13:59:06, Dave Hansen wrote:
> On 9/21/22 13:07, Guenter Roeck wrote:
> > [    2.042861] CPA refuse W^X violation: 8000000000000063 -> 0000000000000063 range: 0x00000000c00a0000 - 0x00000000c00a0fff PFN a0
> > ILLOPC: cbc65efa: 0f 0b
> > [    2.043267] WARNING: CPU: 0 PID: 1 at arch/x86/mm/pat/set_memory.c:600 __change_page_attr_set_clr+0xdca/0xdd0
> ...
> > [    2.050307] ---[ end trace 0000000000000000 ]---
> > [    2.050762] PCI: PCI BIOS area is rw and x. Use pci=nobios if you want it NX.
> > [    2.051115] kernel tried to execute NX-protected page - exploit attempt? (uid: 0)
> > [    2.051115] BUG: unable to handle page fault for address: c00fd2bf
> 
> This _looks_ like it is working as intended.  The PCI BIOS code tried to
> make a RWX page.  The CPA code refused to do it and presumably returned
> an error, leaving a RW, non-executable page.  The PCI code didn't
> check the set_memory_x() return code and tried to go execute anyway.
> That resulted in the oops.
> 
> I was able to reproduce this pretty easily.  The workaround from dmesg
> is pci=nobios.  That seems to do the trick for me, although that advice
> was sandwiched between a warning and an oops, so not the easiest to find.
> 
> I'm a bit torn what to do on this one.  Breaking the boot is bad, but so
> is leaving RWX memory around.

Well, the original patch is bad. Boot regressions are not acceptable.

We should first add a WARN_ON(), debug and fix the failures, then we
can start refusing the transitions.

Best regards,
								Pavel
Steven Rostedt Oct. 24, 2022, 3:27 p.m. UTC | #11
On Mon, 29 Aug 2022 12:18:03 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> x86 has STRICT_*_RWX, but not even a warning when someone violates it.
> 
> Add this warning and fully refuse the transition.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

So, this now triggers on enabling function tracing at boot up:

  "ftrace=function"

This:

SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=4, Nodes=1
Kernel/User page tables isolation: enabled
ftrace: allocating 68738 entries in 269 pages
ftrace: allocated 269 pages with 4 groups
Starting tracer 'function'
------------[ cut here ]------------
CPA detected W^X violation: 8000000000000063 -> 0000000000000063 range: 0xffffffffc0013000 - 0xffffffffc0013fff PFN 10031b
WARNING: CPU: 0 PID: 0 at arch/x86/mm/pat/set_memory.c:609 verify_rwx+0x61/0x6d
Modules linked in:
CPU: 0 PID: 0 Comm: swapper Not tainted 6.1.0-rc1-test+ #3
Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014
RIP: 0010:verify_rwx+0x61/0x6d
Code: e5 01 00 75 27 49 c1 e0 0c 48 89 d1 48 89 fe 48 c7 c7 5b b3 92 84 4e 8d 44 02 ff 48 89 da c6 05 71 29 e5 01 01 e8 35 90 e2 00 <0f> 0b 48 89 d8 5b 5d e9 6f 95 1a 01 0f 1f 44 00 00 55 48 89 e5 53
RSP: 0000:ffffffff84c03b08 EFLAGS: 00010086
RAX: 0000000000000000 RBX: 0000000000000063 RCX: 0000000000000003
RDX: 0000000000000003 RSI: ffffffff84c039b0 RDI: 0000000000000001
RBP: ffffffff84c03b10 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000025 R12: ffff8e730031c098
R13: 000000000010031b R14: 800000010031b063 R15: 8000000000000063
FS:  0000000000000000(0000) GS:ffff8e7416a00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffff8e73fd801000 CR3: 00000001fcc22001 CR4: 00000000000606f0
Call Trace:
 <TASK>
 __change_page_attr_set_clr+0x146/0x8a6
 ? __mutex_unlock_slowpath+0x41/0x213
 ? mutex_unlock+0x12/0x18
 ? _vm_unmap_aliases+0x126/0x136
 change_page_attr_set_clr+0x135/0x268
 ? find_vmap_area+0x32/0x3e
 ? __fentry__+0x10/0x10
 change_page_attr_clear.constprop.0+0x16/0x1c
 set_memory_x+0x2c/0x32
 arch_ftrace_update_trampoline+0x218/0x2db
 ? ftrace_caller_op_ptr+0x17/0x17
 ftrace_update_trampoline+0x16/0xa1
 ? tracing_gen_ctx+0x1c/0x1c
 __register_ftrace_function+0x93/0xb2
 ftrace_startup+0x21/0xf0
 ? tracing_gen_ctx+0x1c/0x1c
 register_ftrace_function_nolock+0x26/0x40
 register_ftrace_function+0x4e/0x143
 ? mutex_unlock+0x12/0x18
 ? tracing_gen_ctx+0x1c/0x1c
 function_trace_init+0x7d/0xc3
 tracer_init+0x23/0x2c
 tracing_set_tracer+0x1d5/0x206
 register_tracer+0x1c0/0x1e4
 init_function_trace+0x90/0x96
 early_trace_init+0x25c/0x352
 start_kernel+0x424/0x6e4
 x86_64_start_reservations+0x24/0x2a
 x86_64_start_kernel+0x8c/0x95
 secondary_startup_64_no_verify+0xe0/0xeb
 </TASK>
---[ end trace 0000000000000000 ]---


-- Steve
diff mbox series

Patch

--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -580,6 +580,33 @@  static inline pgprot_t static_protection
 }
 
 /*
+ * Validate and enforce strict W^X semantics.
+ */
+static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long start,
+				  unsigned long pfn, unsigned long npg)
+{
+	unsigned long end;
+
+	if (!cpu_feature_enabled(X86_FEATURE_NX))
+		return new;
+
+	if (!((pgprot_val(old) ^ pgprot_val(new)) & (_PAGE_RW | _PAGE_NX)))
+		return new;
+
+	if ((pgprot_val(new) & (_PAGE_RW | _PAGE_NX)) != _PAGE_RW)
+		return new;
+
+	end = start + npg * PAGE_SIZE - 1;
+	WARN_ONCE(1, "CPA refuse W^X violation: %016llx -> %016llx range: 0x%016lx - 0x%016lx PFN %lx\n",
+		  (unsigned long long)pgprot_val(old),
+		  (unsigned long long)pgprot_val(new),
+		  start, end, pfn);
+
+	/* refuse the transition into WX */
+	return old;
+}
+
+/*
  * Lookup the page table entry for a virtual address in a specific pgd.
  * Return a pointer to the entry and the level of the mapping.
  */
@@ -885,6 +912,8 @@  static int __should_split_large_page(pte
 	new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
 				      psize, CPA_DETECT);
 
+	new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages);
+
 	/*
 	 * If there is a conflict, split the large page.
 	 *
@@ -1525,6 +1554,7 @@  static int __change_page_attr(struct cpa
 
 	if (level == PG_LEVEL_4K) {
 		pte_t new_pte;
+		pgprot_t old_prot = pte_pgprot(old_pte);
 		pgprot_t new_prot = pte_pgprot(old_pte);
 		unsigned long pfn = pte_pfn(old_pte);
 
@@ -1536,6 +1566,8 @@  static int __change_page_attr(struct cpa
 		new_prot = static_protections(new_prot, address, pfn, 1, 0,
 					      CPA_PROTECT);
 
+		new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1);
+
 		new_prot = pgprot_clear_protnone_bits(new_prot);
 
 		/*