diff mbox

BUG: sleeping function called from invalid context at ./include/linux/uaccess.h:LINE

Message ID 367fa416-1adb-53db-4295-c25cec17b893@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

David Hildenbrand Nov. 6, 2017, 11:51 a.m. UTC
On 31.10.2017 12:34, syzbot wrote:
> Hello,
> 
> syzkaller hit the following crash on  
> 91dfed74eabcdae9378131546c446442c29bf769
> git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master
> compiler: gcc (GCC) 7.1.1 20170620
> .config is attached
> Raw console output is attached.
> C reproducer is attached
> syzkaller reproducer is attached. See https://goo.gl/kgGztJ
> for information about syzkaller reproducers
> 
> 
> in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: syzkaller879109
> 2 locks held by syzkaller879109/2909:
>   #0:  (&vcpu->mutex){+.+.}, at: [<ffffffff8106222c>] vcpu_load+0x1c/0x70  
> arch/x86/kvm/../../../virt/kvm/kvm_main.c:154
>   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_enter_guest  
> arch/x86/kvm/x86.c:6983 [inline]
>   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_run  
> arch/x86/kvm/x86.c:7061 [inline]
>   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>]  
> kvm_arch_vcpu_ioctl_run+0x1bc2/0x58b0 arch/x86/kvm/x86.c:7222
> CPU: 1 PID: 2909 Comm: syzkaller879109 Not tainted 4.13.0-rc4-next-20170811  
> #1
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> Call Trace:
>   __dump_stack lib/dump_stack.c:16 [inline]
>   dump_stack+0x194/0x257 lib/dump_stack.c:52
>   ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6014
>   __might_sleep+0x95/0x190 kernel/sched/core.c:5967
>   __might_fault+0xab/0x1d0 mm/memory.c:4383
>   __copy_from_user include/linux/uaccess.h:71 [inline]
>   __kvm_read_guest_page+0x58/0xa0  
> arch/x86/kvm/../../../virt/kvm/kvm_main.c:1771
>   kvm_vcpu_read_guest_page+0x44/0x60  
> arch/x86/kvm/../../../virt/kvm/kvm_main.c:1791
>   kvm_read_guest_virt_helper+0x76/0x140 arch/x86/kvm/x86.c:4407
>   kvm_read_guest_virt_system+0x3c/0x50 arch/x86/kvm/x86.c:4466
>   segmented_read_std+0x10c/0x180 arch/x86/kvm/emulate.c:819
>   em_fxrstor+0x27b/0x410 arch/x86/kvm/emulate.c:4022


In em_fxrstor, we do a get_fpu(), which in return disables preemption.

With preempt_disable(), we do a
segmented_read_std()->kvm_vcpu_read_guest_page(), triggering the warning.

>   x86_emulate_insn+0x55d/0x3c50 arch/x86/kvm/emulate.c:5471
>   x86_emulate_instruction+0x411/0x1ca0 arch/x86/kvm/x86.c:5698
>   kvm_mmu_page_fault+0x18b/0x2c0 arch/x86/kvm/mmu.c:4854
>   handle_ept_violation+0x1fc/0x5e0 arch/x86/kvm/vmx.c:6400
>   vmx_handle_exit+0x281/0x1ab0 arch/x86/kvm/vmx.c:8718
>   vcpu_enter_guest arch/x86/kvm/x86.c:6999 [inline]
>   vcpu_run arch/x86/kvm/x86.c:7061 [inline]
>   kvm_arch_vcpu_ioctl_run+0x1cee/0x58b0 arch/x86/kvm/x86.c:7222
>   kvm_vcpu_ioctl+0x64c/0x1010 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2591
>   vfs_ioctl fs/ioctl.c:45 [inline]
>   do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685
>   SYSC_ioctl fs/ioctl.c:700 [inline]
>   SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691
>   entry_SYSCALL_64_fastpath+0x1f/0xbe

I don't really see a way to avoid two fxstate variables. Unloading the
fpu in between the fxstore/fxrstr could lead to host values getting
overwritten. Loading/saving the fpu in kvm_arch_vcpu_ioctl_run() would
most probably also not work, as the relevant portions of fxregs_state
would not get saved/restored. So the preemption would still be needed.


So all I can offer for now is the following (untested, can send as
proper patch if needed):


From f32d06c8c621c6d68e073e9bdb81a6280b6c9544 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 6 Nov 2017 12:35:39 +0100
Subject: [PATCH v1] KVM: x86: fix em_fxstor sleeping while in atomic

Commit 9d643f63128b tried to optimize the stack size, but introduced a
guest memory access which might sleep while in atomic.

Let's undo that part of the commit but keep the cleanups.

Reported by syzbot:

in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: syzkaller879109
2 locks held by syzkaller879109/2909:
  #0:  (&vcpu->mutex){+.+.}, at: [<ffffffff8106222c>] vcpu_load+0x1c/0x70
arch/x86/kvm/../../../virt/kvm/kvm_main.c:154
  #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_enter_guest
arch/x86/kvm/x86.c:6983 [inline]
  #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_run
arch/x86/kvm/x86.c:7061 [inline]
  #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>]
kvm_arch_vcpu_ioctl_run+0x1bc2/0x58b0 arch/x86/kvm/x86.c:7222
CPU: 1 PID: 2909 Comm: syzkaller879109 Not tainted 4.13.0-rc4-next-20170811
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:16 [inline]
  dump_stack+0x194/0x257 lib/dump_stack.c:52
  ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6014
  __might_sleep+0x95/0x190 kernel/sched/core.c:5967
  __might_fault+0xab/0x1d0 mm/memory.c:4383
  __copy_from_user include/linux/uaccess.h:71 [inline]
  __kvm_read_guest_page+0x58/0xa0
arch/x86/kvm/../../../virt/kvm/kvm_main.c:1771
  kvm_vcpu_read_guest_page+0x44/0x60
arch/x86/kvm/../../../virt/kvm/kvm_main.c:1791
  kvm_read_guest_virt_helper+0x76/0x140 arch/x86/kvm/x86.c:4407
  kvm_read_guest_virt_system+0x3c/0x50 arch/x86/kvm/x86.c:4466
  segmented_read_std+0x10c/0x180 arch/x86/kvm/emulate.c:819
  em_fxrstor+0x27b/0x410 arch/x86/kvm/emulate.c:4022
  x86_emulate_insn+0x55d/0x3c50 arch/x86/kvm/emulate.c:5471
  x86_emulate_instruction+0x411/0x1ca0 arch/x86/kvm/x86.c:5698
  kvm_mmu_page_fault+0x18b/0x2c0 arch/x86/kvm/mmu.c:4854
  handle_ept_violation+0x1fc/0x5e0 arch/x86/kvm/vmx.c:6400
  vmx_handle_exit+0x281/0x1ab0 arch/x86/kvm/vmx.c:8718
  vcpu_enter_guest arch/x86/kvm/x86.c:6999 [inline]
  vcpu_run arch/x86/kvm/x86.c:7061 [inline]
  kvm_arch_vcpu_ioctl_run+0x1cee/0x58b0 arch/x86/kvm/x86.c:7222
  kvm_vcpu_ioctl+0x64c/0x1010 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2591
  vfs_ioctl fs/ioctl.c:45 [inline]
  do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685
  SYSC_ioctl fs/ioctl.c:700 [inline]
  SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691
  entry_SYSCALL_64_fastpath+0x1f/0xbe
RIP: 0033:0x437fc9
RSP: 002b:00007ffc7b4d5ab8 EFLAGS: 00000206 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000437fc9
RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 0000000000000005
RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000020ae8000
R10: 0000000000009120 R11: 0000000000000206 R12: 0000000000000000
R13: 0000000000000004 R14: 0000000000000004 R15: 0000000020077000

Fixes: 9d643f63128b ("KVM: x86: avoid large stack allocations in
em_fxrstor")
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 arch/x86/kvm/emulate.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

Comments

David Hildenbrand Nov. 6, 2017, 4:01 p.m. UTC | #1
On 06.11.2017 16:10, Nick Desaulniers wrote:
> Does it have to be stack allocated?

We can't use kmalloc and friends in emulate.c. We would have to
introduce new emulator callbacks.

a) for malloc and free. hmmm.
b) for carrying out the fxrstr/fixup.

Paolo, what do you suggest?

> 
> On Nov 6, 2017 3:52 AM, "David Hildenbrand" <david@redhat.com
> <mailto:david@redhat.com>> wrote:
> 
>     On 31.10.2017 12:34, syzbot wrote:
>     > Hello,
>     >
>     > syzkaller hit the following crash on
>     > 91dfed74eabcdae9378131546c446442c29bf769
>     >
>     git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master
>     <http://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master>
>     > compiler: gcc (GCC) 7.1.1 20170620
>     > .config is attached
>     > Raw console output is attached.
>     > C reproducer is attached
>     > syzkaller reproducer is attached. See https://goo.gl/kgGztJ
>     > for information about syzkaller reproducers
>     >
>     >
>     > in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: syzkaller879109
>     > 2 locks held by syzkaller879109/2909:
>     >   #0:  (&vcpu->mutex){+.+.}, at: [<ffffffff8106222c>]
>     vcpu_load+0x1c/0x70
>     > arch/x86/kvm/../../../virt/kvm/kvm_main.c:154
>     >   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_enter_guest
>     > arch/x86/kvm/x86.c:6983 [inline]
>     >   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_run
>     > arch/x86/kvm/x86.c:7061 [inline]
>     >   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>]
>     > kvm_arch_vcpu_ioctl_run+0x1bc2/0x58b0 arch/x86/kvm/x86.c:7222
>     > CPU: 1 PID: 2909 Comm: syzkaller879109 Not tainted
>     4.13.0-rc4-next-20170811
>     > #1
>     > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
>     01/01/2011
>     > Call Trace:
>     >   __dump_stack lib/dump_stack.c:16 [inline]
>     >   dump_stack+0x194/0x257 lib/dump_stack.c:52
>     >   ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6014
>     >   __might_sleep+0x95/0x190 kernel/sched/core.c:5967
>     >   __might_fault+0xab/0x1d0 mm/memory.c:4383
>     >   __copy_from_user include/linux/uaccess.h:71 [inline]
>     >   __kvm_read_guest_page+0x58/0xa0
>     > arch/x86/kvm/../../../virt/kvm/kvm_main.c:1771
>     >   kvm_vcpu_read_guest_page+0x44/0x60
>     > arch/x86/kvm/../../../virt/kvm/kvm_main.c:1791
>     >   kvm_read_guest_virt_helper+0x76/0x140 arch/x86/kvm/x86.c:4407
>     >   kvm_read_guest_virt_system+0x3c/0x50 arch/x86/kvm/x86.c:4466
>     >   segmented_read_std+0x10c/0x180 arch/x86/kvm/emulate.c:819
>     >   em_fxrstor+0x27b/0x410 arch/x86/kvm/emulate.c:4022
> 
> 
>     In em_fxrstor, we do a get_fpu(), which in return disables preemption.
> 
>     With preempt_disable(), we do a
>     segmented_read_std()->kvm_vcpu_read_guest_page(), triggering the
>     warning.
> 
>     >   x86_emulate_insn+0x55d/0x3c50 arch/x86/kvm/emulate.c:5471
>     >   x86_emulate_instruction+0x411/0x1ca0 arch/x86/kvm/x86.c:5698
>     >   kvm_mmu_page_fault+0x18b/0x2c0 arch/x86/kvm/mmu.c:4854
>     >   handle_ept_violation+0x1fc/0x5e0 arch/x86/kvm/vmx.c:6400
>     >   vmx_handle_exit+0x281/0x1ab0 arch/x86/kvm/vmx.c:8718
>     >   vcpu_enter_guest arch/x86/kvm/x86.c:6999 [inline]
>     >   vcpu_run arch/x86/kvm/x86.c:7061 [inline]
>     >   kvm_arch_vcpu_ioctl_run+0x1cee/0x58b0 arch/x86/kvm/x86.c:7222
>     >   kvm_vcpu_ioctl+0x64c/0x1010
>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:2591
>     >   vfs_ioctl fs/ioctl.c:45 [inline]
>     >   do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685
>     >   SYSC_ioctl fs/ioctl.c:700 [inline]
>     >   SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691
>     >   entry_SYSCALL_64_fastpath+0x1f/0xbe
> 
>     I don't really see a way to avoid two fxstate variables. Unloading the
>     fpu in between the fxstore/fxrstr could lead to host values getting
>     overwritten. Loading/saving the fpu in kvm_arch_vcpu_ioctl_run() would
>     most probably also not work, as the relevant portions of fxregs_state
>     would not get saved/restored. So the preemption would still be needed.
> 
> 
>     So all I can offer for now is the following (untested, can send as
>     proper patch if needed):
> 
> 
>     From f32d06c8c621c6d68e073e9bdb81a6280b6c9544 Mon Sep 17 00:00:00 2001
>     From: David Hildenbrand <david@redhat.com <mailto:david@redhat.com>>
>     Date: Mon, 6 Nov 2017 12:35:39 +0100
>     Subject: [PATCH v1] KVM: x86: fix em_fxstor sleeping while in atomic
> 
>     Commit 9d643f63128b tried to optimize the stack size, but introduced a
>     guest memory access which might sleep while in atomic.
> 
>     Let's undo that part of the commit but keep the cleanups.
> 
>     Reported by syzbot:
> 
>     in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: syzkaller879109
>     2 locks held by syzkaller879109/2909:
>       #0:  (&vcpu->mutex){+.+.}, at: [<ffffffff8106222c>]
>     vcpu_load+0x1c/0x70
>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:154
>       #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_enter_guest
>     arch/x86/kvm/x86.c:6983 [inline]
>       #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_run
>     arch/x86/kvm/x86.c:7061 [inline]
>       #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>]
>     kvm_arch_vcpu_ioctl_run+0x1bc2/0x58b0 arch/x86/kvm/x86.c:7222
>     CPU: 1 PID: 2909 Comm: syzkaller879109 Not tainted
>     4.13.0-rc4-next-20170811
>     Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
>     01/01/2011
>     Call Trace:
>       __dump_stack lib/dump_stack.c:16 [inline]
>       dump_stack+0x194/0x257 lib/dump_stack.c:52
>       ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6014
>       __might_sleep+0x95/0x190 kernel/sched/core.c:5967
>       __might_fault+0xab/0x1d0 mm/memory.c:4383
>       __copy_from_user include/linux/uaccess.h:71 [inline]
>       __kvm_read_guest_page+0x58/0xa0
>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:1771
>       kvm_vcpu_read_guest_page+0x44/0x60
>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:1791
>       kvm_read_guest_virt_helper+0x76/0x140 arch/x86/kvm/x86.c:4407
>       kvm_read_guest_virt_system+0x3c/0x50 arch/x86/kvm/x86.c:4466
>       segmented_read_std+0x10c/0x180 arch/x86/kvm/emulate.c:819
>       em_fxrstor+0x27b/0x410 arch/x86/kvm/emulate.c:4022
>       x86_emulate_insn+0x55d/0x3c50 arch/x86/kvm/emulate.c:5471
>       x86_emulate_instruction+0x411/0x1ca0 arch/x86/kvm/x86.c:5698
>       kvm_mmu_page_fault+0x18b/0x2c0 arch/x86/kvm/mmu.c:4854
>       handle_ept_violation+0x1fc/0x5e0 arch/x86/kvm/vmx.c:6400
>       vmx_handle_exit+0x281/0x1ab0 arch/x86/kvm/vmx.c:8718
>       vcpu_enter_guest arch/x86/kvm/x86.c:6999 [inline]
>       vcpu_run arch/x86/kvm/x86.c:7061 [inline]
>       kvm_arch_vcpu_ioctl_run+0x1cee/0x58b0 arch/x86/kvm/x86.c:7222
>       kvm_vcpu_ioctl+0x64c/0x1010
>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:2591
>       vfs_ioctl fs/ioctl.c:45 [inline]
>       do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685
>       SYSC_ioctl fs/ioctl.c:700 [inline]
>       SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691
>       entry_SYSCALL_64_fastpath+0x1f/0xbe
>     RIP: 0033:0x437fc9
>     RSP: 002b:00007ffc7b4d5ab8 EFLAGS: 00000206 ORIG_RAX: 0000000000000010
>     RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000437fc9
>     RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 0000000000000005
>     RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000020ae8000
>     R10: 0000000000009120 R11: 0000000000000206 R12: 0000000000000000
>     R13: 0000000000000004 R14: 0000000000000004 R15: 0000000020077000
> 
>     Fixes: 9d643f63128b ("KVM: x86: avoid large stack allocations in
>     em_fxrstor")
>     Signed-off-by: David Hildenbrand <david@redhat.com
>     <mailto:david@redhat.com>>
>     ---
>      arch/x86/kvm/emulate.c | 16 +++++++++-------
>      1 file changed, 9 insertions(+), 7 deletions(-)
> 
>     diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
>     index fb0055953fbc..d87f01a2d6f4 100644
>     --- a/arch/x86/kvm/emulate.c
>     +++ b/arch/x86/kvm/emulate.c
>     @@ -4002,7 +4002,7 @@ static int em_fxsave(struct x86_emulate_ctxt
>     *ctxt)
> 
>      static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
>      {
>     -       struct fxregs_state fx_state;
>     +       struct fxregs_state fx_state, fx_old;
>             int rc;
>             size_t size;
> 
>     @@ -4010,19 +4010,21 @@ static int em_fxrstor(struct
>     x86_emulate_ctxt *ctxt)
>             if (rc != X86EMUL_CONTINUE)
>                     return rc;
> 
>     +       size = fxstate_size(ctxt);
>     +       rc = segmented_read_std(ctxt, ctxt->memop.addr.mem,
>     &fx_state, size);
>     +       if (rc != X86EMUL_CONTINUE)
>     +               return rc;
>     +
>             ctxt->ops->get_fpu(ctxt);
> 
>     -       size = fxstate_size(ctxt);
>             if (size < __fxstate_size(16)) {
>     -               rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
>     +               rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_old));
>                     if (rc != X86EMUL_CONTINUE)
>                             goto out;
>     +               memcpy(((void *)&fx_state) + size, ((void *)&fx_old)
>     + size,
>     +                      __fxstate_size(16) - size);
>             }
> 
>     -       rc = segmented_read_std(ctxt, ctxt->memop.addr.mem,
>     &fx_state, size);
>     -       if (rc != X86EMUL_CONTINUE)
>     -               goto out;
>     -
>             if (fx_state.mxcsr >> 16) {
>                     rc = emulate_gp(ctxt, 0);
>                     goto out;
>     --
>     2.13.6
> 
> 
> 
>     --
> 
>     Thanks,
> 
>     David / dhildenb
>
Paolo Bonzini Nov. 6, 2017, 4:14 p.m. UTC | #2
On 06/11/2017 17:01, David Hildenbrand wrote:
> On 06.11.2017 16:10, Nick Desaulniers wrote:
>> Does it have to be stack allocated?
> 
> We can't use kmalloc and friends in emulate.c. We would have to
> introduce new emulator callbacks.
> 
> a) for malloc and free. hmmm.
> b) for carrying out the fxrstr/fixup.
> 
> Paolo, what do you suggest?

You can use kmalloc.  Any userspace user of emulate.c would have to
write a wrapper.  But I'm not sure it's useful... maybe the
asm_safe+memcpy could be moved to a separate noinline function, so that
segmented_read_std is invoked with a leaner stack.

Paolo

>>
>> On Nov 6, 2017 3:52 AM, "David Hildenbrand" <david@redhat.com
>> <mailto:david@redhat.com>> wrote:
>>
>>     On 31.10.2017 12:34, syzbot wrote:
>>     > Hello,
>>     >
>>     > syzkaller hit the following crash on
>>     > 91dfed74eabcdae9378131546c446442c29bf769
>>     >
>>     git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master
>>     <http://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master>
>>     > compiler: gcc (GCC) 7.1.1 20170620
>>     > .config is attached
>>     > Raw console output is attached.
>>     > C reproducer is attached
>>     > syzkaller reproducer is attached. See https://goo.gl/kgGztJ
>>     > for information about syzkaller reproducers
>>     >
>>     >
>>     > in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: syzkaller879109
>>     > 2 locks held by syzkaller879109/2909:
>>     >   #0:  (&vcpu->mutex){+.+.}, at: [<ffffffff8106222c>]
>>     vcpu_load+0x1c/0x70
>>     > arch/x86/kvm/../../../virt/kvm/kvm_main.c:154
>>     >   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_enter_guest
>>     > arch/x86/kvm/x86.c:6983 [inline]
>>     >   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_run
>>     > arch/x86/kvm/x86.c:7061 [inline]
>>     >   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>]
>>     > kvm_arch_vcpu_ioctl_run+0x1bc2/0x58b0 arch/x86/kvm/x86.c:7222
>>     > CPU: 1 PID: 2909 Comm: syzkaller879109 Not tainted
>>     4.13.0-rc4-next-20170811
>>     > #1
>>     > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
>>     01/01/2011
>>     > Call Trace:
>>     >   __dump_stack lib/dump_stack.c:16 [inline]
>>     >   dump_stack+0x194/0x257 lib/dump_stack.c:52
>>     >   ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6014
>>     >   __might_sleep+0x95/0x190 kernel/sched/core.c:5967
>>     >   __might_fault+0xab/0x1d0 mm/memory.c:4383
>>     >   __copy_from_user include/linux/uaccess.h:71 [inline]
>>     >   __kvm_read_guest_page+0x58/0xa0
>>     > arch/x86/kvm/../../../virt/kvm/kvm_main.c:1771
>>     >   kvm_vcpu_read_guest_page+0x44/0x60
>>     > arch/x86/kvm/../../../virt/kvm/kvm_main.c:1791
>>     >   kvm_read_guest_virt_helper+0x76/0x140 arch/x86/kvm/x86.c:4407
>>     >   kvm_read_guest_virt_system+0x3c/0x50 arch/x86/kvm/x86.c:4466
>>     >   segmented_read_std+0x10c/0x180 arch/x86/kvm/emulate.c:819
>>     >   em_fxrstor+0x27b/0x410 arch/x86/kvm/emulate.c:4022
>>
>>
>>     In em_fxrstor, we do a get_fpu(), which in return disables preemption.
>>
>>     With preempt_disable(), we do a
>>     segmented_read_std()->kvm_vcpu_read_guest_page(), triggering the
>>     warning.
>>
>>     >   x86_emulate_insn+0x55d/0x3c50 arch/x86/kvm/emulate.c:5471
>>     >   x86_emulate_instruction+0x411/0x1ca0 arch/x86/kvm/x86.c:5698
>>     >   kvm_mmu_page_fault+0x18b/0x2c0 arch/x86/kvm/mmu.c:4854
>>     >   handle_ept_violation+0x1fc/0x5e0 arch/x86/kvm/vmx.c:6400
>>     >   vmx_handle_exit+0x281/0x1ab0 arch/x86/kvm/vmx.c:8718
>>     >   vcpu_enter_guest arch/x86/kvm/x86.c:6999 [inline]
>>     >   vcpu_run arch/x86/kvm/x86.c:7061 [inline]
>>     >   kvm_arch_vcpu_ioctl_run+0x1cee/0x58b0 arch/x86/kvm/x86.c:7222
>>     >   kvm_vcpu_ioctl+0x64c/0x1010
>>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:2591
>>     >   vfs_ioctl fs/ioctl.c:45 [inline]
>>     >   do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685
>>     >   SYSC_ioctl fs/ioctl.c:700 [inline]
>>     >   SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691
>>     >   entry_SYSCALL_64_fastpath+0x1f/0xbe
>>
>>     I don't really see a way to avoid two fxstate variables. Unloading the
>>     fpu in between the fxstore/fxrstr could lead to host values getting
>>     overwritten. Loading/saving the fpu in kvm_arch_vcpu_ioctl_run() would
>>     most probably also not work, as the relevant portions of fxregs_state
>>     would not get saved/restored. So the preemption would still be needed.
>>
>>
>>     So all I can offer for now is the following (untested, can send as
>>     proper patch if needed):
>>
>>
>>     From f32d06c8c621c6d68e073e9bdb81a6280b6c9544 Mon Sep 17 00:00:00 2001
>>     From: David Hildenbrand <david@redhat.com <mailto:david@redhat.com>>
>>     Date: Mon, 6 Nov 2017 12:35:39 +0100
>>     Subject: [PATCH v1] KVM: x86: fix em_fxstor sleeping while in atomic
>>
>>     Commit 9d643f63128b tried to optimize the stack size, but introduced a
>>     guest memory access which might sleep while in atomic.
>>
>>     Let's undo that part of the commit but keep the cleanups.
>>
>>     Reported by syzbot:
>>
>>     in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: syzkaller879109
>>     2 locks held by syzkaller879109/2909:
>>       #0:  (&vcpu->mutex){+.+.}, at: [<ffffffff8106222c>]
>>     vcpu_load+0x1c/0x70
>>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:154
>>       #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_enter_guest
>>     arch/x86/kvm/x86.c:6983 [inline]
>>       #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_run
>>     arch/x86/kvm/x86.c:7061 [inline]
>>       #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>]
>>     kvm_arch_vcpu_ioctl_run+0x1bc2/0x58b0 arch/x86/kvm/x86.c:7222
>>     CPU: 1 PID: 2909 Comm: syzkaller879109 Not tainted
>>     4.13.0-rc4-next-20170811
>>     Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
>>     01/01/2011
>>     Call Trace:
>>       __dump_stack lib/dump_stack.c:16 [inline]
>>       dump_stack+0x194/0x257 lib/dump_stack.c:52
>>       ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6014
>>       __might_sleep+0x95/0x190 kernel/sched/core.c:5967
>>       __might_fault+0xab/0x1d0 mm/memory.c:4383
>>       __copy_from_user include/linux/uaccess.h:71 [inline]
>>       __kvm_read_guest_page+0x58/0xa0
>>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:1771
>>       kvm_vcpu_read_guest_page+0x44/0x60
>>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:1791
>>       kvm_read_guest_virt_helper+0x76/0x140 arch/x86/kvm/x86.c:4407
>>       kvm_read_guest_virt_system+0x3c/0x50 arch/x86/kvm/x86.c:4466
>>       segmented_read_std+0x10c/0x180 arch/x86/kvm/emulate.c:819
>>       em_fxrstor+0x27b/0x410 arch/x86/kvm/emulate.c:4022
>>       x86_emulate_insn+0x55d/0x3c50 arch/x86/kvm/emulate.c:5471
>>       x86_emulate_instruction+0x411/0x1ca0 arch/x86/kvm/x86.c:5698
>>       kvm_mmu_page_fault+0x18b/0x2c0 arch/x86/kvm/mmu.c:4854
>>       handle_ept_violation+0x1fc/0x5e0 arch/x86/kvm/vmx.c:6400
>>       vmx_handle_exit+0x281/0x1ab0 arch/x86/kvm/vmx.c:8718
>>       vcpu_enter_guest arch/x86/kvm/x86.c:6999 [inline]
>>       vcpu_run arch/x86/kvm/x86.c:7061 [inline]
>>       kvm_arch_vcpu_ioctl_run+0x1cee/0x58b0 arch/x86/kvm/x86.c:7222
>>       kvm_vcpu_ioctl+0x64c/0x1010
>>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:2591
>>       vfs_ioctl fs/ioctl.c:45 [inline]
>>       do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685
>>       SYSC_ioctl fs/ioctl.c:700 [inline]
>>       SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691
>>       entry_SYSCALL_64_fastpath+0x1f/0xbe
>>     RIP: 0033:0x437fc9
>>     RSP: 002b:00007ffc7b4d5ab8 EFLAGS: 00000206 ORIG_RAX: 0000000000000010
>>     RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000437fc9
>>     RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 0000000000000005
>>     RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000020ae8000
>>     R10: 0000000000009120 R11: 0000000000000206 R12: 0000000000000000
>>     R13: 0000000000000004 R14: 0000000000000004 R15: 0000000020077000
>>
>>     Fixes: 9d643f63128b ("KVM: x86: avoid large stack allocations in
>>     em_fxrstor")
>>     Signed-off-by: David Hildenbrand <david@redhat.com
>>     <mailto:david@redhat.com>>
>>     ---
>>      arch/x86/kvm/emulate.c | 16 +++++++++-------
>>      1 file changed, 9 insertions(+), 7 deletions(-)
>>
>>     diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
>>     index fb0055953fbc..d87f01a2d6f4 100644
>>     --- a/arch/x86/kvm/emulate.c
>>     +++ b/arch/x86/kvm/emulate.c
>>     @@ -4002,7 +4002,7 @@ static int em_fxsave(struct x86_emulate_ctxt
>>     *ctxt)
>>
>>      static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
>>      {
>>     -       struct fxregs_state fx_state;
>>     +       struct fxregs_state fx_state, fx_old;
>>             int rc;
>>             size_t size;
>>
>>     @@ -4010,19 +4010,21 @@ static int em_fxrstor(struct
>>     x86_emulate_ctxt *ctxt)
>>             if (rc != X86EMUL_CONTINUE)
>>                     return rc;
>>
>>     +       size = fxstate_size(ctxt);
>>     +       rc = segmented_read_std(ctxt, ctxt->memop.addr.mem,
>>     &fx_state, size);
>>     +       if (rc != X86EMUL_CONTINUE)
>>     +               return rc;
>>     +
>>             ctxt->ops->get_fpu(ctxt);
>>
>>     -       size = fxstate_size(ctxt);
>>             if (size < __fxstate_size(16)) {
>>     -               rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
>>     +               rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_old));
>>                     if (rc != X86EMUL_CONTINUE)
>>                             goto out;
>>     +               memcpy(((void *)&fx_state) + size, ((void *)&fx_old)
>>     + size,
>>     +                      __fxstate_size(16) - size);
>>             }
>>
>>     -       rc = segmented_read_std(ctxt, ctxt->memop.addr.mem,
>>     &fx_state, size);
>>     -       if (rc != X86EMUL_CONTINUE)
>>     -               goto out;
>>     -
>>             if (fx_state.mxcsr >> 16) {
>>                     rc = emulate_gp(ctxt, 0);
>>                     goto out;
>>     --
>>     2.13.6
>>
>>
>>
>>     --
>>
>>     Thanks,
>>
>>     David / dhildenb
>>
> 
>
David Hildenbrand Nov. 6, 2017, 4:19 p.m. UTC | #3
On 06.11.2017 17:14, Paolo Bonzini wrote:
> On 06/11/2017 17:01, David Hildenbrand wrote:
>> On 06.11.2017 16:10, Nick Desaulniers wrote:
>>> Does it have to be stack allocated?
>>
>> We can't use kmalloc and friends in emulate.c. We would have to
>> introduce new emulator callbacks.
>>
>> a) for malloc and free. hmmm.
>> b) for carrying out the fxrstr/fixup.
>>
>> Paolo, what do you suggest?
> 
> You can use kmalloc.  Any userspace user of emulate.c would have to
> write a wrapper.  But I'm not sure it's useful... maybe the
> asm_safe+memcpy could be moved to a separate noinline function, so that
> segmented_read_std is invoked with a leaner stack.

That's basically what we had before 9d643f63128b, however without the
"noinline".
Paolo Bonzini Nov. 6, 2017, 4:37 p.m. UTC | #4
On 06/11/2017 17:19, David Hildenbrand wrote:
> On 06.11.2017 17:14, Paolo Bonzini wrote:
>> On 06/11/2017 17:01, David Hildenbrand wrote:
>>> On 06.11.2017 16:10, Nick Desaulniers wrote:
>>>> Does it have to be stack allocated?
>>>
>>> We can't use kmalloc and friends in emulate.c. We would have to
>>> introduce new emulator callbacks.
>>>
>>> a) for malloc and free. hmmm.
>>> b) for carrying out the fxrstr/fixup.
>>>
>>> Paolo, what do you suggest?
>>
>> You can use kmalloc.  Any userspace user of emulate.c would have to
>> write a wrapper.  But I'm not sure it's useful... maybe the
>> asm_safe+memcpy could be moved to a separate noinline function, so that
>> segmented_read_std is invoked with a leaner stack.
> 
> That's basically what we had before 9d643f63128b, however without the
> "noinline".

Indeed.  I do prefer the usage of __fxstate_size though that was
introduced by 9d643f63128b.

Paolo
David Hildenbrand Nov. 6, 2017, 4:46 p.m. UTC | #5
On 06.11.2017 17:37, Paolo Bonzini wrote:
> On 06/11/2017 17:19, David Hildenbrand wrote:
>> On 06.11.2017 17:14, Paolo Bonzini wrote:
>>> On 06/11/2017 17:01, David Hildenbrand wrote:
>>>> On 06.11.2017 16:10, Nick Desaulniers wrote:
>>>>> Does it have to be stack allocated?
>>>>
>>>> We can't use kmalloc and friends in emulate.c. We would have to
>>>> introduce new emulator callbacks.
>>>>
>>>> a) for malloc and free. hmmm.
>>>> b) for carrying out the fxrstr/fixup.
>>>>
>>>> Paolo, what do you suggest?
>>>
>>> You can use kmalloc.  Any userspace user of emulate.c would have to
>>> write a wrapper.  But I'm not sure it's useful... maybe the
>>> asm_safe+memcpy could be moved to a separate noinline function, so that
>>> segmented_read_std is invoked with a leaner stack.
>>
>> That's basically what we had before 9d643f63128b, however without the
>> "noinline".
> 
> Indeed.  I do prefer the usage of __fxstate_size though that was
> introduced by 9d643f63128b.
> 
> Paolo
> 

So I'll modify my patch to have that inside a static noinline function
and resend.

Thanks.
Dmitry Vyukov Nov. 6, 2017, 7:04 p.m. UTC | #6
On Mon, Nov 6, 2017 at 5:14 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 06/11/2017 17:01, David Hildenbrand wrote:
>> On 06.11.2017 16:10, Nick Desaulniers wrote:
>>> Does it have to be stack allocated?
>>
>> We can't use kmalloc and friends in emulate.c. We would have to
>> introduce new emulator callbacks.
>>
>> a) for malloc and free. hmmm.
>> b) for carrying out the fxrstr/fixup.
>>
>> Paolo, what do you suggest?
>
> You can use kmalloc.  Any userspace user of emulate.c would have to
> write a wrapper.


Can you please tell me more about this? Is it used for testing? Is
there an example code that builds and tests this in user-space?


>  But I'm not sure it's useful... maybe the
> asm_safe+memcpy could be moved to a separate noinline function, so that
> segmented_read_std is invoked with a leaner stack.
>
> Paolo
>
>>>
>>> On Nov 6, 2017 3:52 AM, "David Hildenbrand" <david@redhat.com
>>> <mailto:david@redhat.com>> wrote:
>>>
>>>     On 31.10.2017 12:34, syzbot wrote:
>>>     > Hello,
>>>     >
>>>     > syzkaller hit the following crash on
>>>     > 91dfed74eabcdae9378131546c446442c29bf769
>>>     >
>>>     git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master
>>>     <http://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master>
>>>     > compiler: gcc (GCC) 7.1.1 20170620
>>>     > .config is attached
>>>     > Raw console output is attached.
>>>     > C reproducer is attached
>>>     > syzkaller reproducer is attached. See https://goo.gl/kgGztJ
>>>     > for information about syzkaller reproducers
>>>     >
>>>     >
>>>     > in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: syzkaller879109
>>>     > 2 locks held by syzkaller879109/2909:
>>>     >   #0:  (&vcpu->mutex){+.+.}, at: [<ffffffff8106222c>]
>>>     vcpu_load+0x1c/0x70
>>>     > arch/x86/kvm/../../../virt/kvm/kvm_main.c:154
>>>     >   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_enter_guest
>>>     > arch/x86/kvm/x86.c:6983 [inline]
>>>     >   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_run
>>>     > arch/x86/kvm/x86.c:7061 [inline]
>>>     >   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>]
>>>     > kvm_arch_vcpu_ioctl_run+0x1bc2/0x58b0 arch/x86/kvm/x86.c:7222
>>>     > CPU: 1 PID: 2909 Comm: syzkaller879109 Not tainted
>>>     4.13.0-rc4-next-20170811
>>>     > #1
>>>     > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
>>>     01/01/2011
>>>     > Call Trace:
>>>     >   __dump_stack lib/dump_stack.c:16 [inline]
>>>     >   dump_stack+0x194/0x257 lib/dump_stack.c:52
>>>     >   ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6014
>>>     >   __might_sleep+0x95/0x190 kernel/sched/core.c:5967
>>>     >   __might_fault+0xab/0x1d0 mm/memory.c:4383
>>>     >   __copy_from_user include/linux/uaccess.h:71 [inline]
>>>     >   __kvm_read_guest_page+0x58/0xa0
>>>     > arch/x86/kvm/../../../virt/kvm/kvm_main.c:1771
>>>     >   kvm_vcpu_read_guest_page+0x44/0x60
>>>     > arch/x86/kvm/../../../virt/kvm/kvm_main.c:1791
>>>     >   kvm_read_guest_virt_helper+0x76/0x140 arch/x86/kvm/x86.c:4407
>>>     >   kvm_read_guest_virt_system+0x3c/0x50 arch/x86/kvm/x86.c:4466
>>>     >   segmented_read_std+0x10c/0x180 arch/x86/kvm/emulate.c:819
>>>     >   em_fxrstor+0x27b/0x410 arch/x86/kvm/emulate.c:4022
>>>
>>>
>>>     In em_fxrstor, we do a get_fpu(), which in return disables preemption.
>>>
>>>     With preempt_disable(), we do a
>>>     segmented_read_std()->kvm_vcpu_read_guest_page(), triggering the
>>>     warning.
>>>
>>>     >   x86_emulate_insn+0x55d/0x3c50 arch/x86/kvm/emulate.c:5471
>>>     >   x86_emulate_instruction+0x411/0x1ca0 arch/x86/kvm/x86.c:5698
>>>     >   kvm_mmu_page_fault+0x18b/0x2c0 arch/x86/kvm/mmu.c:4854
>>>     >   handle_ept_violation+0x1fc/0x5e0 arch/x86/kvm/vmx.c:6400
>>>     >   vmx_handle_exit+0x281/0x1ab0 arch/x86/kvm/vmx.c:8718
>>>     >   vcpu_enter_guest arch/x86/kvm/x86.c:6999 [inline]
>>>     >   vcpu_run arch/x86/kvm/x86.c:7061 [inline]
>>>     >   kvm_arch_vcpu_ioctl_run+0x1cee/0x58b0 arch/x86/kvm/x86.c:7222
>>>     >   kvm_vcpu_ioctl+0x64c/0x1010
>>>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:2591
>>>     >   vfs_ioctl fs/ioctl.c:45 [inline]
>>>     >   do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685
>>>     >   SYSC_ioctl fs/ioctl.c:700 [inline]
>>>     >   SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691
>>>     >   entry_SYSCALL_64_fastpath+0x1f/0xbe
>>>
>>>     I don't really see a way to avoid two fxstate variables. Unloading the
>>>     fpu in between the fxstore/fxrstr could lead to host values getting
>>>     overwritten. Loading/saving the fpu in kvm_arch_vcpu_ioctl_run() would
>>>     most probably also not work, as the relevant portions of fxregs_state
>>>     would not get saved/restored. So the preemption would still be needed.
>>>
>>>
>>>     So all I can offer for now is the following (untested, can send as
>>>     proper patch if needed):
>>>
>>>
>>>     From f32d06c8c621c6d68e073e9bdb81a6280b6c9544 Mon Sep 17 00:00:00 2001
>>>     From: David Hildenbrand <david@redhat.com <mailto:david@redhat.com>>
>>>     Date: Mon, 6 Nov 2017 12:35:39 +0100
>>>     Subject: [PATCH v1] KVM: x86: fix em_fxstor sleeping while in atomic
>>>
>>>     Commit 9d643f63128b tried to optimize the stack size, but introduced a
>>>     guest memory access which might sleep while in atomic.
>>>
>>>     Let's undo that part of the commit but keep the cleanups.
>>>
>>>     Reported by syzbot:
>>>
>>>     in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: syzkaller879109
>>>     2 locks held by syzkaller879109/2909:
>>>       #0:  (&vcpu->mutex){+.+.}, at: [<ffffffff8106222c>]
>>>     vcpu_load+0x1c/0x70
>>>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:154
>>>       #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_enter_guest
>>>     arch/x86/kvm/x86.c:6983 [inline]
>>>       #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_run
>>>     arch/x86/kvm/x86.c:7061 [inline]
>>>       #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>]
>>>     kvm_arch_vcpu_ioctl_run+0x1bc2/0x58b0 arch/x86/kvm/x86.c:7222
>>>     CPU: 1 PID: 2909 Comm: syzkaller879109 Not tainted
>>>     4.13.0-rc4-next-20170811
>>>     Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
>>>     01/01/2011
>>>     Call Trace:
>>>       __dump_stack lib/dump_stack.c:16 [inline]
>>>       dump_stack+0x194/0x257 lib/dump_stack.c:52
>>>       ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6014
>>>       __might_sleep+0x95/0x190 kernel/sched/core.c:5967
>>>       __might_fault+0xab/0x1d0 mm/memory.c:4383
>>>       __copy_from_user include/linux/uaccess.h:71 [inline]
>>>       __kvm_read_guest_page+0x58/0xa0
>>>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:1771
>>>       kvm_vcpu_read_guest_page+0x44/0x60
>>>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:1791
>>>       kvm_read_guest_virt_helper+0x76/0x140 arch/x86/kvm/x86.c:4407
>>>       kvm_read_guest_virt_system+0x3c/0x50 arch/x86/kvm/x86.c:4466
>>>       segmented_read_std+0x10c/0x180 arch/x86/kvm/emulate.c:819
>>>       em_fxrstor+0x27b/0x410 arch/x86/kvm/emulate.c:4022
>>>       x86_emulate_insn+0x55d/0x3c50 arch/x86/kvm/emulate.c:5471
>>>       x86_emulate_instruction+0x411/0x1ca0 arch/x86/kvm/x86.c:5698
>>>       kvm_mmu_page_fault+0x18b/0x2c0 arch/x86/kvm/mmu.c:4854
>>>       handle_ept_violation+0x1fc/0x5e0 arch/x86/kvm/vmx.c:6400
>>>       vmx_handle_exit+0x281/0x1ab0 arch/x86/kvm/vmx.c:8718
>>>       vcpu_enter_guest arch/x86/kvm/x86.c:6999 [inline]
>>>       vcpu_run arch/x86/kvm/x86.c:7061 [inline]
>>>       kvm_arch_vcpu_ioctl_run+0x1cee/0x58b0 arch/x86/kvm/x86.c:7222
>>>       kvm_vcpu_ioctl+0x64c/0x1010
>>>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:2591
>>>       vfs_ioctl fs/ioctl.c:45 [inline]
>>>       do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685
>>>       SYSC_ioctl fs/ioctl.c:700 [inline]
>>>       SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691
>>>       entry_SYSCALL_64_fastpath+0x1f/0xbe
>>>     RIP: 0033:0x437fc9
>>>     RSP: 002b:00007ffc7b4d5ab8 EFLAGS: 00000206 ORIG_RAX: 0000000000000010
>>>     RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000437fc9
>>>     RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 0000000000000005
>>>     RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000020ae8000
>>>     R10: 0000000000009120 R11: 0000000000000206 R12: 0000000000000000
>>>     R13: 0000000000000004 R14: 0000000000000004 R15: 0000000020077000
>>>
>>>     Fixes: 9d643f63128b ("KVM: x86: avoid large stack allocations in
>>>     em_fxrstor")
>>>     Signed-off-by: David Hildenbrand <david@redhat.com
>>>     <mailto:david@redhat.com>>
>>>     ---
>>>      arch/x86/kvm/emulate.c | 16 +++++++++-------
>>>      1 file changed, 9 insertions(+), 7 deletions(-)
>>>
>>>     diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
>>>     index fb0055953fbc..d87f01a2d6f4 100644
>>>     --- a/arch/x86/kvm/emulate.c
>>>     +++ b/arch/x86/kvm/emulate.c
>>>     @@ -4002,7 +4002,7 @@ static int em_fxsave(struct x86_emulate_ctxt
>>>     *ctxt)
>>>
>>>      static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
>>>      {
>>>     -       struct fxregs_state fx_state;
>>>     +       struct fxregs_state fx_state, fx_old;
>>>             int rc;
>>>             size_t size;
>>>
>>>     @@ -4010,19 +4010,21 @@ static int em_fxrstor(struct
>>>     x86_emulate_ctxt *ctxt)
>>>             if (rc != X86EMUL_CONTINUE)
>>>                     return rc;
>>>
>>>     +       size = fxstate_size(ctxt);
>>>     +       rc = segmented_read_std(ctxt, ctxt->memop.addr.mem,
>>>     &fx_state, size);
>>>     +       if (rc != X86EMUL_CONTINUE)
>>>     +               return rc;
>>>     +
>>>             ctxt->ops->get_fpu(ctxt);
>>>
>>>     -       size = fxstate_size(ctxt);
>>>             if (size < __fxstate_size(16)) {
>>>     -               rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
>>>     +               rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_old));
>>>                     if (rc != X86EMUL_CONTINUE)
>>>                             goto out;
>>>     +               memcpy(((void *)&fx_state) + size, ((void *)&fx_old)
>>>     + size,
>>>     +                      __fxstate_size(16) - size);
>>>             }
>>>
>>>     -       rc = segmented_read_std(ctxt, ctxt->memop.addr.mem,
>>>     &fx_state, size);
>>>     -       if (rc != X86EMUL_CONTINUE)
>>>     -               goto out;
>>>     -
>>>             if (fx_state.mxcsr >> 16) {
>>>                     rc = emulate_gp(ctxt, 0);
>>>                     goto out;
>>>     --
>>>     2.13.6
>>>
>>>
>>>
>>>     --
>>>
>>>     Thanks,
>>>
>>>     David / dhildenb
>>>
>>
>>
>
> --
> You received this message because you are subscribed to the Google Groups "syzkaller-bugs" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to syzkaller-bugs+unsubscribe@googlegroups.com.
> To view this discussion on the web visit https://groups.google.com/d/msgid/syzkaller-bugs/fc4fec6a-13fc-e5f4-e78a-711f2fccec92%40redhat.com.
> For more options, visit https://groups.google.com/d/optout.
Paolo Bonzini Nov. 7, 2017, 12:29 p.m. UTC | #7
On 06/11/2017 20:04, Dmitry Vyukov wrote:
> On Mon, Nov 6, 2017 at 5:14 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> On 06/11/2017 17:01, David Hildenbrand wrote:
>>> On 06.11.2017 16:10, Nick Desaulniers wrote:
>>>> Does it have to be stack allocated?
>>>
>>> We can't use kmalloc and friends in emulate.c. We would have to
>>> introduce new emulator callbacks.
>>>
>>> a) for malloc and free. hmmm.
>>> b) for carrying out the fxrstr/fixup.
>>>
>>> Paolo, what do you suggest?
>>
>> You can use kmalloc.  Any userspace user of emulate.c would have to
>> write a wrapper.
> 
> 
> Can you please tell me more about this? Is it used for testing? Is
> there an example code that builds and tests this in user-space?

Not quite, there's no user outside KVM yet.  But the emulator code is
designed to be independent from KVM's memory access primitives; with
"nm" you can see how there are very few undefined symbols:

                 U ex_handler_default
                 U find_first_bit
                 U find_next_bit
                 U memcpy
                 U printk

Exceptions are only used for div/idiv, if it gets in the way it's okay
to just revert commit b8c0b6ae498f ("KVM: x86 emulator: convert DIV/IDIV
to fastop", 2013-05-21).

On the other hand, dependencies on Linux headers have sneaked in more
and more, but refactoring those away should not be too hard.

Thanks,

Paolo


> 
> 
>>  But I'm not sure it's useful... maybe the
>> asm_safe+memcpy could be moved to a separate noinline function, so that
>> segmented_read_std is invoked with a leaner stack.
>>
>> Paolo
>>
>>>>
>>>> On Nov 6, 2017 3:52 AM, "David Hildenbrand" <david@redhat.com
>>>> <mailto:david@redhat.com>> wrote:
>>>>
>>>>     On 31.10.2017 12:34, syzbot wrote:
>>>>     > Hello,
>>>>     >
>>>>     > syzkaller hit the following crash on
>>>>     > 91dfed74eabcdae9378131546c446442c29bf769
>>>>     >
>>>>     git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master
>>>>     <http://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master>
>>>>     > compiler: gcc (GCC) 7.1.1 20170620
>>>>     > .config is attached
>>>>     > Raw console output is attached.
>>>>     > C reproducer is attached
>>>>     > syzkaller reproducer is attached. See https://goo.gl/kgGztJ
>>>>     > for information about syzkaller reproducers
>>>>     >
>>>>     >
>>>>     > in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: syzkaller879109
>>>>     > 2 locks held by syzkaller879109/2909:
>>>>     >   #0:  (&vcpu->mutex){+.+.}, at: [<ffffffff8106222c>]
>>>>     vcpu_load+0x1c/0x70
>>>>     > arch/x86/kvm/../../../virt/kvm/kvm_main.c:154
>>>>     >   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_enter_guest
>>>>     > arch/x86/kvm/x86.c:6983 [inline]
>>>>     >   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_run
>>>>     > arch/x86/kvm/x86.c:7061 [inline]
>>>>     >   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>]
>>>>     > kvm_arch_vcpu_ioctl_run+0x1bc2/0x58b0 arch/x86/kvm/x86.c:7222
>>>>     > CPU: 1 PID: 2909 Comm: syzkaller879109 Not tainted
>>>>     4.13.0-rc4-next-20170811
>>>>     > #1
>>>>     > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
>>>>     01/01/2011
>>>>     > Call Trace:
>>>>     >   __dump_stack lib/dump_stack.c:16 [inline]
>>>>     >   dump_stack+0x194/0x257 lib/dump_stack.c:52
>>>>     >   ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6014
>>>>     >   __might_sleep+0x95/0x190 kernel/sched/core.c:5967
>>>>     >   __might_fault+0xab/0x1d0 mm/memory.c:4383
>>>>     >   __copy_from_user include/linux/uaccess.h:71 [inline]
>>>>     >   __kvm_read_guest_page+0x58/0xa0
>>>>     > arch/x86/kvm/../../../virt/kvm/kvm_main.c:1771
>>>>     >   kvm_vcpu_read_guest_page+0x44/0x60
>>>>     > arch/x86/kvm/../../../virt/kvm/kvm_main.c:1791
>>>>     >   kvm_read_guest_virt_helper+0x76/0x140 arch/x86/kvm/x86.c:4407
>>>>     >   kvm_read_guest_virt_system+0x3c/0x50 arch/x86/kvm/x86.c:4466
>>>>     >   segmented_read_std+0x10c/0x180 arch/x86/kvm/emulate.c:819
>>>>     >   em_fxrstor+0x27b/0x410 arch/x86/kvm/emulate.c:4022
>>>>
>>>>
>>>>     In em_fxrstor, we do a get_fpu(), which in return disables preemption.
>>>>
>>>>     With preempt_disable(), we do a
>>>>     segmented_read_std()->kvm_vcpu_read_guest_page(), triggering the
>>>>     warning.
>>>>
>>>>     >   x86_emulate_insn+0x55d/0x3c50 arch/x86/kvm/emulate.c:5471
>>>>     >   x86_emulate_instruction+0x411/0x1ca0 arch/x86/kvm/x86.c:5698
>>>>     >   kvm_mmu_page_fault+0x18b/0x2c0 arch/x86/kvm/mmu.c:4854
>>>>     >   handle_ept_violation+0x1fc/0x5e0 arch/x86/kvm/vmx.c:6400
>>>>     >   vmx_handle_exit+0x281/0x1ab0 arch/x86/kvm/vmx.c:8718
>>>>     >   vcpu_enter_guest arch/x86/kvm/x86.c:6999 [inline]
>>>>     >   vcpu_run arch/x86/kvm/x86.c:7061 [inline]
>>>>     >   kvm_arch_vcpu_ioctl_run+0x1cee/0x58b0 arch/x86/kvm/x86.c:7222
>>>>     >   kvm_vcpu_ioctl+0x64c/0x1010
>>>>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:2591
>>>>     >   vfs_ioctl fs/ioctl.c:45 [inline]
>>>>     >   do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685
>>>>     >   SYSC_ioctl fs/ioctl.c:700 [inline]
>>>>     >   SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691
>>>>     >   entry_SYSCALL_64_fastpath+0x1f/0xbe
>>>>
>>>>     I don't really see a way to avoid two fxstate variables. Unloading the
>>>>     fpu in between the fxstore/fxrstr could lead to host values getting
>>>>     overwritten. Loading/saving the fpu in kvm_arch_vcpu_ioctl_run() would
>>>>     most probably also not work, as the relevant portions of fxregs_state
>>>>     would not get saved/restored. So the preemption would still be needed.
>>>>
>>>>
>>>>     So all I can offer for now is the following (untested, can send as
>>>>     proper patch if needed):
>>>>
>>>>
>>>>     From f32d06c8c621c6d68e073e9bdb81a6280b6c9544 Mon Sep 17 00:00:00 2001
>>>>     From: David Hildenbrand <david@redhat.com <mailto:david@redhat.com>>
>>>>     Date: Mon, 6 Nov 2017 12:35:39 +0100
>>>>     Subject: [PATCH v1] KVM: x86: fix em_fxstor sleeping while in atomic
>>>>
>>>>     Commit 9d643f63128b tried to optimize the stack size, but introduced a
>>>>     guest memory access which might sleep while in atomic.
>>>>
>>>>     Let's undo that part of the commit but keep the cleanups.
>>>>
>>>>     Reported by syzbot:
>>>>
>>>>     in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: syzkaller879109
>>>>     2 locks held by syzkaller879109/2909:
>>>>       #0:  (&vcpu->mutex){+.+.}, at: [<ffffffff8106222c>]
>>>>     vcpu_load+0x1c/0x70
>>>>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:154
>>>>       #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_enter_guest
>>>>     arch/x86/kvm/x86.c:6983 [inline]
>>>>       #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_run
>>>>     arch/x86/kvm/x86.c:7061 [inline]
>>>>       #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>]
>>>>     kvm_arch_vcpu_ioctl_run+0x1bc2/0x58b0 arch/x86/kvm/x86.c:7222
>>>>     CPU: 1 PID: 2909 Comm: syzkaller879109 Not tainted
>>>>     4.13.0-rc4-next-20170811
>>>>     Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
>>>>     01/01/2011
>>>>     Call Trace:
>>>>       __dump_stack lib/dump_stack.c:16 [inline]
>>>>       dump_stack+0x194/0x257 lib/dump_stack.c:52
>>>>       ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6014
>>>>       __might_sleep+0x95/0x190 kernel/sched/core.c:5967
>>>>       __might_fault+0xab/0x1d0 mm/memory.c:4383
>>>>       __copy_from_user include/linux/uaccess.h:71 [inline]
>>>>       __kvm_read_guest_page+0x58/0xa0
>>>>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:1771
>>>>       kvm_vcpu_read_guest_page+0x44/0x60
>>>>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:1791
>>>>       kvm_read_guest_virt_helper+0x76/0x140 arch/x86/kvm/x86.c:4407
>>>>       kvm_read_guest_virt_system+0x3c/0x50 arch/x86/kvm/x86.c:4466
>>>>       segmented_read_std+0x10c/0x180 arch/x86/kvm/emulate.c:819
>>>>       em_fxrstor+0x27b/0x410 arch/x86/kvm/emulate.c:4022
>>>>       x86_emulate_insn+0x55d/0x3c50 arch/x86/kvm/emulate.c:5471
>>>>       x86_emulate_instruction+0x411/0x1ca0 arch/x86/kvm/x86.c:5698
>>>>       kvm_mmu_page_fault+0x18b/0x2c0 arch/x86/kvm/mmu.c:4854
>>>>       handle_ept_violation+0x1fc/0x5e0 arch/x86/kvm/vmx.c:6400
>>>>       vmx_handle_exit+0x281/0x1ab0 arch/x86/kvm/vmx.c:8718
>>>>       vcpu_enter_guest arch/x86/kvm/x86.c:6999 [inline]
>>>>       vcpu_run arch/x86/kvm/x86.c:7061 [inline]
>>>>       kvm_arch_vcpu_ioctl_run+0x1cee/0x58b0 arch/x86/kvm/x86.c:7222
>>>>       kvm_vcpu_ioctl+0x64c/0x1010
>>>>     arch/x86/kvm/../../../virt/kvm/kvm_main.c:2591
>>>>       vfs_ioctl fs/ioctl.c:45 [inline]
>>>>       do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685
>>>>       SYSC_ioctl fs/ioctl.c:700 [inline]
>>>>       SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691
>>>>       entry_SYSCALL_64_fastpath+0x1f/0xbe
>>>>     RIP: 0033:0x437fc9
>>>>     RSP: 002b:00007ffc7b4d5ab8 EFLAGS: 00000206 ORIG_RAX: 0000000000000010
>>>>     RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000437fc9
>>>>     RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 0000000000000005
>>>>     RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000020ae8000
>>>>     R10: 0000000000009120 R11: 0000000000000206 R12: 0000000000000000
>>>>     R13: 0000000000000004 R14: 0000000000000004 R15: 0000000020077000
>>>>
>>>>     Fixes: 9d643f63128b ("KVM: x86: avoid large stack allocations in
>>>>     em_fxrstor")
>>>>     Signed-off-by: David Hildenbrand <david@redhat.com
>>>>     <mailto:david@redhat.com>>
>>>>     ---
>>>>      arch/x86/kvm/emulate.c | 16 +++++++++-------
>>>>      1 file changed, 9 insertions(+), 7 deletions(-)
>>>>
>>>>     diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
>>>>     index fb0055953fbc..d87f01a2d6f4 100644
>>>>     --- a/arch/x86/kvm/emulate.c
>>>>     +++ b/arch/x86/kvm/emulate.c
>>>>     @@ -4002,7 +4002,7 @@ static int em_fxsave(struct x86_emulate_ctxt
>>>>     *ctxt)
>>>>
>>>>      static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
>>>>      {
>>>>     -       struct fxregs_state fx_state;
>>>>     +       struct fxregs_state fx_state, fx_old;
>>>>             int rc;
>>>>             size_t size;
>>>>
>>>>     @@ -4010,19 +4010,21 @@ static int em_fxrstor(struct
>>>>     x86_emulate_ctxt *ctxt)
>>>>             if (rc != X86EMUL_CONTINUE)
>>>>                     return rc;
>>>>
>>>>     +       size = fxstate_size(ctxt);
>>>>     +       rc = segmented_read_std(ctxt, ctxt->memop.addr.mem,
>>>>     &fx_state, size);
>>>>     +       if (rc != X86EMUL_CONTINUE)
>>>>     +               return rc;
>>>>     +
>>>>             ctxt->ops->get_fpu(ctxt);
>>>>
>>>>     -       size = fxstate_size(ctxt);
>>>>             if (size < __fxstate_size(16)) {
>>>>     -               rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
>>>>     +               rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_old));
>>>>                     if (rc != X86EMUL_CONTINUE)
>>>>                             goto out;
>>>>     +               memcpy(((void *)&fx_state) + size, ((void *)&fx_old)
>>>>     + size,
>>>>     +                      __fxstate_size(16) - size);
>>>>             }
>>>>
>>>>     -       rc = segmented_read_std(ctxt, ctxt->memop.addr.mem,
>>>>     &fx_state, size);
>>>>     -       if (rc != X86EMUL_CONTINUE)
>>>>     -               goto out;
>>>>     -
>>>>             if (fx_state.mxcsr >> 16) {
>>>>                     rc = emulate_gp(ctxt, 0);
>>>>                     goto out;
>>>>     --
>>>>     2.13.6
>>>>
>>>>
>>>>
>>>>     --
>>>>
>>>>     Thanks,
>>>>
>>>>     David / dhildenb
>>>>
>>>
>>>
>>
>> --
>> You received this message because you are subscribed to the Google Groups "syzkaller-bugs" group.
>> To unsubscribe from this group and stop receiving emails from it, send an email to syzkaller-bugs+unsubscribe@googlegroups.com.
>> To view this discussion on the web visit https://groups.google.com/d/msgid/syzkaller-bugs/fc4fec6a-13fc-e5f4-e78a-711f2fccec92%40redhat.com.
>> For more options, visit https://groups.google.com/d/optout.
Dmitry Vyukov Nov. 7, 2017, 12:44 p.m. UTC | #8
On Tue, Nov 7, 2017 at 1:29 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>>>> Does it have to be stack allocated?
>>>>
>>>> We can't use kmalloc and friends in emulate.c. We would have to
>>>> introduce new emulator callbacks.
>>>>
>>>> a) for malloc and free. hmmm.
>>>> b) for carrying out the fxrstr/fixup.
>>>>
>>>> Paolo, what do you suggest?
>>>
>>> You can use kmalloc.  Any userspace user of emulate.c would have to
>>> write a wrapper.
>>
>>
>> Can you please tell me more about this? Is it used for testing? Is
>> there an example code that builds and tests this in user-space?
>
> Not quite, there's no user outside KVM yet.  But the emulator code is
> designed to be independent from KVM's memory access primitives; with
> "nm" you can see how there are very few undefined symbols:
>
>                  U ex_handler_default
>                  U find_first_bit
>                  U find_next_bit
>                  U memcpy
>                  U printk
>
> Exceptions are only used for div/idiv, if it gets in the way it's okay
> to just revert commit b8c0b6ae498f ("KVM: x86 emulator: convert DIV/IDIV
> to fastop", 2013-05-21).
>
> On the other hand, dependencies on Linux headers have sneaked in more
> and more, but refactoring those away should not be too hard.


Thanks for the info. We may be potentially interested in fuzzing this
in user-space.
Eric Biggers Jan. 26, 2018, 9:11 p.m. UTC | #9
On Mon, Nov 06, 2017 at 12:51:57PM +0100, David Hildenbrand wrote:
> On 31.10.2017 12:34, syzbot wrote:
> > Hello,
> > 
> > syzkaller hit the following crash on  
> > 91dfed74eabcdae9378131546c446442c29bf769
> > git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master
> > compiler: gcc (GCC) 7.1.1 20170620
> > .config is attached
> > Raw console output is attached.
> > C reproducer is attached
> > syzkaller reproducer is attached. See https://goo.gl/kgGztJ
> > for information about syzkaller reproducers
> > 
> > 
> > in_atomic(): 1, irqs_disabled(): 0, pid: 2909, name: syzkaller879109
> > 2 locks held by syzkaller879109/2909:
> >   #0:  (&vcpu->mutex){+.+.}, at: [<ffffffff8106222c>] vcpu_load+0x1c/0x70  
> > arch/x86/kvm/../../../virt/kvm/kvm_main.c:154
> >   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_enter_guest  
> > arch/x86/kvm/x86.c:6983 [inline]
> >   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>] vcpu_run  
> > arch/x86/kvm/x86.c:7061 [inline]
> >   #1:  (&kvm->srcu){....}, at: [<ffffffff810dd162>]  
> > kvm_arch_vcpu_ioctl_run+0x1bc2/0x58b0 arch/x86/kvm/x86.c:7222
> > CPU: 1 PID: 2909 Comm: syzkaller879109 Not tainted 4.13.0-rc4-next-20170811  
> > #1
> > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> > Call Trace:
> >   __dump_stack lib/dump_stack.c:16 [inline]
> >   dump_stack+0x194/0x257 lib/dump_stack.c:52
> >   ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6014
> >   __might_sleep+0x95/0x190 kernel/sched/core.c:5967
> >   __might_fault+0xab/0x1d0 mm/memory.c:4383
> >   __copy_from_user include/linux/uaccess.h:71 [inline]
> >   __kvm_read_guest_page+0x58/0xa0  
> > arch/x86/kvm/../../../virt/kvm/kvm_main.c:1771
> >   kvm_vcpu_read_guest_page+0x44/0x60  
> > arch/x86/kvm/../../../virt/kvm/kvm_main.c:1791
> >   kvm_read_guest_virt_helper+0x76/0x140 arch/x86/kvm/x86.c:4407
> >   kvm_read_guest_virt_system+0x3c/0x50 arch/x86/kvm/x86.c:4466
> >   segmented_read_std+0x10c/0x180 arch/x86/kvm/emulate.c:819
> >   em_fxrstor+0x27b/0x410 arch/x86/kvm/emulate.c:4022
> 
> 
> In em_fxrstor, we do a get_fpu(), which in return disables preemption.
> 
> With preempt_disable(), we do a
> segmented_read_std()->kvm_vcpu_read_guest_page(), triggering the warning.
> 
> >   x86_emulate_insn+0x55d/0x3c50 arch/x86/kvm/emulate.c:5471
> >   x86_emulate_instruction+0x411/0x1ca0 arch/x86/kvm/x86.c:5698
> >   kvm_mmu_page_fault+0x18b/0x2c0 arch/x86/kvm/mmu.c:4854
> >   handle_ept_violation+0x1fc/0x5e0 arch/x86/kvm/vmx.c:6400
> >   vmx_handle_exit+0x281/0x1ab0 arch/x86/kvm/vmx.c:8718
> >   vcpu_enter_guest arch/x86/kvm/x86.c:6999 [inline]
> >   vcpu_run arch/x86/kvm/x86.c:7061 [inline]
> >   kvm_arch_vcpu_ioctl_run+0x1cee/0x58b0 arch/x86/kvm/x86.c:7222
> >   kvm_vcpu_ioctl+0x64c/0x1010 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2591
> >   vfs_ioctl fs/ioctl.c:45 [inline]
> >   do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685
> >   SYSC_ioctl fs/ioctl.c:700 [inline]
> >   SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691
> >   entry_SYSCALL_64_fastpath+0x1f/0xbe
> 

This is fixed now (thanks David!), so closing the bug:

#syz fix: KVM: x86: fix em_fxstor() sleeping while in atomic

However, isn't this also needed in 4.14-stable?
diff mbox

Patch

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index fb0055953fbc..d87f01a2d6f4 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4002,7 +4002,7 @@  static int em_fxsave(struct x86_emulate_ctxt *ctxt)

 static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
 {
-	struct fxregs_state fx_state;
+	struct fxregs_state fx_state, fx_old;
 	int rc;
 	size_t size;

@@ -4010,19 +4010,21 @@  static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;

+	size = fxstate_size(ctxt);
+	rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
 	ctxt->ops->get_fpu(ctxt);

-	size = fxstate_size(ctxt);
 	if (size < __fxstate_size(16)) {
-		rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
+		rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_old));
 		if (rc != X86EMUL_CONTINUE)
 			goto out;
+		memcpy(((void *)&fx_state) + size, ((void *)&fx_old) + size,
+		       __fxstate_size(16) - size);
 	}

-	rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
-	if (rc != X86EMUL_CONTINUE)
-		goto out;
-
 	if (fx_state.mxcsr >> 16) {
 		rc = emulate_gp(ctxt, 0);
 		goto out;