Message ID | 1498705321-3927-3-git-send-email-wanpeng.li@hotmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
2017-06-28 20:01-0700, Wanpeng Li: > From: Wanpeng Li <wanpeng.li@hotmail.com> > > This patch adds the L1 guest async page fault #PF vmexit handler, such > #PF is converted into vmexit from L2 to L1 on #PF which is then handled > by L1 similar to ordinary async page fault. > > Cc: Paolo Bonzini <pbonzini@redhat.com> > Cc: Radim Krčmář <rkrcmar@redhat.com> > Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> > --- This patch breaks SVM, so I've taken the series off kvm/queue for now; I'll look into it tomorrow. The error is: BUG: unable to handle kernel paging request at ffffffffc0735ad2 IP: report_bug+0x94/0x120 PGD 43e14067 P4D 43e14067 PUD 43e16067 PMD 2164bf067 PTE 80000002181fc161 Oops: 0003 [#1] SMP Modules linked in: kvm_amd(OE) kvm(OE) irqbypass(E) xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack libcrc32c tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables sunrpc snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm ppdev joydev parport_serial parport_pc snd_timer parport k10temp sky2 snd shpchp sp5100_tco acpi_cpufreq wmi soundcore i2c_piix4 amdkfd amd_iommu_v2 radeon i2c_algo_bit drm_kms_helper uas serio_raw usb_storage ttm pata_atiixp drm ata_generic pata_acpi pata_jmicron [last unloaded: irqbypass] CPU: 3 PID: 1868 Comm: CPU 0/KVM Tainted: G OE 4.12.0+ #1 Hardware name: To Be Filled By O.E.M. 
To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080014 03/07/2008 task: ffff8bcbe3f1b140 task.stack: ffffabb481970000 RIP: 0010:report_bug+0x94/0x120 RSP: 0018:ffffabb481973a70 EFLAGS: 00010202 RAX: 0000000000000907 RBX: ffffabb481973bd8 RCX: ffffffffc0735ac8 RDX: 0000000000000001 RSI: 0000000000000ed0 RDI: 0000000000000001 RBP: ffffabb481973a90 R08: 0000000000000001 R09: 7f9f279200000000 R10: ffffabb4819739d0 R11: 0000000000000000 R12: ffffffffc07023d0 R13: ffffffffc0733078 R14: 0000000000000004 R15: ffffabb481973bd8 FS: 0000000000000000(0000) GS:ffff8bcbe7400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffc0735ad2 CR3: 00000002189d7000 CR4: 00000000000006e0 Call Trace: ? kvm_handle_page_fault+0x1f0/0x200 [kvm] fixup_bug+0x2e/0x50 do_trap+0x119/0x150 do_error_trap+0xa3/0x160 ? kvm_handle_page_fault+0x1f0/0x200 [kvm] ? trace_hardirqs_off_thunk+0x1a/0x1c do_invalid_op+0x20/0x30 invalid_op+0x1e/0x30 RIP: 0010:kvm_handle_page_fault+0x1f0/0x200 [kvm] RSP: 0018:ffffabb481973c80 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffff8bcbd7550000 RCX: 0000000000000000 RDX: 00000000fffffff0 RSI: 0000000000000014 RDI: ffff8bcbd7550000 RBP: ffffabb481973ca0 R08: 0000000000000001 R09: 27624b3d00000000 R10: ffffabb481973ca8 R11: ffff8bcbe3fb25f0 R12: 00000000fffffff0 R13: 0000000000000014 R14: ffff8bcbd7550000 R15: ffff8bcbd7550000 pf_interception+0x20/0x30 [kvm_amd] handle_exit+0x213/0xbb0 [kvm_amd] kvm_arch_vcpu_ioctl_run+0x7f1/0x1ae0 [kvm] kvm_vcpu_ioctl+0x2ac/0x6f0 [kvm] ? kvm_vcpu_ioctl+0x2ac/0x6f0 [kvm] ? sched_clock+0x9/0x10 ? 
debug_lockdep_rcu_enabled+0x1d/0x30 do_vfs_ioctl+0xa6/0x6c0 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x7fabf6d815c7 RSP: 002b:00007fabe87e77c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007fabf6d815c7 RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 0000000000000010 RBP: 000055a7cb502fe0 R08: 000055a7cb51e410 R09: 000055a7cb509390 R10: 000055a7cdb01000 R11: 0000000000000246 R12: 000055a7cdace0a6 R13: 0000000000000000 R14: 00007fac00621000 R15: 000055a7cdace000 Code: 74 59 0f b7 41 0a 4c 63 69 04 0f b7 71 08 89 c7 49 01 cd 83 e7 01 a8 02 74 15 66 85 ff 74 10 a8 04 ba 01 00 00 00 75 26 83 c8 04 <66> 89 41 0a 66 85 ff 74 49 0f b6 49 0b 4c 89 e2 45 31 c9 49 89 RIP: report_bug+0x94/0x120 RSP: ffffabb481973a70 CR2: ffffffffc0735ad2 ---[ end trace aec3a1f15664a4af ]--- BUG: sleeping function called from invalid context at ./include/linux/percpu-rwsem.h:33 in_atomic(): 0, irqs_disabled(): 1, pid: 1868, name: CPU 0/KVM INFO: lockdep is turned off. irq event stamp: 1868 hardirqs last enabled at (1867): [<ffffffffa398eaab>] restore_regs_and_iret+0x0/0x1d hardirqs last disabled at (1868): [<ffffffffa398f7dc>] error_entry+0x7c/0xd0 softirqs last enabled at (1834): [<ffffffffa3992f62>] __do_softirq+0x382/0x4ed softirqs last disabled at (1817): [<ffffffffa30b9a2f>] irq_exit+0x10f/0x120 CPU: 3 PID: 1868 Comm: CPU 0/KVM Tainted: G D OE 4.12.0+ #1 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080014 03/07/2008 Call Trace: dump_stack+0x8e/0xcd ___might_sleep+0x164/0x250 __might_sleep+0x4a/0x80 exit_signals+0x33/0x240 do_exit+0xb4/0xd20 ? 
SyS_ioctl+0x79/0x90 rewind_stack_do_exit+0x17/0x20 RIP: 0033:0x7fabf6d815c7 RSP: 002b:00007fabe87e77c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000000010000 RCX: 00007fabf6d815c7 RDX: 0000000000000000 RSI: 000000000000ae80 RDI: 0000000000000010 RBP: 000055a7cb502fe0 R08: 000055a7cb51e410 R09: 000055a7cb509390 R10: 000055a7cdb01000 R11: 0000000000000246 R12: 000055a7cdace0a6 R13: 0000000000000000 R14: 00007fac00621000 R15: 000055a7cdace000
2017-07-13 5:44 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
> 2017-06-28 20:01-0700, Wanpeng Li:
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> This patch adds the L1 guest async page fault #PF vmexit handler, such
>> #PF is converted into vmexit from L2 to L1 on #PF which is then handled
>> by L1 similar to ordinary async page fault.
>>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
>> ---
>
> This patch breaks SVM, so I've taken the series off kvm/queue for now;
> I'll look into it tomorrow.

Thanks for the help. :)

Regards,
Wanpeng Li
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1f01bfb..e20d8a8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -645,6 +645,7 @@ struct kvm_vcpu_arch { u64 msr_val; u32 id; bool send_user_only; + u32 host_apf_reason; } apf; /* OSVW MSRs (AMD only) */ diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index cb82259..4a7dc00 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -46,6 +46,7 @@ #include <asm/io.h> #include <asm/vmx.h> #include <asm/kvm_page_track.h> +#include "trace.h" /* * When setting this variable to true it enables Two-Dimensional-Paging @@ -3736,6 +3737,38 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, return false; } +int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, + u64 fault_address) +{ + int r = 1; + + switch (vcpu->arch.apf.host_apf_reason) { + default: + /* TDP won't cause page fault directly */ + WARN_ON_ONCE(tdp_enabled); + trace_kvm_page_fault(fault_address, error_code); + + if (kvm_event_needs_reinjection(vcpu)) + kvm_mmu_unprotect_page_virt(vcpu, fault_address); + r = kvm_mmu_page_fault(vcpu, fault_address, error_code, NULL, 0); + break; + case KVM_PV_REASON_PAGE_NOT_PRESENT: + vcpu->arch.apf.host_apf_reason = 0; + local_irq_disable(); + kvm_async_pf_task_wait(fault_address); + local_irq_enable(); + break; + case KVM_PV_REASON_PAGE_READY: + vcpu->arch.apf.host_apf_reason = 0; + local_irq_disable(); + kvm_async_pf_task_wake(fault_address); + local_irq_enable(); + break; + } + return r; +} +EXPORT_SYMBOL_GPL(kvm_handle_page_fault); + static bool check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level) { diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 330bf3a..2ae88f0 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -77,6 +77,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, bool accessed_dirty); 
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu); +int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, + u64 fault_address); static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e1f8e89..8f263bf 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -192,7 +192,6 @@ struct vcpu_svm { unsigned int3_injected; unsigned long int3_rip; - u32 apf_reason; /* cached guest cpuid flags for faster access */ bool nrips_enabled : 1; @@ -2071,34 +2070,9 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) static int pf_interception(struct vcpu_svm *svm) { u64 fault_address = svm->vmcb->control.exit_info_2; - u64 error_code; - int r = 1; + u64 error_code = svm->vmcb->control.exit_info_1; - switch (svm->apf_reason) { - default: - error_code = svm->vmcb->control.exit_info_1; - - trace_kvm_page_fault(fault_address, error_code); - if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu)) - kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); - r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code, - svm->vmcb->control.insn_bytes, - svm->vmcb->control.insn_len); - break; - case KVM_PV_REASON_PAGE_NOT_PRESENT: - svm->apf_reason = 0; - local_irq_disable(); - kvm_async_pf_task_wait(fault_address); - local_irq_enable(); - break; - case KVM_PV_REASON_PAGE_READY: - svm->apf_reason = 0; - local_irq_disable(); - kvm_async_pf_task_wake(fault_address); - local_irq_enable(); - break; - } - return r; + return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address); } static int db_interception(struct vcpu_svm *svm) @@ -2551,7 +2525,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) break; case SVM_EXIT_EXCP_BASE + PF_VECTOR: /* When we're shadowing, trap PFs, but not async PF */ - if (!npt_enabled && svm->apf_reason == 0) + if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0) return NESTED_EXIT_HOST; break; default: @@ -2594,7 +2568,7 @@ static int 
nested_svm_intercept(struct vcpu_svm *svm) vmexit = NESTED_EXIT_DONE; /* async page fault always cause vmexit */ else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && - svm->apf_reason != 0) + svm->vcpu.arch.apf.host_apf_reason != 0) vmexit = NESTED_EXIT_DONE; break; } @@ -4891,7 +4865,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) /* if exit due to PF check for async PF */ if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) - svm->apf_reason = kvm_read_and_reset_pf_reason(); + svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); if (npt_enabled) { vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index df825bb..d20f794 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5648,14 +5648,8 @@ static int handle_exception(struct kvm_vcpu *vcpu) } if (is_page_fault(intr_info)) { - /* EPT won't cause page fault directly */ - BUG_ON(enable_ept); cr2 = vmcs_readl(EXIT_QUALIFICATION); - trace_kvm_page_fault(cr2, error_code); - - if (kvm_event_needs_reinjection(vcpu)) - kvm_mmu_unprotect_page_virt(vcpu, cr2); - return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0); + return kvm_handle_page_fault(vcpu, error_code, cr2); } ex_no = intr_info & INTR_INFO_VECTOR_MASK; @@ -8602,6 +8596,10 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); exit_intr_info = vmx->exit_intr_info; + /* if exit due to PF check for async PF */ + if (is_page_fault(exit_intr_info)) + vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); + /* Handle machine checks before interrupts are enabled */ if (is_machine_check(exit_intr_info)) kvm_machine_check();