Message ID | 20190420055059.16816-6-sean.j.christopherson@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: VMX: INTR, NMI and #MC cleanup | expand |
On 20/04/19 07:50, Sean Christopherson wrote: > Per commit 1b6269db3f833 ("KVM: VMX: Handle NMIs before enabling > interrupts and preemption"), NMIs are handled directly in vmx_vcpu_run() > to "make sure we handle NMI on the current cpu, and that we don't > service maskable interrupts before non-maskable ones". The other > exceptions handled by complete_atomic_exit(), e.g. async #PF and #MC, > have similar requirements, and are located there to avoid extra VMREADs > since VMX bins hardware exceptions and NMIs into a single exit reason. > > Clean up the code and eliminate the vaguely named complete_atomic_exit() > by moving the interrupts-disabled exception and NMI handling into the > existing handle_external_intrs() callback, and rename the callback to > a more appropriate name. > > In addition to improving code readability, this also ensures the NMI > handler is run with the host's debug registers loaded in the unlikely > event that the user is debugging NMIs. Accuracy of the last_guest_tsc > field is also improved when handling NMIs (and #MCs) as the handler > will run after updating said field. > > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> Very nice, just some changes I'd like to propose. "atomic" is Linux lingo for "irqs disabled", so I'd like to rename the handler to handle_exit_atomic so it has a correspondence with handle_exit. Likewise we could have handle_exception_nmi_atomic and handle_external_interrupt_atomic. 
Putting everything together we get: diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 35e7937cc9ac..b7d5935c1637 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1117,7 +1117,7 @@ struct kvm_x86_ops { int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage); - void (*handle_external_intr)(struct kvm_vcpu *vcpu); + void (*handle_exit_atomic)(struct kvm_vcpu *vcpu); bool (*mpx_supported)(void); bool (*xsaves_supported)(void); bool (*umip_emulated)(void); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index acc09e9fc173..9c6458e60558 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6172,7 +6172,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, return ret; } -static void svm_handle_external_intr(struct kvm_vcpu *vcpu) +static void svm_handle_exit_atomic(struct kvm_vcpu *vcpu) { kvm_before_interrupt(vcpu); local_irq_enable(); @@ -7268,7 +7268,7 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) .set_tdp_cr3 = set_tdp_cr3, .check_intercept = svm_check_intercept, - .handle_external_intr = svm_handle_external_intr, + .handle_exit_atomic = svm_handle_exit_atomic, .request_immediate_exit = __kvm_request_immediate_exit, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 963c8c409223..dfaa770b9bb3 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4437,11 +4437,11 @@ static void kvm_machine_check(void) static int handle_machine_check(struct kvm_vcpu *vcpu) { - /* already handled by vcpu_run */ + /* handled by vmx_vcpu_run() */ return 1; } -static int handle_exception(struct kvm_vcpu *vcpu) +static int handle_exception_nmi(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct kvm_run *kvm_run = vcpu->run; @@ -4454,7 +4454,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) intr_info = vmx->exit_intr_info; if (is_machine_check(intr_info) || 
is_nmi(intr_info)) - return 1; /* already handled by vmx_complete_atomic_exit */ + return 1; /* handled by handle_exception_nmi_atomic() */ if (is_invalid_opcode(intr_info)) return handle_ud(vcpu); @@ -5462,7 +5462,7 @@ static int handle_encls(struct kvm_vcpu *vcpu) * to be done to userspace and return 0. */ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { - [EXIT_REASON_EXCEPTION_NMI] = handle_exception, + [EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi, [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, [EXIT_REASON_NMI_WINDOW] = handle_nmi_window, @@ -6100,11 +6100,8 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); } -static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) +static void handle_exception_nmi_atomic(struct vcpu_vmx *vmx) { - if (vmx->exit_reason != EXIT_REASON_EXCEPTION_NMI) - return; - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); /* if exit due to PF check for async PF */ @@ -6123,7 +6120,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) } } -static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) +static void handle_external_interrupt_atomic(struct kvm_vcpu *vcpu) { unsigned int vector; unsigned long entry; @@ -6133,9 +6130,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) gate_desc *desc; u32 intr_info; - if (to_vmx(vcpu)->exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT) - return; - intr_info = vmcs_read32(VM_EXIT_INTR_INFO); if (WARN_ONCE(!is_external_intr(intr_info), "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info)) @@ -6170,7 +6164,17 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) kvm_after_interrupt(vcpu); } -STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); +STACK_FRAME_NON_STANDARD(handle_external_interrupt_atomic); + +static void vmx_handle_exit_atomic(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + 
+ if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) + handle_external_interrupt_atomic(vcpu); + else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI) + handle_exception_nmi_atomic(vmx); +} static bool vmx_has_emulated_msr(int index) { @@ -6540,7 +6544,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->launched = 1; vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); - vmx_complete_atomic_exit(vmx); vmx_recover_nmi_blocking(vmx); vmx_complete_interrupts(vmx); } @@ -7694,7 +7697,7 @@ static __exit void hardware_unsetup(void) .set_tdp_cr3 = vmx_set_cr3, .check_intercept = vmx_check_intercept, - .handle_external_intr = vmx_handle_external_intr, + .handle_exit_atomic = vmx_handle_exit_atomic, .mpx_supported = vmx_mpx_supported, .xsaves_supported = vmx_xsaves_supported, .umip_emulated = vmx_umip_emulated, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6e2f53cd8ea8..88489af13e96 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7999,7 +7999,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - kvm_x86_ops->handle_external_intr(vcpu); + kvm_x86_ops->handle_exit_atomic(vcpu); ++vcpu->stat.exits;
On Thu, Jun 06, 2019 at 03:20:49PM +0200, Paolo Bonzini wrote: > On 20/04/19 07:50, Sean Christopherson wrote: > > Per commit 1b6269db3f833 ("KVM: VMX: Handle NMIs before enabling > > interrupts and preemption"), NMIs are handled directly in vmx_vcpu_run() > > to "make sure we handle NMI on the current cpu, and that we don't > > service maskable interrupts before non-maskable ones". The other > > exceptions handled by complete_atomic_exit(), e.g. async #PF and #MC, > > have similar requirements, and are located there to avoid extra VMREADs > > since VMX bins hardware exceptions and NMIs into a single exit reason. > > > > Clean up the code and eliminate the vaguely named complete_atomic_exit() > > by moving the interrupts-disabled exception and NMI handling into the > > existing handle_external_intrs() callback, and rename the callback to > > a more appropriate name. > > > > In addition to improving code readability, this also ensures the NMI > > handler is run with the host's debug registers loaded in the unlikely > > event that the user is debugging NMIs. Accuracy of the last_guest_tsc > > field is also improved when handling NMIs (and #MCs) as the handler > > will run after updating said field. > > > > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> > > Very nice, just some changes I'd like to propose. "atomic" is Linux > lingo for "irqs disabled", so I'd like to rename the handler to The code disagrees, e.g. /* * Are we running in atomic context? WARNING: this macro cannot * always detect atomic context; in particular, it cannot know about * held spinlocks in non-preemptible kernels. Thus it should not be * used in the general case to determine whether sleeping is possible. * Do not use in_atomic() in driver code. */ #define in_atomic() (preempt_count() != 0) and void ___might_sleep(...) { ... 
printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", in_atomic(), irqs_disabled(), current->pid, current->comm); } and static inline void *kmap_atomic(struct page *page) { preempt_disable(); pagefault_disable(); return page_address(page); } My interpretation of things is that the kernel's definition of an atomic context is with respect to preemption. Disabling IRQs would also provide atomicity, but the reverse is not true, i.e. entering an atomic context does not imply IRQs are disabled. As it pertains to KVM, we specifically care about IRQs being disabled, e.g. VMX needs to ensure #MC and NMI are handled before any pending IRQs, and both VMX and SVM need to ensure a pending perf interrupt is handled in the callback. And if "atomic" is interpreted as "IRQs disabled", one could argue that the SVM behavior is buggy since enabling IRQs would break atomicity. > handle_exit_atomic so it has a correspondence with handle_exit. > Likewise we could have handle_exception_nmi_atomic and > handle_external_interrupt_atomic.
On 06/06/19 17:14, Sean Christopherson wrote: > The code disagrees, e.g. > > /* > * Are we running in atomic context? WARNING: this macro cannot > * always detect atomic context; in particular, it cannot know about > * held spinlocks in non-preemptible kernels. Thus it should not be > * used in the general case to determine whether sleeping is possible. > * Do not use in_atomic() in driver code. > */ > #define in_atomic() (preempt_count() != 0) You're totally right. "_irqoff" seems to be the common suffix for irq-disabled functions. Paolo
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8d68ba0cba0c..cd60c3ae7f66 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1109,7 +1109,7 @@ struct kvm_x86_ops { int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage); - void (*handle_external_intr)(struct kvm_vcpu *vcpu); + void (*handle_events_irqs_disabled)(struct kvm_vcpu *vcpu); bool (*mpx_supported)(void); bool (*xsaves_supported)(void); bool (*umip_emulated)(void); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 38e1c7d382a1..e117058eba87 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6160,7 +6160,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, return ret; } -static void svm_handle_external_intr(struct kvm_vcpu *vcpu) +static void svm_handle_events_irqs_disabled(struct kvm_vcpu *vcpu) { kvm_before_interrupt(vcpu); local_irq_enable(); @@ -7256,7 +7256,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .set_tdp_cr3 = set_tdp_cr3, .check_intercept = svm_check_intercept, - .handle_external_intr = svm_handle_external_intr, + .handle_events_irqs_disabled = svm_handle_events_irqs_disabled, .request_immediate_exit = __kvm_request_immediate_exit, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 1fbd5a5dd6af..9b580749217f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4441,7 +4441,7 @@ static void kvm_machine_check(void) static int handle_machine_check(struct kvm_vcpu *vcpu) { - /* already handled by vcpu_run */ + /* handled by vmx_handle_events_irqs_disabled() */ return 1; } @@ -4461,7 +4461,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) return handle_machine_check(vcpu); if (is_nmi(intr_info)) - return 1; /* already handled by vmx_vcpu_run() */ + return 1; /* handled by vmx_handle_events_irqs_disabled() */ if (is_invalid_opcode(intr_info)) return handle_ud(vcpu); @@ -6099,11 +6099,8 @@ 
static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); } -static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) +static void vmx_handle_exception_nmi_irqs_disabled(struct vcpu_vmx *vmx) { - if (vmx->exit_reason != EXIT_REASON_EXCEPTION_NMI) - return; - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); /* if exit due to PF check for async PF */ @@ -6131,9 +6128,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) #endif u32 intr_info; - if (to_vmx(vcpu)->exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT) - return; - intr_info = vmcs_read32(VM_EXIT_INTR_INFO); if (WARN_ONCE(!is_external_intr(intr_info), "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info)) @@ -6169,6 +6163,16 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) } STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); +static void vmx_handle_events_irqs_disabled(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI) + vmx_handle_exception_nmi_irqs_disabled(vmx); + else if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) + vmx_handle_external_intr(vcpu); +} + static bool vmx_has_emulated_msr(int index) { switch (index) { @@ -6533,7 +6537,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->launched = 1; vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); - vmx_complete_atomic_exit(vmx); vmx_recover_nmi_blocking(vmx); vmx_complete_interrupts(vmx); } @@ -7708,7 +7711,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .set_tdp_cr3 = vmx_set_cr3, .check_intercept = vmx_check_intercept, - .handle_external_intr = vmx_handle_external_intr, + .handle_events_irqs_disabled = vmx_handle_events_irqs_disabled, .mpx_supported = vmx_mpx_supported, .xsaves_supported = vmx_xsaves_supported, .umip_emulated = vmx_umip_emulated, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7aa002b12f25..82d320f42b1d 100644 
--- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7945,7 +7945,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - kvm_x86_ops->handle_external_intr(vcpu); + kvm_x86_ops->handle_events_irqs_disabled(vcpu); ++vcpu->stat.exits;
Per commit 1b6269db3f833 ("KVM: VMX: Handle NMIs before enabling interrupts and preemption"), NMIs are handled directly in vmx_vcpu_run() to "make sure we handle NMI on the current cpu, and that we don't service maskable interrupts before non-maskable ones". The other exceptions handled by vmx_complete_atomic_exit(), e.g. async #PF and #MC, have similar requirements, and are located there to avoid extra VMREADs since VMX bins hardware exceptions and NMIs into a single exit reason. Clean up the code and eliminate the vaguely named vmx_complete_atomic_exit() by moving the interrupts-disabled exception and NMI handling into the existing handle_external_intr() callback, and rename the callback to a more appropriate name. In addition to improving code readability, this also ensures the NMI handler is run with the host's debug registers loaded in the unlikely event that the user is debugging NMIs. Accuracy of the last_guest_tsc field is also improved when handling NMIs (and #MCs) as the handler will run after updating said field. Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx/vmx.c | 25 ++++++++++++++----------- arch/x86/kvm/x86.c | 2 +- 4 files changed, 18 insertions(+), 15 deletions(-)