
[5/5] KVM: VMX: Handle NMIs, #MCs and async #PFs in common irqs-disabled fn

Message ID 20190420055059.16816-6-sean.j.christopherson@intel.com (mailing list archive)
State New, archived
Series KVM: VMX: INTR, NMI and #MC cleanup

Commit Message

Sean Christopherson April 20, 2019, 5:50 a.m. UTC
Per commit 1b6269db3f833 ("KVM: VMX: Handle NMIs before enabling
interrupts and preemption"), NMIs are handled directly in vmx_vcpu_run()
to "make sure we handle NMI on the current cpu, and that we don't
service maskable interrupts before non-maskable ones".  The other
exceptions handled by vmx_complete_atomic_exit(), e.g. async #PF and #MC,
have similar requirements, and are located there to avoid extra VMREADs
since VMX bins hardware exceptions and NMIs into a single exit reason.
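
E.g. NMI (vector 2), #MC (vector 18) and #PF (vector 14) all arrive as
EXIT_REASON_EXCEPTION_NMI and are distinguished solely by
VM_EXIT_INTR_INFO, so a single VMREAD covers them all.  Rough sketch of
the decode (is_*() helpers as in vmcs.h; the callees below are
placeholders, not the actual handlers):

  u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);

  if (is_nmi(intr_info))			/* type == NMI */
  	do_nmi_handling();			/* placeholder */
  else if (is_machine_check(intr_info))		/* vector == MC_VECTOR */
  	kvm_machine_check();
  else if (is_page_fault(intr_info))		/* vector == PF_VECTOR */
  	check_async_page_fault();		/* placeholder */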

Clean up the code and eliminate the vaguely named vmx_complete_atomic_exit()
by moving the interrupts-disabled exception and NMI handling into the
existing handle_external_intr() callback, and renaming the callback to
something more appropriate.

In addition to improving code readability, this also ensures the NMI
handler is run with the host's debug registers loaded in the unlikely
event that the user is debugging NMIs.  Accuracy of the last_guest_tsc
field is also improved when handling NMIs (and #MCs) as the handler
will run after updating said field.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/svm.c              |  4 ++--
 arch/x86/kvm/vmx/vmx.c          | 25 ++++++++++++++-----------
 arch/x86/kvm/x86.c              |  2 +-
 4 files changed, 18 insertions(+), 15 deletions(-)

Comments

Paolo Bonzini June 6, 2019, 1:20 p.m. UTC | #1
On 20/04/19 07:50, Sean Christopherson wrote:
> [...]

Very nice, just some changes I'd like to propose. "atomic" is Linux 
lingo for "irqs disabled", so I'd like to rename the handler to 
handle_exit_atomic so it has a correspondence with handle_exit.  
Likewise we could have handle_exception_nmi_atomic and 
handle_external_interrupt_atomic.

Putting everything together we get:

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 35e7937cc9ac..b7d5935c1637 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1117,7 +1117,7 @@ struct kvm_x86_ops {
 	int (*check_intercept)(struct kvm_vcpu *vcpu,
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage);
-	void (*handle_external_intr)(struct kvm_vcpu *vcpu);
+	void (*handle_exit_atomic)(struct kvm_vcpu *vcpu);
 	bool (*mpx_supported)(void);
 	bool (*xsaves_supported)(void);
 	bool (*umip_emulated)(void);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index acc09e9fc173..9c6458e60558 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -6172,7 +6172,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
 	return ret;
 }
 
-static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
+static void svm_handle_exit_atomic(struct kvm_vcpu *vcpu)
 {
 	kvm_before_interrupt(vcpu);
 	local_irq_enable();
@@ -7268,7 +7268,7 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
 	.set_tdp_cr3 = set_tdp_cr3,
 
 	.check_intercept = svm_check_intercept,
-	.handle_external_intr = svm_handle_external_intr,
+	.handle_exit_atomic = svm_handle_exit_atomic,
 
 	.request_immediate_exit = __kvm_request_immediate_exit,
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 963c8c409223..dfaa770b9bb3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4437,11 +4437,11 @@ static void kvm_machine_check(void)
 
 static int handle_machine_check(struct kvm_vcpu *vcpu)
 {
-	/* already handled by vcpu_run */
+	/* handled by vmx_vcpu_run() */
 	return 1;
 }
 
-static int handle_exception(struct kvm_vcpu *vcpu)
+static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct kvm_run *kvm_run = vcpu->run;
@@ -4454,7 +4454,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 	intr_info = vmx->exit_intr_info;
 
 	if (is_machine_check(intr_info) || is_nmi(intr_info))
-		return 1;  /* already handled by vmx_complete_atomic_exit */
+		return 1; /* handled by handle_exception_nmi_atomic() */
 
 	if (is_invalid_opcode(intr_info))
 		return handle_ud(vcpu);
@@ -5462,7 +5462,7 @@ static int handle_encls(struct kvm_vcpu *vcpu)
  * to be done to userspace and return 0.
  */
 static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
-	[EXIT_REASON_EXCEPTION_NMI]           = handle_exception,
+	[EXIT_REASON_EXCEPTION_NMI]           = handle_exception_nmi,
 	[EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
 	[EXIT_REASON_TRIPLE_FAULT]            = handle_triple_fault,
 	[EXIT_REASON_NMI_WINDOW]	      = handle_nmi_window,
@@ -6100,11 +6100,8 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 	memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
 }
 
-static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
+static void handle_exception_nmi_atomic(struct vcpu_vmx *vmx)
 {
-	if (vmx->exit_reason != EXIT_REASON_EXCEPTION_NMI)
-		return;
-
 	vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
 	/* if exit due to PF check for async PF */
@@ -6123,7 +6120,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 	}
 }
 
-static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
+static void handle_external_interrupt_atomic(struct kvm_vcpu *vcpu)
 {
 	unsigned int vector;
 	unsigned long entry;
@@ -6133,9 +6130,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 	gate_desc *desc;
 	u32 intr_info;
 
-	if (to_vmx(vcpu)->exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT)
-		return;
-
 	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 	if (WARN_ONCE(!is_external_intr(intr_info),
 	    "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
@@ -6170,7 +6164,17 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 
 	kvm_after_interrupt(vcpu);
 }
-STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
+STACK_FRAME_NON_STANDARD(handle_external_interrupt_atomic);
+
+static void vmx_handle_exit_atomic(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
+		handle_external_interrupt_atomic(vcpu);
+	else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
+		handle_exception_nmi_atomic(vmx);
+}
 
 static bool vmx_has_emulated_msr(int index)
 {
@@ -6540,7 +6544,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx->loaded_vmcs->launched = 1;
 	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
-	vmx_complete_atomic_exit(vmx);
 	vmx_recover_nmi_blocking(vmx);
 	vmx_complete_interrupts(vmx);
 }
@@ -7694,7 +7697,7 @@ static __exit void hardware_unsetup(void)
 	.set_tdp_cr3 = vmx_set_cr3,
 
 	.check_intercept = vmx_check_intercept,
-	.handle_external_intr = vmx_handle_external_intr,
+	.handle_exit_atomic = vmx_handle_exit_atomic,
 	.mpx_supported = vmx_mpx_supported,
 	.xsaves_supported = vmx_xsaves_supported,
 	.umip_emulated = vmx_umip_emulated,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6e2f53cd8ea8..88489af13e96 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7999,7 +7999,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
 
-	kvm_x86_ops->handle_external_intr(vcpu);
+	kvm_x86_ops->handle_exit_atomic(vcpu);
 
 	++vcpu->stat.exits;

Sean Christopherson June 6, 2019, 3:14 p.m. UTC | #2
On Thu, Jun 06, 2019 at 03:20:49PM +0200, Paolo Bonzini wrote:
> On 20/04/19 07:50, Sean Christopherson wrote:
> > [...]
> 
> Very nice, just some changes I'd like to propose. "atomic" is Linux 
> lingo for "irqs disabled", so I'd like to rename the handler to 

The code disagrees, e.g.

  /*
   * Are we running in atomic context?  WARNING: this macro cannot
   * always detect atomic context; in particular, it cannot know about
   * held spinlocks in non-preemptible kernels.  Thus it should not be
   * used in the general case to determine whether sleeping is possible.
   * Do not use in_atomic() in driver code.
   */
  #define in_atomic()	(preempt_count() != 0)

and

  void ___might_sleep(...)
  {
	...

	printk(KERN_ERR
		"in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
			in_atomic(), irqs_disabled(),
			current->pid, current->comm);
  }

and

  static inline void *kmap_atomic(struct page *page)
  {
	preempt_disable();
	pagefault_disable();
	return page_address(page);
  }

My interpretation of things is that the kernel's definition of an atomic
context is with respect to preemption.  Disabling IRQs would also provide
atomicity, but the reverse is not true, i.e. entering an atomic context
does not imply IRQs are disabled.
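
E.g. a contrived snippet (assumes CONFIG_PREEMPT so that
preempt_disable() bumps preempt_count(); purely illustrative, not from
the kernel) where the two notions diverge:

  preempt_disable();
  WARN_ON(!in_atomic());	/* atomic context per preempt_count() */
  WARN_ON(irqs_disabled());	/* ...but IRQs were never masked */
  preempt_enable();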

As it pertains to KVM, we specifically care about IRQs being disabled,
e.g. VMX needs to ensure #MC and NMI are handled before any pending IRQs,
and both VMX and SVM need to ensure a pending perf interrupt is handled
in the callback.

And if "atomic" is interpreted as "IRQs disabled", one could argue that
the SVM behavior is buggy since enabling IRQs would break atomicity.
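
E.g. in the SVM hunk above:

  static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
  {
  	kvm_before_interrupt(vcpu);
  	local_irq_enable();
  	...

IRQs are enabled inside the callback on purpose, precisely so that the
pending (perf) interrupt is serviced within the
kvm_before/after_interrupt() window.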

> handle_exit_atomic so it has a correspondence with handle_exit.  
> Likewise we could have handle_exception_nmi_atomic and 
> handle_external_interrupt_atomic.

Paolo Bonzini June 7, 2019, 11:40 a.m. UTC | #3
On 06/06/19 17:14, Sean Christopherson wrote:
> The code disagrees, e.g.
> 
>   /*
>    * Are we running in atomic context?  WARNING: this macro cannot
>    * always detect atomic context; in particular, it cannot know about
>    * held spinlocks in non-preemptible kernels.  Thus it should not be
>    * used in the general case to determine whether sleeping is possible.
>    * Do not use in_atomic() in driver code.
>    */
>   #define in_atomic()	(preempt_count() != 0)

You're totally right.  "_irqoff" seems to be the common suffix for
irq-disabled functions.
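
Applied to this series, e.g. (sketch of the rename only):

  -	void (*handle_events_irqs_disabled)(struct kvm_vcpu *vcpu);
  +	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);

with handle_exception_nmi_irqoff and handle_external_interrupt_irqoff
as the VMX helpers.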

Paolo

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8d68ba0cba0c..cd60c3ae7f66 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1109,7 +1109,7 @@ struct kvm_x86_ops {
 	int (*check_intercept)(struct kvm_vcpu *vcpu,
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage);
-	void (*handle_external_intr)(struct kvm_vcpu *vcpu);
+	void (*handle_events_irqs_disabled)(struct kvm_vcpu *vcpu);
 	bool (*mpx_supported)(void);
 	bool (*xsaves_supported)(void);
 	bool (*umip_emulated)(void);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 38e1c7d382a1..e117058eba87 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -6160,7 +6160,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
 	return ret;
 }
 
-static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
+static void svm_handle_events_irqs_disabled(struct kvm_vcpu *vcpu)
 {
 	kvm_before_interrupt(vcpu);
 	local_irq_enable();
@@ -7256,7 +7256,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.set_tdp_cr3 = set_tdp_cr3,
 
 	.check_intercept = svm_check_intercept,
-	.handle_external_intr = svm_handle_external_intr,
+	.handle_events_irqs_disabled = svm_handle_events_irqs_disabled,
 
 	.request_immediate_exit = __kvm_request_immediate_exit,
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1fbd5a5dd6af..9b580749217f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4441,7 +4441,7 @@ static void kvm_machine_check(void)
 
 static int handle_machine_check(struct kvm_vcpu *vcpu)
 {
-	/* already handled by vcpu_run */
+	/* handled by vmx_handle_events_irqs_disabled() */
 	return 1;
 }
 
@@ -4461,7 +4461,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 		return handle_machine_check(vcpu);
 
 	if (is_nmi(intr_info))
-		return 1;  /* already handled by vmx_vcpu_run() */
+		return 1; /* handled by vmx_handle_events_irqs_disabled() */
 
 	if (is_invalid_opcode(intr_info))
 		return handle_ud(vcpu);
@@ -6099,11 +6099,8 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 	memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
 }
 
-static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
+static void vmx_handle_exception_nmi_irqs_disabled(struct vcpu_vmx *vmx)
 {
-	if (vmx->exit_reason != EXIT_REASON_EXCEPTION_NMI)
-		return;
-
 	vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
 	/* if exit due to PF check for async PF */
@@ -6131,9 +6128,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 #endif
 	u32 intr_info;
 
-	if (to_vmx(vcpu)->exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT)
-		return;
-
 	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 	if (WARN_ONCE(!is_external_intr(intr_info),
 	    "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
@@ -6169,6 +6163,16 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 }
 STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
 
+static void vmx_handle_events_irqs_disabled(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
+		vmx_handle_exception_nmi_irqs_disabled(vmx);
+	else if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
+		vmx_handle_external_intr(vcpu);
+}
+
 static bool vmx_has_emulated_msr(int index)
 {
 	switch (index) {
@@ -6533,7 +6537,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx->loaded_vmcs->launched = 1;
 	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
-	vmx_complete_atomic_exit(vmx);
 	vmx_recover_nmi_blocking(vmx);
 	vmx_complete_interrupts(vmx);
 }
@@ -7708,7 +7711,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.set_tdp_cr3 = vmx_set_cr3,
 
 	.check_intercept = vmx_check_intercept,
-	.handle_external_intr = vmx_handle_external_intr,
+	.handle_events_irqs_disabled = vmx_handle_events_irqs_disabled,
 	.mpx_supported = vmx_mpx_supported,
 	.xsaves_supported = vmx_xsaves_supported,
 	.umip_emulated = vmx_umip_emulated,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7aa002b12f25..82d320f42b1d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7945,7 +7945,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
 
-	kvm_x86_ops->handle_external_intr(vcpu);
+	kvm_x86_ops->handle_events_irqs_disabled(vcpu);
 
 	++vcpu->stat.exits;