diff mbox

[v3,1/4] KVM: nVMX: Rework interception of IRQs and NMIs

Message ID c5b0a3c53c6d19092d1313fc7a6e807031ea9054.1394218994.git.jan.kiszka@siemens.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jan Kiszka March 7, 2014, 7:03 p.m. UTC
Move the check for leaving L2 on pending and intercepted IRQs or NMIs
from the *_allowed handler into a dedicated callback. Invoke this
callback at the relevant points before KVM checks if IRQs/NMIs can be
injected. The callback is responsible for switching from L2 to L1 if
needed and for injecting the proper vmexit events.

The rework fixes L2 wakeups from HLT and provides the foundation for
preemption timer emulation.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/vmx.c              | 67 +++++++++++++++++++++++------------------
 arch/x86/kvm/x86.c              | 26 +++++++++++-----
 3 files changed, 59 insertions(+), 36 deletions(-)

Comments

Paolo Bonzini March 7, 2014, 7:48 p.m. UTC | #1
Il 07/03/2014 20:03, Jan Kiszka ha scritto:
> @@ -4631,22 +4631,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
>
>  static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>  {
> -	if (is_guest_mode(vcpu)) {
> -		if (to_vmx(vcpu)->nested.nested_run_pending)
> -			return 0;
> -		if (nested_exit_on_nmi(vcpu)) {
> -			nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
> -					  NMI_VECTOR | INTR_TYPE_NMI_INTR |
> -					  INTR_INFO_VALID_MASK, 0);
> -			/*
> -			 * The NMI-triggered VM exit counts as injection:
> -			 * clear this one and block further NMIs.
> -			 */
> -			vcpu->arch.nmi_pending = 0;
> -			vmx_set_nmi_mask(vcpu, true);
> -			return 0;
> -		}
> -	}
> +	if (to_vmx(vcpu)->nested.nested_run_pending)
> +		return 0;
>
>  	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
>  		return 0;
> @@ -4658,19 +4644,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>
>  static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
>  {
> -	if (is_guest_mode(vcpu)) {
> -		if (to_vmx(vcpu)->nested.nested_run_pending)
> -			return 0;
> -		if (nested_exit_on_intr(vcpu)) {
> -			nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
> -					  0, 0);
> -			/*
> -			 * fall through to normal code, but now in L1, not L2
> -			 */
> -		}
> -	}
> -
> -	return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
> +	return (!to_vmx(vcpu)->nested.nested_run_pending &&
> +		vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>  		!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
>  			(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));

The checks on nested_run_pending are not needed anymore and can be 
replaced with a WARN_ON.  Otherwise,

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka March 8, 2014, 9:21 a.m. UTC | #2
On 2014-03-07 20:48, Paolo Bonzini wrote:
> Il 07/03/2014 20:03, Jan Kiszka ha scritto:
>> @@ -4631,22 +4631,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu
>> *vcpu, bool masked)
>>
>>  static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>>  {
>> -    if (is_guest_mode(vcpu)) {
>> -        if (to_vmx(vcpu)->nested.nested_run_pending)
>> -            return 0;
>> -        if (nested_exit_on_nmi(vcpu)) {
>> -            nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
>> -                      NMI_VECTOR | INTR_TYPE_NMI_INTR |
>> -                      INTR_INFO_VALID_MASK, 0);
>> -            /*
>> -             * The NMI-triggered VM exit counts as injection:
>> -             * clear this one and block further NMIs.
>> -             */
>> -            vcpu->arch.nmi_pending = 0;
>> -            vmx_set_nmi_mask(vcpu, true);
>> -            return 0;
>> -        }
>> -    }
>> +    if (to_vmx(vcpu)->nested.nested_run_pending)
>> +        return 0;
>>
>>      if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
>>          return 0;
>> @@ -4658,19 +4644,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>>
>>  static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
>>  {
>> -    if (is_guest_mode(vcpu)) {
>> -        if (to_vmx(vcpu)->nested.nested_run_pending)
>> -            return 0;
>> -        if (nested_exit_on_intr(vcpu)) {
>> -            nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
>> -                      0, 0);
>> -            /*
>> -             * fall through to normal code, but now in L1, not L2
>> -             */
>> -        }
>> -    }
>> -
>> -    return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>> +    return (!to_vmx(vcpu)->nested.nested_run_pending &&
>> +        vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>          !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
>>              (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
> 
> The checks on nested_run_pending are not needed anymore and can be
> replaced with a WARN_ON.  Otherwise,

Nope, that won't be correct: If we have a pending interrupt that L1 does
not intercept, we still trigger this condition legally.

Jan

> 
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> Paolo
> -- 
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Paolo Bonzini March 9, 2014, 7:33 a.m. UTC | #3
Il 08/03/2014 10:21, Jan Kiszka ha scritto:
> On 2014-03-07 20:48, Paolo Bonzini wrote:
>> Il 07/03/2014 20:03, Jan Kiszka ha scritto:
>>> @@ -4631,22 +4631,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu
>>> *vcpu, bool masked)
>>>
>>>  static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>>>  {
>>> -    if (is_guest_mode(vcpu)) {
>>> -        if (to_vmx(vcpu)->nested.nested_run_pending)
>>> -            return 0;
>>> -        if (nested_exit_on_nmi(vcpu)) {
>>> -            nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
>>> -                      NMI_VECTOR | INTR_TYPE_NMI_INTR |
>>> -                      INTR_INFO_VALID_MASK, 0);
>>> -            /*
>>> -             * The NMI-triggered VM exit counts as injection:
>>> -             * clear this one and block further NMIs.
>>> -             */
>>> -            vcpu->arch.nmi_pending = 0;
>>> -            vmx_set_nmi_mask(vcpu, true);
>>> -            return 0;
>>> -        }
>>> -    }
>>> +    if (to_vmx(vcpu)->nested.nested_run_pending)
>>> +        return 0;
>>>
>>>      if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
>>>          return 0;
>>> @@ -4658,19 +4644,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>>>
>>>  static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
>>>  {
>>> -    if (is_guest_mode(vcpu)) {
>>> -        if (to_vmx(vcpu)->nested.nested_run_pending)
>>> -            return 0;
>>> -        if (nested_exit_on_intr(vcpu)) {
>>> -            nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
>>> -                      0, 0);
>>> -            /*
>>> -             * fall through to normal code, but now in L1, not L2
>>> -             */
>>> -        }
>>> -    }
>>> -
>>> -    return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>> +    return (!to_vmx(vcpu)->nested.nested_run_pending &&
>>> +        vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>>          !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
>>>              (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
>>
>> The checks on nested_run_pending are not needed anymore and can be
>> replaced with a WARN_ON.  Otherwise,
>
> Nope, that won't be correct: If we have a pending interrupt that L1 does
> not intercept, we still trigger this condition legally.

Right, this is the case of !nested_exit_on_intr(vcpu) or 
!nested_exit_on_nmi(vcpu).

Why don't we need to request an immediate exit in that case, in order to 
inject the interrupt into L2?

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka March 9, 2014, 8:01 a.m. UTC | #4
On 2014-03-09 08:33, Paolo Bonzini wrote:
> Il 08/03/2014 10:21, Jan Kiszka ha scritto:
>> On 2014-03-07 20:48, Paolo Bonzini wrote:
>>> Il 07/03/2014 20:03, Jan Kiszka ha scritto:
>>>> @@ -4631,22 +4631,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu
>>>> *vcpu, bool masked)
>>>>
>>>>  static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>>>>  {
>>>> -    if (is_guest_mode(vcpu)) {
>>>> -        if (to_vmx(vcpu)->nested.nested_run_pending)
>>>> -            return 0;
>>>> -        if (nested_exit_on_nmi(vcpu)) {
>>>> -            nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
>>>> -                      NMI_VECTOR | INTR_TYPE_NMI_INTR |
>>>> -                      INTR_INFO_VALID_MASK, 0);
>>>> -            /*
>>>> -             * The NMI-triggered VM exit counts as injection:
>>>> -             * clear this one and block further NMIs.
>>>> -             */
>>>> -            vcpu->arch.nmi_pending = 0;
>>>> -            vmx_set_nmi_mask(vcpu, true);
>>>> -            return 0;
>>>> -        }
>>>> -    }
>>>> +    if (to_vmx(vcpu)->nested.nested_run_pending)
>>>> +        return 0;
>>>>
>>>>      if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
>>>>          return 0;
>>>> @@ -4658,19 +4644,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu
>>>> *vcpu)
>>>>
>>>>  static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
>>>>  {
>>>> -    if (is_guest_mode(vcpu)) {
>>>> -        if (to_vmx(vcpu)->nested.nested_run_pending)
>>>> -            return 0;
>>>> -        if (nested_exit_on_intr(vcpu)) {
>>>> -            nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
>>>> -                      0, 0);
>>>> -            /*
>>>> -             * fall through to normal code, but now in L1, not L2
>>>> -             */
>>>> -        }
>>>> -    }
>>>> -
>>>> -    return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>>> +    return (!to_vmx(vcpu)->nested.nested_run_pending &&
>>>> +        vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>>>          !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
>>>>              (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
>>>
>>> The checks on nested_run_pending are not needed anymore and can be
>>> replaced with a WARN_ON.  Otherwise,
>>
>> Nope, that won't be correct: If we have a pending interrupt that L1 does
>> not intercept, we still trigger this condition legally.
> 
> Right, this is the case of !nested_exit_on_intr(vcpu) or
> !nested_exit_on_nmi(vcpu).
> 
> Why don't we need to request an immediate exit in that case, in order to
> inject the interrupt into L2?

We enable the hardware interrupt/NMI window request for L2 instead.

Jan
Paolo Bonzini March 9, 2014, 8:03 a.m. UTC | #5
Il 09/03/2014 08:33, Paolo Bonzini ha scritto:
> Il 08/03/2014 10:21, Jan Kiszka ha scritto:
>> On 2014-03-07 20:48, Paolo Bonzini wrote:
>>> Il 07/03/2014 20:03, Jan Kiszka ha scritto:
>>>> @@ -4631,22 +4631,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu
>>>> *vcpu, bool masked)
>>>>
>>>>  static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>>>>  {
>>>> -    if (is_guest_mode(vcpu)) {
>>>> -        if (to_vmx(vcpu)->nested.nested_run_pending)
>>>> -            return 0;
>>>> -        if (nested_exit_on_nmi(vcpu)) {
>>>> -            nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
>>>> -                      NMI_VECTOR | INTR_TYPE_NMI_INTR |
>>>> -                      INTR_INFO_VALID_MASK, 0);
>>>> -            /*
>>>> -             * The NMI-triggered VM exit counts as injection:
>>>> -             * clear this one and block further NMIs.
>>>> -             */
>>>> -            vcpu->arch.nmi_pending = 0;
>>>> -            vmx_set_nmi_mask(vcpu, true);
>>>> -            return 0;
>>>> -        }
>>>> -    }
>>>> +    if (to_vmx(vcpu)->nested.nested_run_pending)
>>>> +        return 0;
>>>>
>>>>      if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
>>>>          return 0;
>>>> @@ -4658,19 +4644,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu
>>>> *vcpu)
>>>>
>>>>  static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
>>>>  {
>>>> -    if (is_guest_mode(vcpu)) {
>>>> -        if (to_vmx(vcpu)->nested.nested_run_pending)
>>>> -            return 0;
>>>> -        if (nested_exit_on_intr(vcpu)) {
>>>> -            nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
>>>> -                      0, 0);
>>>> -            /*
>>>> -             * fall through to normal code, but now in L1, not L2
>>>> -             */
>>>> -        }
>>>> -    }
>>>> -
>>>> -    return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>>> +    return (!to_vmx(vcpu)->nested.nested_run_pending &&
>>>> +        vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>>>          !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
>>>>              (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
>>>
>>> The checks on nested_run_pending are not needed anymore and can be
>>> replaced with a WARN_ON.  Otherwise,
>>
>> Nope, that won't be correct: If we have a pending interrupt that L1 does
>> not intercept, we still trigger this condition legally.
>
> Right, this is the case of !nested_exit_on_intr(vcpu) or
> !nested_exit_on_nmi(vcpu).
>
> Why don't we need to request an immediate exit in that case, in order to
> inject the interrupt into L2?

Never mind, this makes no sense.  I was confusing *_allowed with 
enable_*_window.

Applying v3 to kvm/queue, thanks!

Paolo

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka March 9, 2014, 8:07 a.m. UTC | #6
On 2014-03-09 09:03, Paolo Bonzini wrote:
> Il 09/03/2014 08:33, Paolo Bonzini ha scritto:
>> Il 08/03/2014 10:21, Jan Kiszka ha scritto:
>>> On 2014-03-07 20:48, Paolo Bonzini wrote:
>>>> Il 07/03/2014 20:03, Jan Kiszka ha scritto:
>>>>> @@ -4631,22 +4631,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu
>>>>> *vcpu, bool masked)
>>>>>
>>>>>  static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>>>>>  {
>>>>> -    if (is_guest_mode(vcpu)) {
>>>>> -        if (to_vmx(vcpu)->nested.nested_run_pending)
>>>>> -            return 0;
>>>>> -        if (nested_exit_on_nmi(vcpu)) {
>>>>> -            nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
>>>>> -                      NMI_VECTOR | INTR_TYPE_NMI_INTR |
>>>>> -                      INTR_INFO_VALID_MASK, 0);
>>>>> -            /*
>>>>> -             * The NMI-triggered VM exit counts as injection:
>>>>> -             * clear this one and block further NMIs.
>>>>> -             */
>>>>> -            vcpu->arch.nmi_pending = 0;
>>>>> -            vmx_set_nmi_mask(vcpu, true);
>>>>> -            return 0;
>>>>> -        }
>>>>> -    }
>>>>> +    if (to_vmx(vcpu)->nested.nested_run_pending)
>>>>> +        return 0;
>>>>>
>>>>>      if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
>>>>>          return 0;
>>>>> @@ -4658,19 +4644,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu
>>>>> *vcpu)
>>>>>
>>>>>  static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
>>>>>  {
>>>>> -    if (is_guest_mode(vcpu)) {
>>>>> -        if (to_vmx(vcpu)->nested.nested_run_pending)
>>>>> -            return 0;
>>>>> -        if (nested_exit_on_intr(vcpu)) {
>>>>> -            nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
>>>>> -                      0, 0);
>>>>> -            /*
>>>>> -             * fall through to normal code, but now in L1, not L2
>>>>> -             */
>>>>> -        }
>>>>> -    }
>>>>> -
>>>>> -    return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>>>> +    return (!to_vmx(vcpu)->nested.nested_run_pending &&
>>>>> +        vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>>>>          !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
>>>>>              (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
>>>>
>>>> The checks on nested_run_pending are not needed anymore and can be
>>>> replaced with a WARN_ON.  Otherwise,
>>>
>>> Nope, that won't be correct: If we have a pending interrupt that L1 does
>>> not intercept, we still trigger this condition legally.
>>
>> Right, this is the case of !nested_exit_on_intr(vcpu) or
>> !nested_exit_on_nmi(vcpu).
>>
>> Why don't we need to request an immediate exit in that case, in order to
>> inject the interrupt into L2?
> 
> Nevermind, this makes no sense.  I was confusing *_allowed with
> enable_*_window.

This code is mind-blowing and probably still not perfect. I wouldn't be
surprised if we are going to find bugs there until we retire. ;)

> 
> Applying v3 to kvm/queue, thanks!
> 

Great, thank you!
Jan
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e714f8c..84f7373 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -765,6 +765,8 @@  struct kvm_x86_ops {
 			       enum x86_intercept_stage stage);
 	void (*handle_external_intr)(struct kvm_vcpu *vcpu);
 	bool (*mpx_supported)(void);
+
+	int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 83ee24f..d66f739 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4631,22 +4631,8 @@  static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 
 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 {
-	if (is_guest_mode(vcpu)) {
-		if (to_vmx(vcpu)->nested.nested_run_pending)
-			return 0;
-		if (nested_exit_on_nmi(vcpu)) {
-			nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
-					  NMI_VECTOR | INTR_TYPE_NMI_INTR |
-					  INTR_INFO_VALID_MASK, 0);
-			/*
-			 * The NMI-triggered VM exit counts as injection:
-			 * clear this one and block further NMIs.
-			 */
-			vcpu->arch.nmi_pending = 0;
-			vmx_set_nmi_mask(vcpu, true);
-			return 0;
-		}
-	}
+	if (to_vmx(vcpu)->nested.nested_run_pending)
+		return 0;
 
 	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
 		return 0;
@@ -4658,19 +4644,8 @@  static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
-	if (is_guest_mode(vcpu)) {
-		if (to_vmx(vcpu)->nested.nested_run_pending)
-			return 0;
-		if (nested_exit_on_intr(vcpu)) {
-			nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
-					  0, 0);
-			/*
-			 * fall through to normal code, but now in L1, not L2
-			 */
-		}
-	}
-
-	return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+	return (!to_vmx(vcpu)->nested.nested_run_pending &&
+		vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
 		!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
 			(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
 }
@@ -8172,6 +8147,35 @@  static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
 	}
 }
 
+static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
+		if (vmx->nested.nested_run_pending)
+			return -EBUSY;
+		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+				  NMI_VECTOR | INTR_TYPE_NMI_INTR |
+				  INTR_INFO_VALID_MASK, 0);
+		/*
+		 * The NMI-triggered VM exit counts as injection:
+		 * clear this one and block further NMIs.
+		 */
+		vcpu->arch.nmi_pending = 0;
+		vmx_set_nmi_mask(vcpu, true);
+		return 0;
+	}
+
+	if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
+	    nested_exit_on_intr(vcpu)) {
+		if (vmx->nested.nested_run_pending)
+			return -EBUSY;
+		nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
+	}
+
+	return 0;
+}
+
 /*
  * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
  * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
@@ -8512,6 +8516,9 @@  static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 		nested_vmx_succeed(vcpu);
 	if (enable_shadow_vmcs)
 		vmx->nested.sync_shadow_vmcs = true;
+
+	/* in case we halted in L2 */
+	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 }
 
 /*
@@ -8652,6 +8659,8 @@  static struct kvm_x86_ops vmx_x86_ops = {
 	.check_intercept = vmx_check_intercept,
 	.handle_external_intr = vmx_handle_external_intr,
 	.mpx_supported = vmx_mpx_supported,
+
+	.check_nested_events = vmx_check_nested_events,
 };
 
 static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4cca458..9ec2f26 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5788,8 +5788,10 @@  static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
 }
 
-static void inject_pending_event(struct kvm_vcpu *vcpu)
+static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 {
+	int r;
+
 	/* try to reinject previous events if any */
 	if (vcpu->arch.exception.pending) {
 		trace_kvm_inj_exception(vcpu->arch.exception.nr,
@@ -5799,17 +5801,23 @@  static void inject_pending_event(struct kvm_vcpu *vcpu)
 					  vcpu->arch.exception.has_error_code,
 					  vcpu->arch.exception.error_code,
 					  vcpu->arch.exception.reinject);
-		return;
+		return 0;
 	}
 
 	if (vcpu->arch.nmi_injected) {
 		kvm_x86_ops->set_nmi(vcpu);
-		return;
+		return 0;
 	}
 
 	if (vcpu->arch.interrupt.pending) {
 		kvm_x86_ops->set_irq(vcpu);
-		return;
+		return 0;
+	}
+
+	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
+		r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
+		if (r != 0)
+			return r;
 	}
 
 	/* try to inject new event if pending */
@@ -5826,6 +5834,7 @@  static void inject_pending_event(struct kvm_vcpu *vcpu)
 			kvm_x86_ops->set_irq(vcpu);
 		}
 	}
+	return 0;
 }
 
 static void process_nmi(struct kvm_vcpu *vcpu)
@@ -5930,10 +5939,10 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			goto out;
 		}
 
-		inject_pending_event(vcpu);
-
+		if (inject_pending_event(vcpu, req_int_win) != 0)
+			req_immediate_exit = true;
 		/* enable NMI/IRQ window open exits if needed */
-		if (vcpu->arch.nmi_pending)
+		else if (vcpu->arch.nmi_pending)
 			req_immediate_exit =
 				kvm_x86_ops->enable_nmi_window(vcpu) != 0;
 		else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
@@ -7254,6 +7263,9 @@  void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
+	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
+		kvm_x86_ops->check_nested_events(vcpu, false);
+
 	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
 		!vcpu->arch.apf.halted)
 		|| !list_empty_careful(&vcpu->async_pf.done)