
[v4,5/6] KVM: nVMX: Fix conditions for NMI injection

Message ID 523b4691a695525a22102c496a078fda6a13d392.1365934368.git.jan.kiszka@web.de (mailing list archive)
State New, archived

Commit Message

Jan Kiszka April 14, 2013, 10:12 a.m. UTC
From: Jan Kiszka <jan.kiszka@siemens.com>

The logic for checking whether interrupts can be injected also has to be
applied to NMIs. The difference is that, if NMI interception is enabled,
these events are consumed and blocked by the VM exit.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 arch/x86/kvm/vmx.c |   28 ++++++++++++++++++++++++++++
 1 files changed, 28 insertions(+), 0 deletions(-)
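
For comparison, the interrupt-side check that this patch mirrors looks roughly
like the sketch below (an approximation of vmx_interrupt_allowed() at this
point in the series, not an exact copy of the tree). The key difference to the
NMI path added here is that the interrupt is not consumed by the emulated VM
exit; it stays pending and is injected into L1 afterwards:

static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
{
	if (is_guest_mode(vcpu)) {
		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

		if (to_vmx(vcpu)->nested.nested_run_pending)
			return 0;
		if (nested_exit_on_intr(vcpu)) {
			nested_vmx_vmexit(vcpu);
			vmcs12->vm_exit_reason =
				EXIT_REASON_EXTERNAL_INTERRUPT;
			vmcs12->vm_exit_intr_info = 0;
			/*
			 * Fall through: the interrupt itself remains pending
			 * and is delivered to L1 via the regular path.
			 */
		}
	}

	return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
		!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
			(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
}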

Comments

Gleb Natapov April 14, 2013, 3:23 p.m. UTC | #1
On Sun, Apr 14, 2013 at 12:12:49PM +0200, Jan Kiszka wrote:
> From: Jan Kiszka <jan.kiszka@siemens.com>
> 
> The logic for checking if interrupts can be injected has to be applied
> also on NMIs. The difference is that if NMI interception is on these
> events are consumed and blocked by the VM exit.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
>  arch/x86/kvm/vmx.c |   28 ++++++++++++++++++++++++++++
>  1 files changed, 28 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 56e7519..ad9b4bc 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4190,6 +4190,12 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
>  		PIN_BASED_EXT_INTR_MASK;
>  }
>  
> +static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
> +{
> +	return get_vmcs12(vcpu)->pin_based_vm_exec_control &
> +		PIN_BASED_NMI_EXITING;
> +}
> +
>  static void enable_irq_window(struct kvm_vcpu *vcpu)
>  {
>  	u32 cpu_based_vm_exec_control;
> @@ -4315,6 +4321,28 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
>  
>  static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>  {
> +	if (is_guest_mode(vcpu)) {
> +		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> +
> +		if (to_vmx(vcpu)->nested.nested_run_pending ||
> +		    vmcs_read32(GUEST_ACTIVITY_STATE) ==
> +			   GUEST_ACTIVITY_WAIT_SIPI)
The same is true for interrupts too, but I do not think that we should allow
a nested guest to directly enter the GUEST_ACTIVITY_WAIT_SIPI state, or any
other state except ACTIVE; those states should be emulated instead. From a
quick look at the spec, it looks like external interrupts do not cause a
VMEXIT while the vcpu is in GUEST_ACTIVITY_WAIT_SIPI.

> +			return 0;
> +		if (nested_exit_on_nmi(vcpu)) {
> +			nested_vmx_vmexit(vcpu);
> +			vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI;
> +			vmcs12->vm_exit_intr_info = NMI_VECTOR |
> +				INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK;
> +			/*
> +			 * The NMI-triggered VM exit counts as injection:
> +			 * clear this one and block further NMIs.
> +			 */
> +			vcpu->arch.nmi_pending = 0;
> +			vmx_set_nmi_mask(vcpu, true);
> +			return 0;
> +		}
> +	}
> +
>  	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
>  		return 0;
>  
> -- 
> 1.7.3.4

--
			Gleb.
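
What "emulated instead" could mean in practice is translating an L1-requested
non-ACTIVE activity state into KVM's own vcpu state at nested entry, instead
of programming it into vmcs02. Purely as a hypothetical sketch of that idea
(no such helper exists in the tree; the field and state names are the ones
already used by vmx.c and the vmx/kvm headers):

static int nested_emulate_activity_state(struct kvm_vcpu *vcpu,
					 struct vmcs12 *vmcs12)
{
	switch (vmcs12->guest_activity_state) {
	case GUEST_ACTIVITY_ACTIVE:
		break;
	case GUEST_ACTIVITY_HLT:
		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
		break;
	case GUEST_ACTIVITY_WAIT_SIPI:
		vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
		break;
	default:
		return -EINVAL;	/* or fail the nested entry */
	}
	/* vmcs02 itself always runs in the ACTIVE state */
	vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
	return 0;
}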
Jan Kiszka April 14, 2013, 3:53 p.m. UTC | #2
On 2013-04-14 17:23, Gleb Natapov wrote:
> On Sun, Apr 14, 2013 at 12:12:49PM +0200, Jan Kiszka wrote:
>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>
>> The logic for checking if interrupts can be injected has to be applied
>> also on NMIs. The difference is that if NMI interception is on these
>> events are consumed and blocked by the VM exit.
>>
>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>> ---
>>  arch/x86/kvm/vmx.c |   28 ++++++++++++++++++++++++++++
>>  1 files changed, 28 insertions(+), 0 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 56e7519..ad9b4bc 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -4190,6 +4190,12 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
>>  		PIN_BASED_EXT_INTR_MASK;
>>  }
>>  
>> +static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
>> +{
>> +	return get_vmcs12(vcpu)->pin_based_vm_exec_control &
>> +		PIN_BASED_NMI_EXITING;
>> +}
>> +
>>  static void enable_irq_window(struct kvm_vcpu *vcpu)
>>  {
>>  	u32 cpu_based_vm_exec_control;
>> @@ -4315,6 +4321,28 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
>>  
>>  static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>>  {
>> +	if (is_guest_mode(vcpu)) {
>> +		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>> +
>> +		if (to_vmx(vcpu)->nested.nested_run_pending ||
>> +		    vmcs_read32(GUEST_ACTIVITY_STATE) ==
>> +			   GUEST_ACTIVITY_WAIT_SIPI)
> The same is true for interrupt too,

Yes, but aren't we already waiting with interrupts disabled in that state?

> but I do not think that we should allow
> nested guest directly enter GUEST_ACTIVITY_WAIT_SIPI state or any other
> state except ACTIVE. They should be emulated instead.

Well, they aren't emulated yet but directly applied. So I think the
patch is correct in the current context at least.

What negative effects do you expect from entering those states with L2?

> From quick look at
> the spec it looks like external interrupts do not cause VMEXIT while
> vcpu is in GUEST_ACTIVITY_WAIT_SIPI.

Yes, see above.

Jan
Gleb Natapov April 14, 2013, 4:18 p.m. UTC | #3
On Sun, Apr 14, 2013 at 05:53:05PM +0200, Jan Kiszka wrote:
> On 2013-04-14 17:23, Gleb Natapov wrote:
> > On Sun, Apr 14, 2013 at 12:12:49PM +0200, Jan Kiszka wrote:
> >> From: Jan Kiszka <jan.kiszka@siemens.com>
> >>
> >> The logic for checking if interrupts can be injected has to be applied
> >> also on NMIs. The difference is that if NMI interception is on these
> >> events are consumed and blocked by the VM exit.
> >>
> >> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> >> ---
> >>  arch/x86/kvm/vmx.c |   28 ++++++++++++++++++++++++++++
> >>  1 files changed, 28 insertions(+), 0 deletions(-)
> >>
> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >> index 56e7519..ad9b4bc 100644
> >> --- a/arch/x86/kvm/vmx.c
> >> +++ b/arch/x86/kvm/vmx.c
> >> @@ -4190,6 +4190,12 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
> >>  		PIN_BASED_EXT_INTR_MASK;
> >>  }
> >>  
> >> +static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
> >> +{
> >> +	return get_vmcs12(vcpu)->pin_based_vm_exec_control &
> >> +		PIN_BASED_NMI_EXITING;
> >> +}
> >> +
> >>  static void enable_irq_window(struct kvm_vcpu *vcpu)
> >>  {
> >>  	u32 cpu_based_vm_exec_control;
> >> @@ -4315,6 +4321,28 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
> >>  
> >>  static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
> >>  {
> >> +	if (is_guest_mode(vcpu)) {
> >> +		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> >> +
> >> +		if (to_vmx(vcpu)->nested.nested_run_pending ||
> >> +		    vmcs_read32(GUEST_ACTIVITY_STATE) ==
> >> +			   GUEST_ACTIVITY_WAIT_SIPI)
> > The same is true for interrupt too,
> 
> Yes, but aren't we already waiting with interrupts disabled in that state?
> 
Why? L1 can do vmcs_write(GUEST_ACTIVITY_STATE,
GUEST_ACTIVITY_WAIT_SIPI) at any point.
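
For concreteness, that write is just a VMWRITE to the activity-state field; in
an L1 hypervisor it would look roughly like this (illustration only, with
vmcs_write32() standing in for whatever VMWRITE wrapper L1 happens to use):

	/* L1 requests Wait-For-SIPI for L2 before its next VMRESUME */
	vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_WAIT_SIPI);
	/*
	 * With the current, non-emulated handling this value ends up in
	 * vmcs02, so the next nested entry really runs L2 in Wait-For-SIPI
	 * instead of ACTIVE.
	 */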

> > but I do not think that we should allow
> > nested guest directly enter GUEST_ACTIVITY_WAIT_SIPI state or any other
> > state except ACTIVE. They should be emulated instead.
> 
> Well, they aren't emulated yet but directly applied. So I think the
> patch is correct in the current context at least.
> 
If my understanding is correct, the fact that it is directly applied is
a serious bug and should be fixed.

> What negative effects do you expect from entering those states with L2?
> 
As I wrote below (and you misunderstood me :)), it looks like L0 external
interrupts do not generate a vmexit while the active VMCS is in the
Wait-For-SIPI state. In any case, we should carefully examine the
implications of this state, since KVM never uses it and does not know how
to handle it.

> > From quick look at
> > the spec it looks like external interrupts do not cause VMEXIT while
> > vcpu is in GUEST_ACTIVITY_WAIT_SIPI.
> 
> Yes, see above.
> 
> Jan
> 
> 



--
			Gleb.
Jan Kiszka April 14, 2013, 4:35 p.m. UTC | #4
On 2013-04-14 18:18, Gleb Natapov wrote:
> On Sun, Apr 14, 2013 at 05:53:05PM +0200, Jan Kiszka wrote:
>> On 2013-04-14 17:23, Gleb Natapov wrote:
>>> On Sun, Apr 14, 2013 at 12:12:49PM +0200, Jan Kiszka wrote:
>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>
>>>> The logic for checking if interrupts can be injected has to be applied
>>>> also on NMIs. The difference is that if NMI interception is on these
>>>> events are consumed and blocked by the VM exit.
>>>>
>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>> ---
>>>>  arch/x86/kvm/vmx.c |   28 ++++++++++++++++++++++++++++
>>>>  1 files changed, 28 insertions(+), 0 deletions(-)
>>>>
>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>>> index 56e7519..ad9b4bc 100644
>>>> --- a/arch/x86/kvm/vmx.c
>>>> +++ b/arch/x86/kvm/vmx.c
>>>> @@ -4190,6 +4190,12 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
>>>>  		PIN_BASED_EXT_INTR_MASK;
>>>>  }
>>>>  
>>>> +static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +	return get_vmcs12(vcpu)->pin_based_vm_exec_control &
>>>> +		PIN_BASED_NMI_EXITING;
>>>> +}
>>>> +
>>>>  static void enable_irq_window(struct kvm_vcpu *vcpu)
>>>>  {
>>>>  	u32 cpu_based_vm_exec_control;
>>>> @@ -4315,6 +4321,28 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
>>>>  
>>>>  static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>>>>  {
>>>> +	if (is_guest_mode(vcpu)) {
>>>> +		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>>>> +
>>>> +		if (to_vmx(vcpu)->nested.nested_run_pending ||
>>>> +		    vmcs_read32(GUEST_ACTIVITY_STATE) ==
>>>> +			   GUEST_ACTIVITY_WAIT_SIPI)
>>> The same is true for interrupt too,
>>
>> Yes, but aren't we already waiting with interrupts disabled in that state?
>>
> Why? L1 can do vmcs_write(GUEST_ACTIVITY_STATE,
> GUEST_ACTIVITY_WAIT_SIPI) at any point.

Hmm, ok.

> 
>>> but I do not think that we should allow
>>> nested guest directly enter GUEST_ACTIVITY_WAIT_SIPI state or any other
>>> state except ACTIVE. They should be emulated instead.
>>
>> Well, they aren't emulated yet but directly applied. So I think the
>> patch is correct in the current context at least.
>>
> If my understanding is correct the facts that it is directly applied is
> a serious bug and should be fixed.

Yeah, L1 could put the vCPU in wait-for-SIPI, and only that physical
signal will wake it up again. But L0 will not send it...

> 
>> What negative effects do you expect from entering those states with L2?
>>
> As I wrote below (and you misunderstood me :)) it looks like L0 external
> interrupts do not generate vmexit while active VMCS is in Wait-For-SIPI
> state. In any case we should carefully examine what are the implications
> of this state since KVM never uses it and does not know how to handle it.

OK, but this should be conceptually unrelated to this patch. So, given
that you applied patch 4, should I simply remove the activity state
check from this one in order to proceed with it?

Jan
Gleb Natapov April 14, 2013, 4:43 p.m. UTC | #5
On Sun, Apr 14, 2013 at 06:35:14PM +0200, Jan Kiszka wrote:
> On 2013-04-14 18:18, Gleb Natapov wrote:
> > On Sun, Apr 14, 2013 at 05:53:05PM +0200, Jan Kiszka wrote:
> >> On 2013-04-14 17:23, Gleb Natapov wrote:
> >>> On Sun, Apr 14, 2013 at 12:12:49PM +0200, Jan Kiszka wrote:
> >>>> From: Jan Kiszka <jan.kiszka@siemens.com>
> >>>>
> >>>> The logic for checking if interrupts can be injected has to be applied
> >>>> also on NMIs. The difference is that if NMI interception is on these
> >>>> events are consumed and blocked by the VM exit.
> >>>>
> >>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> >>>> ---
> >>>>  arch/x86/kvm/vmx.c |   28 ++++++++++++++++++++++++++++
> >>>>  1 files changed, 28 insertions(+), 0 deletions(-)
> >>>>
> >>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >>>> index 56e7519..ad9b4bc 100644
> >>>> --- a/arch/x86/kvm/vmx.c
> >>>> +++ b/arch/x86/kvm/vmx.c
> >>>> @@ -4190,6 +4190,12 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
> >>>>  		PIN_BASED_EXT_INTR_MASK;
> >>>>  }
> >>>>  
> >>>> +static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
> >>>> +{
> >>>> +	return get_vmcs12(vcpu)->pin_based_vm_exec_control &
> >>>> +		PIN_BASED_NMI_EXITING;
> >>>> +}
> >>>> +
> >>>>  static void enable_irq_window(struct kvm_vcpu *vcpu)
> >>>>  {
> >>>>  	u32 cpu_based_vm_exec_control;
> >>>> @@ -4315,6 +4321,28 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
> >>>>  
> >>>>  static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
> >>>>  {
> >>>> +	if (is_guest_mode(vcpu)) {
> >>>> +		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> >>>> +
> >>>> +		if (to_vmx(vcpu)->nested.nested_run_pending ||
> >>>> +		    vmcs_read32(GUEST_ACTIVITY_STATE) ==
> >>>> +			   GUEST_ACTIVITY_WAIT_SIPI)
> >>> The same is true for interrupt too,
> >>
> >> Yes, but aren't we already waiting with interrupts disabled in that state?
> >>
> > Why? L1 can do vmcs_write(GUEST_ACTIVITY_STATE,
> > GUEST_ACTIVITY_WAIT_SIPI) at any point.
> 
> Hmm, ok.
> 
> > 
> >>> but I do not think that we should allow
> >>> nested guest directly enter GUEST_ACTIVITY_WAIT_SIPI state or any other
> >>> state except ACTIVE. They should be emulated instead.
> >>
> >> Well, they aren't emulated yet but directly applied. So I think the
> >> patch is correct in the current context at least.
> >>
> > If my understanding is correct the facts that it is directly applied is
> > a serious bug and should be fixed.
> 
> Yeah, L1 could put the vCPU in wait-for-SIPI, and only that physical
> signal will wake it up again. But L0 will not send it...
> 
> > 
> >> What negative effects do you expect from entering those states with L2?
> >>
> > As I wrote below (and you misunderstood me :)) it looks like L0 external
> > interrupts do not generate vmexit while active VMCS is in Wait-For-SIPI
> > state. In any case we should carefully examine what are the implications
> > of this state since KVM never uses it and does not know how to handle it.
> 
> OK, but this should be conceptually unrelated to this patch. So, given
> that you applied patch 4, should I simply remove the activity state
> check from this one in order to proceed with it?
> 
Yes. The non-ACTIVE VMCS state issue should be handled separately. But
until it is fixed, we should remember not to enable VMX nesting by
default :)

--
			Gleb.
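
With the activity-state check dropped as agreed above, the guest-mode branch
of vmx_nmi_allowed() would reduce to roughly the following (a sketch of the
discussed respin, not necessarily the exact follow-up patch):

static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
{
	if (is_guest_mode(vcpu)) {
		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

		if (to_vmx(vcpu)->nested.nested_run_pending)
			return 0;
		if (nested_exit_on_nmi(vcpu)) {
			nested_vmx_vmexit(vcpu);
			vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI;
			vmcs12->vm_exit_intr_info = NMI_VECTOR |
				INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK;
			/*
			 * The NMI-triggered VM exit counts as injection:
			 * clear this one and block further NMIs.
			 */
			vcpu->arch.nmi_pending = 0;
			vmx_set_nmi_mask(vcpu, true);
			return 0;
		}
	}

	/* non-nested checks unchanged from the patch below */
	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
		return 0;

	return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
		 (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI |
		  GUEST_INTR_STATE_NMI));
}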

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 56e7519..ad9b4bc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4190,6 +4190,12 @@  static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
 		PIN_BASED_EXT_INTR_MASK;
 }
 
+static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
+{
+	return get_vmcs12(vcpu)->pin_based_vm_exec_control &
+		PIN_BASED_NMI_EXITING;
+}
+
 static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	u32 cpu_based_vm_exec_control;
@@ -4315,6 +4321,28 @@  static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 
 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 {
+	if (is_guest_mode(vcpu)) {
+		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+		if (to_vmx(vcpu)->nested.nested_run_pending ||
+		    vmcs_read32(GUEST_ACTIVITY_STATE) ==
+			   GUEST_ACTIVITY_WAIT_SIPI)
+			return 0;
+		if (nested_exit_on_nmi(vcpu)) {
+			nested_vmx_vmexit(vcpu);
+			vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI;
+			vmcs12->vm_exit_intr_info = NMI_VECTOR |
+				INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK;
+			/*
+			 * The NMI-triggered VM exit counts as injection:
+			 * clear this one and block further NMIs.
+			 */
+			vcpu->arch.nmi_pending = 0;
+			vmx_set_nmi_mask(vcpu, true);
+			return 0;
+		}
+	}
+
 	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
 		return 0;