Message ID | d144ee7b8a4019cbe0b054f309f2e4d1a7cc0f41.1363193625.git.jan.kiszka@siemens.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Mar 13, 2013 at 05:53:45PM +0100, Jan Kiszka wrote: > If we are in guest mode, L0 can only inject events into L2 if L1 has > nothing pending. Otherwise, L0 would overwrite L1's events and they > would get lost. This check is conceptually independent of > nested_exit_on_intr. > > If L1 traps external interrupts, then we also need to look at L1's > idt_vectoring_info_field. If it is empty, we can kick the guest from L2 > to L1, just like the previous code worked. > > Finally, the logic for checking interrupt has to be applied also on NMIs > in an analogous way. This enables NMI interception for nested guests. > > Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> > --- > arch/x86/kvm/vmx.c | 59 ++++++++++++++++++++++++++++++++++++++++++++------- > 1 files changed, 51 insertions(+), 8 deletions(-) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index b50174d..10de336 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -4211,6 +4211,12 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) > PIN_BASED_EXT_INTR_MASK; > } > > +static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) > +{ > + return get_vmcs12(vcpu)->pin_based_vm_exec_control & > + PIN_BASED_NMI_EXITING; > +} > + It will take me some time to review this, but I have a small nit now. You open code checking of this bit in your previous patch, why not move this hunk there? > static void enable_irq_window(struct kvm_vcpu *vcpu) > { > u32 cpu_based_vm_exec_control; > @@ -4307,6 +4313,30 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) > > static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) > { > + if (is_guest_mode(vcpu)) { > + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); > + > + if (to_vmx(vcpu)->nested.nested_run_pending && > + (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) > + return 0; > + if (nested_exit_on_nmi(vcpu)) { > + /* > + * Check if the idt_vectoring_info_field is free. We > + * cannot raise EXIT_REASON_EXCEPTION_NMI if it isn't. > + */ > + if (vmcs12->idt_vectoring_info_field & > + VECTORING_INFO_VALID_MASK) > + return 0; > + nested_vmx_vmexit(vcpu); > + vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; > + vmcs12->vm_exit_intr_info = NMI_VECTOR | > + INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK; > + /* > + * fall through to normal code, but now in L1, not L2 > + */ > + } > + } > + > if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) > return 0; > > @@ -4346,16 +4376,29 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) > > static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) > { > - if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { > + if (is_guest_mode(vcpu)) { > struct vmcs12 *vmcs12 = get_vmcs12(vcpu); > - if (to_vmx(vcpu)->nested.nested_run_pending || > - (vmcs12->idt_vectoring_info_field & > - VECTORING_INFO_VALID_MASK)) > + > + if (to_vmx(vcpu)->nested.nested_run_pending && > + (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) > return 0; > - nested_vmx_vmexit(vcpu); > - vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; > - vmcs12->vm_exit_intr_info = 0; > - /* fall through to normal code, but now in L1, not L2 */ > + if (nested_exit_on_intr(vcpu)) { > + /* > + * Check if the idt_vectoring_info_field is free. We > + * cannot raise EXIT_REASON_EXTERNAL_INTERRUPT if it > + * isn't. > + */ > + if (vmcs12->idt_vectoring_info_field & > + VECTORING_INFO_VALID_MASK) > + return 0; > + nested_vmx_vmexit(vcpu); > + vmcs12->vm_exit_reason = > + EXIT_REASON_EXTERNAL_INTERRUPT; > + vmcs12->vm_exit_intr_info = 0; > + /* > + * fall through to normal code, but now in L1, not L2 > + */ > + } > } > > return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && > -- > 1.7.3.4 -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Mar 13, 2013 at 05:53:45PM +0100, Jan Kiszka wrote: > If we are in guest mode, L0 can only inject events into L2 if L1 has > nothing pending. Otherwise, L0 would overwrite L1's events and they > would get lost. This check is conceptually independent of > nested_exit_on_intr. > > If L1 traps external interrupts, then we also need to look at L1's > idt_vectoring_info_field. If it is empty, we can kick the guest from L2 > to L1, just like the previous code worked. > > Finally, the logic for checking interrupt has to be applied also on NMIs > in an analogous way. This enables NMI interception for nested guests. > > Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> > --- > arch/x86/kvm/vmx.c | 59 ++++++++++++++++++++++++++++++++++++++++++++------- > 1 files changed, 51 insertions(+), 8 deletions(-) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index b50174d..10de336 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -4211,6 +4211,12 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) > PIN_BASED_EXT_INTR_MASK; > } > > +static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) > +{ > + return get_vmcs12(vcpu)->pin_based_vm_exec_control & > + PIN_BASED_NMI_EXITING; > +} > + > static void enable_irq_window(struct kvm_vcpu *vcpu) > { > u32 cpu_based_vm_exec_control; > @@ -4307,6 +4313,30 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) > > static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) > { > + if (is_guest_mode(vcpu)) { > + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); > + > + if (to_vmx(vcpu)->nested.nested_run_pending && > + (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) > + return 0; > + if (nested_exit_on_nmi(vcpu)) { > + /* > + * Check if the idt_vectoring_info_field is free. We > + * cannot raise EXIT_REASON_EXCEPTION_NMI if it isn't. > + */ > + if (vmcs12->idt_vectoring_info_field & > + VECTORING_INFO_VALID_MASK) > + return 0; > + nested_vmx_vmexit(vcpu); > + vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; > + vmcs12->vm_exit_intr_info = NMI_VECTOR | > + INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK; > + /* > + * fall through to normal code, but now in L1, not L2 > + */ > + } > + } > + > if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) > return 0; > > @@ -4346,16 +4376,29 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) > > static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) > { > - if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { > + if (is_guest_mode(vcpu)) { > struct vmcs12 *vmcs12 = get_vmcs12(vcpu); > - if (to_vmx(vcpu)->nested.nested_run_pending || > - (vmcs12->idt_vectoring_info_field & > - VECTORING_INFO_VALID_MASK)) > + > + if (to_vmx(vcpu)->nested.nested_run_pending && > + (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) > return 0; I do not understand this. As far as I remember Nadav's explanation we have to enter guest if nested_run_pending is set because VMX does not expect vmexit to happen without running guest at all. May be idt_vectoring_info_field processing is the only reason, may be not. I wouldn't gamble on it. > - nested_vmx_vmexit(vcpu); > - vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; > - vmcs12->vm_exit_intr_info = 0; > - /* fall through to normal code, but now in L1, not L2 */ > + if (nested_exit_on_intr(vcpu)) { > + /* > + * Check if the idt_vectoring_info_field is free. We > + * cannot raise EXIT_REASON_EXTERNAL_INTERRUPT if it > + * isn't. > + */ > + if (vmcs12->idt_vectoring_info_field & > + VECTORING_INFO_VALID_MASK) > + return 0; I think we actually need to return 0 if idt_vectoring_info_field is valid even if !nested_exit_on_intr(). If we do not we let L0 inject interrupt into L2 and then overwrite it on entry from vmcs12->idt_vectoring_info_field. > + vmcs12->vm_exit_reason = > + EXIT_REASON_EXTERNAL_INTERRUPT; > + vmcs12->vm_exit_intr_info = 0; > + /* > + * fall through to normal code, but now in L1, not L2 > + */ > + } > } > > return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && > -- > 1.7.3.4 -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2013-03-14 16:12, Gleb Natapov wrote: > On Wed, Mar 13, 2013 at 05:53:45PM +0100, Jan Kiszka wrote: >> If we are in guest mode, L0 can only inject events into L2 if L1 has >> nothing pending. Otherwise, L0 would overwrite L1's events and they >> would get lost. This check is conceptually independent of >> nested_exit_on_intr. >> >> If L1 traps external interrupts, then we also need to look at L1's >> idt_vectoring_info_field. If it is empty, we can kick the guest from L2 >> to L1, just like the previous code worked. >> >> Finally, the logic for checking interrupt has to be applied also on NMIs >> in an analogous way. This enables NMI interception for nested guests. >> >> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> >> --- >> arch/x86/kvm/vmx.c | 59 ++++++++++++++++++++++++++++++++++++++++++++------- >> 1 files changed, 51 insertions(+), 8 deletions(-) >> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index b50174d..10de336 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -4211,6 +4211,12 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) >> PIN_BASED_EXT_INTR_MASK; >> } >> >> +static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) >> +{ >> + return get_vmcs12(vcpu)->pin_based_vm_exec_control & >> + PIN_BASED_NMI_EXITING; >> +} >> + >> static void enable_irq_window(struct kvm_vcpu *vcpu) >> { >> u32 cpu_based_vm_exec_control; >> @@ -4307,6 +4313,30 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) >> >> static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) >> { >> + if (is_guest_mode(vcpu)) { >> + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); >> + >> + if (to_vmx(vcpu)->nested.nested_run_pending && >> + (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) >> + return 0; >> + if (nested_exit_on_nmi(vcpu)) { >> + /* >> + * Check if the idt_vectoring_info_field is free. We >> + * cannot raise EXIT_REASON_EXCEPTION_NMI if it isn't. >> + */ >> + if (vmcs12->idt_vectoring_info_field & >> + VECTORING_INFO_VALID_MASK) >> + return 0; >> + nested_vmx_vmexit(vcpu); >> + vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; >> + vmcs12->vm_exit_intr_info = NMI_VECTOR | >> + INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK; >> + /* >> + * fall through to normal code, but now in L1, not L2 >> + */ >> + } >> + } >> + >> if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) >> return 0; >> >> @@ -4346,16 +4376,29 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) >> >> static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) >> { >> - if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { >> + if (is_guest_mode(vcpu)) { >> struct vmcs12 *vmcs12 = get_vmcs12(vcpu); >> - if (to_vmx(vcpu)->nested.nested_run_pending || >> - (vmcs12->idt_vectoring_info_field & >> - VECTORING_INFO_VALID_MASK)) >> + >> + if (to_vmx(vcpu)->nested.nested_run_pending && >> + (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) >> return 0; > I do not understand this. As far as I remember Nadav's explanation we > have to enter guest if nested_run_pending is set because VMX does not > expect vmexit to happen without running guest at all. May be > idt_vectoring_info_field processing is the only reason, may be not. I > wouldn't gamble on it. What are the problems? Specifically if we emulate the effects of an immediate vmexit properly. I'm not categorically excluding that some case is missing. If we do not allow soft-vmexit here, we will set the interrupt window later and will get such an immediate exit as well (provided the L2 was interruptible). > >> - nested_vmx_vmexit(vcpu); >> - vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; >> - vmcs12->vm_exit_intr_info = 0; >> - /* fall through to normal code, but now in L1, not L2 */ >> + if (nested_exit_on_intr(vcpu)) { >> + /* >> + * Check if the idt_vectoring_info_field is free. We >> + * cannot raise EXIT_REASON_EXTERNAL_INTERRUPT if it >> + * isn't. >> + */ >> + if (vmcs12->idt_vectoring_info_field & >> + VECTORING_INFO_VALID_MASK) >> + return 0; > I think we actually need to return 0 if idt_vectoring_info_field is > valid even if !nested_exit_on_intr(). If we do not we let L0 inject > interrupt into L2 and then overwrite it on entry from > vmcs12->idt_vectoring_info_field. Sorry, don't understand the last sentence. This check is about the software idt_vectoring_info_field, the one we keep for L1, not the real field. Jan
On 2013-03-14 14:59, Gleb Natapov wrote: > On Wed, Mar 13, 2013 at 05:53:45PM +0100, Jan Kiszka wrote: >> If we are in guest mode, L0 can only inject events into L2 if L1 has >> nothing pending. Otherwise, L0 would overwrite L1's events and they >> would get lost. This check is conceptually independent of >> nested_exit_on_intr. >> >> If L1 traps external interrupts, then we also need to look at L1's >> idt_vectoring_info_field. If it is empty, we can kick the guest from L2 >> to L1, just like the previous code worked. >> >> Finally, the logic for checking interrupt has to be applied also on NMIs >> in an analogous way. This enables NMI interception for nested guests. >> >> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> >> --- >> arch/x86/kvm/vmx.c | 59 ++++++++++++++++++++++++++++++++++++++++++++------- >> 1 files changed, 51 insertions(+), 8 deletions(-) >> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index b50174d..10de336 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -4211,6 +4211,12 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) >> PIN_BASED_EXT_INTR_MASK; >> } >> >> +static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) >> +{ >> + return get_vmcs12(vcpu)->pin_based_vm_exec_control & >> + PIN_BASED_NMI_EXITING; >> +} >> + > It will take me some time to review this, but I have a small nit now. > You open code checking of this bit in your previous patch, why not move > this hunk there? True. Patch 1 is several weeks older, and I didn't recheck this. Will clean it up. Jan
On Thu, Mar 14, 2013 at 04:24:18PM +0100, Jan Kiszka wrote: > On 2013-03-14 16:12, Gleb Natapov wrote: > > On Wed, Mar 13, 2013 at 05:53:45PM +0100, Jan Kiszka wrote: > >> If we are in guest mode, L0 can only inject events into L2 if L1 has > >> nothing pending. Otherwise, L0 would overwrite L1's events and they > >> would get lost. This check is conceptually independent of > >> nested_exit_on_intr. > >> > >> If L1 traps external interrupts, then we also need to look at L1's > >> idt_vectoring_info_field. If it is empty, we can kick the guest from L2 > >> to L1, just like the previous code worked. > >> > >> Finally, the logic for checking interrupt has to be applied also on NMIs > >> in an analogous way. This enables NMI interception for nested guests. > >> > >> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> > >> --- > >> arch/x86/kvm/vmx.c | 59 ++++++++++++++++++++++++++++++++++++++++++++------- > >> 1 files changed, 51 insertions(+), 8 deletions(-) > >> > >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > >> index b50174d..10de336 100644 > >> --- a/arch/x86/kvm/vmx.c > >> +++ b/arch/x86/kvm/vmx.c > >> @@ -4211,6 +4211,12 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) > >> PIN_BASED_EXT_INTR_MASK; > >> } > >> > >> +static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) > >> +{ > >> + return get_vmcs12(vcpu)->pin_based_vm_exec_control & > >> + PIN_BASED_NMI_EXITING; > >> +} > >> + > >> static void enable_irq_window(struct kvm_vcpu *vcpu) > >> { > >> u32 cpu_based_vm_exec_control; > >> @@ -4307,6 +4313,30 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) > >> > >> static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) > >> { > >> + if (is_guest_mode(vcpu)) { > >> + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); > >> + > >> + if (to_vmx(vcpu)->nested.nested_run_pending && > >> + (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) > >> + return 0; > >> + if (nested_exit_on_nmi(vcpu)) { > >> + /* > >> + * Check if the idt_vectoring_info_field is free. We > >> + * cannot raise EXIT_REASON_EXCEPTION_NMI if it isn't. > >> + */ > >> + if (vmcs12->idt_vectoring_info_field & > >> + VECTORING_INFO_VALID_MASK) > >> + return 0; > >> + nested_vmx_vmexit(vcpu); > >> + vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; > >> + vmcs12->vm_exit_intr_info = NMI_VECTOR | > >> + INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK; > >> + /* > >> + * fall through to normal code, but now in L1, not L2 > >> + */ > >> + } > >> + } > >> + > >> if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) > >> return 0; > >> > >> @@ -4346,16 +4376,29 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) > >> > >> static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) > >> { > >> - if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { > >> + if (is_guest_mode(vcpu)) { > >> struct vmcs12 *vmcs12 = get_vmcs12(vcpu); > >> - if (to_vmx(vcpu)->nested.nested_run_pending || > >> - (vmcs12->idt_vectoring_info_field & > >> - VECTORING_INFO_VALID_MASK)) > >> + > >> + if (to_vmx(vcpu)->nested.nested_run_pending && > >> + (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) > >> return 0; > > I do not understand this. As far as I remember Nadav's explanation we > > have to enter guest if nested_run_pending is set because VMX does not > > expect vmexit to happen without running guest at all. May be > > idt_vectoring_info_field processing is the only reason, may be not. I > > wouldn't gamble on it. > > What are the problems? Specifically if we emulate the effects of an > immediate vmexit properly. I'm not categorically excluding that some > case is missing. If we do not allow soft-vmexit here, we will set the > interrupt window later and will get such an immediate exit as well > (provided the L2 was interruptible). > Don't know. Some field that VMX change on vmresume and since from L2 point of view vmresume was executed, but in reality it was not the result that L2 will see will be incorrect. vm_entry_intr_info_field is on of such fields, may be there are or will be more (vAPIC+posted interrupt)? > > > >> - nested_vmx_vmexit(vcpu); > >> - vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; > >> - vmcs12->vm_exit_intr_info = 0; > >> - /* fall through to normal code, but now in L1, not L2 */ > >> + if (nested_exit_on_intr(vcpu)) { > >> + /* > >> + * Check if the idt_vectoring_info_field is free. We > >> + * cannot raise EXIT_REASON_EXTERNAL_INTERRUPT if it > >> + * isn't. > >> + */ > >> + if (vmcs12->idt_vectoring_info_field & > >> + VECTORING_INFO_VALID_MASK) > >> + return 0; > > I think we actually need to return 0 if idt_vectoring_info_field is > > valid even if !nested_exit_on_intr(). If we do not we let L0 inject > > interrupt into L2 and then overwrite it on entry from > > vmcs12->idt_vectoring_info_field. > > Sorry, don't understand the last sentence. This check is about the > software idt_vectoring_info_field, the one we keep for L1, not the real > field. > Suppose the vmcs12->idt_vectoring_info_field is valid and L0 want to inject an interrupt directly into L2 and L2 does not block interrupts. vmx_interrupt_allowed() will return true, so vmx_inject_irq() will be called and L0->L2 interrupt information will be written to VM_ENTRY_INTR_INFO_FIELD. Now in vmx_vcpu_run() there is again check for valid vmcs12->idt_vectoring_info_field and if it is valid VM_ENTRY_INTR_INFO_FIELD is overwritten with it. Interrupt that L0 just injected into L2 was lost forever. -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2013-03-14 16:37, Gleb Natapov wrote: > On Thu, Mar 14, 2013 at 04:24:18PM +0100, Jan Kiszka wrote: >> On 2013-03-14 16:12, Gleb Natapov wrote: >>> On Wed, Mar 13, 2013 at 05:53:45PM +0100, Jan Kiszka wrote: >>>> If we are in guest mode, L0 can only inject events into L2 if L1 has >>>> nothing pending. Otherwise, L0 would overwrite L1's events and they >>>> would get lost. This check is conceptually independent of >>>> nested_exit_on_intr. >>>> >>>> If L1 traps external interrupts, then we also need to look at L1's >>>> idt_vectoring_info_field. If it is empty, we can kick the guest from L2 >>>> to L1, just like the previous code worked. >>>> >>>> Finally, the logic for checking interrupt has to be applied also on NMIs >>>> in an analogous way. This enables NMI interception for nested guests. >>>> >>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> >>>> --- >>>> arch/x86/kvm/vmx.c | 59 ++++++++++++++++++++++++++++++++++++++++++++------- >>>> 1 files changed, 51 insertions(+), 8 deletions(-) >>>> >>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >>>> index b50174d..10de336 100644 >>>> --- a/arch/x86/kvm/vmx.c >>>> +++ b/arch/x86/kvm/vmx.c >>>> @@ -4211,6 +4211,12 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) >>>> PIN_BASED_EXT_INTR_MASK; >>>> } >>>> >>>> +static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) >>>> +{ >>>> + return get_vmcs12(vcpu)->pin_based_vm_exec_control & >>>> + PIN_BASED_NMI_EXITING; >>>> +} >>>> + >>>> static void enable_irq_window(struct kvm_vcpu *vcpu) >>>> { >>>> u32 cpu_based_vm_exec_control; >>>> @@ -4307,6 +4313,30 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) >>>> >>>> static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) >>>> { >>>> + if (is_guest_mode(vcpu)) { >>>> + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); >>>> + >>>> + if (to_vmx(vcpu)->nested.nested_run_pending && >>>> + (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) >>>> + return 0; >>>> + if (nested_exit_on_nmi(vcpu)) { >>>> + /* >>>> + * Check if the idt_vectoring_info_field is free. We >>>> + * cannot raise EXIT_REASON_EXCEPTION_NMI if it isn't. >>>> + */ >>>> + if (vmcs12->idt_vectoring_info_field & >>>> + VECTORING_INFO_VALID_MASK) >>>> + return 0; >>>> + nested_vmx_vmexit(vcpu); >>>> + vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; >>>> + vmcs12->vm_exit_intr_info = NMI_VECTOR | >>>> + INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK; >>>> + /* >>>> + * fall through to normal code, but now in L1, not L2 >>>> + */ >>>> + } >>>> + } >>>> + >>>> if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) >>>> return 0; >>>> >>>> @@ -4346,16 +4376,29 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) >>>> >>>> static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) >>>> { >>>> - if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { >>>> + if (is_guest_mode(vcpu)) { >>>> struct vmcs12 *vmcs12 = get_vmcs12(vcpu); >>>> - if (to_vmx(vcpu)->nested.nested_run_pending || >>>> - (vmcs12->idt_vectoring_info_field & >>>> - VECTORING_INFO_VALID_MASK)) >>>> + >>>> + if (to_vmx(vcpu)->nested.nested_run_pending && >>>> + (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) >>>> return 0; >>> I do not understand this. As far as I remember Nadav's explanation we >>> have to enter guest if nested_run_pending is set because VMX does not >>> expect vmexit to happen without running guest at all. May be >>> idt_vectoring_info_field processing is the only reason, may be not. I >>> wouldn't gamble on it. >> >> What are the problems? Specifically if we emulate the effects of an >> immediate vmexit properly. I'm not categorically excluding that some >> case is missing. If we do not allow soft-vmexit here, we will set the >> interrupt window later and will get such an immediate exit as well >> (provided the L2 was interruptible). >> > Don't know. Some field that VMX change on vmresume and since from L2 > point of view vmresume was executed, but in reality it was not the > result that L2 will see will be incorrect. vm_entry_intr_info_field is > on of such fields, may be there are or will be more (vAPIC+posted > interrupt)? OK, better safe than sorry. Easy to change. > >>> >>>> - nested_vmx_vmexit(vcpu); >>>> - vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; >>>> - vmcs12->vm_exit_intr_info = 0; >>>> - /* fall through to normal code, but now in L1, not L2 */ >>>> + if (nested_exit_on_intr(vcpu)) { >>>> + /* >>>> + * Check if the idt_vectoring_info_field is free. We >>>> + * cannot raise EXIT_REASON_EXTERNAL_INTERRUPT if it >>>> + * isn't. >>>> + */ >>>> + if (vmcs12->idt_vectoring_info_field & >>>> + VECTORING_INFO_VALID_MASK) >>>> + return 0; >>> I think we actually need to return 0 if idt_vectoring_info_field is >>> valid even if !nested_exit_on_intr(). If we do not we let L0 inject >>> interrupt into L2 and then overwrite it on entry from >>> vmcs12->idt_vectoring_info_field. >> >> Sorry, don't understand the last sentence. This check is about the >> software idt_vectoring_info_field, the one we keep for L1, not the real >> field. >> > > Suppose the vmcs12->idt_vectoring_info_field is valid and L0 want to > inject an interrupt directly into L2 and L2 does not block interrupts. > vmx_interrupt_allowed() will return true, so vmx_inject_irq() > will be called and L0->L2 interrupt information will be written > to VM_ENTRY_INTR_INFO_FIELD. Now in vmx_vcpu_run() there is again > check for valid vmcs12->idt_vectoring_info_field and if it is valid > VM_ENTRY_INTR_INFO_FIELD is overwritten with it. Interrupt that L0 just > injected into L2 was lost forever. OK, that's my fault in trying to split patch 2 and 3. Patch 3 will remove that bogus overwriting from vmx_vcpu_run. I'll merge both patches in the next round. Jan
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b50174d..10de336 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4211,6 +4211,12 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) PIN_BASED_EXT_INTR_MASK; } +static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) +{ + return get_vmcs12(vcpu)->pin_based_vm_exec_control & + PIN_BASED_NMI_EXITING; +} + static void enable_irq_window(struct kvm_vcpu *vcpu) { u32 cpu_based_vm_exec_control; @@ -4307,6 +4313,30 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) { + if (is_guest_mode(vcpu)) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + + if (to_vmx(vcpu)->nested.nested_run_pending && + (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) + return 0; + if (nested_exit_on_nmi(vcpu)) { + /* + * Check if the idt_vectoring_info_field is free. We + * cannot raise EXIT_REASON_EXCEPTION_NMI if it isn't. + */ + if (vmcs12->idt_vectoring_info_field & + VECTORING_INFO_VALID_MASK) + return 0; + nested_vmx_vmexit(vcpu); + vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; + vmcs12->vm_exit_intr_info = NMI_VECTOR | + INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK; + /* + * fall through to normal code, but now in L1, not L2 + */ + } + } + if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) return 0; @@ -4346,16 +4376,29 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { + if (is_guest_mode(vcpu)) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - if (to_vmx(vcpu)->nested.nested_run_pending || - (vmcs12->idt_vectoring_info_field & - VECTORING_INFO_VALID_MASK)) + + if (to_vmx(vcpu)->nested.nested_run_pending && + (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) return 0; - nested_vmx_vmexit(vcpu); - vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; - vmcs12->vm_exit_intr_info = 0; - /* fall through to normal code, but now in L1, not L2 */ + if (nested_exit_on_intr(vcpu)) { + /* + * Check if the idt_vectoring_info_field is free. We + * cannot raise EXIT_REASON_EXTERNAL_INTERRUPT if it + * isn't. + */ + if (vmcs12->idt_vectoring_info_field & + VECTORING_INFO_VALID_MASK) + return 0; + nested_vmx_vmexit(vcpu); + vmcs12->vm_exit_reason = + EXIT_REASON_EXTERNAL_INTERRUPT; + vmcs12->vm_exit_intr_info = 0; + /* + * fall through to normal code, but now in L1, not L2 + */ + } } return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
If we are in guest mode, L0 can only inject events into L2 if L1 has nothing pending. Otherwise, L0 would overwrite L1's events and they would get lost. This check is conceptually independent of nested_exit_on_intr. If L1 traps external interrupts, then we also need to look at L1's idt_vectoring_info_field. If it is empty, we can kick the guest from L2 to L1, just like the previous code worked. Finally, the logic for checking interrupt has to be applied also on NMIs in an analogous way. This enables NMI interception for nested guests. Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> --- arch/x86/kvm/vmx.c | 59 ++++++++++++++++++++++++++++++++++++++++++++------- 1 files changed, 51 insertions(+), 8 deletions(-)