Message ID | 20200922052343.84388-1-yadong.qi@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: x86: emulate wait-for-SIPI and SIPI-VMExit | expand |
On 22/09/20 07:23, yadong.qi@intel.com wrote: > From: Yadong Qi <yadong.qi@intel.com> > > Background: We have a lightweight HV, it needs INIT-VMExit and > SIPI-VMExit to wake-up APs for guests since it does not monitor > the Local APIC. But currently virtual wait-for-SIPI(WFS) state > is not supported in nVMX, so when running on top of KVM, the L1 > HV cannot receive the INIT-VMExit and SIPI-VMExit, which means > the L2 guest cannot wake up the APs. > > According to Intel SDM Chapter 25.2 Other Causes of VM Exits, > SIPIs cause VM exits when a logical processor is in > wait-for-SIPI state. > > In this patch: > 1. introduce SIPI exit reason, > 2. introduce wait-for-SIPI state for nVMX, > 3. advertise wait-for-SIPI support to guest. > > When L1 hypervisor is not monitoring Local APIC, L0 needs to emulate > INIT-VMExit and SIPI-VMExit to L1 to emulate INIT-SIPI-SIPI for > L2. L2 LAPIC write would be trapped by L0 Hypervisor(KVM), L0 should > emulate the INIT/SIPI vmexit to L1 hypervisor to set proper state > for L2's vcpu state. > > Handling procedure: > Source vCPU: > L2 write LAPIC.ICR(INIT). > L0 trap LAPIC.ICR write(INIT): inject a latched INIT event to target > vCPU. > Target vCPU: > L0 emulate an INIT VMExit to L1 if is guest mode. > L1 set guest VMCS, guest_activity_state=WAIT_SIPI, vmresume. > L0 set vcpu.mp_state to INIT_RECEIVED if (vmcs12.guest_activity_state > == WAIT_SIPI). > > Source vCPU: > L2 write LAPIC.ICR(SIPI). > L0 trap LAPIC.ICR write(SIPI): inject a latched SIPI event to target > vCPU. > Target vCPU: > L0 emulate an SIPI VMExit to L1 if (vcpu.mp_state == INIT_RECEIVED). > L1 set CS:IP, guest_activity_state=ACTIVE, vmresume. > L0 resume to L2. > L2 start-up. Again, this looks good but it needs testcases. 
Thanks, Paolo > Signed-off-by: Yadong Qi <yadong.qi@intel.com> > --- > arch/x86/include/asm/vmx.h | 1 + > arch/x86/include/uapi/asm/vmx.h | 2 ++ > arch/x86/kvm/lapic.c | 5 ++-- > arch/x86/kvm/vmx/nested.c | 53 ++++++++++++++++++++++++--------- > 4 files changed, 45 insertions(+), 16 deletions(-) > > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h > index cd7de4b401fe..bff06dc64c52 100644 > --- a/arch/x86/include/asm/vmx.h > +++ b/arch/x86/include/asm/vmx.h > @@ -113,6 +113,7 @@ > #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f > #define VMX_MISC_SAVE_EFER_LMA 0x00000020 > #define VMX_MISC_ACTIVITY_HLT 0x00000040 > +#define VMX_MISC_ACTIVITY_WAIT_SIPI 0x00000100 > #define VMX_MISC_ZERO_LEN_INS 0x40000000 > #define VMX_MISC_MSR_LIST_MULTIPLIER 512 > > diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h > index b8ff9e8ac0d5..ada955c5ebb6 100644 > --- a/arch/x86/include/uapi/asm/vmx.h > +++ b/arch/x86/include/uapi/asm/vmx.h > @@ -32,6 +32,7 @@ > #define EXIT_REASON_EXTERNAL_INTERRUPT 1 > #define EXIT_REASON_TRIPLE_FAULT 2 > #define EXIT_REASON_INIT_SIGNAL 3 > +#define EXIT_REASON_SIPI_SIGNAL 4 > > #define EXIT_REASON_INTERRUPT_WINDOW 7 > #define EXIT_REASON_NMI_WINDOW 8 > @@ -94,6 +95,7 @@ > { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ > { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ > { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \ > + { EXIT_REASON_SIPI_SIGNAL, "SIPI_SIGNAL" }, \ > { EXIT_REASON_INTERRUPT_WINDOW, "INTERRUPT_WINDOW" }, \ > { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ > { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index 5ccbee7165a2..d04ac7dc6adf 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -2839,7 +2839,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) > > /* > * INITs are latched while CPU is in specific states > - * (SMM, VMX non-root mode, SVM with GIF=0). > + * (SMM, SVM with GIF=0). 
> * Because a CPU cannot be in these states immediately > * after it has processed an INIT signal (and thus in > * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs > @@ -2847,7 +2847,8 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) > */ > if (kvm_vcpu_latch_init(vcpu)) { > WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED); > - if (test_bit(KVM_APIC_SIPI, &apic->pending_events)) > + if (test_bit(KVM_APIC_SIPI, &apic->pending_events) && > + !is_guest_mode(vcpu)) > clear_bit(KVM_APIC_SIPI, &apic->pending_events); > return; > } > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c > index 1bb6b31eb646..fe3bb68df987 100644 > --- a/arch/x86/kvm/vmx/nested.c > +++ b/arch/x86/kvm/vmx/nested.c > @@ -2946,7 +2946,8 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, > static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) > { > if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && > - vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)) > + vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT && > + vmcs12->guest_activity_state != GUEST_ACTIVITY_WAIT_SIPI)) > return -EINVAL; > > return 0; > @@ -3543,19 +3544,29 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) > */ > nested_cache_shadow_vmcs12(vcpu, vmcs12); > > - /* > - * If we're entering a halted L2 vcpu and the L2 vcpu won't be > - * awakened by event injection or by an NMI-window VM-exit or > - * by an interrupt-window VM-exit, halt the vcpu. 
> - */ > - if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && > - !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) && > - !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_NMI_WINDOW_EXITING) && > - !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_INTR_WINDOW_EXITING) && > - (vmcs12->guest_rflags & X86_EFLAGS_IF))) { > + switch (vmcs12->guest_activity_state) { > + case GUEST_ACTIVITY_HLT: > + /* > + * If we're entering a halted L2 vcpu and the L2 vcpu won't be > + * awakened by event injection or by an NMI-window VM-exit or > + * by an interrupt-window VM-exit, halt the vcpu. > + */ > + if (!(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) && > + !nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING) && > + !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) && > + (vmcs12->guest_rflags & X86_EFLAGS_IF))) { > + vmx->nested.nested_run_pending = 0; > + return kvm_vcpu_halt(vcpu); > + } > + break; > + case GUEST_ACTIVITY_WAIT_SIPI: > vmx->nested.nested_run_pending = 0; > - return kvm_vcpu_halt(vcpu); > + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; > + break; > + default: > + break; > } > + > return 1; > > vmentry_failed: > @@ -3781,7 +3792,20 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) > return -EBUSY; > nested_vmx_update_pending_dbg(vcpu); > clear_bit(KVM_APIC_INIT, &apic->pending_events); > - nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); > + if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED) > + nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); > + return 0; > + } > + > + if (lapic_in_kernel(vcpu) && > + test_bit(KVM_APIC_SIPI, &apic->pending_events)) { > + if (block_nested_events) > + return -EBUSY; > + > + clear_bit(KVM_APIC_SIPI, &apic->pending_events); > + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) > + nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0, > + apic->sipi_vector & 0xFFUL); > return 0; > } > > @@ -6471,7 +6495,8 @@ void nested_vmx_setup_ctls_msrs(struct 
nested_vmx_msrs *msrs, u32 ept_caps) > msrs->misc_low |= > MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | > VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | > - VMX_MISC_ACTIVITY_HLT; > + VMX_MISC_ACTIVITY_HLT | > + VMX_MISC_ACTIVITY_WAIT_SIPI; > msrs->misc_high = 0; > > /* >
> Again, this looks good but it needs testcases.
Yes, the unit test development is WIP.
Best Regards
Yadong
> Subject: RE: [PATCH] KVM: x86: emulate wait-for-SIPI and SIPI-VMExit > > > Again, this looks good but it needs testcases. > > Yes, the unit test development is WIP. > Hi, Paolo I have sent out the unit test patch. https://patchwork.kernel.org/patch/11799305/ Could you help review it? Thanks a lot. Best Regard Yadong
On 27/09/20 03:51, Qi, Yadong wrote: >> Subject: RE: [PATCH] KVM: x86: emulate wait-for-SIPI and SIPI-VMExit >> >>> Again, this looks good but it needs testcases. >> >> Yes, the unit test development is WIP. >> > > Hi, Paolo > > I have sent out the unit test patch. > https://patchwork.kernel.org/patch/11799305/ > Could you help review it? Thanks a lot. Yes, I've seen it. Tomorrow. :) Paolo
> -----Original Message----- > From: Paolo Bonzini <pbonzini@redhat.com> > Sent: Tuesday, September 22, 2020 5:10 PM > To: Qi, Yadong <yadong.qi@intel.com>; kvm@vger.kernel.org; linux- > kernel@vger.kernel.org; x86@kernel.org > Cc: Christopherson, Sean J <sean.j.christopherson@intel.com>; > vkuznets@redhat.com; wanpengli@tencent.com; jmattson@google.com; > joro@8bytes.org; tglx@linutronix.de; mingo@redhat.com; bp@alien8.de; > hpa@zytor.com; liran.alon@oracle.com; nikita.leshchenko@oracle.com; Gao, > Chao <chao.gao@intel.com>; Tian, Kevin <kevin.tian@intel.com>; Chen, Luhai > <luhai.chen@intel.com>; Zhu, Bing <bing.zhu@intel.com>; Wang, Kai Z > <kai.z.wang@intel.com> > Subject: Re: [PATCH] KVM: x86: emulate wait-for-SIPI and SIPI-VMExit > > On 22/09/20 07:23, yadong.qi@intel.com wrote: > > From: Yadong Qi <yadong.qi@intel.com> > > > > Background: We have a lightweight HV, it needs INIT-VMExit and > > SIPI-VMExit to wake-up APs for guests since it do not monitor the > > Local APIC. But currently virtual wait-for-SIPI(WFS) state is not > > supported in nVMX, so when running on top of KVM, the L1 HV cannot > > receive the INIT-VMExit and SIPI-VMExit which cause the L2 guest > > cannot wake up the APs. > > > > According to Intel SDM Chapter 25.2 Other Causes of VM Exits, SIPIs > > cause VM exits when a logical processor is in wait-for-SIPI state. > > > > In this patch: > > 1. introduce SIPI exit reason, > > 2. introduce wait-for-SIPI state for nVMX, > > 3. advertise wait-for-SIPI support to guest. > > > > When L1 hypervisor is not monitoring Local APIC, L0 need to emulate > > INIT-VMExit and SIPI-VMExit to L1 to emulate INIT-SIPI-SIPI for L2. L2 > > LAPIC write would be traped by L0 Hypervisor(KVM), L0 should emulate > > the INIT/SIPI vmexit to L1 hypervisor to set proper state for L2's > > vcpu state. > > > > Handle procdure: > > Source vCPU: > > L2 write LAPIC.ICR(INIT). > > L0 trap LAPIC.ICR write(INIT): inject a latched INIT event to target > > vCPU. 
> > Target vCPU: > > L0 emulate an INIT VMExit to L1 if is guest mode. > > L1 set guest VMCS, guest_activity_state=WAIT_SIPI, vmresume. > > L0 set vcpu.mp_state to INIT_RECEIVED if (vmcs12.guest_activity_state > > == WAIT_SIPI). > > > > Source vCPU: > > L2 write LAPIC.ICR(SIPI). > > L0 trap LAPIC.ICR write(INIT): inject a latched SIPI event to traget > > vCPU. > > Target vCPU: > > L0 emulate an SIPI VMExit to L1 if (vcpu.mp_state == INIT_RECEIVED). > > L1 set CS:IP, guest_activity_state=ACTIVE, vmresume. > > L0 resume to L2. > > L2 start-up. > > Again, this looks good but it needs testcases. > Hi, Paolo I saw you queued the testcase patch: https://patchwork.kernel.org/project/kvm/patch/20201013052845.249113-1-yadong.qi@intel.com/ Will you also queue this patch? Or there are some additional comments of this patch? Best Regard Yadong
On 22/09/20 07:23, yadong.qi@intel.com wrote: > From: Yadong Qi <yadong.qi@intel.com> > > Background: We have a lightweight HV, it needs INIT-VMExit and > SIPI-VMExit to wake-up APs for guests since it do not monitor > the Local APIC. But currently virtual wait-for-SIPI(WFS) state > is not supported in nVMX, so when running on top of KVM, the L1 > HV cannot receive the INIT-VMExit and SIPI-VMExit which cause > the L2 guest cannot wake up the APs. > > According to Intel SDM Chapter 25.2 Other Causes of VM Exits, > SIPIs cause VM exits when a logical processor is in > wait-for-SIPI state. > > In this patch: > 1. introduce SIPI exit reason, > 2. introduce wait-for-SIPI state for nVMX, > 3. advertise wait-for-SIPI support to guest. > > When L1 hypervisor is not monitoring Local APIC, L0 need to emulate > INIT-VMExit and SIPI-VMExit to L1 to emulate INIT-SIPI-SIPI for > L2. L2 LAPIC write would be traped by L0 Hypervisor(KVM), L0 should > emulate the INIT/SIPI vmexit to L1 hypervisor to set proper state > for L2's vcpu state. There is a problem in this patch, in that this change is incorrect: > > @@ -2847,7 +2847,8 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) > */ > if (kvm_vcpu_latch_init(vcpu)) { > WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED); > - if (test_bit(KVM_APIC_SIPI, &apic->pending_events)) > + if (test_bit(KVM_APIC_SIPI, &apic->pending_events) && > + !is_guest_mode(vcpu)) > clear_bit(KVM_APIC_SIPI, &apic->pending_events); > return; > } Here you're not trying to process a latched INIT; you just want to delay the processing of the SIPI until check_nested_events. The change does have a correct part in it. 
In particular, vmx_apic_init_signal_blocked should have been diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 47b8357b9751..64339121a4f0 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7558,7 +7558,7 @@ static void enable_smi_window(struct kvm_vcpu *vcpu) static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) { - return to_vmx(vcpu)->nested.vmxon; + return to_vmx(vcpu)->nested.vmxon && !is_guest_mode(vcpu); } static void vmx_migrate_timers(struct kvm_vcpu *vcpu) to only latch INIT signals in root mode. However, SIPI must be cleared unconditionally on SVM; the "!is_guest_mode" test in that case is incorrect. The right way to do it is to call check_nested_events from kvm_apic_accept_events. This will cause an INIT or SIPI vmexit, as required. There is some extra complication to read pending_events *before* kvm_apic_accept_events and not steal from the guest any INIT or SIPI that is sent after kvm_apic_accept_events returns. Thanks to your test case, I will test a patch and send it. Paolo
> There is a problem in this patch, in that this change is incorrect: > > > > > @@ -2847,7 +2847,8 @@ void kvm_apic_accept_events(struct kvm_vcpu > *vcpu) > > */ > > if (kvm_vcpu_latch_init(vcpu)) { > > WARN_ON_ONCE(vcpu->arch.mp_state == > KVM_MP_STATE_INIT_RECEIVED); > > - if (test_bit(KVM_APIC_SIPI, &apic->pending_events)) > > + if (test_bit(KVM_APIC_SIPI, &apic->pending_events) && > > + !is_guest_mode(vcpu)) > > clear_bit(KVM_APIC_SIPI, &apic->pending_events); > > return; > > } > > Here you're not trying to process a latched INIT; you just want to delay the > processing of the SIPI until check_nested_events. > > The change does have a correct part in it. In particular, > vmx_apic_init_signal_blocked should have been > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index > 47b8357b9751..64339121a4f0 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -7558,7 +7558,7 @@ static void enable_smi_window(struct kvm_vcpu > *vcpu) > > static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) > { > - return to_vmx(vcpu)->nested.vmxon; > + return to_vmx(vcpu)->nested.vmxon && !is_guest_mode(vcpu); > } > > static void vmx_migrate_timers(struct kvm_vcpu *vcpu) > > to only latch INIT signals in root mode. However, SIPI must be cleared > unconditionally on SVM; the "!is_guest_mode" test in that case is incorrect. > > The right way to do it is to call check_nested_events from > kvm_apic_accept_events. This will cause an INIT or SIPI vmexit, as required. > There is some extra complication to read pending_events > *before* kvm_apic_accept_events and not steal from the guest any INIT or SIPI > that is sent after kvm_apic_accept_events returns. > > Thanks to your test case, I will test a patch and send it. > Thanks very much, Paolo. BTW, I noticed another issue in sync_vmcs02_to_vmcs12(): vmcs12->guest_activity_state is not set properly when mp_state is INIT_RECEIVED. I will correct it and send v2 of Patch 2/2.
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index cd7de4b401fe..bff06dc64c52 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -113,6 +113,7 @@ #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f #define VMX_MISC_SAVE_EFER_LMA 0x00000020 #define VMX_MISC_ACTIVITY_HLT 0x00000040 +#define VMX_MISC_ACTIVITY_WAIT_SIPI 0x00000100 #define VMX_MISC_ZERO_LEN_INS 0x40000000 #define VMX_MISC_MSR_LIST_MULTIPLIER 512 diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index b8ff9e8ac0d5..ada955c5ebb6 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -32,6 +32,7 @@ #define EXIT_REASON_EXTERNAL_INTERRUPT 1 #define EXIT_REASON_TRIPLE_FAULT 2 #define EXIT_REASON_INIT_SIGNAL 3 +#define EXIT_REASON_SIPI_SIGNAL 4 #define EXIT_REASON_INTERRUPT_WINDOW 7 #define EXIT_REASON_NMI_WINDOW 8 @@ -94,6 +95,7 @@ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \ + { EXIT_REASON_SIPI_SIGNAL, "SIPI_SIGNAL" }, \ { EXIT_REASON_INTERRUPT_WINDOW, "INTERRUPT_WINDOW" }, \ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 5ccbee7165a2..d04ac7dc6adf 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2839,7 +2839,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) /* * INITs are latched while CPU is in specific states - * (SMM, VMX non-root mode, SVM with GIF=0). + * (SMM, SVM with GIF=0). 
* Because a CPU cannot be in these states immediately * after it has processed an INIT signal (and thus in * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs @@ -2847,7 +2847,8 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) */ if (kvm_vcpu_latch_init(vcpu)) { WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED); - if (test_bit(KVM_APIC_SIPI, &apic->pending_events)) + if (test_bit(KVM_APIC_SIPI, &apic->pending_events) && + !is_guest_mode(vcpu)) clear_bit(KVM_APIC_SIPI, &apic->pending_events); return; } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1bb6b31eb646..fe3bb68df987 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2946,7 +2946,8 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) { if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && - vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)) + vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT && + vmcs12->guest_activity_state != GUEST_ACTIVITY_WAIT_SIPI)) return -EINVAL; return 0; @@ -3543,19 +3544,29 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) */ nested_cache_shadow_vmcs12(vcpu, vmcs12); - /* - * If we're entering a halted L2 vcpu and the L2 vcpu won't be - * awakened by event injection or by an NMI-window VM-exit or - * by an interrupt-window VM-exit, halt the vcpu. 
- */ - if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && - !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) && - !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_NMI_WINDOW_EXITING) && - !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_INTR_WINDOW_EXITING) && - (vmcs12->guest_rflags & X86_EFLAGS_IF))) { + switch (vmcs12->guest_activity_state) { + case GUEST_ACTIVITY_HLT: + /* + * If we're entering a halted L2 vcpu and the L2 vcpu won't be + * awakened by event injection or by an NMI-window VM-exit or + * by an interrupt-window VM-exit, halt the vcpu. + */ + if (!(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) && + !nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING) && + !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) && + (vmcs12->guest_rflags & X86_EFLAGS_IF))) { + vmx->nested.nested_run_pending = 0; + return kvm_vcpu_halt(vcpu); + } + break; + case GUEST_ACTIVITY_WAIT_SIPI: vmx->nested.nested_run_pending = 0; - return kvm_vcpu_halt(vcpu); + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + break; + default: + break; } + return 1; vmentry_failed: @@ -3781,7 +3792,20 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) return -EBUSY; nested_vmx_update_pending_dbg(vcpu); clear_bit(KVM_APIC_INIT, &apic->pending_events); - nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); + if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED) + nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); + return 0; + } + + if (lapic_in_kernel(vcpu) && + test_bit(KVM_APIC_SIPI, &apic->pending_events)) { + if (block_nested_events) + return -EBUSY; + + clear_bit(KVM_APIC_SIPI, &apic->pending_events); + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) + nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0, + apic->sipi_vector & 0xFFUL); return 0; } @@ -6471,7 +6495,8 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) msrs->misc_low |= MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | 
VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | - VMX_MISC_ACTIVITY_HLT; + VMX_MISC_ACTIVITY_HLT | + VMX_MISC_ACTIVITY_WAIT_SIPI; msrs->misc_high = 0; /*