@@ -4784,6 +4784,82 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
}
+int nested_vmx_leave_smm(struct kvm_vcpu *vcpu)
+{
+	enum vm_entry_failure_code entry_failure_code;
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	kvm_service_local_tlb_flush_requests(vcpu);
+
+	vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
+
+	prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12);
+
+	if (prepare_vmcs02(vcpu, vmcs12, false, &entry_failure_code)) {
+		vmx_switch_vmcs(vcpu, &vmx->vmcs01);
+		nested_vmx_restore_host_state(vcpu);
+		return -EINVAL;
+	}
+
+	enter_guest_mode(vcpu);
+
+	kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+
+	vmx->nested.preemption_timer_expired = false;
+	if (nested_cpu_has_preemption_timer(vmcs12)) {
+		u64 timer_value = vmx_calc_preemption_timer_value(vcpu);
+
+		vmx_start_preemption_timer(vcpu, timer_value);
+	}
+
+	return 0;
+}
+
+void nested_vmx_enter_smm(struct kvm_vcpu *vcpu)
+{
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	enum vm_entry_failure_code ignored;
+
+#ifdef CONFIG_KVM_HYPERV
+	if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu))
+		nested_get_evmcs_page(vcpu);
+#endif
+
+	if (nested_cpu_has_preemption_timer(vmcs12))
+		hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
+
+	kvm_service_local_tlb_flush_requests(vcpu);
+
+	leave_guest_mode(vcpu);
+
+	sync_vmcs02_to_vmcs12(vcpu, vmcs12);
+	nested_flush_cached_shadow_vmcs12(vcpu, vmcs12);
+
+	vmx->nested.mtf_pending = false;
+	vcpu->arch.nmi_injected = false;
+	kvm_clear_exception_queue(vcpu);
+	kvm_clear_interrupt_queue(vcpu);
+
+	kvm_vcpu_unmap(vcpu, &vmx->nested.apic_access_page_map, false);
+	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
+	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
+	vmx->nested.pi_desc = NULL;
+
+	vmx_switch_vmcs(vcpu, &vmx->vmcs01);
+
+	if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+		indirect_branch_prediction_barrier();
+
+	nested_ept_uninit_mmu_context(vcpu);
+
+	if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, true, &ignored))
+		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
+
+	nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);
+}
+
/*
* Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
* and modify vmcs12 to make it see what it would expect to see there if
@@ -23,11 +23,13 @@ void nested_vmx_hardware_unsetup(void);
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
void nested_vmx_set_vmcs_shadowing_bitmap(void);
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
+int nested_vmx_leave_smm(struct kvm_vcpu *vcpu);
enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
							 bool from_vmentry);
bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu);
void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
		       u32 exit_intr_info, unsigned long exit_qualification);
+void nested_vmx_enter_smm(struct kvm_vcpu *vcpu);
void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu);
int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
@@ -8231,16 +8231,9 @@ int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- /*
- * TODO: Implement custom flows for forcing the vCPU out/in of L2 on
- * SMI and RSM. Using the common VM-Exit + VM-Enter routines is wrong
- * SMI and RSM only modify state that is saved and restored via SMRAM.
- * E.g. most MSRs are left untouched, but many are modified by VM-Exit
- * and VM-Enter, and thus L2's values may be corrupted on SMI+RSM.
- */
vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
if (vmx->nested.smm.guest_mode)
- nested_vmx_vmexit(vcpu, -1, 0, 0);
+ nested_vmx_enter_smm(vcpu);
vmx->nested.smm.vmxon = vmx->nested.vmxon;
vmx->nested.vmxon = false;
@@ -8259,12 +8252,14 @@ int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
}
if (vmx->nested.smm.guest_mode) {
- ret = nested_vmx_enter_non_root_mode(vcpu, false);
- if (ret)
- return ret;
-
vmx->nested.nested_run_pending = 1;
vmx->nested.smm.guest_mode = false;
+
+ ret = nested_vmx_leave_smm(vcpu);
+ if (ret) {
+ vmx->nested.nested_run_pending = 0;
+ return ret;
+ }
}
return 0;
}
This change aims to resolve a TODO documented in commit 5d76b1f8c793
("KVM: nVMX: Rename nested.vmcs01_* fields to nested.pre_vmenter_*"):

	/*
	 * TODO: Implement custom flows for forcing the vCPU out/in of L2 on
	 * SMI and RSM. Using the common VM-Exit + VM-Enter routines is wrong
	 * SMI and RSM only modify state that is saved and restored via SMRAM.
	 * E.g. most MSRs are left untouched, but many are modified by VM-Exit
	 * and VM-Enter, and thus L2's values may be corrupted on SMI+RSM.
	 */

When an SMI is injected while running an L2 guest, the VMX
enter/leave_smm() callbacks currently invoke the standard L2
VM-Exit/VM-Entry routines. This has the side effect of touching more
state than is necessary to emulate SMM.

This change therefore introduces nested_vmx_enter_smm() and
nested_vmx_leave_smm(), invoked by the VMX enter/leave_smm() callbacks
on SMI and RSM respectively. These functions perform only the steps
needed to transition a vCPU out of L2 on SMI and back into L2 on RSM.

nested_vmx_enter_smm() syncs and flushes the cached vmcs12, leaves guest
mode, uninitializes the nested MMU context, switches to vmcs01 and loads
the host CR3 so the SMM handler can execute. nested_vmx_leave_smm()
switches back to vmcs02, re-enters guest mode and uses prepare_vmcs02()
to re-establish the state required to resume L2 execution.

Suggested-by: Sean Christopherson <seanjc@google.com>
Fixes: 72e9cbdb4338 ("KVM: nVMX: fix SMI injection in guest mode")
Signed-off-by: Kishen Maloor <kishen.maloor@intel.com>
---
The kvm-unit-tests and selftests did not flag any errors on my setup.
I would appreciate any comments.
---
 arch/x86/kvm/vmx/nested.c | 76 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/nested.h |  2 ++
 arch/x86/kvm/vmx/vmx.c    | 19 ++++------
 3 files changed, 85 insertions(+), 12 deletions(-)
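
For reference, here is a sketch of how vmx_enter_smm()/vmx_leave_smm()
would read with this patch applied, reconstructed from the hunks above.
The surrounding lines that the diff context does not show (e.g.
vmx_clear_hlt() and the nested.smm.vmxon save/restore) are assumed from
the current upstream code and are not modified by this change:

int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/* Force the vCPU out of L2 without emulating a full VM-Exit. */
	vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
	if (vmx->nested.smm.guest_mode)
		nested_vmx_enter_smm(vcpu);

	vmx->nested.smm.vmxon = vmx->nested.vmxon;
	vmx->nested.vmxon = false;
	vmx_clear_hlt(vcpu);
	return 0;
}

int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int ret;

	if (vmx->nested.smm.vmxon) {
		vmx->nested.vmxon = true;
		vmx->nested.smm.vmxon = false;
	}

	if (vmx->nested.smm.guest_mode) {
		vmx->nested.nested_run_pending = 1;
		vmx->nested.smm.guest_mode = false;

		/* Force the vCPU back into L2 without a full VM-Entry. */
		ret = nested_vmx_leave_smm(vcpu);
		if (ret) {
			vmx->nested.nested_run_pending = 0;
			return ret;
		}
	}

	return 0;
}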