[05/22] KVM: x86: Clean up and document nested #PF workaround

Message ID	20220414074000.31438-6-pbonzini@redhat.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <kvm-owner@kernel.org> From: Paolo Bonzini <pbonzini@redhat.com> To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org Cc: seanjc@google.com Subject: [PATCH 05/22] KVM: x86: Clean up and document nested #PF workaround Date: Thu, 14 Apr 2022 03:39:43 -0400 Message-Id: <20220414074000.31438-6-pbonzini@redhat.com> In-Reply-To: <20220414074000.31438-1-pbonzini@redhat.com> References: <20220414074000.31438-1-pbonzini@redhat.com> MIME-Version: 1.0 Content-type: text/plain Content-Transfer-Encoding: 8bit Precedence: bulk
Series	https://www.spinics.net/lists/kvm/msg267878.html \| expand [v3,00/22] https://www.spinics.net/lists/kvm/msg267878.html [01/22] KVM: x86/mmu: nested EPT cannot be used in SMM [02/22] KVM: x86/mmu: constify uses of struct kvm_mmu_role_regs [03/22] KVM: x86/mmu: pull computation of kvm_mmu_role_regs to kvm_init_mmu [04/22] KVM: x86/mmu: rephrase unclear comment [05/22] KVM: x86: Clean up and document nested #PF workaround [06/22] KVM: x86/mmu: remove "bool base_only" arguments [07/22] KVM: x86/mmu: split cpu_role from mmu_role [08/22] KVM: x86/mmu: do not recompute root level from kvm_mmu_role_regs [09/22] KVM: x86/mmu: remove ept_ad field [10/22] KVM: x86/mmu: remove kvm_calc_shadow_root_page_role_common [11/22] KVM: x86/mmu: cleanup computation of MMU roles for two-dimensional paging [12/22] KVM: x86/mmu: cleanup computation of MMU roles for shadow paging [13/22] KVM: x86/mmu: store shadow EFER.NX in the MMU role [14/22] KVM: x86/mmu: remove extended bits from mmu_role, rename field [15/22] KVM: x86/mmu: rename kvm_mmu_role union [16/22] KVM: x86/mmu: remove redundant bits from extended role [17/22] KVM: x86/mmu: remove valid from extended role [18/22] KVM: x86/mmu: simplify and/or inline computation of shadow MMU roles [19/22] KVM: x86/mmu: pull CPU mode computation to kvm_init_mmu [20/22] KVM: x86/mmu: replace shadow_root_level with root_role.level [21/22] KVM: x86/mmu: replace root_level with cpu_role.base.level [22/22] KVM: x86/mmu: replace direct_map with root_role.direct

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e1c695f72b8b..e46bd289e5df 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1516,6 +1516,8 @@ struct kvm_x86_ops { struct kvm_x86_nested_ops { void (*leave_nested)(struct kvm_vcpu *vcpu); int (*check_events)(struct kvm_vcpu *vcpu); + bool (*handle_page_fault_workaround)(struct kvm_vcpu *vcpu, + struct x86_exception *fault); bool (*hv_timer_pending)(struct kvm_vcpu *vcpu); void (*triple_fault)(struct kvm_vcpu *vcpu); int (*get_state)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index caa691229b71..bed5e1692cef 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -55,24 +55,26 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, nested_svm_vmexit(svm); } -static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_exception *fault) +static bool nested_svm_handle_page_fault_workaround(struct kvm_vcpu *vcpu, + struct x86_exception *fault) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; - WARN_ON(!is_guest_mode(vcpu)); + WARN_ON(!is_guest_mode(vcpu)); if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) && - !svm->nested.nested_run_pending) { - vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR; + !WARN_ON_ONCE(svm->nested.nested_run_pending)) { + vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR; vmcb->control.exit_code_hi = 0; vmcb->control.exit_info_1 = fault->error_code; vmcb->control.exit_info_2 = fault->address; nested_svm_vmexit(svm); - } else { - kvm_inject_page_fault(vcpu, fault); + return true; } + + return false; } static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) @@ -751,9 +753,6 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, if (ret) return ret; - if (!npt_enabled) - vcpu->arch.mmu->inject_page_fault = svm_inject_page_fault_nested; - if (!from_vmrun) kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); @@ -1659,6 +1658,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) struct kvm_x86_nested_ops svm_nested_ops = { .leave_nested = svm_leave_nested, .check_events = svm_check_nested_events, + .handle_page_fault_workaround = nested_svm_handle_page_fault_workaround, .triple_fault = nested_svm_triple_fault, .get_nested_state_pages = svm_get_nested_state_pages, .get_state = svm_get_nested_state, diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 838ac7ab5950..a6688663da4d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -476,24 +476,23 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit return 0; } - -static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, - struct x86_exception *fault) +static bool nested_vmx_handle_page_fault_workaround(struct kvm_vcpu *vcpu, + struct x86_exception *fault) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); WARN_ON(!is_guest_mode(vcpu)); if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) && - !to_vmx(vcpu)->nested.nested_run_pending) { + !WARN_ON_ONCE(to_vmx(vcpu)->nested.nested_run_pending)) { vmcs12->vm_exit_intr_error_code = fault->error_code; nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, fault->address); - } else { - kvm_inject_page_fault(vcpu, fault); + return true; } + return false; } static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu, @@ -2614,9 +2613,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); } - if (!enable_ept) - vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; - if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, vmcs12->guest_ia32_perf_global_ctrl))) { @@ -6830,6 +6826,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) struct kvm_x86_nested_ops vmx_nested_ops = { .leave_nested = vmx_leave_nested, .check_events = vmx_check_nested_events, + .handle_page_fault_workaround = nested_vmx_handle_page_fault_workaround, .hv_timer_pending = nested_vmx_preemption_timer_pending, .triple_fault = nested_vmx_triple_fault, .get_state = vmx_get_nested_state, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4e7f3a8da16a..9866853ca320 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -748,6 +748,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) } EXPORT_SYMBOL_GPL(kvm_inject_page_fault); +/* Returns true if the page fault was immediately morphed into a VM-Exit. */ bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) { @@ -766,8 +767,26 @@ bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address, fault_mmu->root.hpa); + /* + * A workaround for KVM's bad exception handling. If KVM injected an + * exception into L2, and L2 encountered a #PF while vectoring the + * injected exception, manually check to see if L1 wants to intercept + * #PF, otherwise queuing the #PF will lead to #DF or a lost exception. + * In all other cases, defer the check to nested_ops->check_events(), + * which will correctly handle priority (this does not). Note, other + * exceptions, e.g. #GP, are theoretically affected, #PF is simply the + * most problematic, e.g. when L0 and L1 are both intercepting #PF for + * shadow paging. + * + * TODO: Rewrite exception handling to track injected and pending + * (VM-Exit) exceptions separately. + */ + if (unlikely(vcpu->arch.exception.injected && is_guest_mode(vcpu)) && + kvm_x86_ops.nested_ops->handle_page_fault_workaround(vcpu, fault)) + return true; + fault_mmu->inject_page_fault(vcpu, fault); - return fault->nested_page_fault; + return false; } EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);

[05/22] KVM: x86: Clean up and document nested #PF workaround

Commit Message

Patch