Message ID | 20240412173532.3481264-8-pbonzini@redhat.com (mailing list archive)
---|---
State | New, archived
Series | KVM: MMU changes for confidential computing
>
>-	if (cpu_has_secondary_exec_ctrls())
>+	if (cpu_has_secondary_exec_ctrls()) {
> 		secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx));
>+		if (secondary_exec_controls_get(vmx) &
>+		    SECONDARY_EXEC_EPT_VIOLATION_VE) {
>+			if (!vmx->ve_info) {

how about allocating ve_info in vmx_vcpu_create()? It seems better to me because:

a. symmetry: ve_info is freed in vmx_vcpu_free().
b. no need to check whether this is the first call of init_vmcs(), and ENOMEM
   can be returned on allocation failure.

>+				/* ve_info must be page aligned. */
>+				struct page *page;
>+
>+				BUILD_BUG_ON(sizeof(*vmx->ve_info) > PAGE_SIZE);
>+				page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
>+				if (page)
>+					vmx->ve_info = page_to_virt(page);
>+			}
>+			if (vmx->ve_info) {
>+				/*
>+				 * Allow #VE delivery. CPU sets this field to
>+				 * 0xFFFFFFFF on #VE delivery. Another #VE can
>+				 * occur only if software clears the field.
>+				 */
>+				vmx->ve_info->delivery = 0;

Is it necessary to reset ve_info->delivery to 0 given __GFP_ZERO?

>+				vmcs_write64(VE_INFORMATION_ADDRESS,
>+					     __pa(vmx->ve_info));

I think the logic here should just be:

	if (secondary_exec_controls_get(vmx) & SECONDARY_EXEC_EPT_VIOLATION_VE)
		vmcs_write64(VE_INFORMATION_ADDRESS, __pa(vmx->ve_info));

>+			} else {
>+				/*
>+				 * Because SECONDARY_EXEC_EPT_VIOLATION_VE is
>+				 * used only for debugging, it's okay to leave
>+				 * it disabled.
>+				 */
>+				pr_err("Failed to allocate ve_info. disabling EPT_VIOLATION_VE.\n");
>+				secondary_exec_controls_clearbit(vmx,
>+						SECONDARY_EXEC_EPT_VIOLATION_VE);
>+			}
>+		}
>+	}
>
> 	if (cpu_has_tertiary_exec_ctrls())
> 		tertiary_exec_controls_set(vmx, vmx_tertiary_exec_control(vmx));
>@@ -5200,6 +5243,12 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
> 	if (is_invalid_opcode(intr_info))
> 		return handle_ud(vcpu);
>
>+	/*
>+	 * #VE isn't supposed to happen. Block the VM if it does.
>+	 */
>+	if (KVM_BUG_ON(is_ve_fault(intr_info), vcpu->kvm))
>+		return -EIO;
>+
> 	error_code = 0;
> 	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
> 		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
>@@ -7474,6 +7523,8 @@ void vmx_vcpu_free(struct kvm_vcpu *vcpu)
> 	free_vpid(vmx->vpid);
> 	nested_vmx_free_vcpu(vcpu);
> 	free_loaded_vmcs(vmx->loaded_vmcs);
>+	if (vmx->ve_info)
>+		free_page((unsigned long)vmx->ve_info);
> }
>
> int vmx_vcpu_create(struct kvm_vcpu *vcpu)
>diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
>index 65786dbe7d60..0da79a386825 100644
>--- a/arch/x86/kvm/vmx/vmx.h
>+++ b/arch/x86/kvm/vmx/vmx.h
>@@ -362,6 +362,9 @@ struct vcpu_vmx {
> 		DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
> 		DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
> 	} shadow_msr_intercept;
>+
>+	/* ve_info must be page aligned. */

this comment is not very useful here. I think it should be placed above the
call to alloc_page().

>+	struct vmx_ve_information *ve_info;
> };
>
> struct kvm_vmx {
>@@ -574,7 +577,8 @@ static inline u8 vmx_get_rvi(void)
> 	 SECONDARY_EXEC_ENABLE_VMFUNC |					\
> 	 SECONDARY_EXEC_BUS_LOCK_DETECTION |				\
> 	 SECONDARY_EXEC_NOTIFY_VM_EXITING |				\
>-	 SECONDARY_EXEC_ENCLS_EXITING)
>+	 SECONDARY_EXEC_ENCLS_EXITING |					\
>+	 SECONDARY_EXEC_EPT_VIOLATION_VE)
>
> #define KVM_REQUIRED_VMX_TERTIARY_VM_EXEC_CONTROL	0
> #define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL			\
>--
>2.43.0
>
>
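For reference, "delivery" is a field of the #VE information area that ve_info
points to. The structure definition is not part of the quoted hunks; the sketch
below follows the layout the Intel SDM gives for the #VE information area and
is illustrative only. It also makes Chao's __GFP_ZERO point concrete: a
zero-filled page already has delivery == 0, which is the "armed" state.

	/*
	 * Illustrative layout of the #VE information area per the Intel
	 * SDM. The CPU fills this in when it delivers a #VE and sets
	 * "delivery" to 0xFFFFFFFF; it will not deliver another #VE
	 * until software clears the field again.
	 */
	struct vmx_ve_information {
		u32 exit_reason;		/* always EPT violation */
		u32 delivery;			/* nonzero while a #VE is outstanding */
		u64 exit_qualification;
		u64 guest_linear_address;
		u64 guest_physical_address;
		u16 eptp_index;			/* for EPTP switching via VMFUNC */
	};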
On Mon, Apr 15, 2024 at 3:22 PM Chao Gao <chao.gao@intel.com> wrote:
>
> >-	if (cpu_has_secondary_exec_ctrls())
> >+	if (cpu_has_secondary_exec_ctrls()) {
> > 		secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx));
> >+		if (secondary_exec_controls_get(vmx) &
> >+		    SECONDARY_EXEC_EPT_VIOLATION_VE) {
> >+			if (!vmx->ve_info) {
>
> how about allocating ve_info in vmx_vcpu_create()? It seems better to me because:
>
> a. symmetry: ve_info is freed in vmx_vcpu_free().
> b. no need to check whether this is the first call of init_vmcs(), and ENOMEM
>    can be returned on allocation failure.

There is no need to return ENOMEM, however; it is okay to just disable
the test. That said, I agree that doing it in vmx_vcpu_create(),
conditional on vmcs_config.cpu_based_2nd_exec_ctrl, is a bit cleaner.

Paolo
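Putting the two suggestions together, the allocation would move out of
init_vmcs() along these lines. This is only a sketch of the direction discussed
above, not the final code: it gates on vmcs_config.cpu_based_2nd_exec_ctrl as
Paolo suggests, and on allocation failure it simply leaves ve_info NULL so that
init_vmcs() falls back to clearing SECONDARY_EXEC_EPT_VIOLATION_VE, instead of
returning -ENOMEM.

	/* Hypothetical hunk in vmx_vcpu_create(), per the discussion above. */
	if (vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_EPT_VIOLATION_VE) {
		struct page *page;

		/* ve_info must be page aligned; a whole page guarantees that. */
		BUILD_BUG_ON(sizeof(*vmx->ve_info) > PAGE_SIZE);
		page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
		if (page)
			vmx->ve_info = page_to_virt(page);
	}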
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 3aaf7e86a859..7632fe6e4db9 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -95,6 +95,19 @@ config KVM_INTEL
 	  To compile this as a module, choose M here: the module
 	  will be called kvm-intel.
 
+config KVM_INTEL_PROVE_VE
+	bool "Check that guests do not receive #VE exceptions"
+	default KVM_PROVE_MMU || DEBUG_KERNEL
+	depends on KVM_INTEL
+	help
+
+	  Checks that KVM's page table management code will not incorrectly
+	  let guests receive a virtualization exception. Virtualization
+	  exceptions will be trapped by the hypervisor rather than injected
+	  in the guest.
+
+	  If unsure, say N.
+
 config X86_SGX_KVM
 	bool "Software Guard eXtensions (SGX) Virtualization"
 	depends on X86_SGX && KVM_INTEL
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index 7c1996b433e2..b25625314658 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -140,6 +140,11 @@ static inline bool is_nm_fault(u32 intr_info)
 	return is_exception_n(intr_info, NM_VECTOR);
 }
 
+static inline bool is_ve_fault(u32 intr_info)
+{
+	return is_exception_n(intr_info, VE_VECTOR);
+}
+
 /* Undocumented: icebp/int1 */
 static inline bool is_icebp(u32 intr_info)
 {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 2c746318c6c3..1a5ad18a1fee 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -869,6 +869,12 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
 	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
 	     (1u << DB_VECTOR) | (1u << AC_VECTOR);
+	/*
+	 * #VE isn't used for VMX. To test against unexpected changes
+	 * related to #VE for VMX, intercept unexpected #VE and warn on it.
+	 */
+	if (IS_ENABLED(CONFIG_KVM_INTEL_PROVE_VE))
+		eb |= 1u << VE_VECTOR;
 	/*
 	 * Guest access to VMware backdoor ports could legitimately
 	 * trigger #GP because of TSS I/O permission bitmap.
@@ -2602,6 +2608,9 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 				&_cpu_based_2nd_exec_control))
 			return -EIO;
 	}
+	if (!IS_ENABLED(CONFIG_KVM_INTEL_PROVE_VE))
+		_cpu_based_2nd_exec_control &= ~SECONDARY_EXEC_EPT_VIOLATION_VE;
+
 #ifndef CONFIG_X86_64
 	if (!(_cpu_based_2nd_exec_control &
 				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
@@ -2626,6 +2635,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 			return -EIO;
 
 		vmx_cap->ept = 0;
+		_cpu_based_2nd_exec_control &= ~SECONDARY_EXEC_EPT_VIOLATION_VE;
 	}
 	if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
 	    vmx_cap->vpid) {
@@ -4588,6 +4598,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 		exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
 	if (!enable_ept) {
 		exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+		exec_control &= ~SECONDARY_EXEC_EPT_VIOLATION_VE;
 		enable_unrestricted_guest = 0;
 	}
 	if (!enable_unrestricted_guest)
@@ -4711,8 +4722,40 @@ static void init_vmcs(struct vcpu_vmx *vmx)
 
 	exec_controls_set(vmx, vmx_exec_control(vmx));
 
-	if (cpu_has_secondary_exec_ctrls())
+	if (cpu_has_secondary_exec_ctrls()) {
 		secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx));
+		if (secondary_exec_controls_get(vmx) &
+		    SECONDARY_EXEC_EPT_VIOLATION_VE) {
+			if (!vmx->ve_info) {
+				/* ve_info must be page aligned. */
+				struct page *page;
+
+				BUILD_BUG_ON(sizeof(*vmx->ve_info) > PAGE_SIZE);
+				page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+				if (page)
+					vmx->ve_info = page_to_virt(page);
+			}
+			if (vmx->ve_info) {
+				/*
+				 * Allow #VE delivery. CPU sets this field to
+				 * 0xFFFFFFFF on #VE delivery. Another #VE can
+				 * occur only if software clears the field.
+				 */
+				vmx->ve_info->delivery = 0;
+				vmcs_write64(VE_INFORMATION_ADDRESS,
+					     __pa(vmx->ve_info));
+			} else {
+				/*
+				 * Because SECONDARY_EXEC_EPT_VIOLATION_VE is
+				 * used only for debugging, it's okay to leave
+				 * it disabled.
+				 */
+				pr_err("Failed to allocate ve_info. disabling EPT_VIOLATION_VE.\n");
+				secondary_exec_controls_clearbit(vmx,
+						SECONDARY_EXEC_EPT_VIOLATION_VE);
+			}
+		}
+	}
 
 	if (cpu_has_tertiary_exec_ctrls())
 		tertiary_exec_controls_set(vmx, vmx_tertiary_exec_control(vmx));
@@ -5200,6 +5243,12 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 	if (is_invalid_opcode(intr_info))
 		return handle_ud(vcpu);
 
+	/*
+	 * #VE isn't supposed to happen. Block the VM if it does.
+	 */
+	if (KVM_BUG_ON(is_ve_fault(intr_info), vcpu->kvm))
+		return -EIO;
+
 	error_code = 0;
 	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
 		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
@@ -7474,6 +7523,8 @@ void vmx_vcpu_free(struct kvm_vcpu *vcpu)
 	free_vpid(vmx->vpid);
 	nested_vmx_free_vcpu(vcpu);
 	free_loaded_vmcs(vmx->loaded_vmcs);
+	if (vmx->ve_info)
+		free_page((unsigned long)vmx->ve_info);
 }
 
 int vmx_vcpu_create(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 65786dbe7d60..0da79a386825 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -362,6 +362,9 @@ struct vcpu_vmx {
 		DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
 		DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
 	} shadow_msr_intercept;
+
+	/* ve_info must be page aligned. */
+	struct vmx_ve_information *ve_info;
 };
 
 struct kvm_vmx {
@@ -574,7 +577,8 @@ static inline u8 vmx_get_rvi(void)
 	 SECONDARY_EXEC_ENABLE_VMFUNC |					\
 	 SECONDARY_EXEC_BUS_LOCK_DETECTION |				\
 	 SECONDARY_EXEC_NOTIFY_VM_EXITING |				\
-	 SECONDARY_EXEC_ENCLS_EXITING)
+	 SECONDARY_EXEC_ENCLS_EXITING |					\
+	 SECONDARY_EXEC_EPT_VIOLATION_VE)
 
 #define KVM_REQUIRED_VMX_TERTIARY_VM_EXEC_CONTROL	0
 #define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL			\
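For context on the handle_exception_nmi() change: KVM_BUG_ON() does more than
warn. Roughly (simplified from the real definition in
include/linux/kvm_host.h), it marks the whole VM as bugged and kicks all vCPUs
out of the guest, so returning -EIO afterwards terminates the VM rather than
resuming a guest that observed a #VE it should never have received.

	/* Simplified sketch of KVM_BUG_ON(); see include/linux/kvm_host.h. */
	#define KVM_BUG_ON(cond, kvm)				\
	({							\
		bool __ret = !!(cond);				\
								\
		/* WARN once, then terminate the VM. */		\
		if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged))	\
			kvm_vm_bugged(kvm);			\
		unlikely(__ret);				\
	})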