[RFC,v5,22/26] KVM: VMX: Add SGX ENCLS[ECREATE] handler to enforce CPUID restrictions

Message ID	22169b7784d1dccb9bb2bfbd855b95e346647e35.1613221549.git.kai.huang@intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <linux-sgx-owner@kernel.org> IronPort-SDR: rjimh1rAM12z/f6iXjAXtFyhwgpm7apWUHIDK7WzUAqe7snkUizPhJ23JR2NUMfvUQBoGAS8LG 24AJ1JynBGAQ== IronPort-SDR: B8fzM6yjFxq2XjY/Zhu/oUOIkRiOJ/aYOwbog9a/vesCRsHEhbhT9fbjwpE+oTGfUqCuBppxsj 7nND/l/niT5w== From: Kai Huang <kai.huang@intel.com> To: linux-sgx@vger.kernel.org, kvm@vger.kernel.org, x86@kernel.org Cc: seanjc@google.com, jarkko@kernel.org, luto@kernel.org, dave.hansen@intel.com, rick.p.edgecombe@intel.com, haitao.huang@intel.com, pbonzini@redhat.com, bp@alien8.de, tglx@linutronix.de, mingo@redhat.com, hpa@zytor.com, jmattson@google.com, joro@8bytes.org, vkuznets@redhat.com, wanpengli@tencent.com, Kai Huang <kai.huang@intel.com> Subject: [RFC PATCH v5 22/26] KVM: VMX: Add SGX ENCLS[ECREATE] handler to enforce CPUID restrictions Date: Sun, 14 Feb 2021 02:29:50 +1300 Message-Id: <22169b7784d1dccb9bb2bfbd855b95e346647e35.1613221549.git.kai.huang@intel.com> In-Reply-To: <cover.1613221549.git.kai.huang@intel.com> References: <cover.1613221549.git.kai.huang@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	KVM SGX virtualization support \| expand [RFC,v5,00/26] KVM SGX virtualization support [RFC,v5,01/26] x86/cpufeatures: Make SGX_LC feature bit depend on SGX bit [RFC,v5,02/26] x86/cpufeatures: Add SGX1 and SGX2 sub-features [RFC,v5,03/26] x86/sgx: Wipe out EREMOVE from sgx_free_epc_page() [RFC,v5,04/26] x86/sgx: Add SGX_CHILD_PRESENT hardware error code [RFC,v5,05/26] x86/sgx: Introduce virtual EPC for use by KVM guests [RFC,v5,06/26] x86/cpu/intel: Allow SGX virtualization without Launch Control support [RFC,v5,07/26] x86/sgx: Initialize virtual EPC driver even when SGX driver is disabled [RFC,v5,08/26] x86/sgx: Expose SGX architectural definitions to the kernel [RFC,v5,09/26] x86/sgx: Move ENCLS leaf definitions to sgx_arch.h [RFC,v5,10/26] x86/sgx: Add SGX2 ENCLS leaf definitions (EAUG, EMODPR and EMODT) [RFC,v5,11/26] x86/sgx: Add encls_faulted() helper [RFC,v5,12/26] x86/sgx: Add helper to update SGX_LEPUBKEYHASHn MSRs [RFC,v5,13/26] x86/sgx: Add helpers to expose ECREATE and EINIT to KVM [RFC,v5,14/26] x86/sgx: Move provisioning device creation out of SGX driver [RFC,v5,15/26] KVM: VMX: Convert vcpu_vmx.exit_reason to a union [RFC,v5,16/26] KVM: x86: Export kvm_mmu_gva_to_gpa_{read,write}() for SGX (VMX) [RFC,v5,17/26] KVM: x86: Define new #PF SGX error code bit [RFC,v5,18/26] KVM: x86: Add support for reverse CPUID lookup of scattered features [RFC,v5,19/26] KVM: x86: Add reverse-CPUID lookup support for scattered SGX features [RFC,v5,20/26] KVM: VMX: Add basic handling of VM-Exit from SGX enclave [RFC,v5,21/26] KVM: VMX: Frame in ENCLS handler for SGX virtualization [RFC,v5,22/26] KVM: VMX: Add SGX ENCLS[ECREATE] handler to enforce CPUID restrictions [RFC,v5,23/26] KVM: VMX: Add emulation of SGX Launch Control LE hash MSRs [RFC,v5,24/26] KVM: VMX: Add ENCLS[EINIT] handler to support SGX Launch Control (LC) [RFC,v5,25/26] KVM: VMX: Enable SGX virtualization for SGX1, SGX2 and LC [RFC,v5,26/26] KVM: x86: Add capability to grant VM access to privileged SGX attribute

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9581f81e62a4..cd71f30fbdd1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1000,6 +1000,9 @@ struct kvm_arch { struct msr_bitmap_range ranges[16]; } msr_filter; + /* Guest can access the SGX PROVISIONKEY. */ + bool sgx_provisioning_allowed; + struct kvm_pmu_event_filter *pmu_event_filter; struct task_struct *nx_lpage_recovery_thread; diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c index 6b3d614d288c..7bdb125325ef 100644 --- a/arch/x86/kvm/vmx/sgx.c +++ b/arch/x86/kvm/vmx/sgx.c @@ -12,6 +12,251 @@ bool __read_mostly enable_sgx; +/* + * ENCLS's memory operands use a fixed segment (DS) and a fixed + * address size based on the mode. Related prefixes are ignored. + */ +static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset, + int size, int alignment, gva_t *gva) +{ + struct kvm_segment s; + bool fault; + + /* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */ + *gva = offset; + if (!is_long_mode(vcpu)) { + vmx_get_segment(vcpu, &s, VCPU_SREG_DS); + *gva += s.base; + } + + if (!IS_ALIGNED(*gva, alignment)) { + fault = true; + } else if (likely(is_long_mode(vcpu))) { + fault = is_noncanonical_address(*gva, vcpu); + } else { + *gva &= 0xffffffff; + fault = (s.unusable) || + (s.type != 2 && s.type != 3) || + (*gva > s.limit) || + ((s.base != 0 || s.limit != 0xffffffff) && + (((u64)*gva + size - 1) > s.limit + 1)); + } + if (fault) + kvm_inject_gp(vcpu, 0); + return fault ? -EINVAL : 0; +} + +static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr, + unsigned int size) +{ + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 2; + vcpu->run->internal.data[0] = addr; + vcpu->run->internal.data[1] = size; +} + +static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data, + unsigned int size) +{ + if (__copy_from_user(data, (void __user *)hva, size)) { + sgx_handle_emulation_failure(vcpu, hva, size); + return -EFAULT; + } + + return 0; +} + +static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write, + gpa_t *gpa) +{ + struct x86_exception ex; + + if (write) + *gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex); + else + *gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex); + + if (*gpa == UNMAPPED_GVA) { + kvm_inject_emulated_page_fault(vcpu, &ex); + return -EFAULT; + } + + return 0; +} + +static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva) +{ + *hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa)); + if (kvm_is_error_hva(*hva)) { + sgx_handle_emulation_failure(vcpu, gpa, 1); + return -EFAULT; + } + + *hva |= gpa & ~PAGE_MASK; + + return 0; +} + +static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr) +{ + struct x86_exception ex; + + /* + * A non-EPCM #PF indicates a bad userspace HVA. This *should* check + * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC, + * but the error code isn't (yet) plumbed through the ENCLS helpers. + */ + if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; + } + + /* + * If the guest thinks it's running on SGX2 hardware, inject an SGX + * #PF if the fault matches an EPCM fault signature (#GP on SGX1, + * #PF on SGX2). The assumption is that EPCM faults are much more + * likely than a bad userspace address. + */ + if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) && + guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) { + memset(&ex, 0, sizeof(ex)); + ex.vector = PF_VECTOR; + ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK | + PFERR_SGX_MASK; + ex.address = gva; + ex.error_code_valid = true; + ex.nested_page_fault = false; + kvm_inject_page_fault(vcpu, &ex); + } else { + kvm_inject_gp(vcpu, 0); + } + return 1; +} + +static int handle_encls_ecreate(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1; + gva_t pageinfo_gva, secs_gva; + gva_t metadata_gva, contents_gva; + gpa_t metadata_gpa, contents_gpa, secs_gpa; + unsigned long metadata_hva, contents_hva, secs_hva; + struct sgx_pageinfo pageinfo; + struct sgx_secs *contents; + u64 attributes, xfrm, size; + u32 miscselect; + struct x86_exception ex; + u8 max_size_log2; + int trapnr, r; + + sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0); + sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1); + if (!sgx_12_0 || !sgx_12_1) { + kvm_inject_gp(vcpu, 0); + return 1; + } + + if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) || + sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva)) + return 1; + + /* + * Copy the PAGEINFO to local memory, its pointers need to be + * translated, i.e. we need to do a deep copy/translate. + */ + r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo, + sizeof(pageinfo), &ex); + if (r == X86EMUL_PROPAGATE_FAULT) { + kvm_inject_emulated_page_fault(vcpu, &ex); + return 1; + } else if (r != X86EMUL_CONTINUE) { + sgx_handle_emulation_failure(vcpu, pageinfo_gva, size); + return 0; + } + + if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) || + sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096, + &contents_gva)) + return 1; + + /* + * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA. + * Resume the guest on failure to inject a #PF. + */ + if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) || + sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) || + sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa)) + return 1; + + /* + * ...and then to HVA. The order of accesses isn't architectural, i.e. + * KVM doesn't have to fully process one address at a time. Exit to + * userspace if a GPA is invalid. + */ + if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) || + sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) || + sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva)) + return 0; + + /* + * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the + * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and + * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to + * enforce restriction of access to the PROVISIONKEY. + */ + contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL); + if (!contents) + return -ENOMEM; + + /* Exit to userspace if copying from a host userspace address fails. */ + if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) + return 0; + + miscselect = contents->miscselect; + attributes = contents->attributes; + xfrm = contents->xfrm; + size = contents->size; + + /* Enforce restriction of access to the PROVISIONKEY. */ + if (!vcpu->kvm->arch.sgx_provisioning_allowed && + (attributes & SGX_ATTR_PROVISIONKEY)) { + if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY) + pr_warn_once("KVM: SGX PROVISIONKEY advertised but not allowed\n"); + kvm_inject_gp(vcpu, 0); + return 1; + } + + /* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */ + if ((u32)miscselect & ~sgx_12_0->ebx || + (u32)attributes & ~sgx_12_1->eax || + (u32)(attributes >> 32) & ~sgx_12_1->ebx || + (u32)xfrm & ~sgx_12_1->ecx || + (u32)(xfrm >> 32) & ~sgx_12_1->edx) { + kvm_inject_gp(vcpu, 0); + return 1; + } + + /* Enforce CPUID restriction on max enclave size. */ + max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 : + sgx_12_0->edx; + if (size >= BIT_ULL(max_size_log2)) + kvm_inject_gp(vcpu, 0); + + pageinfo.metadata = metadata_hva; + pageinfo.contents = (u64)contents; + + r = sgx_virt_ecreate(&pageinfo, (void __user *)secs_hva, &trapnr); + + free_page((unsigned long)contents); + + if (r) + return sgx_inject_fault(vcpu, secs_gva, trapnr); + + return kvm_skip_emulated_instruction(vcpu); +} + static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf) { if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX)) @@ -42,6 +287,8 @@ int handle_encls(struct kvm_vcpu *vcpu) } else if (!sgx_enabled_in_guest_bios(vcpu)) { kvm_inject_gp(vcpu, 0); } else { + if (leaf == ECREATE) + return handle_encls_ecreate(vcpu); WARN(1, "KVM: unexpected exit on ENCLS[%u]", leaf); vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;

[RFC,v5,22/26] KVM: VMX: Add SGX ENCLS[ECREATE] handler to enforce CPUID restrictions

Commit Message

Patch