diff mbox series

[RFC,v3,23/27] KVM: VMX: Add SGX ENCLS[ECREATE] handler to enforce CPUID restrictions

Message ID d68c01baed78f859ac5fce4519646fc8a356c77d.1611634586.git.kai.huang@intel.com (mailing list archive)
State New, archived
Headers show
Series KVM SGX virtualization support | expand

Commit Message

Kai Huang Jan. 26, 2021, 9:31 a.m. UTC
From: Sean Christopherson <sean.j.christopherson@intel.com>

Add an ECREATE handler that will be used to intercept ECREATE for the
purpose of enforcing and enclave's MISCSELECT, ATTRIBUTES and XFRM, i.e.
to allow userspace to restrict SGX features via CPUID.  ECREATE will be
intercepted when any of the aforementioned masks diverges from hardware
in order to enforce the desired CPUID model, i.e. inject #GP if the
guest attempts to set a bit that hasn't been enumerated as allowed-1 in
CPUID.

Note, access to the PROVISIONKEY is not yet supported.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Kai Huang <kai.huang@intel.com>
---
 arch/x86/include/asm/kvm_host.h |   3 +
 arch/x86/kvm/vmx/sgx.c          | 243 ++++++++++++++++++++++++++++++++
 2 files changed, 246 insertions(+)

Comments

Edgecombe, Rick P Feb. 3, 2021, 12:52 a.m. UTC | #1
On Tue, 2021-01-26 at 22:31 +1300, Kai Huang wrote:
> +static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
> +{
> +       unsigned long a_hva, m_hva, x_hva, s_hva, secs_hva;
> +       struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
> +       gpa_t metadata_gpa, contents_gpa, secs_gpa;
> +       struct sgx_pageinfo pageinfo;
> +       gva_t pageinfo_gva, secs_gva;
> +       u64 attributes, xfrm, size;
> +       struct x86_exception ex;
> +       u8 max_size_log2;
> +       u32 miscselect;
> +       int trapnr, r;
> +
> +       sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0);
> +       sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1);
> +       if (!sgx_12_0 || !sgx_12_1) {
> +               kvm_inject_gp(vcpu, 0);
> +               return 1;
> +       }
> +
> +       if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32,
> &pageinfo_gva) ||
> +           sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096,
> &secs_gva))
> +               return 1;
> +
> +       /*
> +        * Copy the PAGEINFO to local memory, its pointers need to be
> +        * translated, i.e. we need to do a deep copy/translate.
> +        */
> +       r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
> +                               sizeof(pageinfo), &ex);
> +       if (r == X86EMUL_PROPAGATE_FAULT) {
> +               kvm_inject_emulated_page_fault(vcpu, &ex);
> +               return 1;
> +       } else if (r != X86EMUL_CONTINUE) {
> +               sgx_handle_emulation_failure(vcpu, pageinfo_gva,
> size);
> +               return 0;
> +       }
> +
> +       /*
> +        * Verify alignment early.  This conveniently avoids having
> to worry
> +        * about page splits on userspace addresses.
> +        */
> +       if (!IS_ALIGNED(pageinfo.metadata, 64) ||
> +           !IS_ALIGNED(pageinfo.contents, 4096)) {
> +               kvm_inject_gp(vcpu, 0);
> +               return 1;
> +       }
> +
> +       /*
> +        * Translate the SECINFO, SOURCE and SECS pointers from GVA
> to GPA.
> +        * Resume the guest on failure to inject a #PF.
> +        */
> +       if (sgx_gva_to_gpa(vcpu, pageinfo.metadata, false,
> &metadata_gpa) ||
> +           sgx_gva_to_gpa(vcpu, pageinfo.contents, false,
> &contents_gpa) ||
> +           sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
> +               return 1;
> +

Do pageinfo.metadata and pageinfo.contents need cannonical checks here?

I was noticing the other day that the guest walker could access host
memory slightly outside of a memslot if it ever got passed a gva with
bits higher that the va bits. Or at least it appeared that way. I
didn't fully wade into the bit math because all callers from the guest
did cannonical checks.
Sean Christopherson Feb. 3, 2021, 1:36 a.m. UTC | #2
On Wed, Feb 03, 2021, Edgecombe, Rick P wrote:
> On Tue, 2021-01-26 at 22:31 +1300, Kai Huang wrote:
> > +static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
> > +{
> > +       unsigned long a_hva, m_hva, x_hva, s_hva, secs_hva;
> > +       struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
> > +       gpa_t metadata_gpa, contents_gpa, secs_gpa;
> > +       struct sgx_pageinfo pageinfo;
> > +       gva_t pageinfo_gva, secs_gva;
> > +       u64 attributes, xfrm, size;
> > +       struct x86_exception ex;
> > +       u8 max_size_log2;
> > +       u32 miscselect;
> > +       int trapnr, r;
> > +
> > +       sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0);
> > +       sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1);
> > +       if (!sgx_12_0 || !sgx_12_1) {
> > +               kvm_inject_gp(vcpu, 0);
> > +               return 1;
> > +       }
> > +
> > +       if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32,
> > &pageinfo_gva) ||
> > +           sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096,
> > &secs_gva))
> > +               return 1;
> > +
> > +       /*
> > +        * Copy the PAGEINFO to local memory, its pointers need to be
> > +        * translated, i.e. we need to do a deep copy/translate.
> > +        */
> > +       r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
> > +                               sizeof(pageinfo), &ex);
> > +       if (r == X86EMUL_PROPAGATE_FAULT) {
> > +               kvm_inject_emulated_page_fault(vcpu, &ex);
> > +               return 1;
> > +       } else if (r != X86EMUL_CONTINUE) {
> > +               sgx_handle_emulation_failure(vcpu, pageinfo_gva,
> > size);
> > +               return 0;
> > +       }
> > +
> > +       /*
> > +        * Verify alignment early.  This conveniently avoids having
> > to worry
> > +        * about page splits on userspace addresses.
> > +        */
> > +       if (!IS_ALIGNED(pageinfo.metadata, 64) ||
> > +           !IS_ALIGNED(pageinfo.contents, 4096)) {
> > +               kvm_inject_gp(vcpu, 0);
> > +               return 1;
> > +       }
> > +
> > +       /*
> > +        * Translate the SECINFO, SOURCE and SECS pointers from GVA
> > to GPA.
> > +        * Resume the guest on failure to inject a #PF.
> > +        */
> > +       if (sgx_gva_to_gpa(vcpu, pageinfo.metadata, false,
> > &metadata_gpa) ||
> > +           sgx_gva_to_gpa(vcpu, pageinfo.contents, false,
> > &contents_gpa) ||
> > +           sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
> > +               return 1;
> > +
> 
> Do pageinfo.metadata and pageinfo.contents need cannonical checks here?

Bugger, yes.  So much boilerplate needed in this code :-/

Maybe add yet another helper to do alignment+canonical checks, up where the
IS_ALIGNED() calls are?
Kai Huang Feb. 3, 2021, 9:11 a.m. UTC | #3
On Tue, 2 Feb 2021 17:36:12 -0800 Sean Christopherson wrote:
> On Wed, Feb 03, 2021, Edgecombe, Rick P wrote:
> > On Tue, 2021-01-26 at 22:31 +1300, Kai Huang wrote:
> > > +static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
> > > +{
> > > +       unsigned long a_hva, m_hva, x_hva, s_hva, secs_hva;
> > > +       struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
> > > +       gpa_t metadata_gpa, contents_gpa, secs_gpa;
> > > +       struct sgx_pageinfo pageinfo;
> > > +       gva_t pageinfo_gva, secs_gva;
> > > +       u64 attributes, xfrm, size;
> > > +       struct x86_exception ex;
> > > +       u8 max_size_log2;
> > > +       u32 miscselect;
> > > +       int trapnr, r;
> > > +
> > > +       sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0);
> > > +       sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1);
> > > +       if (!sgx_12_0 || !sgx_12_1) {
> > > +               kvm_inject_gp(vcpu, 0);
> > > +               return 1;
> > > +       }
> > > +
> > > +       if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32,
> > > &pageinfo_gva) ||
> > > +           sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096,
> > > &secs_gva))
> > > +               return 1;
> > > +
> > > +       /*
> > > +        * Copy the PAGEINFO to local memory, its pointers need to be
> > > +        * translated, i.e. we need to do a deep copy/translate.
> > > +        */
> > > +       r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
> > > +                               sizeof(pageinfo), &ex);
> > > +       if (r == X86EMUL_PROPAGATE_FAULT) {
> > > +               kvm_inject_emulated_page_fault(vcpu, &ex);
> > > +               return 1;
> > > +       } else if (r != X86EMUL_CONTINUE) {
> > > +               sgx_handle_emulation_failure(vcpu, pageinfo_gva,
> > > size);
> > > +               return 0;
> > > +       }
> > > +
> > > +       /*
> > > +        * Verify alignment early.  This conveniently avoids having
> > > to worry
> > > +        * about page splits on userspace addresses.
> > > +        */
> > > +       if (!IS_ALIGNED(pageinfo.metadata, 64) ||
> > > +           !IS_ALIGNED(pageinfo.contents, 4096)) {
> > > +               kvm_inject_gp(vcpu, 0);
> > > +               return 1;
> > > +       }
> > > +
> > > +       /*
> > > +        * Translate the SECINFO, SOURCE and SECS pointers from GVA
> > > to GPA.
> > > +        * Resume the guest on failure to inject a #PF.
> > > +        */
> > > +       if (sgx_gva_to_gpa(vcpu, pageinfo.metadata, false,
> > > &metadata_gpa) ||
> > > +           sgx_gva_to_gpa(vcpu, pageinfo.contents, false,
> > > &contents_gpa) ||
> > > +           sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
> > > +               return 1;
> > > +
> > 
> > Do pageinfo.metadata and pageinfo.contents need cannonical checks here?
> 
> Bugger, yes.  So much boilerplate needed in this code :-/
> 
> Maybe add yet another helper to do alignment+canonical checks, up where the
> IS_ALIGNED() calls are?

sgx_get_encls_gva() already does canonical check. Couldn't we just use it?

For instance:

	if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64 &metadata_gva) ||
	    sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
                             &contents_gva))
		return 1;
Sean Christopherson Feb. 3, 2021, 5:07 p.m. UTC | #4
On Wed, Feb 03, 2021, Kai Huang wrote:
> On Tue, 2 Feb 2021 17:36:12 -0800 Sean Christopherson wrote:
> > On Wed, Feb 03, 2021, Edgecombe, Rick P wrote:
> > > On Tue, 2021-01-26 at 22:31 +1300, Kai Huang wrote:
> > > > +       /*
> > > > +        * Verify alignment early.  This conveniently avoids having
> > > > to worry
> > > > +        * about page splits on userspace addresses.
> > > > +        */
> > > > +       if (!IS_ALIGNED(pageinfo.metadata, 64) ||
> > > > +           !IS_ALIGNED(pageinfo.contents, 4096)) {
> > > > +               kvm_inject_gp(vcpu, 0);
> > > > +               return 1;
> > > > +       }
> > > > +
> > > > +       /*
> > > > +        * Translate the SECINFO, SOURCE and SECS pointers from GVA
> > > > to GPA.
> > > > +        * Resume the guest on failure to inject a #PF.
> > > > +        */
> > > > +       if (sgx_gva_to_gpa(vcpu, pageinfo.metadata, false,
> > > > &metadata_gpa) ||
> > > > +           sgx_gva_to_gpa(vcpu, pageinfo.contents, false,
> > > > &contents_gpa) ||
> > > > +           sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
> > > > +               return 1;
> > > > +
> > > 
> > > Do pageinfo.metadata and pageinfo.contents need cannonical checks here?
> > 
> > Bugger, yes.  So much boilerplate needed in this code :-/
> > 
> > Maybe add yet another helper to do alignment+canonical checks, up where the
> > IS_ALIGNED() calls are?
> 
> sgx_get_encls_gva() already does canonical check. Couldn't we just use it?

After rereading the SDM for the bajillionth time, yes, these should indeed use
sgx_get_encls_gva().  Originally I was thinking they were linear addresses, but
they are effective addresses that use DS, i.e. not using the helper to avoid the
DS.base adjustment for 32-bit mode was also wrong.

> For instance:
> 
> 	if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64 &metadata_gva) ||
> 	    sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
>                              &contents_gva))
> 		return 1;
Edgecombe, Rick P Feb. 3, 2021, 6:47 p.m. UTC | #5
On Tue, 2021-01-26 at 22:31 +1300, Kai Huang wrote:
> +       /* Exit to userspace if copying from a host userspace address
> fails. */
> +       if (sgx_read_hva(vcpu, m_hva, &miscselect,
> sizeof(miscselect)) ||
> +           sgx_read_hva(vcpu, a_hva, &attributes,
> sizeof(attributes)) ||
> +           sgx_read_hva(vcpu, x_hva, &xfrm, sizeof(xfrm)) ||
> +           sgx_read_hva(vcpu, s_hva, &size, sizeof(size)))
> +               return 0;
> +
> +       /* Enforce restriction of access to the PROVISIONKEY. */
> +       if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
> +           (attributes & SGX_ATTR_PROVISIONKEY)) {
> +               if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
> +                       pr_warn_once("KVM: SGX PROVISIONKEY
> advertised but not allowed\n");
> +               kvm_inject_gp(vcpu, 0);
> +               return 1;
> +       }
> +
> +       /* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and
> XFRM. */
> +       if ((u32)miscselect & ~sgx_12_0->ebx ||
> +           (u32)attributes & ~sgx_12_1->eax ||
> +           (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
> +           (u32)xfrm & ~sgx_12_1->ecx ||
> +           (u32)(xfrm >> 32) & ~sgx_12_1->edx) {
> +               kvm_inject_gp(vcpu, 0);
> +               return 1;
> +       }

Don't you need to deep copy the pageinfo.contents struct as well?
Otherwise the guest could change these after they were checked.

But it seems it is checked by the HW and something is caught that would
inject a GP anyway? Can you elaborate on the importance of these
checks?
Sean Christopherson Feb. 3, 2021, 7:36 p.m. UTC | #6
On Wed, Feb 03, 2021, Edgecombe, Rick P wrote:
> On Tue, 2021-01-26 at 22:31 +1300, Kai Huang wrote:
> > +       /* Exit to userspace if copying from a host userspace address
> > fails. */
> > +       if (sgx_read_hva(vcpu, m_hva, &miscselect,
> > sizeof(miscselect)) ||
> > +           sgx_read_hva(vcpu, a_hva, &attributes,
> > sizeof(attributes)) ||
> > +           sgx_read_hva(vcpu, x_hva, &xfrm, sizeof(xfrm)) ||
> > +           sgx_read_hva(vcpu, s_hva, &size, sizeof(size)))
> > +               return 0;
> > +
> > +       /* Enforce restriction of access to the PROVISIONKEY. */
> > +       if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
> > +           (attributes & SGX_ATTR_PROVISIONKEY)) {
> > +               if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
> > +                       pr_warn_once("KVM: SGX PROVISIONKEY
> > advertised but not allowed\n");
> > +               kvm_inject_gp(vcpu, 0);
> > +               return 1;
> > +       }
> > +
> > +       /* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and
> > XFRM. */
> > +       if ((u32)miscselect & ~sgx_12_0->ebx ||
> > +           (u32)attributes & ~sgx_12_1->eax ||
> > +           (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
> > +           (u32)xfrm & ~sgx_12_1->ecx ||
> > +           (u32)(xfrm >> 32) & ~sgx_12_1->edx) {
> > +               kvm_inject_gp(vcpu, 0);
> > +               return 1;
> > +       }
> 
> Don't you need to deep copy the pageinfo.contents struct as well?
> Otherwise the guest could change these after they were checked.
> 
> But it seems it is checked by the HW and something is caught that would
> inject a GP anyway? Can you elaborate on the importance of these
> checks?

Argh, yes.  These checks are to allow migration between systems with different
SGX capabilities, and more importantly to prevent userspace from doing an end
around on the restricted access to PROVISIONKEY.

IIRC, earlier versions did do a deep copy, but then I got clever.  Anyways, yeah,
sadly the entire pageinfo.contents page will need to be copied.
Kai Huang Feb. 3, 2021, 11:11 p.m. UTC | #7
On Wed, 2021-02-03 at 09:07 -0800, Sean Christopherson wrote:
> On Wed, Feb 03, 2021, Kai Huang wrote:
> > On Tue, 2 Feb 2021 17:36:12 -0800 Sean Christopherson wrote:
> > > On Wed, Feb 03, 2021, Edgecombe, Rick P wrote:
> > > > On Tue, 2021-01-26 at 22:31 +1300, Kai Huang wrote:
> > > > > +       /*
> > > > > +        * Verify alignment early.  This conveniently avoids having
> > > > > to worry
> > > > > +        * about page splits on userspace addresses.
> > > > > +        */
> > > > > +       if (!IS_ALIGNED(pageinfo.metadata, 64) ||
> > > > > +           !IS_ALIGNED(pageinfo.contents, 4096)) {
> > > > > +               kvm_inject_gp(vcpu, 0);
> > > > > +               return 1;
> > > > > +       }
> > > > > +
> > > > > +       /*
> > > > > +        * Translate the SECINFO, SOURCE and SECS pointers from GVA
> > > > > to GPA.
> > > > > +        * Resume the guest on failure to inject a #PF.
> > > > > +        */
> > > > > +       if (sgx_gva_to_gpa(vcpu, pageinfo.metadata, false,
> > > > > &metadata_gpa) ||
> > > > > +           sgx_gva_to_gpa(vcpu, pageinfo.contents, false,
> > > > > &contents_gpa) ||
> > > > > +           sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
> > > > > +               return 1;
> > > > > +
> > > > 
> > > > Do pageinfo.metadata and pageinfo.contents need cannonical checks here?
> > > 
> > > Bugger, yes.  So much boilerplate needed in this code :-/
> > > 
> > > Maybe add yet another helper to do alignment+canonical checks, up where the
> > > IS_ALIGNED() calls are?
> > 
> > sgx_get_encls_gva() already does canonical check. Couldn't we just use it?
> 
> After rereading the SDM for the bajillionth time, yes, these should indeed use
> sgx_get_encls_gva().  Originally I was thinking they were linear addresses, but
> they are effective addresses that use DS, i.e. not using the helper to avoid the
> DS.base adjustment for 32-bit mode was also wrong.

Agreed.

> 
> > For instance:
> > 
> > 	if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64 &metadata_gva) ||
> > 	    sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
> >                              &contents_gva))
> > 		return 1;
Kai Huang Feb. 3, 2021, 11:29 p.m. UTC | #8
On Wed, 2021-02-03 at 11:36 -0800, Sean Christopherson wrote:
> On Wed, Feb 03, 2021, Edgecombe, Rick P wrote:
> > On Tue, 2021-01-26 at 22:31 +1300, Kai Huang wrote:
> > > +       /* Exit to userspace if copying from a host userspace address
> > > fails. */
> > > +       if (sgx_read_hva(vcpu, m_hva, &miscselect,
> > > sizeof(miscselect)) ||
> > > +           sgx_read_hva(vcpu, a_hva, &attributes,
> > > sizeof(attributes)) ||
> > > +           sgx_read_hva(vcpu, x_hva, &xfrm, sizeof(xfrm)) ||
> > > +           sgx_read_hva(vcpu, s_hva, &size, sizeof(size)))
> > > +               return 0;
> > > +
> > > +       /* Enforce restriction of access to the PROVISIONKEY. */
> > > +       if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
> > > +           (attributes & SGX_ATTR_PROVISIONKEY)) {
> > > +               if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
> > > +                       pr_warn_once("KVM: SGX PROVISIONKEY
> > > advertised but not allowed\n");
> > > +               kvm_inject_gp(vcpu, 0);
> > > +               return 1;
> > > +       }
> > > +
> > > +       /* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and
> > > XFRM. */
> > > +       if ((u32)miscselect & ~sgx_12_0->ebx ||
> > > +           (u32)attributes & ~sgx_12_1->eax ||
> > > +           (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
> > > +           (u32)xfrm & ~sgx_12_1->ecx ||
> > > +           (u32)(xfrm >> 32) & ~sgx_12_1->edx) {
> > > +               kvm_inject_gp(vcpu, 0);
> > > +               return 1;
> > > +       }
> > 
> > Don't you need to deep copy the pageinfo.contents struct as well?
> > Otherwise the guest could change these after they were checked.
> > 
> > But it seems it is checked by the HW and something is caught that would
> > inject a GP anyway? Can you elaborate on the importance of these
> > checks?
> 
> Argh, yes.  These checks are to allow migration between systems with different
> SGX capabilities, and more importantly to prevent userspace from doing an end
> around on the restricted access to PROVISIONKEY.
> 
> IIRC, earlier versions did do a deep copy, but then I got clever.  Anyways, yeah,
> sadly the entire pageinfo.contents page will need to be copied.

I don't fully understand the problem. Are you worried about contents being updated by
other vcpus during the trap? 

And I don't see how copy can avoid this problem. Even you do copy, the content can
still be modified afterwards, correct? So what's the point of copying? Looks a better
solution is to kick all vcpus and put them into block state while KVM is doing ENCLS
for guest.
Sean Christopherson Feb. 3, 2021, 11:36 p.m. UTC | #9
On Thu, Feb 04, 2021, Kai Huang wrote:
> On Wed, 2021-02-03 at 11:36 -0800, Sean Christopherson wrote:
> > On Wed, Feb 03, 2021, Edgecombe, Rick P wrote:
> > > On Tue, 2021-01-26 at 22:31 +1300, Kai Huang wrote:
> > > > +       /* Exit to userspace if copying from a host userspace address
> > > > fails. */
> > > > +       if (sgx_read_hva(vcpu, m_hva, &miscselect,
> > > > sizeof(miscselect)) ||
> > > > +           sgx_read_hva(vcpu, a_hva, &attributes,
> > > > sizeof(attributes)) ||
> > > > +           sgx_read_hva(vcpu, x_hva, &xfrm, sizeof(xfrm)) ||
> > > > +           sgx_read_hva(vcpu, s_hva, &size, sizeof(size)))
> > > > +               return 0;
> > > > +
> > > > +       /* Enforce restriction of access to the PROVISIONKEY. */
> > > > +       if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
> > > > +           (attributes & SGX_ATTR_PROVISIONKEY)) {
> > > > +               if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
> > > > +                       pr_warn_once("KVM: SGX PROVISIONKEY
> > > > advertised but not allowed\n");
> > > > +               kvm_inject_gp(vcpu, 0);
> > > > +               return 1;
> > > > +       }
> > > > +
> > > > +       /* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and
> > > > XFRM. */
> > > > +       if ((u32)miscselect & ~sgx_12_0->ebx ||
> > > > +           (u32)attributes & ~sgx_12_1->eax ||
> > > > +           (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
> > > > +           (u32)xfrm & ~sgx_12_1->ecx ||
> > > > +           (u32)(xfrm >> 32) & ~sgx_12_1->edx) {
> > > > +               kvm_inject_gp(vcpu, 0);
> > > > +               return 1;
> > > > +       }
> > > 
> > > Don't you need to deep copy the pageinfo.contents struct as well?
> > > Otherwise the guest could change these after they were checked.
> > > 
> > > But it seems it is checked by the HW and something is caught that would
> > > inject a GP anyway? Can you elaborate on the importance of these
> > > checks?
> > 
> > Argh, yes.  These checks are to allow migration between systems with different
> > SGX capabilities, and more importantly to prevent userspace from doing an end
> > around on the restricted access to PROVISIONKEY.
> > 
> > IIRC, earlier versions did do a deep copy, but then I got clever.  Anyways, yeah,
> > sadly the entire pageinfo.contents page will need to be copied.
> 
> I don't fully understand the problem. Are you worried about contents being updated by
> other vcpus during the trap? 
> 
> And I don't see how copy can avoid this problem. Even you do copy, the content can
> still be modified afterwards, correct? So what's the point of copying?

The goal isn't correctness, it's to prevent a TOCTOU bug.  E.g. the guest could
do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and simultaneously set
SGX_ATTR_PROVISIONKEY to bypass the above check.

> Looks a better solution is to kick all vcpus and put them into block state
> while KVM is doing ENCLS for guest.

No.  (a) it won't work, as the memory is writable from host userspace.  (b) that
does not scale, at all.
Kai Huang Feb. 3, 2021, 11:45 p.m. UTC | #10
On Wed, 2021-02-03 at 15:36 -0800, Sean Christopherson wrote:
> On Thu, Feb 04, 2021, Kai Huang wrote:
> > On Wed, 2021-02-03 at 11:36 -0800, Sean Christopherson wrote:
> > > On Wed, Feb 03, 2021, Edgecombe, Rick P wrote:
> > > > On Tue, 2021-01-26 at 22:31 +1300, Kai Huang wrote:
> > > > > +       /* Exit to userspace if copying from a host userspace address
> > > > > fails. */
> > > > > +       if (sgx_read_hva(vcpu, m_hva, &miscselect,
> > > > > sizeof(miscselect)) ||
> > > > > +           sgx_read_hva(vcpu, a_hva, &attributes,
> > > > > sizeof(attributes)) ||
> > > > > +           sgx_read_hva(vcpu, x_hva, &xfrm, sizeof(xfrm)) ||
> > > > > +           sgx_read_hva(vcpu, s_hva, &size, sizeof(size)))
> > > > > +               return 0;
> > > > > +
> > > > > +       /* Enforce restriction of access to the PROVISIONKEY. */
> > > > > +       if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
> > > > > +           (attributes & SGX_ATTR_PROVISIONKEY)) {
> > > > > +               if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
> > > > > +                       pr_warn_once("KVM: SGX PROVISIONKEY
> > > > > advertised but not allowed\n");
> > > > > +               kvm_inject_gp(vcpu, 0);
> > > > > +               return 1;
> > > > > +       }
> > > > > +
> > > > > +       /* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and
> > > > > XFRM. */
> > > > > +       if ((u32)miscselect & ~sgx_12_0->ebx ||
> > > > > +           (u32)attributes & ~sgx_12_1->eax ||
> > > > > +           (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
> > > > > +           (u32)xfrm & ~sgx_12_1->ecx ||
> > > > > +           (u32)(xfrm >> 32) & ~sgx_12_1->edx) {
> > > > > +               kvm_inject_gp(vcpu, 0);
> > > > > +               return 1;
> > > > > +       }
> > > > 
> > > > Don't you need to deep copy the pageinfo.contents struct as well?
> > > > Otherwise the guest could change these after they were checked.
> > > > 
> > > > But it seems it is checked by the HW and something is caught that would
> > > > inject a GP anyway? Can you elaborate on the importance of these
> > > > checks?
> > > 
> > > Argh, yes.  These checks are to allow migration between systems with different
> > > SGX capabilities, and more importantly to prevent userspace from doing an end
> > > around on the restricted access to PROVISIONKEY.
> > > 
> > > IIRC, earlier versions did do a deep copy, but then I got clever.  Anyways, yeah,
> > > sadly the entire pageinfo.contents page will need to be copied.
> > 
> > I don't fully understand the problem. Are you worried about contents being updated by
> > other vcpus during the trap? 
> > 
> > And I don't see how copy can avoid this problem. Even you do copy, the content can
> > still be modified afterwards, correct? So what's the point of copying?
> 
> The goal isn't correctness, it's to prevent a TOCTOU bug.  E.g. the guest could
> do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and simultaneously set
> SGX_ATTR_PROVISIONKEY to bypass the above check.

Oh ok. Agreed.

However, such attack would require precise timing. Not sure whether it is feasible in
practice.

> 
> > Looks a better solution is to kick all vcpus and put them into block state
> > while KVM is doing ENCLS for guest.
> 
> No.  (a) it won't work, as the memory is writable from host userspace.  (b) that
> does not scale, at all.

Good point. Agreed.
Sean Christopherson Feb. 3, 2021, 11:59 p.m. UTC | #11
On Thu, Feb 04, 2021, Kai Huang wrote:
> On Wed, 2021-02-03 at 15:36 -0800, Sean Christopherson wrote:
> > On Thu, Feb 04, 2021, Kai Huang wrote:
> > > On Wed, 2021-02-03 at 11:36 -0800, Sean Christopherson wrote:
> > > > On Wed, Feb 03, 2021, Edgecombe, Rick P wrote:
> > > > > On Tue, 2021-01-26 at 22:31 +1300, Kai Huang wrote:
> > > > > Don't you need to deep copy the pageinfo.contents struct as well?
> > > > > Otherwise the guest could change these after they were checked.
> > > > > 
> > > > > But it seems it is checked by the HW and something is caught that would
> > > > > inject a GP anyway? Can you elaborate on the importance of these
> > > > > checks?
> > > > 
> > > > Argh, yes.  These checks are to allow migration between systems with different
> > > > SGX capabilities, and more importantly to prevent userspace from doing an end
> > > > around on the restricted access to PROVISIONKEY.
> > > > 
> > > > IIRC, earlier versions did do a deep copy, but then I got clever.  Anyways, yeah,
> > > > sadly the entire pageinfo.contents page will need to be copied.
> > > 
> > > I don't fully understand the problem. Are you worried about contents being updated by
> > > other vcpus during the trap? 
> > > 
> > > And I don't see how copy can avoid this problem. Even you do copy, the content can
> > > still be modified afterwards, correct? So what's the point of copying?
> > 
> > The goal isn't correctness, it's to prevent a TOCTOU bug.  E.g. the guest could
> > do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and simultaneously set
> > SGX_ATTR_PROVISIONKEY to bypass the above check.
> 
> Oh ok. Agreed.
> 
> However, such attack would require precise timing. Not sure whether it is feasible in
> practice.

It's very feasible.  XOR the bit in a tight loop, build the enclave on a
separate thread.  Do that until EINIT succeeds.  Compared to other timing
attacks, I doubt it'd take all that long to get a successful result.

Regardless, the difficulty in exploiting the bug is irrelevant, it's a glaring
flaw that needs to be fixed.
Kai Huang Feb. 4, 2021, 12:11 a.m. UTC | #12
On Wed, 2021-02-03 at 15:59 -0800, Sean Christopherson wrote:
> On Thu, Feb 04, 2021, Kai Huang wrote:
> > On Wed, 2021-02-03 at 15:36 -0800, Sean Christopherson wrote:
> > > On Thu, Feb 04, 2021, Kai Huang wrote:
> > > > On Wed, 2021-02-03 at 11:36 -0800, Sean Christopherson wrote:
> > > > > On Wed, Feb 03, 2021, Edgecombe, Rick P wrote:
> > > > > > On Tue, 2021-01-26 at 22:31 +1300, Kai Huang wrote:
> > > > > > Don't you need to deep copy the pageinfo.contents struct as well?
> > > > > > Otherwise the guest could change these after they were checked.
> > > > > > 
> > > > > > But it seems it is checked by the HW and something is caught that would
> > > > > > inject a GP anyway? Can you elaborate on the importance of these
> > > > > > checks?
> > > > > 
> > > > > Argh, yes.  These checks are to allow migration between systems with different
> > > > > SGX capabilities, and more importantly to prevent userspace from doing an end
> > > > > around on the restricted access to PROVISIONKEY.
> > > > > 
> > > > > IIRC, earlier versions did do a deep copy, but then I got clever.  Anyways, yeah,
> > > > > sadly the entire pageinfo.contents page will need to be copied.
> > > > 
> > > > I don't fully understand the problem. Are you worried about contents being updated by
> > > > other vcpus during the trap? 
> > > > 
> > > > And I don't see how copy can avoid this problem. Even you do copy, the content can
> > > > still be modified afterwards, correct? So what's the point of copying?
> > > 
> > > The goal isn't correctness, it's to prevent a TOCTOU bug.  E.g. the guest could
> > > do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and simultaneously set
> > > SGX_ATTR_PROVISIONKEY to bypass the above check.
> > 
> > Oh ok. Agreed.
> > 
> > However, such attack would require precise timing. Not sure whether it is feasible in
> > practice.
> 
> It's very feasible.  XOR the bit in a tight loop, build the enclave on a
> separate thread.  Do that until EINIT succeeds.  Compared to other timing
> attacks, I doubt it'd take all that long to get a successful result.

How does it work? The setting PROVISION bit needs to be set after KVM checks SECS's
attribute, and before KVM actually does ECREATE, right?

> 
> Regardless, the difficulty in exploiting the bug is irrelevant, it's a glaring
> flaw that needs to be fixed.

Sure.
Sean Christopherson Feb. 4, 2021, 2:01 a.m. UTC | #13
On Thu, Feb 04, 2021, Kai Huang wrote:
> On Wed, 2021-02-03 at 15:59 -0800, Sean Christopherson wrote:
> > On Thu, Feb 04, 2021, Kai Huang wrote:
> > > On Wed, 2021-02-03 at 15:36 -0800, Sean Christopherson wrote:
> > > > On Thu, Feb 04, 2021, Kai Huang wrote:
> > > > > On Wed, 2021-02-03 at 11:36 -0800, Sean Christopherson wrote:
> > > > > > On Wed, Feb 03, 2021, Edgecombe, Rick P wrote:
> > > > > > > On Tue, 2021-01-26 at 22:31 +1300, Kai Huang wrote:
> > > > > > > Don't you need to deep copy the pageinfo.contents struct as well?
> > > > > > > Otherwise the guest could change these after they were checked.
> > > > > > > 
> > > > > > > But it seems it is checked by the HW and something is caught that would
> > > > > > > inject a GP anyway? Can you elaborate on the importance of these
> > > > > > > checks?
> > > > > > 
> > > > > > Argh, yes.  These checks are to allow migration between systems with different
> > > > > > SGX capabilities, and more importantly to prevent userspace from doing an end
> > > > > > around on the restricted access to PROVISIONKEY.
> > > > > > 
> > > > > > IIRC, earlier versions did do a deep copy, but then I got clever.  Anyways, yeah,
> > > > > > sadly the entire pageinfo.contents page will need to be copied.
> > > > > 
> > > > > I don't fully understand the problem. Are you worried about contents being updated by
> > > > > other vcpus during the trap? 
> > > > > 
> > > > > And I don't see how copy can avoid this problem. Even you do copy, the content can
> > > > > still be modified afterwards, correct? So what's the point of copying?
> > > > 
> > > > The goal isn't correctness, it's to prevent a TOCTOU bug.  E.g. the guest could
> > > > do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and simultaneously set
> > > > SGX_ATTR_PROVISIONKEY to bypass the above check.
> > > 
> > > Oh ok. Agreed.
> > > 
> > > However, such attack would require precise timing. Not sure whether it is feasible in
> > > practice.
> > 
> > It's very feasible.  XOR the bit in a tight loop, build the enclave on a
> > separate thread.  Do that until EINIT succeeds.  Compared to other timing
> > attacks, I doubt it'd take all that long to get a successful result.
> 
> How does it work? The setting PROVISION bit needs to be set after KVM checks SECS's
> attribute, and before KVM actually does ECREATE, right?

Yep.  More precisely, toggled between KVM's read into its local copy and final
execution of ECREATE.  It's actually a huge window when you consider how many
uops ENCLS has to churn through before it reads 'contents'.  The success rate
would probaby be 25%: 50% chance KVM's read sees the 'good' value, 50% chance
the CPU sees the 'bad' value in the same exit.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9581f81e62a4..cd71f30fbdd1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1000,6 +1000,9 @@  struct kvm_arch {
 		struct msr_bitmap_range ranges[16];
 	} msr_filter;
 
+	/* Guest can access the SGX PROVISIONKEY. */
+	bool sgx_provisioning_allowed;
+
 	struct kvm_pmu_event_filter *pmu_event_filter;
 	struct task_struct *nx_lpage_recovery_thread;
 
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
index 693bf7735308..4281045318ac 100644
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -12,6 +12,247 @@ 
 
 bool __read_mostly enable_sgx;
 
+/*
+ * ENCLS's memory operands use a fixed segment (DS) and a fixed
+ * address size based on the mode.  Related prefixes are ignored.
+ */
+static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
+			     int size, int alignment, gva_t *gva)
+{
+	struct kvm_segment s;
+	bool fault;
+
+	/* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
+	*gva = offset;
+	if (!is_long_mode(vcpu)) {
+		vmx_get_segment(vcpu, &s, VCPU_SREG_DS);
+		*gva += s.base;
+	}
+
+	if (!IS_ALIGNED(*gva, alignment)) {
+		fault = true;
+	} else if (likely(is_long_mode(vcpu))) {
+		fault = is_noncanonical_address(*gva, vcpu);
+	} else {
+		*gva &= 0xffffffff;
+		fault = (s.unusable) ||
+			(s.type != 2 && s.type != 3) ||
+			(*gva > s.limit) ||
+			((s.base != 0 || s.limit != 0xffffffff) &&
+			(((u64)*gva + size - 1) > s.limit + 1));
+	}
+	if (fault)
+		kvm_inject_gp(vcpu, 0);
+	return fault ? -EINVAL : 0;
+}
+
+static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
+					 unsigned int size)
+{
+	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+	vcpu->run->internal.ndata = 2;
+	vcpu->run->internal.data[0] = addr;
+	vcpu->run->internal.data[1] = size;
+}
+
+static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
+			unsigned int size)
+{
+	if (__copy_from_user(data, (void __user *)hva, size)) {
+		sgx_handle_emulation_failure(vcpu, hva, size);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write,
+			  gpa_t *gpa)
+{
+	struct x86_exception ex;
+
+	if (write)
+		*gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex);
+	else
+		*gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex);
+
+	if (*gpa == UNMAPPED_GVA) {
+		kvm_inject_emulated_page_fault(vcpu, &ex);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva)
+{
+	*hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa));
+	if (kvm_is_error_hva(*hva)) {
+		sgx_handle_emulation_failure(vcpu, gpa, 1);
+		return -EFAULT;
+	}
+
+	*hva |= gpa & ~PAGE_MASK;
+
+	return 0;
+}
+
+static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
+{
+	struct x86_exception ex;
+
+	/*
+	 * A non-EPCM #PF indicates a bad userspace HVA.  This *should* check
+	 * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC,
+	 * but the error code isn't (yet) plumbed through the ENCLS helpers.
+	 */
+	if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+		vcpu->run->internal.ndata = 0;
+		return 0;
+	}
+
+	/*
+	 * If the guest thinks it's running on SGX2 hardware, inject an SGX
+	 * #PF if the fault matches an EPCM fault signature (#GP on SGX1,
+	 * #PF on SGX2).  The assumption is that EPCM faults are much more
+	 * likely than a bad userspace address.
+	 */
+	if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
+	    guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
+		memset(&ex, 0, sizeof(ex));
+		ex.vector = PF_VECTOR;
+		ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
+				PFERR_SGX_MASK;
+		ex.address = gva;
+		ex.error_code_valid = true;
+		ex.nested_page_fault = false;
+		kvm_inject_page_fault(vcpu, &ex);
+	} else {
+		kvm_inject_gp(vcpu, 0);
+	}
+	return 1;
+}
+
+static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
+{
+	unsigned long a_hva, m_hva, x_hva, s_hva, secs_hva;
+	struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
+	gpa_t metadata_gpa, contents_gpa, secs_gpa;
+	struct sgx_pageinfo pageinfo;
+	gva_t pageinfo_gva, secs_gva;
+	u64 attributes, xfrm, size;
+	struct x86_exception ex;
+	u8 max_size_log2;
+	u32 miscselect;
+	int trapnr, r;
+
+	sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0);
+	sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1);
+	if (!sgx_12_0 || !sgx_12_1) {
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
+	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) ||
+	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
+		return 1;
+
+	/*
+	 * Copy the PAGEINFO to local memory, its pointers need to be
+	 * translated, i.e. we need to do a deep copy/translate.
+	 */
+	r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
+				sizeof(pageinfo), &ex);
+	if (r == X86EMUL_PROPAGATE_FAULT) {
+		kvm_inject_emulated_page_fault(vcpu, &ex);
+		return 1;
+	} else if (r != X86EMUL_CONTINUE) {
+		sgx_handle_emulation_failure(vcpu, pageinfo_gva, size);
+		return 0;
+	}
+
+	/*
+	 * Verify alignment early.  This conveniently avoids having to worry
+	 * about page splits on userspace addresses.
+	 */
+	if (!IS_ALIGNED(pageinfo.metadata, 64) ||
+	    !IS_ALIGNED(pageinfo.contents, 4096)) {
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
+	/*
+	 * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
+	 * Resume the guest on failure to inject a #PF.
+	 */
+	if (sgx_gva_to_gpa(vcpu, pageinfo.metadata, false, &metadata_gpa) ||
+	    sgx_gva_to_gpa(vcpu, pageinfo.contents, false, &contents_gpa) ||
+	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
+		return 1;
+
+	/*
+	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
+	 * KVM doesn't have to fully process one address at a time.  Exit to
+	 * userspace if a GPA is invalid.
+	 */
+	if (sgx_gpa_to_hva(vcpu, metadata_gpa,
+			   (unsigned long *)&pageinfo.metadata) ||
+	    sgx_gpa_to_hva(vcpu, contents_gpa,
+			   (unsigned long *)&pageinfo.contents) ||
+	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
+		return 0;
+
+	/*
+	 * Read out select portions of the input SECS to enforce userspace
+	 * restrictions on MISCSELECT, ATTRIBUTES, etc...  Note, 'contents' is
+	 * page aligned, i.e. no need to worry about page splits.
+	 */
+	m_hva = pageinfo.contents + offsetof(struct sgx_secs, miscselect);
+	a_hva = pageinfo.contents + offsetof(struct sgx_secs, attributes);
+	x_hva = pageinfo.contents + offsetof(struct sgx_secs, xfrm);
+	s_hva = pageinfo.contents + offsetof(struct sgx_secs, size);
+
+	/* Exit to userspace if copying from a host userspace address fails. */
+	if (sgx_read_hva(vcpu, m_hva, &miscselect, sizeof(miscselect)) ||
+	    sgx_read_hva(vcpu, a_hva, &attributes, sizeof(attributes)) ||
+	    sgx_read_hva(vcpu, x_hva, &xfrm, sizeof(xfrm)) ||
+	    sgx_read_hva(vcpu, s_hva, &size, sizeof(size)))
+		return 0;
+
+	/* Enforce restriction of access to the PROVISIONKEY. */
+	if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
+	    (attributes & SGX_ATTR_PROVISIONKEY)) {
+		if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
+			pr_warn_once("KVM: SGX PROVISIONKEY advertised but not allowed\n");
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
+	/* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */
+	if ((u32)miscselect & ~sgx_12_0->ebx ||
+	    (u32)attributes & ~sgx_12_1->eax ||
+	    (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
+	    (u32)xfrm & ~sgx_12_1->ecx ||
+	    (u32)(xfrm >> 32) & ~sgx_12_1->edx) {
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
+	/* Enforce CPUID restriction on max enclave size. */
+	max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
+							    sgx_12_0->edx;
+	if (size >= BIT_ULL(max_size_log2))
+		kvm_inject_gp(vcpu, 0);
+
+	if (sgx_virt_ecreate(&pageinfo, (void __user *)secs_hva, &trapnr))
+		return sgx_inject_fault(vcpu, secs_gva, trapnr);
+
+	return kvm_skip_emulated_instruction(vcpu);
+}
+
 static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
 {
 	if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX))
@@ -42,6 +283,8 @@  int handle_encls(struct kvm_vcpu *vcpu)
 	} else if (!sgx_enabled_in_guest_bios(vcpu)) {
 		kvm_inject_gp(vcpu, 0);
 	} else {
+		if (leaf == ECREATE)
+			return handle_encls_ecreate(vcpu);
 		WARN(1, "KVM: unexpected exit on ENCLS[%u]", leaf);
 		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
 		vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;