diff mbox series

[v39,12/24] x86/sgx: Add SGX_IOC_ENCLAVE_CREATE

Message ID 20201003045059.665934-13-jarkko.sakkinen@linux.intel.com
State New, archived
Headers show
Series Intel SGX foundations | expand

Commit Message

Jarkko Sakkinen Oct. 3, 2020, 4:50 a.m. UTC
Add an ioctl that performs ENCLS[ECREATE], which creates SGX Enclave
Control Structure for the enclave. SECS contains attributes about the
enclave that are used by the hardware and cannot be directly accessed by
software, as SECS resides in the EPC.

One essential field in SECS is a field that stores the SHA256 of the
measured enclave pages. This field, MRENCLAVE, is initialized by the
ECREATE instruction and updated by every EADD and EEXTEND operation.
Finally, EINIT locks down the value.

Acked-by: Jethro Beekman <jethro@fortanix.com>
Tested-by: Jethro Beekman <jethro@fortanix.com>
Tested-by: Haitao Huang <haitao.huang@linux.intel.com>
Tested-by: Chunyang Hui <sanqian.hcy@antfin.com>
Tested-by: Jordan Hand <jorhand@linux.microsoft.com>
Tested-by: Nathaniel McCallum <npmccallum@redhat.com>
Tested-by: Seth Moore <sethmo@google.com>
Tested-by: Darren Kenny <darren.kenny@oracle.com>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Co-developed-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 .../userspace-api/ioctl/ioctl-number.rst      |   1 +
 arch/x86/include/uapi/asm/sgx.h               |  25 ++
 arch/x86/kernel/cpu/sgx/Makefile              |   1 +
 arch/x86/kernel/cpu/sgx/driver.c              |  12 +
 arch/x86/kernel/cpu/sgx/driver.h              |   1 +
 arch/x86/kernel/cpu/sgx/ioctl.c               | 223 ++++++++++++++++++
 6 files changed, 263 insertions(+)
 create mode 100644 arch/x86/include/uapi/asm/sgx.h
 create mode 100644 arch/x86/kernel/cpu/sgx/ioctl.c

Comments

Dave Hansen Oct. 16, 2020, 5:07 p.m. UTC | #1
> +static u32 sgx_calc_ssa_frame_size(u32 miscselect, u64 xfrm)
> +{
> +	u32 size_max = PAGE_SIZE;
> +	u32 size;
> +	int i;
> +
> +	for (i = 2; i < 64; i++) {

Should this be:

	for (i = XFEATURE_YMM; i < XFEATURE_MAX; i++) {

Basically, does this need to be 64, or should it be limited to the
kernel-known XFEATURES?  Or, should this be looping through all the bits
set in xfeatures_mask_user().

> +		if (!((1 << i) & xfrm))
> +			continue;
> +
> +		size = SGX_SSA_GPRS_SIZE + sgx_xsave_size_tbl[i];
> +
> +		if (miscselect & SGX_MISC_EXINFO)
> +			size += SGX_SSA_MISC_EXINFO_SIZE;
> +
> +		if (size > size_max)
> +			size_max = size;
> +	}
> +
> +	return PFN_UP(size_max);
> +}
> +
> +static int sgx_validate_secs(const struct sgx_secs *secs)
> +{

What's the overall point of this function?  Does it avoid a #GP from an
instruction later?

Does all of the 'secs' content come from userspace?

> +	u64 max_size = (secs->attributes & SGX_ATTR_MODE64BIT) ?
> +		       sgx_encl_size_max_64 : sgx_encl_size_max_32;
> +
> +	if (secs->size < (2 * PAGE_SIZE) || !is_power_of_2(secs->size))
> +		return -EINVAL;
> +
> +	if (secs->base & (secs->size - 1))
> +		return -EINVAL;
> +
> +	if (secs->miscselect & sgx_misc_reserved_mask ||
> +	    secs->attributes & sgx_attributes_reserved_mask ||
> +	    secs->xfrm & sgx_xfrm_reserved_mask)
> +		return -EINVAL;
> +
> +	if (secs->size > max_size)
> +		return -EINVAL;
> +
> +	if (!(secs->xfrm & XFEATURE_MASK_FP) ||
> +	    !(secs->xfrm & XFEATURE_MASK_SSE) ||
> +	    (((secs->xfrm >> XFEATURE_BNDREGS) & 1) != ((secs->xfrm >> XFEATURE_BNDCSR) & 1)))
> +		return -EINVAL;
> +
> +	if (!secs->ssa_frame_size)
> +		return -EINVAL;
> +
> +	if (sgx_calc_ssa_frame_size(secs->miscselect, secs->xfrm) > secs->ssa_frame_size)
> +		return -EINVAL;
> +
> +	if (memchr_inv(secs->reserved1, 0, sizeof(secs->reserved1)) ||
> +	    memchr_inv(secs->reserved2, 0, sizeof(secs->reserved2)) ||
> +	    memchr_inv(secs->reserved3, 0, sizeof(secs->reserved3)) ||
> +	    memchr_inv(secs->reserved4, 0, sizeof(secs->reserved4)))
> +		return -EINVAL;
> +
> +	return 0;
> +}

I think it would be nice to at least have one comment per condition to
explain what's going on there.

> +static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
> +{
> +	struct sgx_epc_page *secs_epc;
> +	struct sgx_pageinfo pginfo;
> +	struct sgx_secinfo secinfo;
> +	unsigned long encl_size;
> +	struct file *backing;
> +	long ret;
> +
> +	if (sgx_validate_secs(secs)) {
> +		pr_debug("invalid SECS\n");
> +		return -EINVAL;
> +	}
> +
> +	/* The extra page goes to SECS. */
> +	encl_size = secs->size + PAGE_SIZE;
> +
> +	backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5),
> +				   VM_NORESERVE);

What's the >>5 adjustment for?

> +	if (IS_ERR(backing))
> +		return PTR_ERR(backing);
> +
> +	encl->backing = backing;
> +
> +	secs_epc = __sgx_alloc_epc_page();
> +	if (IS_ERR(secs_epc)) {
> +		ret = PTR_ERR(secs_epc);
> +		goto err_out_backing;
> +	}
> +
> +	encl->secs.epc_page = secs_epc;
> +
> +	pginfo.addr = 0;
> +	pginfo.contents = (unsigned long)secs;
> +	pginfo.metadata = (unsigned long)&secinfo;
> +	pginfo.secs = 0;
> +	memset(&secinfo, 0, sizeof(secinfo));
> +
> +	ret = __ecreate((void *)&pginfo, sgx_get_epc_addr(secs_epc));
> +	if (ret) {
> +		pr_debug("ECREATE returned %ld\n", ret);
> +		goto err_out;
> +	}
> +
> +	if (secs->attributes & SGX_ATTR_DEBUG)
> +		atomic_or(SGX_ENCL_DEBUG, &encl->flags);
> +
> +	encl->secs.encl = encl;
> +	encl->base = secs->base;
> +	encl->size = secs->size;
> +	encl->ssaframesize = secs->ssa_frame_size;
> +
> +	/*
> +	 * Set SGX_ENCL_CREATED only after the enclave is fully prepped.  This
> +	 * allows setting and checking enclave creation without having to take
> +	 * encl->lock.
> +	 */
> +	atomic_or(SGX_ENCL_CREATED, &encl->flags);

I'm wondering what the impact of setting this flag is.  That's hard to
figure out because the flag isn't documented.

It's also unusual to have atomic_or() used like this.  The normal
set_bit()/clear_bit() that you can use on an unsigned long are actually
implemented as atomics.

I'm curious both why this needs to be atomics, *and* why the atomic_or()
interface is being used.

> +	return 0;
> +
> +err_out:
> +	sgx_free_epc_page(encl->secs.epc_page);
> +	encl->secs.epc_page = NULL;
> +
> +err_out_backing:
> +	fput(encl->backing);
> +	encl->backing = NULL;
> +
> +	return ret;
> +}
> +
> +/**
> + * sgx_ioc_enclave_create - handler for %SGX_IOC_ENCLAVE_CREATE
> + * @encl:	an enclave pointer
> + * @arg:	userspace pointer to a struct sgx_enclave_create instance
> + *
> + * Allocate kernel data structures for a new enclave and execute ECREATE after
> + * checking that the provided data for SECS meets the expectations of ECREATE
> + * for an uninitialized enclave and size of the address space does not surpass the
> + * platform expectations. This validation is done by sgx_validate_secs().
> + *
> + * Return:
> + *   0 on success,
> + *   -errno otherwise
> + */
> +static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg)
> +{
> +	struct sgx_enclave_create ecreate;
> +	struct page *secs_page;
> +	struct sgx_secs *secs;
> +	int ret;
> +
> +	if (atomic_read(&encl->flags) & SGX_ENCL_CREATED)
> +		return -EINVAL;
> +
> +	if (copy_from_user(&ecreate, arg, sizeof(ecreate)))
> +		return -EFAULT;
> +
> +	secs_page = alloc_page(GFP_KERNEL);
> +	if (!secs_page)
> +		return -ENOMEM;
> +
> +	secs = kmap(secs_page);

GFP_KERNEL pages are in low memory and don't need to be kmap()'d.

This can just be:

	secs = __get_free_page(GFP_KERNEL);
	if (copy_from_user(secs, (void __user *)ecreate.src,...

and forget about the kmapping.  You also need to change __free_pages()
to free_pages().

The other alternative would be to just kmalloc() it.  kmalloc()
guarantees alignment in a stronger way than it used to.

> +	if (copy_from_user(secs, (void __user *)ecreate.src, sizeof(*secs))) {
> +		ret = -EFAULT;
> +		goto out;
> +	}
> +
> +	ret = sgx_encl_create(encl, secs);
> +
> +out:
> +	kunmap(secs_page);
> +	__free_page(secs_page);
> +	return ret;
> +}
> +
> +long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
> +{
> +	struct sgx_encl *encl = filep->private_data;
> +	int ret, encl_flags;
> +
> +	encl_flags = atomic_fetch_or(SGX_ENCL_IOCTL, &encl->flags);
> +	if (encl_flags & SGX_ENCL_IOCTL)
> +		return -EBUSY;

Is the SGX_ENCL_IOCTL bit essentially just a lock to single-thread
ioctl()s?  Should we name it as such?

> +	if (encl_flags & SGX_ENCL_DEAD) {
> +		ret = -EFAULT;
> +		goto out;
> +	}
> +
> +	switch (cmd) {
> +	case SGX_IOC_ENCLAVE_CREATE:
> +		ret = sgx_ioc_enclave_create(encl, (void __user *)arg);
> +		break;
> +	default:
> +		ret = -ENOIOCTLCMD;
> +		break;
> +	}
> +
> +out:
> +	atomic_andnot(SGX_ENCL_IOCTL, &encl->flags);
> +	return ret;
> +}
>
Jarkko Sakkinen Oct. 18, 2020, 4:26 a.m. UTC | #2
On Fri, Oct 16, 2020 at 10:07:47AM -0700, Dave Hansen wrote:
> > +static u32 sgx_calc_ssa_frame_size(u32 miscselect, u64 xfrm)
> > +{
> > +	u32 size_max = PAGE_SIZE;
> > +	u32 size;
> > +	int i;
> > +
> > +	for (i = 2; i < 64; i++) {
> 
> Should this be:
> 
> 	for (i = XFEATURE_YMM; i < XFEATURE_MAX; i++) {
> 
> Basically, does this need to be 64, or should it be limited to the
> kernel-known XFEATURES?  Or, should this be looping through all the bits
> set in xfeatures_mask_user().

I think so yes.

> > +		if (!((1 << i) & xfrm))
> > +			continue;
> > +
> > +		size = SGX_SSA_GPRS_SIZE + sgx_xsave_size_tbl[i];
> > +
> > +		if (miscselect & SGX_MISC_EXINFO)
> > +			size += SGX_SSA_MISC_EXINFO_SIZE;
> > +
> > +		if (size > size_max)
> > +			size_max = size;
> > +	}
> > +
> > +	return PFN_UP(size_max);
> > +}
> > +
> > +static int sgx_validate_secs(const struct sgx_secs *secs)
> > +{
> 
> What's the overall point of this function?  Does it avoid a #GP from an
> instruction later?
> 
> Does all of the 'secs' content come from userspace?

Yes it does avoid #GP, and all the data comes from the user space.

> > +	u64 max_size = (secs->attributes & SGX_ATTR_MODE64BIT) ?
> > +		       sgx_encl_size_max_64 : sgx_encl_size_max_32;
> > +
> > +	if (secs->size < (2 * PAGE_SIZE) || !is_power_of_2(secs->size))
> > +		return -EINVAL;
> > +
> > +	if (secs->base & (secs->size - 1))
> > +		return -EINVAL;
> > +
> > +	if (secs->miscselect & sgx_misc_reserved_mask ||
> > +	    secs->attributes & sgx_attributes_reserved_mask ||
> > +	    secs->xfrm & sgx_xfrm_reserved_mask)
> > +		return -EINVAL;
> > +
> > +	if (secs->size > max_size)
> > +		return -EINVAL;
> > +
> > +	if (!(secs->xfrm & XFEATURE_MASK_FP) ||
> > +	    !(secs->xfrm & XFEATURE_MASK_SSE) ||
> > +	    (((secs->xfrm >> XFEATURE_BNDREGS) & 1) != ((secs->xfrm >> XFEATURE_BNDCSR) & 1)))
> > +		return -EINVAL;
> > +
> > +	if (!secs->ssa_frame_size)
> > +		return -EINVAL;
> > +
> > +	if (sgx_calc_ssa_frame_size(secs->miscselect, secs->xfrm) > secs->ssa_frame_size)
> > +		return -EINVAL;
> > +
> > +	if (memchr_inv(secs->reserved1, 0, sizeof(secs->reserved1)) ||
> > +	    memchr_inv(secs->reserved2, 0, sizeof(secs->reserved2)) ||
> > +	    memchr_inv(secs->reserved3, 0, sizeof(secs->reserved3)) ||
> > +	    memchr_inv(secs->reserved4, 0, sizeof(secs->reserved4)))
> > +		return -EINVAL;
> > +
> > +	return 0;
> > +}
> 
> I think it would be nice to at least have one comment per condition to
> explain what's going on there.

OK, I can do that.

> 
> > +static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
> > +{
> > +	struct sgx_epc_page *secs_epc;
> > +	struct sgx_pageinfo pginfo;
> > +	struct sgx_secinfo secinfo;
> > +	unsigned long encl_size;
> > +	struct file *backing;
> > +	long ret;
> > +
> > +	if (sgx_validate_secs(secs)) {
> > +		pr_debug("invalid SECS\n");
> > +		return -EINVAL;
> > +	}
> > +
> > +	/* The extra page goes to SECS. */
> > +	encl_size = secs->size + PAGE_SIZE;
> > +
> > +	backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5),
> > +				   VM_NORESERVE);
> 
> What's the >>5 adjustment for?

The backing storage stores not only the swapped page but also
Paging Crypto MetaData (PCMD) structure. It essentially contains
a CPU encrypted MAC for a page.

The MAC is over page version and data. The version is stored in
a EPC page called Version Array (VA) page.

Both of these are needed by ENCLS[ELDU].

> 
> > +	if (IS_ERR(backing))
> > +		return PTR_ERR(backing);
> > +
> > +	encl->backing = backing;
> > +
> > +	secs_epc = __sgx_alloc_epc_page();
> > +	if (IS_ERR(secs_epc)) {
> > +		ret = PTR_ERR(secs_epc);
> > +		goto err_out_backing;
> > +	}
> > +
> > +	encl->secs.epc_page = secs_epc;
> > +
> > +	pginfo.addr = 0;
> > +	pginfo.contents = (unsigned long)secs;
> > +	pginfo.metadata = (unsigned long)&secinfo;
> > +	pginfo.secs = 0;
> > +	memset(&secinfo, 0, sizeof(secinfo));
> > +
> > +	ret = __ecreate((void *)&pginfo, sgx_get_epc_addr(secs_epc));
> > +	if (ret) {
> > +		pr_debug("ECREATE returned %ld\n", ret);
> > +		goto err_out;
> > +	}
> > +
> > +	if (secs->attributes & SGX_ATTR_DEBUG)
> > +		atomic_or(SGX_ENCL_DEBUG, &encl->flags);
> > +
> > +	encl->secs.encl = encl;
> > +	encl->base = secs->base;
> > +	encl->size = secs->size;
> > +	encl->ssaframesize = secs->ssa_frame_size;
> > +
> > +	/*
> > +	 * Set SGX_ENCL_CREATED only after the enclave is fully prepped.  This
> > +	 * allows setting and checking enclave creation without having to take
> > +	 * encl->lock.
> > +	 */
> > +	atomic_or(SGX_ENCL_CREATED, &encl->flags);
> 
> I'm wondering what the impact of setting this flag is.  That's hard to
> figure out because the flag isn't documented.
> 
> It's also unusual to have atomic_or() used like this.  The normal
> set_bit()/clear_bit() that you can use on an unsigned long are actually
> implemented as atomics.
> 
> I'm curious both why this needs to be atomics, *and* why the atomic_or()
> interface is being used.

Right, and this covers also test_and_change_bit() too (just checked).

So, I suppose we can.

> > +	return 0;
> > +
> > +err_out:
> > +	sgx_free_epc_page(encl->secs.epc_page);
> > +	encl->secs.epc_page = NULL;
> > +
> > +err_out_backing:
> > +	fput(encl->backing);
> > +	encl->backing = NULL;
> > +
> > +	return ret;
> > +}
> > +
> > +/**
> > + * sgx_ioc_enclave_create - handler for %SGX_IOC_ENCLAVE_CREATE
> > + * @encl:	an enclave pointer
> > + * @arg:	userspace pointer to a struct sgx_enclave_create instance
> > + *
> > + * Allocate kernel data structures for a new enclave and execute ECREATE after
> > + * checking that the provided data for SECS meets the expectations of ECREATE
> > + * for an uninitialized enclave and size of the address space does not surpass the
> > + * platform expectations. This validation is done by sgx_validate_secs().
> > + *
> > + * Return:
> > + *   0 on success,
> > + *   -errno otherwise
> > + */
> > +static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg)
> > +{
> > +	struct sgx_enclave_create ecreate;
> > +	struct page *secs_page;
> > +	struct sgx_secs *secs;
> > +	int ret;
> > +
> > +	if (atomic_read(&encl->flags) & SGX_ENCL_CREATED)
> > +		return -EINVAL;
> > +
> > +	if (copy_from_user(&ecreate, arg, sizeof(ecreate)))
> > +		return -EFAULT;
> > +
> > +	secs_page = alloc_page(GFP_KERNEL);
> > +	if (!secs_page)
> > +		return -ENOMEM;
> > +
> > +	secs = kmap(secs_page);
> 
> GFP_KERNEL pages are in low memory and don't need to be kmap()'d.
> 
> This can just be:
> 
> 	secs = __get_free_page(GFP_KERNEL);
> 	if (copy_from_user(secs, (void __user *)ecreate.src,...
> 
> and forget about the kmapping.  You also need to change __free_pages()
> to free_pages().
> 
> The other alternative would be to just kmalloc() it.  kmalloc()
> guarantees alignment in a stronger way than it used to.

Right, I'll change this, makes sense.

> 
> > +	if (copy_from_user(secs, (void __user *)ecreate.src, sizeof(*secs))) {
> > +		ret = -EFAULT;
> > +		goto out;
> > +	}
> > +
> > +	ret = sgx_encl_create(encl, secs);
> > +
> > +out:
> > +	kunmap(secs_page);
> > +	__free_page(secs_page);
> > +	return ret;
> > +}
> > +
> > +long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
> > +{
> > +	struct sgx_encl *encl = filep->private_data;
> > +	int ret, encl_flags;
> > +
> > +	encl_flags = atomic_fetch_or(SGX_ENCL_IOCTL, &encl->flags);
> > +	if (encl_flags & SGX_ENCL_IOCTL)
> > +		return -EBUSY;
> 
> Is the SGX_ENCL_IOCTL bit essentially just a lock to single-thread
> ioctl()s?  Should we name it as such?

Yes. It makes the concurrency overally easier if we can assume that
only a single ioctl is in progress. There is no good reason to do
them in parallel.

E.g. when you add pages you want to do that serially because the
order changes the MRENCLAVE.

So should I rename it as SGX_ENCL_IOCTL_LOCKED?

> > +	if (encl_flags & SGX_ENCL_DEAD) {
> > +		ret = -EFAULT;
> > +		goto out;
> > +	}
> > +
> > +	switch (cmd) {
> > +	case SGX_IOC_ENCLAVE_CREATE:
> > +		ret = sgx_ioc_enclave_create(encl, (void __user *)arg);
> > +		break;
> > +	default:
> > +		ret = -ENOIOCTLCMD;
> > +		break;
> > +	}
> > +
> > +out:
> > +	atomic_andnot(SGX_ENCL_IOCTL, &encl->flags);
> > +	return ret;
> > +}
> > 
> 


/Jarkko
Dave Hansen Oct. 19, 2020, 8:21 p.m. UTC | #3
On 10/17/20 9:26 PM, Jarkko Sakkinen wrote:
...
>>> +static int sgx_validate_secs(const struct sgx_secs *secs)
>>> +{
>>
>> What's the overall point of this function?  Does it avoid a #GP from an
>> instruction later?
>>
>> Does all of the 'secs' content come from userspace?
> 
> Yes it does avoid #GP, and all the data comes from the user space.

Please comment the function to indicate this.

But, in general, why do we care to avoid a #GP?  Is it just because we
don't have infrastructure in-kernel to suppress the resulting panic()?

>>> +	u64 max_size = (secs->attributes & SGX_ATTR_MODE64BIT) ?
>>> +		       sgx_encl_size_max_64 : sgx_encl_size_max_32;
>>> +
>>> +	if (secs->size < (2 * PAGE_SIZE) || !is_power_of_2(secs->size))
>>> +		return -EINVAL;
>>> +
>>> +	if (secs->base & (secs->size - 1))
>>> +		return -EINVAL;
>>> +
>>> +	if (secs->miscselect & sgx_misc_reserved_mask ||
>>> +	    secs->attributes & sgx_attributes_reserved_mask ||
>>> +	    secs->xfrm & sgx_xfrm_reserved_mask)
>>> +		return -EINVAL;
>>> +
>>> +	if (secs->size > max_size)
>>> +		return -EINVAL;
>>> +
>>> +	if (!(secs->xfrm & XFEATURE_MASK_FP) ||
>>> +	    !(secs->xfrm & XFEATURE_MASK_SSE) ||
>>> +	    (((secs->xfrm >> XFEATURE_BNDREGS) & 1) != ((secs->xfrm >> XFEATURE_BNDCSR) & 1)))
>>> +		return -EINVAL;
>>> +
>>> +	if (!secs->ssa_frame_size)
>>> +		return -EINVAL;
>>> +
>>> +	if (sgx_calc_ssa_frame_size(secs->miscselect, secs->xfrm) > secs->ssa_frame_size)
>>> +		return -EINVAL;
>>> +
>>> +	if (memchr_inv(secs->reserved1, 0, sizeof(secs->reserved1)) ||
>>> +	    memchr_inv(secs->reserved2, 0, sizeof(secs->reserved2)) ||
>>> +	    memchr_inv(secs->reserved3, 0, sizeof(secs->reserved3)) ||
>>> +	    memchr_inv(secs->reserved4, 0, sizeof(secs->reserved4)))
>>> +		return -EINVAL;
>>> +
>>> +	return 0;
>>> +}
>>
>> I think it would be nice to at least have one comment per condition to
>> explain what's going on there.
...


>>> +static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
>>> +{
>>> +	struct sgx_epc_page *secs_epc;
>>> +	struct sgx_pageinfo pginfo;
>>> +	struct sgx_secinfo secinfo;
>>> +	unsigned long encl_size;
>>> +	struct file *backing;
>>> +	long ret;
>>> +
>>> +	if (sgx_validate_secs(secs)) {
>>> +		pr_debug("invalid SECS\n");
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	/* The extra page goes to SECS. */
>>> +	encl_size = secs->size + PAGE_SIZE;
>>> +
>>> +	backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5),
>>> +				   VM_NORESERVE);
>>
>> What's the >>5 adjustment for?
> 
> The backing storage stores not only the swapped page but also
> Paging Crypto MetaData (PCMD) structure. It essentially contains
> a CPU encrypted MAC for a page.
> 
> The MAC is over page version and data. The version is stored in
> a EPC page called Version Array (VA) page.
> 
> Both of these are needed by ENCLS[ELDU].

	/*
	 * SGX backing storage needs to contain space for both the
	 * EPC data and some metadata called the Paging Crypto
	 * MetaData (PCMD).  The PCMD needs 128b of storage for each
	 * page.
 	 */

Also, the MAC is a fixed size, right?  Let's say that x86 got a larger
page size in the future.  Would this number be 128b or PAGE_SIZE/32?

If it's a fixed size, I'd write:

	size = encl_size;
	size += (encl_size / PAGE_SIZE) * SGX_PCPD_PER_PAGE;

If it really is 1/32nd, I'd write

	size += encl_size / SGX_PCPD_RATIO;

or something.

Either way, the >>5 is total magic and needs comments and fixing.

>>> +long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
>>> +{
>>> +	struct sgx_encl *encl = filep->private_data;
>>> +	int ret, encl_flags;
>>> +
>>> +	encl_flags = atomic_fetch_or(SGX_ENCL_IOCTL, &encl->flags);
>>> +	if (encl_flags & SGX_ENCL_IOCTL)
>>> +		return -EBUSY;
>>
>> Is the SGX_ENCL_IOCTL bit essentially just a lock to single-thread
>> ioctl()s?  Should we name it as such?
> 
> Yes. It makes the concurrency overally easier if we can assume that
> only a single ioctl is in progress. There is no good reason to do
> them in parallel.
> 
> E.g. when you add pages you want to do that serially because the
> order changes the MRENCLAVE.

There are also hardware concurrency requirements, right?  A bunch of the
SGX functions seem not not even support being called in parallel.

> So should I rename it as SGX_ENCL_IOCTL_LOCKED?

I'd rather not see hand-rolled locking primitives frankly.
Sean Christopherson Oct. 19, 2020, 8:48 p.m. UTC | #4
On Mon, Oct 19, 2020 at 01:21:09PM -0700, Dave Hansen wrote:
> On 10/17/20 9:26 PM, Jarkko Sakkinen wrote:
> >>> +long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
> >>> +{
> >>> +	struct sgx_encl *encl = filep->private_data;
> >>> +	int ret, encl_flags;
> >>> +
> >>> +	encl_flags = atomic_fetch_or(SGX_ENCL_IOCTL, &encl->flags);
> >>> +	if (encl_flags & SGX_ENCL_IOCTL)
> >>> +		return -EBUSY;
> >>
> >> Is the SGX_ENCL_IOCTL bit essentially just a lock to single-thread
> >> ioctl()s?  Should we name it as such?
> > 
> > Yes. It makes the concurrency overally easier if we can assume that
> > only a single ioctl is in progress. There is no good reason to do
> > them in parallel.
> > 
> > E.g. when you add pages you want to do that serially because the
> > order changes the MRENCLAVE.
> 
> There are also hardware concurrency requirements, right?  A bunch of the
> SGX functions seem not not even support being called in parallel.

Yes, and the driver, even when "holding" SGX_ENCL_IOCTL, takes encl->lock
when executing an ENCLS leaf.  The separate IOCTL flag avoids complications
with reclaim, specifically it allows the ioctls to initiate reclaim without
hitting a deadlock.

Reclaim needs to take encl->lock, e.g. to do ENCLS[EBLOCK], and reclaim is by
default initiated during allocation if there are no pages available.  I.e. if
an ioctl() simply held encl->lock, it would deadlock in the scenario where it
triggered reclaim on the current enclave.

In other words, the flag is necessary even if it weren't being used a lock
primitive, e.g. it'd still need to exist even if encl->lock were taken to set
and check the flag.  The atomic shenanigans were added as an optimization to
allow reclaim in parallel with the bulk of the ioctl flows, and partly because
using atomic_fetch_or() avoided having to drop encl->lock in an error flow,
i.e. yielded less code.

> > So should I rename it as SGX_ENCL_IOCTL_LOCKED?
> 
> I'd rather not see hand-rolled locking primitives frankly.

IOCTL_IN_PROGRESS would be my vote if we want a more descriptive name.
diff mbox series

Patch

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 2a198838fca9..a89e1c46a25a 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -323,6 +323,7 @@  Code  Seq#    Include File                                           Comments
                                                                      <mailto:tlewis@mindspring.com>
 0xA3  90-9F  linux/dtlk.h
 0xA4  00-1F  uapi/linux/tee.h                                        Generic TEE subsystem
+0xA4  00-1F  uapi/asm/sgx.h                                          <mailto:linux-sgx@vger.kernel.org>
 0xAA  00-3F  linux/uapi/linux/userfaultfd.h
 0xAB  00-1F  linux/nbd.h
 0xAC  00-1F  linux/raw.h
diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
new file mode 100644
index 000000000000..c75b375f3770
--- /dev/null
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -0,0 +1,25 @@ 
+/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2016-19 Intel Corporation.
+ */
+#ifndef _UAPI_ASM_X86_SGX_H
+#define _UAPI_ASM_X86_SGX_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define SGX_MAGIC 0xA4
+
+#define SGX_IOC_ENCLAVE_CREATE \
+	_IOW(SGX_MAGIC, 0x00, struct sgx_enclave_create)
+
+/**
+ * struct sgx_enclave_create - parameter structure for the
+ *                             %SGX_IOC_ENCLAVE_CREATE ioctl
+ * @src:	address for the SECS page data
+ */
+struct sgx_enclave_create  {
+	__u64	src;
+};
+
+#endif /* _UAPI_ASM_X86_SGX_H */
diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile
index 3fc451120735..91d3dc784a29 100644
--- a/arch/x86/kernel/cpu/sgx/Makefile
+++ b/arch/x86/kernel/cpu/sgx/Makefile
@@ -1,4 +1,5 @@ 
 obj-y += \
 	driver.o \
 	encl.o \
+	ioctl.o \
 	main.o
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index f54da5f19c2b..7bdb49dfcca6 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -114,10 +114,22 @@  static unsigned long sgx_get_unmapped_area(struct file *file,
 	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
 }
 
+#ifdef CONFIG_COMPAT
+static long sgx_compat_ioctl(struct file *filep, unsigned int cmd,
+			      unsigned long arg)
+{
+	return sgx_ioctl(filep, cmd, arg);
+}
+#endif
+
 static const struct file_operations sgx_encl_fops = {
 	.owner			= THIS_MODULE,
 	.open			= sgx_open,
 	.release		= sgx_release,
+	.unlocked_ioctl		= sgx_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl		= sgx_compat_ioctl,
+#endif
 	.mmap			= sgx_mmap,
 	.get_unmapped_area	= sgx_get_unmapped_area,
 };
diff --git a/arch/x86/kernel/cpu/sgx/driver.h b/arch/x86/kernel/cpu/sgx/driver.h
index f7ce40dedc91..e4063923115b 100644
--- a/arch/x86/kernel/cpu/sgx/driver.h
+++ b/arch/x86/kernel/cpu/sgx/driver.h
@@ -9,6 +9,7 @@ 
 #include <linux/rwsem.h>
 #include <linux/sched.h>
 #include <linux/workqueue.h>
+#include <uapi/asm/sgx.h>
 #include "sgx.h"
 
 #define SGX_EINIT_SPIN_COUNT	20
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
new file mode 100644
index 000000000000..9bb4694e57c1
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -0,0 +1,223 @@ 
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+// Copyright(c) 2016-19 Intel Corporation.
+
+#include <asm/mman.h>
+#include <linux/mman.h>
+#include <linux/delay.h>
+#include <linux/file.h>
+#include <linux/hashtable.h>
+#include <linux/highmem.h>
+#include <linux/ratelimit.h>
+#include <linux/sched/signal.h>
+#include <linux/shmem_fs.h>
+#include <linux/slab.h>
+#include <linux/suspend.h>
+#include "driver.h"
+#include "encl.h"
+#include "encls.h"
+
+static u32 sgx_calc_ssa_frame_size(u32 miscselect, u64 xfrm)
+{
+	u32 size_max = PAGE_SIZE;
+	u32 size;
+	int i;
+
+	for (i = 2; i < 64; i++) {
+		if (!((1 << i) & xfrm))
+			continue;
+
+		size = SGX_SSA_GPRS_SIZE + sgx_xsave_size_tbl[i];
+
+		if (miscselect & SGX_MISC_EXINFO)
+			size += SGX_SSA_MISC_EXINFO_SIZE;
+
+		if (size > size_max)
+			size_max = size;
+	}
+
+	return PFN_UP(size_max);
+}
+
+static int sgx_validate_secs(const struct sgx_secs *secs)
+{
+	u64 max_size = (secs->attributes & SGX_ATTR_MODE64BIT) ?
+		       sgx_encl_size_max_64 : sgx_encl_size_max_32;
+
+	if (secs->size < (2 * PAGE_SIZE) || !is_power_of_2(secs->size))
+		return -EINVAL;
+
+	if (secs->base & (secs->size - 1))
+		return -EINVAL;
+
+	if (secs->miscselect & sgx_misc_reserved_mask ||
+	    secs->attributes & sgx_attributes_reserved_mask ||
+	    secs->xfrm & sgx_xfrm_reserved_mask)
+		return -EINVAL;
+
+	if (secs->size > max_size)
+		return -EINVAL;
+
+	if (!(secs->xfrm & XFEATURE_MASK_FP) ||
+	    !(secs->xfrm & XFEATURE_MASK_SSE) ||
+	    (((secs->xfrm >> XFEATURE_BNDREGS) & 1) != ((secs->xfrm >> XFEATURE_BNDCSR) & 1)))
+		return -EINVAL;
+
+	if (!secs->ssa_frame_size)
+		return -EINVAL;
+
+	if (sgx_calc_ssa_frame_size(secs->miscselect, secs->xfrm) > secs->ssa_frame_size)
+		return -EINVAL;
+
+	if (memchr_inv(secs->reserved1, 0, sizeof(secs->reserved1)) ||
+	    memchr_inv(secs->reserved2, 0, sizeof(secs->reserved2)) ||
+	    memchr_inv(secs->reserved3, 0, sizeof(secs->reserved3)) ||
+	    memchr_inv(secs->reserved4, 0, sizeof(secs->reserved4)))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
+{
+	struct sgx_epc_page *secs_epc;
+	struct sgx_pageinfo pginfo;
+	struct sgx_secinfo secinfo;
+	unsigned long encl_size;
+	struct file *backing;
+	long ret;
+
+	if (sgx_validate_secs(secs)) {
+		pr_debug("invalid SECS\n");
+		return -EINVAL;
+	}
+
+	/* The extra page goes to SECS. */
+	encl_size = secs->size + PAGE_SIZE;
+
+	backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5),
+				   VM_NORESERVE);
+	if (IS_ERR(backing))
+		return PTR_ERR(backing);
+
+	encl->backing = backing;
+
+	secs_epc = __sgx_alloc_epc_page();
+	if (IS_ERR(secs_epc)) {
+		ret = PTR_ERR(secs_epc);
+		goto err_out_backing;
+	}
+
+	encl->secs.epc_page = secs_epc;
+
+	pginfo.addr = 0;
+	pginfo.contents = (unsigned long)secs;
+	pginfo.metadata = (unsigned long)&secinfo;
+	pginfo.secs = 0;
+	memset(&secinfo, 0, sizeof(secinfo));
+
+	ret = __ecreate((void *)&pginfo, sgx_get_epc_addr(secs_epc));
+	if (ret) {
+		pr_debug("ECREATE returned %ld\n", ret);
+		goto err_out;
+	}
+
+	if (secs->attributes & SGX_ATTR_DEBUG)
+		atomic_or(SGX_ENCL_DEBUG, &encl->flags);
+
+	encl->secs.encl = encl;
+	encl->base = secs->base;
+	encl->size = secs->size;
+	encl->ssaframesize = secs->ssa_frame_size;
+
+	/*
+	 * Set SGX_ENCL_CREATED only after the enclave is fully prepped.  This
+	 * allows setting and checking enclave creation without having to take
+	 * encl->lock.
+	 */
+	atomic_or(SGX_ENCL_CREATED, &encl->flags);
+
+	return 0;
+
+err_out:
+	sgx_free_epc_page(encl->secs.epc_page);
+	encl->secs.epc_page = NULL;
+
+err_out_backing:
+	fput(encl->backing);
+	encl->backing = NULL;
+
+	return ret;
+}
+
+/**
+ * sgx_ioc_enclave_create - handler for %SGX_IOC_ENCLAVE_CREATE
+ * @encl:	an enclave pointer
+ * @arg:	userspace pointer to a struct sgx_enclave_create instance
+ *
+ * Allocate kernel data structures for a new enclave and execute ECREATE after
+ * checking that the provided data for SECS meets the expectations of ECREATE
+ * for an uninitialized enclave and size of the address space does not surpass the
+ * platform expectations. This validation is done by sgx_validate_secs().
+ *
+ * Return:
+ *   0 on success,
+ *   -errno otherwise
+ */
+static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg)
+{
+	struct sgx_enclave_create ecreate;
+	struct page *secs_page;
+	struct sgx_secs *secs;
+	int ret;
+
+	if (atomic_read(&encl->flags) & SGX_ENCL_CREATED)
+		return -EINVAL;
+
+	if (copy_from_user(&ecreate, arg, sizeof(ecreate)))
+		return -EFAULT;
+
+	secs_page = alloc_page(GFP_KERNEL);
+	if (!secs_page)
+		return -ENOMEM;
+
+	secs = kmap(secs_page);
+	if (copy_from_user(secs, (void __user *)ecreate.src, sizeof(*secs))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = sgx_encl_create(encl, secs);
+
+out:
+	kunmap(secs_page);
+	__free_page(secs_page);
+	return ret;
+}
+
+long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+	struct sgx_encl *encl = filep->private_data;
+	int ret, encl_flags;
+
+	encl_flags = atomic_fetch_or(SGX_ENCL_IOCTL, &encl->flags);
+	if (encl_flags & SGX_ENCL_IOCTL)
+		return -EBUSY;
+
+	if (encl_flags & SGX_ENCL_DEAD) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	switch (cmd) {
+	case SGX_IOC_ENCLAVE_CREATE:
+		ret = sgx_ioc_enclave_create(encl, (void __user *)arg);
+		break;
+	default:
+		ret = -ENOIOCTLCMD;
+		break;
+	}
+
+out:
+	atomic_andnot(SGX_ENCL_IOCTL, &encl->flags);
+	return ret;
+}