Message ID | 20201003045059.665934-13-jarkko.sakkinen@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Intel SGX foundations | expand |
> +static u32 sgx_calc_ssa_frame_size(u32 miscselect, u64 xfrm) > +{ > + u32 size_max = PAGE_SIZE; > + u32 size; > + int i; > + > + for (i = 2; i < 64; i++) { Should this be: for (i = XFEATURE_YMM; i < XFEATURE_MAX; i++) { Basically, does this need to be 64, or should it be limited to the kernel-known XFEATURES? Or, should this be looping through all the bits set in xfeatures_mask_user(). > + if (!((1 << i) & xfrm)) > + continue; > + > + size = SGX_SSA_GPRS_SIZE + sgx_xsave_size_tbl[i]; > + > + if (miscselect & SGX_MISC_EXINFO) > + size += SGX_SSA_MISC_EXINFO_SIZE; > + > + if (size > size_max) > + size_max = size; > + } > + > + return PFN_UP(size_max); > +} > + > +static int sgx_validate_secs(const struct sgx_secs *secs) > +{ What's the overall point of this function? Does it avoid a #GP from an instruction later? Does all of the 'secs' content come from userspace? > + u64 max_size = (secs->attributes & SGX_ATTR_MODE64BIT) ? > + sgx_encl_size_max_64 : sgx_encl_size_max_32; > + > + if (secs->size < (2 * PAGE_SIZE) || !is_power_of_2(secs->size)) > + return -EINVAL; > + > + if (secs->base & (secs->size - 1)) > + return -EINVAL; > + > + if (secs->miscselect & sgx_misc_reserved_mask || > + secs->attributes & sgx_attributes_reserved_mask || > + secs->xfrm & sgx_xfrm_reserved_mask) > + return -EINVAL; > + > + if (secs->size > max_size) > + return -EINVAL; > + > + if (!(secs->xfrm & XFEATURE_MASK_FP) || > + !(secs->xfrm & XFEATURE_MASK_SSE) || > + (((secs->xfrm >> XFEATURE_BNDREGS) & 1) != ((secs->xfrm >> XFEATURE_BNDCSR) & 1))) > + return -EINVAL; > + > + if (!secs->ssa_frame_size) > + return -EINVAL; > + > + if (sgx_calc_ssa_frame_size(secs->miscselect, secs->xfrm) > secs->ssa_frame_size) > + return -EINVAL; > + > + if (memchr_inv(secs->reserved1, 0, sizeof(secs->reserved1)) || > + memchr_inv(secs->reserved2, 0, sizeof(secs->reserved2)) || > + memchr_inv(secs->reserved3, 0, sizeof(secs->reserved3)) || > + memchr_inv(secs->reserved4, 0, sizeof(secs->reserved4))) > + 
return -EINVAL; > + > + return 0; > +} I think it would be nice to at least have one comment per condition to explain what's going on there. > +static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs) > +{ > + struct sgx_epc_page *secs_epc; > + struct sgx_pageinfo pginfo; > + struct sgx_secinfo secinfo; > + unsigned long encl_size; > + struct file *backing; > + long ret; > + > + if (sgx_validate_secs(secs)) { > + pr_debug("invalid SECS\n"); > + return -EINVAL; > + } > + > + /* The extra page goes to SECS. */ > + encl_size = secs->size + PAGE_SIZE; > + > + backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5), > + VM_NORESERVE); What's the >>5 adjustment for? > + if (IS_ERR(backing)) > + return PTR_ERR(backing); > + > + encl->backing = backing; > + > + secs_epc = __sgx_alloc_epc_page(); > + if (IS_ERR(secs_epc)) { > + ret = PTR_ERR(secs_epc); > + goto err_out_backing; > + } > + > + encl->secs.epc_page = secs_epc; > + > + pginfo.addr = 0; > + pginfo.contents = (unsigned long)secs; > + pginfo.metadata = (unsigned long)&secinfo; > + pginfo.secs = 0; > + memset(&secinfo, 0, sizeof(secinfo)); > + > + ret = __ecreate((void *)&pginfo, sgx_get_epc_addr(secs_epc)); > + if (ret) { > + pr_debug("ECREATE returned %ld\n", ret); > + goto err_out; > + } > + > + if (secs->attributes & SGX_ATTR_DEBUG) > + atomic_or(SGX_ENCL_DEBUG, &encl->flags); > + > + encl->secs.encl = encl; > + encl->base = secs->base; > + encl->size = secs->size; > + encl->ssaframesize = secs->ssa_frame_size; > + > + /* > + * Set SGX_ENCL_CREATED only after the enclave is fully prepped. This > + * allows setting and checking enclave creation without having to take > + * encl->lock. > + */ > + atomic_or(SGX_ENCL_CREATED, &encl->flags); I'm wondering what the impact of setting this flag is. That's hard to figure out because the flag isn't documented. It's also unusual to have atomic_or() used like this. 
The normal set_bit()/clear_bit() that you can use on an unsigned long are actually implemented as atomics. I'm curious both why this needs to be atomics, *and* why the atomic_or() interface is being used. > + return 0; > + > +err_out: > + sgx_free_epc_page(encl->secs.epc_page); > + encl->secs.epc_page = NULL; > + > +err_out_backing: > + fput(encl->backing); > + encl->backing = NULL; > + > + return ret; > +} > + > +/** > + * sgx_ioc_enclave_create - handler for %SGX_IOC_ENCLAVE_CREATE > + * @encl: an enclave pointer > + * @arg: userspace pointer to a struct sgx_enclave_create instance > + * > + * Allocate kernel data structures for a new enclave and execute ECREATE after > + * checking that the provided data for SECS meets the expectations of ECREATE > + * for an uninitialized enclave and size of the address space does not surpass the > + * platform expectations. This validation is done by sgx_validate_secs(). > + * > + * Return: > + * 0 on success, > + * -errno otherwise > + */ > +static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg) > +{ > + struct sgx_enclave_create ecreate; > + struct page *secs_page; > + struct sgx_secs *secs; > + int ret; > + > + if (atomic_read(&encl->flags) & SGX_ENCL_CREATED) > + return -EINVAL; > + > + if (copy_from_user(&ecreate, arg, sizeof(ecreate))) > + return -EFAULT; > + > + secs_page = alloc_page(GFP_KERNEL); > + if (!secs_page) > + return -ENOMEM; > + > + secs = kmap(secs_page); GFP_KERNEL pages are in low memory and don't need to be kmap()'d. This can just be: secs = __get_free_page(GFP_KERNEL); if (copy_from_user(secs, (void __user *)ecreate.src,... and forget about the kmapping. You also need to change __free_pages() to free_pages(). The other alternative would be to just kmalloc() it. kmalloc() guarantees alignment in a stronger way than it used to. 
> + if (copy_from_user(secs, (void __user *)ecreate.src, sizeof(*secs))) { > + ret = -EFAULT; > + goto out; > + } > + > + ret = sgx_encl_create(encl, secs); > + > +out: > + kunmap(secs_page); > + __free_page(secs_page); > + return ret; > +} > + > +long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) > +{ > + struct sgx_encl *encl = filep->private_data; > + int ret, encl_flags; > + > + encl_flags = atomic_fetch_or(SGX_ENCL_IOCTL, &encl->flags); > + if (encl_flags & SGX_ENCL_IOCTL) > + return -EBUSY; Is the SGX_ENCL_IOCTL bit essentially just a lock to single-thread ioctl()s? Should we name it as such? > + if (encl_flags & SGX_ENCL_DEAD) { > + ret = -EFAULT; > + goto out; > + } > + > + switch (cmd) { > + case SGX_IOC_ENCLAVE_CREATE: > + ret = sgx_ioc_enclave_create(encl, (void __user *)arg); > + break; > + default: > + ret = -ENOIOCTLCMD; > + break; > + } > + > +out: > + atomic_andnot(SGX_ENCL_IOCTL, &encl->flags); > + return ret; > +} >
On Fri, Oct 16, 2020 at 10:07:47AM -0700, Dave Hansen wrote: > > +static u32 sgx_calc_ssa_frame_size(u32 miscselect, u64 xfrm) > > +{ > > + u32 size_max = PAGE_SIZE; > > + u32 size; > > + int i; > > + > > + for (i = 2; i < 64; i++) { > > Should this be: > > for (i = XFEATURE_YMM; i < XFEATURE_MAX; i++) { > > Basically, does this need to be 64, or should it be limited to the > kernel-known XFEATURES? Or, should this be looping through all the bits > set in xfeatures_mask_user(). I think so yes. > > + if (!((1 << i) & xfrm)) > > + continue; > > + > > + size = SGX_SSA_GPRS_SIZE + sgx_xsave_size_tbl[i]; > > + > > + if (miscselect & SGX_MISC_EXINFO) > > + size += SGX_SSA_MISC_EXINFO_SIZE; > > + > > + if (size > size_max) > > + size_max = size; > > + } > > + > > + return PFN_UP(size_max); > > +} > > + > > +static int sgx_validate_secs(const struct sgx_secs *secs) > > +{ > > What's the overall point of this function? Does it avoid a #GP from an > instruction later? > > Does all of the 'secs' content come from userspace? Yes it does avoid #GP, and all the data comes from the user space. > > + u64 max_size = (secs->attributes & SGX_ATTR_MODE64BIT) ? 
> > + sgx_encl_size_max_64 : sgx_encl_size_max_32; > > + > > + if (secs->size < (2 * PAGE_SIZE) || !is_power_of_2(secs->size)) > > + return -EINVAL; > > + > > + if (secs->base & (secs->size - 1)) > > + return -EINVAL; > > + > > + if (secs->miscselect & sgx_misc_reserved_mask || > > + secs->attributes & sgx_attributes_reserved_mask || > > + secs->xfrm & sgx_xfrm_reserved_mask) > > + return -EINVAL; > > + > > + if (secs->size > max_size) > > + return -EINVAL; > > + > > + if (!(secs->xfrm & XFEATURE_MASK_FP) || > > + !(secs->xfrm & XFEATURE_MASK_SSE) || > > + (((secs->xfrm >> XFEATURE_BNDREGS) & 1) != ((secs->xfrm >> XFEATURE_BNDCSR) & 1))) > > + return -EINVAL; > > + > > + if (!secs->ssa_frame_size) > > + return -EINVAL; > > + > > + if (sgx_calc_ssa_frame_size(secs->miscselect, secs->xfrm) > secs->ssa_frame_size) > > + return -EINVAL; > > + > > + if (memchr_inv(secs->reserved1, 0, sizeof(secs->reserved1)) || > > + memchr_inv(secs->reserved2, 0, sizeof(secs->reserved2)) || > > + memchr_inv(secs->reserved3, 0, sizeof(secs->reserved3)) || > > + memchr_inv(secs->reserved4, 0, sizeof(secs->reserved4))) > > + return -EINVAL; > > + > > + return 0; > > +} > > I think it would be nice to at least have one comment per condition to > explain what's going on there. OK, I can do that. > > > +static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs) > > +{ > > + struct sgx_epc_page *secs_epc; > > + struct sgx_pageinfo pginfo; > > + struct sgx_secinfo secinfo; > > + unsigned long encl_size; > > + struct file *backing; > > + long ret; > > + > > + if (sgx_validate_secs(secs)) { > > + pr_debug("invalid SECS\n"); > > + return -EINVAL; > > + } > > + > > + /* The extra page goes to SECS. */ > > + encl_size = secs->size + PAGE_SIZE; > > + > > + backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5), > > + VM_NORESERVE); > > What's the >>5 adjustment for? The backing storage stores not only the swapped page but also Paging Crypto MetaData (PCMD) structure. 
It essentially contains a CPU-encrypted MAC for a page. The MAC is computed over the page version and data. The version is stored in an EPC page called a Version Array (VA) page. Both of these are needed by ENCLS[ELDU]. > > > + if (IS_ERR(backing)) > > + return PTR_ERR(backing); > > + > > + encl->backing = backing; > > + > > + secs_epc = __sgx_alloc_epc_page(); > > + if (IS_ERR(secs_epc)) { > > + ret = PTR_ERR(secs_epc); > > + goto err_out_backing; > > + } > > + > > + encl->secs.epc_page = secs_epc; > > + > > + pginfo.addr = 0; > > + pginfo.contents = (unsigned long)secs; > > + pginfo.metadata = (unsigned long)&secinfo; > > + pginfo.secs = 0; > > + memset(&secinfo, 0, sizeof(secinfo)); > > + > > + ret = __ecreate((void *)&pginfo, sgx_get_epc_addr(secs_epc)); > > + if (ret) { > > + pr_debug("ECREATE returned %ld\n", ret); > > + goto err_out; > > + } > > + > > + if (secs->attributes & SGX_ATTR_DEBUG) > > + atomic_or(SGX_ENCL_DEBUG, &encl->flags); > > + > > + encl->secs.encl = encl; > > + encl->base = secs->base; > > + encl->size = secs->size; > > + encl->ssaframesize = secs->ssa_frame_size; > > + > > + /* > > + * Set SGX_ENCL_CREATED only after the enclave is fully prepped. This > > + * allows setting and checking enclave creation without having to take > > + * encl->lock. > > + */ > > + atomic_or(SGX_ENCL_CREATED, &encl->flags); > > I'm wondering what the impact of setting this flag is. That's hard to > figure out because the flag isn't documented. > > It's also unusual to have atomic_or() used like this. The normal > set_bit()/clear_bit() that you can use on an unsigned long are actually > implemented as atomics. > > I'm curious both why this needs to be atomics, *and* why the atomic_or() > interface is being used. Right, and this also covers test_and_change_bit() (just checked). So, I suppose we can switch to those.
> > + return 0; > > + > > +err_out: > > + sgx_free_epc_page(encl->secs.epc_page); > > + encl->secs.epc_page = NULL; > > + > > +err_out_backing: > > + fput(encl->backing); > > + encl->backing = NULL; > > + > > + return ret; > > +} > > + > > +/** > > + * sgx_ioc_enclave_create - handler for %SGX_IOC_ENCLAVE_CREATE > > + * @encl: an enclave pointer > > + * @arg: userspace pointer to a struct sgx_enclave_create instance > > + * > > + * Allocate kernel data structures for a new enclave and execute ECREATE after > > + * checking that the provided data for SECS meets the expectations of ECREATE > > + * for an uninitialized enclave and size of the address space does not surpass the > > + * platform expectations. This validation is done by sgx_validate_secs(). > > + * > > + * Return: > > + * 0 on success, > > + * -errno otherwise > > + */ > > +static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg) > > +{ > > + struct sgx_enclave_create ecreate; > > + struct page *secs_page; > > + struct sgx_secs *secs; > > + int ret; > > + > > + if (atomic_read(&encl->flags) & SGX_ENCL_CREATED) > > + return -EINVAL; > > + > > + if (copy_from_user(&ecreate, arg, sizeof(ecreate))) > > + return -EFAULT; > > + > > + secs_page = alloc_page(GFP_KERNEL); > > + if (!secs_page) > > + return -ENOMEM; > > + > > + secs = kmap(secs_page); > > GFP_KERNEL pages are in low memory and don't need to be kmap()'d. > > This can just be: > > secs = __get_free_page(GFP_KERNEL); > if (copy_from_user(secs, (void __user *)ecreate.src,... > > and forget about the kmapping. You also need to change __free_pages() > to free_pages(). > > The other alternative would be to just kmalloc() it. kmalloc() > guarantees alignment in a stronger way than it used to. Right, I'll change this, makes sense. 
> > > + if (copy_from_user(secs, (void __user *)ecreate.src, sizeof(*secs))) { > > + ret = -EFAULT; > > + goto out; > > + } > > + > > + ret = sgx_encl_create(encl, secs); > > + > > +out: > > + kunmap(secs_page); > > + __free_page(secs_page); > > + return ret; > > +} > > + > > +long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) > > +{ > > + struct sgx_encl *encl = filep->private_data; > > + int ret, encl_flags; > > + > > + encl_flags = atomic_fetch_or(SGX_ENCL_IOCTL, &encl->flags); > > + if (encl_flags & SGX_ENCL_IOCTL) > > + return -EBUSY; > > Is the SGX_ENCL_IOCTL bit essentially just a lock to single-thread > ioctl()s? Should we name it as such? Yes. It makes concurrency easier overall if we can assume that only a single ioctl is in progress. There is no good reason to do them in parallel. E.g. when you add pages you want to do that serially because the order changes the MRENCLAVE. So should I rename it to SGX_ENCL_IOCTL_LOCKED? > > + if (encl_flags & SGX_ENCL_DEAD) { > > + ret = -EFAULT; > > + goto out; > > + } > > + > > + switch (cmd) { > > + case SGX_IOC_ENCLAVE_CREATE: > > + ret = sgx_ioc_enclave_create(encl, (void __user *)arg); > > + break; > > + default: > > + ret = -ENOIOCTLCMD; > > + break; > > + } > > + > > +out: > > + atomic_andnot(SGX_ENCL_IOCTL, &encl->flags); > > + return ret; > > +} > > > /Jarkko
On 10/17/20 9:26 PM, Jarkko Sakkinen wrote: ... >>> +static int sgx_validate_secs(const struct sgx_secs *secs) >>> +{ >> >> What's the overall point of this function? Does it avoid a #GP from an >> instruction later? >> >> Does all of the 'secs' content come from userspace? > > Yes it does avoid #GP, and all the data comes from the user space. Please comment the function to indicate this. But, in general, why do we care to avoid a #GP? Is it just because we don't have infrastructure in-kernel to suppress the resulting panic()? >>> + u64 max_size = (secs->attributes & SGX_ATTR_MODE64BIT) ? >>> + sgx_encl_size_max_64 : sgx_encl_size_max_32; >>> + >>> + if (secs->size < (2 * PAGE_SIZE) || !is_power_of_2(secs->size)) >>> + return -EINVAL; >>> + >>> + if (secs->base & (secs->size - 1)) >>> + return -EINVAL; >>> + >>> + if (secs->miscselect & sgx_misc_reserved_mask || >>> + secs->attributes & sgx_attributes_reserved_mask || >>> + secs->xfrm & sgx_xfrm_reserved_mask) >>> + return -EINVAL; >>> + >>> + if (secs->size > max_size) >>> + return -EINVAL; >>> + >>> + if (!(secs->xfrm & XFEATURE_MASK_FP) || >>> + !(secs->xfrm & XFEATURE_MASK_SSE) || >>> + (((secs->xfrm >> XFEATURE_BNDREGS) & 1) != ((secs->xfrm >> XFEATURE_BNDCSR) & 1))) >>> + return -EINVAL; >>> + >>> + if (!secs->ssa_frame_size) >>> + return -EINVAL; >>> + >>> + if (sgx_calc_ssa_frame_size(secs->miscselect, secs->xfrm) > secs->ssa_frame_size) >>> + return -EINVAL; >>> + >>> + if (memchr_inv(secs->reserved1, 0, sizeof(secs->reserved1)) || >>> + memchr_inv(secs->reserved2, 0, sizeof(secs->reserved2)) || >>> + memchr_inv(secs->reserved3, 0, sizeof(secs->reserved3)) || >>> + memchr_inv(secs->reserved4, 0, sizeof(secs->reserved4))) >>> + return -EINVAL; >>> + >>> + return 0; >>> +} >> >> I think it would be nice to at least have one comment per condition to >> explain what's going on there. ... 
>>> +static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs) >>> +{ >>> + struct sgx_epc_page *secs_epc; >>> + struct sgx_pageinfo pginfo; >>> + struct sgx_secinfo secinfo; >>> + unsigned long encl_size; >>> + struct file *backing; >>> + long ret; >>> + >>> + if (sgx_validate_secs(secs)) { >>> + pr_debug("invalid SECS\n"); >>> + return -EINVAL; >>> + } >>> + >>> + /* The extra page goes to SECS. */ >>> + encl_size = secs->size + PAGE_SIZE; >>> + >>> + backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5), >>> + VM_NORESERVE); >> >> What's the >>5 adjustment for? > > The backing storage stores not only the swapped page but also > Paging Crypto MetaData (PCMD) structure. It essentially contains > a CPU encrypted MAC for a page. > > The MAC is over page version and data. The version is stored in > a EPC page called Version Array (VA) page. > > Both of these are needed by ENCLS[ELDU]. /* * SGX backing storage needs to contain space for both the * EPC data and some metadata called the Paging Crypto * MetaData (PCMD). The PCMD needs 128 bytes of storage for each * page. */ Also, the MAC is a fixed size, right? Let's say that x86 got a larger page size in the future. Would this number be 128 bytes or PAGE_SIZE/32? If it's a fixed size, I'd write: size = encl_size; size += (encl_size / PAGE_SIZE) * SGX_PCMD_PER_PAGE; If it really is 1/32nd, I'd write: size += encl_size / SGX_PCMD_RATIO; or something. Either way, the >>5 is total magic and needs comments and fixing. >>> +long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) >>> +{ >>> + struct sgx_encl *encl = filep->private_data; >>> + int ret, encl_flags; >>> + >>> + encl_flags = atomic_fetch_or(SGX_ENCL_IOCTL, &encl->flags); >>> + if (encl_flags & SGX_ENCL_IOCTL) >>> + return -EBUSY; >> >> Is the SGX_ENCL_IOCTL bit essentially just a lock to single-thread >> ioctl()s? Should we name it as such? > > Yes.
It makes the concurrency overally easier if we can assume that > only a single ioctl is in progress. There is no good reason to do > them in parallel. > > E.g. when you add pages you want to do that serially because the > order changes the MRENCLAVE. There are also hardware concurrency requirements, right? A bunch of the SGX functions seem to not even support being called in parallel. > So should I rename it as SGX_ENCL_IOCTL_LOCKED? I'd rather not see hand-rolled locking primitives, frankly.
On Mon, Oct 19, 2020 at 01:21:09PM -0700, Dave Hansen wrote: > On 10/17/20 9:26 PM, Jarkko Sakkinen wrote: > >>> +long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) > >>> +{ > >>> + struct sgx_encl *encl = filep->private_data; > >>> + int ret, encl_flags; > >>> + > >>> + encl_flags = atomic_fetch_or(SGX_ENCL_IOCTL, &encl->flags); > >>> + if (encl_flags & SGX_ENCL_IOCTL) > >>> + return -EBUSY; > >> > >> Is the SGX_ENCL_IOCTL bit essentially just a lock to single-thread > >> ioctl()s? Should we name it as such? > > > > Yes. It makes the concurrency overally easier if we can assume that > > only a single ioctl is in progress. There is no good reason to do > > them in parallel. > > > > E.g. when you add pages you want to do that serially because the > > order changes the MRENCLAVE. > > There are also hardware concurrency requirements, right? A bunch of the > SGX functions seem not not even support being called in parallel. Yes, and the driver, even when "holding" SGX_ENCL_IOCTL, takes encl->lock when executing an ENCLS leaf. The separate IOCTL flag avoids complications with reclaim; specifically, it allows the ioctls to initiate reclaim without hitting a deadlock. Reclaim needs to take encl->lock, e.g. to do ENCLS[EBLOCK], and reclaim is by default initiated during allocation if there are no pages available. I.e. if an ioctl() simply held encl->lock, it would deadlock in the scenario where it triggered reclaim on the current enclave. In other words, the flag is necessary even if it weren't being used as a lock primitive, e.g. it'd still need to exist even if encl->lock were taken to set and check the flag. The atomic shenanigans were added as an optimization to allow reclaim in parallel with the bulk of the ioctl flows, and partly because using atomic_fetch_or() avoided having to drop encl->lock in an error flow, i.e. yielded less code. > > So should I rename it as SGX_ENCL_IOCTL_LOCKED? > > I'd rather not see hand-rolled locking primitives frankly.
IOCTL_IN_PROGRESS would be my vote if we want a more descriptive name.
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 2a198838fca9..a89e1c46a25a 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -323,6 +323,7 @@ Code Seq# Include File Comments <mailto:tlewis@mindspring.com> 0xA3 90-9F linux/dtlk.h 0xA4 00-1F uapi/linux/tee.h Generic TEE subsystem +0xA4 00-1F uapi/asm/sgx.h <mailto:linux-sgx@vger.kernel.org> 0xAA 00-3F linux/uapi/linux/userfaultfd.h 0xAB 00-1F linux/nbd.h 0xAC 00-1F linux/raw.h diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h new file mode 100644 index 000000000000..c75b375f3770 --- /dev/null +++ b/arch/x86/include/uapi/asm/sgx.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* + * Copyright(c) 2016-19 Intel Corporation. + */ +#ifndef _UAPI_ASM_X86_SGX_H +#define _UAPI_ASM_X86_SGX_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +#define SGX_MAGIC 0xA4 + +#define SGX_IOC_ENCLAVE_CREATE \ + _IOW(SGX_MAGIC, 0x00, struct sgx_enclave_create) + +/** + * struct sgx_enclave_create - parameter structure for the + * %SGX_IOC_ENCLAVE_CREATE ioctl + * @src: address for the SECS page data + */ +struct sgx_enclave_create { + __u64 src; +}; + +#endif /* _UAPI_ASM_X86_SGX_H */ diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile index 3fc451120735..91d3dc784a29 100644 --- a/arch/x86/kernel/cpu/sgx/Makefile +++ b/arch/x86/kernel/cpu/sgx/Makefile @@ -1,4 +1,5 @@ obj-y += \ driver.o \ encl.o \ + ioctl.o \ main.o diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c index f54da5f19c2b..7bdb49dfcca6 100644 --- a/arch/x86/kernel/cpu/sgx/driver.c +++ b/arch/x86/kernel/cpu/sgx/driver.c @@ -114,10 +114,22 @@ static unsigned long sgx_get_unmapped_area(struct file *file, return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); } +#ifdef 
CONFIG_COMPAT +static long sgx_compat_ioctl(struct file *filep, unsigned int cmd, + unsigned long arg) +{ + return sgx_ioctl(filep, cmd, arg); +} +#endif + static const struct file_operations sgx_encl_fops = { .owner = THIS_MODULE, .open = sgx_open, .release = sgx_release, + .unlocked_ioctl = sgx_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = sgx_compat_ioctl, +#endif .mmap = sgx_mmap, .get_unmapped_area = sgx_get_unmapped_area, }; diff --git a/arch/x86/kernel/cpu/sgx/driver.h b/arch/x86/kernel/cpu/sgx/driver.h index f7ce40dedc91..e4063923115b 100644 --- a/arch/x86/kernel/cpu/sgx/driver.h +++ b/arch/x86/kernel/cpu/sgx/driver.h @@ -9,6 +9,7 @@ #include <linux/rwsem.h> #include <linux/sched.h> #include <linux/workqueue.h> +#include <uapi/asm/sgx.h> #include "sgx.h" #define SGX_EINIT_SPIN_COUNT 20 diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c new file mode 100644 index 000000000000..9bb4694e57c1 --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +// Copyright(c) 2016-19 Intel Corporation. + +#include <asm/mman.h> +#include <linux/mman.h> +#include <linux/delay.h> +#include <linux/file.h> +#include <linux/hashtable.h> +#include <linux/highmem.h> +#include <linux/ratelimit.h> +#include <linux/sched/signal.h> +#include <linux/shmem_fs.h> +#include <linux/slab.h> +#include <linux/suspend.h> +#include "driver.h" +#include "encl.h" +#include "encls.h" + +static u32 sgx_calc_ssa_frame_size(u32 miscselect, u64 xfrm) +{ + u32 size_max = PAGE_SIZE; + u32 size; + int i; + + for (i = 2; i < 64; i++) { + if (!((1 << i) & xfrm)) + continue; + + size = SGX_SSA_GPRS_SIZE + sgx_xsave_size_tbl[i]; + + if (miscselect & SGX_MISC_EXINFO) + size += SGX_SSA_MISC_EXINFO_SIZE; + + if (size > size_max) + size_max = size; + } + + return PFN_UP(size_max); +} + +static int sgx_validate_secs(const struct sgx_secs *secs) +{ + u64 max_size = (secs->attributes & SGX_ATTR_MODE64BIT) ? 
+ sgx_encl_size_max_64 : sgx_encl_size_max_32; + + if (secs->size < (2 * PAGE_SIZE) || !is_power_of_2(secs->size)) + return -EINVAL; + + if (secs->base & (secs->size - 1)) + return -EINVAL; + + if (secs->miscselect & sgx_misc_reserved_mask || + secs->attributes & sgx_attributes_reserved_mask || + secs->xfrm & sgx_xfrm_reserved_mask) + return -EINVAL; + + if (secs->size > max_size) + return -EINVAL; + + if (!(secs->xfrm & XFEATURE_MASK_FP) || + !(secs->xfrm & XFEATURE_MASK_SSE) || + (((secs->xfrm >> XFEATURE_BNDREGS) & 1) != ((secs->xfrm >> XFEATURE_BNDCSR) & 1))) + return -EINVAL; + + if (!secs->ssa_frame_size) + return -EINVAL; + + if (sgx_calc_ssa_frame_size(secs->miscselect, secs->xfrm) > secs->ssa_frame_size) + return -EINVAL; + + if (memchr_inv(secs->reserved1, 0, sizeof(secs->reserved1)) || + memchr_inv(secs->reserved2, 0, sizeof(secs->reserved2)) || + memchr_inv(secs->reserved3, 0, sizeof(secs->reserved3)) || + memchr_inv(secs->reserved4, 0, sizeof(secs->reserved4))) + return -EINVAL; + + return 0; +} + +static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs) +{ + struct sgx_epc_page *secs_epc; + struct sgx_pageinfo pginfo; + struct sgx_secinfo secinfo; + unsigned long encl_size; + struct file *backing; + long ret; + + if (sgx_validate_secs(secs)) { + pr_debug("invalid SECS\n"); + return -EINVAL; + } + + /* The extra page goes to SECS. 
*/ + encl_size = secs->size + PAGE_SIZE; + + backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5), + VM_NORESERVE); + if (IS_ERR(backing)) + return PTR_ERR(backing); + + encl->backing = backing; + + secs_epc = __sgx_alloc_epc_page(); + if (IS_ERR(secs_epc)) { + ret = PTR_ERR(secs_epc); + goto err_out_backing; + } + + encl->secs.epc_page = secs_epc; + + pginfo.addr = 0; + pginfo.contents = (unsigned long)secs; + pginfo.metadata = (unsigned long)&secinfo; + pginfo.secs = 0; + memset(&secinfo, 0, sizeof(secinfo)); + + ret = __ecreate((void *)&pginfo, sgx_get_epc_addr(secs_epc)); + if (ret) { + pr_debug("ECREATE returned %ld\n", ret); + goto err_out; + } + + if (secs->attributes & SGX_ATTR_DEBUG) + atomic_or(SGX_ENCL_DEBUG, &encl->flags); + + encl->secs.encl = encl; + encl->base = secs->base; + encl->size = secs->size; + encl->ssaframesize = secs->ssa_frame_size; + + /* + * Set SGX_ENCL_CREATED only after the enclave is fully prepped. This + * allows setting and checking enclave creation without having to take + * encl->lock. + */ + atomic_or(SGX_ENCL_CREATED, &encl->flags); + + return 0; + +err_out: + sgx_free_epc_page(encl->secs.epc_page); + encl->secs.epc_page = NULL; + +err_out_backing: + fput(encl->backing); + encl->backing = NULL; + + return ret; +} + +/** + * sgx_ioc_enclave_create - handler for %SGX_IOC_ENCLAVE_CREATE + * @encl: an enclave pointer + * @arg: userspace pointer to a struct sgx_enclave_create instance + * + * Allocate kernel data structures for a new enclave and execute ECREATE after + * checking that the provided data for SECS meets the expectations of ECREATE + * for an uninitialized enclave and size of the address space does not surpass the + * platform expectations. This validation is done by sgx_validate_secs(). 
+ * + * Return: + * 0 on success, + * -errno otherwise + */ +static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg) +{ + struct sgx_enclave_create ecreate; + struct page *secs_page; + struct sgx_secs *secs; + int ret; + + if (atomic_read(&encl->flags) & SGX_ENCL_CREATED) + return -EINVAL; + + if (copy_from_user(&ecreate, arg, sizeof(ecreate))) + return -EFAULT; + + secs_page = alloc_page(GFP_KERNEL); + if (!secs_page) + return -ENOMEM; + + secs = kmap(secs_page); + if (copy_from_user(secs, (void __user *)ecreate.src, sizeof(*secs))) { + ret = -EFAULT; + goto out; + } + + ret = sgx_encl_create(encl, secs); + +out: + kunmap(secs_page); + __free_page(secs_page); + return ret; +} + +long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct sgx_encl *encl = filep->private_data; + int ret, encl_flags; + + encl_flags = atomic_fetch_or(SGX_ENCL_IOCTL, &encl->flags); + if (encl_flags & SGX_ENCL_IOCTL) + return -EBUSY; + + if (encl_flags & SGX_ENCL_DEAD) { + ret = -EFAULT; + goto out; + } + + switch (cmd) { + case SGX_IOC_ENCLAVE_CREATE: + ret = sgx_ioc_enclave_create(encl, (void __user *)arg); + break; + default: + ret = -ENOIOCTLCMD; + break; + } + +out: + atomic_andnot(SGX_ENCL_IOCTL, &encl->flags); + return ret; +}