Message ID | 6b29d1ee66715b40aba847b31cbdac71cbb22524.1609890536.git.kai.huang@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM SGX virtualization support | expand |
On 1/5/21 5:56 PM, Kai Huang wrote: > From: Sean Christopherson <sean.j.christopherson@intel.com> > > Provide wrappers around __ecreate() and __einit() to hide the ugliness > of overloading the ENCLS return value to encode multiple error formats > in a single int. KVM will trap-and-execute ECREATE and EINIT as part > of SGX virtualization, and on an exception, KVM needs the trapnr so that > it can inject the correct fault into the guest. This is missing a bit of a step about how and why ECREATE needs to be run in the host in the first place. > diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h > new file mode 100644 > index 000000000000..0d643b985085 > --- /dev/null > +++ b/arch/x86/include/asm/sgx.h > @@ -0,0 +1,16 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef _ASM_X86_SGX_H > +#define _ASM_X86_SGX_H > + > +#include <linux/types.h> > + > +#ifdef CONFIG_X86_SGX_VIRTUALIZATION > +struct sgx_pageinfo; > + > +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, > + int *trapnr); > +int sgx_virt_einit(void __user *sigstruct, void __user *token, > + void __user *secs, u64 *lepubkeyhash, int *trapnr); > +#endif > + > +#endif /* _ASM_X86_SGX_H */ > diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c > index d625551ccf25..4e9810ba9259 100644 > --- a/arch/x86/kernel/cpu/sgx/virt.c > +++ b/arch/x86/kernel/cpu/sgx/virt.c > @@ -261,3 +261,58 @@ int __init sgx_virt_epc_init(void) > > return misc_register(&sgx_virt_epc_dev); > } > + > +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, > + int *trapnr) > +{ > + int ret; > + > + __uaccess_begin(); > + ret = __ecreate(pageinfo, (void *)secs); > + __uaccess_end(); The __uaccess_begin/end() worries me. There are *very* few of these in the kernel and it seems like something we want to use as sparingly as possible. Why don't we just use the kernel mapping for 'secs' and not have to deal with stac/clac? I'm also just generally worried about casting away an __user without doing any checking. How is that OK? > + if (encls_faulted(ret)) { > + *trapnr = ENCLS_TRAPNR(ret); > + return -EFAULT; > + } > + > + /* ECREATE doesn't return an error code, it faults or succeeds. */ > + WARN_ON_ONCE(ret); > + return 0; > +} > +EXPORT_SYMBOL_GPL(sgx_virt_ecreate); > + > +static int __sgx_virt_einit(void __user *sigstruct, void __user *token, > + void __user *secs) > +{ > + int ret; > + > + __uaccess_begin(); > + ret = __einit((void *)sigstruct, (void *)token, (void *)secs); > + __uaccess_end(); > + return ret; > +} If casting away one __user wasn't good enough, we get three! :) We need some more background in the changelog on why this is OK. > +int sgx_virt_einit(void __user *sigstruct, void __user *token, > + void __user *secs, u64 *lepubkeyhash, int *trapnr) > +{ > + int ret; > + > + if (!boot_cpu_has(X86_FEATURE_SGX_LC)) { > + ret = __sgx_virt_einit(sigstruct, token, secs); > + } else { > + preempt_disable(); > + > + sgx_update_lepubkeyhash(lepubkeyhash); > + > + ret = __sgx_virt_einit(sigstruct, token, secs); > + preempt_enable(); > + } > + > + if (encls_faulted(ret)) { > + *trapnr = ENCLS_TRAPNR(ret); > + return -EFAULT; > + } > + return ret; > +} > +EXPORT_SYMBOL_GPL(sgx_virt_einit); >
On Wed, Jan 06, 2021, Dave Hansen wrote: > On 1/5/21 5:56 PM, Kai Huang wrote: > > From: Sean Christopherson <sean.j.christopherson@intel.com> > > > > Provide wrappers around __ecreate() and __einit() to hide the ugliness > > of overloading the ENCLS return value to encode multiple error formats > > in a single int. KVM will trap-and-execute ECREATE and EINIT as part > > of SGX virtualization, and on an exception, KVM needs the trapnr so that > > it can inject the correct fault into the guest. > > This is missing a bit of a step about how and why ECREATE needs to be > run in the host in the first place. There's (hopefully) good info in the KVM usage patch that can be borrowed: Add an ECREATE handler that will be used to intercept ECREATE for the purpose of enforcing and enclave's MISCSELECT, ATTRIBUTES and XFRM, i.e. to allow userspace to restrict SGX features via CPUID. ECREATE will be intercepted when any of the aforementioned masks diverges from hardware in order to enforce the desired CPUID model, i.e. inject #GP if the guest attempts to set a bit that hasn't been enumerated as allowed-1 in CPUID. > > diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h > > new file mode 100644 > > index 000000000000..0d643b985085 > > --- /dev/null > > +++ b/arch/x86/include/asm/sgx.h > > @@ -0,0 +1,16 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > +#ifndef _ASM_X86_SGX_H > > +#define _ASM_X86_SGX_H > > + > > +#include <linux/types.h> > > + > > +#ifdef CONFIG_X86_SGX_VIRTUALIZATION > > +struct sgx_pageinfo; > > + > > +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, > > + int *trapnr); > > +int sgx_virt_einit(void __user *sigstruct, void __user *token, > > + void __user *secs, u64 *lepubkeyhash, int *trapnr); > > +#endif > > + > > +#endif /* _ASM_X86_SGX_H */ > > diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c > > index d625551ccf25..4e9810ba9259 100644 > > --- a/arch/x86/kernel/cpu/sgx/virt.c > > +++ b/arch/x86/kernel/cpu/sgx/virt.c > > @@ -261,3 +261,58 @@ int __init sgx_virt_epc_init(void) > > > > return misc_register(&sgx_virt_epc_dev); > > } > > + > > +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, > > + int *trapnr) > > +{ > > + int ret; > > + > > + __uaccess_begin(); > > + ret = __ecreate(pageinfo, (void *)secs); > > + __uaccess_end(); > > The __uaccess_begin/end() worries me. There are *very* few of these in > the kernel and it seems like something we want to use as sparingly as > possible. > > Why don't we just use the kernel mapping for 'secs' and not have to deal > with stac/clac? The kernel mapping isn't readily available. At this point, it's not even guaranteed that @secs points at an EPC page. Unlike the driver code, where the EPC page is allocated on-demand by the kernel, the pointer here is userspace (technically guest) controlled. The caller (KVM) is responsible for ensuring it's a valid userspace address, but the SGX/EPC specific checks are mostly deferred to hardware. It's also possible to either retrieve the existing kernel mapping or to generate a new mapping by resolving the PFN; this is/was simpler. > I'm also just generally worried about casting away an __user without > doing any checking. How is that OK? Short answer, KVM validates the virtual addresses. KVM validates the host virtual addresses (HVA) when creating a memslot (maps GPA->HVA). The HVAs that are passed to these helpers are generated/retrieved by KVM translating GVA->GPA->HVA; the GPA->HVA stage ensures the address is in a valid memslot, and thus a valid user address. That being said, these aren't exactly fast operations, adding access_ok() checks is probably a good idea.
On 1/6/21 1:04 PM, Sean Christopherson wrote: > On Wed, Jan 06, 2021, Dave Hansen wrote: >> On 1/5/21 5:56 PM, Kai Huang wrote: >>> From: Sean Christopherson <sean.j.christopherson@intel.com> >>> >>> Provide wrappers around __ecreate() and __einit() to hide the ugliness >>> of overloading the ENCLS return value to encode multiple error formats >>> in a single int. KVM will trap-and-execute ECREATE and EINIT as part >>> of SGX virtualization, and on an exception, KVM needs the trapnr so that >>> it can inject the correct fault into the guest. >> >> This is missing a bit of a step about how and why ECREATE needs to be >> run in the host in the first place. > > There's (hopefully) good info in the KVM usage patch that can be borrowed: > > Add an ECREATE handler that will be used to intercept ECREATE for the > purpose of enforcing and enclave's MISCSELECT, ATTRIBUTES and XFRM, i.e. > to allow userspace to restrict SGX features via CPUID. ECREATE will be > intercepted when any of the aforementioned masks diverges from hardware > in order to enforce the desired CPUID model, i.e. inject #GP if the > guest attempts to set a bit that hasn't been enumerated as allowed-1 in > CPUID. OK, so in plain language: the bare-metal kernel must intercept ECREATE to be able to impose policies on guests. When it does this, the bare-metal kernel runs ECREATE against the userspace mapping of the virtualized EPC. >>> diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h >>> new file mode 100644 >>> index 000000000000..0d643b985085 >>> --- /dev/null >>> +++ b/arch/x86/include/asm/sgx.h >>> @@ -0,0 +1,16 @@ >>> +/* SPDX-License-Identifier: GPL-2.0 */ >>> +#ifndef _ASM_X86_SGX_H >>> +#define _ASM_X86_SGX_H >>> + >>> +#include <linux/types.h> >>> + >>> +#ifdef CONFIG_X86_SGX_VIRTUALIZATION >>> +struct sgx_pageinfo; >>> + >>> +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, >>> + int *trapnr); >>> +int sgx_virt_einit(void __user *sigstruct, void __user *token, >>> + void __user *secs, u64 *lepubkeyhash, int *trapnr); >>> +#endif >>> + >>> +#endif /* _ASM_X86_SGX_H */ >>> diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c >>> index d625551ccf25..4e9810ba9259 100644 >>> --- a/arch/x86/kernel/cpu/sgx/virt.c >>> +++ b/arch/x86/kernel/cpu/sgx/virt.c >>> @@ -261,3 +261,58 @@ int __init sgx_virt_epc_init(void) >>> >>> return misc_register(&sgx_virt_epc_dev); >>> } >>> + >>> +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, >>> + int *trapnr) >>> +{ >>> + int ret; >>> + >>> + __uaccess_begin(); >>> + ret = __ecreate(pageinfo, (void *)secs); >>> + __uaccess_end(); >> >> The __uaccess_begin/end() worries me. There are *very* few of these in >> the kernel and it seems like something we want to use as sparingly as >> possible. >> >> Why don't we just use the kernel mapping for 'secs' and not have to deal >> with stac/clac? > > The kernel mapping isn't readily available. Oh, duh. There's no kernel mapping for EPC... it's not RAM in the first place. > At this point, it's not even > guaranteed that @secs points at an EPC page. Unlike the driver code, where the > EPC page is allocated on-demand by the kernel, the pointer here is userspace > (technically guest) controlled. The caller (KVM) is responsible for ensuring > it's a valid userspace address, but the SGX/EPC specific checks are mostly > deferred to hardware. Ahh, got it. Kai, could we get some of this into comments or the changelog? >> I'm also just generally worried about casting away an __user without >> doing any checking. How is that OK? > > Short answer, KVM validates the virtual addresses. > > KVM validates the host virtual addresses (HVA) when creating a memslot (maps > GPA->HVA). The HVAs that are passed to these helpers are generated/retrieved > by KVM translating GVA->GPA->HVA; the GPA->HVA stage ensures the address is in a > valid memslot, and thus a valid user address. There is something a *bit* unpalatable about having KVM fill an 'unsigned long' only to cast it to a (void __user *), the to cast it back to a (void *) to pass it to the SGX inlines. I guess sparse would catch us in the window that it is __user if someone tried to dereference it. Adding access_ok()'s sounds like a good idea to me. Or, at *least* commenting why they're not necessary.
On Wed, 6 Jan 2021 13:23:37 -0800 Dave Hansen wrote: > On 1/6/21 1:04 PM, Sean Christopherson wrote: > > On Wed, Jan 06, 2021, Dave Hansen wrote: > >> On 1/5/21 5:56 PM, Kai Huang wrote: > >>> From: Sean Christopherson <sean.j.christopherson@intel.com> > >>> > >>> Provide wrappers around __ecreate() and __einit() to hide the ugliness > >>> of overloading the ENCLS return value to encode multiple error formats > >>> in a single int. KVM will trap-and-execute ECREATE and EINIT as part > >>> of SGX virtualization, and on an exception, KVM needs the trapnr so that > >>> it can inject the correct fault into the guest. > >> > >> This is missing a bit of a step about how and why ECREATE needs to be > >> run in the host in the first place. > > > > There's (hopefully) good info in the KVM usage patch that can be borrowed: > > > > Add an ECREATE handler that will be used to intercept ECREATE for the > > purpose of enforcing and enclave's MISCSELECT, ATTRIBUTES and XFRM, i.e. > > to allow userspace to restrict SGX features via CPUID. ECREATE will be > > intercepted when any of the aforementioned masks diverges from hardware > > in order to enforce the desired CPUID model, i.e. inject #GP if the > > guest attempts to set a bit that hasn't been enumerated as allowed-1 in > > CPUID. > > OK, so in plain language: the bare-metal kernel must intercept ECREATE > to be able to impose policies on guests. When it does this, the > bare-metal kernel runs ECREATE against the userspace mapping of the > virtualized EPC. Thanks. I'll add this to commit message. > > >>> diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h > >>> new file mode 100644 > >>> index 000000000000..0d643b985085 > >>> --- /dev/null > >>> +++ b/arch/x86/include/asm/sgx.h > >>> @@ -0,0 +1,16 @@ > >>> +/* SPDX-License-Identifier: GPL-2.0 */ > >>> +#ifndef _ASM_X86_SGX_H > >>> +#define _ASM_X86_SGX_H > >>> + > >>> +#include <linux/types.h> > >>> + > >>> +#ifdef CONFIG_X86_SGX_VIRTUALIZATION > >>> +struct sgx_pageinfo; > >>> + > >>> +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, > >>> + int *trapnr); > >>> +int sgx_virt_einit(void __user *sigstruct, void __user *token, > >>> + void __user *secs, u64 *lepubkeyhash, int *trapnr); > >>> +#endif > >>> + > >>> +#endif /* _ASM_X86_SGX_H */ > >>> diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c > >>> index d625551ccf25..4e9810ba9259 100644 > >>> --- a/arch/x86/kernel/cpu/sgx/virt.c > >>> +++ b/arch/x86/kernel/cpu/sgx/virt.c > >>> @@ -261,3 +261,58 @@ int __init sgx_virt_epc_init(void) > >>> > >>> return misc_register(&sgx_virt_epc_dev); > >>> } > >>> + > >>> +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, > >>> + int *trapnr) > >>> +{ > >>> + int ret; > >>> + > >>> + __uaccess_begin(); > >>> + ret = __ecreate(pageinfo, (void *)secs); > >>> + __uaccess_end(); > >> > >> The __uaccess_begin/end() worries me. There are *very* few of these in > >> the kernel and it seems like something we want to use as sparingly as > >> possible. > >> > >> Why don't we just use the kernel mapping for 'secs' and not have to deal > >> with stac/clac? > > > > The kernel mapping isn't readily available. > > Oh, duh. There's no kernel mapping for EPC... it's not RAM in the first > place. > > > At this point, it's not even > > guaranteed that @secs points at an EPC page. Unlike the driver code, where the > > EPC page is allocated on-demand by the kernel, the pointer here is userspace > > (technically guest) controlled. The caller (KVM) is responsible for ensuring > > it's a valid userspace address, but the SGX/EPC specific checks are mostly > > deferred to hardware. > > Ahh, got it. Kai, could we get some of this into comments or the changelog? Yes I'll add some into comments.
diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h new file mode 100644 index 000000000000..0d643b985085 --- /dev/null +++ b/arch/x86/include/asm/sgx.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SGX_H +#define _ASM_X86_SGX_H + +#include <linux/types.h> + +#ifdef CONFIG_X86_SGX_VIRTUALIZATION +struct sgx_pageinfo; + +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, + int *trapnr); +int sgx_virt_einit(void __user *sigstruct, void __user *token, + void __user *secs, u64 *lepubkeyhash, int *trapnr); +#endif + +#endif /* _ASM_X86_SGX_H */ diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c index d625551ccf25..4e9810ba9259 100644 --- a/arch/x86/kernel/cpu/sgx/virt.c +++ b/arch/x86/kernel/cpu/sgx/virt.c @@ -261,3 +261,58 @@ int __init sgx_virt_epc_init(void) return misc_register(&sgx_virt_epc_dev); } + +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs, + int *trapnr) +{ + int ret; + + __uaccess_begin(); + ret = __ecreate(pageinfo, (void *)secs); + __uaccess_end(); + + if (encls_faulted(ret)) { + *trapnr = ENCLS_TRAPNR(ret); + return -EFAULT; + } + + /* ECREATE doesn't return an error code, it faults or succeeds. */ + WARN_ON_ONCE(ret); + return 0; +} +EXPORT_SYMBOL_GPL(sgx_virt_ecreate); + +static int __sgx_virt_einit(void __user *sigstruct, void __user *token, + void __user *secs) +{ + int ret; + + __uaccess_begin(); + ret = __einit((void *)sigstruct, (void *)token, (void *)secs); + __uaccess_end(); + return ret; +} + +int sgx_virt_einit(void __user *sigstruct, void __user *token, + void __user *secs, u64 *lepubkeyhash, int *trapnr) +{ + int ret; + + if (!boot_cpu_has(X86_FEATURE_SGX_LC)) { + ret = __sgx_virt_einit(sigstruct, token, secs); + } else { + preempt_disable(); + + sgx_update_lepubkeyhash(lepubkeyhash); + + ret = __sgx_virt_einit(sigstruct, token, secs); + preempt_enable(); + } + + if (encls_faulted(ret)) { + *trapnr = ENCLS_TRAPNR(ret); + return -EFAULT; + } + return ret; +} +EXPORT_SYMBOL_GPL(sgx_virt_einit);