diff mbox series

[RFC,11/23] x86/sgx: Add helpers to expose ECREATE and EINIT to KVM

Message ID 6b29d1ee66715b40aba847b31cbdac71cbb22524.1609890536.git.kai.huang@intel.com (mailing list archive)
State New, archived
Headers show
Series KVM SGX virtualization support | expand

Commit Message

Huang, Kai Jan. 6, 2021, 1:56 a.m. UTC
From: Sean Christopherson <sean.j.christopherson@intel.com>

Provide wrappers around __ecreate() and __einit() to hide the ugliness
of overloading the ENCLS return value to encode multiple error formats
in a single int.  KVM will trap-and-execute ECREATE and EINIT as part
of SGX virtualization, and on an exception, KVM needs the trapnr so that
it can inject the correct fault into the guest.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
 [Kai: Use sgx_update_lepubkeyhash() to update pubkey hash MSRs.]
Signed-off-by: Kai Huang <kai.huang@intel.com>
---
 arch/x86/include/asm/sgx.h     | 16 ++++++++++
 arch/x86/kernel/cpu/sgx/virt.c | 55 ++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 arch/x86/include/asm/sgx.h

Comments

Dave Hansen Jan. 6, 2021, 8:12 p.m. UTC | #1
On 1/5/21 5:56 PM, Kai Huang wrote:
> From: Sean Christopherson <sean.j.christopherson@intel.com>
> 
> Provide wrappers around __ecreate() and __einit() to hide the ugliness
> of overloading the ENCLS return value to encode multiple error formats
> in a single int.  KVM will trap-and-execute ECREATE and EINIT as part
> of SGX virtualization, and on an exception, KVM needs the trapnr so that
> it can inject the correct fault into the guest.

This is missing a bit of a step about how and why ECREATE needs to be
run in the host in the first place.

> diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
> new file mode 100644
> index 000000000000..0d643b985085
> --- /dev/null
> +++ b/arch/x86/include/asm/sgx.h
> @@ -0,0 +1,16 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_X86_SGX_H
> +#define _ASM_X86_SGX_H
> +
> +#include <linux/types.h>
> +
> +#ifdef CONFIG_X86_SGX_VIRTUALIZATION
> +struct sgx_pageinfo;
> +
> +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
> +		     int *trapnr);
> +int sgx_virt_einit(void __user *sigstruct, void __user *token,
> +		   void __user *secs, u64 *lepubkeyhash, int *trapnr);
> +#endif
> +
> +#endif /* _ASM_X86_SGX_H */
> diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
> index d625551ccf25..4e9810ba9259 100644
> --- a/arch/x86/kernel/cpu/sgx/virt.c
> +++ b/arch/x86/kernel/cpu/sgx/virt.c
> @@ -261,3 +261,58 @@ int __init sgx_virt_epc_init(void)
>  
>  	return misc_register(&sgx_virt_epc_dev);
>  }
> +
> +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
> +		     int *trapnr)
> +{
> +	int ret;
> +
> +	__uaccess_begin();
> +	ret = __ecreate(pageinfo, (void *)secs);
> +	__uaccess_end();

The __uaccess_begin/end() worries me.  There are *very* few of these in
the kernel and it seems like something we want to use as sparingly as
possible.

Why don't we just use the kernel mapping for 'secs' and not have to deal
with stac/clac?

I'm also just generally worried about casting away an __user without
doing any checking.  How is that OK?

> +	if (encls_faulted(ret)) {
> +		*trapnr = ENCLS_TRAPNR(ret);
> +		return -EFAULT;
> +	}
> +
> +	/* ECREATE doesn't return an error code, it faults or succeeds. */
> +	WARN_ON_ONCE(ret);
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(sgx_virt_ecreate);
> +
> +static int __sgx_virt_einit(void __user *sigstruct, void __user *token,
> +			    void __user *secs)
> +{
> +	int ret;
> +
> +	__uaccess_begin();
> +	ret =  __einit((void *)sigstruct, (void *)token, (void *)secs);
> +	__uaccess_end();
> +	return ret;
> +}

If casting away one __user wasn't good enough, we get three! :)

We need some more background in the changelog on why this is OK.

> +int sgx_virt_einit(void __user *sigstruct, void __user *token,
> +		   void __user *secs, u64 *lepubkeyhash, int *trapnr)
> +{
> +	int ret;
> +
> +	if (!boot_cpu_has(X86_FEATURE_SGX_LC)) {
> +		ret = __sgx_virt_einit(sigstruct, token, secs);
> +	} else {
> +		preempt_disable();
> +
> +		sgx_update_lepubkeyhash(lepubkeyhash);
> +
> +		ret = __sgx_virt_einit(sigstruct, token, secs);
> +		preempt_enable();
> +	}
> +
> +	if (encls_faulted(ret)) {
> +		*trapnr = ENCLS_TRAPNR(ret);
> +		return -EFAULT;
> +	}
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(sgx_virt_einit);
>
Sean Christopherson Jan. 6, 2021, 9:04 p.m. UTC | #2
On Wed, Jan 06, 2021, Dave Hansen wrote:
> On 1/5/21 5:56 PM, Kai Huang wrote:
> > From: Sean Christopherson <sean.j.christopherson@intel.com>
> > 
> > Provide wrappers around __ecreate() and __einit() to hide the ugliness
> > of overloading the ENCLS return value to encode multiple error formats
> > in a single int.  KVM will trap-and-execute ECREATE and EINIT as part
> > of SGX virtualization, and on an exception, KVM needs the trapnr so that
> > it can inject the correct fault into the guest.
> 
> This is missing a bit of a step about how and why ECREATE needs to be
> run in the host in the first place.

There's (hopefully) good info in the KVM usage patch that can be borrowed:

  Add an ECREATE handler that will be used to intercept ECREATE for the
  purpose of enforcing and enclave's MISCSELECT, ATTRIBUTES and XFRM, i.e.
  to allow userspace to restrict SGX features via CPUID.  ECREATE will be
  intercepted when any of the aforementioned masks diverges from hardware
  in order to enforce the desired CPUID model, i.e. inject #GP if the
  guest attempts to set a bit that hasn't been enumerated as allowed-1 in
  CPUID.
 
> > diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
> > new file mode 100644
> > index 000000000000..0d643b985085
> > --- /dev/null
> > +++ b/arch/x86/include/asm/sgx.h
> > @@ -0,0 +1,16 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#ifndef _ASM_X86_SGX_H
> > +#define _ASM_X86_SGX_H
> > +
> > +#include <linux/types.h>
> > +
> > +#ifdef CONFIG_X86_SGX_VIRTUALIZATION
> > +struct sgx_pageinfo;
> > +
> > +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
> > +		     int *trapnr);
> > +int sgx_virt_einit(void __user *sigstruct, void __user *token,
> > +		   void __user *secs, u64 *lepubkeyhash, int *trapnr);
> > +#endif
> > +
> > +#endif /* _ASM_X86_SGX_H */
> > diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
> > index d625551ccf25..4e9810ba9259 100644
> > --- a/arch/x86/kernel/cpu/sgx/virt.c
> > +++ b/arch/x86/kernel/cpu/sgx/virt.c
> > @@ -261,3 +261,58 @@ int __init sgx_virt_epc_init(void)
> >  
> >  	return misc_register(&sgx_virt_epc_dev);
> >  }
> > +
> > +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
> > +		     int *trapnr)
> > +{
> > +	int ret;
> > +
> > +	__uaccess_begin();
> > +	ret = __ecreate(pageinfo, (void *)secs);
> > +	__uaccess_end();
> 
> The __uaccess_begin/end() worries me.  There are *very* few of these in
> the kernel and it seems like something we want to use as sparingly as
> possible.
> 
> Why don't we just use the kernel mapping for 'secs' and not have to deal
> with stac/clac?

The kernel mapping isn't readily available.  At this point, it's not even
guaranteed that @secs points at an EPC page.  Unlike the driver code, where the
EPC page is allocated on-demand by the kernel, the pointer here is userspace
(technically guest) controlled.  The caller (KVM) is responsible for ensuring
it's a valid userspace address, but the SGX/EPC specific checks are mostly
deferred to hardware.

It's also possible to either retrieve the existing kernel mapping or to generate
a new mapping by resolving the PFN; this is/was simpler.

> I'm also just generally worried about casting away an __user without
> doing any checking.  How is that OK?

Short answer, KVM validates the virtual addresses.

KVM validates the host virtual addresses (HVA) when creating a memslot (maps
GPA->HVA).  The HVAs that are passed to these helpers are generated/retrieved
by KVM translating GVA->GPA->HVA; the GPA->HVA stage ensures the address is in a
valid memslot, and thus a valid user address.

That being said, these aren't exactly fast operations, adding access_ok() checks
is probably a good idea.
Dave Hansen Jan. 6, 2021, 9:23 p.m. UTC | #3
On 1/6/21 1:04 PM, Sean Christopherson wrote:
> On Wed, Jan 06, 2021, Dave Hansen wrote:
>> On 1/5/21 5:56 PM, Kai Huang wrote:
>>> From: Sean Christopherson <sean.j.christopherson@intel.com>
>>>
>>> Provide wrappers around __ecreate() and __einit() to hide the ugliness
>>> of overloading the ENCLS return value to encode multiple error formats
>>> in a single int.  KVM will trap-and-execute ECREATE and EINIT as part
>>> of SGX virtualization, and on an exception, KVM needs the trapnr so that
>>> it can inject the correct fault into the guest.
>>
>> This is missing a bit of a step about how and why ECREATE needs to be
>> run in the host in the first place.
> 
> There's (hopefully) good info in the KVM usage patch that can be borrowed:
> 
>   Add an ECREATE handler that will be used to intercept ECREATE for the
>   purpose of enforcing and enclave's MISCSELECT, ATTRIBUTES and XFRM, i.e.
>   to allow userspace to restrict SGX features via CPUID.  ECREATE will be
>   intercepted when any of the aforementioned masks diverges from hardware
>   in order to enforce the desired CPUID model, i.e. inject #GP if the
>   guest attempts to set a bit that hasn't been enumerated as allowed-1 in
>   CPUID.

OK, so in plain language: the bare-metal kernel must intercept ECREATE
to be able to impose policies on guests.  When it does this, the
bare-metal kernel runs ECREATE against the userspace mapping of the
virtualized EPC.

>>> diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
>>> new file mode 100644
>>> index 000000000000..0d643b985085
>>> --- /dev/null
>>> +++ b/arch/x86/include/asm/sgx.h
>>> @@ -0,0 +1,16 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +#ifndef _ASM_X86_SGX_H
>>> +#define _ASM_X86_SGX_H
>>> +
>>> +#include <linux/types.h>
>>> +
>>> +#ifdef CONFIG_X86_SGX_VIRTUALIZATION
>>> +struct sgx_pageinfo;
>>> +
>>> +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
>>> +		     int *trapnr);
>>> +int sgx_virt_einit(void __user *sigstruct, void __user *token,
>>> +		   void __user *secs, u64 *lepubkeyhash, int *trapnr);
>>> +#endif
>>> +
>>> +#endif /* _ASM_X86_SGX_H */
>>> diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
>>> index d625551ccf25..4e9810ba9259 100644
>>> --- a/arch/x86/kernel/cpu/sgx/virt.c
>>> +++ b/arch/x86/kernel/cpu/sgx/virt.c
>>> @@ -261,3 +261,58 @@ int __init sgx_virt_epc_init(void)
>>>  
>>>  	return misc_register(&sgx_virt_epc_dev);
>>>  }
>>> +
>>> +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
>>> +		     int *trapnr)
>>> +{
>>> +	int ret;
>>> +
>>> +	__uaccess_begin();
>>> +	ret = __ecreate(pageinfo, (void *)secs);
>>> +	__uaccess_end();
>>
>> The __uaccess_begin/end() worries me.  There are *very* few of these in
>> the kernel and it seems like something we want to use as sparingly as
>> possible.
>>
>> Why don't we just use the kernel mapping for 'secs' and not have to deal
>> with stac/clac?
> 
> The kernel mapping isn't readily available. 

Oh, duh.  There's no kernel mapping for EPC... it's not RAM in the first
place.

> At this point, it's not even
> guaranteed that @secs points at an EPC page.  Unlike the driver code, where the
> EPC page is allocated on-demand by the kernel, the pointer here is userspace
> (technically guest) controlled.  The caller (KVM) is responsible for ensuring
> it's a valid userspace address, but the SGX/EPC specific checks are mostly
> deferred to hardware.

Ahh, got it.  Kai, could we get some of this into comments or the changelog?


>> I'm also just generally worried about casting away an __user without
>> doing any checking.  How is that OK?
> 
> Short answer, KVM validates the virtual addresses.
> 
> KVM validates the host virtual addresses (HVA) when creating a memslot (maps
> GPA->HVA).  The HVAs that are passed to these helpers are generated/retrieved
> by KVM translating GVA->GPA->HVA; the GPA->HVA stage ensures the address is in a
> valid memslot, and thus a valid user address.

There is something a *bit* unpalatable about having KVM fill an
'unsigned long' only to cast it to a (void __user *), the to cast it
back to a (void *) to pass it to the SGX inlines.

I guess sparse would catch us in the window that it is __user if someone
tried to dereference it.

Adding access_ok()'s sounds like a good idea to me.  Or, at *least*
commenting why they're not necessary.
Huang, Kai Jan. 6, 2021, 10:58 p.m. UTC | #4
On Wed, 6 Jan 2021 13:23:37 -0800 Dave Hansen wrote:
> On 1/6/21 1:04 PM, Sean Christopherson wrote:
> > On Wed, Jan 06, 2021, Dave Hansen wrote:
> >> On 1/5/21 5:56 PM, Kai Huang wrote:
> >>> From: Sean Christopherson <sean.j.christopherson@intel.com>
> >>>
> >>> Provide wrappers around __ecreate() and __einit() to hide the ugliness
> >>> of overloading the ENCLS return value to encode multiple error formats
> >>> in a single int.  KVM will trap-and-execute ECREATE and EINIT as part
> >>> of SGX virtualization, and on an exception, KVM needs the trapnr so that
> >>> it can inject the correct fault into the guest.
> >>
> >> This is missing a bit of a step about how and why ECREATE needs to be
> >> run in the host in the first place.
> > 
> > There's (hopefully) good info in the KVM usage patch that can be borrowed:
> > 
> >   Add an ECREATE handler that will be used to intercept ECREATE for the
> >   purpose of enforcing and enclave's MISCSELECT, ATTRIBUTES and XFRM, i.e.
> >   to allow userspace to restrict SGX features via CPUID.  ECREATE will be
> >   intercepted when any of the aforementioned masks diverges from hardware
> >   in order to enforce the desired CPUID model, i.e. inject #GP if the
> >   guest attempts to set a bit that hasn't been enumerated as allowed-1 in
> >   CPUID.
> 
> OK, so in plain language: the bare-metal kernel must intercept ECREATE
> to be able to impose policies on guests.  When it does this, the
> bare-metal kernel runs ECREATE against the userspace mapping of the
> virtualized EPC.

Thanks. I'll add this to commit message.

> 
> >>> diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
> >>> new file mode 100644
> >>> index 000000000000..0d643b985085
> >>> --- /dev/null
> >>> +++ b/arch/x86/include/asm/sgx.h
> >>> @@ -0,0 +1,16 @@
> >>> +/* SPDX-License-Identifier: GPL-2.0 */
> >>> +#ifndef _ASM_X86_SGX_H
> >>> +#define _ASM_X86_SGX_H
> >>> +
> >>> +#include <linux/types.h>
> >>> +
> >>> +#ifdef CONFIG_X86_SGX_VIRTUALIZATION
> >>> +struct sgx_pageinfo;
> >>> +
> >>> +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
> >>> +		     int *trapnr);
> >>> +int sgx_virt_einit(void __user *sigstruct, void __user *token,
> >>> +		   void __user *secs, u64 *lepubkeyhash, int *trapnr);
> >>> +#endif
> >>> +
> >>> +#endif /* _ASM_X86_SGX_H */
> >>> diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
> >>> index d625551ccf25..4e9810ba9259 100644
> >>> --- a/arch/x86/kernel/cpu/sgx/virt.c
> >>> +++ b/arch/x86/kernel/cpu/sgx/virt.c
> >>> @@ -261,3 +261,58 @@ int __init sgx_virt_epc_init(void)
> >>>  
> >>>  	return misc_register(&sgx_virt_epc_dev);
> >>>  }
> >>> +
> >>> +int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
> >>> +		     int *trapnr)
> >>> +{
> >>> +	int ret;
> >>> +
> >>> +	__uaccess_begin();
> >>> +	ret = __ecreate(pageinfo, (void *)secs);
> >>> +	__uaccess_end();
> >>
> >> The __uaccess_begin/end() worries me.  There are *very* few of these in
> >> the kernel and it seems like something we want to use as sparingly as
> >> possible.
> >>
> >> Why don't we just use the kernel mapping for 'secs' and not have to deal
> >> with stac/clac?
> > 
> > The kernel mapping isn't readily available. 
> 
> Oh, duh.  There's no kernel mapping for EPC... it's not RAM in the first
> place.
> 
> > At this point, it's not even
> > guaranteed that @secs points at an EPC page.  Unlike the driver code, where the
> > EPC page is allocated on-demand by the kernel, the pointer here is userspace
> > (technically guest) controlled.  The caller (KVM) is responsible for ensuring
> > it's a valid userspace address, but the SGX/EPC specific checks are mostly
> > deferred to hardware.
> 
> Ahh, got it.  Kai, could we get some of this into comments or the changelog?

Yes I'll add some into comments.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
new file mode 100644
index 000000000000..0d643b985085
--- /dev/null
+++ b/arch/x86/include/asm/sgx.h
@@ -0,0 +1,16 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SGX_H
+#define _ASM_X86_SGX_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_X86_SGX_VIRTUALIZATION
+struct sgx_pageinfo;
+
+int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
+		     int *trapnr);
+int sgx_virt_einit(void __user *sigstruct, void __user *token,
+		   void __user *secs, u64 *lepubkeyhash, int *trapnr);
+#endif
+
+#endif /* _ASM_X86_SGX_H */
diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
index d625551ccf25..4e9810ba9259 100644
--- a/arch/x86/kernel/cpu/sgx/virt.c
+++ b/arch/x86/kernel/cpu/sgx/virt.c
@@ -261,3 +261,58 @@  int __init sgx_virt_epc_init(void)
 
 	return misc_register(&sgx_virt_epc_dev);
 }
+
+int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
+		     int *trapnr)
+{
+	int ret;
+
+	__uaccess_begin();
+	ret = __ecreate(pageinfo, (void *)secs);
+	__uaccess_end();
+
+	if (encls_faulted(ret)) {
+		*trapnr = ENCLS_TRAPNR(ret);
+		return -EFAULT;
+	}
+
+	/* ECREATE doesn't return an error code, it faults or succeeds. */
+	WARN_ON_ONCE(ret);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sgx_virt_ecreate);
+
+static int __sgx_virt_einit(void __user *sigstruct, void __user *token,
+			    void __user *secs)
+{
+	int ret;
+
+	__uaccess_begin();
+	ret =  __einit((void *)sigstruct, (void *)token, (void *)secs);
+	__uaccess_end();
+	return ret;
+}
+
+int sgx_virt_einit(void __user *sigstruct, void __user *token,
+		   void __user *secs, u64 *lepubkeyhash, int *trapnr)
+{
+	int ret;
+
+	if (!boot_cpu_has(X86_FEATURE_SGX_LC)) {
+		ret = __sgx_virt_einit(sigstruct, token, secs);
+	} else {
+		preempt_disable();
+
+		sgx_update_lepubkeyhash(lepubkeyhash);
+
+		ret = __sgx_virt_einit(sigstruct, token, secs);
+		preempt_enable();
+	}
+
+	if (encls_faulted(ret)) {
+		*trapnr = ENCLS_TRAPNR(ret);
+		return -EFAULT;
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sgx_virt_einit);