Message ID: 20200915112842.897265-22-jarkko.sakkinen@linux.intel.com (mailing list archive)
State: New, archived
Series: Intel SGX foundations
On Tue, Sep 15, 2020 at 02:28:39PM +0300, Jarkko Sakkinen wrote: > diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S > new file mode 100644 > index 000000000000..adbd59d41517 > --- /dev/null > +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S Why not simply arch/x86/entry/vdso/sgx.S ? > @@ -0,0 +1,157 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +#include <linux/linkage.h> > +#include <asm/export.h> > +#include <asm/errno.h> > +#include <asm/enclu.h> > + > +#include "extable.h" > + > +/* Offset of 'struct sgx_enclave_run' relative to %rbp. */ > +#define SGX_ENCLAVE_RUN_PTR 2*8 > + > +/* Offsets into 'struct sgx_enclave_run'. */ > +#define SGX_ENCLAVE_RUN_TSC 0*8 > +#define SGX_ENCLAVE_RUN_FLAGS 1*8 > +#define SGX_ENCLAVE_RUN_EXIT_REASON 1*8 + 4 > +#define SGX_ENCLAVE_RUN_USER_HANDLER 2*8 > +/* #define SGX_ENCLAVE_RUN_USER_DATA 3*8 */ What's with that guy? Left here as documentation showing what's at 3*8 offset? > +#define SGX_ENCLAVE_RUN_EXCEPTION 4*8 > + > +#define SGX_SYNCHRONOUS_EXIT 0 > +#define SGX_EXCEPTION_EXIT 1 Those are in ...uapi/asm/sgx.h too. Unify? > + > +/* Offsets into sgx_enter_enclave.exception. */ > +#define SGX_EX_LEAF 0*8 > +#define SGX_EX_TRAPNR 0*8+4 > +#define SGX_EX_ERROR_CODE 0*8+6 > +#define SGX_EX_ADDRESS 1*8 > + > +.code64 > +.section .text, "ax" > + > +SYM_FUNC_START(__vdso_sgx_enter_enclave) > + /* Prolog */ > + .cfi_startproc Are the CFI annotations for userspace? > + push %rbp > + .cfi_adjust_cfa_offset 8 > + .cfi_rel_offset %rbp, 0 > + mov %rsp, %rbp > + .cfi_def_cfa_register %rbp > + push %rbx > + .cfi_rel_offset %rbx, -8 > + > + mov %ecx, %eax > +.Lenter_enclave: > + /* EENTER <= leaf <= ERESUME */ > + cmp $EENTER, %eax > + jb .Linvalid_input > + cmp $ERESUME, %eax > + ja .Linvalid_input > + > + mov SGX_ENCLAVE_RUN_PTR(%rbp), %rcx > + > + /* No flags are currently defined/supported. */ > + cmpl $0, SGX_ENCLAVE_RUN_FLAGS(%rcx) > + jne .Linvalid_input > + > + /* Load TCS and AEP */ TSC I can see why you would write "TCS" though - there's a thread control structure thing too in that patch. > + mov SGX_ENCLAVE_RUN_TSC(%rcx), %rbx > + lea .Lasync_exit_pointer(%rip), %rcx > + > + /* Single ENCLU serving as both EENTER and AEP (ERESUME) */ > +.Lasync_exit_pointer: > +.Lenclu_eenter_eresume: > + enclu > + > + /* EEXIT jumps here unless the enclave is doing something fancy. */ > + mov SGX_ENCLAVE_RUN_PTR(%rbp), %rbx > + > + /* Set exit_reason. */ > + movl $SGX_SYNCHRONOUS_EXIT, SGX_ENCLAVE_RUN_EXIT_REASON(%rbx) > + > + /* Invoke userspace's exit handler if one was provided. */ > +.Lhandle_exit: > + cmpq $0, SGX_ENCLAVE_RUN_USER_HANDLER(%rbx) > + jne .Linvoke_userspace_handler > + > + /* Success, in the sense that ENCLU was attempted. */ > + xor %eax, %eax > + > +.Lout: > + pop %rbx > + leave > + .cfi_def_cfa %rsp, 8 > + ret > + > + /* The out-of-line code runs with the pre-leave stack frame. */ > + .cfi_def_cfa %rbp, 16 > + > +.Linvalid_input: > + mov $(-EINVAL), %eax > + jmp .Lout > + > +.Lhandle_exception: > + mov SGX_ENCLAVE_RUN_PTR(%rbp), %rbx > + > + /* Set the exit_reason and exception info. 
*/ > + movl $SGX_EXCEPTION_EXIT, SGX_ENCLAVE_RUN_EXIT_REASON(%rbx) > + > + mov %eax, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_LEAF)(%rbx) > + mov %di, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_TRAPNR)(%rbx) > + mov %si, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_ERROR_CODE)(%rbx) > + mov %rdx, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_ADDRESS)(%rbx) > + jmp .Lhandle_exit > + > +.Linvoke_userspace_handler: > + /* Pass the untrusted RSP (at exit) to the callback via %rcx. */ > + mov %rsp, %rcx > + > + /* Save @e, %rbx is about to be clobbered. */ I've been wondering what that "@e" thing is and I believe I've found it at the end: "... @e, the optional sgx_enclave_exception struct" Yes? Please rewrite what that "e" is here - I think you wanna say "struct sgx_enclave_run.exception" instead to clarify. ... > diff --git a/arch/x86/include/asm/enclu.h b/arch/x86/include/asm/enclu.h > new file mode 100644 > index 000000000000..06157b3e9ede > --- /dev/null > +++ b/arch/x86/include/asm/enclu.h > @@ -0,0 +1,8 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef _ASM_X86_ENCLU_H > +#define _ASM_X86_ENCLU_H > + > +#define EENTER 0x02 > +#define ERESUME 0x03 > + > +#endif /* _ASM_X86_ENCLU_H */ > diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h > index d0916fb9629e..1564d7f88597 100644 > --- a/arch/x86/include/uapi/asm/sgx.h > +++ b/arch/x86/include/uapi/asm/sgx.h Are all those defines being added to the uapi header, also actually needed by userspace? > @@ -72,4 +72,132 @@ struct sgx_enclave_provision { > __u64 attribute_fd; > }; > > +#define SGX_SYNCHRONOUS_EXIT 0 > +#define SGX_EXCEPTION_EXIT 1 > + > +struct sgx_enclave_run; > + > +/** > + * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by > + * __vdso_sgx_enter_enclave() > + * > + * @rdi: RDI at the time of enclave exit > + * @rsi: RSI at the time of enclave exit > + * @rdx: RDX at the time of enclave exit > + * @ursp: RSP at the time of enclave exit (untrusted stack) > + * @r8: R8 at the time of enclave exit > + * @r9: R9 at the time of enclave exit I'm sure you can avoid that "at the time of enclave exit" repetition. ... > +/** > + * typedef vdso_sgx_enter_enclave_t - Prototype for __vdso_sgx_enter_enclave(), > + * a vDSO function to enter an SGX enclave. > + * > + * @rdi: Pass-through value for RDI > + * @rsi: Pass-through value for RSI > + * @rdx: Pass-through value for RDX > + * @leaf: ENCLU leaf, must be EENTER or ERESUME > + * @r8: Pass-through value for R8 > + * @r9: Pass-through value for R9 > + * @r: struct sgx_enclave_run, must be non-NULL > + * > + * NOTE: __vdso_sgx_enter_enclave() does not ensure full compliance with the > + * x86-64 ABI, e.g. doesn't handle XSAVE state. Except for non-volatile > + * general purpose registers, EFLAGS.DF, and RSP alignment, preserving/setting > + * state in accordance with the x86-64 ABI is the responsibility of the enclave > + * and its runtime, i.e. __vdso_sgx_enter_enclave() cannot be called from C > + * code without careful consideration by both the enclave and its runtime. > + * > + * All general purpose registers except RAX, RBX and RCX are passed as-is to > + * the enclave. RAX, RBX and RCX are consumed by EENTER and ERESUME and are > + * loaded with @leaf, asynchronous exit pointer, and @tcs respectively. That tcs is probably struct sgx_enclave_run.tcs? > + * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the > + * pre-enclave state, e.g. to retrieve @e and @handler after an enclave exit. Expand @e and do you mean "@user_handler" with "@handler"? 
> + * All other registers are available for use by the enclave and its runtime, > + * e.g. an enclave can push additional data onto the stack (and modify RSP) to > + * pass information to the optional exit handler (see below). > + * > + * Most exceptions reported on ENCLU, including those that occur within the > + * enclave, are fixed up and reported synchronously instead of being delivered > + * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are > + * never fixed up and are always delivered via standard signals. On synchrously > + * reported exceptions, -EFAULT is returned and details about the exception are > + * recorded in @e, the optional sgx_enclave_exception struct. > + * > + * If an exit handler is provided, the handler will be invoked on synchronous > + * exits from the enclave and for all synchronously reported exceptions. In > + * latter case, @e is filled prior to invoking the handler. > + * > + * The exit handler's return value is interpreted as follows: > + * >0: continue, restart __vdso_sgx_enter_enclave() with @ret as @leaf > + * 0: success, return @ret to the caller > + * <0: error, return @ret to the caller > + * > + * The exit handler may transfer control, e.g. via longjmp() or C++ exception, > + * without returning to __vdso_sgx_enter_enclave(). > + * > + * Return: > + * 0 on success (ENCLU reached), > + * -EINVAL if ENCLU leaf is not allowed, > + * -errno for all other negative values returned by the userspace exit handler > + */ > +typedef int (*vdso_sgx_enter_enclave_t)(unsigned long rdi, unsigned long rsi, > + unsigned long rdx, unsigned int leaf, > + unsigned long r8, unsigned long r9, > + struct sgx_enclave_run *r); > + > #endif /* _UAPI_ASM_X86_SGX_H */ > -- > 2.25.1 >
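As a rough illustration of the interface reviewed above, here is a minimal userspace sketch of calling through vdso_sgx_enter_enclave_t. It assumes the enclave itself preserves the x86-64 ABI, per the NOTE in the kernel-doc; vdso_sym() is a hypothetical helper that resolves the symbol from the vDSO image found via getauxval(AT_SYSINFO_EHDR) and is not part of the patch:

#include <asm/sgx.h>	/* struct sgx_enclave_run, vdso_sgx_enter_enclave_t */

#define EENTER	0x02	/* ENCLU leaf; mirrors asm/enclu.h, which the patch
			   keeps kernel-internal rather than in uapi */

void *vdso_sym(const char *name);	/* hypothetical symbol lookup */

static int run_enclave(unsigned long tcs)
{
	vdso_sgx_enter_enclave_t enter_enclave =
		vdso_sym("__vdso_sgx_enter_enclave");
	/* Remaining fields zeroed: no flags are defined, no user handler. */
	struct sgx_enclave_run run = { .tcs = tcs };

	/* rdi/rsi/rdx/r8/r9 are passed through to the enclave as-is. */
	return enter_enclave(0, 0, 0, EENTER, 0, 0, &run);
}

A zero return means ENCLU was reached and the enclave exited synchronously; a fixed-up exception is instead reported as -EFAULT, with the details recorded in run.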
On Thu, Sep 24, 2020 at 08:04:07PM +0200, Borislav Petkov wrote: > On Tue, Sep 15, 2020 at 02:28:39PM +0300, Jarkko Sakkinen wrote: > > diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > new file mode 100644 > > index 000000000000..adbd59d41517 > > --- /dev/null > > +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > Why not simply > > arch/x86/entry/vdso/sgx.S > > ? I renamed it as vsgx.S (for the sake of convention). > > @@ -0,0 +1,157 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > + > > +#include <linux/linkage.h> > > +#include <asm/export.h> > > +#include <asm/errno.h> > > +#include <asm/enclu.h> > > + > > +#include "extable.h" > > + > > +/* Offset of 'struct sgx_enclave_run' relative to %rbp. */ > > +#define SGX_ENCLAVE_RUN_PTR 2*8 > > + > > +/* Offsets into 'struct sgx_enclave_run'. */ > > +#define SGX_ENCLAVE_RUN_TSC 0*8 > > +#define SGX_ENCLAVE_RUN_FLAGS 1*8 > > +#define SGX_ENCLAVE_RUN_EXIT_REASON 1*8 + 4 > > +#define SGX_ENCLAVE_RUN_USER_HANDLER 2*8 > > +/* #define SGX_ENCLAVE_RUN_USER_DATA 3*8 */ > > What's with that guy? Left here as documentation showing what's at 3*8 > offset? Yes. > > +#define SGX_ENCLAVE_RUN_EXCEPTION 4*8 > > + > > +#define SGX_SYNCHRONOUS_EXIT 0 > > +#define SGX_EXCEPTION_EXIT 1 > > Those are in ...uapi/asm/sgx.h too. Unify? I have not authored this patch but what I would propose is to use just raw value in the place of these constants. It is practially just a boolean value. I can also add sgx_vdso.h to uapi directory. I just don't see the point. Holding on for feedback with this one. > > + > > +/* Offsets into sgx_enter_enclave.exception. */ > > +#define SGX_EX_LEAF 0*8 > > +#define SGX_EX_TRAPNR 0*8+4 > > +#define SGX_EX_ERROR_CODE 0*8+6 > > +#define SGX_EX_ADDRESS 1*8 > > + > > +.code64 > > +.section .text, "ax" > > + > > +SYM_FUNC_START(__vdso_sgx_enter_enclave) > > + /* Prolog */ > > + .cfi_startproc > > Are the CFI annotations for userspace? Yes, for stack unwinding. However, I would leave them out. Holding on for further feedback with this tho. > > + push %rbp > > + .cfi_adjust_cfa_offset 8 > > + .cfi_rel_offset %rbp, 0 > > + mov %rsp, %rbp > > + .cfi_def_cfa_register %rbp > > + push %rbx > > + .cfi_rel_offset %rbx, -8 > > + > > + mov %ecx, %eax > > +.Lenter_enclave: > > + /* EENTER <= leaf <= ERESUME */ > > + cmp $EENTER, %eax > > + jb .Linvalid_input > > + cmp $ERESUME, %eax > > + ja .Linvalid_input > > + > > + mov SGX_ENCLAVE_RUN_PTR(%rbp), %rcx > > + > > + /* No flags are currently defined/supported. */ > > + cmpl $0, SGX_ENCLAVE_RUN_FLAGS(%rcx) > > + jne .Linvalid_input > > + > > + /* Load TCS and AEP */ > > TSC > > I can see why you would write "TCS" though - there's a thread control > structure thing too in that patch. Renamed. > > + mov SGX_ENCLAVE_RUN_TSC(%rcx), %rbx > > + lea .Lasync_exit_pointer(%rip), %rcx > > + > > + /* Single ENCLU serving as both EENTER and AEP (ERESUME) */ > > +.Lasync_exit_pointer: > > +.Lenclu_eenter_eresume: > > + enclu > > + > > + /* EEXIT jumps here unless the enclave is doing something fancy. */ > > + mov SGX_ENCLAVE_RUN_PTR(%rbp), %rbx > > + > > + /* Set exit_reason. */ > > + movl $SGX_SYNCHRONOUS_EXIT, SGX_ENCLAVE_RUN_EXIT_REASON(%rbx) > > + > > + /* Invoke userspace's exit handler if one was provided. */ > > +.Lhandle_exit: > > + cmpq $0, SGX_ENCLAVE_RUN_USER_HANDLER(%rbx) > > + jne .Linvoke_userspace_handler > > + > > + /* Success, in the sense that ENCLU was attempted. 
*/ > > + xor %eax, %eax > > + > > +.Lout: > > + pop %rbx > > + leave > > + .cfi_def_cfa %rsp, 8 > > + ret > > + > > + /* The out-of-line code runs with the pre-leave stack frame. */ > > + .cfi_def_cfa %rbp, 16 > > + > > +.Linvalid_input: > > + mov $(-EINVAL), %eax > > + jmp .Lout > > + > > +.Lhandle_exception: > > + mov SGX_ENCLAVE_RUN_PTR(%rbp), %rbx > > + > > + /* Set the exit_reason and exception info. */ > > + movl $SGX_EXCEPTION_EXIT, SGX_ENCLAVE_RUN_EXIT_REASON(%rbx) > > + > > + mov %eax, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_LEAF)(%rbx) > > + mov %di, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_TRAPNR)(%rbx) > > + mov %si, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_ERROR_CODE)(%rbx) > > + mov %rdx, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_ADDRESS)(%rbx) > > + jmp .Lhandle_exit > > + > > +.Linvoke_userspace_handler: > > + /* Pass the untrusted RSP (at exit) to the callback via %rcx. */ > > + mov %rsp, %rcx > > + > > + /* Save @e, %rbx is about to be clobbered. */ > > I've been wondering what that "@e" thing is and I believe I've found it at the > end: "... @e, the optional sgx_enclave_exception struct" > > Yes? > > Please rewrite what that "e" is here - I think you wanna say "struct > sgx_enclave_run.exception" instead to clarify. I agree. Open coded them as "struct sgx_enclave_exception". > > ... > > > diff --git a/arch/x86/include/asm/enclu.h b/arch/x86/include/asm/enclu.h > > new file mode 100644 > > index 000000000000..06157b3e9ede > > --- /dev/null > > +++ b/arch/x86/include/asm/enclu.h > > @@ -0,0 +1,8 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > +#ifndef _ASM_X86_ENCLU_H > > +#define _ASM_X86_ENCLU_H > > + > > +#define EENTER 0x02 > > +#define ERESUME 0x03 > > + > > +#endif /* _ASM_X86_ENCLU_H */ > > diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h > > index d0916fb9629e..1564d7f88597 100644 > > --- a/arch/x86/include/uapi/asm/sgx.h > > +++ b/arch/x86/include/uapi/asm/sgx.h > > Are all those defines being added to the uapi header, also actually > needed by userspace? I'd remove the two constants, as it is clearly just a boolean value. > > @@ -72,4 +72,132 @@ struct sgx_enclave_provision { > > __u64 attribute_fd; > > }; > > > > +#define SGX_SYNCHRONOUS_EXIT 0 > > +#define SGX_EXCEPTION_EXIT 1 > > + > > +struct sgx_enclave_run; > > + > > +/** > > + * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by > > + * __vdso_sgx_enter_enclave() > > + * > > + * @rdi: RDI at the time of enclave exit > > + * @rsi: RSI at the time of enclave exit > > + * @rdx: RDX at the time of enclave exit > > + * @ursp: RSP at the time of enclave exit (untrusted stack) > > + * @r8: R8 at the time of enclave exit > > + * @r9: R9 at the time of enclave exit > > I'm sure you can avoid that "at the time of enclave exit" repetition. I edited this as /** * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by * __vdso_sgx_enter_enclave() * @rdi: RDI snapshot * @rsi: RSI snapshot * @rdx: RDX snapshot * @rsp: RSP snapshot (untrusted stack) * @r8: R8 snapshot * @r9: R9 snapshot * @run: Pointer to the caller provided struct sgx_enclave_run * * Return: * 0 or negative to exit vDSO * positive to re-enter enclave (must be EENTER or ERESUME leaf) */ typedef int (*sgx_enclave_exit_handler_t)(long rdi, long rsi, long rdx, long rsp, long r8, long r9, struct sgx_enclave_run *run); > > +/** > > + * typedef vdso_sgx_enter_enclave_t - Prototype for __vdso_sgx_enter_enclave(), > > + * a vDSO function to enter an SGX enclave. 
> > + * > > + * @rdi: Pass-through value for RDI > > + * @rsi: Pass-through value for RSI > > + * @rdx: Pass-through value for RDX > > + * @leaf: ENCLU leaf, must be EENTER or ERESUME > > + * @r8: Pass-through value for R8 > > + * @r9: Pass-through value for R9 > > + * @r: struct sgx_enclave_run, must be non-NULL > > + * > > + * NOTE: __vdso_sgx_enter_enclave() does not ensure full compliance with the > > + * x86-64 ABI, e.g. doesn't handle XSAVE state. Except for non-volatile > > + * general purpose registers, EFLAGS.DF, and RSP alignment, preserving/setting > > + * state in accordance with the x86-64 ABI is the responsibility of the enclave > > + * and its runtime, i.e. __vdso_sgx_enter_enclave() cannot be called from C > > + * code without careful consideration by both the enclave and its runtime. > > + * > > + * All general purpose registers except RAX, RBX and RCX are passed as-is to > > + * the enclave. RAX, RBX and RCX are consumed by EENTER and ERESUME and are > > + * loaded with @leaf, asynchronous exit pointer, and @tcs respectively. > > That tcs is probably struct sgx_enclave_run.tcs? Yes. > > + * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the > > + * pre-enclave state, e.g. to retrieve @e and @handler after an enclave exit. > > Expand @e and do you mean "@user_handler" with "@handler"? I replaced this and similar fields as @run.user_handler and so forth. > > > + * All other registers are available for use by the enclave and its runtime, > > + * e.g. an enclave can push additional data onto the stack (and modify RSP) to > > + * pass information to the optional exit handler (see below). > > + * > > + * Most exceptions reported on ENCLU, including those that occur within the > > + * enclave, are fixed up and reported synchronously instead of being delivered > > + * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are > > + * never fixed up and are always delivered via standard signals. On synchrously > > + * reported exceptions, -EFAULT is returned and details about the exception are > > + * recorded in @e, the optional sgx_enclave_exception struct. > > + * > > + * If an exit handler is provided, the handler will be invoked on synchronous > > + * exits from the enclave and for all synchronously reported exceptions. In > > + * latter case, @e is filled prior to invoking the handler. > > + * > > + * The exit handler's return value is interpreted as follows: > > + * >0: continue, restart __vdso_sgx_enter_enclave() with @ret as @leaf > > + * 0: success, return @ret to the caller > > + * <0: error, return @ret to the caller > > + * > > + * The exit handler may transfer control, e.g. via longjmp() or C++ exception, > > + * without returning to __vdso_sgx_enter_enclave(). > > + * > > + * Return: > > + * 0 on success (ENCLU reached), > > + * -EINVAL if ENCLU leaf is not allowed, > > + * -errno for all other negative values returned by the userspace exit handler > > + */ > > +typedef int (*vdso_sgx_enter_enclave_t)(unsigned long rdi, unsigned long rsi, > > + unsigned long rdx, unsigned int leaf, > > + unsigned long r8, unsigned long r9, > > + struct sgx_enclave_run *r); > > + > > #endif /* _UAPI_ASM_X86_SGX_H */ > > -- > > 2.25.1 > > > > -- > Regards/Gruss, > Boris. > > https://people.kernel.org/tglx/notes-about-netiquette Also, I renamed 'r' as 'run' in some places. 
End result: https://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-sgx.git/tree/arch/x86/include/uapi/asm/sgx.h I'm wondering this sentence: "The calling convention is custom and does not follow System V x86-64 ABI." AFAIK, now the vDSO is fully C-callable but waiting for feedback before removing it. /Jarkko
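To make the reworked kernel-doc concrete, here is a sketch of a handler matching the sgx_enclave_exit_handler_t prototype pasted above. It assumes the SGX_EXCEPTION_EXIT constant from this revision, which the thread discusses possibly dropping:

#include <errno.h>

/* Bail out on a synchronously reported exception; otherwise return to
 * the caller of __vdso_sgx_enter_enclave(). Returning a positive value
 * (EENTER or ERESUME) would instead re-enter the enclave with that leaf. */
static int exit_handler(long rdi, long rsi, long rdx, long rsp,
			long r8, long r9, struct sgx_enclave_run *run)
{
	if (run->exit_reason == SGX_EXCEPTION_EXIT)
		return -EFAULT;	/* propagated as the vDSO return value */

	return 0;		/* 0 or negative: exit the vDSO call */
}

The handler would be wired up before the call with something like run.user_handler = (__u64)(uintptr_t)exit_handler;.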
On Thu, Sep 24, 2020 at 05:38:10PM -0700, Sean Christopherson wrote: > On Thu, Sep 24, 2020 at 08:04:07PM +0200, Borislav Petkov wrote: > > On Tue, Sep 15, 2020 at 02:28:39PM +0300, Jarkko Sakkinen wrote: > > > diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > > new file mode 100644 > > > index 000000000000..adbd59d41517 > > > --- /dev/null > > > +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > > > Why not simply > > > > arch/x86/entry/vdso/sgx.S > > > > ? > > I really like typing? I fixed a bunch of things. For that sentence I need feedback; for any other possible changes, please send a patch if required. /Jarkko
On Thu, Sep 24, 2020 at 05:38:10PM -0700, Sean Christopherson wrote: > > Why not simply > > > > arch/x86/entry/vdso/sgx.S > > > > ? > > I really like typing? I'll say. > Yes, to call out that there's a field there, but a field that the vDSO should > never touch. You wanna enforce that in the vdso code? Because if it is there, luserspace will touch it. > > > +#define SGX_ENCLAVE_RUN_EXCEPTION 4*8 > > > + > > > +#define SGX_SYNCHRONOUS_EXIT 0 > > > +#define SGX_EXCEPTION_EXIT 1 > > > > Those are in ...uapi/asm/sgx.h too. Unify? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ What about this here? > > Are the CFI annotations for userspace? > > Yes, though that's a 90% confident "yes". Looks like we wanna support it: f24f91088427 ("x86/vdso: Define BUILD_VDSO while building and emit .eh_frame in asm") > Argh, it's actually supposed to be TCS, SGX_ENCLAVE_RUN_TSC is the one that's > wrong. Whoopsie :-) Thx.
On Fri, Sep 25, 2020 at 04:00:40AM +0300, Jarkko Sakkinen wrote: > I renamed it as vsgx.S (for the sake of convention). Right. > I have not authored this patch but what I would propose is to use just > raw value in the place of these constants. It is practially just a > boolean value. > > I can also add sgx_vdso.h to uapi directory. I just don't see the point. Just be very cautious what you add to the uapi/ directory because it becomes API and there's no changing it. That's why I point you guys to it, to think hard what you expose there and that it becomes contract with luserspace. > > I can see why you would write "TCS" though - there's a thread control > > structure thing too in that patch. > > Renamed. See Sean's reply. > /** > * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by > * __vdso_sgx_enter_enclave() > * @rdi: RDI snapshot > * @rsi: RSI snapshot > * @rdx: RDX snapshot > * @rsp: RSP snapshot (untrusted stack) > * @r8: R8 snapshot > * @r9: R9 snapshot I'd say here: "The registers' content is the snapshot made at enclave exit." > Also, I renamed 'r' as 'run' in some places. > > End result: > > https://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-sgx.git/tree/arch/x86/include/uapi/asm/sgx.h > > I'm wondering this sentence: > > "The calling convention is custom and does not follow System V x86-64 ABI." Yeah, I was wondering what that meant too.
On 2020-09-25 03:00, Jarkko Sakkinen wrote: > End result: > > https://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-sgx.git/tree/arch/x86/include/uapi/asm/sgx.h > > I'm wondering this sentence: > > "The calling convention is custom and does not follow System V x86-64 ABI." > > AFAIK, now the vDSO is fully C-callable but waiting for feedback before > removing it. It's C-callable *iff your enclave follows the System V x86-64 ABI*. In addition, all registers not clobbered by the SGX ISA are passed to the enclave, not just those specified as parameter passing registers in the ABI. This is intentional to make the vDSO function usable in applications that use the current flexibility of ENCLU. -- Jethro Beekman | Fortanix
On Fri, Sep 25, 2020 at 10:14:41AM +0200, Borislav Petkov wrote: > > > > +#define SGX_ENCLAVE_RUN_EXCEPTION 4*8 > > > > + > > > > +#define SGX_SYNCHRONOUS_EXIT 0 > > > > +#define SGX_EXCEPTION_EXIT 1 > > > > > > Those are in ...uapi/asm/sgx.h too. Unify? > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > > What about this here? Repeating myself, but since there is only 0 and 1, I would just use 0 and 1. Otherwise, I think I pretty much got these comments sorted out. /Jarkko
On Fri, Sep 25, 2020 at 10:39:58AM +0200, Jethro Beekman wrote: > On 2020-09-25 03:00, Jarkko Sakkinen wrote: > > End result: > > > > https://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-sgx.git/tree/arch/x86/include/uapi/asm/sgx.h > > > > I'm wondering this sentence: > > > > "The calling convention is custom and does not follow System V x86-64 ABI." > > > > AFAIK, now the vDSO is fully C-callable but waiting for feedback before > > removing it. > > It's C-callable *iff your enclave follows the System V x86-64 ABI*. In > addition, all registers not clobbered by the SGX ISA are passed to the > enclave, not just those specified as parameter passing registers in > the ABI. This is intentional to make the vDSO function usable in > applications that use the current flexibility of ENCLU. Hold on, I really want to fix this bit of documentation before sending any new version, so I'll explain how I understand it. I think it is just a System V ABI call with six parameters in the usual GPRs (rdi, rsi, rdx, rcx, r8, r9). I'm looking at vdso_sgx_enter_enclave. What am I not getting? > -- > Jethro Beekman | Fortanix /Jarkko
On 2020-09-25 13:17, Jarkko Sakkinen wrote: > On Fri, Sep 25, 2020 at 10:39:58AM +0200, Jethro Beekman wrote: >> On 2020-09-25 03:00, Jarkko Sakkinen wrote: >>> End result: >>> >>> https://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-sgx.git/tree/arch/x86/include/uapi/asm/sgx.h >>> >>> I'm wondering this sentence: >>> >>> "The calling convention is custom and does not follow System V x86-64 ABI." >>> >>> AFAIK, now the vDSO is fully C-callable but waiting for feedback before >>> removing it. >> >> It's C-callable *iff your enclave follows the System V x86-64 ABI*. In >> addition, all registers not clobbered by the SGX ISA are passed to the >> enclave, not just those specified as parameter passing registers in >> the ABI. This is intentional to make the vDSO function usable in >> applications that use the current flexibility of ENCLU. > > Hold on, I really want to fix this bit of documentation before sending > any new version, so I'll explain how I understand it. > > I think it is just a System V ABI call with six parameters in the usual > GPRs (rdi, rsi, rdx, rcx, r8, r9). > > I'm looking at vdso_sgx_enter_enclave. > > What am I not getting? This can't be observed from looking at the code in vdso_sgx_enter_enclave, because what makes this "custom" is the absence of code, or code in the enclave. If you call vdso_sgx_enter_enclave() from C but your enclave doesn't respect the ABI (for example, it clobbers callee-saved registers), your code will break. But if you call vdso_sgx_enter_enclave from assembly, you can use enclaves with any ABI as long as your assembly and the enclave agree on that ABI. Alternatively, when using assembly, I can pass stuff to the enclave in registers besides rdi, rsi, rdx, rcx, r8, r9, just as if I were manually calling ENCLU from assembly. The vDSO assembly has been carefully written to support both scenarios by only using rax, rbx, rcx, rbp, rsp and passing rdi, rsi, rdx, r8, r9 (and everything else). > + * NOTE: __vdso_sgx_enter_enclave() does not ensure full compliance with the > + * x86-64 ABI, e.g. doesn't handle XSAVE state. Except for non-volatile > + * general purpose registers, EFLAGS.DF, and RSP alignment, preserving/setting > + * state in accordance with the x86-64 ABI is the responsibility of the enclave > + * and its runtime, i.e. __vdso_sgx_enter_enclave() cannot be called from C > + * code without careful consideration by both the enclave and its runtime. > + * > + * All general purpose registers except RAX, RBX and RCX are passed as-is to > + * the enclave. RAX, RBX and RCX are consumed by EENTER and ERESUME and are > + * loaded with @leaf, asynchronous exit pointer, and @tcs respectively. Perhaps this should be updated to read "All general purpose registers except RAX, RBX, RCX, RBP and RSP ..." -- Jethro Beekman | Fortanix
On 15/09/2020 12:28, Jarkko Sakkinen wrote: > diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S > new file mode 100644 > index 000000000000..adbd59d41517 > --- /dev/null > +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S > @@ -0,0 +1,157 @@ > +SYM_FUNC_START(__vdso_sgx_enter_enclave) > <snip> > +.Lretpoline: > + call 2f > +1: pause > + lfence > + jmp 1b > +2: mov %rax, (%rsp) > + ret I hate to throw further spanners in the work, but this is not compatible with CET, and the user shadow stack work in progress. Whichever of these two large series lands first is going to inflict fixing this problem on the other. As the vdso text is global (to a first approximation), it must not be a retpoline if any other process is liable to want to use CET-SS. If the retpoline really does need to stay, then the vdso probably needs to gain suitable __x86_indirect_thunk_%reg thunks which are patched at boot based on the system properties. ~Andrew
On Fri, Sep 25, 2020 at 10:28:07AM +0200, Borislav Petkov wrote: > > > I can see why you would write "TCS" though - there's a thread control > > > structure thing too in that patch. > > > > Renamed. > > See Sean's reply. I did not get Sean's reply, nor can I find it on lore: https://lore.kernel.org/linux-sgx/20200915112842.897265-1-jarkko.sakkinen@linux.intel.com/T/#t > > * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by > > * __vdso_sgx_enter_enclave() > > * @rdi: RDI snapshot > > * @rsi: RSI snapshot > > * @rdx: RDX snapshot > > * @rsp: RSP snapshot (untrusted stack) > > * @r8: R8 snapshot > > * @r9: R9 snapshot > > I'd say here: > > "The registers' content is the snapshot made at enclave exit." I'd make that a description and take away individual parameter descriptions. Is that fine? > > Also, I renamed 'r' as 'run' in some places. > > > > End result: > > > > https://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-sgx.git/tree/arch/x86/include/uapi/asm/sgx.h > > > > I'm wondering this sentence: > > > > "The calling convention is custom and does not follow System V x86-64 ABI." > > Yeah, I was wondering what that meant too. I'll refine that one based on my own and Jethro's feedback. > -- > Regards/Gruss, > Boris. > > https://people.kernel.org/tglx/notes-about-netiquette /Jarkko
On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote: > On 15/09/2020 12:28, Jarkko Sakkinen wrote: > > diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > new file mode 100644 > > index 000000000000..adbd59d41517 > > --- /dev/null > > +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > @@ -0,0 +1,157 @@ > > +SYM_FUNC_START(__vdso_sgx_enter_enclave) > > <snip> > > +.Lretpoline: > > + call 2f > > +1: pause > > + lfence > > + jmp 1b > > +2: mov %rax, (%rsp) > > + ret > > I hate to throw further spanners in the work, but this is not compatible > with CET, and the user shadow stack work in progress. CET goes beyond my expertise. Can you describe, at least rudimentary, how this code is not compatible? I know CET only on a conceptual level (a separate stack holding return addresses as a measure against return-oriented programming (ROP)). > Whichever of these two large series lands first is going to inflict > fixing this problem on the other. > > As the vdso text is global (to a first approximation), it must not be a > retpoline if any other process is liable to want to use CET-SS. Why is that? > If the retpoline really does need to stay, then the vdso probably needs > to gain suitable __x86_indirect_thunk_%reg thunks which are patched at > boot based on the system properties. > > ~Andrew I.e., without CET it is patched, and with CET it is not? /Jarkko
On Mon, Sep 28, 2020 at 02:37:00AM +0300, Jarkko Sakkinen wrote: > I did not get Sean's reply, nor can I find it on lore: > > https://lore.kernel.org/linux-sgx/20200915112842.897265-1-jarkko.sakkinen@linux.intel.com/T/#t Yah, your mail server upgrade broke a lot of stuff. And lore even says it is not there: 2020-09-25 11:43 ` Jethro Beekman [not found] ` <20200925003808.GB20333@linux.intel.com> <--- 2020-09-25 1:04 ` Jarkko Sakkinen Lemme bounce it to you. > I'd make that a description and take away individual parameter > descriptions. Is that fine? Sure.
On Thu, Sep 24, 2020 at 05:38:10PM -0700, Sean Christopherson wrote: > > I can see why you would write "TCS" though - there's a thread control > > structure thing too in that patch. > > Argh, it's actually supposed to be TCS, SGX_ENCLAVE_RUN_TSC is the one that's > wrong. So I presume that I fixed this one already: https://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-sgx.git/tree/arch/x86/entry/vdso/vsgx.S /Jarkko
On Mon, Sep 28, 2020 at 10:30:32AM +0200, Borislav Petkov wrote: > On Mon, Sep 28, 2020 at 02:37:00AM +0300, Jarkko Sakkinen wrote: > > I did not get Sean's reply, nor can I find it on lore: > > > > https://lore.kernel.org/linux-sgx/20200915112842.897265-1-jarkko.sakkinen@linux.intel.com/T/#t > > Yah, your mail server upgrade broke a lot of stuff. And lore even says > it is not there: > > 2020-09-25 11:43 ` Jethro Beekman > [not found] ` <20200925003808.GB20333@linux.intel.com> <--- > 2020-09-25 1:04 ` Jarkko Sakkinen > > Lemme bounce it to you. Thank you. I think I have it right in my tree. And I actually noticed that I had the original email stored in the wrong archive folder on my machine (sorry about that), so I did receive it after all, but it does not exist in lore. > > I'd make that a description and take away individual parameter > > descriptions. Is that fine? > > Sure. /** * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by * __vdso_sgx_enter_enclave() * @run: Pointer to the caller provided struct sgx_enclave_run * * The register parameters contain the snapshot of their values at enclave * exit. * * Return: * 0 or negative to exit vDSO * positive to re-enter enclave (must be EENTER or ERESUME leaf) */ typedef int (*sgx_enclave_exit_handler_t)(long rdi, long rsi, long rdx, long rsp, long r8, long r9, struct sgx_enclave_run *run); I think this looks reasonable now. Another minor clean-up I made is: struct sgx_enclave_run { __u64 tcs; __u32 flags; __u32 exit_reason; __u64 user_handler; __u64 user_data; I.e., I got rid of the "user_handler" union. It makes the struct look less confusing and is consistent with the other structs. I've been thinking about this tail: union { struct sgx_enclave_exception exception; /* Pad the entire struct to 256 bytes. */ __u8 pad[256 - 32]; }; }; I'd just replace this with __u64 exception; }; And do something like (just writing it in the email to show the idea; I have not even compiled this): - mov %eax, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_LEAF)(%rbx) - mov %di, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_TRAPNR)(%rbx) - mov %si, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_ERROR_CODE)(%rbx) - mov %rdx, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_ADDRESS)(%rbx) + mov SGX_ENCLAVE_RUN_EXCEPTION(%rbx), %rbx + + mov %eax, (SGX_EX_LEAF)(%rbx) + mov %di, (SGX_EX_TRAPNR)(%rbx) + mov %si, (SGX_EX_ERROR_CODE)(%rbx) + mov %rdx, (SGX_EX_ADDRESS)(%rbx) > -- > Regards/Gruss, > Boris. > > https://people.kernel.org/tglx/notes-about-netiquette /Jarkko
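As a cross-check, the offsets hard-coded in the assembly (the #defines quoted at the top of the thread) line up with this de-unioned layout; a compile-time sketch, assuming the struct shown above:

#include <stddef.h>

_Static_assert(offsetof(struct sgx_enclave_run, tcs) == 0 * 8, "TCS");
_Static_assert(offsetof(struct sgx_enclave_run, flags) == 1 * 8, "FLAGS");
_Static_assert(offsetof(struct sgx_enclave_run, exit_reason) == 1 * 8 + 4, "EXIT_REASON");
_Static_assert(offsetof(struct sgx_enclave_run, user_handler) == 2 * 8, "USER_HANDLER");
_Static_assert(offsetof(struct sgx_enclave_run, user_data) == 3 * 8, "USER_DATA");

With the tail union replaced by a __u64, offset 4*8 would hold a pointer rather than an embedded struct sgx_enclave_exception, which is exactly why the proposed diff adds the extra dereference of SGX_ENCLAVE_RUN_EXCEPTION(%rbx) before storing the exception fields.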
On 9/25/2020 11:23 AM, Andrew Cooper wrote: > On 15/09/2020 12:28, Jarkko Sakkinen wrote: >> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S >> new file mode 100644 >> index 000000000000..adbd59d41517 >> --- /dev/null >> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S >> @@ -0,0 +1,157 @@ >> +SYM_FUNC_START(__vdso_sgx_enter_enclave) >> <snip> >> +.Lretpoline: >> + call 2f >> +1: pause >> + lfence >> + jmp 1b >> +2: mov %rax, (%rsp) >> + ret > > I hate to throw further spanners in the work, but this is not compatible > with CET, and the user shadow stack work in progress. Hi Jarkko, These 1: and 2: targets are reached only from these few lines? If they are direct call/jmp targets, I think it is OK in terms of CET. If they are reached from an instruction like "jmp *%rax", then we need to put in an "endbr64". Yu-cheng > > Whichever of these two large series lands first is going to inflict > fixing this problem on the other. > > As the vdso text is global (to a first approximation), it must not be a > retpoline if any other process is liable to want to use CET-SS. > > If the retpoline really does need to stay, then the vdso probably needs > to gain suitable __x86_indirect_thunk_%reg thunks which are patched at > boot based on the system properties. > > ~Andrew >
On Mon, Sep 28, 2020 at 8:43 AM Yu, Yu-cheng <yu-cheng.yu@intel.com> wrote: > > On 9/25/2020 11:23 AM, Andrew Cooper wrote: > > On 15/09/2020 12:28, Jarkko Sakkinen wrote: > >> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S > >> new file mode 100644 > >> index 000000000000..adbd59d41517 > >> --- /dev/null > >> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S > >> @@ -0,0 +1,157 @@ > >> +SYM_FUNC_START(__vdso_sgx_enter_enclave) > >> <snip> > >> +.Lretpoline: > >> + call 2f > >> +1: pause > >> + lfence > >> + jmp 1b > >> +2: mov %rax, (%rsp) > >> + ret > > > > I hate to throw further spanners in the work, but this is not compatible > > with CET, and the user shadow stack work in progress. > > Hi Jarkko, > > These 1: and 2: targets are reached only from these few lines? If they > are direct call/jmp targets, I think it is OK in terms of CET. If they > are reached from an instruction like "jmp *%rax", then we need to put in > an "endbr64". > This also isn't compatible with shadow stack.
On 9/28/2020 8:54 AM, H.J. Lu wrote: > On Mon, Sep 28, 2020 at 8:43 AM Yu, Yu-cheng <yu-cheng.yu@intel.com> wrote: >> >> On 9/25/2020 11:23 AM, Andrew Cooper wrote: >>> On 15/09/2020 12:28, Jarkko Sakkinen wrote: >>>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S >>>> new file mode 100644 >>>> index 000000000000..adbd59d41517 >>>> --- /dev/null >>>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S >>>> @@ -0,0 +1,157 @@ >>>> +SYM_FUNC_START(__vdso_sgx_enter_enclave) >>>> <snip> >>>> +.Lretpoline: >>>> + call 2f >>>> +1: pause >>>> + lfence >>>> + jmp 1b >>>> +2: mov %rax, (%rsp) >>>> + ret >>> >>> I hate to throw further spanners in the work, but this is not compatible >>> with CET, and the user shadow stack work in progress. >> >> Hi Jarkko, >> >> These 1: and 2: targets are reached only from these few lines? If they >> are direct call/jmp targets, I think it is OK in terms of CET. If they >> are reached from an instruction like "jmp *%rax", then we need to put in >> an "endbr64". >> > > This also isn't compatible with shadow stack. > Then, when shadow stack is enabled, disable this? Yu-cheng
On 28/09/2020 01:58, Jarkko Sakkinen wrote: > On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote: >> On 15/09/2020 12:28, Jarkko Sakkinen wrote: >>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S >>> new file mode 100644 >>> index 000000000000..adbd59d41517 >>> --- /dev/null >>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S >>> @@ -0,0 +1,157 @@ >>> +SYM_FUNC_START(__vdso_sgx_enter_enclave) >>> <snip> >>> +.Lretpoline: >>> + call 2f >>> +1: pause >>> + lfence >>> + jmp 1b >>> +2: mov %rax, (%rsp) >>> + ret >> I hate to throw further spanners in the work, but this is not compatible >> with CET, and the user shadow stack work in progress. > CET goes beyond my expertise. Can you describe, at least rudimentary, > how this code is not compatible? CET Shadow Stacks detect attacks which modify the return address on the stack. Retpoline *is* a ROP gadget. It really does modify the return address on the stack, even if its purpose is defensive (vs Spectre v2) rather than malicious. >> Whichever of these two large series lands first is going to inflict >> fixing this problem on the other. >> >> As the vdso text is global (to a first approximation), it must not be a >> retpoline if any other process is liable to want to use CET-SS. > Why is that? Because when CET-SS is enabled, the ret will suffer a #CP exception (return address on the stack not matching the one recorded in the shadow stack), which I presume/hope is wired into SIGSEGV. ~Andrew
On Mon, Sep 28, 2020 at 9:44 AM Andrew Cooper <andrew.cooper3@citrix.com> wrote: > > On 28/09/2020 01:58, Jarkko Sakkinen wrote: > > On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote: > >> On 15/09/2020 12:28, Jarkko Sakkinen wrote: > >>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S > >>> new file mode 100644 > >>> index 000000000000..adbd59d41517 > >>> --- /dev/null > >>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S > >>> @@ -0,0 +1,157 @@ > >>> +SYM_FUNC_START(__vdso_sgx_enter_enclave) > >>> <snip> > >>> +.Lretpoline: > >>> + call 2f > >>> +1: pause > >>> + lfence > >>> + jmp 1b > >>> +2: mov %rax, (%rsp) > >>> + ret > >> I hate to throw further spanners in the work, but this is not compatible > >> with CET, and the user shadow stack work in progress. > > CET goes beyond my expertise. Can you describe, at least rudimentary, > > how this code is not compatible? > > CET Shadow Stacks detect attacks which modify the return address on the > stack. > > Retpoline *is* a ROP gadget. It really does modify the return address > on the stack, even if its purpose is defensive (vs Spectre v2) rather > than malicious. > > >> Whichever of these two large series lands first is going to inflict > >> fixing this problem on the other. > >> > >> As the vdso text is global (to a first approximation), it must not be a > >> retpoline if any other process is liable to want to use CET-SS. > > Why is that? > > Because when CET-SS is enabled, the ret will suffer a #CP exception > (return address on the stack not matching the one recorded in the shadow > stack), which I presume/hope is wired into SIGSEGV. > Here is the CET compatible retpoline: endbr64 /* Check if shadow stack is in use. NB: R11 is the only usable scratch register for function calls. */ xorl %r11d, %r11d rdsspq %r11 testq %r11, %r11 jnz 3f call 2f 1: pause lfence jmp 1b 2: mov %rax, (%rsp) ret 3: /* Shadow stack is in use. Make the indirect call. */ call *%rax ret
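The trick in the check above is that RDSSP is architecturally a NOP when shadow stacks are inactive, so the zeroed scratch register survives as the "not in use" marker. The same probe in C, as a sketch (assumes an assembler recent enough to know the instruction):

/* Return non-zero when a shadow stack is active on this thread. RDSSP
 * leaves the destination register untouched when CET-SS is disabled,
 * so ssp keeps its zeroed value in that case. */
static inline int shadow_stack_active(void)
{
	unsigned long ssp = 0;

	asm volatile ("rdsspq %0" : "+r" (ssp));
	return ssp != 0;
}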
On Mon, Sep 28, 2020 at 11:08 AM H.J. Lu <hjl.tools@gmail.com> wrote: > > On Mon, Sep 28, 2020 at 9:44 AM Andrew Cooper <andrew.cooper3@citrix.com> wrote: > > > > On 28/09/2020 01:58, Jarkko Sakkinen wrote: > > > On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote: > > >> On 15/09/2020 12:28, Jarkko Sakkinen wrote: > > >>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > >>> new file mode 100644 > > >>> index 000000000000..adbd59d41517 > > >>> --- /dev/null > > >>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > >>> @@ -0,0 +1,157 @@ > > >>> +SYM_FUNC_START(__vdso_sgx_enter_enclave) > > >>> <snip> > > >>> +.Lretpoline: > > >>> + call 2f > > >>> +1: pause > > >>> + lfence > > >>> + jmp 1b > > >>> +2: mov %rax, (%rsp) > > >>> + ret > > >> I hate to throw further spanners in the work, but this is not compatible > > >> with CET, and the user shadow stack work in progress. > > > CET goes beyond my expertise. Can you describe, at least rudimentary, > > > how this code is not compatible? > > > > CET Shadow Stacks detect attacks which modify the return address on the > > stack. > > > > Retpoline *is* a ROP gadget. It really does modify the return address > > on the stack, even if its purpose is defensive (vs Spectre v2) rather > > than malicious. > > > > >> Whichever of these two large series lands first is going to inflict > > >> fixing this problem on the other. > > >> > > >> As the vdso text is global (to a first approximation), it must not be a > > >> retpoline if any other process is liable to want to use CET-SS. > > > Why is that? > > > > Because when CET-SS is enabled, the ret will suffer a #CP exception > > (return address on the stack not matching the one recorded in the shadow > > stack), which I presume/hope is wired into SIGSEGV. > > > > Here is the CET compatible retpoline: > > endbr64 > /* Check if shadow stack is in use. NB: R11 is the only usable > scratch register for function calls. */ > xorl %r11d, %r11d > rdsspq %r11 > testq %r11, %r11 > jnz 3f > call 2f > 1: > pause > lfence > jmp 1b > 2: > mov %rax, (%rsp) > ret > 3: > /* Shadow stack is in use. Make the indirect call. */ > call *%rax > ret What do we expect user programs to do on CET systems? It would be nice if we could instead ALTERNATIVE this out if X86_FEATURE_SHSTK. --Andy
On 9/28/20 11:12 AM, Andy Lutomirski wrote: >> endbr64 >> /* Check if shadow stack is in use. NB: R11 is the only usable >> scratch register for function calls. */ >> xorl %r11d, %r11d >> rdsspq %r11 >> testq %r11, %r11 >> jnz 3f >> call 2f >> 1: >> pause >> lfence >> jmp 1b >> 2: >> mov %rax, (%rsp) >> ret >> 3: >> /* Shadow stack is in use. Make the indirect call. */ >> call *%rax >> ret > What do we expect user programs to do on CET systems? It would be > nice if we could instead ALTERNATIVE this out if X86_FEATURE_SHSTK. Shouldn't we just be able to use X86_FEATURE_RETPOLINE? We probably need a mechanism to force X86_FEATURE_SHSTK and X86_FEATURE_RETPOLINE to be mutually exclusive if we don't have one already.
On Mon, Sep 28, 2020 at 05:44:35PM +0100, Andrew Cooper wrote: > On 28/09/2020 01:58, Jarkko Sakkinen wrote: > > On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote: > >> On 15/09/2020 12:28, Jarkko Sakkinen wrote: > >>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S > >>> new file mode 100644 > >>> index 000000000000..adbd59d41517 > >>> --- /dev/null > >>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S > >>> @@ -0,0 +1,157 @@ > >>> +SYM_FUNC_START(__vdso_sgx_enter_enclave) > >>> <snip> > >>> +.Lretpoline: > >>> + call 2f > >>> +1: pause > >>> + lfence > >>> + jmp 1b > >>> +2: mov %rax, (%rsp) > >>> + ret > >> I hate to throw further spanners in the work, but this is not compatible > >> with CET, and the user shadow stack work in progress. > > CET goes beyond my expertise. Can you describe, at least rudimentary, > > how this code is not compatible? > > CET Shadow Stacks detect attacks which modify the return address on the > stack. > > Retpoline *is* a ROP gadget. It really does modify the return address > on the stack, even if its purpose is defensive (vs Spectre v2) rather > than malicious. Aah, I get that, yes. The kernel is full of retpolines, but I presume that ring-0 does not use CET. The situation with the callback is as follows: for a runtime, the user_handler is for all practical purposes always the same. There is only ever one user handler that gets executed, i.e. the indirect callback will always lead to the same place. I wonder how much an adversary would gain if we just removed the retpoline bits (I have not thought much about that yet). /Jarkko
On Mon, Sep 28, 2020 at 08:43:16AM -0700, Yu, Yu-cheng wrote: > On 9/25/2020 11:23 AM, Andrew Cooper wrote: > > On 15/09/2020 12:28, Jarkko Sakkinen wrote: > > > diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > > new file mode 100644 > > > index 000000000000..adbd59d41517 > > > --- /dev/null > > > +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > > @@ -0,0 +1,157 @@ > > > +SYM_FUNC_START(__vdso_sgx_enter_enclave) > > > <snip> > > > +.Lretpoline: > > > + call 2f > > > +1: pause > > > + lfence > > > + jmp 1b > > > +2: mov %rax, (%rsp) > > > + ret > > > > I hate to throw further spanners in the work, but this is not compatible > > with CET, and the user shadow stack work in progress. > > Hi Jarkko, > > These 1: and 2: targets are reached only from these few lines? If they are > direct call/jmp targets, I think it is OK in terms of CET. If they are > reached from an instruction like "jmp *%rax", then we need to put in an > "endbr64". > > Yu-cheng The invocation is over here: /* Load the callback pointer to %rax and invoke it via retpoline. */ mov SGX_ENCLAVE_RUN_USER_HANDLER(%rax), %rax call .Lretpoline /Jarkko
On Mon, Sep 28, 2020 at 08:54:01AM -0700, H.J. Lu wrote: > On Mon, Sep 28, 2020 at 8:43 AM Yu, Yu-cheng <yu-cheng.yu@intel.com> wrote: > > > > On 9/25/2020 11:23 AM, Andrew Cooper wrote: > > > On 15/09/2020 12:28, Jarkko Sakkinen wrote: > > >> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > >> new file mode 100644 > > >> index 000000000000..adbd59d41517 > > >> --- /dev/null > > >> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > >> @@ -0,0 +1,157 @@ > > >> +SYM_FUNC_START(__vdso_sgx_enter_enclave) > > >> <snip> > > >> +.Lretpoline: > > >> + call 2f > > >> +1: pause > > >> + lfence > > >> + jmp 1b > > >> +2: mov %rax, (%rsp) > > >> + ret > > > > > > I hate to throw further spanners in the work, but this is not compatible > > > with CET, and the user shadow stack work in progress. > > > > Hi Jarkko, > > > > These 1: and 2: targets are reached only from these few lines? If they > > are direct call/jmp targets, I think it is OK in terms of CET. If they > > are reached from an instruction like "jmp *%rax", then we need to put in > > an "endbr64". > > > > This also isn't compatible with shadow stack. > > -- > H.J. I now have the full picture of the problem thanks to Andrew's response [1], and Dave Hansen just explained the context and background to me in detail in [2]. So I'd guess this will get sorted out soon. If you don't mind, I'll CC you on this commit when I send the next version. [1] https://lkml.org/lkml/2020/9/28/1153 [2] https://lkml.org/lkml/2020/9/25/1122 /Jarkko
On Mon, Sep 28, 2020 at 11:07:47AM -0700, H.J. Lu wrote: > On Mon, Sep 28, 2020 at 9:44 AM Andrew Cooper <andrew.cooper3@citrix.com> wrote: > > > > On 28/09/2020 01:58, Jarkko Sakkinen wrote: > > > On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote: > > >> On 15/09/2020 12:28, Jarkko Sakkinen wrote: > > >>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > >>> new file mode 100644 > > >>> index 000000000000..adbd59d41517 > > >>> --- /dev/null > > >>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > >>> @@ -0,0 +1,157 @@ > > >>> +SYM_FUNC_START(__vdso_sgx_enter_enclave) > > >>> <snip> > > >>> +.Lretpoline: > > >>> + call 2f > > >>> +1: pause > > >>> + lfence > > >>> + jmp 1b > > >>> +2: mov %rax, (%rsp) > > >>> + ret > > >> I hate to throw further spanners in the work, but this is not compatible > > >> with CET, and the user shadow stack work in progress. > > > CET goes beyond my expertise. Can you describe, at least rudimentary, > > > how this code is not compatible? > > > > CET Shadow Stacks detect attacks which modify the return address on the > > stack. > > > > Retpoline *is* a ROP gadget. It really does modify the return address > > on the stack, even if its purpose is defensive (vs Spectre v2) rather > > than malicious. > > > > >> Whichever of these two large series lands first is going to inflict > > >> fixing this problem on the other. > > >> > > >> As the vdso text is global (to a first approximation), it must not be a > > >> retpoline if any other process is liable to want to use CET-SS. > > > Why is that? > > > > Because when CET-SS is enabled, the ret will suffer a #CP exception > > (return address on the stack not matching the one recorded in the shadow > > stack), which I presume/hope is wired into SIGSEGV. > > > > Here is the CET compatible retpoline: > > endbr64 > /* Check if shadow stack is in use. NB: R11 is the only usable > scratch register for function calls. */ > xorl %r11d, %r11d > rdsspq %r11 > testq %r11, %r11 > jnz 3f > call 2f > 1: > pause > lfence > jmp 1b > 2: > mov %rax, (%rsp) > ret > 3: > /* Shadow stack is in use. Make the indirect call. */ > call *%rax > ret Right, so I have actually two alternatives: this and boot time patching: https://lkml.org/lkml/2020/9/25/1122 /Jarkko
On Mon, Sep 28, 2020 at 11:12:08AM -0700, Andy Lutomirski wrote: > On Mon, Sep 28, 2020 at 11:08 AM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > On Mon, Sep 28, 2020 at 9:44 AM Andrew Cooper <andrew.cooper3@citrix.com> wrote: > > > > > > On 28/09/2020 01:58, Jarkko Sakkinen wrote: > > > > On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote: > > > >> On 15/09/2020 12:28, Jarkko Sakkinen wrote: > > > >>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > > >>> new file mode 100644 > > > >>> index 000000000000..adbd59d41517 > > > >>> --- /dev/null > > > >>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > > >>> @@ -0,0 +1,157 @@ > > > >>> +SYM_FUNC_START(__vdso_sgx_enter_enclave) > > > >>> <snip> > > > >>> +.Lretpoline: > > > >>> + call 2f > > > >>> +1: pause > > > >>> + lfence > > > >>> + jmp 1b > > > >>> +2: mov %rax, (%rsp) > > > >>> + ret > > > >> I hate to throw further spanners in the work, but this is not compatible > > > >> with CET, and the user shadow stack work in progress. > > > > CET goes beyond my expertise. Can you describe, at least rudimentary, > > > > how this code is not compatible? > > > > > > CET Shadow Stacks detect attacks which modify the return address on the > > > stack. > > > > > > Retpoline *is* a ROP gadget. It really does modify the return address > > > on the stack, even if its purpose is defensive (vs Spectre v2) rather > > > than malicious. > > > > > > >> Whichever of these two large series lands first is going to inflict > > > >> fixing this problem on the other. > > > >> > > > >> As the vdso text is global (to a first approximation), it must not be a > > > >> retpoline if any other process is liable to want to use CET-SS. > > > > Why is that? > > > > > > Because when CET-SS is enabled, the ret will suffer a #CP exception > > > (return address on the stack not matching the one recorded in the shadow > > > stack), which I presume/hope is wired into SIGSEGV. > > > > > > > Here is the CET compatible retpoline: > > > > endbr64 > > /* Check if shadow stack is in use. NB: R11 is the only usable > > scratch register for function calls. */ > > xorl %r11d, %r11d > > rdsspq %r11 > > testq %r11, %r11 > > jnz 3f > > call 2f > > 1: > > pause > > lfence > > jmp 1b > > 2: > > mov %rax, (%rsp) > > ret > > 3: > > /* Shadow stack is in use. Make the indirect call. */ > > call *%rax > > ret > > What do we expect user programs to do on CET systems? It would be > nice if we could instead ALTERNATIVE this out if X86_FEATURE_SHSTK. > > --Andy I'm open to do either solution. My thinking was to initially do things vsgx.S local (i.e. consider ALTERNATIVE post upstreaming) and use the above solution but I'm also fine doing ALTERNATIVE. Dave kindly briefed on details how that thing works and it should be perfectly usable for our use case. /Jarkko
On Mon, Sep 28, 2020 at 2:56 PM Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote: > > On Mon, Sep 28, 2020 at 11:12:08AM -0700, Andy Lutomirski wrote: > > On Mon, Sep 28, 2020 at 11:08 AM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > On Mon, Sep 28, 2020 at 9:44 AM Andrew Cooper <andrew.cooper3@citrix.com> wrote: > > > > > > > > On 28/09/2020 01:58, Jarkko Sakkinen wrote: > > > > > On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote: > > > > >> On 15/09/2020 12:28, Jarkko Sakkinen wrote: > > > > >>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > > > >>> new file mode 100644 > > > > >>> index 000000000000..adbd59d41517 > > > > >>> --- /dev/null > > > > >>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S > > > > >>> @@ -0,0 +1,157 @@ > > > > >>> +SYM_FUNC_START(__vdso_sgx_enter_enclave) > > > > >>> <snip> > > > > >>> +.Lretpoline: > > > > >>> + call 2f > > > > >>> +1: pause > > > > >>> + lfence > > > > >>> + jmp 1b > > > > >>> +2: mov %rax, (%rsp) > > > > >>> + ret > > > > >> I hate to throw further spanners in the work, but this is not compatible > > > > >> with CET, and the user shadow stack work in progress. > > > > > CET goes beyond my expertise. Can you describe, at least rudimentary, > > > > > how this code is not compatible? > > > > > > > > CET Shadow Stacks detect attacks which modify the return address on the > > > > stack. > > > > > > > > Retpoline *is* a ROP gadget. It really does modify the return address > > > > on the stack, even if its purpose is defensive (vs Spectre v2) rather > > > > than malicious. > > > > > > > > >> Whichever of these two large series lands first is going to inflict > > > > >> fixing this problem on the other. > > > > >> > > > > >> As the vdso text is global (to a first approximation), it must not be a > > > > >> retpoline if any other process is liable to want to use CET-SS. > > > > > Why is that? > > > > > > > > Because when CET-SS is enabled, the ret will suffer a #CP exception > > > > (return address on the stack not matching the one recorded in the shadow > > > > stack), which I presume/hope is wired into SIGSEGV. > > > > > > > > > > Here is the CET compatible retpoline: > > > > > > endbr64 > > > /* Check if shadow stack is in use. NB: R11 is the only usable > > > scratch register for function calls. */ > > > xorl %r11d, %r11d > > > rdsspq %r11 > > > testq %r11, %r11 > > > jnz 3f > > > call 2f > > > 1: > > > pause > > > lfence > > > jmp 1b > > > 2: > > > mov %rax, (%rsp) > > > ret > > > 3: > > > /* Shadow stack is in use. Make the indirect call. */ > > > call *%rax > > > ret > > > > What do we expect user programs to do on CET systems? It would be > > nice if we could instead ALTERNATIVE this out if X86_FEATURE_SHSTK. > > > > --Andy > > I'm open to do either solution. My thinking was to initially do things > vsgx.S local (i.e. consider ALTERNATIVE post upstreaming) and use the > above solution but I'm also fine doing ALTERNATIVE. Dave kindly briefed > on details how that thing works and it should be perfectly usable for > our use case. > Since SHSTK and IBT are enabled per process, not the whole machine, are you going to patch vDSO on a per-process basis?
On Mon, Sep 28, 2020 at 11:17:42AM -0700, Dave Hansen wrote: > On 9/28/20 11:12 AM, Andy Lutomirski wrote: > >> endbr64 > >> /* Check if shadow stack is in use. NB: R11 is the only usable > >> scratch register for function calls. */ > >> xorl %r11d, %r11d > >> rdsspq %r11 > >> testq %r11, %r11 > >> jnz 3f > >> call 2f > >> 1: > >> pause > >> lfence > >> jmp 1b > >> 2: > >> mov %rax, (%rsp) > >> ret > >> 3: > >> /* Shadow stack is in use. Make the indirect call. */ > >> call *%rax > >> ret > > What do we expect user programs to do on CET systems? It would be > > nice if we could instead ALTERNATIVE this out if X86_FEATURE_SHSTK. > > Shouldn't we just be able to use X86_FEATURE_RETPOLINE? > > We probably need a mechanism to force X86_FEATURE_SHSTK and > X86_FEATURE_RETPOLINE to be mutually exclusive if we don't have one already. First of all: let's go with boot-time patching instead of dynamic detection. It's both easier to implement and, by all other merits, makes a lot more sense. It was just a thing that I had not used before. That sorted out, does it matter which direction I look at it from? I could use either feature flag as the basis (and I do not have a personal preference here). /Jarkko
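For illustration only, a boot-time-patched call site might look roughly like the sketch below. This is a sketch, not the posted code: it assumes the asm-side ALTERNATIVE macro (<asm/alternative-asm.h> at the time) could be applied to vDSO text, which the alternatives machinery does not do for the vDSO as posted, and it borrows the X86_FEATURE_SHSTK flag from the in-progress CET series; per Dave's note above, X86_FEATURE_RETPOLINE could just as well be the basis, with the two operands swapped.

	#include <asm/alternative-asm.h>	/* asm ALTERNATIVE macro, assumed usable here */

	/*
	 * Sketch only: default to the retpoline call, patch in a plain
	 * indirect call on shadow-stack-capable parts.  Assumes
	 * X86_FEATURE_SHSTK exists and that alternatives are applied to
	 * the vDSO image at boot, neither of which is true in this
	 * series as posted.
	 */
	mov	SGX_ENCLAVE_RUN_USER_HANDLER(%rax), %rax
	ALTERNATIVE "call .Lretpoline", "call *%rax", X86_FEATURE_SHSTK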
On 9/28/20 3:06 PM, H.J. Lu wrote: >> I'm open to do either solution. My thinking was to initially do things >> vsgx.S local (i.e. consider ALTERNATIVE post upstreaming) and use the >> above solution but I'm also fine doing ALTERNATIVE. Dave kindly briefed >> on details how that thing works and it should be perfectly usable for >> our use case. >> > Since SHSTK and IBT are enabled per process, not the whole machine, > are you going to patch vDSO on a per-process basis? No. Retpolines mitigate Spectre v2 attacks. If you're not vulnerable to Spectre v2, you don't need retpolines. All processors which support CET *also* have hardware mitigations against Spectre v2 and don't need retpolines. Here's all of the possibilities: CET=y, BUG_SPECTRE_V2=y: does not exist CET=n, BUG_SPECTRE_V2=y: vulnerable, use retpoline CET=y, BUG_SPECTRE_V2=n: no retpoline, not vulnerable CET=n, BUG_SPECTRE_V2=n: no retpoline, not vulnerable
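Dave's matrix collapses to a single boot-time predicate. A hedged C sketch of that decision follows; X86_BUG_SPECTRE_V2 is an existing bug bit, while X86_FEATURE_SHSTK is assumed from the in-progress CET series:

	#include <linux/types.h>
	#include <asm/cpufeature.h>

	/*
	 * Sketch of the decision implied by the matrix above: a retpoline
	 * is wanted only on parts vulnerable to Spectre v2, and those
	 * parts never support CET shadow stacks.
	 */
	static bool vdso_sgx_wants_retpoline(void)
	{
		return boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
		       !boot_cpu_has(X86_FEATURE_SHSTK);
	}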
On Mon, Sep 28, 2020 at 3:18 PM Dave Hansen <dave.hansen@intel.com> wrote: > > On 9/28/20 3:06 PM, H.J. Lu wrote: > >> I'm open to do either solution. My thinking was to initially do things > >> vsgx.S local (i.e. consider ALTERNATIVE post upstreaming) and use the > >> above solution but I'm also fine doing ALTERNATIVE. Dave kindly briefed > >> on details how that thing works and it should be perfectly usable for > >> our use case. > >> > > Since SHSTK and IBT are enabled per process, not the whole machine, > > are you going to patch vDSO on a per-process basis? > > No. > > Retpolines mitigate Spectre v2 attacks. If you're not vulnerable to > Spectre v2, you don't need retpolines. > > All processors which support CET *also* have hardware mitigations > against Spectre v2 and don't need retpolines. Here's all of the > possibilities: > > CET=y, BUG_SPECTRE_V2=y: does not exist > CET=n, BUG_SPECTRE_V2=y: vulnerable, use retpoline > CET=y, BUG_SPECTRE_V2=n: no retpoline, not vulnerable > CET=n, BUG_SPECTRE_V2=n: no retpoline, not vulnerable Just to confirm: does this mean that the CPU mitigates against user code mistraining the branch predictors for CPL0? Because this is the vDSO, and the situation we're actually concerned about is user code mistraining its own branch predictors. This could happen cross-process or within the same process.
On 28/09/2020 23:41, Andy Lutomirski wrote: > On Mon, Sep 28, 2020 at 3:18 PM Dave Hansen <dave.hansen@intel.com> wrote: >> On 9/28/20 3:06 PM, H.J. Lu wrote: >>>> I'm open to do either solution. My thinking was to initially do things >>>> vsgx.S local (i.e. consider ALTERNATIVE post upstreaming) and use the >>>> above solution but I'm also fine doing ALTERNATIVE. Dave kindly briefed >>>> on details how that thing works and it should be perfectly usable for >>>> our use case. >>>> >>> Since SHSTK and IBT are enabled per process, not the whole machine, >>> are you going to patch vDSO on a per-process basis? >> No. >> >> Retpolines mitigate Spectre v2 attacks. If you're not vulnerable to >> Spectre v2, you don't need retpolines. >> >> All processors which support CET *also* have hardware mitigations >> against Spectre v2 and don't need retpolines. Here's all of the >> possibilities: >> >> CET=y, BUG_SPECTRE_V2=y: does not exist >> CET=n, BUG_SPECTRE_V2=y: vulnerable, use retpoline >> CET=y, BUG_SPECTRE_V2=n: no retpoline, not vulnerable >> CET=n, BUG_SPECTRE_V2=n: no retpoline, not vulnerable > Just to confirm: does this mean that the CPU mitigates against user > code mistraining the branch predictors for CPL0? If (and only if) you have eIBRS enabled. eIBRS should be available on all CET-capable hardware, and Linux ought to use it by default. > Because this is the > vDSO, and the situation we're actually concerned about is user code > mistraining its own branch predictors. This could happen > cross-process or within the same process. There is nothing (in Intel parts) which prevents same-mode training of indirect branches, either in user or kernel space. However, an IBPB on context switch should prevent cross-process training attacks. ~Andrew
On 28/09/2020 21:42, Jarkko Sakkinen wrote: > On Mon, Sep 28, 2020 at 05:44:35PM +0100, Andrew Cooper wrote: >> On 28/09/2020 01:58, Jarkko Sakkinen wrote: >>> On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote: >>>> On 15/09/2020 12:28, Jarkko Sakkinen wrote: >>>>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S >>>>> new file mode 100644 >>>>> index 000000000000..adbd59d41517 >>>>> --- /dev/null >>>>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S >>>>> @@ -0,0 +1,157 @@ >>>>> +SYM_FUNC_START(__vdso_sgx_enter_enclave) >>>>> <snip> >>>>> +.Lretpoline: >>>>> + call 2f >>>>> +1: pause >>>>> + lfence >>>>> + jmp 1b >>>>> +2: mov %rax, (%rsp) >>>>> + ret >>>> I hate to throw further spanners in the work, but this is not compatible >>>> with CET, and the user shadow stack work in progress. >>> CET goes beyond my expertise. Can you describe, at least rudimentary, >>> how this code is not compatible? >> CET Shadow Stacks detect attacks which modify the return address on the >> stack. >> >> Retpoline *is* a ROP gadget. It really does modify the return address >> on the stack, even if its purpose is defensive (vs Spectre v2) rather >> than malicious. > Aah. I get that, yes. > > Kernel is full of retpoline but I presume that ring-0 does not use CET. No-one has implemented CET-SS support for Linux itself yet, but other kernels do have it working. ~Andrew
On 9/28/20 4:38 PM, Andrew Cooper wrote: >>> CET=y, BUG_SPECTRE_V2=y: does not exist >>> CET=n, BUG_SPECTRE_V2=y: vulnerable, use retpoline >>> CET=y, BUG_SPECTRE_V2=n: no retpoline, not vulnerable >>> CET=n, BUG_SPECTRE_V2=n: no retpoline, not vulnerable >> Just to confirm: does this mean that the CPU mitigates against user >> code mistraining the branch predictors for CPL0? > If (and only if) you have eIBRS enabled. > > eIBRS should be available on all CET-capable hardware, and Linux ought > to use it by default. You're totally right, of course. I was (wrongly) thinking about this VDSO retpoline as kernel code. There's another wrinkle here. Let's say we're vulnerable to a Spectre-v2-style attack and we want to mitigate it on CET hardware that has enhanced IBRS. I'm not sure how reliable of a mitigation retpolines are on enhanced IBRS hardware. Intel has recommended _against_ using them in some cases: > https://software.intel.com/security-software-guidance/api-app/sites/default/files/Retpoline-A-Branch-Target-Injection-Mitigation.pdf "On processors that support enhanced IBRS, it should be used for mitigation instead of retpoline." I actually authored that bit of the whitepaper, and I recall that this was not simply a recommendation based on performance advantages of using enhanced IBRS. I can dig through some old email if we decide that we want to explore using a retpoline on enhanced IBRS hardware. But, let's take a step back. The changelog for this patch needs to at least have: 1. What is the attack being mitigated by the retpoline? 2. Do we actually want to mitigate it? 3. What options are there to mitigate it? 4. Which option does this patch use and why? Right now, there's not even a comment about this.
On 29/09/2020 15:10, Dave Hansen wrote: > On 9/28/20 4:38 PM, Andrew Cooper wrote: >>>> CET=y, BUG_SPECTRE_V2=y: does not exist >>>> CET=n, BUG_SPECTRE_V2=y: vulnerable, use retpoline >>>> CET=y, BUG_SPECTRE_V2=n: no retpoline, not vulnerable >>>> CET=n, BUG_SPECTRE_V2=n: no retpoline, not vulnerable >>> Just to confirm: does this mean that the CPU mitigates against user >>> code mistraining the branch predictors for CPL0? >> If (and only if) you have eIBRS enabled. >> >> eIBRS should be available on all CET-capable hardware, and Linux ought >> to use it by default. > You're totally right, of course. I was (wrongly) thinking about this > VDSO retpoline as kernel code. > > There's another wrinkle here. Let's say we're vulnerable to a > Spectre-v2-style attack and we want to mitigate it on CET hardware that > has enhanced IBRS. I'm not sure how reliable of a mitigation retpolines > are on enhanced IBRS hardware. Intel has recommended _against_ using > them in some cases: > >> https://software.intel.com/security-software-guidance/api-app/sites/default/files/Retpoline-A-Branch-Target-Injection-Mitigation.pdf > "On processors that support enhanced IBRS, it should be used for > mitigation instead of retpoline." > I actually authored that bit of the whitepaper, and I recall that this > was not simply a recommendation based on performance advantages of using > enhanced IBRS. I can dig through some old email if we decide that we > want to explore using a retpoline on enhanced IBRS hardware. If only life were simple. In light of https://arxiv.org/abs/2008.02307, which managed to demonstrate that the original KAISER was actually a speculative attack and had nothing to do with the prefetch instruction, a discussion about same-mode training happened. The updated recommendation given was to continue using retpoline as well as eIBRS to prevent same-mode training of the syscall indirect branch. Josh (CC'd) has been doing a lot of work to find and fix other speculative leaks in this area. For Skylake uarch and later, even if an RSB underflow leads to a BTB lookup, it still requires an interrupt/NMI to hit one of two instruction boundaries to empty the RSB, and an attacker with that level of control probably has more interesting things to be trying to do. Without retpoline (or something even more expensive such as IRET-ing around), an attacker can still create speculative type confusion between different system calls, when eIBRS is active. Once you mix CET-SS in, this breaks, unless you're prepared to update the retpoline gadget to include a WRSS to modify the shadow stack alongside the regular stack. Add this to the large pile of fun for whoever has the privileg^W chore of implementing supervisor CET support. > > But, let's take a step back. The changelog for this patch needs to at > least have: > > 1. What is the attack being mitigated by the retpoline? > 2. Do we actually want to mitigate it? > 3. What options are there to mitigate it? > 4. Which option does this patch use and why? > > Right now, there's not even a comment about this. I agree. The reason for using a retpoline here in the first place is unclear. ~Andrew
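To make Andrew's WRSS point concrete, such an augmented gadget might look roughly like the hedged sketch below. It assumes WRSS has been enabled for the executing privilege level and that SSP still points at the return-address slot pushed by the preceding call; nothing like it exists in the posted series.

	/*
	 * Sketch only: a retpoline landing pad that keeps CET-SS coherent
	 * by rewriting the shadow-stack copy of the return address
	 * alongside the regular-stack copy.  Assumes WRSS is enabled
	 * (CR4.CET plus the relevant MSR bits).
	 */
	mov	%rax, (%rsp)	/* redirect the normal-stack return address */
	rdsspq	%r11		/* SSP -> shadow-stack copy of that address */
	wrssq	%rax, (%r11)	/* make the shadow stack agree with %rax */
	ret			/* now passes the CET-SS return check */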
On Tue, Sep 29, 2020 at 12:52:35AM +0100, Andrew Cooper wrote: > On 28/09/2020 21:42, Jarkko Sakkinen wrote: > > On Mon, Sep 28, 2020 at 05:44:35PM +0100, Andrew Cooper wrote: > >> On 28/09/2020 01:58, Jarkko Sakkinen wrote: > >>> On Fri, Sep 25, 2020 at 07:23:59PM +0100, Andrew Cooper wrote: > >>>> On 15/09/2020 12:28, Jarkko Sakkinen wrote: > >>>>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S > >>>>> new file mode 100644 > >>>>> index 000000000000..adbd59d41517 > >>>>> --- /dev/null > >>>>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S > >>>>> @@ -0,0 +1,157 @@ > >>>>> +SYM_FUNC_START(__vdso_sgx_enter_enclave) > >>>>> <snip> > >>>>> +.Lretpoline: > >>>>> + call 2f > >>>>> +1: pause > >>>>> + lfence > >>>>> + jmp 1b > >>>>> +2: mov %rax, (%rsp) > >>>>> + ret > >>>> I hate to throw further spanners in the work, but this is not compatible > >>>> with CET, and the user shadow stack work in progress. > >>> CET goes beyond my expertise. Can you describe, at least rudimentary, > >>> how this code is not compatible? > >> CET Shadow Stacks detect attacks which modify the return address on the > >> stack. > >> > >> Retpoline *is* a ROP gadget. It really does modify the return address > >> on the stack, even if its purpose is defensive (vs Spectre v2) rather > >> than malicious. > > Aah. I get that, yes. > > > > Kernel is full of retpoline but I presume that ring-0 does not use CET. > > No-one has implemented CET-SS support for Linux itself yet, but > other kernels do have it working. > > ~Andrew Haitao, can you point out the user handler callback in the Intel SGX runtime? In a practical deployment there is only a single global callback, provided by the runtime. AFAIK, it just copies values, does not do any round trips, and is generally a very trivial piece of code, but it is better to check it before the final say. I've now experimented with ALTERNATIVE() and it can definitely be made to work. I'm just wondering whether it would be best not to use a retpoline at all. My guess is that the callback would not have much applicability in Spectre'ish attacks, but I do not have enough expertise on that to conclude it even semi-formally. My intention is to find reasonable grounds to drop it instead of adding more complexity to the vDSO. /Jarkko
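For reference while that is checked, such a callback has to match the sgx_enclave_exit_handler_t prototype from the patch below. A hypothetical minimal example of the trivial kind described (the function name and body are illustrative, not taken from the Intel runtime):

	#include <asm/sgx.h>	/* struct sgx_enclave_run, from this patch */

	/*
	 * Hypothetical trivial exit handler: it only inspects the exit
	 * state and never asks the vDSO to re-enter the enclave.
	 */
	static int trivial_exit_handler(long rdi, long rsi, long rdx,
					long ursp, long r8, long r9,
					struct sgx_enclave_run *r)
	{
		/* e.g. stash r->exit_reason / r->exception somewhere */
		return 0;	/* 0: return to the __vdso_sgx_enter_enclave() caller */
	}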
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 3f183d0b8826..416f9432269d 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -29,6 +29,7 @@ VDSO32-$(CONFIG_IA32_EMULATION) := y vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o vobjs32-y += vdso32/vclock_gettime.o +vobjs-$(VDSO64-y) += vsgx_enter_enclave.o # files to link into kernel obj-y += vma.o extable.o @@ -100,6 +101,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS CFLAGS_REMOVE_vclock_gettime.o = -pg CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg CFLAGS_REMOVE_vgetcpu.o = -pg +CFLAGS_REMOVE_vsgx_enter_enclave.o = -pg # # X32 processes use x32 vDSO to access 64bit kernel data. diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S index 36b644e16272..4bf48462fca7 100644 --- a/arch/x86/entry/vdso/vdso.lds.S +++ b/arch/x86/entry/vdso/vdso.lds.S @@ -27,6 +27,7 @@ VERSION { __vdso_time; clock_getres; __vdso_clock_getres; + __vdso_sgx_enter_enclave; local: *; }; } diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S new file mode 100644 index 000000000000..adbd59d41517 --- /dev/null +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/linkage.h> +#include <asm/export.h> +#include <asm/errno.h> +#include <asm/enclu.h> + +#include "extable.h" + +/* Offset of 'struct sgx_enclave_run' relative to %rbp. */ +#define SGX_ENCLAVE_RUN_PTR 2*8 + +/* Offsets into 'struct sgx_enclave_run'. */ +#define SGX_ENCLAVE_RUN_TSC 0*8 +#define SGX_ENCLAVE_RUN_FLAGS 1*8 +#define SGX_ENCLAVE_RUN_EXIT_REASON 1*8 + 4 +#define SGX_ENCLAVE_RUN_USER_HANDLER 2*8 +/* #define SGX_ENCLAVE_RUN_USER_DATA 3*8 */ +#define SGX_ENCLAVE_RUN_EXCEPTION 4*8 + +#define SGX_SYNCHRONOUS_EXIT 0 +#define SGX_EXCEPTION_EXIT 1 + +/* Offsets into sgx_enter_enclave.exception. */ +#define SGX_EX_LEAF 0*8 +#define SGX_EX_TRAPNR 0*8+4 +#define SGX_EX_ERROR_CODE 0*8+6 +#define SGX_EX_ADDRESS 1*8 + +.code64 +.section .text, "ax" + +SYM_FUNC_START(__vdso_sgx_enter_enclave) + /* Prolog */ + .cfi_startproc + push %rbp + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %rbp, 0 + mov %rsp, %rbp + .cfi_def_cfa_register %rbp + push %rbx + .cfi_rel_offset %rbx, -8 + + mov %ecx, %eax +.Lenter_enclave: + /* EENTER <= leaf <= ERESUME */ + cmp $EENTER, %eax + jb .Linvalid_input + cmp $ERESUME, %eax + ja .Linvalid_input + + mov SGX_ENCLAVE_RUN_PTR(%rbp), %rcx + + /* No flags are currently defined/supported. */ + cmpl $0, SGX_ENCLAVE_RUN_FLAGS(%rcx) + jne .Linvalid_input + + /* Load TCS and AEP */ + mov SGX_ENCLAVE_RUN_TSC(%rcx), %rbx + lea .Lasync_exit_pointer(%rip), %rcx + + /* Single ENCLU serving as both EENTER and AEP (ERESUME) */ +.Lasync_exit_pointer: +.Lenclu_eenter_eresume: + enclu + + /* EEXIT jumps here unless the enclave is doing something fancy. */ + mov SGX_ENCLAVE_RUN_PTR(%rbp), %rbx + + /* Set exit_reason. */ + movl $SGX_SYNCHRONOUS_EXIT, SGX_ENCLAVE_RUN_EXIT_REASON(%rbx) + + /* Invoke userspace's exit handler if one was provided. */ +.Lhandle_exit: + cmpq $0, SGX_ENCLAVE_RUN_USER_HANDLER(%rbx) + jne .Linvoke_userspace_handler + + /* Success, in the sense that ENCLU was attempted. */ + xor %eax, %eax + +.Lout: + pop %rbx + leave + .cfi_def_cfa %rsp, 8 + ret + + /* The out-of-line code runs with the pre-leave stack frame.
*/ + .cfi_def_cfa %rbp, 16 + +.Linvalid_input: + mov $(-EINVAL), %eax + jmp .Lout + +.Lhandle_exception: + mov SGX_ENCLAVE_RUN_PTR(%rbp), %rbx + + /* Set the exit_reason and exception info. */ + movl $SGX_EXCEPTION_EXIT, SGX_ENCLAVE_RUN_EXIT_REASON(%rbx) + + mov %eax, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_LEAF)(%rbx) + mov %di, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_TRAPNR)(%rbx) + mov %si, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_ERROR_CODE)(%rbx) + mov %rdx, (SGX_ENCLAVE_RUN_EXCEPTION + SGX_EX_ADDRESS)(%rbx) + jmp .Lhandle_exit + +.Linvoke_userspace_handler: + /* Pass the untrusted RSP (at exit) to the callback via %rcx. */ + mov %rsp, %rcx + + /* Save @e, %rbx is about to be clobbered. */ + mov %rbx, %rax + + /* Save the untrusted RSP offset in %rbx (non-volatile register). */ + mov %rsp, %rbx + and $0xf, %rbx + + /* + * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned + * _after_ pushing the parameters on the stack, hence the bonus push. + */ + and $-0x10, %rsp + push %rax + + /* Push @e as a param to the callback. */ + push %rax + + /* Clear RFLAGS.DF per x86_64 ABI */ + cld + + /* Load the callback pointer to %rax and invoke it via retpoline. */ + mov SGX_ENCLAVE_RUN_USER_HANDLER(%rax), %rax + call .Lretpoline + + /* Undo the post-exit %rsp adjustment. */ + lea 0x10(%rsp, %rbx), %rsp + + /* + * If the return from callback is zero or negative, return immediately, + * else re-execute ENCLU with the positive return value interpreted as + * the requested ENCLU leaf. + */ + cmp $0, %eax + jle .Lout + jmp .Lenter_enclave + +.Lretpoline: + call 2f +1: pause + lfence + jmp 1b +2: mov %rax, (%rsp) + ret + .cfi_endproc + +_ASM_VDSO_EXTABLE_HANDLE(.Lenclu_eenter_eresume, .Lhandle_exception) + +SYM_FUNC_END(__vdso_sgx_enter_enclave) diff --git a/arch/x86/include/asm/enclu.h b/arch/x86/include/asm/enclu.h new file mode 100644 index 000000000000..06157b3e9ede --- /dev/null +++ b/arch/x86/include/asm/enclu.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ENCLU_H +#define _ASM_X86_ENCLU_H + +#define EENTER 0x02 +#define ERESUME 0x03 + +#endif /* _ASM_X86_ENCLU_H */ diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h index d0916fb9629e..1564d7f88597 100644 --- a/arch/x86/include/uapi/asm/sgx.h +++ b/arch/x86/include/uapi/asm/sgx.h @@ -72,4 +72,132 @@ struct sgx_enclave_provision { __u64 attribute_fd; }; +#define SGX_SYNCHRONOUS_EXIT 0 +#define SGX_EXCEPTION_EXIT 1 + +struct sgx_enclave_run; + +/** + * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by + * __vdso_sgx_enter_enclave() + * + * @rdi: RDI at the time of enclave exit + * @rsi: RSI at the time of enclave exit + * @rdx: RDX at the time of enclave exit + * @ursp: RSP at the time of enclave exit (untrusted stack) + * @r8: R8 at the time of enclave exit + * @r9: R9 at the time of enclave exit + * @r: Pointer to struct sgx_enclave_run (as provided by caller) + * + * Return: + * 0 or negative to exit vDSO + * positive to re-enter enclave (must be EENTER or ERESUME leaf) + */ +typedef int (*sgx_enclave_exit_handler_t)(long rdi, long rsi, long rdx, + long ursp, long r8, long r9, + struct sgx_enclave_run *r); + +/** + * struct sgx_enclave_exception - structure to report exceptions encountered in + * __vdso_sgx_enter_enclave() + * + * @leaf: ENCLU leaf from \%eax at time of exception + * @trapnr: exception trap number, a.k.a. fault vector + * @error_code: exception error code + * @address: exception address, e.g.
CR2 on a #PF + */ +struct sgx_enclave_exception { + __u32 leaf; + __u16 trapnr; + __u16 error_code; + __u64 address; +}; + +/** + * struct sgx_enclave_run - Control structure for __vdso_sgx_enter_enclave() + * + * @tcs: Thread Control Structure used to enter enclave + * @flags: Control flags + * @exit_reason: Cause of exit from enclave, e.g. EEXIT vs. exception + * @user_handler: User provided exit handler (optional) + * @user_data: User provided opaque value (optional) + * @exception: Valid on exit due to exception + */ +struct sgx_enclave_run { + __u64 tcs; + __u32 flags; + __u32 exit_reason; + + union { + sgx_enclave_exit_handler_t user_handler; + __u64 __user_handler; + }; + __u64 user_data; + + union { + struct sgx_enclave_exception exception; + + /* Pad the entire struct to 256 bytes. */ + __u8 pad[256 - 32]; + }; +}; + +/** + * typedef vdso_sgx_enter_enclave_t - Prototype for __vdso_sgx_enter_enclave(), + * a vDSO function to enter an SGX enclave. + * + * @rdi: Pass-through value for RDI + * @rsi: Pass-through value for RSI + * @rdx: Pass-through value for RDX + * @leaf: ENCLU leaf, must be EENTER or ERESUME + * @r8: Pass-through value for R8 + * @r9: Pass-through value for R9 + * @r: struct sgx_enclave_run, must be non-NULL + * + * NOTE: __vdso_sgx_enter_enclave() does not ensure full compliance with the + * x86-64 ABI, e.g. doesn't handle XSAVE state. Except for non-volatile + * general purpose registers, EFLAGS.DF, and RSP alignment, preserving/setting + * state in accordance with the x86-64 ABI is the responsibility of the enclave + * and its runtime, i.e. __vdso_sgx_enter_enclave() cannot be called from C + * code without careful consideration by both the enclave and its runtime. + * + * All general purpose registers except RAX, RBX and RCX are passed as-is to + * the enclave. RAX, RBX and RCX are consumed by EENTER and ERESUME and are + * loaded with @leaf, asynchronous exit pointer, and @tcs respectively. + * + * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the + * pre-enclave state, e.g. to retrieve @e and @handler after an enclave exit. + * All other registers are available for use by the enclave and its runtime, + * e.g. an enclave can push additional data onto the stack (and modify RSP) to + * pass information to the optional exit handler (see below). + * + * Most exceptions reported on ENCLU, including those that occur within the + * enclave, are fixed up and reported synchronously instead of being delivered + * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are + * never fixed up and are always delivered via standard signals. On + * synchronously reported exceptions, -EFAULT is returned and details about the + * exception are recorded in @e, the optional sgx_enclave_exception struct. + * + * If an exit handler is provided, the handler will be invoked on synchronous + * exits from the enclave and for all synchronously reported exceptions. In + * the latter case, @e is filled prior to invoking the handler. + * + * The exit handler's return value is interpreted as follows: + * >0: continue, restart __vdso_sgx_enter_enclave() with @ret as @leaf + * 0: success, return @ret to the caller + * <0: error, return @ret to the caller + * + * The exit handler may transfer control, e.g. via longjmp() or C++ exception, + * without returning to __vdso_sgx_enter_enclave().
+ * + * Return: + * 0 on success (ENCLU reached), + * -EINVAL if ENCLU leaf is not allowed, + * -errno for all other negative values returned by the userspace exit handler + */ +typedef int (*vdso_sgx_enter_enclave_t)(unsigned long rdi, unsigned long rsi, + unsigned long rdx, unsigned int leaf, + unsigned long r8, unsigned long r9, + struct sgx_enclave_run *r); + #endif /* _UAPI_ASM_X86_SGX_H */
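For completeness, a call through the typedef above might look like the hedged sketch below. Locating __vdso_sgx_enter_enclave in the vDSO image (e.g. via getauxval(AT_SYSINFO_EHDR) plus ELF symbol-table parsing) is left to the caller, and tcs_addr is a placeholder for a real Thread Control Structure address:

	#include <string.h>
	#include <asm/enclu.h>	/* EENTER */
	#include <asm/sgx.h>	/* struct sgx_enclave_run and the typedefs above */

	/*
	 * Sketch: enter_fn is assumed to point at __vdso_sgx_enter_enclave,
	 * resolved by the caller from the vDSO; tcs_addr is a placeholder.
	 */
	static int run_enclave_once(vdso_sgx_enter_enclave_t enter_fn,
				    unsigned long tcs_addr)
	{
		struct sgx_enclave_run run;

		memset(&run, 0, sizeof(run));
		run.tcs = tcs_addr;	/* no flags, no exit handler */

		/* 0 on success; on -EFAULT, run.exception holds the details */
		return enter_fn(0, 0, 0, EENTER, 0, 0, &run);
	}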