
[v28,21/22] x86/vdso: Implement a vDSO for Intel SGX enclave call

Message ID 20200303233609.713348-22-jarkko.sakkinen@linux.intel.com (mailing list archive)
State New, archived
Series Intel SGX foundations

Commit Message

Jarkko Sakkinen March 3, 2020, 11:36 p.m. UTC
From: Sean Christopherson <sean.j.christopherson@intel.com>

An SGX runtime must be aware of exceptions that happen inside an
enclave. Introduce a vDSO call that wraps the EENTER/ERESUME cycle and
returns the CPU exception to the caller exactly when it happens.

The kernel fixes up the exception information into RDI, RSI and RDX. The
vDSO handler fills this information into the user-provided buffer or,
alternatively, triggers the user-provided callback at the time of the
exception.

The calling convention is custom and does not follow System V x86-64 ABI.
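
For illustration only (not part of the patch): with the buffer form, a
runtime that funnels the custom convention through a small assembly
thunk, here a hypothetical sgx_call(), could consume the reported
exception roughly like this:

#include <errno.h>
#include <stdio.h>
#include <asm/sgx.h> /* struct sgx_enclave_exception, once the uapi header is installed */

/*
 * Hypothetical assembly thunk that implements the custom calling
 * convention and invokes __vdso_sgx_enter_enclave().
 */
extern int sgx_call(void *tcs, int leaf, struct sgx_enclave_exception *e);

static int enter_enclave_once(void *tcs)
{
    struct sgx_enclave_exception e = { 0 };
    int ret = sgx_call(tcs, 2 /* EENTER */, &e);

    if (ret == -EFAULT)
        fprintf(stderr, "enclave fault: leaf=%u trapnr=%u code=%u addr=0x%llx\n",
                e.leaf, e.trapnr, e.error_code,
                (unsigned long long)e.address);
    return ret;
}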

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Co-developed-by: Cedric Xing <cedric.xing@intel.com>
Signed-off-by: Cedric Xing <cedric.xing@intel.com>
Tested-by: Jethro Beekman <jethro@fortanix.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 arch/x86/entry/vdso/Makefile             |   2 +
 arch/x86/entry/vdso/vdso.lds.S           |   1 +
 arch/x86/entry/vdso/vsgx_enter_enclave.S | 187 +++++++++++++++++++++++
 arch/x86/include/uapi/asm/sgx.h          |  37 +++++
 4 files changed, 227 insertions(+)
 create mode 100644 arch/x86/entry/vdso/vsgx_enter_enclave.S

Comments

Nathaniel McCallum March 11, 2020, 5:30 p.m. UTC | #1
On Tue, Mar 3, 2020 at 6:40 PM Jarkko Sakkinen
<jarkko.sakkinen@linux.intel.com> wrote:
>
> From: Sean Christopherson <sean.j.christopherson@intel.com>
>
> An SGX runtime must be aware of the exceptions, which happen inside an
> enclave. Introduce a vDSO call that wraps EENTER/ERESUME cycle and returns
> the CPU exception back to the caller exactly when it happens.
>
> Kernel fixups the exception information to RDI, RSI and RDX. The SGX call
> vDSO handler fills this information to the user provided buffer or
> alternatively trigger user provided callback at the time of the exception.
>
> The calling convention is custom and does not follow System V x86-64 ABI.
>
> Suggested-by: Andy Lutomirski <luto@amacapital.net>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Co-developed-by: Cedric Xing <cedric.xing@intel.com>
> Signed-off-by: Cedric Xing <cedric.xing@intel.com>
> Tested-by: Jethro Beekman <jethro@fortanix.com>
> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> ---
>  arch/x86/entry/vdso/Makefile             |   2 +
>  arch/x86/entry/vdso/vdso.lds.S           |   1 +
>  arch/x86/entry/vdso/vsgx_enter_enclave.S | 187 +++++++++++++++++++++++
>  arch/x86/include/uapi/asm/sgx.h          |  37 +++++
>  4 files changed, 227 insertions(+)
>  create mode 100644 arch/x86/entry/vdso/vsgx_enter_enclave.S
>
> diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
> index 657e01d34d02..fa50c76a17a8 100644
> --- a/arch/x86/entry/vdso/Makefile
> +++ b/arch/x86/entry/vdso/Makefile
> @@ -24,6 +24,7 @@ VDSO32-$(CONFIG_IA32_EMULATION)       := y
>
>  # files to link into the vdso
>  vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
> +vobjs-$(VDSO64-y)              += vsgx_enter_enclave.o
>
>  # files to link into kernel
>  obj-y                          += vma.o extable.o
> @@ -90,6 +91,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS
>  CFLAGS_REMOVE_vclock_gettime.o = -pg
>  CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
>  CFLAGS_REMOVE_vgetcpu.o = -pg
> +CFLAGS_REMOVE_vsgx_enter_enclave.o = -pg
>
>  #
>  # X32 processes use x32 vDSO to access 64bit kernel data.
> diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
> index 36b644e16272..4bf48462fca7 100644
> --- a/arch/x86/entry/vdso/vdso.lds.S
> +++ b/arch/x86/entry/vdso/vdso.lds.S
> @@ -27,6 +27,7 @@ VERSION {
>                 __vdso_time;
>                 clock_getres;
>                 __vdso_clock_getres;
> +               __vdso_sgx_enter_enclave;
>         local: *;
>         };
>  }
> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> new file mode 100644
> index 000000000000..94a8e5f99961
> --- /dev/null
> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> @@ -0,0 +1,187 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#include <linux/linkage.h>
> +#include <asm/export.h>
> +#include <asm/errno.h>
> +
> +#include "extable.h"
> +
> +#define EX_LEAF                0*8
> +#define EX_TRAPNR      0*8+4
> +#define EX_ERROR_CODE  0*8+6
> +#define EX_ADDRESS     1*8
> +
> +.code64
> +.section .text, "ax"
> +
> +/**
> + * __vdso_sgx_enter_enclave() - Enter an SGX enclave
> + * @leaf:      ENCLU leaf, must be EENTER or ERESUME
> + * @tcs:       TCS, must be non-NULL
> + * @e:         Optional struct sgx_enclave_exception instance
> + * @handler:   Optional enclave exit handler
> + *
> + * **Important!**  __vdso_sgx_enter_enclave() is **NOT** compliant with the
> + * x86-64 ABI, i.e. cannot be called from standard C code.
> + *
> + * Input ABI:
> + *  @leaf      %eax
> + *  @tcs       8(%rsp)
> + *  @e                 0x10(%rsp)
> + *  @handler   0x18(%rsp)
> + *
> + * Output ABI:
> + *  @ret       %eax
> + *
> + * All general purpose registers except RAX, RBX and RCX are passed as-is to
> + * the enclave. RAX, RBX and RCX are consumed by EENTER and ERESUME and are
> + * loaded with @leaf, asynchronous exit pointer, and @tcs respectively.
> + *
> + * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the
> + * pre-enclave state, e.g. to retrieve @e and @handler after an enclave exit.
> + * All other registers are available for use by the enclave and its runtime,
> + * e.g. an enclave can push additional data onto the stack (and modify RSP) to
> + * pass information to the optional exit handler (see below).
> + *
> + * Most exceptions reported on ENCLU, including those that occur within the
> + * enclave, are fixed up and reported synchronously instead of being delivered
> + * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are
> + * never fixed up and are always delivered via standard signals. On synchronously
> + * reported exceptions, -EFAULT is returned and details about the exception are
> + * recorded in @e, the optional sgx_enclave_exception struct.
> + *
> + * If an exit handler is provided, the handler will be invoked on synchronous
> + * exits from the enclave and for all synchronously reported exceptions. In
> + * the latter case, @e is filled prior to invoking the handler.
> + *
> + * The exit handler's return value is interpreted as follows:
> + *  >0:                continue, restart __vdso_sgx_enter_enclave() with @ret as @leaf
> + *   0:                success, return @ret to the caller
> + *  <0:                error, return @ret to the caller
> + *
> + * The userspace exit handler is responsible for unwinding the stack, e.g. to
> + * pop @e, u_rsp and @tcs, prior to returning to __vdso_sgx_enter_enclave().
> + * The exit handler may also transfer control, e.g. via longjmp() or a C++
> + * exception, without returning to __vdso_sgx_enter_enclave().
> + *
> + * Return:
> + *  0 on success,
> + *  -EINVAL if ENCLU leaf is not allowed,
> + *  -EFAULT if an exception occurs on ENCLU or within the enclave
> + *  -errno for all other negative values returned by the userspace exit handler
> + */
> +#ifdef SGX_KERNEL_DOC
> +/* C-style function prototype to coerce kernel-doc into parsing the comment. */
> +int __vdso_sgx_enter_enclave(int leaf, void *tcs,
> +                            struct sgx_enclave_exception *e,
> +                            sgx_enclave_exit_handler_t handler);
> +#endif
> +SYM_FUNC_START(__vdso_sgx_enter_enclave)

Currently, the selftest has a wrapper around
__vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
registers (CSRs), though it uses none of them. Then it calls this
function which uses %rbx but preserves none of the CSRs. Then it jumps
into an enclave which zeroes all these registers before returning.
Thus:

  1. wrapper saves all CSRs
  2. wrapper repositions stack arguments
  3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
  4. selftest zeros all CSRs
  5. wrapper loads all CSRs

I'd like to propose instead that the enclave be responsible for saving
and restoring CSRs. So instead of the above we have:
  1. __vdso_sgx_enter_enclave() saves %rbx
  2. enclave saves CSRs
  3. enclave loads CSRs
  4. __vdso_sgx_enter_enclave() loads %rbx

I know that lots of other stuff happens during enclave transitions,
but at the very least we could reduce the number of instructions
through this critical path.

> +       /* Prolog */
> +       .cfi_startproc
> +       push    %rbp
> +       .cfi_adjust_cfa_offset  8
> +       .cfi_rel_offset         %rbp, 0
> +       mov     %rsp, %rbp
> +       .cfi_def_cfa_register   %rbp
> +
> +.Lenter_enclave:
> +       /* EENTER <= leaf <= ERESUME */
> +       cmp     $0x2, %eax
> +       jb      .Linvalid_leaf
> +       cmp     $0x3, %eax
> +       ja      .Linvalid_leaf
> +
> +       /* Load TCS and AEP */
> +       mov     0x10(%rbp), %rbx
> +       lea     .Lasync_exit_pointer(%rip), %rcx
> +
> +       /* Single ENCLU serving as both EENTER and AEP (ERESUME) */
> +.Lasync_exit_pointer:
> +.Lenclu_eenter_eresume:
> +       enclu
> +
> +       /* EEXIT jumps here unless the enclave is doing something fancy. */
> +       xor     %eax, %eax
> +
> +       /* Invoke userspace's exit handler if one was provided. */
> +.Lhandle_exit:
> +       cmp     $0, 0x20(%rbp)
> +       jne     .Linvoke_userspace_handler
> +
> +.Lout:
> +       leave
> +       .cfi_def_cfa            %rsp, 8
> +       ret
> +
> +       /* The out-of-line code runs with the pre-leave stack frame. */
> +       .cfi_def_cfa            %rbp, 16
> +
> +.Linvalid_leaf:
> +       mov     $(-EINVAL), %eax
> +       jmp     .Lout
> +
> +.Lhandle_exception:
> +       mov     0x18(%rbp), %rcx
> +       test    %rcx, %rcx
> +       je      .Lskip_exception_info
> +
> +       /* Fill optional exception info. */
> +       mov     %eax, EX_LEAF(%rcx)
> +       mov     %di,  EX_TRAPNR(%rcx)
> +       mov     %si,  EX_ERROR_CODE(%rcx)
> +       mov     %rdx, EX_ADDRESS(%rcx)
> +.Lskip_exception_info:
> +       mov     $(-EFAULT), %eax
> +       jmp     .Lhandle_exit
> +
> +.Linvoke_userspace_handler:
> +       /* Pass the untrusted RSP (at exit) to the callback via %rcx. */
> +       mov     %rsp, %rcx
> +
> +       /* Save the untrusted RSP in %rbx (non-volatile register). */
> +       mov     %rsp, %rbx
> +
> +       /*
> +        * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned
> +        * _after_ pushing the parameters on the stack, hence the bonus push.
> +        */
> +       and     $-0x10, %rsp
> +       push    %rax
> +
> +       /* Push @e, the "return" value and @tcs as params to the callback. */
> +       push    0x18(%rbp)
> +       push    %rax
> +       push    0x10(%rbp)
> +
> +       /* Clear RFLAGS.DF per x86_64 ABI */
> +       cld
> +
> +       /* Load the callback pointer to %rax and invoke it via retpoline. */
> +       mov     0x20(%rbp), %rax
> +       call    .Lretpoline
> +
> +       /* Restore %rsp to its post-exit value. */
> +       mov     %rbx, %rsp
> +
> +       /*
> +        * If the return from callback is zero or negative, return immediately,
> +        * else re-execute ENCLU with the positive return value interpreted as
> +        * the requested ENCLU leaf.
> +        */
> +       cmp     $0, %eax
> +       jle     .Lout
> +       jmp     .Lenter_enclave
> +
> +.Lretpoline:
> +       call    2f
> +1:     pause
> +       lfence
> +       jmp     1b
> +2:     mov     %rax, (%rsp)
> +       ret
> +       .cfi_endproc
> +
> +_ASM_VDSO_EXTABLE_HANDLE(.Lenclu_eenter_eresume, .Lhandle_exception)
> +
> +SYM_FUNC_END(__vdso_sgx_enter_enclave)
> diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
> index 57d0d30c79b3..e196cfd44b70 100644
> --- a/arch/x86/include/uapi/asm/sgx.h
> +++ b/arch/x86/include/uapi/asm/sgx.h
> @@ -74,4 +74,41 @@ struct sgx_enclave_set_attribute {
>         __u64 attribute_fd;
>  };
>
> +/**
> + * struct sgx_enclave_exception - structure to report exceptions encountered in
> + *                               __vdso_sgx_enter_enclave()
> + *
> + * @leaf:      ENCLU leaf from \%eax at time of exception
> + * @trapnr:    exception trap number, a.k.a. fault vector
> + * @error_code:        exception error code
> + * @address:   exception address, e.g. CR2 on a #PF
> + * @reserved:  reserved for future use
> + */
> +struct sgx_enclave_exception {
> +       __u32 leaf;
> +       __u16 trapnr;
> +       __u16 error_code;
> +       __u64 address;
> +       __u64 reserved[2];
> +};
> +
> +/**
> + * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by
> + *                                     __vdso_sgx_enter_enclave()
> + *
> + * @rdi:       RDI at the time of enclave exit
> + * @rsi:       RSI at the time of enclave exit
> + * @rdx:       RDX at the time of enclave exit
> + * @ursp:      RSP at the time of enclave exit (untrusted stack)
> + * @r8:                R8 at the time of enclave exit
> + * @r9:                R9 at the time of enclave exit
> + * @tcs:       Thread Control Structure used to enter enclave
> + * @ret:       0 on success (EEXIT), -EFAULT on an exception
> + * @e:         Pointer to struct sgx_enclave_exception (as provided by caller)
> + */
> +typedef int (*sgx_enclave_exit_handler_t)(long rdi, long rsi, long rdx,
> +                                         long ursp, long r8, long r9,
> +                                         void *tcs, int ret,
> +                                         struct sgx_enclave_exception *e);
> +
>  #endif /* _UAPI_ASM_X86_SGX_H */
> --
> 2.25.0
>
Jethro Beekman March 11, 2020, 5:38 p.m. UTC | #2
On 2020-03-11 18:30, Nathaniel McCallum wrote:
> On Tue, Mar 3, 2020 at 6:40 PM Jarkko Sakkinen
> <jarkko.sakkinen@linux.intel.com> wrote:
>>
>> From: Sean Christopherson <sean.j.christopherson@intel.com>
>>
>> An SGX runtime must be aware of the exceptions, which happen inside an
>> enclave. Introduce a vDSO call that wraps EENTER/ERESUME cycle and returns
>> the CPU exception back to the caller exactly when it happens.
>>
>> Kernel fixups the exception information to RDI, RSI and RDX. The SGX call
>> vDSO handler fills this information to the user provided buffer or
>> alternatively trigger user provided callback at the time of the exception.
>>
>> The calling convention is custom and does not follow System V x86-64 ABI.
>>
>> Suggested-by: Andy Lutomirski <luto@amacapital.net>
>> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
>> Co-developed-by: Cedric Xing <cedric.xing@intel.com>
>> Signed-off-by: Cedric Xing <cedric.xing@intel.com>
>> Tested-by: Jethro Beekman <jethro@fortanix.com>
>> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
>> ---
>>  arch/x86/entry/vdso/Makefile             |   2 +
>>  arch/x86/entry/vdso/vdso.lds.S           |   1 +
>>  arch/x86/entry/vdso/vsgx_enter_enclave.S | 187 +++++++++++++++++++++++
>>  arch/x86/include/uapi/asm/sgx.h          |  37 +++++
>>  4 files changed, 227 insertions(+)
>>  create mode 100644 arch/x86/entry/vdso/vsgx_enter_enclave.S
>>
>> diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
>> index 657e01d34d02..fa50c76a17a8 100644
>> --- a/arch/x86/entry/vdso/Makefile
>> +++ b/arch/x86/entry/vdso/Makefile
>> @@ -24,6 +24,7 @@ VDSO32-$(CONFIG_IA32_EMULATION)       := y
>>
>>  # files to link into the vdso
>>  vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
>> +vobjs-$(VDSO64-y)              += vsgx_enter_enclave.o
>>
>>  # files to link into kernel
>>  obj-y                          += vma.o extable.o
>> @@ -90,6 +91,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS
>>  CFLAGS_REMOVE_vclock_gettime.o = -pg
>>  CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
>>  CFLAGS_REMOVE_vgetcpu.o = -pg
>> +CFLAGS_REMOVE_vsgx_enter_enclave.o = -pg
>>
>>  #
>>  # X32 processes use x32 vDSO to access 64bit kernel data.
>> diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
>> index 36b644e16272..4bf48462fca7 100644
>> --- a/arch/x86/entry/vdso/vdso.lds.S
>> +++ b/arch/x86/entry/vdso/vdso.lds.S
>> @@ -27,6 +27,7 @@ VERSION {
>>                 __vdso_time;
>>                 clock_getres;
>>                 __vdso_clock_getres;
>> +               __vdso_sgx_enter_enclave;
>>         local: *;
>>         };
>>  }
>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
>> new file mode 100644
>> index 000000000000..94a8e5f99961
>> --- /dev/null
>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
>> @@ -0,0 +1,187 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +
>> +#include <linux/linkage.h>
>> +#include <asm/export.h>
>> +#include <asm/errno.h>
>> +
>> +#include "extable.h"
>> +
>> +#define EX_LEAF                0*8
>> +#define EX_TRAPNR      0*8+4
>> +#define EX_ERROR_CODE  0*8+6
>> +#define EX_ADDRESS     1*8
>> +
>> +.code64
>> +.section .text, "ax"
>> +
>> +/**
>> + * __vdso_sgx_enter_enclave() - Enter an SGX enclave
>> + * @leaf:      ENCLU leaf, must be EENTER or ERESUME
>> + * @tcs:       TCS, must be non-NULL
>> + * @e:         Optional struct sgx_enclave_exception instance
>> + * @handler:   Optional enclave exit handler
>> + *
>> + * **Important!**  __vdso_sgx_enter_enclave() is **NOT** compliant with the
>> + * x86-64 ABI, i.e. cannot be called from standard C code.
>> + *
>> + * Input ABI:
>> + *  @leaf      %eax
>> + *  @tcs       8(%rsp)
>> + *  @e                 0x10(%rsp)
>> + *  @handler   0x18(%rsp)
>> + *
>> + * Output ABI:
>> + *  @ret       %eax
>> + *
>> + * All general purpose registers except RAX, RBX and RCX are passed as-is to
>> + * the enclave. RAX, RBX and RCX are consumed by EENTER and ERESUME and are
>> + * loaded with @leaf, asynchronous exit pointer, and @tcs respectively.
>> + *
>> + * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the
>> + * pre-enclave state, e.g. to retrieve @e and @handler after an enclave exit.
>> + * All other registers are available for use by the enclave and its runtime,
>> + * e.g. an enclave can push additional data onto the stack (and modify RSP) to
>> + * pass information to the optional exit handler (see below).
>> + *
>> + * Most exceptions reported on ENCLU, including those that occur within the
>> + * enclave, are fixed up and reported synchronously instead of being delivered
>> + * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are
>> + * never fixed up and are always delivered via standard signals. On synchrously
>> + * reported exceptions, -EFAULT is returned and details about the exception are
>> + * recorded in @e, the optional sgx_enclave_exception struct.
>> +
>> + * If an exit handler is provided, the handler will be invoked on synchronous
>> + * exits from the enclave and for all synchronously reported exceptions. In
>> + * latter case, @e is filled prior to invoking the handler.
>> + *
>> + * The exit handler's return value is interpreted as follows:
>> + *  >0:                continue, restart __vdso_sgx_enter_enclave() with @ret as @leaf
>> + *   0:                success, return @ret to the caller
>> + *  <0:                error, return @ret to the caller
>> + *
>> + * The userspace exit handler is responsible for unwinding the stack, e.g. to
>> + * pop @e, u_rsp and @tcs, prior to returning to __vdso_sgx_enter_enclave().
>> + * The exit handler may also transfer control, e.g. via longjmp() or a C++
>> + * exception, without returning to __vdso_sgx_enter_enclave().
>> + *
>> + * Return:
>> + *  0 on success,
>> + *  -EINVAL if ENCLU leaf is not allowed,
>> + *  -EFAULT if an exception occurs on ENCLU or within the enclave
>> + *  -errno for all other negative values returned by the userspace exit handler
>> + */
>> +#ifdef SGX_KERNEL_DOC
>> +/* C-style function prototype to coerce kernel-doc into parsing the comment. */
>> +int __vdso_sgx_enter_enclave(int leaf, void *tcs,
>> +                            struct sgx_enclave_exception *e,
>> +                            sgx_enclave_exit_handler_t handler);
>> +#endif
>> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> 
> Currently, the selftest has a wrapper around
> __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
> registers (CSRs), though it uses none of them. Then it calls this
> function which uses %rbx but preserves none of the CSRs. Then it jumps
> into an enclave which zeroes all these registers before returning.
> Thus:
> 
>   1. wrapper saves all CSRs
>   2. wrapper repositions stack arguments
>   3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
>   4. selftest zeros all CSRs
>   5. wrapper loads all CSRs
> 
> I'd like to propose instead that the enclave be responsible for saving
> and restoring CSRs. So instead of the above we have:
>   1. __vdso_sgx_enter_enclave() saves %rbx
>   2. enclave saves CSRs
>   3. enclave loads CSRs
>   4. __vdso_sgx_enter_enclave() loads %rbx
> 
> I know that lots of other stuff happens during enclave transitions,
> but at the very least we could reduce the number of instructions
> through this critical path.

The current calling convention for __vdso_sgx_enter_enclave has been carefully designed to mimic just calling ENCLU[EENTER] as closely as possible.

I'm not sure why you're referring to the selftest wrapper here; that doesn't seem relevant to me.

--
Jethro Beekman | Fortanix
Nathaniel McCallum March 11, 2020, 7:15 p.m. UTC | #3
On Wed, Mar 11, 2020 at 1:38 PM Jethro Beekman <jethro@fortanix.com> wrote:
>
> On 2020-03-11 18:30, Nathaniel McCallum wrote:
> > On Tue, Mar 3, 2020 at 6:40 PM Jarkko Sakkinen
> > <jarkko.sakkinen@linux.intel.com> wrote:
> >>
> >> From: Sean Christopherson <sean.j.christopherson@intel.com>
> >>
> >> An SGX runtime must be aware of the exceptions, which happen inside an
> >> enclave. Introduce a vDSO call that wraps EENTER/ERESUME cycle and returns
> >> the CPU exception back to the caller exactly when it happens.
> >>
> >> Kernel fixups the exception information to RDI, RSI and RDX. The SGX call
> >> vDSO handler fills this information to the user provided buffer or
> >> alternatively trigger user provided callback at the time of the exception.
> >>
> >> The calling convention is custom and does not follow System V x86-64 ABI.
> >>
> >> Suggested-by: Andy Lutomirski <luto@amacapital.net>
> >> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> >> Co-developed-by: Cedric Xing <cedric.xing@intel.com>
> >> Signed-off-by: Cedric Xing <cedric.xing@intel.com>
> >> Tested-by: Jethro Beekman <jethro@fortanix.com>
> >> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> >> ---
> >>  arch/x86/entry/vdso/Makefile             |   2 +
> >>  arch/x86/entry/vdso/vdso.lds.S           |   1 +
> >>  arch/x86/entry/vdso/vsgx_enter_enclave.S | 187 +++++++++++++++++++++++
> >>  arch/x86/include/uapi/asm/sgx.h          |  37 +++++
> >>  4 files changed, 227 insertions(+)
> >>  create mode 100644 arch/x86/entry/vdso/vsgx_enter_enclave.S
> >>
> >> diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
> >> index 657e01d34d02..fa50c76a17a8 100644
> >> --- a/arch/x86/entry/vdso/Makefile
> >> +++ b/arch/x86/entry/vdso/Makefile
> >> @@ -24,6 +24,7 @@ VDSO32-$(CONFIG_IA32_EMULATION)       := y
> >>
> >>  # files to link into the vdso
> >>  vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
> >> +vobjs-$(VDSO64-y)              += vsgx_enter_enclave.o
> >>
> >>  # files to link into kernel
> >>  obj-y                          += vma.o extable.o
> >> @@ -90,6 +91,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS
> >>  CFLAGS_REMOVE_vclock_gettime.o = -pg
> >>  CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
> >>  CFLAGS_REMOVE_vgetcpu.o = -pg
> >> +CFLAGS_REMOVE_vsgx_enter_enclave.o = -pg
> >>
> >>  #
> >>  # X32 processes use x32 vDSO to access 64bit kernel data.
> >> diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
> >> index 36b644e16272..4bf48462fca7 100644
> >> --- a/arch/x86/entry/vdso/vdso.lds.S
> >> +++ b/arch/x86/entry/vdso/vdso.lds.S
> >> @@ -27,6 +27,7 @@ VERSION {
> >>                 __vdso_time;
> >>                 clock_getres;
> >>                 __vdso_clock_getres;
> >> +               __vdso_sgx_enter_enclave;
> >>         local: *;
> >>         };
> >>  }
> >> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >> new file mode 100644
> >> index 000000000000..94a8e5f99961
> >> --- /dev/null
> >> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >> @@ -0,0 +1,187 @@
> >> +/* SPDX-License-Identifier: GPL-2.0 */
> >> +
> >> +#include <linux/linkage.h>
> >> +#include <asm/export.h>
> >> +#include <asm/errno.h>
> >> +
> >> +#include "extable.h"
> >> +
> >> +#define EX_LEAF                0*8
> >> +#define EX_TRAPNR      0*8+4
> >> +#define EX_ERROR_CODE  0*8+6
> >> +#define EX_ADDRESS     1*8
> >> +
> >> +.code64
> >> +.section .text, "ax"
> >> +
> >> +/**
> >> + * __vdso_sgx_enter_enclave() - Enter an SGX enclave
> >> + * @leaf:      ENCLU leaf, must be EENTER or ERESUME
> >> + * @tcs:       TCS, must be non-NULL
> >> + * @e:         Optional struct sgx_enclave_exception instance
> >> + * @handler:   Optional enclave exit handler
> >> + *
> >> + * **Important!**  __vdso_sgx_enter_enclave() is **NOT** compliant with the
> >> + * x86-64 ABI, i.e. cannot be called from standard C code.
> >> + *
> >> + * Input ABI:
> >> + *  @leaf      %eax
> >> + *  @tcs       8(%rsp)
> >> + *  @e                 0x10(%rsp)
> >> + *  @handler   0x18(%rsp)
> >> + *
> >> + * Output ABI:
> >> + *  @ret       %eax
> >> + *
> >> + * All general purpose registers except RAX, RBX and RCX are passed as-is to
> >> + * the enclave. RAX, RBX and RCX are consumed by EENTER and ERESUME and are
> >> + * loaded with @leaf, asynchronous exit pointer, and @tcs respectively.
> >> + *
> >> + * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the
> >> + * pre-enclave state, e.g. to retrieve @e and @handler after an enclave exit.
> >> + * All other registers are available for use by the enclave and its runtime,
> >> + * e.g. an enclave can push additional data onto the stack (and modify RSP) to
> >> + * pass information to the optional exit handler (see below).
> >> + *
> >> + * Most exceptions reported on ENCLU, including those that occur within the
> >> + * enclave, are fixed up and reported synchronously instead of being delivered
> >> + * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are
> >> + * never fixed up and are always delivered via standard signals. On synchrously
> >> + * reported exceptions, -EFAULT is returned and details about the exception are
> >> + * recorded in @e, the optional sgx_enclave_exception struct.
> >> +
> >> + * If an exit handler is provided, the handler will be invoked on synchronous
> >> + * exits from the enclave and for all synchronously reported exceptions. In
> >> + * latter case, @e is filled prior to invoking the handler.
> >> + *
> >> + * The exit handler's return value is interpreted as follows:
> >> + *  >0:                continue, restart __vdso_sgx_enter_enclave() with @ret as @leaf
> >> + *   0:                success, return @ret to the caller
> >> + *  <0:                error, return @ret to the caller
> >> + *
> >> + * The userspace exit handler is responsible for unwinding the stack, e.g. to
> >> + * pop @e, u_rsp and @tcs, prior to returning to __vdso_sgx_enter_enclave().
> >> + * The exit handler may also transfer control, e.g. via longjmp() or a C++
> >> + * exception, without returning to __vdso_sgx_enter_enclave().
> >> + *
> >> + * Return:
> >> + *  0 on success,
> >> + *  -EINVAL if ENCLU leaf is not allowed,
> >> + *  -EFAULT if an exception occurs on ENCLU or within the enclave
> >> + *  -errno for all other negative values returned by the userspace exit handler
> >> + */
> >> +#ifdef SGX_KERNEL_DOC
> >> +/* C-style function prototype to coerce kernel-doc into parsing the comment. */
> >> +int __vdso_sgx_enter_enclave(int leaf, void *tcs,
> >> +                            struct sgx_enclave_exception *e,
> >> +                            sgx_enclave_exit_handler_t handler);
> >> +#endif
> >> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> >
> > Currently, the selftest has a wrapper around
> > __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
> > registers (CSRs), though it uses none of them. Then it calls this
> > function which uses %rbx but preserves none of the CSRs. Then it jumps
> > into an enclave which zeroes all these registers before returning.
> > Thus:
> >
> >   1. wrapper saves all CSRs
> >   2. wrapper repositions stack arguments
> >   3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
> >   4. selftest zeros all CSRs
> >   5. wrapper loads all CSRs
> >
> > I'd like to propose instead that the enclave be responsible for saving
> > and restoring CSRs. So instead of the above we have:
> >   1. __vdso_sgx_enter_enclave() saves %rbx
> >   2. enclave saves CSRs
> >   3. enclave loads CSRs
> >   4. __vdso_sgx_enter_enclave() loads %rbx
> >
> > I know that lots of other stuff happens during enclave transitions,
> > but at the very least we could reduce the number of instructions
> > through this critical path.
>
> The current calling convention for __vdso_sgx_enter_enclave has been carefully designed to mimic just calling ENCLU[EENTER] as closely as possible.

That seems like a reasonable contract. Thanks!
Nathaniel McCallum March 11, 2020, 7:30 p.m. UTC | #4
On Tue, Mar 3, 2020 at 6:40 PM Jarkko Sakkinen
<jarkko.sakkinen@linux.intel.com> wrote:
>
> From: Sean Christopherson <sean.j.christopherson@intel.com>
>
> An SGX runtime must be aware of the exceptions, which happen inside an
> enclave. Introduce a vDSO call that wraps EENTER/ERESUME cycle and returns
> the CPU exception back to the caller exactly when it happens.
>
> Kernel fixups the exception information to RDI, RSI and RDX. The SGX call
> vDSO handler fills this information to the user provided buffer or
> alternatively trigger user provided callback at the time of the exception.
>
> The calling convention is custom and does not follow System V x86-64 ABI.
>
> Suggested-by: Andy Lutomirski <luto@amacapital.net>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Co-developed-by: Cedric Xing <cedric.xing@intel.com>
> Signed-off-by: Cedric Xing <cedric.xing@intel.com>
> Tested-by: Jethro Beekman <jethro@fortanix.com>
> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> ---
>  arch/x86/entry/vdso/Makefile             |   2 +
>  arch/x86/entry/vdso/vdso.lds.S           |   1 +
>  arch/x86/entry/vdso/vsgx_enter_enclave.S | 187 +++++++++++++++++++++++
>  arch/x86/include/uapi/asm/sgx.h          |  37 +++++
>  4 files changed, 227 insertions(+)
>  create mode 100644 arch/x86/entry/vdso/vsgx_enter_enclave.S
>
> diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
> index 657e01d34d02..fa50c76a17a8 100644
> --- a/arch/x86/entry/vdso/Makefile
> +++ b/arch/x86/entry/vdso/Makefile
> @@ -24,6 +24,7 @@ VDSO32-$(CONFIG_IA32_EMULATION)       := y
>
>  # files to link into the vdso
>  vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
> +vobjs-$(VDSO64-y)              += vsgx_enter_enclave.o
>
>  # files to link into kernel
>  obj-y                          += vma.o extable.o
> @@ -90,6 +91,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS
>  CFLAGS_REMOVE_vclock_gettime.o = -pg
>  CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
>  CFLAGS_REMOVE_vgetcpu.o = -pg
> +CFLAGS_REMOVE_vsgx_enter_enclave.o = -pg
>
>  #
>  # X32 processes use x32 vDSO to access 64bit kernel data.
> diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
> index 36b644e16272..4bf48462fca7 100644
> --- a/arch/x86/entry/vdso/vdso.lds.S
> +++ b/arch/x86/entry/vdso/vdso.lds.S
> @@ -27,6 +27,7 @@ VERSION {
>                 __vdso_time;
>                 clock_getres;
>                 __vdso_clock_getres;
> +               __vdso_sgx_enter_enclave;
>         local: *;
>         };
>  }
> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> new file mode 100644
> index 000000000000..94a8e5f99961
> --- /dev/null
> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> @@ -0,0 +1,187 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#include <linux/linkage.h>
> +#include <asm/export.h>
> +#include <asm/errno.h>
> +
> +#include "extable.h"
> +
> +#define EX_LEAF                0*8
> +#define EX_TRAPNR      0*8+4
> +#define EX_ERROR_CODE  0*8+6
> +#define EX_ADDRESS     1*8
> +
> +.code64
> +.section .text, "ax"
> +
> +/**
> + * __vdso_sgx_enter_enclave() - Enter an SGX enclave
> + * @leaf:      ENCLU leaf, must be EENTER or ERESUME
> + * @tcs:       TCS, must be non-NULL
> + * @e:         Optional struct sgx_enclave_exception instance
> + * @handler:   Optional enclave exit handler
> + *
> + * **Important!**  __vdso_sgx_enter_enclave() is **NOT** compliant with the
> + * x86-64 ABI, i.e. cannot be called from standard C code.
> + *
> + * Input ABI:
> + *  @leaf      %eax
> + *  @tcs       8(%rsp)
> + *  @e                 0x10(%rsp)
> + *  @handler   0x18(%rsp)
> + *
> + * Output ABI:
> + *  @ret       %eax
> + *
> + * All general purpose registers except RAX, RBX and RCX are passed as-is to
> + * the enclave. RAX, RBX and RCX are consumed by EENTER and ERESUME and are
> + * loaded with @leaf, asynchronous exit pointer, and @tcs respectively.
> + *
> + * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the
> + * pre-enclave state, e.g. to retrieve @e and @handler after an enclave exit.
> + * All other registers are available for use by the enclave and its runtime,
> + * e.g. an enclave can push additional data onto the stack (and modify RSP) to
> + * pass information to the optional exit handler (see below).
> + *
> + * Most exceptions reported on ENCLU, including those that occur within the
> + * enclave, are fixed up and reported synchronously instead of being delivered
> + * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are
> + * never fixed up and are always delivered via standard signals. On synchrously
> + * reported exceptions, -EFAULT is returned and details about the exception are
> + * recorded in @e, the optional sgx_enclave_exception struct.
> +
> + * If an exit handler is provided, the handler will be invoked on synchronous
> + * exits from the enclave and for all synchronously reported exceptions. In
> + * latter case, @e is filled prior to invoking the handler.
> + *
> + * The exit handler's return value is interpreted as follows:
> + *  >0:                continue, restart __vdso_sgx_enter_enclave() with @ret as @leaf
> + *   0:                success, return @ret to the caller
> + *  <0:                error, return @ret to the caller
> + *
> + * The userspace exit handler is responsible for unwinding the stack, e.g. to
> + * pop @e, u_rsp and @tcs, prior to returning to __vdso_sgx_enter_enclave().

Unless I misunderstand, this documentation...

> + * The exit handler may also transfer control, e.g. via longjmp() or a C++
> + * exception, without returning to __vdso_sgx_enter_enclave().
> + *
> + * Return:
> + *  0 on success,
> + *  -EINVAL if ENCLU leaf is not allowed,
> + *  -EFAULT if an exception occurs on ENCLU or within the enclave
> + *  -errno for all other negative values returned by the userspace exit handler
> + */
> +#ifdef SGX_KERNEL_DOC
> +/* C-style function prototype to coerce kernel-doc into parsing the comment. */
> +int __vdso_sgx_enter_enclave(int leaf, void *tcs,
> +                            struct sgx_enclave_exception *e,
> +                            sgx_enclave_exit_handler_t handler);
> +#endif
> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> +       /* Prolog */
> +       .cfi_startproc
> +       push    %rbp
> +       .cfi_adjust_cfa_offset  8
> +       .cfi_rel_offset         %rbp, 0
> +       mov     %rsp, %rbp
> +       .cfi_def_cfa_register   %rbp
> +
> +.Lenter_enclave:
> +       /* EENTER <= leaf <= ERESUME */
> +       cmp     $0x2, %eax
> +       jb      .Linvalid_leaf
> +       cmp     $0x3, %eax
> +       ja      .Linvalid_leaf
> +
> +       /* Load TCS and AEP */
> +       mov     0x10(%rbp), %rbx
> +       lea     .Lasync_exit_pointer(%rip), %rcx
> +
> +       /* Single ENCLU serving as both EENTER and AEP (ERESUME) */
> +.Lasync_exit_pointer:
> +.Lenclu_eenter_eresume:
> +       enclu
> +
> +       /* EEXIT jumps here unless the enclave is doing something fancy. */
> +       xor     %eax, %eax
> +
> +       /* Invoke userspace's exit handler if one was provided. */
> +.Lhandle_exit:
> +       cmp     $0, 0x20(%rbp)
> +       jne     .Linvoke_userspace_handler
> +
> +.Lout:
> +       leave
> +       .cfi_def_cfa            %rsp, 8
> +       ret
> +
> +       /* The out-of-line code runs with the pre-leave stack frame. */
> +       .cfi_def_cfa            %rbp, 16
> +
> +.Linvalid_leaf:
> +       mov     $(-EINVAL), %eax
> +       jmp     .Lout
> +
> +.Lhandle_exception:
> +       mov     0x18(%rbp), %rcx
> +       test    %rcx, %rcx
> +       je      .Lskip_exception_info
> +
> +       /* Fill optional exception info. */
> +       mov     %eax, EX_LEAF(%rcx)
> +       mov     %di,  EX_TRAPNR(%rcx)
> +       mov     %si,  EX_ERROR_CODE(%rcx)
> +       mov     %rdx, EX_ADDRESS(%rcx)
> +.Lskip_exception_info:
> +       mov     $(-EFAULT), %eax
> +       jmp     .Lhandle_exit
> +
> +.Linvoke_userspace_handler:
> +       /* Pass the untrusted RSP (at exit) to the callback via %rcx. */
> +       mov     %rsp, %rcx
> +
> +       /* Save the untrusted RSP in %rbx (non-volatile register). */
> +       mov     %rsp, %rbx
> +
> +       /*
> +        * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned
> +        * _after_ pushing the parameters on the stack, hence the bonus push.
> +        */
> +       and     $-0x10, %rsp
> +       push    %rax
> +
> +       /* Push @e, the "return" value and @tcs as params to the callback. */
> +       push    0x18(%rbp)
> +       push    %rax
> +       push    0x10(%rbp)
> +
> +       /* Clear RFLAGS.DF per x86_64 ABI */
> +       cld
> +
> +       /* Load the callback pointer to %rax and invoke it via retpoline. */
> +       mov     0x20(%rbp), %rax
> +       call    .Lretpoline
> +
> +       /* Restore %rsp to its post-exit value. */
> +       mov     %rbx, %rsp

... doesn't seem to match this code.

If the handler pops from the stack and then we restore the stack here,
the handler had no effect.

Also, one difference between this interface and a raw ENCLU[EENTER] is
that we can't pass arguments on the untrusted stack during EEXIT. If
we want to support that workflow, then we need to allow the handler
to pop "additional" values without restoring the stack
pointer to the exact value here.

> +       /*
> +        * If the return from callback is zero or negative, return immediately,
> +        * else re-execute ENCLU with the postive return value interpreted as
> +        * the requested ENCLU leaf.
> +        */
> +       cmp     $0, %eax
> +       jle     .Lout
> +       jmp     .Lenter_enclave
> +
> +.Lretpoline:
> +       call    2f
> +1:     pause
> +       lfence
> +       jmp     1b
> +2:     mov     %rax, (%rsp)
> +       ret
> +       .cfi_endproc
> +
> +_ASM_VDSO_EXTABLE_HANDLE(.Lenclu_eenter_eresume, .Lhandle_exception)
> +
> +SYM_FUNC_END(__vdso_sgx_enter_enclave)
> diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
> index 57d0d30c79b3..e196cfd44b70 100644
> --- a/arch/x86/include/uapi/asm/sgx.h
> +++ b/arch/x86/include/uapi/asm/sgx.h
> @@ -74,4 +74,41 @@ struct sgx_enclave_set_attribute {
>         __u64 attribute_fd;
>  };
>
> +/**
> + * struct sgx_enclave_exception - structure to report exceptions encountered in
> + *                               __vdso_sgx_enter_enclave()
> + *
> + * @leaf:      ENCLU leaf from \%eax at time of exception
> + * @trapnr:    exception trap number, a.k.a. fault vector
> + * @error_code:        exception error code
> + * @address:   exception address, e.g. CR2 on a #PF
> + * @reserved:  reserved for future use
> + */
> +struct sgx_enclave_exception {
> +       __u32 leaf;
> +       __u16 trapnr;
> +       __u16 error_code;
> +       __u64 address;
> +       __u64 reserved[2];
> +};
> +
> +/**
> + * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by
> + *                                     __vdso_sgx_enter_enclave()
> + *
> + * @rdi:       RDI at the time of enclave exit
> + * @rsi:       RSI at the time of enclave exit
> + * @rdx:       RDX at the time of enclave exit
> + * @ursp:      RSP at the time of enclave exit (untrusted stack)
> + * @r8:                R8 at the time of enclave exit
> + * @r9:                R9 at the time of enclave exit
> + * @tcs:       Thread Control Structure used to enter enclave
> + * @ret:       0 on success (EEXIT), -EFAULT on an exception
> + * @e:         Pointer to struct sgx_enclave_exception (as provided by caller)
> + */
> +typedef int (*sgx_enclave_exit_handler_t)(long rdi, long rsi, long rdx,
> +                                         long ursp, long r8, long r9,
> +                                         void *tcs, int ret,
> +                                         struct sgx_enclave_exception *e);
> +
>  #endif /* _UAPI_ASM_X86_SGX_H */
> --
> 2.25.0
>
Sean Christopherson March 13, 2020, 12:52 a.m. UTC | #5
On Wed, Mar 11, 2020 at 03:30:44PM -0400, Nathaniel McCallum wrote:
> On Tue, Mar 3, 2020 at 6:40 PM Jarkko Sakkinen
> <jarkko.sakkinen@linux.intel.com> wrote:
> > + * The exit handler's return value is interpreted as follows:
> > + *  >0:                continue, restart __vdso_sgx_enter_enclave() with @ret as @leaf
> > + *   0:                success, return @ret to the caller
> > + *  <0:                error, return @ret to the caller
> > + *
> > + * The userspace exit handler is responsible for unwinding the stack, e.g. to
> > + * pop @e, u_rsp and @tcs, prior to returning to __vdso_sgx_enter_enclave().
> 
> Unless I misunderstand, this documentation...

Hrm, that does appear wrong.  I'm guessing that was leftover from a previous
incarnation of the code.  Or I botched the description, which is just as
likely.

> > + * The exit handler may also transfer control, e.g. via longjmp() or a C++
> > + * exception, without returning to __vdso_sgx_enter_enclave().
> > + *
> > + * Return:
> > + *  0 on success,
> > + *  -EINVAL if ENCLU leaf is not allowed,
> > + *  -EFAULT if an exception occurs on ENCLU or within the enclave
> > + *  -errno for all other negative values returned by the userspace exit handler
> > + */

...

> > +       /* Load the callback pointer to %rax and invoke it via retpoline. */
> > +       mov     0x20(%rbp), %rax
> > +       call    .Lretpoline
> > +
> > +       /* Restore %rsp to its post-exit value. */
> > +       mov     %rbx, %rsp
> 
> ... doesn't seem to match this code.
> 
> If the handler pops from the stack and then we restore the stack here,
> the handler had no effect.
> 
> Also, one difference between this interface and a raw ENCLU[EENTER] is
> that we can't pass arguments on the untrusted stack during EEXIT. If
> we want to support that workflow, then we need to allow the ability
> for the handler to pop "additional" values without restoring the stack
> pointer to the exact value here.

> Also, one difference between this interface and a raw ENCLU[EENTER] is
> that we can't pass arguments on the untrusted stack during EEXIT. If
> we want to support that workflow, then we need to allow the ability
> for the handler to pop "additional" values without restoring the stack
> pointer to the exact value here.

The callback shenanigans exist precisely to allow passing arguments on the
untrusted stack.  The vDSO is very careful to preserve the stack memory
above RSP, and to snapshot RSP at the time of exit, e.g. the arguments in
memory and their addresses relative to u_rsp live across EEXIT.  It's the
same basic concept as regular function calls, e.g. the callee doesn't pop
params off the stack, it just knows what addresses relative to RSP hold
the data it wants.  The enclave, being the caller, is responsible for
cleaning up u_rsp.

FWIW, if the handler reaaaly wanted to pop off the stack, it could do so,
fix up the stack, and then re-call __vdso_sgx_enter_enclave() instead of
returning (to the original __vdso_sgx_enter_enclave()).
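
For illustration, a handler written against the
sgx_enclave_exit_handler_t signature could consume such in-place
arguments roughly like this (the request code in %rdi and the two-word
layout at @ursp are made-up conventions for this sketch, not anything
defined by the patch):

#include <errno.h>
#include <unistd.h>
#include <asm/sgx.h> /* struct sgx_enclave_exception */

/*
 * Sketch: on EEXIT the enclave leaves a request code in %rdi and has
 * pushed two argument words onto the untrusted stack.  @ursp is the
 * stack pointer snapshotted by __vdso_sgx_enter_enclave() at exit, so
 * those words sit at 0(ursp) and 8(ursp); the handler reads them in
 * place and never pops them.
 */
static int example_exit_handler(long rdi, long rsi, long rdx, long ursp,
                                long r8, long r9, void *tcs, int ret,
                                struct sgx_enclave_exception *e)
{
    const long *args = (const long *)ursp;

    if (ret == -EFAULT)        /* exception: details were written to *e */
        return ret;            /* propagate the error to the caller */

    if (rdi == 1) {            /* made-up "write to stdout" request */
        write(1, (const void *)args[0], (size_t)args[1]);
        return 2;              /* EENTER: go back into the enclave */
    }

    return 0;                  /* plain exit: return 0 to the caller */
}

The enclave, as the caller, then unwinds its own pushes the next time it
runs, exactly as described above.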

> > +       /*
> > +        * If the return from callback is zero or negative, return immediately,
> > +        * else re-execute ENCLU with the postive return value interpreted as
> > +        * the requested ENCLU leaf.
> > +        */
> > +       cmp     $0, %eax
> > +       jle     .Lout
> > +       jmp     .Lenter_enclave
> > +
> > +.Lretpoline:
> > +       call    2f
> > +1:     pause
> > +       lfence
> > +       jmp     1b
> > +2:     mov     %rax, (%rsp)
> > +       ret
> > +       .cfi_endproc
> > +
> > +_ASM_VDSO_EXTABLE_HANDLE(.Lenclu_eenter_eresume, .Lhandle_exception)
Nathaniel McCallum March 13, 2020, 3:48 p.m. UTC | #6
On Wed, Mar 11, 2020 at 1:38 PM Jethro Beekman <jethro@fortanix.com> wrote:
>
> On 2020-03-11 18:30, Nathaniel McCallum wrote:
> > On Tue, Mar 3, 2020 at 6:40 PM Jarkko Sakkinen
> > <jarkko.sakkinen@linux.intel.com> wrote:
> >>
> >> From: Sean Christopherson <sean.j.christopherson@intel.com>
> >>
> >> An SGX runtime must be aware of the exceptions, which happen inside an
> >> enclave. Introduce a vDSO call that wraps EENTER/ERESUME cycle and returns
> >> the CPU exception back to the caller exactly when it happens.
> >>
> >> Kernel fixups the exception information to RDI, RSI and RDX. The SGX call
> >> vDSO handler fills this information to the user provided buffer or
> >> alternatively trigger user provided callback at the time of the exception.
> >>
> >> The calling convention is custom and does not follow System V x86-64 ABI.
> >>
> >> Suggested-by: Andy Lutomirski <luto@amacapital.net>
> >> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> >> Co-developed-by: Cedric Xing <cedric.xing@intel.com>
> >> Signed-off-by: Cedric Xing <cedric.xing@intel.com>
> >> Tested-by: Jethro Beekman <jethro@fortanix.com>
> >> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> >> ---
> >>  arch/x86/entry/vdso/Makefile             |   2 +
> >>  arch/x86/entry/vdso/vdso.lds.S           |   1 +
> >>  arch/x86/entry/vdso/vsgx_enter_enclave.S | 187 +++++++++++++++++++++++
> >>  arch/x86/include/uapi/asm/sgx.h          |  37 +++++
> >>  4 files changed, 227 insertions(+)
> >>  create mode 100644 arch/x86/entry/vdso/vsgx_enter_enclave.S
> >>
> >> diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
> >> index 657e01d34d02..fa50c76a17a8 100644
> >> --- a/arch/x86/entry/vdso/Makefile
> >> +++ b/arch/x86/entry/vdso/Makefile
> >> @@ -24,6 +24,7 @@ VDSO32-$(CONFIG_IA32_EMULATION)       := y
> >>
> >>  # files to link into the vdso
> >>  vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
> >> +vobjs-$(VDSO64-y)              += vsgx_enter_enclave.o
> >>
> >>  # files to link into kernel
> >>  obj-y                          += vma.o extable.o
> >> @@ -90,6 +91,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS
> >>  CFLAGS_REMOVE_vclock_gettime.o = -pg
> >>  CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
> >>  CFLAGS_REMOVE_vgetcpu.o = -pg
> >> +CFLAGS_REMOVE_vsgx_enter_enclave.o = -pg
> >>
> >>  #
> >>  # X32 processes use x32 vDSO to access 64bit kernel data.
> >> diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
> >> index 36b644e16272..4bf48462fca7 100644
> >> --- a/arch/x86/entry/vdso/vdso.lds.S
> >> +++ b/arch/x86/entry/vdso/vdso.lds.S
> >> @@ -27,6 +27,7 @@ VERSION {
> >>                 __vdso_time;
> >>                 clock_getres;
> >>                 __vdso_clock_getres;
> >> +               __vdso_sgx_enter_enclave;
> >>         local: *;
> >>         };
> >>  }
> >> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >> new file mode 100644
> >> index 000000000000..94a8e5f99961
> >> --- /dev/null
> >> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >> @@ -0,0 +1,187 @@
> >> +/* SPDX-License-Identifier: GPL-2.0 */
> >> +
> >> +#include <linux/linkage.h>
> >> +#include <asm/export.h>
> >> +#include <asm/errno.h>
> >> +
> >> +#include "extable.h"
> >> +
> >> +#define EX_LEAF                0*8
> >> +#define EX_TRAPNR      0*8+4
> >> +#define EX_ERROR_CODE  0*8+6
> >> +#define EX_ADDRESS     1*8
> >> +
> >> +.code64
> >> +.section .text, "ax"
> >> +
> >> +/**
> >> + * __vdso_sgx_enter_enclave() - Enter an SGX enclave
> >> + * @leaf:      ENCLU leaf, must be EENTER or ERESUME
> >> + * @tcs:       TCS, must be non-NULL
> >> + * @e:         Optional struct sgx_enclave_exception instance
> >> + * @handler:   Optional enclave exit handler
> >> + *
> >> + * **Important!**  __vdso_sgx_enter_enclave() is **NOT** compliant with the
> >> + * x86-64 ABI, i.e. cannot be called from standard C code.
> >> + *
> >> + * Input ABI:
> >> + *  @leaf      %eax
> >> + *  @tcs       8(%rsp)
> >> + *  @e                 0x10(%rsp)
> >> + *  @handler   0x18(%rsp)
> >> + *
> >> + * Output ABI:
> >> + *  @ret       %eax
> >> + *
> >> + * All general purpose registers except RAX, RBX and RCX are passed as-is to
> >> + * the enclave. RAX, RBX and RCX are consumed by EENTER and ERESUME and are
> >> + * loaded with @leaf, asynchronous exit pointer, and @tcs respectively.
> >> + *
> >> + * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the
> >> + * pre-enclave state, e.g. to retrieve @e and @handler after an enclave exit.
> >> + * All other registers are available for use by the enclave and its runtime,
> >> + * e.g. an enclave can push additional data onto the stack (and modify RSP) to
> >> + * pass information to the optional exit handler (see below).
> >> + *
> >> + * Most exceptions reported on ENCLU, including those that occur within the
> >> + * enclave, are fixed up and reported synchronously instead of being delivered
> >> + * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are
> >> + * never fixed up and are always delivered via standard signals. On synchrously
> >> + * reported exceptions, -EFAULT is returned and details about the exception are
> >> + * recorded in @e, the optional sgx_enclave_exception struct.
> >> +
> >> + * If an exit handler is provided, the handler will be invoked on synchronous
> >> + * exits from the enclave and for all synchronously reported exceptions. In
> >> + * latter case, @e is filled prior to invoking the handler.
> >> + *
> >> + * The exit handler's return value is interpreted as follows:
> >> + *  >0:                continue, restart __vdso_sgx_enter_enclave() with @ret as @leaf
> >> + *   0:                success, return @ret to the caller
> >> + *  <0:                error, return @ret to the caller
> >> + *
> >> + * The userspace exit handler is responsible for unwinding the stack, e.g. to
> >> + * pop @e, u_rsp and @tcs, prior to returning to __vdso_sgx_enter_enclave().
> >> + * The exit handler may also transfer control, e.g. via longjmp() or a C++
> >> + * exception, without returning to __vdso_sgx_enter_enclave().
> >> + *
> >> + * Return:
> >> + *  0 on success,
> >> + *  -EINVAL if ENCLU leaf is not allowed,
> >> + *  -EFAULT if an exception occurs on ENCLU or within the enclave
> >> + *  -errno for all other negative values returned by the userspace exit handler
> >> + */
> >> +#ifdef SGX_KERNEL_DOC
> >> +/* C-style function prototype to coerce kernel-doc into parsing the comment. */
> >> +int __vdso_sgx_enter_enclave(int leaf, void *tcs,
> >> +                            struct sgx_enclave_exception *e,
> >> +                            sgx_enclave_exit_handler_t handler);
> >> +#endif
> >> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> >
> > Currently, the selftest has a wrapper around
> > __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
> > registers (CSRs), though it uses none of them. Then it calls this
> > function which uses %rbx but preserves none of the CSRs. Then it jumps
> > into an enclave which zeroes all these registers before returning.
> > Thus:
> >
> >   1. wrapper saves all CSRs
> >   2. wrapper repositions stack arguments
> >   3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
> >   4. selftest zeros all CSRs
> >   5. wrapper loads all CSRs
> >
> > I'd like to propose instead that the enclave be responsible for saving
> > and restoring CSRs. So instead of the above we have:
> >   1. __vdso_sgx_enter_enclave() saves %rbx
> >   2. enclave saves CSRs
> >   3. enclave loads CSRs
> >   4. __vdso_sgx_enter_enclave() loads %rbx
> >
> > I know that lots of other stuff happens during enclave transitions,
> > but at the very least we could reduce the number of instructions
> > through this critical path.
>
> The current calling convention for __vdso_sgx_enter_enclave has been carefully designed to mimic just calling ENCLU[EENTER] as closely as possible.
>
> I'm not sure why you're referring to the selftest wrapper here, that doesn't seem relevant to me.

Thinking about this more carefully, I still think that at least part
of my critique still stands.

__vdso_sgx_enter_enclave() doesn't use the x86-64 ABI. This means that
there will always be an assembly wrapper for
__vdso_sgx_enter_enclave(). But because __vdso_sgx_enter_enclave()
doesn't save %rbx, the wrapper is forced to save it in order to be
callable from C.

A common pattern for the wrapper will be to do something like this:

# void enter_enclave(rdi, rsi, rdx, unused, r8, r9, @tcs, @e,
@handler, @leaf, @vdso)
enter_enclave:
    push %rbx
    push $0 /* align */
    push 0x28(%rsp) /* @handler */
    push 0x28(%rsp) /* @e */
    push 0x28(%rsp) /* @tcs */

    mov 0x48(%rsp), %eax /* @leaf */
    call *0x50(%rsp) /* @vdso */

    add $0x20, %rsp
    pop %rbx
    ret

Because __vdso_sgx_enter_enclave() doesn't preserve %rbx, the wrapper
is forced to reposition stack parameters in a performance-critical
path. On the other hand, if __vdso_sgx_enter_enclave() preserved %rbx,
you could implement the above as:

# void enter_enclave(rdi, rsi, rdx, unused, r8, r9, @tcs, @e,
@handler, @leaf, @vdso)
enter_enclave:
    mov 0x20(%rsp), %eax
    jmp *0x28(%rsp)

This also implies that if __vdso_sgx_enter_enclave() took @leaf as a
stack parameter and preserved %rbx, it would be x86-64 ABI compliant
enough to call from C if the enclave preserves all callee-saved
registers besides %rbx (Enarx does).

What are the downsides of this approach? It also doesn't harm the more
extended case when you need to use an assembly wrapper to set up
additional registers. This can still be done. It does imply an extra
push and mov instruction. But because there is currently an odd
number of stack parameters, the extra push also (likely) removes an
alignment instruction otherwise needed before the call to
__vdso_sgx_enter_enclave(). Further, the push and mov are
going to be performed by *someone* in order to call
__vdso_sgx_enter_enclave() from C.

Therefore, I'd like to propose that __vdso_sgx_enter_enclave():
  * Preserve %rbx.
  * Take the leaf as an additional stack parameter instead of passing
it in %rax.
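
A rough sketch of that proposed convention (not the v28 ABI, and
assuming the enclave preserves the remaining callee-saved registers as
noted above):

#include <asm/sgx.h> /* struct sgx_enclave_exception, sgx_enclave_exit_handler_t */

/*
 * Hypothetical prototype for the *proposed* convention: the first six
 * arguments land in %rdi, %rsi, %rdx, %rcx (clobbered by EENTER), %r8
 * and %r9 and pass through to the enclave; @tcs, @e, @handler and
 * @leaf land at 8(%rsp), 0x10(%rsp), 0x18(%rsp) and 0x20(%rsp).
 */
typedef int (*vdso_sgx_enter_enclave_t)(unsigned long rdi, unsigned long rsi,
                                        unsigned long rdx, unsigned long rcx,
                                        unsigned long r8, unsigned long r9,
                                        void *tcs,
                                        struct sgx_enclave_exception *e,
                                        sgx_enclave_exit_handler_t handler,
                                        int leaf);

/* @enter is __vdso_sgx_enter_enclave, resolved from the vDSO by the caller. */
static int enter_enclave(vdso_sgx_enter_enclave_t enter, void *tcs,
                         struct sgx_enclave_exception *e)
{
    return enter(0, 0, 0, 0, 0, 0, tcs, e, NULL, 2 /* EENTER */);
}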

Then C can call it without additional overhead. And people who need to
"extend" the C ABI can still do so.
Nathaniel McCallum March 13, 2020, 4:07 p.m. UTC | #7
On Thu, Mar 12, 2020 at 8:52 PM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Wed, Mar 11, 2020 at 03:30:44PM -0400, Nathaniel McCallum wrote:
> > On Tue, Mar 3, 2020 at 6:40 PM Jarkko Sakkinen
> > <jarkko.sakkinen@linux.intel.com> wrote:
> > > + * The exit handler's return value is interpreted as follows:
> > > + *  >0:                continue, restart __vdso_sgx_enter_enclave() with @ret as @leaf
> > > + *   0:                success, return @ret to the caller
> > > + *  <0:                error, return @ret to the caller
> > > + *
> > > + * The userspace exit handler is responsible for unwinding the stack, e.g. to
> > > + * pop @e, u_rsp and @tcs, prior to returning to __vdso_sgx_enter_enclave().
> >
> > Unless I misunderstand, this documentation...
>
> Hrm, that does appear wrong.  I'm guessing that was leftover from a previous
> incarnation of the code.  Or I botched the description, which is just as
> likely.

I figured out what happened on my end. This documentation error led me
to misread the code. More below.

> > > + * The exit handler may also transfer control, e.g. via longjmp() or a C++
> > > + * exception, without returning to __vdso_sgx_enter_enclave().
> > > + *
> > > + * Return:
> > > + *  0 on success,
> > > + *  -EINVAL if ENCLU leaf is not allowed,
> > > + *  -EFAULT if an exception occurs on ENCLU or within the enclave
> > > + *  -errno for all other negative values returned by the userspace exit handler
> > > + */
>
> ...
>
> > > +       /* Load the callback pointer to %rax and invoke it via retpoline. */
> > > +       mov     0x20(%rbp), %rax
> > > +       call    .Lretpoline
> > > +
> > > +       /* Restore %rsp to its post-exit value. */
> > > +       mov     %rbx, %rsp
> >
> > ... doesn't seem to match this code.
> >
> > If the handler pops from the stack and then we restore the stack here,
> > the handler had no effect.
> >
> > Also, one difference between this interface and a raw ENCLU[EENTER] is
> > that we can't pass arguments on the untrusted stack during EEXIT. If
> > we want to support that workflow, then we need to allow the ability
> > for the handler to pop "additional" values without restoring the stack
> > pointer to the exact value here.
>
> The callback shenanigans exist precisely to allow passing arguments on the
> untrusted stack.  The vDSO is very careful to preserve the stack memory
> above RSP, and to snapshot RSP at the time of exit, e.g. the arguments in
> memory and their addresses relative to u_rsp live across EEXIT.  It's the
> same basic concept as regular function calls, e.g. the callee doesn't pop
> params off the stack, it just knows what addresses relative to RSP hold
> the data it wants.  The enclave, being the caller, is responsible for
> cleaning up u_rsp.
>
> FWIW, if the handler reaaaly wanted to pop off the stack, it could do so,
> fixup the stack, and then re-call __vdso_sgx_enter_enclave() instead of
> returning (to the original __vdso_sgx_enter_enclave()).

My understanding from the documentation issue above was that *if* you
wanted to push parameters back on the stack during enclave exit, you
would *have* to supply a handler so it could pop the parameters and
reset the stack. Which is why restoring %rsp from %rbx didn't make
sense to me.

Related to my other message in this thread, if
__vdso_sgx_enter_enclave() preserved %rbx and took @leaf as a stack
parameter, you could call __vdso_sgx_enter_enclave() from C so long as
the enclave didn't push return arguments on the stack. A workaround
for that case would be to fix up the stack in the handler. It would be
enough for the handler to simply set %rbx to the desired stack
location and return (though all of this is unclean, of course).

> > > +       /*
> > > +        * If the return from callback is zero or negative, return immediately,
> > > +        * else re-execute ENCLU with the postive return value interpreted as
> > > +        * the requested ENCLU leaf.
> > > +        */
> > > +       cmp     $0, %eax
> > > +       jle     .Lout
> > > +       jmp     .Lenter_enclave
> > > +
> > > +.Lretpoline:
> > > +       call    2f
> > > +1:     pause
> > > +       lfence
> > > +       jmp     1b
> > > +2:     mov     %rax, (%rsp)
> > > +       ret
> > > +       .cfi_endproc
> > > +
> > > +_ASM_VDSO_EXTABLE_HANDLE(.Lenclu_eenter_eresume, .Lhandle_exception)
>
Sean Christopherson March 13, 2020, 4:33 p.m. UTC | #8
On Fri, Mar 13, 2020 at 12:07:55PM -0400, Nathaniel McCallum wrote:
> On Thu, Mar 12, 2020 at 8:52 PM Sean Christopherson
> <sean.j.christopherson@intel.com> wrote:
> > FWIW, if the handler reaaaly wanted to pop off the stack, it could do so,
> > fixup the stack, and then re-call __vdso_sgx_enter_enclave() instead of
> > returning (to the original __vdso_sgx_enter_enclave()).
> 
> My understanding from the documentation issue above was that *if* you
> wanted to push parameters back on the stack during enclave exit, you
> would *have* to supply a handler so it could pop the parameters and
> reset the stack. Which is why restoring %rsp from %rbx didn't make
> sense to me.

Yep.
Sean Christopherson March 13, 2020, 4:46 p.m. UTC | #9
On Fri, Mar 13, 2020 at 11:48:54AM -0400, Nathaniel McCallum wrote:
> Thinking about this more carefully, I still think that at least part
> of my critique still stands.
> 
> __vdso_sgx_enter_enclave() doesn't use the x86-64 ABI. This means that
> there will always be an assembly wrapper for
> __vdso_sgx_enter_enclave(). But because __vdso_sgx_enter_enclave()
> doesn't save %rbx, the wrapper is forced to in order to be called from
> C.
> 
> A common pattern for the wrapper will be to do something like this:
> 
> # void enter_enclave(rdi, rsi, rdx, unused, r8, r9, @tcs, @e,
> @handler, @leaf, @vdso)
> enter_enclave:
>     push %rbx
>     push $0 /* align */
>     push 0x48(%rsp)
>     push 0x48(%rsp)
>     push 0x48(%rsp)
> 
>     mov 0x70(%rsp), %eax
>     call *0x68(%rsp)
> 
>     add $0x20, %rsp
>     pop %rbx
>     ret
> 
> Because __vdso_sgx_enter_enclave() doesn't preserve %rbx, the wrapper
> is forced to reposition stack parameters in a performance-critical
> path. On the other hand, if __vdso_sgx_enter_enclave() preserved %rbx,
> you could implement the above as:
> 
> # void enter_enclave(rdi, rsi, rdx, unused, r8, r9, @tcs, @e,
> @handler, @leaf, @vdso)
> enter_enclave:
>     mov 0x20(%rsp), %eax
>     jmp *0x28(%rsp)
> 
> This also implies that if __vdso_sgx_enter_enclave() took @leaf as a
> stack parameter and preserved %rbx, it would be x86-64 ABI compliant
> enough to call from C if the enclave preserves all callee-saved
> registers besides %rbx (Enarx does).
> 
> What are the downsides of this approach? It also doesn't harm the more
> extended case when you need to use an assembly wrapper to setup
> additional registers. This can still be done. It does imply an extra
> push and mov instruction. But because there are currently an odd
> number of stack function parameters, the push also removes an
> alignment instruction where the stack is aligned before the call to
> __vdso_sgx_enter_enclave() (likely). Further, the push and mov are
> going to be performed by *someone* in order to call
> __vdso_sgx_enter_enclave() from C.
> 
> Therefore, I'd like to propose that __vdso_sgx_enter_enclave():
>   * Preserve %rbx.

At first glance, that looks sane.  Being able to call __vdso... from C
would certainly be nice.

>   * Take the leaf as an additional stack parameter instead of passing
> it in %rax.

Does the leaf even need to be a stack param?  Wouldn't it be possible to
use %rcx as @leaf instead of @unused?  E.g.

int __vdso_sgx_enter_enclave(unsigned long rdi, unsigned long rsi,
			     unsigned long rdx, unsigned int leaf,
			     unsigned long r8,  unsigned long r9,
			     void *tcs, struct sgx_enclave_exception *e,
			     sgx_enclave_exit_handler_t handler)
{
	push	%rbp
	mov	%rsp, %rbp
	push	%rbx

	mov	%ecx, %eax
.Lenter_enclave:
	cmp	$0x2, %eax
	jb	.Linvalid_leaf
	cmp	$0x3, %eax
	ja	.Linvalid_leaf

	mov	0x10(%rbp), %rbx
	lea	.Lasync_exit_pointer(%rip), %rcx

.Lasync_exit_pointer:
.Lenclu_eenter_eresume:
	enclu

	xor	%eax, %eax

.Lhandle_exit:
	cmp	$0, 0x20(%rbp)
	jne	.Linvoke_userspace_handler

.Lout:
	pop	%rbx
	leave
	ret
}
		

> Then C can call it without additional overhead. And people who need to
> "extend" the C ABI can still do so.
>
Nathaniel McCallum March 13, 2020, 6:32 p.m. UTC | #10
On Fri, Mar 13, 2020 at 12:46 PM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Fri, Mar 13, 2020 at 11:48:54AM -0400, Nathaniel McCallum wrote:
> > Thinking about this more carefully, I still think that at least part
> > of my critique still stands.
> >
> > __vdso_sgx_enter_enclave() doesn't use the x86-64 ABI. This means that
> > there will always be an assembly wrapper for
> > __vdso_sgx_enter_enclave(). But because __vdso_sgx_enter_enclave()
> > doesn't save %rbx, the wrapper is forced to in order to be called from
> > C.
> >
> > A common pattern for the wrapper will be to do something like this:
> >
> > # void enter_enclave(rdi, rsi, rdx, unused, r8, r9, @tcs, @e,
> > @handler, @leaf, @vdso)
> > enter_enclave:
> >     push %rbx
> >     push $0 /* align */
> >     push 0x48(%rsp)
> >     push 0x48(%rsp)
> >     push 0x48(%rsp)
> >
> >     mov 0x70(%rsp), %eax
> >     call *0x68(%rsp)
> >
> >     add $0x20, %rsp
> >     pop %rbx
> >     ret
> >
> > Because __vdso_sgx_enter_enclave() doesn't preserve %rbx, the wrapper
> > is forced to reposition stack parameters in a performance-critical
> > path. On the other hand, if __vdso_sgx_enter_enclave() preserved %rbx,
> > you could implement the above as:
> >
> > # void enter_enclave(rdi, rsi, rdx, unused, r8, r9, @tcs, @e,
> > @handler, @leaf, @vdso)
> > enter_enclave:
> >     mov 0x20(%rsp), %eax
> >     jmp *0x28(%rsp)
> >
> > This also implies that if __vdso_sgx_enter_enclave() took @leaf as a
> > stack parameter and preserved %rbx, it would be x86-64 ABI compliant
> > enough to call from C if the enclave preserves all callee-saved
> > registers besides %rbx (Enarx does).
> >
> > What are the downsides of this approach? It also doesn't harm the more
> > extended case when you need to use an assembly wrapper to setup
> > additional registers. This can still be done. It does imply an extra
> > push and mov instruction. But because there are currently an odd
> > number of stack function parameters, the push also removes an
> > alignment instruction where the stack is aligned before the call to
> > __vdso_sgx_enter_enclave() (likely). Further, the push and mov are
> > going to be performed by *someone* in order to call
> > __vdso_sgx_enter_enclave() from C.
> >
> > Therefore, I'd like to propose that __vdso_sgx_enter_enclave():
> >   * Preserve %rbx.
>
> At first glance, that looks sane.  Being able to call __vdso... from C
> would certainly be nice.

Agreed. I think ergonomically we want __vdso...() to be called from C
and the handler to be implemented in asm (optionally); without
breaking the ability to call __vdso..() from asm in special cases.

I think all ergonomic issues get solved by the following:
   * Pass a void * into the handler from C through __vdso...().
   * Allow the handler to pop parameters off of the output stack without hacks.

This allows the handler to pop extra arguments off the stack and write
them into the memory at the void *. Then the handler can be very small
and pass logic back to the caller of __vdso...().

Here's what this all means for the enclave. For maximum usability, the
enclave should preserve all callee-saved registers (except %rbx, which
is preserved by __vdso..()). For each ABI rule that the enclave
breaks, you need logic in a handler to fix it. So if you push return
params on the stack, the handler needs to undo that.

This doesn't compromise the ability to treat __vdso...() like ENCLU if
you need the full power. But it does make it significantly easier to
consume when you don't have special needs. So as I see it, __vdso...()
should:

1. preserve %rbx
2. take leaf in %rcx
3. gain a void* stack param which is passed to the handler
4. sub/add to %rsp rather than save/restore

That would make this a very usable and fast interface without
sacrificing any of its current power.

> >   * Take the leaf as an additional stack parameter instead of passing
> > it in %rax.
>
> Does the leaf even need to be a stack param?  Wouldn't it be possible to
> use %rcx as @leaf instead of @unusued?  E.g.

Even better!

> int __vdso_sgx_enter_enclave(unsigned long rdi, unsigned long rsi,
>                              unsigned long rdx, unsigned int leaf,
>                              unsigned long r8,  unsigned long r9,
>                              void *tcs, struct sgx_enclave_exception *e,
>                              sgx_enclave_exit_handler_t handler)
> {
>         push    %rbp
>         mov     %rsp, %rbp
>         push    %rbx
>
>         mov     %ecx, %eax
> .Lenter_enclave
>         cmp     $0x2, %eax
>         jb      .Linvalid_leaf
>         cmp     $0x3, %eax
>         ja      .Linvalid_leaf
>
>         mov     0x0x10(%rbp), %rbx
>         lea     .Lasync_exit_pointer(%rip), %rcx
>
> .Lasync_exit_pointer:
> .Lenclu_eenter_eresume:
>         enclu
>
>         xor     %eax, %eax
>
> .Lhandle_exit:
>         cmp     $0, 0x20(%rbp)
>         jne     .Linvoke_userspace_handler
>
> .Lout
>         pop     %rbx
>         leave
>         ret
> }
>
>
> > Then C can call it without additional overhead. And people who need to
> > "extend" the C ABI can still do so.
> >
>
Sean Christopherson March 13, 2020, 6:44 p.m. UTC | #11
On Fri, Mar 13, 2020 at 02:32:29PM -0400, Nathaniel McCallum wrote:
> On Fri, Mar 13, 2020 at 12:46 PM Sean Christopherson
> <sean.j.christopherson@intel.com> wrote:
> >
> > On Fri, Mar 13, 2020 at 11:48:54AM -0400, Nathaniel McCallum wrote:
> > > Therefore, I'd like to propose that __vdso_sgx_enter_enclave():
> > >   * Preserve %rbx.
> >
> > At first glance, that looks sane.  Being able to call __vdso... from C
> > would certainly be nice.
> 
> Agreed. I think ergonomically we want __vdso...() to be called from C
> and the handler to be implemented in asm (optionally); without
> breaking the ability to call __vdso..() from asm in special cases.
> 
> I think all ergonomic issues get solved by the following:
>    * Pass a void * into the handler from C through __vdso...().
>    * Allow the handler to pop parameters off of the output stack without hacks.
> 
> This allows the handler to pop extra arguments off the stack and write
> them into the memory at the void *. Then the handler can be very small
> and pass logic back to the caller of __vdso...().
> 
> Here's what this all means for the enclave. For maximum usability, the
> enclave should preserve all callee-saved registers (except %rbx, which
> is preserved by __vdso..()). For each ABI rule that the enclave
> breaks, you need logic in a handler to fix it. So if you push return
> params on the stack, the handler needs to undo that.

Or the untrusted runtime needs to wrap the __vdso() to save state that is
clobbered by the enclave.  Just want to make it crystal clear that using a
handler is only required for stack shenanigans.

> This doesn't compromise the ability to treat __vsdo...() like ENCLU if
> you need the full power. But it does make it significantly easier to
> consume when you don't have special needs. So as I see it, __vdso...()
> should:
> 
> 1. preserve %rbx
> 2. take leaf in %rcx
> 3. gain a void* stack param which is passed to the handler

Unless I'm misunderstanding the request, this already exists.  %rsp at the
time of EEXIT is passed to the handler.

> 4. sub/add to %rsp rather than save/restore

Can you elaborate on why you want to sub/add to %rsp instead of having the
enclave unwind the stack?  Preserving %rsp across EEXIT/ERESUME seems more
in line with function call semantics, which I assume is desirable?  E.g.

  push param3
  push param2
  push param1

  enclu[EEXIT]

  add $0x18, %rsp

> That would make this a very usable and fast interface without
> sacrificing any of its current power.
Nathaniel McCallum March 13, 2020, 8:14 p.m. UTC | #12
On Fri, Mar 13, 2020 at 2:45 PM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Fri, Mar 13, 2020 at 02:32:29PM -0400, Nathaniel McCallum wrote:
> > On Fri, Mar 13, 2020 at 12:46 PM Sean Christopherson
> > <sean.j.christopherson@intel.com> wrote:
> > >
> > > On Fri, Mar 13, 2020 at 11:48:54AM -0400, Nathaniel McCallum wrote:
> > > > Therefore, I'd like to propose that __vdso_sgx_enter_enclave():
> > > >   * Preserve %rbx.
> > >
> > > At first glance, that looks sane.  Being able to call __vdso... from C
> > > would certainly be nice.
> >
> > Agreed. I think ergonomically we want __vdso...() to be called from C
> > and the handler to be implemented in asm (optionally); without
> > breaking the ability to call __vdso..() from asm in special cases.
> >
> > I think all ergonomic issues get solved by the following:
> >    * Pass a void * into the handler from C through __vdso...().
> >    * Allow the handler to pop parameters off of the output stack without hacks.
> >
> > This allows the handler to pop extra arguments off the stack and write
> > them into the memory at the void *. Then the handler can be very small
> > and pass logic back to the caller of __vdso...().
> >
> > Here's what this all means for the enclave. For maximum usability, the
> > enclave should preserve all callee-saved registers (except %rbx, which
> > is preserved by __vdso..()). For each ABI rule that the enclave
> > breaks, you need logic in a handler to fix it. So if you push return
> > params on the stack, the handler needs to undo that.
>
> Or the untrusted runtime needs to wrap the __vdso() to save state that is
> clobbered by the enclave.  Just want to make it crystal clear that using a
> handler is only required for stack shenanigans.

Yup.

> > This doesn't compromise the ability to treat __vsdo...() like ENCLU if
> > you need the full power. But it does make it significantly easier to
> > consume when you don't have special needs. So as I see it, __vdso...()
> > should:
> >
> > 1. preserve %rbx
> > 2. take leaf in %rcx
> > 3. gain a void* stack param which is passed to the handler
>
> Unless I'm misunderstanding the request, this already exists.  %rsp at the
> time of EEXIT is passed to the handler.

Sorry, different stack parameter. I mean this:

typedef int (*sgx_enclave_exit_handler_t)(
    long rdi,
    long rsi,
    long rdx,
    long ursp,
    long r8,
    long r9,
    int ret,
    void *tcs,
    struct sgx_enclave_exception *e,
    void *misc
);

int __vdso_sgx_enter_enclave(
    long rdi,
    long rsi,
    long rdx,
    int leaf,
    long r8,
    long r9,
    void *tcs,
    struct sgx_enclave_exception *e,
    void *misc,
    sgx_enclave_exit_handler_t handler
);

This is so that the caller of __vdso...() can pass context into the
handler. Note that I've also reordered the stack parameters so that
the stack order can be reused.
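
For example, under the handler signature above, a minimal C handler could copy the extra output values the enclave pushed on the untrusted stack into a caller-provided struct passed through @misc; the struct, its field names and the assumption that exactly two values were pushed are purely illustrative:

struct sgx_enclave_exception;   /* defined by the patch's uapi header */

/* Caller-provided context handed to the handler via @misc. */
struct enclave_output {
        long extra[2];  /* values the enclave pushed on the untrusted stack */
};

/* Exit handler matching the prototype proposed above. */
static int collect_output(long rdi, long rsi, long rdx, long ursp,
                          long r8, long r9, int ret, void *tcs,
                          struct sgx_enclave_exception *e, void *misc)
{
        struct enclave_output *out = misc;
        const long *u_stack = (const long *)ursp;

        /* @ursp points at the last value the enclave pushed before EEXIT. */
        out->extra[0] = u_stack[0];
        out->extra[1] = u_stack[1];

        /* Zero tells __vdso_sgx_enter_enclave() to return to its caller. */
        return 0;
}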

> > 4. sub/add to %rsp rather than save/restore
>
> Can you elaborate on why you want to sub/add to %rsp instead of having the
> enclave unwind the stack?  Preserving %rsp across EEXIT/ERESUME seems more
> in line with function call semantics, which I assume is desirable?  E.g.
>
>   push param3
>   push param2
>   push param1
>
>   enclu[EEXIT]
>
>   add $0x18, %rsp

Before enclave EEXIT, the enclave restores %rsp to the value it had
before EENTER was called. Then it pushes additional output arguments
onto the stack. The enclave calls ENCLU[EEXIT].

We are now in __vdso...() on the way back to the caller. However, %rsp
has a different value than we entered the function with. This breaks
x86_64 ABI, obviously. The handler needs to fix this up: how does it
do so?

In the current code, __vdso..() saves the value of %rsp, calls the
handler and then restores %rsp. The handler can fix up the stack by
setting the correct value to %rbx and returning without restoring it.
But this requires internal knowledge of the __vdso...() function,
which could theoretically change in the future.

If instead the __vdso...() only did add/sub, then the handler could do:
1. pop return address
2. pop handler stack params
3. pop enclave additional output stack params
4. push handler stack params
5. push return address

While this is more work, it is standard calling convention work that
doesn't require internal knowledge of __vdso..(). Alternatively, if we
don't like the extra work, we can document the %rbx hack explicitly
into the handler documentation and make it part of the interface. But
we need some explicit way for the handler to pop enclave output stack
params that doesn't depend on internal knowledge of the __vdso...()
invariants.

> > That would make this a very usable and fast interface without
> > sacrificing any of its current power.
>
Sean Christopherson March 13, 2020, 10:08 p.m. UTC | #13
On Fri, Mar 13, 2020 at 04:14:01PM -0400, Nathaniel McCallum wrote:
> On Fri, Mar 13, 2020 at 2:45 PM Sean Christopherson
> <sean.j.christopherson@intel.com> wrote:
 > > This doesn't compromise the ability to treat __vsdo...() like ENCLU if
> > > you need the full power. But it does make it significantly easier to
> > > consume when you don't have special needs. So as I see it, __vdso...()
> > > should:
> > >
> > > 1. preserve %rbx
> > > 2. take leaf in %rcx
> > > 3. gain a void* stack param which is passed to the handler
> >
> > Unless I'm misunderstanding the request, this already exists.  %rsp at the
> > time of EEXIT is passed to the handler.
> 
> Sorry, different stack parameter. I mean this:
> 
> typedef int (*sgx_enclave_exit_handler_t)(
>     long rdi,
>     long rsi,
>     long rdx,
>     long ursp,
>     long r8,
>     long r9,
>     int ret,
>     void *tcs,
>     struct sgx_enclave_exception *e,
>     void *misc
> );
> 
> int __vdso_sgx_enter_enclave(
>     long rdi,
>     long rsi,
>     long rdx,
>     int leaf,
>     long r8,
>     long r9,
>     void *tcs,
>     struct sgx_enclave_exception *e,
>     void *misc,
>     sgx_enclave_exit_handler_t handler
> );
> 
> This is so that the caller of __vdso...() can pass context into the
> handler.

Hrm, I'm not a fan of adding a param that is only consumed by the handler,
especially when there are multiple alternatives, e.g. fudge the param in
assembly before calling __vdso(), have the enclave supply the context in a
volatile register, etc...

> Note that I've also reordered the stack parameters so that the stack
> order can be reused.

Ah, ret<->tcs, took me a minute...

Does preserving tcs->e->misc ordering matter all that much?  __vdso() needs
to manually copy them either way.  I ask because putting @misc at the end
would allow implementations that don't use @handler to omit the param (if
I've done my math correctly, which is always a big if).  That would make
adding the handler-only param a little more palatable.

> > > 4. sub/add to %rsp rather than save/restore
> >
> > Can you elaborate on why you want to sub/add to %rsp instead of having the
> > enclave unwind the stack?  Preserving %rsp across EEXIT/ERESUME seems more
> > in line with function call semantics, which I assume is desirable?  E.g.
> >
> >   push param3
> >   push param2
> >   push param1
> >
> >   enclu[EEXIT]
> >
> >   add $0x18, %rsp
> 
> Before enclave EEXIT, the enclave restores %rsp to the value it had
> before EENTER was called. Then it pushes additional output arguments
> onto the stack. The enclave calls EENCLU[EEXIT].
> 
> We are now in __vdso...() on the way back to the caller. However, %rsp
> has a different value than we entered the function with. This breaks
> x86_64 ABI, obviously. The handler needs to fix this up: how does it
> do so?
> 
> In the current code, __vdso..() saves the value of %rsp, calls the
> handler and then restores %rsp. The handler can fix up the stack by
> setting the correct value to %rbx and returning without restoring it.

Ah, you're referring to the patch where the handler decides to return all
the way back to the caller of __vdso...().

> But this requires internal knowledge of the __vdso...() function,
> which could theoretically change in the future.
> 
> If instead the __vdso...() only did add/sub, then the handler could do:
> 1. pop return address
> 2. pop handler stack params
> 3. pop enclave additional output stack params
> 4. push handler stack params
> 5. push return address
> 
> While this is more work, it is standard calling convention work that
> doesn't require internal knowledge of __vdso..(). Alternatively, if we
> don't like the extra work, we can document the %rbx hack explicitly
> into the handler documentation and make it part of the interface. But
> we need some explicit way for the handler to pop enclave output stack
> params that doesn't depend on internal knowledge of the __vdso...()
> invariants.

IIUC, this is what you're suggesting?  Having to align the stack makes this
a bit annoying, but it's not bad by any means.

diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
index 94a8e5f99961..05d54f79b557 100644
--- a/arch/x86/entry/vdso/vsgx_enter_enclave.S
+++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
@@ -139,8 +139,9 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
        /* Pass the untrusted RSP (at exit) to the callback via %rcx. */
        mov     %rsp, %rcx

-       /* Save the untrusted RSP in %rbx (non-volatile register). */
+       /* Save the untrusted RSP offset in %rbx (non-volatile register). */
        mov     %rsp, %rbx
+       and     $0xf, %rbx

        /*
         * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned
@@ -161,8 +162,8 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
        mov     0x20(%rbp), %rax
        call    .Lretpoline

-       /* Restore %rsp to its post-exit value. */
-       mov     %rbx, %rsp
+       /* Undo the post-exit %rsp adjustment. */
+       lea     0x20(%rsp,%rbx), %rsp


That's reasonable, lets the handler play more games with minimal overhead.

> > > That would make this a very usable and fast interface without
> > > sacrificing any of its current power.
> >
>
Nathaniel McCallum March 14, 2020, 2:10 p.m. UTC | #14
On Fri, Mar 13, 2020 at 6:08 PM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Fri, Mar 13, 2020 at 04:14:01PM -0400, Nathaniel McCallum wrote:
> > On Fri, Mar 13, 2020 at 2:45 PM Sean Christopherson
> > <sean.j.christopherson@intel.com> wrote:
>  > > This doesn't compromise the ability to treat __vsdo...() like ENCLU if
> > > > you need the full power. But it does make it significantly easier to
> > > > consume when you don't have special needs. So as I see it, __vdso...()
> > > > should:
> > > >
> > > > 1. preserve %rbx
> > > > 2. take leaf in %rcx
> > > > 3. gain a void* stack param which is passed to the handler
> > >
> > > Unless I'm misunderstanding the request, this already exists.  %rsp at the
> > > time of EEXIT is passed to the handler.
> >
> > Sorry, different stack parameter. I mean this:
> >
> > typedef int (*sgx_enclave_exit_handler_t)(
> >     long rdi,
> >     long rsi,
> >     long rdx,
> >     long ursp,
> >     long r8,
> >     long r9,
> >     int ret,
> >     void *tcs,
> >     struct sgx_enclave_exception *e,
> >     void *misc
> > );
> >
> > int __vdso_sgx_enter_enclave(
> >     long rdi,
> >     long rsi,
> >     long rdx,
> >     int leaf,
> >     long r8,
> >     long r9,
> >     void *tcs,
> >     struct sgx_enclave_exception *e,
> >     void *misc,
> >     sgx_enclave_exit_handler_t handler
> > );
> >
> > This is so that the caller of __vdso...() can pass context into the
> > handler.
>
> Hrm, I'm not a fan of adding a param that is only consumed by the handler,
> especially when there are multiple alternatives, e.g. fudge the param in
> assembly before calling __vdso(), have the enclave supply the context in a
> volatile register, etc...

Yes, but all of those require assembly. The whole point of this is
ergonomics without assembly. Once you can call __vdso() without
assembly, having to resort to assembly to make it useful will feel
painful. I imagine it would be pretty common to pass something like a
jmp_buf reference or a reference to a struct for collecting output
stack arguments through misc.

> > Note that I've also reordered the stack parameters so that the stack
> > order can be reused.
>
> Ah, ret<->tcs, took me a minute...
>
> Does preserving tsc->e->misc ordering matter all that much?

Not really. I was just trying to aid the reader of the assembly. If
there are more important concerns, fine.

>  __vdso() needs
> to manually copy them either way.  I ask because putting @misc at the end
> would allow implementations that don't use @handler to omit the param (if
> I've done my math correctly, which is always a big if).  That would make
> adding the handler-only param a little more palatable.

Fine by me.

> > > > 4. sub/add to %rsp rather than save/restore
> > >
> > > Can you elaborate on why you want to sub/add to %rsp instead of having the
> > > enclave unwind the stack?  Preserving %rsp across EEXIT/ERESUME seems more
> > > in line with function call semantics, which I assume is desirable?  E.g.
> > >
> > >   push param3
> > >   push param2
> > >   push param1
> > >
> > >   enclu[EEXIT]
> > >
> > >   add $0x18, %rsp
> >
> > Before enclave EEXIT, the enclave restores %rsp to the value it had
> > before EENTER was called. Then it pushes additional output arguments
> > onto the stack. The enclave calls EENCLU[EEXIT].
> >
> > We are now in __vdso...() on the way back to the caller. However, %rsp
> > has a different value than we entered the function with. This breaks
> > x86_64 ABI, obviously. The handler needs to fix this up: how does it
> > do so?
> >
> > In the current code, __vdso..() saves the value of %rsp, calls the
> > handler and then restores %rsp. The handler can fix up the stack by
> > setting the correct value to %rbx and returning without restoring it.
>
> Ah, you're referring to the patch where the handler decides to return all
> the way back to the caller of __vdso...().
>
> > But this requires internal knowledge of the __vdso...() function,
> > which could theoretically change in the future.
> >
> > If instead the __vdso...() only did add/sub, then the handler could do:
> > 1. pop return address
> > 2. pop handler stack params
> > 3. pop enclave additional output stack params
> > 4. push handler stack params
> > 5. push return address
> >
> > While this is more work, it is standard calling convention work that
> > doesn't require internal knowledge of __vdso..(). Alternatively, if we
> > don't like the extra work, we can document the %rbx hack explicitly
> > into the handler documentation and make it part of the interface. But
> > we need some explicit way for the handler to pop enclave output stack
> > params that doesn't depend on internal knowledge of the __vdso...()
> > invariants.
>
> IIUC, this is what you're suggesting?  Having to align the stack makes this
> a bit annoying, but it's not bad by any means.
>
> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> index 94a8e5f99961..05d54f79b557 100644
> --- a/arch/x86/entry/vdso/vsgx_enter_enclave.S
> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> @@ -139,8 +139,9 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
>         /* Pass the untrusted RSP (at exit) to the callback via %rcx. */
>         mov     %rsp, %rcx
>
> -       /* Save the untrusted RSP in %rbx (non-volatile register). */
> +       /* Save the untrusted RSP offset in %rbx (non-volatile register). */
>         mov     %rsp, %rbx
> +       and     $0xf, %rbx
>
>         /*
>          * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned
> @@ -161,8 +162,8 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
>         mov     0x20(%rbp), %rax
>         call    .Lretpoline
>
> -       /* Restore %rsp to its post-exit value. */
> -       mov     %rbx, %rsp
> +       /* Undo the post-exit %rsp adjustment. */
> +       lea     0x20(%rsp,%rbx), %rsp
>
>
> That's reasonable, let's the handler play more games with minimal overhead.

Yes, exactly!

> > > > That would make this a very usable and fast interface without
> > > > sacrificing any of its current power.
> > >
> >
>
Jarkko Sakkinen March 15, 2020, 1:25 a.m. UTC | #15
On Wed, Mar 11, 2020 at 01:30:07PM -0400, Nathaniel McCallum wrote:
> Currently, the selftest has a wrapper around
> __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
> registers (CSRs), though it uses none of them. Then it calls this
> function which uses %rbx but preserves none of the CSRs. Then it jumps
> into an enclave which zeroes all these registers before returning.
> Thus:
> 
>   1. wrapper saves all CSRs
>   2. wrapper repositions stack arguments
>   3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
>   4. selftest zeros all CSRs
>   5. wrapper loads all CSRs
> 
> I'd like to propose instead that the enclave be responsible for saving
> and restoring CSRs. So instead of the above we have:
>   1. __vdso_sgx_enter_enclave() saves %rbx
>   2. enclave saves CSRs
>   3. enclave loads CSRs
>   4. __vdso_sgx_enter_enclave() loads %rbx
> 
> I know that lots of other stuff happens during enclave transitions,
> but at the very least we could reduce the number of instructions
> through this critical path.

What Jethro said and also that it is a good general principle to cut
down the semantics of any vDSO to be as minimal as possible.

I.e. even if saving RBX would somehow make sense, it *can* be left
out without loss in terms of what can be done with the vDSO.

/Jarkko
Nathaniel McCallum March 15, 2020, 5:53 p.m. UTC | #16
On Sat, Mar 14, 2020 at 9:25 PM Jarkko Sakkinen
<jarkko.sakkinen@linux.intel.com> wrote:
>
> On Wed, Mar 11, 2020 at 01:30:07PM -0400, Nathaniel McCallum wrote:
> > Currently, the selftest has a wrapper around
> > __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
> > registers (CSRs), though it uses none of them. Then it calls this
> > function which uses %rbx but preserves none of the CSRs. Then it jumps
> > into an enclave which zeroes all these registers before returning.
> > Thus:
> >
> >   1. wrapper saves all CSRs
> >   2. wrapper repositions stack arguments
> >   3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
> >   4. selftest zeros all CSRs
> >   5. wrapper loads all CSRs
> >
> > I'd like to propose instead that the enclave be responsible for saving
> > and restoring CSRs. So instead of the above we have:
> >   1. __vdso_sgx_enter_enclave() saves %rbx
> >   2. enclave saves CSRs
> >   3. enclave loads CSRs
> >   4. __vdso_sgx_enter_enclave() loads %rbx
> >
> > I know that lots of other stuff happens during enclave transitions,
> > but at the very least we could reduce the number of instructions
> > through this critical path.
>
> What Jethro said and also that it is a good general principle to cut
> down the semantics of any vdso as minimal as possible.
>
> I.e. even if saving RBX would make somehow sense it *can* be left
> out without loss in terms of what can be done with the vDSO.

Please read the rest of the thread. Sean and I have hammered out some
sensible and effective changes.
Jethro Beekman March 16, 2020, 1:31 p.m. UTC | #17
On 2020-03-15 18:53, Nathaniel McCallum wrote:
> On Sat, Mar 14, 2020 at 9:25 PM Jarkko Sakkinen
> <jarkko.sakkinen@linux.intel.com> wrote:
>>
>> On Wed, Mar 11, 2020 at 01:30:07PM -0400, Nathaniel McCallum wrote:
>>> Currently, the selftest has a wrapper around
>>> __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
>>> registers (CSRs), though it uses none of them. Then it calls this
>>> function which uses %rbx but preserves none of the CSRs. Then it jumps
>>> into an enclave which zeroes all these registers before returning.
>>> Thus:
>>>
>>>   1. wrapper saves all CSRs
>>>   2. wrapper repositions stack arguments
>>>   3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
>>>   4. selftest zeros all CSRs
>>>   5. wrapper loads all CSRs
>>>
>>> I'd like to propose instead that the enclave be responsible for saving
>>> and restoring CSRs. So instead of the above we have:
>>>   1. __vdso_sgx_enter_enclave() saves %rbx
>>>   2. enclave saves CSRs
>>>   3. enclave loads CSRs
>>>   4. __vdso_sgx_enter_enclave() loads %rbx
>>>
>>> I know that lots of other stuff happens during enclave transitions,
>>> but at the very least we could reduce the number of instructions
>>> through this critical path.
>>
>> What Jethro said and also that it is a good general principle to cut
>> down the semantics of any vdso as minimal as possible.
>>
>> I.e. even if saving RBX would make somehow sense it *can* be left
>> out without loss in terms of what can be done with the vDSO.
> 
> Please read the rest of the thread. Sean and I have hammered out some
> sensible and effective changes.

I'm not sure they're sensible? By departing from the ENCLU calling convention, both the VDSO and the wrapper become more complicated: the wrapper, because now it needs to implement all kinds of logic for different behavior depending on whether the VDSO is or isn't available.

I agree with Jarkko that everything should be kept small and simple. Calling a couple extra instructions is going to have a negligible effect compared to the actual time EENTER/EEXIT take. 

Can someone remind me why we're not passing TCS in RBX but on the stack?

--
Jethro Beekman | Fortanix
Jarkko Sakkinen March 16, 2020, 1:56 p.m. UTC | #18
On Sun, 2020-03-15 at 13:53 -0400, Nathaniel McCallum wrote:
> On Sat, Mar 14, 2020 at 9:25 PM Jarkko Sakkinen
> <jarkko.sakkinen@linux.intel.com> wrote:
> > On Wed, Mar 11, 2020 at 01:30:07PM -0400, Nathaniel McCallum wrote:
> > > Currently, the selftest has a wrapper around
> > > __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
> > > registers (CSRs), though it uses none of them. Then it calls this
> > > function which uses %rbx but preserves none of the CSRs. Then it jumps
> > > into an enclave which zeroes all these registers before returning.
> > > Thus:
> > > 
> > >   1. wrapper saves all CSRs
> > >   2. wrapper repositions stack arguments
> > >   3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
> > >   4. selftest zeros all CSRs
> > >   5. wrapper loads all CSRs
> > > 
> > > I'd like to propose instead that the enclave be responsible for saving
> > > and restoring CSRs. So instead of the above we have:
> > >   1. __vdso_sgx_enter_enclave() saves %rbx
> > >   2. enclave saves CSRs
> > >   3. enclave loads CSRs
> > >   4. __vdso_sgx_enter_enclave() loads %rbx
> > > 
> > > I know that lots of other stuff happens during enclave transitions,
> > > but at the very least we could reduce the number of instructions
> > > through this critical path.
> > 
> > What Jethro said and also that it is a good general principle to cut
> > down the semantics of any vdso as minimal as possible.
> > 
> > I.e. even if saving RBX would make somehow sense it *can* be left
> > out without loss in terms of what can be done with the vDSO.
> 
> Please read the rest of the thread. Sean and I have hammered out some
> sensible and effective changes.

Have skimmed through that discussion but it comes down to how much you get
by obviously degrading some of the robustness. Complexity of the calling
pattern is not something that should be emphasized as that is something
that is anyway hidden inside the runtime.

/Jarkko
Nathaniel McCallum March 16, 2020, 1:57 p.m. UTC | #19
On Mon, Mar 16, 2020 at 9:32 AM Jethro Beekman <jethro@fortanix.com> wrote:
>
> On 2020-03-15 18:53, Nathaniel McCallum wrote:
> > On Sat, Mar 14, 2020 at 9:25 PM Jarkko Sakkinen
> > <jarkko.sakkinen@linux.intel.com> wrote:
> >>
> >> On Wed, Mar 11, 2020 at 01:30:07PM -0400, Nathaniel McCallum wrote:
> >>> Currently, the selftest has a wrapper around
> >>> __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
> >>> registers (CSRs), though it uses none of them. Then it calls this
> >>> function which uses %rbx but preserves none of the CSRs. Then it jumps
> >>> into an enclave which zeroes all these registers before returning.
> >>> Thus:
> >>>
> >>>   1. wrapper saves all CSRs
> >>>   2. wrapper repositions stack arguments
> >>>   3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
> >>>   4. selftest zeros all CSRs
> >>>   5. wrapper loads all CSRs
> >>>
> >>> I'd like to propose instead that the enclave be responsible for saving
> >>> and restoring CSRs. So instead of the above we have:
> >>>   1. __vdso_sgx_enter_enclave() saves %rbx
> >>>   2. enclave saves CSRs
> >>>   3. enclave loads CSRs
> >>>   4. __vdso_sgx_enter_enclave() loads %rbx
> >>>
> >>> I know that lots of other stuff happens during enclave transitions,
> >>> but at the very least we could reduce the number of instructions
> >>> through this critical path.
> >>
> >> What Jethro said and also that it is a good general principle to cut
> >> down the semantics of any vdso as minimal as possible.
> >>
> >> I.e. even if saving RBX would make somehow sense it *can* be left
> >> out without loss in terms of what can be done with the vDSO.
> >
> > Please read the rest of the thread. Sean and I have hammered out some
> > sensible and effective changes.
>
> I'm not sure they're sensible? By departing from the ENCLU calling convention, both the VDSO
> and the wrapper become more complicated.

For the vDSO, only marginally. I'm counting +4,-2 instructions in my
suggestions. For the wrapper, things become significantly simpler.

> The wrapper because now it needs to implement all
> kinds of logic for different behavior depending on whether the VDSO is or isn't available.

When isn't the vDSO available? Once the patches are merged it will
always be available. Then we also get to live with this interface
forever. I'd rather have a good, usable interface for the long term.

> I agree with Jarkko that everything should be kept small and simple. Calling a couple extra instructions is going to have a negligible effect compared to the actual time EENTER/EEXIT take.

We all agree on small and simple. Nothing I've proposed fails either
of those criteria.

> Can someone remind me why we're not passing TCS in RBX but on the stack?

If you do that, the vDSO will never be callable from C. And, as you've
stated above, calling a couple extra instructions is going to have a
negligible effect.
Jethro Beekman March 16, 2020, 1:59 p.m. UTC | #20
On 2020-03-16 14:57, Nathaniel McCallum wrote:
> On Mon, Mar 16, 2020 at 9:32 AM Jethro Beekman <jethro@fortanix.com> wrote:
>>
>> On 2020-03-15 18:53, Nathaniel McCallum wrote:
>>> On Sat, Mar 14, 2020 at 9:25 PM Jarkko Sakkinen
>>> <jarkko.sakkinen@linux.intel.com> wrote:
>>>>
>>>> On Wed, Mar 11, 2020 at 01:30:07PM -0400, Nathaniel McCallum wrote:
>>>>> Currently, the selftest has a wrapper around
>>>>> __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
>>>>> registers (CSRs), though it uses none of them. Then it calls this
>>>>> function which uses %rbx but preserves none of the CSRs. Then it jumps
>>>>> into an enclave which zeroes all these registers before returning.
>>>>> Thus:
>>>>>
>>>>>   1. wrapper saves all CSRs
>>>>>   2. wrapper repositions stack arguments
>>>>>   3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
>>>>>   4. selftest zeros all CSRs
>>>>>   5. wrapper loads all CSRs
>>>>>
>>>>> I'd like to propose instead that the enclave be responsible for saving
>>>>> and restoring CSRs. So instead of the above we have:
>>>>>   1. __vdso_sgx_enter_enclave() saves %rbx
>>>>>   2. enclave saves CSRs
>>>>>   3. enclave loads CSRs
>>>>>   4. __vdso_sgx_enter_enclave() loads %rbx
>>>>>
>>>>> I know that lots of other stuff happens during enclave transitions,
>>>>> but at the very least we could reduce the number of instructions
>>>>> through this critical path.
>>>>
>>>> What Jethro said and also that it is a good general principle to cut
>>>> down the semantics of any vdso as minimal as possible.
>>>>
>>>> I.e. even if saving RBX would make somehow sense it *can* be left
>>>> out without loss in terms of what can be done with the vDSO.
>>>
>>> Please read the rest of the thread. Sean and I have hammered out some
>>> sensible and effective changes.
>>
>> I'm not sure they're sensible? By departing from the ENCLU calling convention, both the VDSO
>> and the wrapper become more complicated.
> 
> For the vDSO, only marginally. I'm counting +4,-2 instructions in my
> suggestions. For the wrapper, things become significantly simpler.
> 
>> The wrapper because now it needs to implement all
>> kinds of logic for different behavior depending on whether the VDSO is or isn't available.
> 
> When isn't the vDSO available?

When you're not on Linux. Or when you're on an old kernel.

--
Jethro Beekman | Fortanix
Nathaniel McCallum March 16, 2020, 2:01 p.m. UTC | #21
On Mon, Mar 16, 2020 at 9:56 AM Jarkko Sakkinen
<jarkko.sakkinen@linux.intel.com> wrote:
>
> On Sun, 2020-03-15 at 13:53 -0400, Nathaniel McCallum wrote:
> > On Sat, Mar 14, 2020 at 9:25 PM Jarkko Sakkinen
> > <jarkko.sakkinen@linux.intel.com> wrote:
> > > On Wed, Mar 11, 2020 at 01:30:07PM -0400, Nathaniel McCallum wrote:
> > > > Currently, the selftest has a wrapper around
> > > > __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
> > > > registers (CSRs), though it uses none of them. Then it calls this
> > > > function which uses %rbx but preserves none of the CSRs. Then it jumps
> > > > into an enclave which zeroes all these registers before returning.
> > > > Thus:
> > > >
> > > >   1. wrapper saves all CSRs
> > > >   2. wrapper repositions stack arguments
> > > >   3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
> > > >   4. selftest zeros all CSRs
> > > >   5. wrapper loads all CSRs
> > > >
> > > > I'd like to propose instead that the enclave be responsible for saving
> > > > and restoring CSRs. So instead of the above we have:
> > > >   1. __vdso_sgx_enter_enclave() saves %rbx
> > > >   2. enclave saves CSRs
> > > >   3. enclave loads CSRs
> > > >   4. __vdso_sgx_enter_enclave() loads %rbx
> > > >
> > > > I know that lots of other stuff happens during enclave transitions,
> > > > but at the very least we could reduce the number of instructions
> > > > through this critical path.
> > >
> > > What Jethro said and also that it is a good general principle to cut
> > > down the semantics of any vdso as minimal as possible.
> > >
> > > I.e. even if saving RBX would make somehow sense it *can* be left
> > > out without loss in terms of what can be done with the vDSO.
> >
> > Please read the rest of the thread. Sean and I have hammered out some
> > sensible and effective changes.
>
> Have skimmed through that discussion but it comes down how much you get
> by obviously degrading some of the robustness. Complexity of the calling
> pattern is not something that should be emphasized as that is something
> that is anyway hidden inside the runtime.

My suggestions explicitly maintained robustness, and in fact increased
it. If you think we've lost capability, please speak with specificity
rather than in vague generalities. Under my suggestions we can:
1. call the vDSO from C
2. pass context to the handler
3. have additional stack manipulation options in the handler

The cost for this is a net 2 additional instructions. No existing
capability is lost.
Nathaniel McCallum March 16, 2020, 2:03 p.m. UTC | #22
On Mon, Mar 16, 2020 at 9:59 AM Jethro Beekman <jethro@fortanix.com> wrote:
>
> On 2020-03-16 14:57, Nathaniel McCallum wrote:
> > On Mon, Mar 16, 2020 at 9:32 AM Jethro Beekman <jethro@fortanix.com> wrote:
> >>
> >> On 2020-03-15 18:53, Nathaniel McCallum wrote:
> >>> On Sat, Mar 14, 2020 at 9:25 PM Jarkko Sakkinen
> >>> <jarkko.sakkinen@linux.intel.com> wrote:
> >>>>
> >>>> On Wed, Mar 11, 2020 at 01:30:07PM -0400, Nathaniel McCallum wrote:
> >>>>> Currently, the selftest has a wrapper around
> >>>>> __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
> >>>>> registers (CSRs), though it uses none of them. Then it calls this
> >>>>> function which uses %rbx but preserves none of the CSRs. Then it jumps
> >>>>> into an enclave which zeroes all these registers before returning.
> >>>>> Thus:
> >>>>>
> >>>>>   1. wrapper saves all CSRs
> >>>>>   2. wrapper repositions stack arguments
> >>>>>   3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
> >>>>>   4. selftest zeros all CSRs
> >>>>>   5. wrapper loads all CSRs
> >>>>>
> >>>>> I'd like to propose instead that the enclave be responsible for saving
> >>>>> and restoring CSRs. So instead of the above we have:
> >>>>>   1. __vdso_sgx_enter_enclave() saves %rbx
> >>>>>   2. enclave saves CSRs
> >>>>>   3. enclave loads CSRs
> >>>>>   4. __vdso_sgx_enter_enclave() loads %rbx
> >>>>>
> >>>>> I know that lots of other stuff happens during enclave transitions,
> >>>>> but at the very least we could reduce the number of instructions
> >>>>> through this critical path.
> >>>>
> >>>> What Jethro said and also that it is a good general principle to cut
> >>>> down the semantics of any vdso as minimal as possible.
> >>>>
> >>>> I.e. even if saving RBX would make somehow sense it *can* be left
> >>>> out without loss in terms of what can be done with the vDSO.
> >>>
> >>> Please read the rest of the thread. Sean and I have hammered out some
> >>> sensible and effective changes.
> >>
> >> I'm not sure they're sensible? By departing from the ENCLU calling convention, both the VDSO
> >> and the wrapper become more complicated.
> >
> > For the vDSO, only marginally. I'm counting +4,-2 instructions in my
> > suggestions. For the wrapper, things become significantly simpler.
> >
> >> The wrapper because now it needs to implement all
> >> kinds of logic for different behavior depending on whether the VDSO is or isn't available.
> >
> > When isn't the vDSO available?
>
> When you're not on Linux. Or when you're on an old kernel.

I fail to see why the Linux kernel should degrade its new interfaces
for those use cases.
Sean Christopherson March 16, 2020, 5:17 p.m. UTC | #23
On Mon, Mar 16, 2020 at 10:03:31AM -0400, Nathaniel McCallum wrote:
> On Mon, Mar 16, 2020 at 9:59 AM Jethro Beekman <jethro@fortanix.com> wrote:
> >
> > On 2020-03-16 14:57, Nathaniel McCallum wrote:
> > > On Mon, Mar 16, 2020 at 9:32 AM Jethro Beekman <jethro@fortanix.com> wrote:
> > >>
> > >> On 2020-03-15 18:53, Nathaniel McCallum wrote:
> > >>> On Sat, Mar 14, 2020 at 9:25 PM Jarkko Sakkinen
> > >>> <jarkko.sakkinen@linux.intel.com> wrote:
> > >>>>
> > >>>> On Wed, Mar 11, 2020 at 01:30:07PM -0400, Nathaniel McCallum wrote:
> > >>>>> Currently, the selftest has a wrapper around
> > >>>>> __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
> > >>>>> registers (CSRs), though it uses none of them. Then it calls this
> > >>>>> function which uses %rbx but preserves none of the CSRs. Then it jumps
> > >>>>> into an enclave which zeroes all these registers before returning.
> > >>>>> Thus:
> > >>>>>
> > >>>>>   1. wrapper saves all CSRs
> > >>>>>   2. wrapper repositions stack arguments
> > >>>>>   3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
> > >>>>>   4. selftest zeros all CSRs
> > >>>>>   5. wrapper loads all CSRs
> > >>>>>
> > >>>>> I'd like to propose instead that the enclave be responsible for saving
> > >>>>> and restoring CSRs. So instead of the above we have:
> > >>>>>   1. __vdso_sgx_enter_enclave() saves %rbx
> > >>>>>   2. enclave saves CSRs
> > >>>>>   3. enclave loads CSRs
> > >>>>>   4. __vdso_sgx_enter_enclave() loads %rbx
> > >>>>>
> > >>>>> I know that lots of other stuff happens during enclave transitions,
> > >>>>> but at the very least we could reduce the number of instructions
> > >>>>> through this critical path.
> > >>>>
> > >>>> What Jethro said and also that it is a good general principle to cut
> > >>>> down the semantics of any vdso as minimal as possible.
> > >>>>
> > >>>> I.e. even if saving RBX would make somehow sense it *can* be left
> > >>>> out without loss in terms of what can be done with the vDSO.
> > >>>
> > >>> Please read the rest of the thread. Sean and I have hammered out some
> > >>> sensible and effective changes.
> > >>
> > >> I'm not sure they're sensible? By departing from the ENCLU calling
> > >> convention, both the VDSO and the wrapper become more complicated.
> > >
> > > For the vDSO, only marginally. I'm counting +4,-2 instructions in my
> > > suggestions. For the wrapper, things become significantly simpler.
> > >
> > >> The wrapper because now it needs to implement all kinds of logic for
> > >> different behavior depending on whether the VDSO is or isn't available.

How so?  The wrapper, if one is needed, will need to have dedicated logic
for the vDSO no matter what interface is defined by the vDSO.  Taking the
leaf in %rcx instead of %rax would at worst add a single instruction.  At
best, it would eliminate the wrapper entirely by making the vDSO callable
from C, e.g. for enclaves+runtimes that treat EENTER/ERESUME as glorified
function calls, i.e. more or less follow the x86-64 ABI.

> > > When isn't the vDSO available?
> >
> > When you're not on Linux. Or when you're on an old kernel.
> 
> I fail to see why the Linux kernel should degrade its new interfaces for
> those use cases.

There are effectively four related, but independent, changes to consider:

  1. Make the RSP fixup in the "return from handler" path relative instead
     of absolute.

  2. Preserve RBX in the vDSO.

  3. Use %rcx instead of %rax to pass @leaf.

  4. Allow the untrusted runtime to pass a parameter directly to its exit
     handler.


For me, #1 is an easy "yes".  It's arguably a bug fix, and the cost is one
uop.

My vote for #2 and #3 would also be a strong "yes".  Although passing @leaf
in %rcx technically diverges from ENCLU, I actually think it will make it
easier to swap between the vDSO and a bare ENCLU.  E.g. have the prototype
for the vDSO be the prototype for the assembly wrapper:

typedef void (*enter_enclave_fn)(unsigned long rdi, unsigned long rsi,
				 unsigned long rdx, unsigned int leaf,
				 unsigned long r8,  unsigned long r9,
				 void *tcs,
				 struct sgx_enclave_exception *e,
				 sgx_enclave_exit_handler_t handler);

int run_enclave(...)
{
	enter_enclave_fn enter_enclave;

	if (vdso)
		enter_enclave = vdso;
	else
		enter_enclave = my_wrapper;
	return enter_enclave(...);
}


I don't have a strong opinion on #4.  It seems superfluous, but if the
parameter is buried at the end of the prototype then it can be completely
ignored by runtimes that don't utilize a handler.
Jarkko Sakkinen March 16, 2020, 9:27 p.m. UTC | #24
On Mon, 2020-03-16 at 09:57 -0400, Nathaniel McCallum wrote:
> For the vDSO, only marginally. I'm counting +4,-2 instructions in my
> suggestions. For the wrapper, things become significantly simpler.

Simpler is not a quality that has very high importance here except
when it comes to vDSO.

At least it is not enough to change to vDSO. What else?

Anyway, I think the documentation should be fixed and streamlined 1st.

The prose is way too verbose in some places and in others it completely
lacks information, e.g.

"Debug Exceptions (#DB) and Breakpoints (#BP) are ever fixed up and are
always delivered via standard signals."

It should never state things like that without explaining the reasons.

On the other hand:

"Most exceptions reported on ENCLU, including those that occur within
the enclave, are fixed up and reported synchronously instead of being
delivered via a standard signal. Debug Exceptions (#DB) and Breakpoints
(#BP) are never fixed up and are always delivered via standard signals.
On synchronously reported exceptions, -EFAULT is returned and details
about the exception are recorded in @e, the optional
sgx_enclave_exception struct."

It duplicates information already given elsewhere (e.g. the return values) and is
just a pain to read and comprehend in general.

/Jarkko
Jarkko Sakkinen March 16, 2020, 9:29 p.m. UTC | #25
On Mon, 2020-03-16 at 23:27 +0200, Jarkko Sakkinen wrote:
> On Mon, 2020-03-16 at 09:57 -0400, Nathaniel McCallum wrote:
> > For the vDSO, only marginally. I'm counting +4,-2 instructions in my
> > suggestions. For the wrapper, things become significantly simpler.
> 
> Simpler is not a quality that has very high importance here except
> when it comes to vDSO.
> 
> At least it is not enough to change to vDSO. What else?
                                      ~~
                                      the

In any case, where I stand is that the vDSO implementation itself is
exactly how it should be. The process to get it to this form was
tedious. Now we have a form that the known userbase can live with.

The documentation sucks, agreed. I think by fixing that this would
be a whole lot better.

/Jarkko
Jarkko Sakkinen March 16, 2020, 9:38 p.m. UTC | #26
On Mon, 2020-03-16 at 10:01 -0400, Nathaniel McCallum wrote:
> On Mon, Mar 16, 2020 at 9:56 AM Jarkko Sakkinen
> <jarkko.sakkinen@linux.intel.com> wrote:
> > On Sun, 2020-03-15 at 13:53 -0400, Nathaniel McCallum wrote:
> > > On Sat, Mar 14, 2020 at 9:25 PM Jarkko Sakkinen
> > > <jarkko.sakkinen@linux.intel.com> wrote:
> > > > On Wed, Mar 11, 2020 at 01:30:07PM -0400, Nathaniel McCallum wrote:
> > > > > Currently, the selftest has a wrapper around
> > > > > __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
> > > > > registers (CSRs), though it uses none of them. Then it calls this
> > > > > function which uses %rbx but preserves none of the CSRs. Then it jumps
> > > > > into an enclave which zeroes all these registers before returning.
> > > > > Thus:
> > > > > 
> > > > >   1. wrapper saves all CSRs
> > > > >   2. wrapper repositions stack arguments
> > > > >   3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
> > > > >   4. selftest zeros all CSRs
> > > > >   5. wrapper loads all CSRs
> > > > > 
> > > > > I'd like to propose instead that the enclave be responsible for saving
> > > > > and restoring CSRs. So instead of the above we have:
> > > > >   1. __vdso_sgx_enter_enclave() saves %rbx
> > > > >   2. enclave saves CSRs
> > > > >   3. enclave loads CSRs
> > > > >   4. __vdso_sgx_enter_enclave() loads %rbx
> > > > > 
> > > > > I know that lots of other stuff happens during enclave transitions,
> > > > > but at the very least we could reduce the number of instructions
> > > > > through this critical path.
> > > > 
> > > > What Jethro said and also that it is a good general principle to cut
> > > > down the semantics of any vdso as minimal as possible.
> > > > 
> > > > I.e. even if saving RBX would make somehow sense it *can* be left
> > > > out without loss in terms of what can be done with the vDSO.
> > > 
> > > Please read the rest of the thread. Sean and I have hammered out some
> > > sensible and effective changes.
> > 
> > Have skimmed through that discussion but it comes down how much you get
> > by obviously degrading some of the robustness. Complexity of the calling
> > pattern is not something that should be emphasized as that is something
> > that is anyway hidden inside the runtime.
> 
> My suggestions explicitly maintained robustness, and in fact increased
> it. If you think we've lost capability, please speak with specificity
> rather than in vague generalities. Under my suggestions we can:
> 1. call the vDSO from C
> 2. pass context to the handler
> 3. have additional stack manipulation options in the handler
> 
> The cost for this is a net 2 additional instructions. No existing
> capability is lost.

My vague generality in this case is just that the whole design
approach so far has been to minimize the amount of wrapping around
EENTER. And since this has been more or less agreed on by most of the
stakeholders, doing something against the chosen strategy is
something I hold some resistance to.

I get the technical idea of what you are suggesting. Please
understand that these are orthogonal axes that I have to care about.

In a community sense, it opens up the possibility of unknown unknowns [1].

[1] https://www.youtube.com/watch?v=GiPe1OiKQuk

/Jarkko
Sean Christopherson March 16, 2020, 10:53 p.m. UTC | #27
On Mon, Mar 16, 2020 at 11:38:24PM +0200, Jarkko Sakkinen wrote:
> On Mon, 2020-03-16 at 10:01 -0400, Nathaniel McCallum wrote:
> > On Mon, Mar 16, 2020 at 9:56 AM Jarkko Sakkinen
> > <jarkko.sakkinen@linux.intel.com> wrote:
> > > On Sun, 2020-03-15 at 13:53 -0400, Nathaniel McCallum wrote:
> > > > On Sat, Mar 14, 2020 at 9:25 PM Jarkko Sakkinen
> > > > <jarkko.sakkinen@linux.intel.com> wrote:
> > > > > On Wed, Mar 11, 2020 at 01:30:07PM -0400, Nathaniel McCallum wrote:
> > > > > > Currently, the selftest has a wrapper around
> > > > > > __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
> > > > > > registers (CSRs), though it uses none of them. Then it calls this
> > > > > > function which uses %rbx but preserves none of the CSRs. Then it jumps
> > > > > > into an enclave which zeroes all these registers before returning.
> > > > > > Thus:
> > > > > > 
> > > > > >   1. wrapper saves all CSRs
> > > > > >   2. wrapper repositions stack arguments
> > > > > >   3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
> > > > > >   4. selftest zeros all CSRs
> > > > > >   5. wrapper loads all CSRs
> > > > > > 
> > > > > > I'd like to propose instead that the enclave be responsible for saving
> > > > > > and restoring CSRs. So instead of the above we have:
> > > > > >   1. __vdso_sgx_enter_enclave() saves %rbx
> > > > > >   2. enclave saves CSRs
> > > > > >   3. enclave loads CSRs
> > > > > >   4. __vdso_sgx_enter_enclave() loads %rbx
> > > > > > 
> > > > > > I know that lots of other stuff happens during enclave transitions,
> > > > > > but at the very least we could reduce the number of instructions
> > > > > > through this critical path.
> > > > > 
> > > > > What Jethro said and also that it is a good general principle to cut
> > > > > down the semantics of any vdso as minimal as possible.
> > > > > 
> > > > > I.e. even if saving RBX would make somehow sense it *can* be left
> > > > > out without loss in terms of what can be done with the vDSO.
> > > > 
> > > > Please read the rest of the thread. Sean and I have hammered out some
> > > > sensible and effective changes.
> > > 
> > > Have skimmed through that discussion but it comes down how much you get
> > > by obviously degrading some of the robustness. Complexity of the calling
> > > pattern is not something that should be emphasized as that is something
> > > that is anyway hidden inside the runtime.
> > 
> > My suggestions explicitly maintained robustness, and in fact increased
> > it. If you think we've lost capability, please speak with specificity
> > rather than in vague generalities. Under my suggestions we can:
> > 1. call the vDSO from C
> > 2. pass context to the handler
> > 3. have additional stack manipulation options in the handler
> > 
> > The cost for this is a net 2 additional instructions. No existing
> > capability is lost.
> 
> My vague generality in this case is just that the whole design
> approach so far has been to minimize the amount of wrapping to
> EENTER.

Yes and no.   If we wanted to minimize the amount of wrapping around the
vDSO's ENCLU then we wouldn't have the exit handler shenanigans in the
first place.  The whole process has been about balancing the wants of each
use case against the overall quality of the API and code.

> And since this has been kind of agreed by most of the
> stakeholders doing something against the chosen strategy is
> something I do hold some resistance.

Up until Nathaniel joined the party, the only stakeholder in terms of the
exit handler was the Intel SDK.  There was a general consensus to pass
registers as-is when there isn't a strong reason to do otherwise.  Note
that Nathaniel has also expressed approval of that approach.

So I think the question that needs to be answered is whether the benefits
of using %rcx instead of %rax to pass @leaf justify deviating from the
"pass registers as-is" guideline.  We've effectively already given this
waiver for %rbx,
as the whole reason why the TCS is passed in on the stack instead of via
%rbx is so that it can be passed to the exit handler.  E.g. the vDSO
could take the TCS in %rbx and save it on the stack, but we're throwing
the baby out with the bathwater at that point.

The major benefits are that the vDSO would be callable from C and that
the kernel could define a legitimate prototype instead of a Frankenstein
prototype that's half assembly and half C.  For me, those are significant
benefits and well worth the extra MOV, PUSH and POP.  For some use cases
it would eliminate the need for an assembly wrapper.  For runtimes that
need an assembly wrapper for whatever reason, it's probably still a win as
a well designed runtime can avoid register shuffling in the wrapper.  And
if there is a runtime that isn't covered by the above, it's at worst an
extra MOV.
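
As a sketch of what that buys (names other than EENTER are placeholders;
the function pointer is obtained as in the earlier example, and the leaf
value for EENTER is 2), a runtime whose enclave honors the kernel's
convention could enter the enclave with no assembly at all:

#define EENTER	2	/* ENCLU leaf for entering an enclave */

static void call_enclave(enter_enclave_fn enter_enclave, void *tcs,
			 unsigned long arg)
{
	struct sgx_enclave_exception e = { 0 };

	/*
	 * No assembly wrapper needed: @leaf rides in %rcx as the 4th
	 * integer argument per the x86-64 SysV ABI, and the remaining
	 * registers pass straight through to the enclave.  Exception
	 * details, if any, land in @e.
	 */
	enter_enclave(arg, 0, 0, EENTER, 0, 0, tcs, &e, NULL);
}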
Sean Christopherson March 16, 2020, 10:55 p.m. UTC | #28
On Mon, Mar 16, 2020 at 02:31:36PM +0100, Jethro Beekman wrote:
> Can someone remind me why we're not passing TCS in RBX but on the stack?

I finally remembered why.  It's pulled off the stack and passed into the
exit handler.  I'm pretty sure the vDSO could take it in %rbx and manually
save it on the stack, but I'd rather keep the current behavior so that the
vDSO is callable from C (assuming @leaf is changed to be passed via %rcx).
Xing, Cedric March 16, 2020, 11:50 p.m. UTC | #29
On 3/16/2020 3:53 PM, Sean Christopherson wrote:
> On Mon, Mar 16, 2020 at 11:38:24PM +0200, Jarkko Sakkinen wrote:
>> On Mon, 2020-03-16 at 10:01 -0400, Nathaniel McCallum wrote:
>>> On Mon, Mar 16, 2020 at 9:56 AM Jarkko Sakkinen
>>> <jarkko.sakkinen@linux.intel.com> wrote:
>>>> On Sun, 2020-03-15 at 13:53 -0400, Nathaniel McCallum wrote:
>>>>> On Sat, Mar 14, 2020 at 9:25 PM Jarkko Sakkinen
>>>>> <jarkko.sakkinen@linux.intel.com> wrote:
>>>>>> On Wed, Mar 11, 2020 at 01:30:07PM -0400, Nathaniel McCallum wrote:
>>>>>>> Currently, the selftest has a wrapper around
>>>>>>> __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
>>>>>>> registers (CSRs), though it uses none of them. Then it calls this
>>>>>>> function which uses %rbx but preserves none of the CSRs. Then it jumps
>>>>>>> into an enclave which zeroes all these registers before returning.
>>>>>>> Thus:
>>>>>>>
>>>>>>>    1. wrapper saves all CSRs
>>>>>>>    2. wrapper repositions stack arguments
>>>>>>>    3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
>>>>>>>    4. selftest zeros all CSRs
>>>>>>>    5. wrapper loads all CSRs
>>>>>>>
>>>>>>> I'd like to propose instead that the enclave be responsible for saving
>>>>>>> and restoring CSRs. So instead of the above we have:
>>>>>>>    1. __vdso_sgx_enter_enclave() saves %rbx
>>>>>>>    2. enclave saves CSRs
>>>>>>>    3. enclave loads CSRs
>>>>>>>    4. __vdso_sgx_enter_enclave() loads %rbx
>>>>>>>
>>>>>>> I know that lots of other stuff happens during enclave transitions,
>>>>>>> but at the very least we could reduce the number of instructions
>>>>>>> through this critical path.
>>>>>>
>>>>>> What Jethro said and also that it is a good general principle to cut
>>>>>> down the semantics of any vdso as minimal as possible.
>>>>>>
>>>>>> I.e. even if saving RBX would make somehow sense it *can* be left
>>>>>> out without loss in terms of what can be done with the vDSO.
>>>>>
>>>>> Please read the rest of the thread. Sean and I have hammered out some
>>>>> sensible and effective changes.
>>>>
>>>> Have skimmed through that discussion but it comes down how much you get
>>>> by obviously degrading some of the robustness. Complexity of the calling
>>>> pattern is not something that should be emphasized as that is something
>>>> that is anyway hidden inside the runtime.
>>>
>>> My suggestions explicitly maintained robustness, and in fact increased
>>> it. If you think we've lost capability, please speak with specificity
>>> rather than in vague generalities. Under my suggestions we can:
>>> 1. call the vDSO from C
>>> 2. pass context to the handler
>>> 3. have additional stack manipulation options in the handler
>>>
>>> The cost for this is a net 2 additional instructions. No existing
>>> capability is lost.
>>
>> My vague generality in this case is just that the whole design
>> approach so far has been to minimize the amount of wrapping to
>> EENTER.
> 
> Yes and no.   If we wanted to minimize the amount of wrapping around the
> vDSO's ENCLU then we wouldn't have the exit handler shenanigans in the
> first place.  The whole process has been about balancing the wants of each
> use case against the overall quality of the API and code.
> 
The design of this vDSO API was NOT to minimize wrapping, but to allow 
maximal flexibility. More specifically, we strove not to restrict how 
info is exchanged between the enclave and its host process. After all, 
the calling convention is compiler specific - i.e. the enclave could be 
built by a different compiler (e.g. MSVC) that doesn't share the same 
list of CSRs as the host process. Therefore, the API has been 
implemented to pass through virtually all registers except those used by 
EENTER itself. Similarly, all registers are passed back from the enclave 
to the caller (or the exit handler) except those used by EEXIT. %rbp is 
an exception because the vDSO API has to anchor the stack, using either 
%rsp or %rbp. We picked %rbp to allow the enclave to allocate space on 
the stack.
Xing, Cedric March 16, 2020, 11:56 p.m. UTC | #30
On 3/16/2020 3:55 PM, Sean Christopherson wrote:
> On Mon, Mar 16, 2020 at 02:31:36PM +0100, Jethro Beekman wrote:
>> Can someone remind me why we're not passing TCS in RBX but on the stack?
> 
> I finally remembered why.  It's pulled off the stack and passed into the
> exit handler.  I'm pretty sure the vDSO could take it in %rbx and manually
> save it on the stack, but I'd rather keep the current behavior so that the
> vDSO is callable from C (assuming @leaf is changed to be passed via %rcx).
> 
The idea is that the caller of this vDSO API is itself C callable, hence 
it cannot receive the TCS in %rbx anyway. It then has to either MOV it 
to %rbx or PUSH it onto the stack. Either way the complexity is the same. 
The vDSO API, however, always has to save it on the stack for the exit 
handler. So receiving it via the stack ends up being the simplest code.
Sean Christopherson March 16, 2020, 11:59 p.m. UTC | #31
On Mon, Mar 16, 2020 at 04:50:26PM -0700, Xing, Cedric wrote:
> On 3/16/2020 3:53 PM, Sean Christopherson wrote:
> >On Mon, Mar 16, 2020 at 11:38:24PM +0200, Jarkko Sakkinen wrote:
> >>>My suggestions explicitly maintained robustness, and in fact increased
> >>>it. If you think we've lost capability, please speak with specificity
> >>>rather than in vague generalities. Under my suggestions we can:
> >>>1. call the vDSO from C
> >>>2. pass context to the handler
> >>>3. have additional stack manipulation options in the handler
> >>>
> >>>The cost for this is a net 2 additional instructions. No existing
> >>>capability is lost.
> >>
> >>My vague generality in this case is just that the whole design
> >>approach so far has been to minimize the amount of wrapping to
> >>EENTER.
> >
> >Yes and no.   If we wanted to minimize the amount of wrapping around the
> >vDSO's ENCLU then we wouldn't have the exit handler shenanigans in the
> >first place.  The whole process has been about balancing the wants of each
> >use case against the overall quality of the API and code.
> >
> The design of this vDSO API was NOT to minimize wrapping, but to allow
> maximal flexibility. More specifically, we strove not to restrict how info
> was exchanged between the enclave and its host process. After all, calling
> convention is compiler specific - i.e. the enclave could be built by a
> different compiler (e.g. MSVC) that doesn't share the same list of CSRs as
> the host process. Therefore, the API has been implemented to pass through
> virtually all registers except those used by EENTER itself. Similarly, all
> registers are passed back from enclave to the caller (or the exit handler)
> except those used by EEXIT. %rbp is an exception because the vDSO API has to
> anchor the stack, using either %rsp or %rbp. We picked %rbp to allow the
> enclave to allocate space on the stack.

And unless I'm missing something, using %rcx to pass @leaf would still
satisfy the above, correct?  Ditto for saving/restoring %rbx.

I.e. a runtime that's designed to work with enclaves using a different
calling convention wouldn't be able to take advantage of being able to call
the vDSO from C, but neither would it take on any meaningful burden.
Xing, Cedric March 17, 2020, 12:18 a.m. UTC | #32
On 3/16/2020 4:59 PM, Sean Christopherson wrote:
> On Mon, Mar 16, 2020 at 04:50:26PM -0700, Xing, Cedric wrote:
>> On 3/16/2020 3:53 PM, Sean Christopherson wrote:
>>> On Mon, Mar 16, 2020 at 11:38:24PM +0200, Jarkko Sakkinen wrote:
>>>>> My suggestions explicitly maintained robustness, and in fact increased
>>>>> it. If you think we've lost capability, please speak with specificity
>>>>> rather than in vague generalities. Under my suggestions we can:
>>>>> 1. call the vDSO from C
>>>>> 2. pass context to the handler
>>>>> 3. have additional stack manipulation options in the handler
>>>>>
>>>>> The cost for this is a net 2 additional instructions. No existing
>>>>> capability is lost.
>>>>
>>>> My vague generality in this case is just that the whole design
>>>> approach so far has been to minimize the amount of wrapping to
>>>> EENTER.
>>>
>>> Yes and no.   If we wanted to minimize the amount of wrapping around the
>>> vDSO's ENCLU then we wouldn't have the exit handler shenanigans in the
>>> first place.  The whole process has been about balancing the wants of each
>>> use case against the overall quality of the API and code.
>>>
>> The design of this vDSO API was NOT to minimize wrapping, but to allow
>> maximal flexibility. More specifically, we strove not to restrict how info
>> was exchanged between the enclave and its host process. After all, calling
>> convention is compiler specific - i.e. the enclave could be built by a
>> different compiler (e.g. MSVC) that doesn't share the same list of CSRs as
>> the host process. Therefore, the API has been implemented to pass through
>> virtually all registers except those used by EENTER itself. Similarly, all
>> registers are passed back from enclave to the caller (or the exit handler)
>> except those used by EEXIT. %rbp is an exception because the vDSO API has to
>> anchor the stack, using either %rsp or %rbp. We picked %rbp to allow the
>> enclave to allocate space on the stack.
> 
> And unless I'm missing something, using %rcx to pass @leaf would still
> satisfy the above, correct?  Ditto for saving/restoring %rbx.
> 
> I.e. a runtime that's designed to work with enclave's using a different
> calling convention wouldn't be able to take advantage of being able to call
> the vDSO from C, but neither would it take on any meaningful burden.
> 
Not exactly.

If called directly from C code, the caller would expect CSRs to be 
preserved. Then who should preserve CSRs? It can't be the enclave 
because it may not follow the same calling convention. Moreover, the 
enclave may run into an exception, in which case it doesn't have the 
ability to restore CSRs. So it has to be done by the vDSO API. That 
means CSRs will be overwritten upon enclave exits, which violates the 
goal of "passing all registers back to the caller except those used by 
EEXIT".
Sean Christopherson March 17, 2020, 12:27 a.m. UTC | #33
On Mon, Mar 16, 2020 at 05:18:14PM -0700, Xing, Cedric wrote:
> On 3/16/2020 4:59 PM, Sean Christopherson wrote:
> >On Mon, Mar 16, 2020 at 04:50:26PM -0700, Xing, Cedric wrote:
> >>On 3/16/2020 3:53 PM, Sean Christopherson wrote:
> >>>On Mon, Mar 16, 2020 at 11:38:24PM +0200, Jarkko Sakkinen wrote:
> >>>>>My suggestions explicitly maintained robustness, and in fact increased
> >>>>>it. If you think we've lost capability, please speak with specificity
> >>>>>rather than in vague generalities. Under my suggestions we can:
> >>>>>1. call the vDSO from C
> >>>>>2. pass context to the handler
> >>>>>3. have additional stack manipulation options in the handler
> >>>>>
> >>>>>The cost for this is a net 2 additional instructions. No existing
> >>>>>capability is lost.
> >>>>
> >>>>My vague generality in this case is just that the whole design
> >>>>approach so far has been to minimize the amount of wrapping to
> >>>>EENTER.
> >>>
> >>>Yes and no.   If we wanted to minimize the amount of wrapping around the
> >>>vDSO's ENCLU then we wouldn't have the exit handler shenanigans in the
> >>>first place.  The whole process has been about balancing the wants of each
> >>>use case against the overall quality of the API and code.
> >>>
> >>The design of this vDSO API was NOT to minimize wrapping, but to allow
> >>maximal flexibility. More specifically, we strove not to restrict how info
> >>was exchanged between the enclave and its host process. After all, calling
> >>convention is compiler specific - i.e. the enclave could be built by a
> >>different compiler (e.g. MSVC) that doesn't share the same list of CSRs as
> >>the host process. Therefore, the API has been implemented to pass through
> >>virtually all registers except those used by EENTER itself. Similarly, all
> >>registers are passed back from enclave to the caller (or the exit handler)
> >>except those used by EEXIT. %rbp is an exception because the vDSO API has to
> >>anchor the stack, using either %rsp or %rbp. We picked %rbp to allow the
> >>enclave to allocate space on the stack.
> >
> >And unless I'm missing something, using %rcx to pass @leaf would still
> >satisfy the above, correct?  Ditto for saving/restoring %rbx.
> >
> >I.e. a runtime that's designed to work with enclave's using a different
> >calling convention wouldn't be able to take advantage of being able to call
> >the vDSO from C, but neither would it take on any meaningful burden.
> >
> Not exactly.
> 
> If called directly from C code, the caller would expect CSRs to be
> preserved. Then who should preserve CSRs? It can't be the enclave because it
> may not follow the same calling convention. Moreover, the enclave may run
> into an exception, in which case it doesn't have the ability to restore
> CSRs. So it has to be done by the vDSO API. That means CSRs will be
> overwritten upon enclave exits, which violates the goal of "passing all
> registers back to the caller except those used by EEXIT".

IIUC, Nathaniel's use case is to run only enclaves that are compatible
with Linux's calling convention and to handle enclave exceptions in the
exit handler.

As I qualified above, there would certainly be runtimes and use cases that
would find no advantage in passing @leaf via %rcx and preserving %rbx.  I'm
well aware the Intel SDK falls into that bucket.  But again, the cost to
such runtimes is precisely one reg->reg MOV instruction.
Nathaniel McCallum March 17, 2020, 4:28 p.m. UTC | #34
On Mon, Mar 16, 2020 at 6:53 PM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Mon, Mar 16, 2020 at 11:38:24PM +0200, Jarkko Sakkinen wrote:
> > On Mon, 2020-03-16 at 10:01 -0400, Nathaniel McCallum wrote:
> > > On Mon, Mar 16, 2020 at 9:56 AM Jarkko Sakkinen
> > > <jarkko.sakkinen@linux.intel.com> wrote:
> > > > On Sun, 2020-03-15 at 13:53 -0400, Nathaniel McCallum wrote:
> > > > > On Sat, Mar 14, 2020 at 9:25 PM Jarkko Sakkinen
> > > > > <jarkko.sakkinen@linux.intel.com> wrote:
> > > > > > On Wed, Mar 11, 2020 at 01:30:07PM -0400, Nathaniel McCallum wrote:
> > > > > > > Currently, the selftest has a wrapper around
> > > > > > > __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
> > > > > > > registers (CSRs), though it uses none of them. Then it calls this
> > > > > > > function which uses %rbx but preserves none of the CSRs. Then it jumps
> > > > > > > into an enclave which zeroes all these registers before returning.
> > > > > > > Thus:
> > > > > > >
> > > > > > >   1. wrapper saves all CSRs
> > > > > > >   2. wrapper repositions stack arguments
> > > > > > >   3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
> > > > > > >   4. selftest zeros all CSRs
> > > > > > >   5. wrapper loads all CSRs
> > > > > > >
> > > > > > > I'd like to propose instead that the enclave be responsible for saving
> > > > > > > and restoring CSRs. So instead of the above we have:
> > > > > > >   1. __vdso_sgx_enter_enclave() saves %rbx
> > > > > > >   2. enclave saves CSRs
> > > > > > >   3. enclave loads CSRs
> > > > > > >   4. __vdso_sgx_enter_enclave() loads %rbx
> > > > > > >
> > > > > > > I know that lots of other stuff happens during enclave transitions,
> > > > > > > but at the very least we could reduce the number of instructions
> > > > > > > through this critical path.
> > > > > >
> > > > > > What Jethro said and also that it is a good general principle to cut
> > > > > > down the semantics of any vdso as minimal as possible.
> > > > > >
> > > > > > I.e. even if saving RBX would make somehow sense it *can* be left
> > > > > > out without loss in terms of what can be done with the vDSO.
> > > > >
> > > > > Please read the rest of the thread. Sean and I have hammered out some
> > > > > sensible and effective changes.
> > > >
> > > > Have skimmed through that discussion but it comes down how much you get
> > > > by obviously degrading some of the robustness. Complexity of the calling
> > > > pattern is not something that should be emphasized as that is something
> > > > that is anyway hidden inside the runtime.
> > >
> > > My suggestions explicitly maintained robustness, and in fact increased
> > > it. If you think we've lost capability, please speak with specificity
> > > rather than in vague generalities. Under my suggestions we can:
> > > 1. call the vDSO from C
> > > 2. pass context to the handler
> > > 3. have additional stack manipulation options in the handler
> > >
> > > The cost for this is a net 2 additional instructions. No existing
> > > capability is lost.
> >
> > My vague generality in this case is just that the whole design
> > approach so far has been to minimize the amount of wrapping to
> > EENTER.
>
> Yes and no.   If we wanted to minimize the amount of wrapping around the
> vDSO's ENCLU then we wouldn't have the exit handler shenanigans in the
> first place.  The whole process has been about balancing the wants of each
> use case against the overall quality of the API and code.
>
> > And since this has been kind of agreed by most of the
> > stakeholders doing something against the chosen strategy is
> > something I do hold some resistance.
>
> Up until Nathaniel joined the party, the only stakeholder in terms of the
> exit handler was the Intel SDK.

I would hope that having additional stakeholders would ease the path
to adoption.

> There was a general consensus to pass
> registers as-is when there isn't a strong reason to do otherwise.  Note
> that Nathaniel has also expressed approval of that approach.

I still approve that approach.

> So I think the question that needs to be answered is whether the benefits
> of using %rcx instead of %rax to pass @leaf justify the "pass registers
> as-is" guideline.  We've effectively already given this waiver for %rbx,
> as the whole reason why the TCS is passed in on the stack instead of via
> %rbx is so that it can be passed to the exit handler.  E.g. the vDSO
> could take the TCS in %rbx and save it on the stack, but we're throwing
> the baby out with the bathwater at that point.
>
> The major benefits being that the vDSO would be callable from C and that
> the kernel could define a legitimate prototype instead of a frankenstein
> prototype that's half assembly and half C.  For me, those are significant
> benefits and well worth the extra MOV, PUSH and POP.  For some use cases
> it would eliminate the need for an assembly wrapper.  For runtimes that
> need an assembly wrapper for whatever reason, it's probably still a win as
> a well designed runtime can avoid register shuffling in the wrapper.  And
> if there is a runtime that isn't covered by the above, it's at worst an
> extra MOV.
>
Nathaniel McCallum March 17, 2020, 4:37 p.m. UTC | #35
On Mon, Mar 16, 2020 at 8:27 PM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Mon, Mar 16, 2020 at 05:18:14PM -0700, Xing, Cedric wrote:
> > On 3/16/2020 4:59 PM, Sean Christopherson wrote:
> > >On Mon, Mar 16, 2020 at 04:50:26PM -0700, Xing, Cedric wrote:
> > >>On 3/16/2020 3:53 PM, Sean Christopherson wrote:
> > >>>On Mon, Mar 16, 2020 at 11:38:24PM +0200, Jarkko Sakkinen wrote:
> > >>>>>My suggestions explicitly maintained robustness, and in fact increased
> > >>>>>it. If you think we've lost capability, please speak with specificity
> > >>>>>rather than in vague generalities. Under my suggestions we can:
> > >>>>>1. call the vDSO from C
> > >>>>>2. pass context to the handler
> > >>>>>3. have additional stack manipulation options in the handler
> > >>>>>
> > >>>>>The cost for this is a net 2 additional instructions. No existing
> > >>>>>capability is lost.
> > >>>>
> > >>>>My vague generality in this case is just that the whole design
> > >>>>approach so far has been to minimize the amount of wrapping to
> > >>>>EENTER.
> > >>>
> > >>>Yes and no.   If we wanted to minimize the amount of wrapping around the
> > >>>vDSO's ENCLU then we wouldn't have the exit handler shenanigans in the
> > >>>first place.  The whole process has been about balancing the wants of each
> > >>>use case against the overall quality of the API and code.
> > >>>
> > >>The design of this vDSO API was NOT to minimize wrapping, but to allow
> > >>maximal flexibility. More specifically, we strove not to restrict how info
> > >>was exchanged between the enclave and its host process. After all, calling
> > >>convention is compiler specific - i.e. the enclave could be built by a
> > >>different compiler (e.g. MSVC) that doesn't share the same list of CSRs as
> > >>the host process. Therefore, the API has been implemented to pass through
> > >>virtually all registers except those used by EENTER itself. Similarly, all
> > >>registers are passed back from enclave to the caller (or the exit handler)
> > >>except those used by EEXIT. %rbp is an exception because the vDSO API has to
> > >>anchor the stack, using either %rsp or %rbp. We picked %rbp to allow the
> > >>enclave to allocate space on the stack.
> > >
> > >And unless I'm missing something, using %rcx to pass @leaf would still
> > >satisfy the above, correct?  Ditto for saving/restoring %rbx.
> > >
> > >I.e. a runtime that's designed to work with enclave's using a different
> > >calling convention wouldn't be able to take advantage of being able to call
> > >the vDSO from C, but neither would it take on any meaningful burden.
> > >
> > Not exactly.
> >
> > If called directly from C code, the caller would expect CSRs to be
> > preserved. Then who should preserve CSRs? It can't be the enclave because it
> > may not follow the same calling convention. Moreover, the enclave may run
> > into an exception, in which case it doesn't have the ability to restore
> > CSRs. So it has to be done by the vDSO API. That means CSRs will be
> > overwritten upon enclave exits, which violates the goal of "passing all
> > registers back to the caller except those used by EEXIT".
>
> IIUC, Nathaniel's use case is to run only enclaves that are compatible
> with Linux's calling convention and to handle enclave exceptions in the
> exit handler.
>
> As I qualified above, there would certainly be runtimes and use cases that
> would find no advantage in passing @leaf via %rcx and preserving %rbx.  I'm
> well aware the Intel SDK falls into that bucket.  But again, the cost to
> such runtimes is precisely one reg->reg MOV instruction.

It seems to me that some think my proposal represents a shift in
strategic direction. I do not see it that way. I affirm the existing
strategic direction. My proposal only represents a specific
optimization of that strategic direction that benefits certain use
cases without significant cost to all other use cases.
Nathaniel McCallum March 17, 2020, 4:50 p.m. UTC | #36
On Mon, Mar 16, 2020 at 8:18 PM Xing, Cedric <cedric.xing@intel.com> wrote:
>
> On 3/16/2020 4:59 PM, Sean Christopherson wrote:
> > On Mon, Mar 16, 2020 at 04:50:26PM -0700, Xing, Cedric wrote:
> >> On 3/16/2020 3:53 PM, Sean Christopherson wrote:
> >>> On Mon, Mar 16, 2020 at 11:38:24PM +0200, Jarkko Sakkinen wrote:
> >>>>> My suggestions explicitly maintained robustness, and in fact increased
> >>>>> it. If you think we've lost capability, please speak with specificity
> >>>>> rather than in vague generalities. Under my suggestions we can:
> >>>>> 1. call the vDSO from C
> >>>>> 2. pass context to the handler
> >>>>> 3. have additional stack manipulation options in the handler
> >>>>>
> >>>>> The cost for this is a net 2 additional instructions. No existing
> >>>>> capability is lost.
> >>>>
> >>>> My vague generality in this case is just that the whole design
> >>>> approach so far has been to minimize the amount of wrapping to
> >>>> EENTER.
> >>>
> >>> Yes and no.   If we wanted to minimize the amount of wrapping around the
> >>> vDSO's ENCLU then we wouldn't have the exit handler shenanigans in the
> >>> first place.  The whole process has been about balancing the wants of each
> >>> use case against the overall quality of the API and code.
> >>>
> >> The design of this vDSO API was NOT to minimize wrapping, but to allow
> >> maximal flexibility. More specifically, we strove not to restrict how info
> >> was exchanged between the enclave and its host process. After all, calling
> >> convention is compiler specific - i.e. the enclave could be built by a
> >> different compiler (e.g. MSVC) that doesn't share the same list of CSRs as
> >> the host process. Therefore, the API has been implemented to pass through
> >> virtually all registers except those used by EENTER itself. Similarly, all
> >> registers are passed back from enclave to the caller (or the exit handler)
> >> except those used by EEXIT. %rbp is an exception because the vDSO API has to
> >> anchor the stack, using either %rsp or %rbp. We picked %rbp to allow the
> >> enclave to allocate space on the stack.
> >
> > And unless I'm missing something, using %rcx to pass @leaf would still
> > satisfy the above, correct?  Ditto for saving/restoring %rbx.
> >
> > I.e. a runtime that's designed to work with enclave's using a different
> > calling convention wouldn't be able to take advantage of being able to call
> > the vDSO from C, but neither would it take on any meaningful burden.
> >
> Not exactly.
>
> If called directly from C code, the caller would expect CSRs to be
> preserved.

Correct. This requires collaboration between the caller of the vDSO
and the enclave.

> Then who should preserve CSRs?

The enclave.

> It can't be the enclave
> because it may not follow the same calling convention.

This is incorrect. You are presuming there is not tight integration
between the caller of the vDSO and the enclave. In my case, the
integration is total and complete. We have working code today that
does this.

> Moreover, the
> enclave may run into an exception, in which case it doesn't have the
> ability to restore CSRs.

There are two solutions to this:
1. Write the handler in assembly and don't return to C on AEX.
2. The caller can simply preserve the registers. Nothing stops that.

We have implemented #1.

> So it has to be done by the vDSO API.

Nope. See above.

> That
> means CSRs will be overwritten upon enclave exits, which violates the
> goal of "passing all registers back to the caller except those used by
> EEXIT".

All registers get passed to the handler in this scenario, not the caller.

The approach is as follows: the vDSO is callable by C so long as the
enclave respects the ABI *OR* the handler patches up any enclave
deviation from the ABI.
Xing, Cedric March 17, 2020, 9:40 p.m. UTC | #37
Hi Nathaniel,

I reread your email today and thought I might have misunderstood it 
earlier. What changes are you asking for exactly? Is it just passing 
@leaf in %ecx rather than in %eax? If so, I wouldn't have any problem. 
I agree with you that the resulting API would then be callable from C, 
even though it wouldn't be able to return back to C due to a clobbered 
%rbx. But I think the vDSO API can preserve %rbx too, given it is used 
by both EENTER and EEXIT (so it is unavailable for parameter passing 
anyway). Alternatively, the C caller can setjmp() to be longjmp()'d 
back from within the exit handler.

-Cedric
Sean Christopherson March 17, 2020, 10:09 p.m. UTC | #38
On Tue, Mar 17, 2020 at 02:40:34PM -0700, Xing, Cedric wrote:
> Hi Nathaniel,
> 
> I reread your email today and thought I might have misunderstood your email
> earlier. What changes are you asking for exactly? Is that just passing @leaf
> in %ecx rather than in %eax? If so, I wouldn't have any problem. I agree
> with you that the resulted API would then be callable from C, even though it
> wouldn't be able to return back to C due to tampered %rbx. But I think the
> vDSO API can preserve %rbx too, given it is used by both EENTER and EEXIT
> (so is unavailable for parameter passing anyway). Alternatively, the C
> caller can setjmp() to be longjmp()'d back from within the exit handler.

Yep, exactly.  The other proposed change that is fairly straightforward is
to make the save/restore of %rsp across the exit handler call relative
instead of absolute, i.e. allow the exit handler to modify %rsp.  I don't
think this would conflict with the Intel SDK usage model?

diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
index 94a8e5f99961..05d54f79b557 100644
--- a/arch/x86/entry/vdso/vsgx_enter_enclave.S
+++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
@@ -139,8 +139,9 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
        /* Pass the untrusted RSP (at exit) to the callback via %rcx. */
        mov     %rsp, %rcx

-       /* Save the untrusted RSP in %rbx (non-volatile register). */
+       /* Save the untrusted RSP offset in %rbx (non-volatile register). */
        mov     %rsp, %rbx
+       and     $0xf, %rbx

        /*
         * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned
@@ -161,8 +162,8 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
        mov     0x20(%rbp), %rax
        call    .Lretpoline

-       /* Restore %rsp to its post-exit value. */
-       mov     %rbx, %rsp
+       /* Undo the post-exit %rsp adjustment. */
+       lea     0x20(%rsp,%rbx), %rsp
Xing, Cedric March 17, 2020, 10:23 p.m. UTC | #39
On 3/17/2020 9:50 AM, Nathaniel McCallum wrote:
> On Mon, Mar 16, 2020 at 8:18 PM Xing, Cedric <cedric.xing@intel.com> wrote:
>>
>> On 3/16/2020 4:59 PM, Sean Christopherson wrote:
>>> On Mon, Mar 16, 2020 at 04:50:26PM -0700, Xing, Cedric wrote:
>>>> On 3/16/2020 3:53 PM, Sean Christopherson wrote:
>>>>> On Mon, Mar 16, 2020 at 11:38:24PM +0200, Jarkko Sakkinen wrote:
>>>>>>> My suggestions explicitly maintained robustness, and in fact increased
>>>>>>> it. If you think we've lost capability, please speak with specificity
>>>>>>> rather than in vague generalities. Under my suggestions we can:
>>>>>>> 1. call the vDSO from C
>>>>>>> 2. pass context to the handler
>>>>>>> 3. have additional stack manipulation options in the handler
>>>>>>>
>>>>>>> The cost for this is a net 2 additional instructions. No existing
>>>>>>> capability is lost.
>>>>>>
>>>>>> My vague generality in this case is just that the whole design
>>>>>> approach so far has been to minimize the amount of wrapping to
>>>>>> EENTER.
>>>>>
>>>>> Yes and no.   If we wanted to minimize the amount of wrapping around the
>>>>> vDSO's ENCLU then we wouldn't have the exit handler shenanigans in the
>>>>> first place.  The whole process has been about balancing the wants of each
>>>>> use case against the overall quality of the API and code.
>>>>>
>>>> The design of this vDSO API was NOT to minimize wrapping, but to allow
>>>> maximal flexibility. More specifically, we strove not to restrict how info
>>>> was exchanged between the enclave and its host process. After all, calling
>>>> convention is compiler specific - i.e. the enclave could be built by a
>>>> different compiler (e.g. MSVC) that doesn't share the same list of CSRs as
>>>> the host process. Therefore, the API has been implemented to pass through
>>>> virtually all registers except those used by EENTER itself. Similarly, all
>>>> registers are passed back from enclave to the caller (or the exit handler)
>>>> except those used by EEXIT. %rbp is an exception because the vDSO API has to
>>>> anchor the stack, using either %rsp or %rbp. We picked %rbp to allow the
>>>> enclave to allocate space on the stack.
>>>
>>> And unless I'm missing something, using %rcx to pass @leaf would still
>>> satisfy the above, correct?  Ditto for saving/restoring %rbx.
>>>
>>> I.e. a runtime that's designed to work with enclave's using a different
>>> calling convention wouldn't be able to take advantage of being able to call
>>> the vDSO from C, but neither would it take on any meaningful burden.
>>>
>> Not exactly.
>>
>> If called directly from C code, the caller would expect CSRs to be
>> preserved.
> 
> Correct. This requires collaboration between the caller of the vDSO
> and the enclave.
> 
>> Then who should preserve CSRs?
> 
> The enclave.
> 
>> It can't be the enclave
>> because it may not follow the same calling convention.
> 
> This is incorrect. You are presuming there is not tight integration
> between the caller of the vDSO and the enclave. In my case, the
> integration is total and complete. We have working code today that
> does this.
> 
>> Moreover, the
>> enclave may run into an exception, in which case it doesn't have the
>> ability to restore CSRs.
> 
> There are two solutions to this:
> 1. Write the handler in assembly and don't return to C on AEX.
> 2. The caller can simply preserve the registers. Nothing stops that.
> 
> We have implemented #1.
> 
What if the enclave cannot proceed due to an unhandled exception so the 
execution has to get back to the C caller of the vDSO API?

It seems to me the caller has to preserve CSRs by itself, otherwise it 
cannot continue execution after any enclave exception. Passing @leaf in 
%ecx will allow saving/restoring CSRs in C by setjmp()/longjmp(), with 
the help of an exit handler. But if the C caller has already preserved 
CSRs, why preserve CSRs again inside the enclave? It looks to me like 
things can be simplified only if the host process handles no enclave 
exceptions (i.e. exceptions inside the enclave crash the calling thread). 
Then the only valid case is the enclave EEXIT'ing back to its caller, 
hence the enclave will always be able to restore CSRs, so that neither 
the vDSO nor its caller has to preserve CSRs.

Is my understanding correct?
Xing, Cedric March 17, 2020, 10:36 p.m. UTC | #40
On 3/17/2020 3:09 PM, Sean Christopherson wrote:
> On Tue, Mar 17, 2020 at 02:40:34PM -0700, Xing, Cedric wrote:
>> Hi Nathaniel,
>>
>> I reread your email today and thought I might have misunderstood your email
>> earlier. What changes are you asking for exactly? Is that just passing @leaf
>> in %ecx rather than in %eax? If so, I wouldn't have any problem. I agree
>> with you that the resulted API would then be callable from C, even though it
>> wouldn't be able to return back to C due to tampered %rbx. But I think the
>> vDSO API can preserve %rbx too, given it is used by both EENTER and EEXIT
>> (so is unavailable for parameter passing anyway). Alternatively, the C
>> caller can setjmp() to be longjmp()'d back from within the exit handler.
> 
> Yep, exactly.  The other proposed change that is fairly straightforward is
> to make the save/restore of %rsp across the exit handler call relative
> instead of absolute, i.e. allow the exit handler to modify %rsp.  I don't
> think this would conflict with the Intel SDK usage model?
> 
> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> index 94a8e5f99961..05d54f79b557 100644
> --- a/arch/x86/entry/vdso/vsgx_enter_enclave.S
> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> @@ -139,8 +139,9 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
>          /* Pass the untrusted RSP (at exit) to the callback via %rcx. */
>          mov     %rsp, %rcx
> 
> -       /* Save the untrusted RSP in %rbx (non-volatile register). */
> +       /* Save the untrusted RSP offset in %rbx (non-volatile register). */
>          mov     %rsp, %rbx
> +       and     $0xf, %rbx
> 
>          /*
>           * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned
> @@ -161,8 +162,8 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
>          mov     0x20(%rbp), %rax
>          call    .Lretpoline
> 
> -       /* Restore %rsp to its post-exit value. */
> -       mov     %rbx, %rsp
> +       /* Undo the post-exit %rsp adjustment. */
> +       lea     0x20(%rsp,%rbx), %rsp
> 
Yep. Though it looks a bit uncommon, I do think it will work.
Sean Christopherson March 17, 2020, 11:57 p.m. UTC | #41
On Tue, Mar 17, 2020 at 03:36:57PM -0700, Xing, Cedric wrote:
> On 3/17/2020 3:09 PM, Sean Christopherson wrote:
> >On Tue, Mar 17, 2020 at 02:40:34PM -0700, Xing, Cedric wrote:
> >>Hi Nathaniel,
> >>
> >>I reread your email today and thought I might have misunderstood your email
> >>earlier. What changes are you asking for exactly? Is that just passing @leaf
> >>in %ecx rather than in %eax? If so, I wouldn't have any problem. I agree
> >>with you that the resulted API would then be callable from C, even though it
> >>wouldn't be able to return back to C due to tampered %rbx. But I think the
> >>vDSO API can preserve %rbx too, given it is used by both EENTER and EEXIT
> >>(so is unavailable for parameter passing anyway). Alternatively, the C
> >>caller can setjmp() to be longjmp()'d back from within the exit handler.
> >
> >Yep, exactly.  The other proposed change that is fairly straightforward is
> >to make the save/restore of %rsp across the exit handler call relative
> >instead of absolute, i.e. allow the exit handler to modify %rsp.  I don't
> >think this would conflict with the Intel SDK usage model?
> >
> >diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >index 94a8e5f99961..05d54f79b557 100644
> >--- a/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >+++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >@@ -139,8 +139,9 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
> >         /* Pass the untrusted RSP (at exit) to the callback via %rcx. */
> >         mov     %rsp, %rcx
> >
> >-       /* Save the untrusted RSP in %rbx (non-volatile register). */
> >+       /* Save the untrusted RSP offset in %rbx (non-volatile register). */
> >         mov     %rsp, %rbx
> >+       and     $0xf, %rbx
> >
> >         /*
> >          * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned
> >@@ -161,8 +162,8 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
> >         mov     0x20(%rbp), %rax
> >         call    .Lretpoline
> >
> >-       /* Restore %rsp to its post-exit value. */
> >-       mov     %rbx, %rsp
> >+       /* Undo the post-exit %rsp adjustment. */
> >+       lea     0x20(%rsp,%rbx), %rsp
> >
> Yep. Though it looks a bit uncommon, I do think it will work.

Heh, I had about the same level of confidence.

I'll put together a set of patches tomorrow and post them to linux-sgx (and
cc relevant parties).  It'll be easier to continue the discussion with code
to look at and we can stop spamming LKML for a bit :-)
Nathaniel McCallum March 18, 2020, 1:01 p.m. UTC | #42
On Tue, Mar 17, 2020 at 6:23 PM Xing, Cedric <cedric.xing@intel.com> wrote:
>
> On 3/17/2020 9:50 AM, Nathaniel McCallum wrote:
> > On Mon, Mar 16, 2020 at 8:18 PM Xing, Cedric <cedric.xing@intel.com> wrote:
> >>
> >> On 3/16/2020 4:59 PM, Sean Christopherson wrote:
> >>> On Mon, Mar 16, 2020 at 04:50:26PM -0700, Xing, Cedric wrote:
> >>>> On 3/16/2020 3:53 PM, Sean Christopherson wrote:
> >>>>> On Mon, Mar 16, 2020 at 11:38:24PM +0200, Jarkko Sakkinen wrote:
> >>>>>>> My suggestions explicitly maintained robustness, and in fact increased
> >>>>>>> it. If you think we've lost capability, please speak with specificity
> >>>>>>> rather than in vague generalities. Under my suggestions we can:
> >>>>>>> 1. call the vDSO from C
> >>>>>>> 2. pass context to the handler
> >>>>>>> 3. have additional stack manipulation options in the handler
> >>>>>>>
> >>>>>>> The cost for this is a net 2 additional instructions. No existing
> >>>>>>> capability is lost.
> >>>>>>
> >>>>>> My vague generality in this case is just that the whole design
> >>>>>> approach so far has been to minimize the amount of wrapping to
> >>>>>> EENTER.
> >>>>>
> >>>>> Yes and no.   If we wanted to minimize the amount of wrapping around the
> >>>>> vDSO's ENCLU then we wouldn't have the exit handler shenanigans in the
> >>>>> first place.  The whole process has been about balancing the wants of each
> >>>>> use case against the overall quality of the API and code.
> >>>>>
> >>>> The design of this vDSO API was NOT to minimize wrapping, but to allow
> >>>> maximal flexibility. More specifically, we strove not to restrict how info
> >>>> was exchanged between the enclave and its host process. After all, calling
> >>>> convention is compiler specific - i.e. the enclave could be built by a
> >>>> different compiler (e.g. MSVC) that doesn't share the same list of CSRs as
> >>>> the host process. Therefore, the API has been implemented to pass through
> >>>> virtually all registers except those used by EENTER itself. Similarly, all
> >>>> registers are passed back from enclave to the caller (or the exit handler)
> >>>> except those used by EEXIT. %rbp is an exception because the vDSO API has to
> >>>> anchor the stack, using either %rsp or %rbp. We picked %rbp to allow the
> >>>> enclave to allocate space on the stack.
> >>>
> >>> And unless I'm missing something, using %rcx to pass @leaf would still
> >>> satisfy the above, correct?  Ditto for saving/restoring %rbx.
> >>>
> >>> I.e. a runtime that's designed to work with enclave's using a different
> >>> calling convention wouldn't be able to take advantage of being able to call
> >>> the vDSO from C, but neither would it take on any meaningful burden.
> >>>
> >> Not exactly.
> >>
> >> If called directly from C code, the caller would expect CSRs to be
> >> preserved.
> >
> > Correct. This requires collaboration between the caller of the vDSO
> > and the enclave.
> >
> >> Then who should preserve CSRs?
> >
> > The enclave.
> >
> >> It can't be the enclave
> >> because it may not follow the same calling convention.
> >
> > This is incorrect. You are presuming there is not tight integration
> > between the caller of the vDSO and the enclave. In my case, the
> > integration is total and complete. We have working code today that
> > does this.
> >
> >> Moreover, the
> >> enclave may run into an exception, in which case it doesn't have the
> >> ability to restore CSRs.
> >
> > There are two solutions to this:
> > 1. Write the handler in assembly and don't return to C on AEX.
> > 2. The caller can simply preserve the registers. Nothing stops that.
> >
> > We have implemented #1.
> >
> What if the enclave cannot proceed due to an unhandled exception so the
> execution has to get back to the C caller of the vDSO API?

mov $60, %rax	/* __NR_exit */
mov $1, %rdi	/* exit status */
syscall

We exit in all such cases.

> It seems to me the caller has to preserve CSRs by itself, otherwise it
> cannot continue execution after any enclave exception. Passing @leaf in
> %ecx will allow saving/restoring CSRs in C by setjmp()/longjmp(), with
> the help of an exit handler. But if the C caller has already preserved
> CSRs, why preserve CSRs again inside the enclave? It looks to me things
> can be simplified only if the host process handles no enclave exceptions
> (or exceptions inside the enclave will crash the calling thread). Thus
> the only case of enclave EEXIT'ing back to its caller is considered
> valid, hence the enclave will always be able to restore CSRs, so that
> neither vDSO nor its caller has to preserve CSRs.
>
> Is my understanding correct?
>
Jarkko Sakkinen March 18, 2020, 10:01 p.m. UTC | #43
On Mon, Mar 16, 2020 at 04:56:42PM -0700, Xing, Cedric wrote:
> On 3/16/2020 3:55 PM, Sean Christopherson wrote:
> > On Mon, Mar 16, 2020 at 02:31:36PM +0100, Jethro Beekman wrote:
> > > Can someone remind me why we're not passing TCS in RBX but on the stack?
> > 
> > I finally remembered why.  It's pulled off the stack and passed into the
> > exit handler.  I'm pretty sure the vDSO could take it in %rbx and manually
> > save it on the stack, but I'd rather keep the current behavior so that the
> > vDSO is callable from C (assuming @leaf is changed to be passed via %rcx).
> > 
> The idea is that the caller of this vDSO API is C callable, hence it cannot
> receive TCS in %rbx anyway. Then it has to either MOV to %rbx or PUSH to
> stack. Either way the complexity is the same. The vDSO API however has to
> always save it on stack for exit handler. So receiving it via stack ends up
> in simplest code.

It is never C callable anyway, given that it does not follow the ABI,
so who cares about being C callable?

/Jarkko
Jarkko Sakkinen March 18, 2020, 10:18 p.m. UTC | #44
On Thu, Mar 19, 2020 at 12:01:26AM +0200, Jarkko Sakkinen wrote:
> On Mon, Mar 16, 2020 at 04:56:42PM -0700, Xing, Cedric wrote:
> > On 3/16/2020 3:55 PM, Sean Christopherson wrote:
> > > On Mon, Mar 16, 2020 at 02:31:36PM +0100, Jethro Beekman wrote:
> > > > Can someone remind me why we're not passing TCS in RBX but on the stack?
> > > 
> > > I finally remembered why.  It's pulled off the stack and passed into the
> > > exit handler.  I'm pretty sure the vDSO could take it in %rbx and manually
> > > save it on the stack, but I'd rather keep the current behavior so that the
> > > vDSO is callable from C (assuming @leaf is changed to be passed via %rcx).
> > > 
> > The idea is that the caller of this vDSO API is C callable, hence it cannot
> > receive TCS in %rbx anyway. Then it has to either MOV to %rbx or PUSH to
> > stack. Either way the complexity is the same. The vDSO API however has to
> > always save it on stack for exit handler. So receiving it via stack ends up
> > in simplest code.
> 
> It is never C callable anyway given that not following the ABI so
> who cares about being C callable anyway?

A direct quote from the documentation:

"**Important!**  __vdso_sgx_enter_enclave() is **NOT** compliant with
the x86-64 ABI, i.e. cannot be called from standard C code."

/Jarkko
Jarkko Sakkinen March 18, 2020, 10:39 p.m. UTC | #45
On Mon, Mar 16, 2020 at 03:53:22PM -0700, Sean Christopherson wrote:
> Yes and no.   If we wanted to minimize the amount of wrapping around the
> vDSO's ENCLU then we wouldn't have the exit handler shenanigans in the
> first place.  The whole process has been about balancing the wants of each
> use case against the overall quality of the API and code.

Minimizing is not something that happens in a vacuum. Given the user
base for the SDK, having the handler was a necessity. Otherwise, we
would not have that handler in the first place.

> Up until Nathaniel joined the party, the only stakeholder in terms of the
> exit handler was the Intel SDK.  There was a general consensus to pass
> registers as-is when there isn't a strong reason to do otherwise.  Note
> that Nathaniel has also expressed approval of that approach.

OK, great.

> The major benefits being that the vDSO would be callable from C and that
> the kernel could define a legitimate prototype instead of a frankenstein
> prototype that's half assembly and half C.  For me, those are significant

I was not aware that there was a plot to make it callable by C.

OK, so right now

A. @leaf =  %eax
B. @tcs = 8(%rsp)
C. @e = 0x10(%rsp)
D. @handler = 0x18(%rsp)

On x86-64, the Linux C calling convention means RDI/RSI/RDX/RCX and so on.

So what exactly are we referring to as the C calling convention in this
email discussion?

> benefits and well worth the extra MOV, PUSH and POP.  For some use cases
> it would eliminate the need for an assembly wrapper.  For runtimes that
> need an assembly wrapper for whatever reason, it's probably still a win as
> a well designed runtime can avoid register shuffling in the wrapper.  And
> if there is a runtime that isn't covered by the above, it's at worst an
> extra MOV.

Is it cool if I rip the documentation out of vsgx_enter_enclave.S and
move it to Documentation/? It is nasty to keep and update it where it
is right now. Where it is now, it is destined to rot.

/Jarkko
Jarkko Sakkinen March 18, 2020, 10:58 p.m. UTC | #46
On Tue, Mar 17, 2020 at 12:28:58PM -0400, Nathaniel McCallum wrote:
> On Mon, Mar 16, 2020 at 6:53 PM Sean Christopherson
> <sean.j.christopherson@intel.com> wrote:
> >
> > On Mon, Mar 16, 2020 at 11:38:24PM +0200, Jarkko Sakkinen wrote:
> > > On Mon, 2020-03-16 at 10:01 -0400, Nathaniel McCallum wrote:
> > > > On Mon, Mar 16, 2020 at 9:56 AM Jarkko Sakkinen
> > > > <jarkko.sakkinen@linux.intel.com> wrote:
> > > > > On Sun, 2020-03-15 at 13:53 -0400, Nathaniel McCallum wrote:
> > > > > > On Sat, Mar 14, 2020 at 9:25 PM Jarkko Sakkinen
> > > > > > <jarkko.sakkinen@linux.intel.com> wrote:
> > > > > > > On Wed, Mar 11, 2020 at 01:30:07PM -0400, Nathaniel McCallum wrote:
> > > > > > > > Currently, the selftest has a wrapper around
> > > > > > > > __vdso_sgx_enter_enclave() which preserves all x86-64 ABI callee-saved
> > > > > > > > registers (CSRs), though it uses none of them. Then it calls this
> > > > > > > > function which uses %rbx but preserves none of the CSRs. Then it jumps
> > > > > > > > into an enclave which zeroes all these registers before returning.
> > > > > > > > Thus:
> > > > > > > >
> > > > > > > >   1. wrapper saves all CSRs
> > > > > > > >   2. wrapper repositions stack arguments
> > > > > > > >   3. __vdso_sgx_enter_enclave() modifies, but does not save %rbx
> > > > > > > >   4. selftest zeros all CSRs
> > > > > > > >   5. wrapper loads all CSRs
> > > > > > > >
> > > > > > > > I'd like to propose instead that the enclave be responsible for saving
> > > > > > > > and restoring CSRs. So instead of the above we have:
> > > > > > > >   1. __vdso_sgx_enter_enclave() saves %rbx
> > > > > > > >   2. enclave saves CSRs
> > > > > > > >   3. enclave loads CSRs
> > > > > > > >   4. __vdso_sgx_enter_enclave() loads %rbx
> > > > > > > >
> > > > > > > > I know that lots of other stuff happens during enclave transitions,
> > > > > > > > but at the very least we could reduce the number of instructions
> > > > > > > > through this critical path.
> > > > > > >
> > > > > > > What Jethro said and also that it is a good general principle to cut
> > > > > > > down the semantics of any vdso as minimal as possible.
> > > > > > >
> > > > > > > I.e. even if saving RBX would make somehow sense it *can* be left
> > > > > > > out without loss in terms of what can be done with the vDSO.
> > > > > >
> > > > > > Please read the rest of the thread. Sean and I have hammered out some
> > > > > > sensible and effective changes.
> > > > >
> > > > > Have skimmed through that discussion but it comes down how much you get
> > > > > by obviously degrading some of the robustness. Complexity of the calling
> > > > > pattern is not something that should be emphasized as that is something
> > > > > that is anyway hidden inside the runtime.
> > > >
> > > > My suggestions explicitly maintained robustness, and in fact increased
> > > > it. If you think we've lost capability, please speak with specificity
> > > > rather than in vague generalities. Under my suggestions we can:
> > > > 1. call the vDSO from C
> > > > 2. pass context to the handler
> > > > 3. have additional stack manipulation options in the handler
> > > >
> > > > The cost for this is a net 2 additional instructions. No existing
> > > > capability is lost.
> > >
> > > My vague generality in this case is just that the whole design
> > > approach so far has been to minimize the amount of wrapping to
> > > EENTER.
> >
> > Yes and no.   If we wanted to minimize the amount of wrapping around the
> > vDSO's ENCLU then we wouldn't have the exit handler shenanigans in the
> > first place.  The whole process has been about balancing the wants of each
> > use case against the overall quality of the API and code.
> >
> > > And since this has been kind of agreed by most of the
> > > stakeholders doing something against the chosen strategy is
> > > something I do hold some resistance.
> >
> > Up until Nathaniel joined the party, the only stakeholder in terms of the
> > exit handler was the Intel SDK.
> 
> I would hope that having additional stakeholders would ease the path
> to adoption.
> 
> > There was a general consensus to pass
> > registers as-is when there isn't a strong reason to do otherwise.  Note
> > that Nathaniel has also expressed approval of that approach.
> 
> I still approve that approach.
> 
> > So I think the question that needs to be answered is whether the benefits
> > of using %rcx instead of %rax to pass @leaf justify the "pass registers
> > as-is" guideline.  We've effectively already given this waiver for %rbx,
> > as the whole reason why the TCS is passed in on the stack instead of via
> > %rbx is so that it can be passed to the exit handler.  E.g. the vDSO
> > could take the TCS in %rbx and save it on the stack, but we're throwing
> > the baby out with the bathwater at that point.
> >
> > The major benefits being that the vDSO would be callable from C and that
> > the kernel could define a legitimate prototype instead of a frankenstein
> > prototype that's half assembly and half C.  For me, those are significant
> > benefits and well worth the extra MOV, PUSH and POP.  For some use cases
> > it would eliminate the need for an assembly wrapper.  For runtimes that
> > need an assembly wrapper for whatever reason, it's probably still a win as
> > a well designed runtime can avoid register shuffling in the wrapper.  And
> > if there is a runtime that isn't covered by the above, it's at worst an
> > extra MOV.
> >
> 

Guys, maybe there has been enough discussion. At least from where I sit,
things are going in circles. Just send a patch against the current tree
and we'll look into it then?

I'm a strong believer in "good enough", well, in everything in life. With
a legit patch it is easier to evaluate whether what we get is just a
different version of good enough, or whether we might get some useful
value out of it.

If you think you can put together something C-callable, please
*prove* that by also updating the selftest.

Fair enough?

/Jarkko
Sean Christopherson March 18, 2020, 11:40 p.m. UTC | #47
On Sat, Mar 14, 2020 at 10:10:26AM -0400, Nathaniel McCallum wrote:
> On Fri, Mar 13, 2020 at 6:08 PM Sean Christopherson
> <sean.j.christopherson@intel.com> wrote:
> > > > > 4. sub/add to %rsp rather than save/restore
> > > >
> > > > Can you elaborate on why you want to sub/add to %rsp instead of having the
> > > > enclave unwind the stack?  Preserving %rsp across EEXIT/ERESUME seems more
> > > > in line with function call semantics, which I assume is desirable?  E.g.
> > > >
> > > >   push param3
> > > >   push param2
> > > >   push param1
> > > >
> > > >   enclu[EEXIT]
> > > >
> > > >   add $0x18, %rsp
> > >
> > > Before enclave EEXIT, the enclave restores %rsp to the value it had
> > > before EENTER was called. Then it pushes additional output arguments
> > > onto the stack. The enclave calls EENCLU[EEXIT].
> > >
> > > We are now in __vdso...() on the way back to the caller. However, %rsp
> > > has a different value than we entered the function with. This breaks
> > > x86_64 ABI, obviously. The handler needs to fix this up: how does it
> > > do so?

Circling back to this request, because I just realized that the above is
handled by saving %rsp into %rbp and requiring the enclave and handler
to preserve %rbp at all times.

So the below discussion on making the %rsp adjustment relative is moot,
at least with respect to getting out of __vdso() if the enclave has mucked
with the untrusted stack.

> > > In the current code, __vdso..() saves the value of %rsp, calls the
> > > handler and then restores %rsp. The handler can fix up the stack by
> > > setting the correct value to %rbx and returning without restoring it.
> >
> > Ah, you're referring to the patch where the handler decides to return all
> > the way back to the caller of __vdso...().
> >
> > > But this requires internal knowledge of the __vdso...() function,
> > > which could theoretically change in the future.
> > >
> > > If instead the __vdso...() only did add/sub, then the handler could do:
> > > 1. pop return address
> > > 2. pop handler stack params
> > > 3. pop enclave additional output stack params
> > > 4. push handler stack params
> > > 5. push return address

Per above, this is unnecessary when returning to the caller of __vdso().
It would be necessary if the enclave wasn't smart enough to do its own
stack cleanup, but that seems like a very bizarre contract between the
enclave and its runtime.

The caveat is if %rbx is saved/restored by __vdso().  If we want a
traditional frame pointer, then %rbx would be restored from the stack
before %rsp itself is restored (from %rbp), in which case the exit handler
would need to adjust %rsp using a sequence similar to what you listed
above.

If __vdso() uses a non-standard frame pointer, e.g.

  push %rbp
  push %rbx
  mov  %rsp, %rbp

then %rbx would come off the stack after %rsp is restored from %rbp, i.e.
would be guaranteed to be restored to the pre-EENTER value (unless the
enclave or handler mucks with %rbp).

Anyways, we can discuss how to implement the frame pointer in the context
of the patches, just wanted to point this out here for completeness.

> > > While this is more work, it is standard calling convention work that
> > > doesn't require internal knowledge of __vdso..(). Alternatively, if we
> > > don't like the extra work, we can document the %rbx hack explicitly
> > > into the handler documentation and make it part of the interface. But
> > > we need some explicit way for the handler to pop enclave output stack
> > > params that doesn't depend on internal knowledge of the __vdso...()
> > > invariants.
> >
> > IIUC, this is what you're suggesting?  Having to align the stack makes this
> > a bit annoying, but it's not bad by any means.
> >
> > diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > index 94a8e5f99961..05d54f79b557 100644
> > --- a/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > @@ -139,8 +139,9 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
> >         /* Pass the untrusted RSP (at exit) to the callback via %rcx. */
> >         mov     %rsp, %rcx
> >
> > -       /* Save the untrusted RSP in %rbx (non-volatile register). */
> > +       /* Save the untrusted RSP offset in %rbx (non-volatile register). */
> >         mov     %rsp, %rbx
> > +       and     $0xf, %rbx
> >
> >         /*
> >          * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned
> > @@ -161,8 +162,8 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
> >         mov     0x20(%rbp), %rax
> >         call    .Lretpoline
> >
> > -       /* Restore %rsp to its post-exit value. */
> > -       mov     %rbx, %rsp
> > +       /* Undo the post-exit %rsp adjustment. */
> > +       lea     0x20(%rsp,%rbx), %rsp
> >
> >
> > That's reasonable, let's the handler play more games with minimal overhead.
> 
> Yes, exactly!
> 
> > > > > That would make this a very usable and fast interface without
> > > > > sacrificing any of its current power.
> > > >
> > >
> >
>
Xing, Cedric March 19, 2020, 12:38 a.m. UTC | #48
On 3/18/2020 4:40 PM, Sean Christopherson wrote:
> On Sat, Mar 14, 2020 at 10:10:26AM -0400, Nathaniel McCallum wrote:
>> On Fri, Mar 13, 2020 at 6:08 PM Sean Christopherson
>> <sean.j.christopherson@intel.com> wrote:
>>>>>> 4. sub/add to %rsp rather than save/restore
>>>>>
>>>>> Can you elaborate on why you want to sub/add to %rsp instead of having the
>>>>> enclave unwind the stack?  Preserving %rsp across EEXIT/ERESUME seems more
>>>>> in line with function call semantics, which I assume is desirable?  E.g.
>>>>>
>>>>>    push param3
>>>>>    push param2
>>>>>    push param1
>>>>>
>>>>>    enclu[EEXIT]
>>>>>
>>>>>    add $0x18, %rsp
>>>>
>>>> Before enclave EEXIT, the enclave restores %rsp to the value it had
>>>> before EENTER was called. Then it pushes additional output arguments
>>>> onto the stack. The enclave calls EENCLU[EEXIT].
>>>>
>>>> We are now in __vdso...() on the way back to the caller. However, %rsp
>>>> has a different value than we entered the function with. This breaks
>>>> x86_64 ABI, obviously. The handler needs to fix this up: how does it
>>>> do so?
> 
> Circling back to this request, because I just realized that the above is
> handled by saving %rsp into %rbp and requiring the enclave and handler
> to preserve %rbp at all times.
> 
> So the below discussion on making the %rsp adjustment relative is moot,
> at least with respect to getting out of __vdso() if the enclave has mucked
> with the untrusted stack.
> 
I didn't follow the discussion closely enough to understand the 
motivation behind "add/sub" rather than "restore" %rsp. Now I understand 
and I agree with you that the requested change is unnecessary.

>>>> In the current code, __vdso..() saves the value of %rsp, calls the
>>>> handler and then restores %rsp. The handler can fix up the stack by
>>>> setting the correct value to %rbx and returning without restoring it.
>>>
>>> Ah, you're referring to the patch where the handler decides to return all
>>> the way back to the caller of __vdso...().
>>>
>>>> But this requires internal knowledge of the __vdso...() function,
>>>> which could theoretically change in the future.
>>>>
>>>> If instead the __vdso...() only did add/sub, then the handler could do:
>>>> 1. pop return address
>>>> 2. pop handler stack params
>>>> 3. pop enclave additional output stack params
>>>> 4. push handler stack params
>>>> 5. push return address
> 
> Per above, this is unnecessary when returning to the caller of __vdso().
> It would be necessary if the enclave wasn't smart enough to do it's own
> stack cleanup, but that seems like a very bizarre contract between the
> enclave and its runtime.
> 
> The caveat is if %rbx is saved/restored by __vdso().  If we want a
> traditional frame pointer, then %rbx would be restored from the stack
> before %rsp itself is restored (from %rbp), in which case the exit handler
> would need to adjust %rsp using a sequence similar to what you listed
> above.
> 
> If __vdso() uses a non-standard frame pointer, e.g.
> 
>    push %rbp
>    push %rbx
>    mov  %rsp, %rbp
> 
> then %rbx would come off the stack after %rsp is restored from %rbp, i.e.
> would be guaranteed to be restored to the pre-EENTER value (unless the
> enclave or handler mucks with %rbp).
> 
> Anyways, we can discuss how to implement the frame pointer in the context
> of the patches, just wanted to point this out here for completeness.
> 
%rbx can always be restored as long as it is saved at a fixed offset 
from %rbp. For example, given the standard prolog below:

	push	%rbp
	mov	%rsp, %rbp
	push	%rbx

It can be paired with the following standard epilog:

	mov	-8(%rbp), %rbx
	leave
	ret

Alternatively, given "red zone" of 128 bytes, the following epilog will 
also work:

	leave
	mov	-0x10(%rsp), %rbx
	ret

In no case do we have to worry about the enclave mucking with the stack
as long as %rbp is preserved.

>>>> While this is more work, it is standard calling convention work that
>>>> doesn't require internal knowledge of __vdso..(). Alternatively, if we
>>>> don't like the extra work, we can document the %rbx hack explicitly
>>>> into the handler documentation and make it part of the interface. But
>>>> we need some explicit way for the handler to pop enclave output stack
>>>> params that doesn't depend on internal knowledge of the __vdso...()
>>>> invariants.
>>>
>>> IIUC, this is what you're suggesting?  Having to align the stack makes this
>>> a bit annoying, but it's not bad by any means.
>>>
>>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
>>> index 94a8e5f99961..05d54f79b557 100644
>>> --- a/arch/x86/entry/vdso/vsgx_enter_enclave.S
>>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
>>> @@ -139,8 +139,9 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
>>>          /* Pass the untrusted RSP (at exit) to the callback via %rcx. */
>>>          mov     %rsp, %rcx
>>>
>>> -       /* Save the untrusted RSP in %rbx (non-volatile register). */
>>> +       /* Save the untrusted RSP offset in %rbx (non-volatile register). */
>>>          mov     %rsp, %rbx
>>> +       and     $0xf, %rbx
>>>
>>>          /*
>>>           * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned
>>> @@ -161,8 +162,8 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
>>>          mov     0x20(%rbp), %rax
>>>          call    .Lretpoline
>>>
>>> -       /* Restore %rsp to its post-exit value. */
>>> -       mov     %rbx, %rsp
>>> +       /* Undo the post-exit %rsp adjustment. */
>>> +       lea     0x20(%rsp,%rbx), %rsp
>>>

Per the discussion above, this is useful only if the enclave has trouble
cleaning up its own mess left on the untrusted stack, and the exit
handler wants to EENTER the enclave again by returning to __vdso...().
That sounds very uncommon to me, and more like a bug than expected
behavior. Is there any existing code doing this, or any particular
application that needs it? If not, I'd say don't do it.
Sean Christopherson March 19, 2020, 1:03 a.m. UTC | #49
On Wed, Mar 18, 2020 at 05:38:29PM -0700, Xing, Cedric wrote:
> On 3/18/2020 4:40 PM, Sean Christopherson wrote:
> %rbx can always be restored as long as it is saved at a fixed offset from
> %rbp. For example, given the standard prolog below:
> 
> 	push	%rbp
> 	mov	%rsp, %rbp
> 	push	%rbx
> 
> It can be paired with the following standard epilog:
> 
> 	mov	-8(%rbp), %rbx
> 	leave
> 	ret
> 
> Alternatively, given "red zone" of 128 bytes, the following epilog will also
> work:
> 
> 	leave
> 	mov	-0x10(%rsp), %rbx
> 	ret
> 
> In no cases do we have to worry about enclave mucking the stack as long as
> %rbp is preserved.
> 
> >>>>While this is more work, it is standard calling convention work that
> >>>>doesn't require internal knowledge of __vdso..(). Alternatively, if we
> >>>>don't like the extra work, we can document the %rbx hack explicitly
> >>>>into the handler documentation and make it part of the interface. But
> >>>>we need some explicit way for the handler to pop enclave output stack
> >>>>params that doesn't depend on internal knowledge of the __vdso...()
> >>>>invariants.
> >>>
> >>>IIUC, this is what you're suggesting?  Having to align the stack makes this
> >>>a bit annoying, but it's not bad by any means.
> >>>
> >>>diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >>>index 94a8e5f99961..05d54f79b557 100644
> >>>--- a/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >>>+++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >>>@@ -139,8 +139,9 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
> >>>         /* Pass the untrusted RSP (at exit) to the callback via %rcx. */
> >>>         mov     %rsp, %rcx
> >>>
> >>>-       /* Save the untrusted RSP in %rbx (non-volatile register). */
> >>>+       /* Save the untrusted RSP offset in %rbx (non-volatile register). */
> >>>         mov     %rsp, %rbx
> >>>+       and     $0xf, %rbx
> >>>
> >>>         /*
> >>>          * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned
> >>>@@ -161,8 +162,8 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
> >>>         mov     0x20(%rbp), %rax
> >>>         call    .Lretpoline
> >>>
> >>>-       /* Restore %rsp to its post-exit value. */
> >>>-       mov     %rbx, %rsp
> >>>+       /* Undo the post-exit %rsp adjustment. */
> >>>+       lea     0x20(%rsp,%rbx), %rsp
> >>>
> 
> Per discussion above, this is useful only if the enclave has problem
> cleaning up its own mess left on the untrusted stack, and the exit handler
> wants to EENTER the enclave again by returning to __vdso...(). It sounds
> very uncommon to me, and more like a bug than an expected behavior. Are
> there any existing code doing this or any particular application that needs
> this. If no, I'd say not to do it.

Ya, I'm on the fence as well.  The only counter-argument is that doing:

	push	%rbp
	mov	%rsp, %rbp
	push	%rbx

	...

	pop	%rbx
	leave
	ret

with the relative adjustment would allow the exit handler (or enclave) to
change %rbx.  I'm not saying that is remotely sane, but if we're going for
maximum flexibility...

Anyways, patches incoming, let's discuss there.
Nathaniel McCallum March 20, 2020, 1:55 p.m. UTC | #50
On Wed, Mar 18, 2020 at 7:41 PM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Sat, Mar 14, 2020 at 10:10:26AM -0400, Nathaniel McCallum wrote:
> > On Fri, Mar 13, 2020 at 6:08 PM Sean Christopherson
> > <sean.j.christopherson@intel.com> wrote:
> > > > > > 4. sub/add to %rsp rather than save/restore
> > > > >
> > > > > Can you elaborate on why you want to sub/add to %rsp instead of having the
> > > > > enclave unwind the stack?  Preserving %rsp across EEXIT/ERESUME seems more
> > > > > in line with function call semantics, which I assume is desirable?  E.g.
> > > > >
> > > > >   push param3
> > > > >   push param2
> > > > >   push param1
> > > > >
> > > > >   enclu[EEXIT]
> > > > >
> > > > >   add $0x18, %rsp
> > > >
> > > > Before enclave EEXIT, the enclave restores %rsp to the value it had
> > > > before EENTER was called. Then it pushes additional output arguments
> > > > onto the stack. The enclave calls EENCLU[EEXIT].
> > > >
> > > > We are now in __vdso...() on the way back to the caller. However, %rsp
> > > > has a different value than we entered the function with. This breaks
> > > > x86_64 ABI, obviously. The handler needs to fix this up: how does it
> > > > do so?
>
> Circling back to this request, because I just realized that the above is
> handled by saving %rsp into %rbp and requiring the enclave and handler
> to preserve %rbp at all times.
>
> So the below discussion on making the %rsp adjustment relative is moot,
> at least with respect to getting out of __vdso() if the enclave has mucked
> with the untrusted stack.

You're right. __vdso() will always restore the caller's stack via the
leave instruction at .Lout. So no change is necessary.

> > > > In the current code, __vdso..() saves the value of %rsp, calls the
> > > > handler and then restores %rsp. The handler can fix up the stack by
> > > > setting the correct value to %rbx and returning without restoring it.
> > >
> > > Ah, you're referring to the patch where the handler decides to return all
> > > the way back to the caller of __vdso...().
> > >
> > > > But this requires internal knowledge of the __vdso...() function,
> > > > which could theoretically change in the future.
> > > >
> > > > If instead the __vdso...() only did add/sub, then the handler could do:
> > > > 1. pop return address
> > > > 2. pop handler stack params
> > > > 3. pop enclave additional output stack params
> > > > 4. push handler stack params
> > > > 5. push return address
>
> Per above, this is unnecessary when returning to the caller of __vdso().
> It would be necessary if the enclave wasn't smart enough to do it's own
> stack cleanup, but that seems like a very bizarre contract between the
> enclave and its runtime.
>
> The caveat is if %rbx is saved/restored by __vdso().  If we want a
> traditional frame pointer, then %rbx would be restored from the stack
> before %rsp itself is restored (from %rbp), in which case the exit handler
> would need to adjust %rsp using a sequence similar to what you listed
> above.
>
> If __vdso() uses a non-standard frame pointer, e.g.
>
>   push %rbp
>   push %rbx
>   mov  %rsp, %rbp
>
> then %rbx would come off the stack after %rsp is restored from %rbp, i.e.
> would be guaranteed to be restored to the pre-EENTER value (unless the
> enclave or handler mucks with %rbp).
>
> Anyways, we can discuss how to implement the frame pointer in the context
> of the patches, just wanted to point this out here for completeness.
>
> > > > While this is more work, it is standard calling convention work that
> > > > doesn't require internal knowledge of __vdso..(). Alternatively, if we
> > > > don't like the extra work, we can document the %rbx hack explicitly
> > > > into the handler documentation and make it part of the interface. But
> > > > we need some explicit way for the handler to pop enclave output stack
> > > > params that doesn't depend on internal knowledge of the __vdso...()
> > > > invariants.
> > >
> > > IIUC, this is what you're suggesting?  Having to align the stack makes this
> > > a bit annoying, but it's not bad by any means.
> > >
> > > diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > > index 94a8e5f99961..05d54f79b557 100644
> > > --- a/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > > +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > > @@ -139,8 +139,9 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
> > >         /* Pass the untrusted RSP (at exit) to the callback via %rcx. */
> > >         mov     %rsp, %rcx
> > >
> > > -       /* Save the untrusted RSP in %rbx (non-volatile register). */
> > > +       /* Save the untrusted RSP offset in %rbx (non-volatile register). */
> > >         mov     %rsp, %rbx
> > > +       and     $0xf, %rbx
> > >
> > >         /*
> > >          * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned
> > > @@ -161,8 +162,8 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
> > >         mov     0x20(%rbp), %rax
> > >         call    .Lretpoline
> > >
> > > -       /* Restore %rsp to its post-exit value. */
> > > -       mov     %rbx, %rsp
> > > +       /* Undo the post-exit %rsp adjustment. */
> > > +       lea     0x20(%rsp,%rbx), %rsp
> > >
> > >
> > > That's reasonable, let's the handler play more games with minimal overhead.
> >
> > Yes, exactly!
> >
> > > > > > That would make this a very usable and fast interface without
> > > > > > sacrificing any of its current power.
> > > > >
> > > >
> > >
> >
>
Nathaniel McCallum March 20, 2020, 3:53 p.m. UTC | #51
On Wed, Mar 18, 2020 at 9:01 AM Nathaniel McCallum
<npmccallum@redhat.com> wrote:
>
> On Tue, Mar 17, 2020 at 6:23 PM Xing, Cedric <cedric.xing@intel.com> wrote:
> >
> > On 3/17/2020 9:50 AM, Nathaniel McCallum wrote:
> > > On Mon, Mar 16, 2020 at 8:18 PM Xing, Cedric <cedric.xing@intel.com> wrote:
> > >>
> > >> On 3/16/2020 4:59 PM, Sean Christopherson wrote:
> > >>> On Mon, Mar 16, 2020 at 04:50:26PM -0700, Xing, Cedric wrote:
> > >>>> On 3/16/2020 3:53 PM, Sean Christopherson wrote:
> > >>>>> On Mon, Mar 16, 2020 at 11:38:24PM +0200, Jarkko Sakkinen wrote:
> > >>>>>>> My suggestions explicitly maintained robustness, and in fact increased
> > >>>>>>> it. If you think we've lost capability, please speak with specificity
> > >>>>>>> rather than in vague generalities. Under my suggestions we can:
> > >>>>>>> 1. call the vDSO from C
> > >>>>>>> 2. pass context to the handler
> > >>>>>>> 3. have additional stack manipulation options in the handler
> > >>>>>>>
> > >>>>>>> The cost for this is a net 2 additional instructions. No existing
> > >>>>>>> capability is lost.
> > >>>>>>
> > >>>>>> My vague generality in this case is just that the whole design
> > >>>>>> approach so far has been to minimize the amount of wrapping to
> > >>>>>> EENTER.
> > >>>>>
> > >>>>> Yes and no.   If we wanted to minimize the amount of wrapping around the
> > >>>>> vDSO's ENCLU then we wouldn't have the exit handler shenanigans in the
> > >>>>> first place.  The whole process has been about balancing the wants of each
> > >>>>> use case against the overall quality of the API and code.
> > >>>>>
> > >>>> The design of this vDSO API was NOT to minimize wrapping, but to allow
> > >>>> maximal flexibility. More specifically, we strove not to restrict how info
> > >>>> was exchanged between the enclave and its host process. After all, calling
> > >>>> convention is compiler specific - i.e. the enclave could be built by a
> > >>>> different compiler (e.g. MSVC) that doesn't share the same list of CSRs as
> > >>>> the host process. Therefore, the API has been implemented to pass through
> > >>>> virtually all registers except those used by EENTER itself. Similarly, all
> > >>>> registers are passed back from enclave to the caller (or the exit handler)
> > >>>> except those used by EEXIT. %rbp is an exception because the vDSO API has to
> > >>>> anchor the stack, using either %rsp or %rbp. We picked %rbp to allow the
> > >>>> enclave to allocate space on the stack.
> > >>>
> > >>> And unless I'm missing something, using %rcx to pass @leaf would still
> > >>> satisfy the above, correct?  Ditto for saving/restoring %rbx.
> > >>>
> > >>> I.e. a runtime that's designed to work with enclave's using a different
> > >>> calling convention wouldn't be able to take advantage of being able to call
> > >>> the vDSO from C, but neither would it take on any meaningful burden.
> > >>>
> > >> Not exactly.
> > >>
> > >> If called directly from C code, the caller would expect CSRs to be
> > >> preserved.
> > >
> > > Correct. This requires collaboration between the caller of the vDSO
> > > and the enclave.
> > >
> > >> Then who should preserve CSRs?
> > >
> > > The enclave.
> > >
> > >> It can't be the enclave
> > >> because it may not follow the same calling convention.
> > >
> > > This is incorrect. You are presuming there is not tight integration
> > > between the caller of the vDSO and the enclave. In my case, the
> > > integration is total and complete. We have working code today that
> > > does this.
> > >
> > >> Moreover, the
> > >> enclave may run into an exception, in which case it doesn't have the
> > >> ability to restore CSRs.
> > >
> > > There are two solutions to this:
> > > 1. Write the handler in assembly and don't return to C on AEX.
> > > 2. The caller can simply preserve the registers. Nothing stops that.
> > >
> > > We have implemented #1.
> > >
> > What if the enclave cannot proceed due to an unhandled exception so the
> > execution has to get back to the C caller of the vDSO API?
>
> mov $60, %rax
> mov $1, %rdi
> syscall
>
> We exit in all such cases.

Another solution is for the enclave to push the non-volatile registers
to the non-enclave stack upon entry and let the handler restore them.
That works for both EEXIT and AEX and you can return to C code then.

> > It seems to me the caller has to preserve CSRs by itself, otherwise it
> > cannot continue execution after any enclave exception. Passing @leaf in
> > %ecx will allow saving/restoring CSRs in C by setjmp()/longjmp(), with
> > the help of an exit handler. But if the C caller has already preserved
> > CSRs, why preserve CSRs again inside the enclave? It looks to me things
> > can be simplified only if the host process handles no enclave exceptions
> > (or exceptions inside the enclave will crash the calling thread). Thus
> > the only case of enclave EEXIT'ing back to its caller is considered
> > valid, hence the enclave will always be able to restore CSRs, so that
> > neither vDSO nor its caller has to preserve CSRs.
> >
> > Is my understanding correct?
> >
diff mbox series

Patch

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 657e01d34d02..fa50c76a17a8 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -24,6 +24,7 @@  VDSO32-$(CONFIG_IA32_EMULATION)	:= y
 
 # files to link into the vdso
 vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
+vobjs-$(VDSO64-y)		+= vsgx_enter_enclave.o
 
 # files to link into kernel
 obj-y				+= vma.o extable.o
@@ -90,6 +91,7 @@  $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS
 CFLAGS_REMOVE_vclock_gettime.o = -pg
 CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
 CFLAGS_REMOVE_vgetcpu.o = -pg
+CFLAGS_REMOVE_vsgx_enter_enclave.o = -pg
 
 #
 # X32 processes use x32 vDSO to access 64bit kernel data.
diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index 36b644e16272..4bf48462fca7 100644
--- a/arch/x86/entry/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
@@ -27,6 +27,7 @@  VERSION {
 		__vdso_time;
 		clock_getres;
 		__vdso_clock_getres;
+		__vdso_sgx_enter_enclave;
 	local: *;
 	};
 }
diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
new file mode 100644
index 000000000000..94a8e5f99961
--- /dev/null
+++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
@@ -0,0 +1,187 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+#include <asm/errno.h>
+
+#include "extable.h"
+
+#define EX_LEAF		0*8
+#define EX_TRAPNR	0*8+4
+#define EX_ERROR_CODE	0*8+6
+#define EX_ADDRESS	1*8
+
+.code64
+.section .text, "ax"
+
+/**
+ * __vdso_sgx_enter_enclave() - Enter an SGX enclave
+ * @leaf:	ENCLU leaf, must be EENTER or ERESUME
+ * @tcs:	TCS, must be non-NULL
+ * @e:		Optional struct sgx_enclave_exception instance
+ * @handler:	Optional enclave exit handler
+ *
+ * **Important!**  __vdso_sgx_enter_enclave() is **NOT** compliant with the
+ * x86-64 ABI, i.e. cannot be called from standard C code.
+ *
+ * Input ABI:
+ *  @leaf	%eax
+ *  @tcs	8(%rsp)
+ *  @e 		0x10(%rsp)
+ *  @handler	0x18(%rsp)
+ *
+ * Output ABI:
+ *  @ret	%eax
+ *
+ * All general purpose registers except RAX, RBX and RCX are passed as-is to
+ * the enclave. RAX, RBX and RCX are consumed by EENTER and ERESUME and are
+ * loaded with @leaf, asynchronous exit pointer, and @tcs respectively.
+ *
+ * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the
+ * pre-enclave state, e.g. to retrieve @e and @handler after an enclave exit.
+ * All other registers are available for use by the enclave and its runtime,
+ * e.g. an enclave can push additional data onto the stack (and modify RSP) to
+ * pass information to the optional exit handler (see below).
+ *
+ * Most exceptions reported on ENCLU, including those that occur within the
+ * enclave, are fixed up and reported synchronously instead of being delivered
+ * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are
+ * never fixed up and are always delivered via standard signals. On synchronously
+ * reported exceptions, -EFAULT is returned and details about the exception are
+ * recorded in @e, the optional sgx_enclave_exception struct.
+ *
+ * If an exit handler is provided, the handler will be invoked on synchronous
+ * exits from the enclave and for all synchronously reported exceptions. In
+ * the latter case, @e is filled prior to invoking the handler.
+ *
+ * The exit handler's return value is interpreted as follows:
+ *  >0:		continue, restart __vdso_sgx_enter_enclave() with @ret as @leaf
+ *   0:		success, return @ret to the caller
+ *  <0:		error, return @ret to the caller
+ *
+ * The userspace exit handler is responsible for unwinding the stack, e.g. to
+ * pop @e, u_rsp and @tcs, prior to returning to __vdso_sgx_enter_enclave().
+ * The exit handler may also transfer control, e.g. via longjmp() or a C++
+ * exception, without returning to __vdso_sgx_enter_enclave().
+ *
+ * Return:
+ *  0 on success,
+ *  -EINVAL if ENCLU leaf is not allowed,
+ *  -EFAULT if an exception occurs on ENCLU or within the enclave
+ *  -errno for all other negative values returned by the userspace exit handler
+ */
+#ifdef SGX_KERNEL_DOC
+/* C-style function prototype to coerce kernel-doc into parsing the comment. */
+int __vdso_sgx_enter_enclave(int leaf, void *tcs,
+			     struct sgx_enclave_exception *e,
+			     sgx_enclave_exit_handler_t handler);
+#endif
+SYM_FUNC_START(__vdso_sgx_enter_enclave)
+	/* Prolog */
+	.cfi_startproc
+	push	%rbp
+	.cfi_adjust_cfa_offset	8
+	.cfi_rel_offset		%rbp, 0
+	mov	%rsp, %rbp
+	.cfi_def_cfa_register	%rbp
+
+.Lenter_enclave:
+	/* EENTER <= leaf <= ERESUME */
+	cmp	$0x2, %eax
+	jb	.Linvalid_leaf
+	cmp	$0x3, %eax
+	ja	.Linvalid_leaf
+
+	/* Load TCS and AEP */
+	mov	0x10(%rbp), %rbx
+	lea	.Lasync_exit_pointer(%rip), %rcx
+
+	/* Single ENCLU serving as both EENTER and AEP (ERESUME) */
+.Lasync_exit_pointer:
+.Lenclu_eenter_eresume:
+	enclu
+
+	/* EEXIT jumps here unless the enclave is doing something fancy. */
+	xor	%eax, %eax
+
+	/* Invoke userspace's exit handler if one was provided. */
+.Lhandle_exit:
+	cmp	$0, 0x20(%rbp)
+	jne	.Linvoke_userspace_handler
+
+.Lout:
+	leave
+	.cfi_def_cfa		%rsp, 8
+	ret
+
+	/* The out-of-line code runs with the pre-leave stack frame. */
+	.cfi_def_cfa		%rbp, 16
+
+.Linvalid_leaf:
+	mov	$(-EINVAL), %eax
+	jmp	.Lout
+
+.Lhandle_exception:
+	mov	0x18(%rbp), %rcx
+	test    %rcx, %rcx
+	je	.Lskip_exception_info
+
+	/* Fill optional exception info. */
+	mov	%eax, EX_LEAF(%rcx)
+	mov	%di,  EX_TRAPNR(%rcx)
+	mov	%si,  EX_ERROR_CODE(%rcx)
+	mov	%rdx, EX_ADDRESS(%rcx)
+.Lskip_exception_info:
+	mov	$(-EFAULT), %eax
+	jmp	.Lhandle_exit
+
+.Linvoke_userspace_handler:
+	/* Pass the untrusted RSP (at exit) to the callback via %rcx. */
+	mov	%rsp, %rcx
+
+	/* Save the untrusted RSP in %rbx (non-volatile register). */
+	mov	%rsp, %rbx
+
+	/*
+	 * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned
+	 * _after_ pushing the parameters on the stack, hence the bonus push.
+	 */
+	and	$-0x10, %rsp
+	push	%rax
+
+	/* Push @e, the "return" value and @tcs as params to the callback. */
+	push	0x18(%rbp)
+	push	%rax
+	push	0x10(%rbp)
+
+	/* Clear RFLAGS.DF per x86_64 ABI */
+	cld
+
+	/* Load the callback pointer to %rax and invoke it via retpoline. */
+	mov	0x20(%rbp), %rax
+	call	.Lretpoline
+
+	/* Restore %rsp to its post-exit value. */
+	mov	%rbx, %rsp
+
+	/*
+	 * If the return from callback is zero or negative, return immediately,
+	 * else re-execute ENCLU with the positive return value interpreted as
+	 * the requested ENCLU leaf.
+	 */
+	cmp	$0, %eax
+	jle	.Lout
+	jmp	.Lenter_enclave
+
+.Lretpoline:
+	call	2f
+1:	pause
+	lfence
+	jmp	1b
+2:	mov	%rax, (%rsp)
+	ret
+	.cfi_endproc
+
+_ASM_VDSO_EXTABLE_HANDLE(.Lenclu_eenter_eresume, .Lhandle_exception)
+
+SYM_FUNC_END(__vdso_sgx_enter_enclave)
diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
index 57d0d30c79b3..e196cfd44b70 100644
--- a/arch/x86/include/uapi/asm/sgx.h
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -74,4 +74,41 @@  struct sgx_enclave_set_attribute {
 	__u64 attribute_fd;
 };
 
+/**
+ * struct sgx_enclave_exception - structure to report exceptions encountered in
+ *				  __vdso_sgx_enter_enclave()
+ *
+ * @leaf:	ENCLU leaf from \%eax at time of exception
+ * @trapnr:	exception trap number, a.k.a. fault vector
+ * @error_code:	exception error code
+ * @address:	exception address, e.g. CR2 on a #PF
+ * @reserved:	reserved for future use
+ */
+struct sgx_enclave_exception {
+	__u32 leaf;
+	__u16 trapnr;
+	__u16 error_code;
+	__u64 address;
+	__u64 reserved[2];
+};
+
+/**
+ * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by
+ *					__vdso_sgx_enter_enclave()
+ *
+ * @rdi:	RDI at the time of enclave exit
+ * @rsi:	RSI at the time of enclave exit
+ * @rdx:	RDX at the time of enclave exit
+ * @ursp:	RSP at the time of enclave exit (untrusted stack)
+ * @r8:		R8 at the time of enclave exit
+ * @r9:		R9 at the time of enclave exit
+ * @tcs:	Thread Control Structure used to enter enclave
+ * @ret:	0 on success (EEXIT), -EFAULT on an exception
+ * @e:		Pointer to struct sgx_enclave_exception (as provided by caller)
+ */
+typedef int (*sgx_enclave_exit_handler_t)(long rdi, long rsi, long rdx,
+					  long ursp, long r8, long r9,
+					  void *tcs, int ret,
+					  struct sgx_enclave_exception *e);
+
 #endif /* _UAPI_ASM_X86_SGX_H */
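
For completeness, a minimal sketch of an exit handler matching the
sgx_enclave_exit_handler_t signature above. This is illustrative only and
assumes the enclave leaves the untrusted stack as it found it; as discussed
up-thread, __vdso_sgx_enter_enclave() restores %rsp itself after the callback
returns, and an ordinary C handler preserves %rbp as required.

	#include <errno.h>
	#include <asm/sgx.h>

	/*
	 * Propagate the error when an exception was reported (details are in
	 * *e), otherwise treat a plain EEXIT as success.  Returning a
	 * positive ENCLU leaf (e.g. ERESUME) instead would cause
	 * __vdso_sgx_enter_enclave() to re-enter the enclave.
	 */
	static int example_exit_handler(long rdi, long rsi, long rdx, long ursp,
					long r8, long r9, void *tcs, int ret,
					struct sgx_enclave_exception *e)
	{
		if (ret == -EFAULT)
			return ret;

		return 0;
	}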